2024-05-22 01:42:50 +00:00
|
|
|
|
use std::collections::HashMap;
|
|
|
|
|
use std::convert::{identity, Infallible};
|
|
|
|
|
use std::io::{Cursor, Write};
|
|
|
|
|
use std::marker::PhantomData;
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
use compact_str::{CompactString, ToCompactString};
|
2023-10-06 23:46:20 +00:00
|
|
|
|
use either::Either;
|
2023-10-01 21:04:32 +00:00
|
|
|
|
use nom::branch::alt;
|
2023-10-08 20:15:55 +00:00
|
|
|
|
use nom::bytes::complete::{tag, tag_no_case};
|
2023-10-04 17:31:03 +00:00
|
|
|
|
use nom::character::complete::{
|
2023-10-14 19:41:36 +00:00
|
|
|
|
alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
|
2023-10-08 20:15:55 +00:00
|
|
|
|
satisfy, space1, tab,
|
2023-10-04 17:31:03 +00:00
|
|
|
|
};
|
2023-10-23 22:27:54 +00:00
|
|
|
|
use nom::combinator::{eof, fail, map, not, opt, peek, recognize};
|
2023-10-23 21:52:02 +00:00
|
|
|
|
use nom::error::ErrorKind;
|
2023-10-25 22:30:11 +00:00
|
|
|
|
use nom::multi::{many0_count, many1, many1_count, many_till, separated_list1};
|
2023-10-01 21:04:32 +00:00
|
|
|
|
use nom::sequence::tuple;
|
2024-09-05 15:22:54 +00:00
|
|
|
|
use nom::{IResult, Offset, Parser, Slice};
|
2023-10-01 21:04:32 +00:00
|
|
|
|
use nom_locate::LocatedSpan;
|
2023-10-25 22:30:11 +00:00
|
|
|
|
use quick_xml::events::{BytesText, Event};
|
2023-10-25 17:45:59 +00:00
|
|
|
|
use serde::{Deserialize, Serialize};
|
2023-10-25 22:30:11 +00:00
|
|
|
|
use strum::IntoStaticStr;
|
2023-10-16 21:45:45 +00:00
|
|
|
|
use tracing::trace;
|
2023-10-05 19:21:23 +00:00
|
|
|
|
use unicode_segmentation::UnicodeSegmentation;
|
2023-10-01 21:04:32 +00:00
|
|
|
|
|
2023-10-25 22:30:11 +00:00
|
|
|
|
#[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, IntoStaticStr)]
|
|
|
|
|
// The alternative would be to implement a serde serializer for this one enum, but that's disgusting
|
|
|
|
|
#[strum(serialize_all = "snake_case")]
|
|
|
|
|
#[serde(rename_all = "snake_case")]
|
2023-10-05 18:05:03 +00:00
|
|
|
|
pub enum MentionType {
|
|
|
|
|
Community,
|
|
|
|
|
User,
|
2023-10-26 19:23:59 +00:00
|
|
|
|
MatrixUser,
|
2023-10-05 18:05:03 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl MentionType {
|
|
|
|
|
pub fn to_char(&self) -> char {
|
|
|
|
|
match self {
|
|
|
|
|
MentionType::Community => '!',
|
|
|
|
|
MentionType::User => '@',
|
2023-10-26 19:23:59 +00:00
|
|
|
|
MentionType::MatrixUser => ':',
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn separator(&self) -> char {
|
|
|
|
|
match self {
|
|
|
|
|
MentionType::Community | MentionType::User => '@',
|
|
|
|
|
MentionType::MatrixUser => ':',
|
2023-10-05 18:05:03 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-25 17:45:59 +00:00
|
|
|
|
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
|
2023-10-14 19:41:36 +00:00
|
|
|
|
pub enum Token {
|
|
|
|
|
PlainText(CompactString),
|
|
|
|
|
Sequence(Vec<Token>),
|
|
|
|
|
Quote(Box<Token>),
|
|
|
|
|
Small(Box<Token>),
|
|
|
|
|
Bold(Box<Token>),
|
|
|
|
|
Italic(Box<Token>),
|
|
|
|
|
Center(Box<Token>),
|
|
|
|
|
Strikethrough(Box<Token>),
|
|
|
|
|
PlainTag(String),
|
|
|
|
|
InlineCode(String),
|
|
|
|
|
InlineMath(String),
|
|
|
|
|
UrlRaw(String),
|
|
|
|
|
UrlNoEmbed(String),
|
2023-10-05 17:09:26 +00:00
|
|
|
|
Link {
|
2023-10-14 19:41:36 +00:00
|
|
|
|
label: Box<Token>,
|
|
|
|
|
href: String,
|
2023-10-05 17:09:26 +00:00
|
|
|
|
embed: bool,
|
|
|
|
|
},
|
2023-10-04 17:31:03 +00:00
|
|
|
|
BlockCode {
|
2023-10-14 19:41:36 +00:00
|
|
|
|
lang: Option<String>,
|
|
|
|
|
inner: String,
|
2023-10-04 17:31:03 +00:00
|
|
|
|
},
|
2023-10-14 19:41:36 +00:00
|
|
|
|
BlockMath(String),
|
2023-10-04 17:31:03 +00:00
|
|
|
|
Function {
|
2023-10-14 19:41:36 +00:00
|
|
|
|
name: String,
|
|
|
|
|
params: HashMap<String, Option<String>>,
|
|
|
|
|
inner: Box<Token>,
|
2023-10-04 17:31:03 +00:00
|
|
|
|
},
|
2023-10-05 18:05:03 +00:00
|
|
|
|
Mention {
|
2023-10-14 19:41:36 +00:00
|
|
|
|
name: String,
|
|
|
|
|
host: Option<String>,
|
2023-10-05 18:05:03 +00:00
|
|
|
|
mention_type: MentionType,
|
|
|
|
|
},
|
2023-10-14 19:41:36 +00:00
|
|
|
|
UnicodeEmoji(String),
|
2023-10-30 22:00:46 +00:00
|
|
|
|
ShortcodeEmoji {
|
|
|
|
|
shortcode: String,
|
|
|
|
|
host: Option<String>,
|
|
|
|
|
},
|
2023-10-14 19:41:36 +00:00
|
|
|
|
Hashtag(String),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
impl Token {
|
2023-10-08 20:15:55 +00:00
|
|
|
|
fn str_content_left(&self) -> Option<&str> {
|
|
|
|
|
match self {
|
|
|
|
|
Token::PlainText(text) => Some(text.as_ref()),
|
|
|
|
|
Token::Sequence(tokens) => tokens.first().and_then(Token::str_content_left),
|
|
|
|
|
Token::Quote(inner) => inner.str_content_left(),
|
|
|
|
|
Token::Small(inner) => inner.str_content_left(),
|
|
|
|
|
Token::Bold(inner) => inner.str_content_left(),
|
|
|
|
|
Token::Italic(inner) => inner.str_content_left(),
|
|
|
|
|
Token::Center(inner) => inner.str_content_left(),
|
|
|
|
|
Token::Strikethrough(inner) => inner.str_content_left(),
|
|
|
|
|
Token::PlainTag(tag) => Some(tag.as_ref()),
|
|
|
|
|
Token::UrlRaw(url) => Some(url.as_ref()),
|
|
|
|
|
Token::UrlNoEmbed(url) => Some(url.as_ref()),
|
|
|
|
|
Token::Link { label, .. } => label.str_content_left(),
|
|
|
|
|
Token::Function { inner, .. } => inner.str_content_left(),
|
|
|
|
|
Token::Mention { name, .. } => Some(name.as_ref()),
|
|
|
|
|
Token::UnicodeEmoji(code) => Some(code.as_ref()),
|
|
|
|
|
Token::Hashtag(tag) => Some(tag.as_ref()),
|
|
|
|
|
_ => None,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn str_content_right(&self) -> Option<&str> {
|
|
|
|
|
match self {
|
|
|
|
|
Token::PlainText(text) => Some(text.as_ref()),
|
|
|
|
|
Token::Sequence(tokens) => tokens.last().and_then(Token::str_content_right),
|
|
|
|
|
Token::Quote(inner) => inner.str_content_right(),
|
|
|
|
|
Token::Small(inner) => inner.str_content_right(),
|
|
|
|
|
Token::Bold(inner) => inner.str_content_right(),
|
|
|
|
|
Token::Italic(inner) => inner.str_content_right(),
|
|
|
|
|
Token::Center(inner) => inner.str_content_right(),
|
|
|
|
|
Token::Strikethrough(inner) => inner.str_content_right(),
|
|
|
|
|
Token::PlainTag(tag) => Some(tag.as_ref()),
|
|
|
|
|
Token::UrlRaw(url) => Some(url.as_ref()),
|
|
|
|
|
Token::UrlNoEmbed(url) => Some(url.as_ref()),
|
|
|
|
|
Token::Link { label, .. } => label.str_content_right(),
|
|
|
|
|
Token::Function { inner, .. } => inner.str_content_right(),
|
|
|
|
|
Token::Mention { name, .. } => Some(name.as_ref()),
|
|
|
|
|
Token::UnicodeEmoji(code) => Some(code.as_ref()),
|
|
|
|
|
Token::Hashtag(tag) => Some(tag.as_ref()),
|
|
|
|
|
_ => None,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn inner(&self) -> Token {
|
|
|
|
|
match self {
|
|
|
|
|
plain @ Token::PlainText(_) => plain.clone(),
|
|
|
|
|
sequence @ Token::Sequence(_) => sequence.clone(),
|
|
|
|
|
Token::Quote(inner) => inner.inner(),
|
|
|
|
|
Token::Small(inner) => inner.inner(),
|
|
|
|
|
Token::Bold(inner) => inner.inner(),
|
|
|
|
|
Token::Italic(inner) => inner.inner(),
|
|
|
|
|
Token::Center(inner) => inner.inner(),
|
|
|
|
|
Token::Strikethrough(inner) => inner.inner(),
|
2023-10-14 19:41:36 +00:00
|
|
|
|
Token::PlainTag(text) => Token::PlainText(text.clone().into()),
|
|
|
|
|
Token::InlineCode(code) => Token::PlainText(code.clone().into()),
|
|
|
|
|
Token::InlineMath(math) => Token::PlainText(math.clone().into()),
|
|
|
|
|
Token::UrlRaw(url) => Token::PlainText(url.clone().into()),
|
|
|
|
|
Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()),
|
2023-10-08 20:15:55 +00:00
|
|
|
|
Token::Link { label, .. } => label.inner(),
|
2023-10-14 19:41:36 +00:00
|
|
|
|
Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()),
|
|
|
|
|
Token::BlockMath(math) => Token::PlainText(math.clone().into()),
|
2023-10-08 20:15:55 +00:00
|
|
|
|
Token::Function { inner, .. } => inner.inner(),
|
2023-10-14 19:41:36 +00:00
|
|
|
|
Token::Mention { name, .. } => Token::PlainText(name.clone().into()),
|
|
|
|
|
Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()),
|
2023-10-30 22:00:46 +00:00
|
|
|
|
Token::ShortcodeEmoji { shortcode, .. } => Token::PlainText(shortcode.clone().into()),
|
2023-10-14 19:41:36 +00:00
|
|
|
|
Token::Hashtag(tag) => Token::PlainText(tag.clone().into()),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2023-10-05 22:17:52 +00:00
|
|
|
|
|
|
|
|
|
fn merged(&self) -> Token {
|
|
|
|
|
match self {
|
|
|
|
|
Token::Sequence(tokens) => {
|
|
|
|
|
let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
|
|
|
|
|
if let Some(Token::PlainText(last)) = acc.last_mut() {
|
|
|
|
|
if let Token::PlainText(tok_text) = tok {
|
2023-10-14 19:41:36 +00:00
|
|
|
|
*last += tok_text.as_ref();
|
2023-10-05 22:17:52 +00:00
|
|
|
|
|
|
|
|
|
return acc;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-06 23:46:20 +00:00
|
|
|
|
if let Token::Sequence(seq) = tok {
|
|
|
|
|
let items = seq.iter().map(Token::merged).flat_map(|t| match t {
|
|
|
|
|
Token::Sequence(seq) => Either::Left(seq.into_iter()),
|
|
|
|
|
other => Either::Right(std::iter::once(other)),
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
for item in items {
|
|
|
|
|
if let Some(Token::PlainText(last)) = acc.last_mut() {
|
|
|
|
|
if let Token::PlainText(tok_text) = item {
|
2023-10-14 19:41:36 +00:00
|
|
|
|
*last += tok_text.as_ref();
|
2023-10-06 23:46:20 +00:00
|
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
acc.push(item);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return acc;
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-05 22:17:52 +00:00
|
|
|
|
acc.push(tok.merged());
|
|
|
|
|
acc
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if tokens_multi.len() == 1 {
|
|
|
|
|
return tokens_multi.into_iter().next().unwrap();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Token::Sequence(tokens_multi)
|
|
|
|
|
}
|
|
|
|
|
Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
|
|
|
|
|
Token::Small(inner) => Token::Small(Box::new(inner.merged())),
|
|
|
|
|
Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
|
|
|
|
|
Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
|
|
|
|
|
Token::Center(inner) => Token::Center(Box::new(inner.merged())),
|
|
|
|
|
Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
|
2023-10-07 18:40:01 +00:00
|
|
|
|
Token::Link { embed, label, href } => Token::Link {
|
|
|
|
|
label: Box::new(label.merged()),
|
|
|
|
|
href: href.clone(),
|
|
|
|
|
embed: *embed,
|
|
|
|
|
},
|
2023-10-05 22:17:52 +00:00
|
|
|
|
Token::Function {
|
|
|
|
|
name,
|
|
|
|
|
params,
|
|
|
|
|
inner,
|
|
|
|
|
} => Token::Function {
|
|
|
|
|
name: name.clone(),
|
|
|
|
|
params: params.clone(),
|
|
|
|
|
inner: Box::new(inner.merged()),
|
|
|
|
|
},
|
|
|
|
|
other => other.clone(),
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-10-25 22:30:11 +00:00
|
|
|
|
|
2023-10-27 22:33:09 +00:00
|
|
|
|
pub fn walk_map_collect<T>(&self, func: &impl Fn(&Token) -> Option<T>, out: &mut Vec<T>) {
|
2023-10-26 23:41:48 +00:00
|
|
|
|
if let Some(v) = func(self) {
|
|
|
|
|
out.push(v)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
match self {
|
|
|
|
|
Token::Sequence(items) => {
|
2023-10-27 22:33:09 +00:00
|
|
|
|
items.iter().for_each(|tok| tok.walk_map_collect(func, out));
|
2023-10-26 23:41:48 +00:00
|
|
|
|
}
|
|
|
|
|
Token::Quote(inner)
|
|
|
|
|
| Token::Small(inner)
|
|
|
|
|
| Token::Bold(inner)
|
|
|
|
|
| Token::Italic(inner)
|
|
|
|
|
| Token::Center(inner)
|
|
|
|
|
| Token::Function { inner, .. }
|
|
|
|
|
| Token::Link { label: inner, .. }
|
|
|
|
|
| Token::Strikethrough(inner) => inner.walk_map_collect(func, out),
|
|
|
|
|
_ => {}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-29 01:10:48 +00:00
|
|
|
|
pub fn walk_speech_transform(&mut self, func: &impl Fn(&mut CompactString)) {
|
|
|
|
|
match self {
|
|
|
|
|
Token::Sequence(items) => {
|
|
|
|
|
items
|
|
|
|
|
.iter_mut()
|
|
|
|
|
.for_each(|tok| tok.walk_speech_transform(func));
|
|
|
|
|
}
|
|
|
|
|
Token::Small(inner)
|
|
|
|
|
| Token::Bold(inner)
|
|
|
|
|
| Token::Italic(inner)
|
|
|
|
|
| Token::Center(inner)
|
|
|
|
|
| Token::Function { inner, .. }
|
|
|
|
|
| Token::Strikethrough(inner) => inner.walk_speech_transform(func),
|
|
|
|
|
Token::PlainText(text) => func(text),
|
|
|
|
|
_ => {}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-25 22:30:11 +00:00
|
|
|
|
fn write<T: Write>(&self, writer: &mut quick_xml::Writer<T>) -> quick_xml::Result<()> {
|
|
|
|
|
match self {
|
|
|
|
|
Token::PlainText(plain) => {
|
|
|
|
|
writer.write_event(Event::Text(BytesText::new(plain.as_str())))?;
|
|
|
|
|
}
|
|
|
|
|
Token::Sequence(sequence) => {
|
|
|
|
|
sequence.iter().try_for_each(|item| item.write(writer))?;
|
|
|
|
|
}
|
|
|
|
|
Token::Quote(inner) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("quote")
|
|
|
|
|
.write_inner_content(|w| inner.write(w))?;
|
|
|
|
|
}
|
|
|
|
|
Token::Small(inner) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("small")
|
|
|
|
|
.write_inner_content(|w| inner.write(w))?;
|
|
|
|
|
}
|
|
|
|
|
Token::Bold(inner) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("b")
|
|
|
|
|
.write_inner_content(|w| inner.write(w))?;
|
|
|
|
|
}
|
|
|
|
|
Token::Italic(inner) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("i")
|
|
|
|
|
.write_inner_content(|w| inner.write(w))?;
|
|
|
|
|
}
|
|
|
|
|
Token::Center(inner) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("center")
|
|
|
|
|
.write_inner_content(|w| inner.write(w))?;
|
|
|
|
|
}
|
|
|
|
|
Token::Strikethrough(inner) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("s")
|
|
|
|
|
.write_inner_content(|w| inner.write(w))?;
|
|
|
|
|
}
|
|
|
|
|
Token::PlainTag(plain) => {
|
|
|
|
|
writer.write_event(Event::Text(BytesText::new(plain.as_str())))?;
|
|
|
|
|
}
|
|
|
|
|
Token::InlineCode(code) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("inline-code")
|
|
|
|
|
.write_text_content(BytesText::new(code))?;
|
|
|
|
|
}
|
|
|
|
|
Token::InlineMath(math) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("inline-math")
|
|
|
|
|
.write_text_content(BytesText::new(math))?;
|
|
|
|
|
}
|
|
|
|
|
Token::UrlRaw(url) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("a")
|
|
|
|
|
.with_attribute(("href", url.as_str()))
|
|
|
|
|
.write_text_content(BytesText::new(url))?;
|
|
|
|
|
}
|
|
|
|
|
Token::UrlNoEmbed(url) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("a")
|
|
|
|
|
.with_attribute(("href", url.as_str()))
|
|
|
|
|
.with_attribute(("embed", "false"))
|
|
|
|
|
.write_text_content(BytesText::new(url))?;
|
|
|
|
|
}
|
|
|
|
|
Token::Link { label, href, embed } => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("a")
|
|
|
|
|
.with_attribute(("href", href.as_str()))
|
|
|
|
|
.with_attribute(("embed", if *embed { "true" } else { "false" }))
|
|
|
|
|
.write_inner_content(|w| label.write(w))?;
|
|
|
|
|
}
|
|
|
|
|
Token::BlockCode { inner, lang } => {
|
|
|
|
|
let mut ew = writer.create_element("code");
|
|
|
|
|
|
|
|
|
|
if let Some(language) = lang {
|
|
|
|
|
ew = ew.with_attribute(("lang", language.as_str()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ew.write_text_content(BytesText::new(inner))?;
|
|
|
|
|
}
|
|
|
|
|
Token::BlockMath(math) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("math")
|
|
|
|
|
.write_text_content(BytesText::new(math))?;
|
|
|
|
|
}
|
|
|
|
|
Token::Function {
|
|
|
|
|
inner,
|
|
|
|
|
name,
|
|
|
|
|
params,
|
|
|
|
|
} => {
|
|
|
|
|
let mut ew = writer
|
|
|
|
|
.create_element("fn")
|
|
|
|
|
.with_attribute(("name", name.as_str()));
|
|
|
|
|
|
|
|
|
|
for (k, v) in params {
|
|
|
|
|
ew = ew
|
|
|
|
|
.with_attribute((format!("arg-{k}").as_str(), v.as_deref().unwrap_or("")));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ew.write_inner_content(|w| inner.write(w))?;
|
|
|
|
|
}
|
|
|
|
|
Token::Mention {
|
|
|
|
|
name,
|
|
|
|
|
host,
|
|
|
|
|
mention_type,
|
|
|
|
|
} => {
|
|
|
|
|
let mut ew = writer
|
|
|
|
|
.create_element("mention")
|
|
|
|
|
.with_attribute(("name", name.as_str()))
|
|
|
|
|
.with_attribute(("type", mention_type.into()));
|
|
|
|
|
|
|
|
|
|
if let Some(host) = host {
|
|
|
|
|
ew = ew.with_attribute(("host", host.as_str()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ew.write_empty()?;
|
|
|
|
|
}
|
|
|
|
|
Token::UnicodeEmoji(text) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("ue")
|
|
|
|
|
.write_text_content(BytesText::new(text))?;
|
|
|
|
|
}
|
2023-10-30 22:00:46 +00:00
|
|
|
|
Token::ShortcodeEmoji { shortcode, host } => {
|
|
|
|
|
let mut ew = writer.create_element("ee");
|
|
|
|
|
|
|
|
|
|
if let Some(host) = host {
|
|
|
|
|
ew = ew.with_attribute(("host", host.as_str()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ew.write_text_content(BytesText::new(shortcode))?;
|
2023-10-25 22:30:11 +00:00
|
|
|
|
}
|
|
|
|
|
Token::Hashtag(tag) => {
|
|
|
|
|
writer
|
|
|
|
|
.create_element("hashtag")
|
2023-10-29 11:52:20 +00:00
|
|
|
|
.write_text_content(BytesText::new(tag.as_str()))?;
|
2023-10-25 22:30:11 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn to_xml_string(token: &Token) -> quick_xml::Result<String> {
|
|
|
|
|
let mut writer = quick_xml::Writer::new(Cursor::new(Vec::new()));
|
|
|
|
|
writer
|
|
|
|
|
.create_element("mmm")
|
|
|
|
|
.write_inner_content(|writer| token.write(writer))?;
|
|
|
|
|
Ok(String::from_utf8(writer.into_inner().into_inner())?)
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
|
|
|
|
|
2024-05-22 01:42:50 +00:00
|
|
|
|
pub fn janky_is_line_begin(input: Span<'_>) -> bool {
|
|
|
|
|
let offset = input.location_offset();
|
|
|
|
|
|
|
|
|
|
// VERY BAD
|
|
|
|
|
// Safety: This is very janky, but hopefully will work as long as nom-locate keeps the invariant of fragments being subslices of the input
|
|
|
|
|
// We do this to avoid scanning the entire input for a line separator when we just need the previous byte
|
|
|
|
|
offset == 0 || unsafe {
|
|
|
|
|
let frag_bytes = input.fragment().as_bytes();
|
|
|
|
|
let frag_ptr = frag_bytes.as_ptr();
|
|
|
|
|
let prev_byte = frag_ptr.offset(-1);
|
|
|
|
|
matches!(*prev_byte, b'\n')
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-16 21:45:45 +00:00
|
|
|
|
/// Extra data carried by every [`Span`].
#[derive(Debug, Default, Copy, Clone)]
pub struct SpanMeta {
    /// NOTE(review): presumably the current nesting depth, checked against
    /// `Context::depth_limit` — confirm where it is incremented.
    depth: usize,
}

impl SpanMeta {
    /// Creates metadata with the given `depth`.
    fn new(depth: usize) -> Self {
        SpanMeta { depth }
    }
}
|
|
|
|
|
|
|
|
|
|
/// Input type used by all parsers in this file: a located `&str` slice that
/// carries [`SpanMeta`] as its extra data.
type Span<'a> = LocatedSpan<&'a str, SpanMeta>;
|
2023-10-01 21:04:32 +00:00
|
|
|
|
|
|
|
|
|
/// Helpers for taking the part of an input that lies in front of a later
/// position within the same input.
trait SliceOffset {
    /// Returns the prefix of `self` that ends where `other` begins.
    fn up_to(&self, other: &Self) -> Self;

    /// Like [`SliceOffset::up_to`], but returns the underlying string slice.
    fn fragment_between<'a>(&self, other: &Self) -> &'a str
    where
        Self: 'a;
}
|
|
|
|
|
|
|
|
|
|
impl SliceOffset for Span<'_> {
    /// Slices `self` from its start up to the offset at which `other` begins.
    fn up_to(&self, other: &Self) -> Self {
        let consumed = self.offset(other);
        self.slice(..consumed)
    }

    /// Returns the text between the start of `self` and the start of `other`.
    fn fragment_between<'a>(&self, other: &Self) -> &'a str
    where
        Self: 'a,
    {
        let prefix = self.up_to(other);
        prefix.into_fragment()
    }
}
|
|
|
|
|
|
2023-10-06 23:46:20 +00:00
|
|
|
|
#[inline]
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn boxing_token(func: impl Fn(Box<Token>) -> Token) -> impl Fn(Token) -> Token {
|
2023-10-06 23:46:20 +00:00
|
|
|
|
move |tokens| func(Box::new(tokens))
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-07 17:44:39 +00:00
|
|
|
|
#[inline]
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn collect_sequence<T>(
|
|
|
|
|
func: impl Fn(Vec<T>) -> Token,
|
|
|
|
|
transform: impl Fn(Token) -> Token,
|
2024-05-22 01:42:50 +00:00
|
|
|
|
) -> impl Fn(&mut dyn Iterator<Item=T>) -> Token {
|
2023-10-07 17:44:39 +00:00
|
|
|
|
move |tokens| transform(func(tokens.collect()))
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-06 23:46:20 +00:00
|
|
|
|
#[inline]
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn collect_char_sequence(
|
|
|
|
|
func: impl Fn(String) -> Token,
|
2024-05-22 01:42:50 +00:00
|
|
|
|
) -> impl Fn(&mut dyn Iterator<Item=char>) -> Token {
|
2023-10-14 19:41:36 +00:00
|
|
|
|
move |chars| func(chars.collect())
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-23 21:52:02 +00:00
|
|
|
|
#[inline]
|
|
|
|
|
fn space1_unicode(input: Span) -> IResult<Span, Span> {
|
|
|
|
|
recognize(many1_count(tuple((
|
|
|
|
|
not(line_ending),
|
|
|
|
|
satisfy(char::is_whitespace),
|
|
|
|
|
))))(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-08 20:15:55 +00:00
|
|
|
|
#[inline]
|
|
|
|
|
fn alphanumeric1_unicode(input: Span) -> IResult<Span, Span> {
|
2024-05-22 01:42:50 +00:00
|
|
|
|
recognize(many1_count(char_alphanumeric_unicode))(input)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
|
fn char_alphanumeric_unicode(input: Span) -> IResult<Span, char> {
|
|
|
|
|
satisfy(char::is_alphanumeric)(input)
|
2023-10-08 20:15:55 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-01 21:04:32 +00:00
|
|
|
|
fn spliced<'a>(
|
|
|
|
|
segments: &[Span<'a>],
|
|
|
|
|
func: impl Fn(Span) -> IResult<Span, Token>,
|
|
|
|
|
parent: Span<'a>,
|
2023-10-14 19:41:36 +00:00
|
|
|
|
) -> IResult<Span<'a>, Token, nom::error::Error<Span<'a>>> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
let combined = segments
|
|
|
|
|
.iter()
|
|
|
|
|
.copied()
|
|
|
|
|
.map(Span::into_fragment)
|
2023-10-06 23:46:20 +00:00
|
|
|
|
.collect::<Vec<_>>()
|
|
|
|
|
.join("\n");
|
2023-10-01 21:04:32 +00:00
|
|
|
|
let cum_offset_combined = segments
|
|
|
|
|
.iter()
|
|
|
|
|
.scan(0, |acc, &x| {
|
|
|
|
|
*acc += x.len();
|
|
|
|
|
Some(*acc)
|
|
|
|
|
})
|
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
|
let current_seg = |input: Span| {
|
|
|
|
|
cum_offset_combined
|
|
|
|
|
.iter()
|
|
|
|
|
.enumerate()
|
2023-10-06 23:46:20 +00:00
|
|
|
|
.take_while(|(_, &o)| o > input.location_offset())
|
2023-10-01 21:04:32 +00:00
|
|
|
|
.map(|(i, o)| (segments[i], o))
|
|
|
|
|
.last()
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
type NE<E> = nom::Err<E>;
|
|
|
|
|
type NomError<'x> = nom::error::Error<Span<'x>>;
|
|
|
|
|
|
2023-10-26 16:38:45 +00:00
|
|
|
|
let spliced_span = Span::new_extra(
|
2023-10-16 21:45:45 +00:00
|
|
|
|
&combined,
|
|
|
|
|
segments.first().map_or(SpanMeta::new(0), |s| s.extra),
|
|
|
|
|
);
|
2023-10-26 16:38:45 +00:00
|
|
|
|
let (input, inner) = match func(spliced_span) {
|
2023-10-14 19:41:36 +00:00
|
|
|
|
Ok(s) => s,
|
2023-10-01 21:04:32 +00:00
|
|
|
|
Err(e) => {
|
|
|
|
|
return match e {
|
|
|
|
|
NE::Error(e) => {
|
|
|
|
|
let offset_new = e.input.location_offset();
|
|
|
|
|
if let Some((seg_parent, offset_seg_new)) = current_seg(e.input) {
|
|
|
|
|
let offset = offset_new - offset_seg_new;
|
|
|
|
|
let offset_orig = offset + seg_parent.location_offset();
|
|
|
|
|
Err(NE::Error(NomError::new(
|
2023-10-16 21:45:45 +00:00
|
|
|
|
Span::new_extra(
|
|
|
|
|
&parent.into_fragment()[offset_orig..],
|
|
|
|
|
seg_parent.extra,
|
|
|
|
|
),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
e.code,
|
|
|
|
|
)))
|
|
|
|
|
} else {
|
|
|
|
|
// ???
|
|
|
|
|
Err(NE::Failure(NomError::new(parent, ErrorKind::Fail)))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
NE::Failure(e) => Err(NE::Error(NomError::new(parent, e.code))),
|
|
|
|
|
NE::Incomplete(i) => Err(NE::Incomplete(i)),
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let out = if let Some((seg_parent, offset_seg_new)) = current_seg(input) {
|
|
|
|
|
let offset = input.location_offset() - offset_seg_new;
|
|
|
|
|
let offset_orig = offset + seg_parent.location_offset();
|
|
|
|
|
parent.slice(offset_orig..)
|
|
|
|
|
} else {
|
|
|
|
|
parent
|
|
|
|
|
};
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
Ok((out, inner))
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn space(input: Span) -> IResult<Span, Token> {
|
2023-10-05 17:09:26 +00:00
|
|
|
|
let (input, frag) = recognize(alt((one_char('\u{0020}'), one_char('\u{3000}'), tab)))(input)?;
|
|
|
|
|
Ok((input, Token::PlainText(frag.into_fragment().into())))
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-08 20:15:55 +00:00
|
|
|
|
#[derive(Copy, Clone)]
|
|
|
|
|
struct Matcher<'a, 'b, T: Clone> {
|
2023-10-07 17:44:39 +00:00
|
|
|
|
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
|
2024-05-22 01:42:50 +00:00
|
|
|
|
collector: &'a (dyn Fn(&mut dyn Iterator<Item=T>) -> Token + 'a),
|
2023-10-08 20:15:55 +00:00
|
|
|
|
_phantom_closure: PhantomData<&'a ()>,
|
|
|
|
|
_phantom_data: PhantomData<&'b ()>,
|
|
|
|
|
_phantom_output: PhantomData<fn() -> T>,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-08 20:15:55 +00:00
|
|
|
|
impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> {
|
2023-10-07 17:44:39 +00:00
|
|
|
|
fn new(
|
|
|
|
|
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
|
2024-05-22 01:42:50 +00:00
|
|
|
|
collector: &'a (dyn Fn(&mut dyn Iterator<Item=T>) -> Token + 'a),
|
2023-10-07 17:44:39 +00:00
|
|
|
|
) -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
matcher_inner,
|
|
|
|
|
collector,
|
2023-10-08 20:15:55 +00:00
|
|
|
|
_phantom_closure: PhantomData,
|
|
|
|
|
_phantom_data: PhantomData,
|
|
|
|
|
_phantom_output: PhantomData,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a, 'b> Matcher<'a, 'b, Infallible> {
|
|
|
|
|
// Don't break this invariant, else a monster will come at night and eat all your socks
|
|
|
|
|
fn reject() -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
matcher_inner: &fail::<_, Infallible, _>,
|
|
|
|
|
collector: &|_| unreachable!(),
|
2023-10-08 20:15:55 +00:00
|
|
|
|
_phantom_closure: PhantomData,
|
|
|
|
|
_phantom_data: PhantomData,
|
|
|
|
|
_phantom_output: PhantomData,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-08 20:15:55 +00:00
|
|
|
|
/// How strictly a delimiter's surroundings are checked.
///
/// NOTE(review): the exact meaning of each level is defined by the code that
/// consumes [`FlankingDelim`], which is not part of this section — confirm
/// there before relying on these names.
#[derive(Copy, Clone, Debug)]
enum FlankingRule {
    /// The relaxed rule set.
    Lenient,
    /// The strict rule set.
    Strict,
    /// No flanking requirement; this is what a bare parser converts to.
    DontCare,
}
|
|
|
|
|
|
|
|
|
|
struct FlankingDelim<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>>(
|
|
|
|
|
T,
|
|
|
|
|
FlankingRule,
|
|
|
|
|
PhantomData<&'a ()>,
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<(T, FlankingRule)>
|
2024-05-22 01:42:50 +00:00
|
|
|
|
for FlankingDelim<'a, T>
|
2023-10-08 20:15:55 +00:00
|
|
|
|
{
|
|
|
|
|
fn from((func, rule): (T, FlankingRule)) -> Self {
|
|
|
|
|
FlankingDelim(func, rule, PhantomData)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<T> for FlankingDelim<'a, T> {
|
|
|
|
|
fn from(func: T) -> Self {
|
|
|
|
|
FlankingDelim(func, FlankingRule::DontCare, PhantomData)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-16 21:45:45 +00:00
|
|
|
|
pub struct Context {
|
|
|
|
|
depth_limit: usize,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const DEFAULT_DEPTH_LIMIT: usize = 24;
|
|
|
|
|
|
|
|
|
|
impl Default for Context {
|
|
|
|
|
fn default() -> Self {
|
|
|
|
|
Context::new(DEFAULT_DEPTH_LIMIT)
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-10-01 21:04:32 +00:00
|
|
|
|
|
|
|
|
|
impl Context {
|
2023-10-16 21:45:45 +00:00
|
|
|
|
pub fn new(depth_limit: usize) -> Self {
|
|
|
|
|
Self { depth_limit }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn parse_full(&self, input: &str) -> Token {
|
|
|
|
|
match self.full(Span::new_extra(input, SpanMeta::default())) {
|
|
|
|
|
Ok((_, t)) => t.merged(),
|
|
|
|
|
Err(e) => {
|
|
|
|
|
trace!(input = input, "Full parser fail: {:?}", e);
|
|
|
|
|
Token::PlainText(e.to_compact_string())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn parse_inline(&self, input: &str) -> Token {
|
|
|
|
|
match self.full(Span::new_extra(input, SpanMeta::default())) {
|
|
|
|
|
Ok((_, t)) => t.merged(),
|
|
|
|
|
Err(e) => {
|
|
|
|
|
trace!(input = input, "Inline parser fail: {:?}", e);
|
|
|
|
|
Token::PlainText(e.to_compact_string())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn parse_ui(&self, input: &str) -> Token {
|
|
|
|
|
match self.inline_ui(Span::new_extra(input, SpanMeta::default())) {
|
|
|
|
|
Ok((_, t)) => t.merged(),
|
|
|
|
|
Err(e) => {
|
|
|
|
|
trace!(input = input, "Inline parser fail: {:?}", e);
|
|
|
|
|
Token::PlainText(e.to_compact_string())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-26 19:08:51 +00:00
|
|
|
|
pub fn parse_profile_fields(&self, input: &str) -> Token {
|
|
|
|
|
match self.inline_profile_fields(Span::new_extra(input, SpanMeta::default())) {
|
|
|
|
|
Ok((_, t)) => t.merged(),
|
|
|
|
|
Err(e) => {
|
|
|
|
|
trace!(input = input, "Profile field parser fail: {:?}", e);
|
|
|
|
|
Token::PlainText(e.to_compact_string())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-04 17:44:27 +00:00
|
|
|
|
#[inline]
|
2023-10-08 20:15:55 +00:00
|
|
|
|
fn partial(
|
2023-10-01 21:04:32 +00:00
|
|
|
|
&self,
|
2023-10-14 19:41:36 +00:00
|
|
|
|
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token> + 'static,
|
|
|
|
|
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token> + '_ {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
move |input| func(self, input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-16 21:45:45 +00:00
|
|
|
|
#[inline]
|
|
|
|
|
fn partial_span(
|
|
|
|
|
&self,
|
|
|
|
|
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'static,
|
|
|
|
|
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>> + '_ {
|
|
|
|
|
move |input| func(self, input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-12-30 18:23:32 +00:00
|
|
|
|
map(
|
|
|
|
|
many_till(self.partial(Self::full_single), eof).map(|v| v.0),
|
|
|
|
|
Token::Sequence,
|
|
|
|
|
)(input)
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-12-30 18:23:32 +00:00
|
|
|
|
map(
|
|
|
|
|
many_till(self.partial(Self::inline_single), eof).map(|v| v.0),
|
|
|
|
|
Token::Sequence,
|
|
|
|
|
)(input)
|
2023-10-05 22:17:52 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 22:17:52 +00:00
|
|
|
|
map(
|
2023-12-30 18:23:32 +00:00
|
|
|
|
many_till(self.partial(Self::inline_label_safe_single), eof).map(|v| v.0),
|
2023-10-05 22:17:52 +00:00
|
|
|
|
Token::Sequence,
|
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-26 19:08:51 +00:00
|
|
|
|
fn inline_profile_fields<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
|
|
|
|
map(
|
2023-12-30 18:23:32 +00:00
|
|
|
|
many_till(
|
|
|
|
|
alt((
|
|
|
|
|
self.partial(Self::unicode_emoji),
|
|
|
|
|
self.partial(Self::tag_mention),
|
|
|
|
|
self.partial(Self::tag_hashtag),
|
|
|
|
|
self.partial(Self::raw_url),
|
|
|
|
|
self.partial(Self::tag_raw_text),
|
|
|
|
|
)),
|
|
|
|
|
eof,
|
|
|
|
|
)
|
2024-05-22 01:42:50 +00:00
|
|
|
|
.map(|v| v.0),
|
2023-10-26 19:08:51 +00:00
|
|
|
|
Token::Sequence,
|
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-16 21:45:45 +00:00
|
|
|
|
fn inline_ui<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
|
|
|
|
map(
|
2023-12-30 18:23:32 +00:00
|
|
|
|
many_till(
|
|
|
|
|
alt((
|
|
|
|
|
self.partial(Self::unicode_emoji),
|
|
|
|
|
self.partial(Self::shortcode_emoji),
|
|
|
|
|
self.partial(Self::tag_raw_text),
|
|
|
|
|
)),
|
|
|
|
|
eof,
|
|
|
|
|
)
|
2024-05-22 01:42:50 +00:00
|
|
|
|
.map(|v| v.0),
|
2023-10-16 21:45:45 +00:00
|
|
|
|
Token::Sequence,
|
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 22:17:52 +00:00
|
|
|
|
alt((
|
|
|
|
|
self.partial(Self::tag_bold_asterisk),
|
|
|
|
|
self.partial(Self::tag_italic_asterisk),
|
|
|
|
|
self.partial(Self::tag_bold_underscore),
|
|
|
|
|
self.partial(Self::tag_italic_underscore),
|
|
|
|
|
))(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 22:17:52 +00:00
|
|
|
|
let (input, token) = alt((
|
2023-10-16 21:45:45 +00:00
|
|
|
|
self.increase_nesting(alt((
|
|
|
|
|
self.partial(Self::unicode_emoji),
|
2023-10-07 18:40:01 +00:00
|
|
|
|
self.partial(Self::tag_block_center),
|
|
|
|
|
self.partial(Self::tag_small),
|
|
|
|
|
self.partial(Self::tag_plain),
|
|
|
|
|
self.partial(Self::tag_bold),
|
|
|
|
|
self.partial(Self::tag_italic),
|
|
|
|
|
self.partial(Self::tag_strikethrough),
|
2023-10-16 21:45:45 +00:00
|
|
|
|
self.partial(Self::url_no_embed),
|
|
|
|
|
self.partial(Self::base_bold_italic),
|
|
|
|
|
self.partial(Self::tag_block_code),
|
|
|
|
|
self.partial(Self::tag_inline_code),
|
|
|
|
|
self.partial(Self::tag_quote),
|
|
|
|
|
self.partial(Self::tag_block_math),
|
|
|
|
|
self.partial(Self::tag_inline_math),
|
|
|
|
|
self.partial(Self::tag_strikethrough_tilde),
|
|
|
|
|
self.partial(Self::tag_func),
|
|
|
|
|
self.partial(Self::tag_mention),
|
|
|
|
|
self.partial(Self::tag_hashtag),
|
|
|
|
|
self.partial(Self::shortcode_emoji),
|
|
|
|
|
self.partial(Self::link),
|
|
|
|
|
self.partial(Self::raw_url),
|
|
|
|
|
))),
|
2023-10-14 19:41:36 +00:00
|
|
|
|
self.partial(Self::tag_raw_text),
|
2023-10-05 22:17:52 +00:00
|
|
|
|
))(input)?;
|
2023-10-01 21:04:32 +00:00
|
|
|
|
Ok((input, token))
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-16 21:45:45 +00:00
|
|
|
|
alt((
|
|
|
|
|
self.increase_nesting(alt((
|
|
|
|
|
self.partial(Self::unicode_emoji),
|
|
|
|
|
self.partial(Self::tag_small),
|
|
|
|
|
self.partial(Self::tag_plain),
|
|
|
|
|
self.partial(Self::tag_bold),
|
|
|
|
|
self.partial(Self::tag_italic),
|
|
|
|
|
self.partial(Self::tag_strikethrough),
|
|
|
|
|
self.partial(Self::url_no_embed),
|
|
|
|
|
self.partial(Self::base_bold_italic),
|
|
|
|
|
self.partial(Self::tag_inline_code),
|
|
|
|
|
self.partial(Self::tag_inline_math),
|
|
|
|
|
self.partial(Self::tag_strikethrough_tilde),
|
|
|
|
|
self.partial(Self::tag_func),
|
|
|
|
|
self.partial(Self::tag_mention),
|
|
|
|
|
self.partial(Self::tag_hashtag),
|
|
|
|
|
self.partial(Self::shortcode_emoji),
|
|
|
|
|
self.partial(Self::link),
|
|
|
|
|
self.partial(Self::raw_url),
|
|
|
|
|
))),
|
2023-10-14 19:41:36 +00:00
|
|
|
|
self.partial(Self::tag_raw_text),
|
2023-10-16 21:45:45 +00:00
|
|
|
|
))(input)
|
2023-10-05 22:17:52 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-06 23:46:20 +00:00
|
|
|
|
let (input, token) = alt((
|
2023-10-16 21:45:45 +00:00
|
|
|
|
self.increase_nesting(alt((
|
|
|
|
|
self.partial(Self::unicode_emoji),
|
|
|
|
|
self.partial(Self::url_no_embed),
|
|
|
|
|
self.partial(Self::tag_inline_code),
|
|
|
|
|
self.partial(Self::tag_inline_math),
|
|
|
|
|
self.partial(Self::tag_func),
|
|
|
|
|
self.partial(Self::tag_mention),
|
|
|
|
|
self.partial(Self::tag_hashtag),
|
|
|
|
|
self.partial(Self::shortcode_emoji),
|
|
|
|
|
self.partial(Self::raw_url),
|
|
|
|
|
))),
|
2023-10-14 19:41:36 +00:00
|
|
|
|
self.partial(Self::tag_raw_text),
|
2023-10-06 23:46:20 +00:00
|
|
|
|
))(input)?;
|
|
|
|
|
Ok((input, token))
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 22:17:52 +00:00
|
|
|
|
let (input, token) = alt((
|
2023-10-16 21:45:45 +00:00
|
|
|
|
self.increase_nesting(alt((
|
|
|
|
|
self.partial(Self::unicode_emoji),
|
|
|
|
|
self.partial(Self::tag_small),
|
|
|
|
|
self.partial(Self::tag_plain),
|
|
|
|
|
self.partial(Self::tag_bold),
|
|
|
|
|
self.partial(Self::tag_italic),
|
|
|
|
|
self.partial(Self::tag_strikethrough),
|
|
|
|
|
self.partial(Self::base_bold_italic),
|
|
|
|
|
self.partial(Self::tag_strikethrough_tilde),
|
|
|
|
|
self.partial(Self::tag_func),
|
|
|
|
|
self.partial(Self::shortcode_emoji),
|
|
|
|
|
))),
|
2023-10-14 19:41:36 +00:00
|
|
|
|
self.partial(Self::tag_raw_text),
|
2023-10-05 22:17:52 +00:00
|
|
|
|
))(input)?;
|
2023-10-05 17:09:26 +00:00
|
|
|
|
Ok((input, token))
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
|
|
|
|
|
|
|
|
|
|
if let (None, None) = leading_spaces {
|
2024-05-22 01:42:50 +00:00
|
|
|
|
if !janky_is_line_begin(input) {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
return fail(input);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let quote_line = |input| tuple((tag(">"), opt(space), not_line_ending))(input);
|
|
|
|
|
|
|
|
|
|
let orig_input = input;
|
|
|
|
|
let (input, lines) = separated_list1(line_ending, quote_line)(input)?;
|
|
|
|
|
|
|
|
|
|
let quote_lines = lines
|
|
|
|
|
.into_iter()
|
|
|
|
|
.map(|(_, _, text)| text)
|
|
|
|
|
.collect::<Vec<_>>();
|
|
|
|
|
|
|
|
|
|
if quote_lines.len() == 1
|
|
|
|
|
&& quote_lines
|
2024-05-22 01:42:50 +00:00
|
|
|
|
.iter()
|
|
|
|
|
.map(Span::fragment)
|
|
|
|
|
.copied()
|
|
|
|
|
.any(&str::is_empty)
|
2023-10-01 21:04:32 +00:00
|
|
|
|
{
|
|
|
|
|
return fail(input);
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-06 23:46:20 +00:00
|
|
|
|
let (_, inner) = spliced("e_lines, self.partial(Self::full), orig_input)?;
|
2023-10-01 21:04:32 +00:00
|
|
|
|
|
|
|
|
|
let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;
|
|
|
|
|
|
|
|
|
|
Ok((input, Token::Quote(Box::new(inner))))
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-04 17:31:03 +00:00
|
|
|
|
let tag_start = &tag("<center>");
|
|
|
|
|
let tag_end = &tag("</center>");
|
|
|
|
|
|
|
|
|
|
let (input, _) = opt(line_ending)(input)?;
|
|
|
|
|
|
2024-05-22 01:42:50 +00:00
|
|
|
|
if !janky_is_line_begin(input) {
|
2023-10-04 17:31:03 +00:00
|
|
|
|
return fail(input);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let (input, _) = tag_start(input)?;
|
|
|
|
|
let (input, _) = opt(line_ending)(input)?;
|
|
|
|
|
|
2023-10-05 22:17:52 +00:00
|
|
|
|
let (input, (center_seq, _)) = many_till(
|
|
|
|
|
self.partial(Self::inline_single),
|
2023-10-07 17:44:39 +00:00
|
|
|
|
tuple((opt(space1), opt(line_ending), tag_end)),
|
2023-10-05 22:17:52 +00:00
|
|
|
|
)(input)?;
|
2023-10-04 17:31:03 +00:00
|
|
|
|
|
2023-10-06 23:46:20 +00:00
|
|
|
|
Ok((
|
|
|
|
|
input,
|
|
|
|
|
boxing_token(Token::Center)(Token::Sequence(center_seq)),
|
|
|
|
|
))
|
2023-10-04 17:31:03 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 20:25:29 +00:00
|
|
|
|
let delim = &tag("```");
|
|
|
|
|
|
|
|
|
|
let (input, _) = opt(line_ending)(input)?;
|
|
|
|
|
|
2024-05-22 01:42:50 +00:00
|
|
|
|
if !janky_is_line_begin(input) {
|
2023-10-05 20:25:29 +00:00
|
|
|
|
return fail(input);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let (input, _) = delim(input)?;
|
|
|
|
|
let (input, lang) = opt(map(
|
2023-10-25 22:30:11 +00:00
|
|
|
|
recognize(many1(tuple((not(delim), not(line_ending), anychar)))),
|
2023-10-05 20:25:29 +00:00
|
|
|
|
Span::into_fragment,
|
|
|
|
|
))(input)?;
|
|
|
|
|
let (input, _) = line_ending(input)?;
|
|
|
|
|
|
|
|
|
|
let (input, code) = map(
|
|
|
|
|
recognize(many1_count(tuple((
|
|
|
|
|
not(tuple((line_ending, delim))),
|
|
|
|
|
anychar,
|
|
|
|
|
)))),
|
|
|
|
|
Span::into_fragment,
|
|
|
|
|
)(input)?;
|
|
|
|
|
|
|
|
|
|
let (input, _) = line_ending(input)?;
|
|
|
|
|
let (input, _) = delim(input)?;
|
2023-10-25 22:30:11 +00:00
|
|
|
|
// Trailing whitespace after the triple backtick
|
|
|
|
|
let (input, _) = opt(space1_unicode)(input)?;
|
|
|
|
|
// If we got this far, the next character should be a line ending
|
|
|
|
|
let (input, _) = not(tuple((not(line_ending), anychar)))(input)?;
|
2023-10-05 20:25:29 +00:00
|
|
|
|
let (input, _) = opt(line_ending)(input)?;
|
|
|
|
|
|
|
|
|
|
Ok((
|
|
|
|
|
input,
|
|
|
|
|
Token::BlockCode {
|
|
|
|
|
lang: lang.map(<&str>::into),
|
|
|
|
|
inner: code.into(),
|
|
|
|
|
},
|
|
|
|
|
))
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-04 17:44:27 +00:00
|
|
|
|
let start = &tag("\\[");
|
|
|
|
|
let end = &tag("\\]");
|
|
|
|
|
|
2023-10-04 17:31:03 +00:00
|
|
|
|
let (input, _) = opt(line_ending)(input)?;
|
|
|
|
|
|
2024-05-22 01:42:50 +00:00
|
|
|
|
if !janky_is_line_begin(input) {
|
2023-10-04 17:31:03 +00:00
|
|
|
|
return fail(input);
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-04 17:44:27 +00:00
|
|
|
|
let (input, _) = start(input)?;
|
2023-10-04 17:31:03 +00:00
|
|
|
|
let (input, _) = opt(line_ending)(input)?;
|
2023-12-30 19:02:25 +00:00
|
|
|
|
let (input, _) = opt(space1_unicode)(input)?;
|
2023-10-04 17:31:03 +00:00
|
|
|
|
|
2023-12-30 19:02:25 +00:00
|
|
|
|
let (input, math_span) = map(
|
|
|
|
|
many_till(anychar, tuple((opt(space1_unicode), opt(line_ending), end))),
|
|
|
|
|
|v| v.0,
|
|
|
|
|
)(input)?;
|
2023-10-04 17:31:03 +00:00
|
|
|
|
|
2023-10-25 22:30:11 +00:00
|
|
|
|
// Trailing whitespace after the closing delim
|
|
|
|
|
let (input, _) = opt(space1_unicode)(input)?;
|
|
|
|
|
// If we got this far, the next character should be a line ending
|
|
|
|
|
let (input, _) = not(tuple((not(line_ending), anychar)))(input)?;
|
2023-10-04 17:31:03 +00:00
|
|
|
|
let (input, _) = opt(line_ending)(input)?;
|
|
|
|
|
|
|
|
|
|
Ok((
|
|
|
|
|
input,
|
2023-12-30 19:02:25 +00:00
|
|
|
|
Token::BlockMath(math_span.into_iter().collect::<String>()),
|
2023-10-04 17:31:03 +00:00
|
|
|
|
))
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-06 23:46:20 +00:00
|
|
|
|
#[inline]
|
2023-10-08 20:15:55 +00:00
|
|
|
|
fn tag_delimited<'a, 'b: 'a, T: Clone, S: Clone, FOpen, FClose>(
|
2023-10-01 21:04:32 +00:00
|
|
|
|
&'a self,
|
2023-10-08 20:15:55 +00:00
|
|
|
|
opening_tag: impl Into<FlankingDelim<'b, FOpen>> + 'a,
|
|
|
|
|
closing_tag: impl Into<FlankingDelim<'b, FClose>> + 'a,
|
2023-10-01 21:04:32 +00:00
|
|
|
|
escape: bool,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
matcher: Matcher<'a, 'b, T>,
|
|
|
|
|
fallback: Matcher<'a, 'b, S>,
|
2024-09-05 15:22:54 +00:00
|
|
|
|
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token> + 'a
|
|
|
|
|
where
|
|
|
|
|
FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
|
|
|
|
FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
2023-10-08 20:15:55 +00:00
|
|
|
|
{
|
|
|
|
|
let FlankingDelim(opening_tag, opening_rule, ..) = opening_tag.into();
|
|
|
|
|
let FlankingDelim(closing_tag, closing_rule, ..) = closing_tag.into();
|
|
|
|
|
|
2023-10-01 21:04:32 +00:00
|
|
|
|
move |input| {
|
2024-03-13 13:02:34 +00:00
|
|
|
|
if let FlankingRule::Strict = opening_rule {
|
|
|
|
|
let (input, pre) = opt(recognize(tuple((
|
2024-05-22 01:42:50 +00:00
|
|
|
|
char_alphanumeric_unicode,
|
2024-03-13 13:02:34 +00:00
|
|
|
|
opt(tag("\\")),
|
|
|
|
|
&opening_tag,
|
2024-03-13 13:31:05 +00:00
|
|
|
|
peek(not(alt((recognize(satisfy(|c| c.is_whitespace())), eof)))),
|
2024-03-13 13:02:34 +00:00
|
|
|
|
))))(input)?;
|
|
|
|
|
|
|
|
|
|
if let Some(pre_text) = pre {
|
|
|
|
|
return Ok((input, Token::PlainText(pre_text.into_fragment().into())));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-01 21:04:32 +00:00
|
|
|
|
if escape {
|
2023-10-07 17:44:39 +00:00
|
|
|
|
if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
|
|
|
|
|
return Ok((
|
|
|
|
|
input_escaped,
|
2023-10-14 19:41:36 +00:00
|
|
|
|
Token::PlainText(mark.fragment().to_string().into()),
|
2023-10-07 17:44:39 +00:00
|
|
|
|
));
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let begin = input;
|
|
|
|
|
let (post_open, _) = opening_tag(input)?;
|
|
|
|
|
|
|
|
|
|
let res = tuple((
|
2023-10-07 17:44:39 +00:00
|
|
|
|
many1(tuple((not(&closing_tag), &matcher.matcher_inner))),
|
|
|
|
|
&closing_tag,
|
2023-10-01 21:04:32 +00:00
|
|
|
|
))(post_open);
|
|
|
|
|
|
2024-03-13 13:02:34 +00:00
|
|
|
|
if let Err(nom::Err::Error(nom::error::Error { .. })) = res {
|
2023-10-06 23:46:20 +00:00
|
|
|
|
let res_fallback = tuple((
|
2023-10-07 17:44:39 +00:00
|
|
|
|
many1(tuple((not(&closing_tag), &fallback.matcher_inner))),
|
|
|
|
|
&closing_tag,
|
2023-10-06 23:46:20 +00:00
|
|
|
|
))(post_open);
|
|
|
|
|
|
|
|
|
|
if res_fallback.is_err() {
|
|
|
|
|
return Ok((
|
2024-03-13 13:02:34 +00:00
|
|
|
|
post_open,
|
|
|
|
|
Token::PlainText(begin.fragment_between(&post_open).into()),
|
2023-10-06 23:46:20 +00:00
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let (input, (inner, closing)) = res_fallback.unwrap();
|
2023-10-07 17:44:39 +00:00
|
|
|
|
let mut inner = inner.into_iter().map(|(_, t)| t);
|
2023-10-06 23:46:20 +00:00
|
|
|
|
|
2023-10-01 21:04:32 +00:00
|
|
|
|
return Ok((
|
2023-10-06 23:46:20 +00:00
|
|
|
|
input,
|
|
|
|
|
Token::Sequence(vec![
|
|
|
|
|
Token::PlainText(begin.fragment_between(&post_open).into()),
|
2024-05-22 01:42:50 +00:00
|
|
|
|
(fallback.collector)(&mut inner),
|
2023-10-06 23:46:20 +00:00
|
|
|
|
Token::PlainText(closing.into_fragment().into()),
|
|
|
|
|
]),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-08 20:15:55 +00:00
|
|
|
|
let (input, (inner, closing)) = res?;
|
2023-10-07 17:44:39 +00:00
|
|
|
|
let mut inner = inner.into_iter().map(|(_, t)| t);
|
2023-10-01 21:04:32 +00:00
|
|
|
|
|
2023-10-08 20:15:55 +00:00
|
|
|
|
let inner_tok = (matcher.collector)(&mut inner);
|
|
|
|
|
|
|
|
|
|
let correct_left_flanking =
|
|
|
|
|
if let FlankingRule::Lenient | FlankingRule::Strict = opening_rule {
|
|
|
|
|
let text_left = inner_tok.str_content_left();
|
|
|
|
|
|
|
|
|
|
!(text_left.is_some_and(|s| s.starts_with(char::is_whitespace))
|
|
|
|
|
|| text_left.is_none())
|
|
|
|
|
} else {
|
|
|
|
|
true
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let correct_right_flanking =
|
|
|
|
|
if let FlankingRule::Lenient | FlankingRule::Strict = closing_rule {
|
|
|
|
|
let text_right = inner_tok.str_content_right();
|
|
|
|
|
!(text_right.is_some_and(|s| s.ends_with(char::is_whitespace))
|
|
|
|
|
|| text_right.is_none())
|
|
|
|
|
} else {
|
|
|
|
|
true
|
|
|
|
|
};
|
|
|
|
|
|
2023-10-23 22:27:54 +00:00
|
|
|
|
let (input, alphanum) = opt(peek(alphanumeric1_unicode))(input)?;
|
|
|
|
|
let correct_right_outer =
|
|
|
|
|
alphanum.is_none() || !matches!(closing_rule, FlankingRule::Strict);
|
|
|
|
|
|
|
|
|
|
let correct_flanking =
|
|
|
|
|
correct_left_flanking && correct_right_flanking && correct_right_outer;
|
2023-10-08 20:15:55 +00:00
|
|
|
|
|
|
|
|
|
if !correct_flanking {
|
|
|
|
|
return Ok((
|
|
|
|
|
input,
|
|
|
|
|
Token::Sequence(vec![
|
|
|
|
|
Token::PlainText(begin.fragment_between(&post_open).into()),
|
2023-10-14 19:41:36 +00:00
|
|
|
|
inner_tok.inner(),
|
2023-10-08 20:15:55 +00:00
|
|
|
|
Token::PlainText(closing.into_fragment().into()),
|
|
|
|
|
]),
|
|
|
|
|
));
|
|
|
|
|
}
|
|
|
|
|
Ok((input, Token::Sequence(vec![inner_tok])))
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-04 17:31:03 +00:00
|
|
|
|
let (input, _) = tag("$[")(input)?;
|
|
|
|
|
|
|
|
|
|
let func_ident = |input| {
|
|
|
|
|
recognize(tuple((
|
|
|
|
|
many1_count(alt((alpha1, tag("_")))),
|
|
|
|
|
many0_count(alt((alphanumeric1, tag("_")))),
|
|
|
|
|
)))(input)
|
|
|
|
|
};
|
|
|
|
|
|
2023-10-25 22:30:11 +00:00
|
|
|
|
let arg_value = recognize(many1_count(alt((
|
2023-10-04 17:31:03 +00:00
|
|
|
|
alphanumeric1,
|
|
|
|
|
tag("."),
|
|
|
|
|
tag("-"),
|
|
|
|
|
tag("_"),
|
|
|
|
|
))));
|
|
|
|
|
|
2023-10-05 22:17:52 +00:00
|
|
|
|
let (input, func_name) = map(func_ident, Span::into_fragment)(input)?;
|
2023-10-04 17:31:03 +00:00
|
|
|
|
|
2023-10-25 22:30:11 +00:00
|
|
|
|
let arg = tuple((func_ident, opt(tuple((tag("="), arg_value)))));
|
2023-10-04 17:31:03 +00:00
|
|
|
|
|
|
|
|
|
let (input, args) =
|
|
|
|
|
opt(tuple((one_char('.'), separated_list1(one_char(','), arg))))(input)?;
|
|
|
|
|
|
|
|
|
|
let args_out = args.map_or_else(HashMap::new, |(_, items)| {
|
|
|
|
|
items
|
|
|
|
|
.into_iter()
|
|
|
|
|
.map(|(k, v)| {
|
|
|
|
|
(
|
2023-10-14 19:41:36 +00:00
|
|
|
|
k.into_fragment().to_string(),
|
|
|
|
|
v.map(|(_, val)| val.into_fragment().to_string()),
|
2023-10-04 17:31:03 +00:00
|
|
|
|
)
|
|
|
|
|
})
|
|
|
|
|
.collect::<HashMap<_, _>>()
|
|
|
|
|
});
|
|
|
|
|
|
2023-10-05 22:17:52 +00:00
|
|
|
|
let (input, _) = opt(space)(input)?;
|
2023-10-04 17:31:03 +00:00
|
|
|
|
|
2023-10-05 22:17:52 +00:00
|
|
|
|
let (input, (inner, _)) = many_till(self.partial(Self::inline_single), tag("]"))(input)?;
|
2023-10-04 17:31:03 +00:00
|
|
|
|
|
|
|
|
|
Ok((
|
|
|
|
|
input,
|
|
|
|
|
Token::Function {
|
2023-10-14 19:41:36 +00:00
|
|
|
|
name: func_name.to_string(),
|
2023-10-04 17:31:03 +00:00
|
|
|
|
params: args_out,
|
2023-10-05 22:17:52 +00:00
|
|
|
|
inner: Box::new(Token::Sequence(inner)),
|
2023-10-04 17:31:03 +00:00
|
|
|
|
},
|
|
|
|
|
))
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-08 20:15:55 +00:00
|
|
|
|
let opening_tag = &tag("<plain>");
|
|
|
|
|
let closing_tag = &tag("</plain>");
|
2023-10-05 20:12:51 +00:00
|
|
|
|
|
|
|
|
|
let (input, _) = opening_tag(input)?;
|
|
|
|
|
let (input, text) = map(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
recognize(many1(tuple((not(line_ending), not(closing_tag), anychar)))),
|
2023-10-05 20:12:51 +00:00
|
|
|
|
Span::into_fragment,
|
|
|
|
|
)(input)?;
|
|
|
|
|
let (input, _) = closing_tag(input)?;
|
|
|
|
|
|
|
|
|
|
Ok((input, Token::PlainTag(text.into())))
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
tag_no_case("<small>"),
|
|
|
|
|
tag_no_case("</small>"),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
false,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, boxing_token(Token::Small)),
|
|
|
|
|
),
|
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_non_formatting_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, identity),
|
|
|
|
|
),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
tag_no_case("<b>"),
|
|
|
|
|
tag_no_case("</b>"),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
false,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
|
|
|
|
|
),
|
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_non_formatting_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, identity),
|
|
|
|
|
),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
(tag("**"), FlankingRule::Lenient),
|
|
|
|
|
(tag("**"), FlankingRule::Lenient),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
true,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
|
|
|
|
|
),
|
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_non_formatting_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, identity),
|
|
|
|
|
),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
(tag("__"), FlankingRule::Strict),
|
|
|
|
|
(tag("__"), FlankingRule::Strict),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
true,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
|
|
|
|
|
),
|
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_non_formatting_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, identity),
|
|
|
|
|
),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
tag_no_case("<i>"),
|
|
|
|
|
tag_no_case("</i>"),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
false,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
|
|
|
|
|
),
|
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_non_formatting_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, identity),
|
|
|
|
|
),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
(tag("*"), FlankingRule::Lenient),
|
|
|
|
|
(tag("*"), FlankingRule::Lenient),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
true,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
|
|
|
|
|
),
|
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_non_formatting_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, identity),
|
|
|
|
|
),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
(tag("_"), FlankingRule::Strict),
|
|
|
|
|
(tag("_"), FlankingRule::Strict),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
true,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
|
|
|
|
|
),
|
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_non_formatting_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, identity),
|
|
|
|
|
),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
tag_no_case("<s>"),
|
|
|
|
|
tag_no_case("</s>"),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
false,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
|
|
|
|
|
),
|
|
|
|
|
Matcher::new(
|
|
|
|
|
&self.partial(Self::inline_non_formatting_single),
|
|
|
|
|
&collect_sequence(Token::Sequence, identity),
|
|
|
|
|
),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
(tag("~~"), FlankingRule::Lenient),
|
|
|
|
|
(tag("~~"), FlankingRule::Lenient),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
true,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&move |input| {
|
|
|
|
|
map(
|
2024-05-22 01:42:50 +00:00
|
|
|
|
tuple((not(line_ending), self.partial(Self::inline_single))),
|
2023-10-07 17:44:39 +00:00
|
|
|
|
|(_, captured)| captured,
|
|
|
|
|
)(input)
|
|
|
|
|
},
|
|
|
|
|
&collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
|
|
|
|
|
),
|
|
|
|
|
Matcher::new(
|
|
|
|
|
&move |input| {
|
|
|
|
|
map(
|
|
|
|
|
tuple((
|
2024-05-22 01:42:50 +00:00
|
|
|
|
not(line_ending),
|
2023-10-07 17:44:39 +00:00
|
|
|
|
self.partial(Self::inline_non_formatting_single),
|
|
|
|
|
)),
|
|
|
|
|
|(_, captured)| captured,
|
|
|
|
|
)(input)
|
|
|
|
|
},
|
|
|
|
|
&collect_sequence(Token::Sequence, identity),
|
|
|
|
|
),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-07 17:44:39 +00:00
|
|
|
|
tag("`"),
|
|
|
|
|
|input| alt((tag("`"), tag("´")))(input),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
true,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&move |input| {
|
|
|
|
|
map(
|
|
|
|
|
tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar)),
|
|
|
|
|
|(_, captured)| captured,
|
|
|
|
|
)(input)
|
|
|
|
|
},
|
|
|
|
|
&collect_char_sequence(Token::InlineCode),
|
|
|
|
|
),
|
|
|
|
|
Matcher::reject(),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-01 21:04:32 +00:00
|
|
|
|
self.tag_delimited(
|
2023-10-07 17:44:39 +00:00
|
|
|
|
tag("\\("),
|
|
|
|
|
tag("\\)"),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
false,
|
2023-10-07 17:44:39 +00:00
|
|
|
|
Matcher::new(
|
|
|
|
|
&move |input| {
|
|
|
|
|
map(tuple((not(line_ending), anychar)), |(_, captured)| captured)(input)
|
|
|
|
|
},
|
|
|
|
|
&collect_char_sequence(Token::InlineMath),
|
|
|
|
|
),
|
|
|
|
|
Matcher::reject(),
|
2023-10-01 21:04:32 +00:00
|
|
|
|
)(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_raw_text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
|
|
|
|
let (input, text) = anychar(input)?;
|
|
|
|
|
Ok((input, Token::PlainText(text.to_compact_string())))
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|
2023-10-05 17:09:26 +00:00
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 17:09:26 +00:00
|
|
|
|
let (input, url_span) = recognize(tuple((
|
2023-10-16 21:45:45 +00:00
|
|
|
|
self.partial_span(Self::protocol),
|
|
|
|
|
self.url_chars(
|
|
|
|
|
|input| recognize(not(self.partial_span(Self::url_chars_base)))(input),
|
|
|
|
|
false,
|
|
|
|
|
),
|
2023-10-05 17:09:26 +00:00
|
|
|
|
)))(input)?;
|
|
|
|
|
|
|
|
|
|
let url = url_span.into_fragment();
|
|
|
|
|
let url_bytes = url.as_bytes();
|
|
|
|
|
|
|
|
|
|
// Strip punctuation at the end of sentences that might have been consumed as a part of the URL
|
|
|
|
|
let final_url = if matches!(url_bytes.last(), Some(b'.' | b',' | b'?')) {
|
|
|
|
|
url.slice(..url.len() - 1)
|
|
|
|
|
} else {
|
|
|
|
|
url
|
|
|
|
|
};
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
Ok((input, Token::UrlRaw(final_url.to_string())))
|
2023-10-05 17:09:26 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 17:09:26 +00:00
|
|
|
|
let (input, _) = tag("<")(input)?;
|
2023-10-16 21:45:45 +00:00
|
|
|
|
let (input, url_span) = recognize(tuple((
|
|
|
|
|
self.partial_span(Self::protocol),
|
|
|
|
|
self.url_chars(tag(">"), true),
|
|
|
|
|
)))(input)?;
|
2023-10-05 17:09:26 +00:00
|
|
|
|
let (input, _) = tag(">")(input)?;
|
|
|
|
|
|
2023-10-07 19:22:21 +00:00
|
|
|
|
Ok((
|
|
|
|
|
input,
|
2023-10-14 19:41:36 +00:00
|
|
|
|
Token::UrlNoEmbed(url_span.into_fragment().to_string()),
|
2023-10-07 19:22:21 +00:00
|
|
|
|
))
|
2023-10-05 17:09:26 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 17:09:26 +00:00
|
|
|
|
let (input, no_embed) = opt(tag("?"))(input)?;
|
|
|
|
|
let (input, _) = tag("[")(input)?;
|
|
|
|
|
let (input, _) = not(tag("["))(input)?;
|
2023-10-07 18:40:01 +00:00
|
|
|
|
let (input, (label_tok, _)) =
|
|
|
|
|
many_till(self.partial(Self::inline_label_safe_single), tag("]("))(input)?;
|
2023-10-16 21:45:45 +00:00
|
|
|
|
let (input, url_span) = recognize(tuple((
|
|
|
|
|
self.partial_span(Self::protocol),
|
|
|
|
|
self.url_chars(tag(")"), true),
|
|
|
|
|
)))(input)?;
|
2023-10-05 17:09:26 +00:00
|
|
|
|
let (input, _) = tag(")")(input)?;
|
|
|
|
|
|
|
|
|
|
Ok((
|
|
|
|
|
input,
|
|
|
|
|
Token::Link {
|
2023-10-07 18:40:01 +00:00
|
|
|
|
label: Box::new(Token::Sequence(label_tok)),
|
2023-10-05 17:09:26 +00:00
|
|
|
|
href: url_span.into_fragment().into(),
|
|
|
|
|
embed: no_embed.is_none(),
|
|
|
|
|
},
|
|
|
|
|
))
|
|
|
|
|
}
|
2023-10-05 18:05:03 +00:00
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 19:21:23 +00:00
|
|
|
|
let frag = input.fragment();
|
|
|
|
|
let Some(grapheme) = frag.graphemes(true).next() else {
|
|
|
|
|
return fail(input);
|
|
|
|
|
};
|
|
|
|
|
|
2024-09-05 15:22:54 +00:00
|
|
|
|
let grapheme = grapheme.trim_end_matches(['\u{200c}', '\u{200d}']);
|
2023-10-07 17:44:39 +00:00
|
|
|
|
|
2023-10-05 19:21:23 +00:00
|
|
|
|
let emoji = emojis::get(grapheme);
|
|
|
|
|
|
|
|
|
|
if emoji.is_none() {
|
|
|
|
|
return fail(input);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok((
|
|
|
|
|
input.slice(grapheme.len()..),
|
|
|
|
|
Token::UnicodeEmoji(grapheme.into()),
|
|
|
|
|
))
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-22 01:42:50 +00:00
|
|
|
|
fn shortcode_emoji_inner<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 20:32:53 +00:00
|
|
|
|
let (input, _) = tag(":")(input)?;
|
|
|
|
|
let (input, shortcode) = map(
|
2023-10-08 20:15:55 +00:00
|
|
|
|
recognize(many1(alt((
|
|
|
|
|
alphanumeric1_unicode,
|
|
|
|
|
recognize(one_of("_+-")),
|
|
|
|
|
)))),
|
2023-10-05 20:32:53 +00:00
|
|
|
|
Span::into_fragment,
|
|
|
|
|
)(input)?;
|
2023-10-30 22:00:46 +00:00
|
|
|
|
let (input, host) = opt(map(
|
|
|
|
|
tuple((
|
|
|
|
|
tag("@"),
|
|
|
|
|
map(
|
|
|
|
|
recognize(many1(alt((alphanumeric1, recognize(one_of("-.")))))),
|
|
|
|
|
Span::into_fragment,
|
|
|
|
|
),
|
|
|
|
|
)),
|
|
|
|
|
|(_at, host)| host,
|
|
|
|
|
))(input)?;
|
2023-10-05 20:32:53 +00:00
|
|
|
|
let (input, _) = tag(":")(input)?;
|
2023-10-08 20:15:55 +00:00
|
|
|
|
let (input, _) = not(alphanumeric1_unicode)(input)?;
|
2023-10-05 20:32:53 +00:00
|
|
|
|
|
2023-10-30 22:00:46 +00:00
|
|
|
|
Ok((
|
|
|
|
|
input,
|
|
|
|
|
Token::ShortcodeEmoji {
|
|
|
|
|
shortcode: shortcode.into(),
|
|
|
|
|
host: host.map(str::to_string),
|
|
|
|
|
},
|
|
|
|
|
))
|
2023-10-05 20:32:53 +00:00
|
|
|
|
}
|
|
|
|
|
|
2024-05-22 01:42:50 +00:00
|
|
|
|
fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-08 20:15:55 +00:00
|
|
|
|
if let (plain_out, Some(plain)) = map(
|
|
|
|
|
opt(recognize(tuple((
|
2024-05-22 01:42:50 +00:00
|
|
|
|
char_alphanumeric_unicode,
|
|
|
|
|
self.partial(Self::shortcode_emoji_inner),
|
2023-10-08 20:15:55 +00:00
|
|
|
|
)))),
|
|
|
|
|
|o| o.map(Span::into_fragment),
|
|
|
|
|
)(input)?
|
|
|
|
|
{
|
|
|
|
|
return Ok((plain_out, Token::PlainText(plain.into())));
|
|
|
|
|
}
|
2023-10-05 18:05:03 +00:00
|
|
|
|
|
2024-05-22 01:42:50 +00:00
|
|
|
|
self.shortcode_emoji_inner(input)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn tag_mention_inner<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-05 18:05:03 +00:00
|
|
|
|
let tags = one_of("@!");
|
|
|
|
|
let (input, mention_type) = map(tags, |c| match c {
|
|
|
|
|
'@' => MentionType::User,
|
|
|
|
|
'!' => MentionType::Community,
|
|
|
|
|
_ => unreachable!(),
|
|
|
|
|
})(input)?;
|
|
|
|
|
|
|
|
|
|
let (input, name) = map(
|
2024-09-05 15:22:54 +00:00
|
|
|
|
recognize(many1(alt((alphanumeric1, recognize(one_of("-_.")))))),
|
2023-10-05 18:05:03 +00:00
|
|
|
|
Span::into_fragment,
|
|
|
|
|
)(input)?;
|
|
|
|
|
|
2023-10-07 18:40:01 +00:00
|
|
|
|
let before = input;
|
2023-10-26 19:23:59 +00:00
|
|
|
|
let (_, host_opt) = opt(tuple((
|
|
|
|
|
one_of(if matches!(mention_type, MentionType::User) {
|
|
|
|
|
"@:"
|
|
|
|
|
} else {
|
|
|
|
|
"@"
|
|
|
|
|
}),
|
|
|
|
|
map(
|
|
|
|
|
recognize(many1(alt((alphanumeric1, recognize(one_of("-_.")))))),
|
|
|
|
|
Span::into_fragment,
|
|
|
|
|
),
|
|
|
|
|
)))(input)?;
|
2023-10-05 18:05:03 +00:00
|
|
|
|
|
2023-10-26 19:23:59 +00:00
|
|
|
|
// Promote tags with a colon separator to Matrix handles
|
|
|
|
|
let mention_type = if let Some((':', _)) = host_opt {
|
|
|
|
|
MentionType::MatrixUser
|
|
|
|
|
} else {
|
|
|
|
|
mention_type
|
|
|
|
|
};
|
|
|
|
|
let host =
|
2024-09-05 15:22:54 +00:00
|
|
|
|
host_opt.map(|(_, name)| name.trim_end_matches(['.', '-', '_']));
|
2023-10-26 19:08:51 +00:00
|
|
|
|
let input = host.map(|c| before.slice(c.len() + 1..)).unwrap_or(before);
|
|
|
|
|
|
2023-10-05 18:05:03 +00:00
|
|
|
|
Ok((
|
2023-10-26 19:08:51 +00:00
|
|
|
|
input,
|
2023-10-05 18:05:03 +00:00
|
|
|
|
Token::Mention {
|
|
|
|
|
mention_type,
|
|
|
|
|
name: name.into(),
|
|
|
|
|
host: host.map(|h| h.into()),
|
|
|
|
|
},
|
|
|
|
|
))
|
|
|
|
|
}
|
2023-10-05 20:02:46 +00:00
|
|
|
|
|
2024-05-22 01:42:50 +00:00
|
|
|
|
fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
|
|
|
|
if let (plain_out, Some(plain)) = map(
|
|
|
|
|
opt(recognize(tuple((
|
|
|
|
|
alt((tag("\\"), recognize(char_alphanumeric_unicode))),
|
|
|
|
|
self.partial(Self::tag_mention_inner),
|
|
|
|
|
)))),
|
|
|
|
|
|o| o.map(Span::into_fragment),
|
|
|
|
|
)(input)?
|
|
|
|
|
{
|
|
|
|
|
return Ok((plain_out, Token::PlainText(plain.into())));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.tag_mention_inner(input)
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-14 19:41:36 +00:00
|
|
|
|
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
2023-10-23 21:52:02 +00:00
|
|
|
|
let (input, maybe_preceded) =
|
2024-05-22 01:42:50 +00:00
|
|
|
|
opt(recognize(tuple((char_alphanumeric_unicode, tag("#")))))(input)?;
|
2023-10-23 21:52:02 +00:00
|
|
|
|
|
|
|
|
|
if let Some(preceded) = maybe_preceded {
|
|
|
|
|
return Ok((input, Token::PlainText(preceded.into_fragment().into())));
|
|
|
|
|
}
|
2023-10-05 20:02:46 +00:00
|
|
|
|
|
|
|
|
|
let (input, _) = tag("#")(input)?;
|
|
|
|
|
|
2023-10-16 21:45:45 +00:00
|
|
|
|
let (input, hashtag_text) = map(
|
|
|
|
|
recognize(many1(self.partial_span(Self::hashtag_chars))),
|
|
|
|
|
Span::into_fragment,
|
|
|
|
|
)(input)?;
|
2023-10-05 20:02:46 +00:00
|
|
|
|
|
|
|
|
|
Ok((input, Token::Hashtag(hashtag_text.into())))
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-16 21:45:45 +00:00
|
|
|
|
#[inline]
|
|
|
|
|
fn increase_nesting<'a, 'b, O, F>(
|
|
|
|
|
&'b self,
|
|
|
|
|
mut func: F,
|
|
|
|
|
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O> + 'b
|
2024-09-05 15:22:54 +00:00
|
|
|
|
where
|
|
|
|
|
F: Parser<Span<'a>, O, nom::error::Error<Span<'a>>> + 'b,
|
2023-10-16 21:45:45 +00:00
|
|
|
|
{
|
|
|
|
|
move |mut input| {
|
|
|
|
|
if input.extra.depth >= self.depth_limit {
|
|
|
|
|
return fail(input);
|
|
|
|
|
}
|
2023-10-05 17:09:26 +00:00
|
|
|
|
|
2023-10-16 21:45:45 +00:00
|
|
|
|
input.extra.depth += 1;
|
2023-12-31 16:18:27 +00:00
|
|
|
|
func.parse(input).map(|mut v| {
|
|
|
|
|
v.0.extra.depth -= 1;
|
|
|
|
|
v
|
|
|
|
|
})
|
2023-10-16 21:45:45 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2023-10-05 17:09:26 +00:00
|
|
|
|
|
2023-10-16 21:45:45 +00:00
|
|
|
|
#[inline]
|
|
|
|
|
fn hashtag_chars<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
|
|
|
|
|
recognize(alt((
|
|
|
|
|
recognize(tuple((
|
|
|
|
|
tag("("),
|
|
|
|
|
self.increase_nesting(self.partial_span(Self::hashtag_chars)),
|
|
|
|
|
tag(")"),
|
|
|
|
|
))),
|
|
|
|
|
recognize(tuple((
|
|
|
|
|
tag("["),
|
|
|
|
|
self.increase_nesting(self.partial_span(Self::hashtag_chars)),
|
|
|
|
|
tag("]"),
|
|
|
|
|
))),
|
|
|
|
|
recognize(tuple((
|
|
|
|
|
tag("「"),
|
|
|
|
|
self.increase_nesting(self.partial_span(Self::hashtag_chars)),
|
|
|
|
|
tag("」"),
|
|
|
|
|
))),
|
|
|
|
|
recognize(tuple((
|
|
|
|
|
tag("("),
|
|
|
|
|
self.increase_nesting(self.partial_span(Self::hashtag_chars)),
|
|
|
|
|
tag(")"),
|
|
|
|
|
))),
|
|
|
|
|
recognize(tuple((
|
2023-10-23 21:52:02 +00:00
|
|
|
|
not(space1_unicode),
|
|
|
|
|
not(line_ending),
|
2023-10-16 21:45:45 +00:00
|
|
|
|
not(one_of(".,:;!?#?/[]【】()「」()<>")),
|
|
|
|
|
anychar,
|
|
|
|
|
))),
|
|
|
|
|
)))(input)
|
|
|
|
|
}
|
2023-10-05 17:09:26 +00:00
|
|
|
|
|
2023-10-16 21:45:45 +00:00
|
|
|
|
#[inline]
|
|
|
|
|
fn protocol<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
|
|
|
|
|
alt((tag("https://"), tag("http://")))(input)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
|
fn url_chars_base<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
|
|
|
|
|
alt((
|
|
|
|
|
alphanumeric1_unicode,
|
|
|
|
|
recognize(tuple((
|
|
|
|
|
tag("["),
|
|
|
|
|
many_till(
|
|
|
|
|
self.increase_nesting(self.partial_span(Self::url_chars_base)),
|
|
|
|
|
tag("]"),
|
|
|
|
|
),
|
|
|
|
|
))),
|
|
|
|
|
recognize(tuple((
|
|
|
|
|
tag("("),
|
|
|
|
|
many_till(
|
|
|
|
|
self.increase_nesting(self.partial_span(Self::url_chars_base)),
|
|
|
|
|
tag(")"),
|
|
|
|
|
),
|
|
|
|
|
))),
|
|
|
|
|
recognize(one_of(".,_/:%#$&?!~=+-@")),
|
|
|
|
|
))(input)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
|
fn url_chars<'a, 'b, F>(
|
|
|
|
|
&'b self,
|
|
|
|
|
mut terminator: F,
|
|
|
|
|
spaces: bool,
|
|
|
|
|
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'b
|
2024-09-05 15:22:54 +00:00
|
|
|
|
where
|
|
|
|
|
F: Parser<Span<'a>, Span<'a>, nom::error::Error<Span<'a>>> + 'b,
|
2023-10-16 21:45:45 +00:00
|
|
|
|
{
|
|
|
|
|
move |input| {
|
|
|
|
|
recognize(many1_count(tuple((
|
|
|
|
|
not(tuple((space1, eof))),
|
|
|
|
|
not(tuple((space1, tag("\"")))),
|
|
|
|
|
not(tuple((opt(space1), |input| terminator.parse(input)))),
|
|
|
|
|
alt((
|
|
|
|
|
|input| self.url_chars_base(input),
|
|
|
|
|
if spaces { space1 } else { fail },
|
|
|
|
|
)),
|
|
|
|
|
))))(input)
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-10-05 17:09:26 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
|
mod test {
|
2023-10-05 22:17:52 +00:00
|
|
|
|
use std::collections::HashMap;
|
2023-10-05 17:09:26 +00:00
|
|
|
|
|
2024-05-22 01:42:50 +00:00
|
|
|
|
use nom::bytes::complete::tag;
|
|
|
|
|
|
2024-09-05 15:22:54 +00:00
|
|
|
|
use crate::{to_xml_string, Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT};
|
2024-05-22 01:42:50 +00:00
|
|
|
|
|
2023-10-07 17:44:39 +00:00
|
|
|
|
fn parse_full(string: &str) -> Token {
|
2023-10-16 21:45:45 +00:00
|
|
|
|
Context::default()
|
|
|
|
|
.full(Span::new_extra(string, SpanMeta::default()))
|
|
|
|
|
.unwrap()
|
|
|
|
|
.1
|
|
|
|
|
.merged()
|
2023-10-07 17:44:39 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-12-30 18:23:32 +00:00
|
|
|
|
/// Empty input parses to an empty sequence.
#[test]
fn parse_empty() {
    let parsed = parse_full("");

    assert_eq!(parsed, Token::Sequence(Vec::new()));
}
|
|
|
|
|
|
2023-10-05 17:09:26 +00:00
|
|
|
|
/// `url_chars` with a `)` terminator: balanced parentheses stay inside the URL, the
/// terminator and trailing whitespace do not, and inner spaces are kept only when enabled.
#[test]
fn parse_url_chars() {
    let ctx = Context::default();

    // (input, allow spaces, expected recognised text)
    let cases = [
        (
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
            true,
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
        ),
        (
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))",
            true,
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
        ),
        (
            "https://cs.wikipedia.org/wiki/Among_Us ",
            true,
            "https://cs.wikipedia.org/wiki/Among_Us",
        ),
        (
            "https://cs.wikipedia.org/wiki/Among Us )",
            true,
            "https://cs.wikipedia.org/wiki/Among Us",
        ),
        (
            "https://en.wikipedia.org/wiki/Among Us )",
            false,
            "https://en.wikipedia.org/wiki/Among",
        ),
    ];

    for (text, spaces, expected) in cases {
        let (_, matched) = ctx.url_chars(tag(")"), spaces)(Span::new_extra(
            text,
            SpanMeta::default(),
        ))
        .unwrap();

        assert_eq!(matched.into_fragment(), expected);
    }
}
|
2023-10-05 19:21:23 +00:00
|
|
|
|
|
2023-10-07 17:44:39 +00:00
|
|
|
|
/// Inline formatting: strikethrough, bold, italic, inline code and their non-matching forms.
#[test]
fn parse_formatting() {
    fn plain(text: &str) -> Token {
        Token::PlainText(text.into())
    }
    fn italic(inner: Token) -> Token {
        Token::Italic(Box::new(inner))
    }
    fn bold(inner: Token) -> Token {
        Token::Bold(Box::new(inner))
    }
    fn code(text: &str) -> Token {
        Token::InlineCode(text.into())
    }

    // Basic markers.
    assert_eq!(
        parse_full("~~stikethrough~~"),
        Token::Strikethrough(Box::new(plain("stikethrough"))),
    );
    assert_eq!(parse_full("**bold**"), bold(plain("bold")));
    assert_eq!(parse_full("*italic*"), italic(plain("italic")));

    // Markers that must not be treated as formatting.
    assert_eq!(parse_full("* italic *"), plain("* italic *"));
    assert_eq!(parse_full("snake_case_variable"), plain("snake_case_variable"));

    assert_eq!(
        parse_full("intra*word*italic"),
        Token::Sequence(vec![plain("intra"), italic(plain("word")), plain("italic")])
    );

    assert_eq!(parse_full("_ italic *"), plain("_ italic *"));

    assert_eq!(
        parse_full("long text with a *footnote <b>text</b>"),
        Token::Sequence(vec![plain("long text with a *footnote "), bold(plain("text"))])
    );

    assert_eq!(parse_full(r#"*"italic"*"#), italic(plain("\"italic\"")));

    // Inline code.
    assert_eq!(
        parse_full("not code `code` also not code"),
        Token::Sequence(vec![plain("not code "), code("code"), plain(" also not code")]),
    );
    assert_eq!(
        parse_full("not code `code` also `not code"),
        Token::Sequence(vec![plain("not code "), code("code"), plain(" also `not code")]),
    );
    assert_eq!(
        parse_full("not code `*not bold*` also not code"),
        Token::Sequence(vec![
            plain("not code "),
            code("*not bold*"),
            plain(" also not code")
        ]),
    );

    // Combined markers.
    assert_eq!(
        parse_full("***bold italic***"),
        bold(italic(plain("bold italic")))
    );
    assert_eq!(
        parse_full("<b><i>bold italic</i></b>"),
        bold(italic(plain("bold italic")))
    );

    // An unclosed strikethrough marker stays text; the italic after it still parses.
    assert_eq!(
        parse_full("~~*hello\nworld*"),
        Token::Sequence(vec![plain("~~"), italic(plain("hello\nworld"))])
    )
}
|
|
|
|
|
|
2024-03-13 13:02:34 +00:00
|
|
|
|
/// Flanking rules: `*` may open/close italics inside a word, `_` may not.
#[test]
fn parse_flanking() {
    fn plain(text: &str) -> Token {
        Token::PlainText(text.into())
    }
    fn italic(text: &str) -> Token {
        Token::Italic(Box::new(Token::PlainText(text.into())))
    }

    // (input, expected merged token)
    let cases: Vec<(&str, Token)> = vec![
        (
            "aaa*iii*bbb",
            Token::Sequence(vec![plain("aaa"), italic("iii"), plain("bbb")]),
        ),
        ("aaa_nnn_bbb", plain("aaa_nnn_bbb")),
        (
            "aaa\n_iii_\nbbb",
            Token::Sequence(vec![plain("aaa\n"), italic("iii"), plain("\nbbb")]),
        ),
        ("*iii*", italic("iii")),
        ("_iii_", italic("iii")),
        (
            "aaa*iii*",
            Token::Sequence(vec![plain("aaa"), italic("iii")]),
        ),
        (
            "*iii*bbb",
            Token::Sequence(vec![italic("iii"), plain("bbb")]),
        ),
        ("aaa_nnn_", plain("aaa_nnn_")),
        ("_nnn_bbb", plain("_nnn_bbb")),
    ];

    for (source, expected) in cases {
        assert_eq!(parse_full(source), expected);
    }
}
|
|
|
|
|
|
2024-05-22 01:42:50 +00:00
|
|
|
|
/// Smoke test: very long inputs must parse without panicking. The results are discarded.
#[test]
fn parse_long() {
    for unit in ["A", "*A", "@A"] {
        parse_full(&unit.repeat(20000));
    }
}
|
|
|
|
|
|
2023-10-05 22:17:52 +00:00
|
|
|
|
/// Block-level and nested constructs: math, code, centering, quotes, functions and
/// improperly nested HTML-style tags.
#[test]
fn parse_complex() {
    fn plain(text: &str) -> Token {
        Token::PlainText(text.into())
    }
    fn emoji(text: &str) -> Token {
        Token::UnicodeEmoji(text.into())
    }
    fn user(name: &str) -> Token {
        Token::Mention {
            mention_type: crate::MentionType::User,
            name: name.into(),
            host: None,
        }
    }

    // Inline math and inline code must stay on a single line.
    assert_eq!(
        parse_full(r"\( nya^3 \)"),
        Token::InlineMath(" nya^3 ".to_string())
    );
    assert_eq!(
        parse_full("\\( nya^3 \n \\)"),
        plain("\\( nya^3 \n \\)")
    );
    assert_eq!(
        parse_full(r"`AbstractProxyFactoryBean`"),
        Token::InlineCode("AbstractProxyFactoryBean".to_string())
    );
    assert_eq!(parse_full("`let x = \n 5;`"), plain("`let x = \n 5;`"));

    // Fenced code block with a language tag.
    assert_eq!(
        parse_full("\n```js\nvar x = undefined;\n```"),
        Token::BlockCode {
            lang: Some("js".to_string()),
            inner: "var x = undefined;".to_string(),
        }
    );

    // Block math, multi-line and single-line.
    assert_eq!(
        parse_full("\n\\[\na^2 + b^2 = c^2\n\\]"),
        Token::BlockMath("a^2 + b^2 = c^2".to_string())
    );
    assert_eq!(
        parse_full(r"\[ x^2 + y^2 = z^2 \]"),
        Token::BlockMath("x^2 + y^2 = z^2".to_string())
    );

    // Centered text containing emoji.
    assert_eq!(
        parse_full("<center>centered\n🦋🏳️⚧️\ntext</center>"),
        Token::Center(Box::new(Token::Sequence(vec![
            plain("centered\n"),
            emoji("🦋"),
            emoji("🏳️⚧️"),
            plain("\ntext"),
        ])))
    );

    // The same inside a quote.
    assert_eq!(
        parse_full("> <center>centered\n> 👩🏽🤝👩🏼\n> text</center>"),
        Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
            plain("centered\n"),
            emoji("👩🏽🤝👩🏼"),
            plain("\ntext")
        ]))))),
    );

    // Nested functions with and without parameters.
    let mut spin_params = HashMap::new();
    spin_params.insert("y".into(), None);
    spin_params.insert("speed".into(), Some("5s".into()));

    assert_eq!(
        parse_full(r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#),
        Token::Function {
            name: "x2".into(),
            params: HashMap::new(),
            inner: Box::new(Token::Sequence(vec![
                Token::Function {
                    name: "sparkle".into(),
                    params: HashMap::new(),
                    inner: Box::new(emoji("🥺")),
                },
                emoji("💜"),
                Token::Function {
                    name: "spin".into(),
                    params: spin_params,
                    inner: Box::new(emoji("❤️")),
                },
                emoji("🦊"),
            ]))
        },
    );

    // Improperly nested tags are left as text; mentions inside still parse.
    assert_eq!(
        parse_full(r#"<b>bold @tag1 <i> @tag2 </b>italic</i>"#),
        Token::Sequence(vec![
            plain("<b>bold "),
            user("tag1"),
            plain(" <i> "),
            user("tag2"),
            plain(" </b>italic</i>")
        ]),
    );

    // Multi-line quote with formatting and a nested quote.
    assert_eq!(
        parse_full("\n> test\n> <i>\n> italic\n> </i>\n>> Nested quote\n"),
        Token::Quote(Box::new(Token::Sequence(vec![
            plain("test\n"),
            Token::Italic(Box::new(plain("\nitalic\n"))),
            Token::Quote(Box::new(plain("Nested quote")))
        ]))),
    );
}
|
|
|
|
|
|
2023-10-07 18:40:01 +00:00
|
|
|
|
/// URLs, labelled links and hashtags in running text.
#[test]
fn parse_link() {
    fn plain(text: &str) -> Token {
        Token::PlainText(text.into())
    }
    fn no_embed(url: &str) -> Token {
        Token::UrlNoEmbed(url.into())
    }
    fn raw(url: &str) -> Token {
        Token::UrlRaw(url.into())
    }
    fn link(label: &str, href: &str, embed: bool) -> Token {
        Token::Link {
            label: Box::new(Token::PlainText(label.into())),
            href: href.into(),
            embed,
        }
    }

    // IP-literal hosts.
    assert_eq!(
        parse_full("IPv4 test: <https://0>"),
        Token::Sequence(vec![plain("IPv4 test: "), no_embed("https://0")])
    );
    assert_eq!(
        parse_full("IPv4 test: <https://127.0.0.1>"),
        Token::Sequence(vec![plain("IPv4 test: "), no_embed("https://127.0.0.1")])
    );
    assert_eq!(
        parse_full("IPv6 test: <https://[::2f:1]/nya>"),
        Token::Sequence(vec![plain("IPv6 test: "), no_embed("https://[::2f:1]/nya")])
    );
    assert_eq!(
        parse_full("IPv6 test: https://[::2f:1]/nya"),
        Token::Sequence(vec![plain("IPv6 test: "), raw("https://[::2f:1]/nya")])
    );

    // IDNs
    assert_eq!(
        parse_full("IDN test: https://www.háčkyčárky.cz/"),
        Token::Sequence(vec![plain("IDN test: "), raw("https://www.háčkyčárky.cz/")])
    );

    assert_eq!(
        parse_full("Link test: [label](https://example.com)"),
        Token::Sequence(vec![
            plain("Link test: "),
            link("label", "https://example.com", true),
        ])
    );

    // Hashtags need a word boundary in front of them.
    assert_eq!(
        parse_full("test #hashtag tail"),
        Token::Sequence(vec![
            plain("test "),
            Token::Hashtag("hashtag".into()),
            plain(" tail"),
        ])
    );
    assert_eq!(parse_full("not#hashtag tail"), plain("not#hashtag tail"));

    assert_eq!(
        parse_full("<https://example.com>"),
        no_embed("https://example.com")
    );

    // Adjacent links okay
    assert_eq!(
        parse_full("<https://example.com/><https://awawa.gay/>"),
        Token::Sequence(vec![
            no_embed("https://example.com/"),
            no_embed("https://awawa.gay/"),
        ])
    );

    // `?[...]` links do not embed.
    assert_eq!(
        parse_full("Link test: ?[label](https://awawa.gay)"),
        Token::Sequence(vec![
            plain("Link test: "),
            link("label", "https://awawa.gay", false),
        ])
    );
    assert_eq!(
        parse_full("Link test: ?[label](https://awawa.gay)test"),
        Token::Sequence(vec![
            plain("Link test: "),
            link("label", "https://awawa.gay", false),
            plain("test"),
        ])
    );
    assert_eq!(
        parse_full("Link test: (?[label](https://awawa.gay))"),
        Token::Sequence(vec![
            plain("Link test: ("),
            link("label", "https://awawa.gay", false),
            plain(")"),
        ])
    );

    // Missing closing bracket
    assert_eq!(
        parse_full("Link test: ?[label](https://awawa.gay"),
        Token::Sequence(vec![
            plain("Link test: ?[label]("),
            raw("https://awawa.gay"),
        ])
    );
}
|
|
|
|
|
|
2023-10-16 21:45:45 +00:00
|
|
|
|
/// `<b>` nested `DEFAULT_DEPTH_LIMIT` times still parses as bold, while the `<s><i>` markup
/// inside the innermost level is kept as plain text.
#[test]
fn limit_nesting() {
    let core = " <s><i>test</i></s> ";

    let expected = (0..DEFAULT_DEPTH_LIMIT).fold(Token::PlainText(core.into()), |inner, _| {
        Token::Bold(Box::new(inner))
    });

    let source = format!(
        "{}{}{}",
        "<b>".repeat(DEFAULT_DEPTH_LIMIT),
        core,
        "</b>".repeat(DEFAULT_DEPTH_LIMIT)
    );

    assert_eq!(parse_full(&source), expected);
}
|
|
|
|
|
|
2023-10-07 18:40:01 +00:00
|
|
|
|
/// User, community and Matrix mentions, with and without hosts and trailing punctuation.
#[test]
fn parse_mention() {
    fn plain(text: &str) -> Token {
        Token::PlainText(text.into())
    }
    fn mention(kind: crate::MentionType, name: &str, host: Option<&str>) -> Token {
        Token::Mention {
            mention_type: kind,
            name: name.into(),
            host: host.map(|h| h.into()),
        }
    }
    fn user(name: &str, host: Option<&str>) -> Token {
        mention(crate::MentionType::User, name, host)
    }

    assert_eq!(parse_full("@tag"), user("tag", None));

    // An e-mail address is not a mention.
    assert_eq!(
        parse_full("email@notactuallyamenmtion.org"),
        plain("email@notactuallyamenmtion.org")
    );

    assert_eq!(
        parse_full("hgsjlkdsa @tag fgahjsdkd"),
        Token::Sequence(vec![
            plain("hgsjlkdsa "),
            user("tag", None),
            plain(" fgahjsdkd"),
        ])
    );

    // A dangling `@` after the name stays in the text.
    assert_eq!(
        parse_full("hgsjlkdsa @tag@ fgahjsdkd"),
        Token::Sequence(vec![
            plain("hgsjlkdsa "),
            user("tag", None),
            plain("@ fgahjsdkd"),
        ])
    );

    assert_eq!(
        parse_full("aaaa @tag@domain bbbbb"),
        Token::Sequence(vec![
            plain("aaaa "),
            user("tag", Some("domain")),
            plain(" bbbbb"),
        ])
    );

    // Punctuation after the host is not part of it.
    assert_eq!(
        parse_full("test @tag@domain, test"),
        Token::Sequence(vec![
            plain("test "),
            user("tag", Some("domain")),
            plain(", test"),
        ])
    );
    assert_eq!(
        parse_full("test @tag@domain.gay. test"),
        Token::Sequence(vec![
            plain("test "),
            user("tag", Some("domain.gay")),
            plain(". test"),
        ])
    );
    assert_eq!(
        parse_full("test @tag@domain? test"),
        Token::Sequence(vec![
            plain("test "),
            user("tag", Some("domain")),
            plain("? test"),
        ])
    );

    // Community mention.
    assert_eq!(
        parse_full("test !tag@domain.com test"),
        Token::Sequence(vec![
            plain("test "),
            mention(crate::MentionType::Community, "tag", Some("domain.com")),
            plain(" test"),
        ])
    );

    // A colon separator yields a Matrix user mention.
    assert_eq!(
        parse_full("@tag:domain.com"),
        mention(crate::MentionType::MatrixUser, "tag", Some("domain.com")),
    );
}
|
|
|
|
|
|
2023-10-08 20:15:55 +00:00
|
|
|
|
/// Shortcode emoji: standalone, adjacent, with a host, and the forms that stay plain text.
#[test]
fn parse_shortcodes() {
    fn shortcode(code: &str, host: Option<&str>) -> Token {
        Token::ShortcodeEmoji {
            shortcode: code.into(),
            host: host.map(|h| h.into()),
        }
    }

    assert_eq!(parse_full(":bottom:"), shortcode("bottom", None));

    assert_eq!(
        parse_full(":bottom::blobfox:"),
        Token::Sequence(vec![shortcode("bottom", None), shortcode("blobfox", None)])
    );

    assert_eq!(
        parse_full(":bottom@magnetar.social:"),
        shortcode("bottom", Some("magnetar.social"))
    );

    // Shortcodes touching alphanumeric text on either side are not emoji.
    assert_eq!(
        parse_full(":bottom:blobfox"),
        Token::PlainText(":bottom:blobfox".into())
    );
    assert_eq!(
        parse_full("bottom:blobfox:"),
        Token::PlainText("bottom:blobfox:".into())
    );
}
|
|
|
|
|
|
2023-10-05 19:21:23 +00:00
|
|
|
|
#[test]
fn parse_emoji() {
    // A run of distinct emoji is split into one token per emoji.
    let glyphs = ["🥺", "💜", "❤️", "🦊"];
    let expected_run = glyphs
        .iter()
        .map(|glyph| Token::UnicodeEmoji(glyph.to_string()))
        .collect::<Vec<_>>();
    assert_eq!(parse_full("🥺💜❤️🦊"), Token::Sequence(expected_run));

    // Trans flag, ZWJ
    let trans_flag = parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}");
    assert_eq!(
        trans_flag,
        Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}".into())
    );

    // A leading ZWJ is emitted as plain text ahead of the emoji.
    let leading_joiner = parse_full("\u{0200d}\u{1f3f3}\u{0fe0f}");
    let expected_leading_joiner = vec![
        Token::PlainText("\u{0200d}".into()),             // ZWJ
        Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
    ];
    assert_eq!(leading_joiner, Token::Sequence(expected_leading_joiner));

    // Trans flag, ZWNJ
    let non_joined = parse_full("\u{1f3f3}\u{0fe0f}\u{0200c}\u{026a7}\u{0fe0f}");
    let expected_non_joined = vec![
        Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
        Token::PlainText("\u{0200c}".into()),             // ZWNJ
        Token::UnicodeEmoji("\u{026a7}\u{0fe0f}".into()), // Trans symbol
    ];
    assert_eq!(non_joined, Token::Sequence(expected_non_joined));

    // Trailing ZWJs after an emoji are emitted as one plain-text token.
    let trailing_joiners = parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{0200d}\u{0200d}");
    let expected_trailing_joiners = vec![
        Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
        Token::PlainText("\u{0200d}\u{0200d}\u{0200d}".into()), // ZWJ
    ];
    assert_eq!(trailing_joiners, Token::Sequence(expected_trailing_joiners));
}
|
2023-10-25 22:30:11 +00:00
|
|
|
|
|
|
|
|
|
#[test]
fn xml_serialization() {
    // Parses the given source and serializes the resulting token tree to XML.
    let render = |source: &str| to_xml_string(&parse_full(source)).unwrap();

    // Bold and italic markup nest inside the `<mmm>` root element.
    let nested = render("***nyaaa***");
    assert_eq!(nested, r#"<mmm><b><i>nyaaa</i></b></mmm>"#);

    // Mention, function with an argument, shortcode emoji and a `<plain>` block.
    let mixed = render("@natty $[spin.speed=0.5s 🥺]:cat_attack: <plain>test</plain>");
    assert_eq!(
        mixed,
        r#"<mmm><mention name="natty" type="user"/> <fn name="spin" arg-speed="0.5s"><ue>🥺</ue></fn><ee>cat_attack</ee> test</mmm>"#
    );

    // Multi-line text with hashtags; the expected XML spans the same line breaks.
    let hashtags = render(
        "Ring Galaxy AM 0644 741 from Hubble\nCredits: AURA, STScI, J. Higdon, Cornell, ESA, #NASA\n#nature #space #astrophotography",
    );
    assert_eq!(
        hashtags,
        r#"<mmm>Ring Galaxy AM 0644 741 from Hubble
Credits: AURA, STScI, J. Higdon, Cornell, ESA, <hashtag>NASA</hashtag>
<hashtag>nature</hashtag> <hashtag>space</hashtag> <hashtag>astrophotography</hashtag></mmm>"#
    );

    // Fenced code block with a language tag.
    let fenced = render(
        r#"
```js
var x = undefined;
``` "#,
    );
    assert_eq!(
        fenced,
        "<mmm><code lang=\"js\">var x = undefined;</code></mmm>"
    );
}
|
2023-10-01 21:04:32 +00:00
|
|
|
|
}
|