More precise emoji extraction and fixed center tag parsing

This commit is contained in:
Natty 2023-10-07 19:44:39 +02:00
parent 95bce443be
commit 154cc27c07
Signed by: natty
GPG Key ID: BF6CB659ADEE60EC
2 changed files with 350 additions and 156 deletions

View File

@ -43,6 +43,7 @@ nom = "7"
nom_locate = "4" nom_locate = "4"
percent-encoding = "2.2" percent-encoding = "2.2"
redis = "0.23" redis = "0.23"
regex = "1.9"
reqwest = "0.11" reqwest = "0.11"
sea-orm = "0.12" sea-orm = "0.12"
sea-orm-migration = "0.12" sea-orm-migration = "0.12"

View File

@ -13,7 +13,7 @@ use nom::{IResult, Offset, Slice};
use nom_locate::LocatedSpan; use nom_locate::LocatedSpan;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::identity; use std::convert::{identity, Infallible};
use unicode_segmentation::UnicodeSegmentation; use unicode_segmentation::UnicodeSegmentation;
#[derive(Copy, Clone, Debug, Eq, PartialEq)] #[derive(Copy, Clone, Debug, Eq, PartialEq)]
@ -37,7 +37,6 @@ pub enum Token<'a> {
Sequence(Vec<Token<'a>>), Sequence(Vec<Token<'a>>),
Quote(Box<Token<'a>>), Quote(Box<Token<'a>>),
Small(Box<Token<'a>>), Small(Box<Token<'a>>),
Big(Box<Token<'a>>),
BoldItalic(Box<Token<'a>>), BoldItalic(Box<Token<'a>>),
Bold(Box<Token<'a>>), Bold(Box<Token<'a>>),
Italic(Box<Token<'a>>), Italic(Box<Token<'a>>),
@ -80,7 +79,6 @@ impl Token<'_> {
Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()), Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()),
Token::Quote(inner) => Token::Quote(Box::new(inner.owned())), Token::Quote(inner) => Token::Quote(Box::new(inner.owned())),
Token::Small(inner) => Token::Small(Box::new(inner.owned())), Token::Small(inner) => Token::Small(Box::new(inner.owned())),
Token::Big(inner) => Token::Big(Box::new(inner.owned())),
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())), Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())),
Token::Bold(inner) => Token::Bold(Box::new(inner.owned())), Token::Bold(inner) => Token::Bold(Box::new(inner.owned())),
Token::Italic(inner) => Token::Italic(Box::new(inner.owned())), Token::Italic(inner) => Token::Italic(Box::new(inner.owned())),
@ -180,7 +178,6 @@ impl Token<'_> {
} }
Token::Quote(inner) => Token::Quote(Box::new(inner.merged())), Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
Token::Small(inner) => Token::Small(Box::new(inner.merged())), Token::Small(inner) => Token::Small(Box::new(inner.merged())),
Token::Big(inner) => Token::Big(Box::new(inner.merged())),
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())), Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())),
Token::Bold(inner) => Token::Bold(Box::new(inner.merged())), Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
Token::Italic(inner) => Token::Italic(Box::new(inner.merged())), Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
@ -228,11 +225,19 @@ fn boxing_token<'a>(func: impl Fn(Box<Token<'a>>) -> Token<'a>) -> impl Fn(Token
move |tokens| func(Box::new(tokens)) move |tokens| func(Box::new(tokens))
} }
/// Builds a collector that drains an iterator into a `Vec`, wraps that `Vec`
/// with `func`, and finally passes the resulting token through `transform`.
///
/// * `func` - constructs a token from the collected items.
/// * `transform` - post-processes the token produced by `func`.
///
/// The returned closure takes the iterator as `&mut dyn Iterator`.
#[inline]
fn collect_sequence<'a, T>(
    func: impl Fn(Vec<T>) -> Token<'a>,
    transform: impl Fn(Token<'a>) -> Token<'a>,
) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token<'a> {
    move |items| {
        let gathered: Vec<T> = items.collect();
        let wrapped = func(gathered);
        transform(wrapped)
    }
}
#[inline] #[inline]
fn collect_char_sequence<'a>( fn collect_char_sequence<'a>(
func: impl Fn(Cow<'a, str>) -> Token<'a>, func: impl Fn(Cow<'a, str>) -> Token<'a>,
) -> impl Fn(Vec<char>) -> Token<'a> { ) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token<'a> {
move |chars| func(Cow::Owned(chars.into_iter().collect())) move |chars| func(Cow::Owned(chars.collect()))
} }
fn spliced<'a>( fn spliced<'a>(
@ -306,6 +311,42 @@ fn space(input: Span) -> IResult<Span, Token> {
Ok((input, Token::PlainText(frag.into_fragment().into()))) Ok((input, Token::PlainText(frag.into_fragment().into())))
} }
/// Pairs an inner parser with the collector that turns its results into a `Token`.
///
/// `tag_delimited` runs `matcher_inner` repeatedly between an opening and a
/// closing tag and then hands the parsed items to `collector`.
///
/// * `'a` - lifetime of the borrowed parser and collector closures.
/// * `'b` - lifetime carried by the `Span` input and the produced `Token`.
/// * `T` - item type produced by one successful run of `matcher_inner`.
///
/// Every generic parameter already appears in the field types, so no
/// `PhantomData` markers are needed.
struct Matcher<'a, 'b, T> {
    /// Parser applied to each item found between the delimiters.
    matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
    /// Folds the parsed items into the resulting token.
    collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
}

impl<'a, 'b, T> Matcher<'a, 'b, T> {
    /// Creates a matcher from a borrowed parser and a borrowed collector.
    fn new(
        matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
        collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
    ) -> Self {
        Self {
            matcher_inner,
            collector,
        }
    }
}

impl<'a, 'b> Matcher<'a, 'b, Infallible> {
    /// Creates a matcher that never matches anything.
    ///
    /// The parser is `fail`, so it returns an error for every input, and the
    /// item type is `Infallible`. The collector is therefore expected never to
    /// run; it panics via `unreachable!()` if it is ever called.
    // Don't break this invariant, else a monster will come at night and eat all your socks
    fn reject() -> Self {
        Self {
            matcher_inner: &fail::<_, Infallible, _>,
            collector: &|_| unreachable!(),
        }
    }
}
struct Context; struct Context;
impl Context { impl Context {
@ -477,13 +518,9 @@ impl Context {
let (input, (center_seq, _)) = many_till( let (input, (center_seq, _)) = many_till(
self.partial(Self::inline_single), self.partial(Self::inline_single),
tuple((opt(line_ending), tag_end)), tuple((opt(space1), opt(line_ending), tag_end)),
)(input)?; )(input)?;
let (input, _) = many0(space)(input)?;
let (input, _) = not(not(line_ending))(input)?;
let (input, _) = opt(line_ending)(input)?;
Ok(( Ok((
input, input,
boxing_token(Token::Center)(Token::Sequence(center_seq)), boxing_token(Token::Center)(Token::Sequence(center_seq)),
@ -560,23 +597,21 @@ impl Context {
} }
#[inline] #[inline]
fn tag_delimited<'a, 'b: 'a, T>( fn tag_delimited<'a, 'b: 'a, T, S>(
&'a self, &'a self,
start: &'b str, opening_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
end: &'b str, closing_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
escape: bool, escape: bool,
matcher_inner: impl Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a, matcher: Matcher<'a, 'b, T>,
matcher_inner_fallback: impl Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a, fallback: Matcher<'a, 'b, S>,
collector: impl Fn(Vec<T>) -> Token<'b> + 'a,
mapper: impl Fn(Token<'b>) -> Token<'b> + 'a,
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ { ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ {
move |input| { move |input| {
let opening_tag = &tag(start);
let closing_tag = &tag(end);
if escape { if escape {
if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), opening_tag))(input) { if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
return Ok((input_escaped, Token::PlainText(Cow::Borrowed(&mark)))); return Ok((
input_escaped,
Token::PlainText(Cow::Borrowed(mark.fragment())),
));
} }
} }
@ -584,8 +619,8 @@ impl Context {
let (post_open, _) = opening_tag(input)?; let (post_open, _) = opening_tag(input)?;
let res = tuple(( let res = tuple((
many1(tuple((not(closing_tag), &matcher_inner))), many1(tuple((not(&closing_tag), &matcher.matcher_inner))),
closing_tag, &closing_tag,
))(post_open); ))(post_open);
if let Err(nom::Err::Error(nom::error::Error { if let Err(nom::Err::Error(nom::error::Error {
@ -594,8 +629,8 @@ impl Context {
})) = res })) = res
{ {
let res_fallback = tuple(( let res_fallback = tuple((
many1(tuple((not(closing_tag), &matcher_inner_fallback))), many1(tuple((not(&closing_tag), &fallback.matcher_inner))),
closing_tag, &closing_tag,
))(post_open); ))(post_open);
if res_fallback.is_err() { if res_fallback.is_err() {
@ -606,22 +641,22 @@ impl Context {
} }
let (input, (inner, closing)) = res_fallback.unwrap(); let (input, (inner, closing)) = res_fallback.unwrap();
let inner = inner.into_iter().map(|(_, t)| t).collect::<Vec<_>>(); let mut inner = inner.into_iter().map(|(_, t)| t);
return Ok(( return Ok((
input, input,
Token::Sequence(vec![ Token::Sequence(vec![
Token::PlainText(begin.fragment_between(&post_open).into()), Token::PlainText(begin.fragment_between(&post_open).into()),
collector(inner), ((fallback.collector)(&mut inner)),
Token::PlainText(closing.into_fragment().into()), Token::PlainText(closing.into_fragment().into()),
]), ]),
)); ));
} }
let (input, (inner, _)) = res?; let (input, (inner, _)) = res?;
let inner = inner.into_iter().map(|(_, t)| t).collect::<Vec<_>>(); let mut inner = inner.into_iter().map(|(_, t)| t);
Ok((input, mapper(collector(inner)))) Ok((input, (matcher.collector)(&mut inner)))
} }
} }
@ -691,176 +726,230 @@ impl Context {
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"<small>", tag("<small>"),
"</small>", tag("</small>"),
false, false,
self.partial(Self::inline_single), Matcher::new(
self.partial(Self::inline_non_formatting_single), &self.partial(Self::inline_single),
Token::Sequence, &collect_sequence(Token::Sequence, boxing_token(Token::Small)),
boxing_token(Token::Small), ),
Matcher::new(
&self.partial(Self::inline_non_formatting_single),
&collect_sequence(Token::Sequence, identity),
),
)(input) )(input)
} }
// TODO: CommonMark flanking rules // TODO: CommonMark flanking rules
fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"***", tag("***"),
"***", tag("***"),
true, true,
self.partial(Self::inline_single), Matcher::new(
self.partial(Self::inline_non_formatting_single), &self.partial(Self::inline_single),
Token::Sequence, &collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
boxing_token(Token::BoldItalic), ),
Matcher::new(
&self.partial(Self::inline_non_formatting_single),
&collect_sequence(Token::Sequence, identity),
),
)(input) )(input)
} }
// TODO: CommonMark flanking rules // TODO: CommonMark flanking rules
fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"___", tag("___"),
"___", tag("___"),
true, true,
self.partial(Self::inline_single), Matcher::new(
self.partial(Self::inline_non_formatting_single), &self.partial(Self::inline_single),
Token::Sequence, &collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
boxing_token(Token::BoldItalic), ),
Matcher::new(
&self.partial(Self::inline_non_formatting_single),
&collect_sequence(Token::Sequence, identity),
),
)(input) )(input)
} }
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"<b>", tag("<b>"),
"</b>", tag("</b>"),
false, false,
self.partial(Self::inline_single), Matcher::new(
self.partial(Self::inline_non_formatting_single), &self.partial(Self::inline_single),
Token::Sequence, &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
boxing_token(Token::Bold), ),
Matcher::new(
&self.partial(Self::inline_non_formatting_single),
&collect_sequence(Token::Sequence, identity),
),
)(input) )(input)
} }
// TODO: CommonMark flanking rules // TODO: CommonMark flanking rules
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"**", tag("**"),
"**", tag("**"),
true, true,
self.partial(Self::inline_single), Matcher::new(
self.partial(Self::inline_non_formatting_single), &self.partial(Self::inline_single),
Token::Sequence, &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
boxing_token(Token::Bold), ),
Matcher::new(
&self.partial(Self::inline_non_formatting_single),
&collect_sequence(Token::Sequence, identity),
),
)(input) )(input)
} }
// TODO: CommonMark flanking rules // TODO: CommonMark flanking rules
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"__", tag("__"),
"__", tag("__"),
true, true,
self.partial(Self::inline_single), Matcher::new(
self.partial(Self::inline_non_formatting_single), &self.partial(Self::inline_single),
Token::Sequence, &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
boxing_token(Token::Bold), ),
Matcher::new(
&self.partial(Self::inline_non_formatting_single),
&collect_sequence(Token::Sequence, identity),
),
)(input) )(input)
} }
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"<i>", tag("<i>"),
"</i>", tag("</i>"),
false, false,
self.partial(Self::inline_single), Matcher::new(
self.partial(Self::inline_non_formatting_single), &self.partial(Self::inline_single),
Token::Sequence, &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
boxing_token(Token::Italic), ),
Matcher::new(
&self.partial(Self::inline_non_formatting_single),
&collect_sequence(Token::Sequence, identity),
),
)(input) )(input)
} }
// TODO: CommonMark flanking rules // TODO: CommonMark flanking rules
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"*", tag("*"),
"*", tag("*"),
true, true,
self.partial(Self::inline_single), Matcher::new(
self.partial(Self::inline_non_formatting_single), &self.partial(Self::inline_single),
Token::Sequence, &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
boxing_token(Token::Italic), ),
Matcher::new(
&self.partial(Self::inline_non_formatting_single),
&collect_sequence(Token::Sequence, identity),
),
)(input) )(input)
} }
// TODO: CommonMark flanking rules // TODO: CommonMark flanking rules
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"_", tag("_"),
"_", tag("_"),
true, true,
self.partial(Self::inline_single), Matcher::new(
self.partial(Self::inline_non_formatting_single), &self.partial(Self::inline_single),
Token::Sequence, &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
boxing_token(Token::Italic), ),
Matcher::new(
&self.partial(Self::inline_non_formatting_single),
&collect_sequence(Token::Sequence, identity),
),
)(input) )(input)
} }
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"<s>", tag("<s>"),
"</s>", tag("</s>"),
false, false,
self.partial(Self::inline_single), Matcher::new(
self.partial(Self::inline_non_formatting_single), &self.partial(Self::inline_single),
Token::Sequence, &collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
boxing_token(Token::Strikethrough), ),
Matcher::new(
&self.partial(Self::inline_non_formatting_single),
&collect_sequence(Token::Sequence, identity),
),
)(input) )(input)
} }
// TODO: CommonMark flanking rules // TODO: CommonMark flanking rules
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"~~", tag("~~"),
"~~", tag("~~"),
true, true,
move |input| { Matcher::new(
tuple((not_line_ending, self.partial(Self::inline_single)))(input) &move |input| {
.map(|(i, t)| (i, t.1)) map(
tuple(((not(line_ending)), self.partial(Self::inline_single))),
|(_, captured)| captured,
)(input)
}, },
move |input| { &collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
),
Matcher::new(
&move |input| {
map(
tuple(( tuple((
not_line_ending, (not(line_ending)),
self.partial(Self::inline_non_formatting_single), self.partial(Self::inline_non_formatting_single),
))(input) )),
.map(|(i, t)| (i, t.1)) |(_, captured)| captured,
)(input)
}, },
Token::Sequence, &collect_sequence(Token::Sequence, identity),
boxing_token(Token::Strikethrough), ),
)(input) )(input)
} }
fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"`", tag("`"),
"", |input| alt((tag("`"), tag("´")))(input),
true, true,
move |input| { Matcher::new(
tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar))(input) &move |input| {
.map(|(i, (_skip, c))| (i, c)) map(
tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar)),
|(_, captured)| captured,
)(input)
}, },
fail, &collect_char_sequence(Token::InlineCode),
collect_char_sequence(Token::InlineCode), ),
identity, Matcher::reject(),
)(input) )(input)
} }
fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
self.tag_delimited( self.tag_delimited(
"\\(", tag("\\("),
"\\)", tag("\\)"),
false, false,
move |input| tuple((not(line_ending), anychar))(input).map(|(i, (_skip, c))| (i, c)), Matcher::new(
fail, &move |input| {
collect_char_sequence(Token::InlineMath), map(tuple((not(line_ending), anychar)), |(_, captured)| captured)(input)
identity, },
&collect_char_sequence(Token::InlineMath),
),
Matcher::reject(),
)(input) )(input)
} }
@ -925,6 +1014,8 @@ impl Context {
return fail(input); return fail(input);
}; };
let grapheme = grapheme.trim_end_matches(|c| c == '\u{200c}' || c == '\u{200d}');
let emoji = emojis::get(grapheme); let emoji = emojis::get(grapheme);
if emoji.is_none() { if emoji.is_none() {
@ -1059,10 +1150,13 @@ fn url_chars<'a, T: 'a>(
mod test { mod test {
use crate::{url_chars, Context, Span, Token}; use crate::{url_chars, Context, Span, Token};
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use nom::multi::many1;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
/// Test helper: runs `Context.full` over `string` and returns the parsed
/// token after calling `merged()` and `owned()` on it.
///
/// Panics if the parser returns an error.
fn parse_full(string: &str) -> Token {
    let (_remaining, parsed) = Context.full(Span::new(string)).unwrap();
    parsed.merged().owned()
}
#[test] #[test]
fn parse_url_chars() { fn parse_url_chars() {
let test1 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security))"; let test1 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security))";
@ -1111,9 +1205,92 @@ mod test {
); );
} }
// Checks how `parse_full` handles the basic inline formatting delimiters.
#[test]
fn parse_formatting() {
// `~~…~~` yields a Strikethrough token wrapping the plain text.
assert_eq!(
Token::Strikethrough(Box::new(Token::PlainText("stikethrough".into()))),
parse_full(r#"~~stikethrough~~"#)
);
// `**…**` yields Bold.
assert_eq!(
Token::Bold(Box::new(Token::PlainText("bold".into()))),
parse_full(r#"**bold**"#)
);
// `*…*` yields Italic.
assert_eq!(
Token::Italic(Box::new(Token::PlainText("italic".into()))),
parse_full(r#"*italic*"#)
);
// A backtick pair yields InlineCode; the surrounding text stays plain.
assert_eq!(
Token::Sequence(vec![
Token::PlainText("not code ".into()),
Token::InlineCode("code".into()),
Token::PlainText(" also not code".into())
]),
parse_full(r#"not code `code` also not code"#)
);
// A trailing backtick without a closing partner is kept as plain text.
assert_eq!(
Token::Sequence(vec![
Token::PlainText("not code ".into()),
Token::InlineCode("code".into()),
Token::PlainText(" also `not code".into())
]),
parse_full(r#"not code `code` also `not code"#)
);
// Asterisks inside inline code are kept verbatim instead of becoming Italic.
assert_eq!(
Token::Sequence(vec![
Token::PlainText("not code ".into()),
Token::InlineCode("*not bold*".into()),
Token::PlainText(" also not code".into())
]),
parse_full(r#"not code `*not bold*` also not code"#)
);
// `***…***` yields a single BoldItalic token.
assert_eq!(
Token::BoldItalic(Box::new(Token::PlainText("bold italic".into()))),
parse_full(r#"***bold italic***"#)
);
// Nested `<b><i>` tags yield Bold wrapping Italic, not BoldItalic.
assert_eq!(
Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
"bold italic".into()
))))),
parse_full(r#"<b><i>bold italic</i></b>"#)
);
}
#[test] #[test]
fn parse_complex() { fn parse_complex() {
let emoji = r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#; assert_eq!(
Token::Center(Box::new(Token::Sequence(vec![
Token::PlainText("centered\n".into()),
Token::UnicodeEmoji("🦋".into()),
Token::UnicodeEmoji("🏳️‍⚧️".into()),
Token::PlainText("\ntext".into())
]))),
parse_full(
r#"<center>centered
🦋🏳
text</center>"#
)
);
assert_eq!(
Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
Token::PlainText("centered\n".into()),
Token::UnicodeEmoji("👩🏽‍🤝‍👩🏼".into()),
Token::PlainText("\ntext".into())
]))))),
parse_full(
r#"> <center>centered
> 👩🏽🤝👩🏼
> text</center>"#
)
);
assert_eq!( assert_eq!(
Token::Function { Token::Function {
name: "x2".into(), name: "x2".into(),
@ -1138,21 +1315,7 @@ mod test {
Token::UnicodeEmoji("🦊".into()), Token::UnicodeEmoji("🦊".into()),
])) ]))
}, },
Context.full(Span::new(emoji)).unwrap().1.merged() parse_full(r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#)
);
let bold_italic = r#"***bold italic***"#;
assert_eq!(
Token::BoldItalic(Box::new(Token::PlainText("bold italic".into()))),
Context.full(Span::new(bold_italic)).unwrap().1.merged()
);
let bold_italic_tag = r#"<b><i>bold italic</i></b>"#;
assert_eq!(
Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
"bold italic".into()
))))),
Context.full(Span::new(bold_italic_tag)).unwrap().1.merged()
); );
assert_eq!( assert_eq!(
@ -1178,37 +1341,67 @@ mod test {
.merged() .merged()
); );
let quote = r#"
> test
> <i>
> italic
> </i>
>> Nested quote
"#;
assert_eq!( assert_eq!(
Token::Quote(Box::new(Token::Sequence(vec![ Token::Quote(Box::new(Token::Sequence(vec![
Token::PlainText("test\n".into()), Token::PlainText("test\n".into()),
Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))), Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
Token::Quote(Box::new(Token::PlainText("Nested quote".into()))) Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
]))), ]))),
Context.full(Span::new(quote)).unwrap().1.merged() parse_full(
r#"
> test
> <i>
> italic
> </i>
>> Nested quote
"#
)
); );
} }
#[test] #[test]
fn parse_emoji() { fn parse_emoji() {
let test = "🥺💜❤️🦊";
let ctx = Context;
let tokens = many1(ctx.partial(Context::unicode_emoji))(Span::from(test)).unwrap();
assert_eq!( assert_eq!(
Token::Sequence(
vec!["🥺", "💜", "❤️", "🦊"] vec!["🥺", "💜", "❤️", "🦊"]
.into_iter() .into_iter()
.map(<&str as Into<Cow<_>>>::into) .map(<&str as Into<Cow<_>>>::into)
.map(Token::UnicodeEmoji) .map(Token::UnicodeEmoji)
.collect::<Vec<_>>(), .collect::<Vec<_>>()
tokens.1 ),
parse_full("🥺💜❤️🦊")
);
// Trans flag, ZWJ
assert_eq!(
Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}".into()),
parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}")
);
assert_eq!(
Token::Sequence(vec![
Token::PlainText("\u{0200d}".into()), // ZWJ
Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
]),
parse_full("\u{0200d}\u{1f3f3}\u{0fe0f}")
);
// Trans flag, ZWNJ
assert_eq!(
Token::Sequence(vec![
Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
Token::PlainText("\u{0200c}".into()), // ZWNJ
Token::UnicodeEmoji("\u{026a7}\u{0fe0f}".into()) // Trans symbol
]),
parse_full("\u{1f3f3}\u{0fe0f}\u{0200c}\u{026a7}\u{0fe0f}")
);
assert_eq!(
Token::Sequence(vec![
Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
Token::PlainText("\u{0200d}\u{0200d}\u{0200d}".into()), // ZWJ
]),
parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{0200d}\u{0200d}")
); );
} }
} }