More precise emoji extraction and fixed center tag parsing
This commit is contained in:
parent
95bce443be
commit
154cc27c07
|
@ -43,6 +43,7 @@ nom = "7"
|
||||||
nom_locate = "4"
|
nom_locate = "4"
|
||||||
percent-encoding = "2.2"
|
percent-encoding = "2.2"
|
||||||
redis = "0.23"
|
redis = "0.23"
|
||||||
|
regex = "1.9"
|
||||||
reqwest = "0.11"
|
reqwest = "0.11"
|
||||||
sea-orm = "0.12"
|
sea-orm = "0.12"
|
||||||
sea-orm-migration = "0.12"
|
sea-orm-migration = "0.12"
|
||||||
|
|
|
@ -13,7 +13,7 @@ use nom::{IResult, Offset, Slice};
|
||||||
use nom_locate::LocatedSpan;
|
use nom_locate::LocatedSpan;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::convert::identity;
|
use std::convert::{identity, Infallible};
|
||||||
use unicode_segmentation::UnicodeSegmentation;
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||||
|
@ -37,7 +37,6 @@ pub enum Token<'a> {
|
||||||
Sequence(Vec<Token<'a>>),
|
Sequence(Vec<Token<'a>>),
|
||||||
Quote(Box<Token<'a>>),
|
Quote(Box<Token<'a>>),
|
||||||
Small(Box<Token<'a>>),
|
Small(Box<Token<'a>>),
|
||||||
Big(Box<Token<'a>>),
|
|
||||||
BoldItalic(Box<Token<'a>>),
|
BoldItalic(Box<Token<'a>>),
|
||||||
Bold(Box<Token<'a>>),
|
Bold(Box<Token<'a>>),
|
||||||
Italic(Box<Token<'a>>),
|
Italic(Box<Token<'a>>),
|
||||||
|
@ -80,7 +79,6 @@ impl Token<'_> {
|
||||||
Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()),
|
Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()),
|
||||||
Token::Quote(inner) => Token::Quote(Box::new(inner.owned())),
|
Token::Quote(inner) => Token::Quote(Box::new(inner.owned())),
|
||||||
Token::Small(inner) => Token::Small(Box::new(inner.owned())),
|
Token::Small(inner) => Token::Small(Box::new(inner.owned())),
|
||||||
Token::Big(inner) => Token::Big(Box::new(inner.owned())),
|
|
||||||
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())),
|
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())),
|
||||||
Token::Bold(inner) => Token::Bold(Box::new(inner.owned())),
|
Token::Bold(inner) => Token::Bold(Box::new(inner.owned())),
|
||||||
Token::Italic(inner) => Token::Italic(Box::new(inner.owned())),
|
Token::Italic(inner) => Token::Italic(Box::new(inner.owned())),
|
||||||
|
@ -180,7 +178,6 @@ impl Token<'_> {
|
||||||
}
|
}
|
||||||
Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
|
Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
|
||||||
Token::Small(inner) => Token::Small(Box::new(inner.merged())),
|
Token::Small(inner) => Token::Small(Box::new(inner.merged())),
|
||||||
Token::Big(inner) => Token::Big(Box::new(inner.merged())),
|
|
||||||
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())),
|
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())),
|
||||||
Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
|
Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
|
||||||
Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
|
Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
|
||||||
|
@ -228,11 +225,19 @@ fn boxing_token<'a>(func: impl Fn(Box<Token<'a>>) -> Token<'a>) -> impl Fn(Token
|
||||||
move |tokens| func(Box::new(tokens))
|
move |tokens| func(Box::new(tokens))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn collect_sequence<'a, T>(
|
||||||
|
func: impl Fn(Vec<T>) -> Token<'a>,
|
||||||
|
transform: impl Fn(Token<'a>) -> Token<'a>,
|
||||||
|
) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token<'a> {
|
||||||
|
move |tokens| transform(func(tokens.collect()))
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn collect_char_sequence<'a>(
|
fn collect_char_sequence<'a>(
|
||||||
func: impl Fn(Cow<'a, str>) -> Token<'a>,
|
func: impl Fn(Cow<'a, str>) -> Token<'a>,
|
||||||
) -> impl Fn(Vec<char>) -> Token<'a> {
|
) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token<'a> {
|
||||||
move |chars| func(Cow::Owned(chars.into_iter().collect()))
|
move |chars| func(Cow::Owned(chars.collect()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn spliced<'a>(
|
fn spliced<'a>(
|
||||||
|
@ -306,6 +311,42 @@ fn space(input: Span) -> IResult<Span, Token> {
|
||||||
Ok((input, Token::PlainText(frag.into_fragment().into())))
|
Ok((input, Token::PlainText(frag.into_fragment().into())))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct Matcher<'a, 'b, T> {
|
||||||
|
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
|
||||||
|
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
|
||||||
|
_phantom_closure: std::marker::PhantomData<&'a ()>,
|
||||||
|
_phantom_data: std::marker::PhantomData<&'b ()>,
|
||||||
|
_phantom_output: std::marker::PhantomData<fn() -> T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, 'b, T> Matcher<'a, 'b, T> {
|
||||||
|
fn new(
|
||||||
|
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
|
||||||
|
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
matcher_inner,
|
||||||
|
collector,
|
||||||
|
_phantom_closure: std::marker::PhantomData,
|
||||||
|
_phantom_data: std::marker::PhantomData,
|
||||||
|
_phantom_output: std::marker::PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, 'b> Matcher<'a, 'b, Infallible> {
|
||||||
|
// Don't break this invariant, else a monster will come at night and eat all your socks
|
||||||
|
fn reject() -> Self {
|
||||||
|
Self {
|
||||||
|
matcher_inner: &fail::<_, Infallible, _>,
|
||||||
|
collector: &|_| unreachable!(),
|
||||||
|
_phantom_closure: std::marker::PhantomData,
|
||||||
|
_phantom_data: std::marker::PhantomData,
|
||||||
|
_phantom_output: std::marker::PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct Context;
|
struct Context;
|
||||||
|
|
||||||
impl Context {
|
impl Context {
|
||||||
|
@ -477,13 +518,9 @@ impl Context {
|
||||||
|
|
||||||
let (input, (center_seq, _)) = many_till(
|
let (input, (center_seq, _)) = many_till(
|
||||||
self.partial(Self::inline_single),
|
self.partial(Self::inline_single),
|
||||||
tuple((opt(line_ending), tag_end)),
|
tuple((opt(space1), opt(line_ending), tag_end)),
|
||||||
)(input)?;
|
)(input)?;
|
||||||
|
|
||||||
let (input, _) = many0(space)(input)?;
|
|
||||||
let (input, _) = not(not(line_ending))(input)?;
|
|
||||||
let (input, _) = opt(line_ending)(input)?;
|
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
input,
|
input,
|
||||||
boxing_token(Token::Center)(Token::Sequence(center_seq)),
|
boxing_token(Token::Center)(Token::Sequence(center_seq)),
|
||||||
|
@ -560,23 +597,21 @@ impl Context {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn tag_delimited<'a, 'b: 'a, T>(
|
fn tag_delimited<'a, 'b: 'a, T, S>(
|
||||||
&'a self,
|
&'a self,
|
||||||
start: &'b str,
|
opening_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
||||||
end: &'b str,
|
closing_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
||||||
escape: bool,
|
escape: bool,
|
||||||
matcher_inner: impl Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a,
|
matcher: Matcher<'a, 'b, T>,
|
||||||
matcher_inner_fallback: impl Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a,
|
fallback: Matcher<'a, 'b, S>,
|
||||||
collector: impl Fn(Vec<T>) -> Token<'b> + 'a,
|
|
||||||
mapper: impl Fn(Token<'b>) -> Token<'b> + 'a,
|
|
||||||
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ {
|
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ {
|
||||||
move |input| {
|
move |input| {
|
||||||
let opening_tag = &tag(start);
|
|
||||||
let closing_tag = &tag(end);
|
|
||||||
|
|
||||||
if escape {
|
if escape {
|
||||||
if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), opening_tag))(input) {
|
if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
|
||||||
return Ok((input_escaped, Token::PlainText(Cow::Borrowed(&mark))));
|
return Ok((
|
||||||
|
input_escaped,
|
||||||
|
Token::PlainText(Cow::Borrowed(mark.fragment())),
|
||||||
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -584,8 +619,8 @@ impl Context {
|
||||||
let (post_open, _) = opening_tag(input)?;
|
let (post_open, _) = opening_tag(input)?;
|
||||||
|
|
||||||
let res = tuple((
|
let res = tuple((
|
||||||
many1(tuple((not(closing_tag), &matcher_inner))),
|
many1(tuple((not(&closing_tag), &matcher.matcher_inner))),
|
||||||
closing_tag,
|
&closing_tag,
|
||||||
))(post_open);
|
))(post_open);
|
||||||
|
|
||||||
if let Err(nom::Err::Error(nom::error::Error {
|
if let Err(nom::Err::Error(nom::error::Error {
|
||||||
|
@ -594,8 +629,8 @@ impl Context {
|
||||||
})) = res
|
})) = res
|
||||||
{
|
{
|
||||||
let res_fallback = tuple((
|
let res_fallback = tuple((
|
||||||
many1(tuple((not(closing_tag), &matcher_inner_fallback))),
|
many1(tuple((not(&closing_tag), &fallback.matcher_inner))),
|
||||||
closing_tag,
|
&closing_tag,
|
||||||
))(post_open);
|
))(post_open);
|
||||||
|
|
||||||
if res_fallback.is_err() {
|
if res_fallback.is_err() {
|
||||||
|
@ -606,22 +641,22 @@ impl Context {
|
||||||
}
|
}
|
||||||
|
|
||||||
let (input, (inner, closing)) = res_fallback.unwrap();
|
let (input, (inner, closing)) = res_fallback.unwrap();
|
||||||
let inner = inner.into_iter().map(|(_, t)| t).collect::<Vec<_>>();
|
let mut inner = inner.into_iter().map(|(_, t)| t);
|
||||||
|
|
||||||
return Ok((
|
return Ok((
|
||||||
input,
|
input,
|
||||||
Token::Sequence(vec![
|
Token::Sequence(vec![
|
||||||
Token::PlainText(begin.fragment_between(&post_open).into()),
|
Token::PlainText(begin.fragment_between(&post_open).into()),
|
||||||
collector(inner),
|
((fallback.collector)(&mut inner)),
|
||||||
Token::PlainText(closing.into_fragment().into()),
|
Token::PlainText(closing.into_fragment().into()),
|
||||||
]),
|
]),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let (input, (inner, _)) = res?;
|
let (input, (inner, _)) = res?;
|
||||||
let inner = inner.into_iter().map(|(_, t)| t).collect::<Vec<_>>();
|
let mut inner = inner.into_iter().map(|(_, t)| t);
|
||||||
|
|
||||||
Ok((input, mapper(collector(inner))))
|
Ok((input, (matcher.collector)(&mut inner)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -691,176 +726,230 @@ impl Context {
|
||||||
|
|
||||||
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"<small>",
|
tag("<small>"),
|
||||||
"</small>",
|
tag("</small>"),
|
||||||
false,
|
false,
|
||||||
self.partial(Self::inline_single),
|
Matcher::new(
|
||||||
self.partial(Self::inline_non_formatting_single),
|
&self.partial(Self::inline_single),
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, boxing_token(Token::Small)),
|
||||||
boxing_token(Token::Small),
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&self.partial(Self::inline_non_formatting_single),
|
||||||
|
&collect_sequence(Token::Sequence, identity),
|
||||||
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: CommonMark flanking rules
|
// TODO: CommonMark flanking rules
|
||||||
fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"***",
|
tag("***"),
|
||||||
"***",
|
tag("***"),
|
||||||
true,
|
true,
|
||||||
self.partial(Self::inline_single),
|
Matcher::new(
|
||||||
self.partial(Self::inline_non_formatting_single),
|
&self.partial(Self::inline_single),
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
|
||||||
boxing_token(Token::BoldItalic),
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&self.partial(Self::inline_non_formatting_single),
|
||||||
|
&collect_sequence(Token::Sequence, identity),
|
||||||
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: CommonMark flanking rules
|
// TODO: CommonMark flanking rules
|
||||||
fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"___",
|
tag("___"),
|
||||||
"___",
|
tag("___"),
|
||||||
true,
|
true,
|
||||||
self.partial(Self::inline_single),
|
Matcher::new(
|
||||||
self.partial(Self::inline_non_formatting_single),
|
&self.partial(Self::inline_single),
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
|
||||||
boxing_token(Token::BoldItalic),
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&self.partial(Self::inline_non_formatting_single),
|
||||||
|
&collect_sequence(Token::Sequence, identity),
|
||||||
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"<b>",
|
tag("<b>"),
|
||||||
"</b>",
|
tag("</b>"),
|
||||||
false,
|
false,
|
||||||
self.partial(Self::inline_single),
|
Matcher::new(
|
||||||
self.partial(Self::inline_non_formatting_single),
|
&self.partial(Self::inline_single),
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
|
||||||
boxing_token(Token::Bold),
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&self.partial(Self::inline_non_formatting_single),
|
||||||
|
&collect_sequence(Token::Sequence, identity),
|
||||||
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: CommonMark flanking rules
|
// TODO: CommonMark flanking rules
|
||||||
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"**",
|
tag("**"),
|
||||||
"**",
|
tag("**"),
|
||||||
true,
|
true,
|
||||||
self.partial(Self::inline_single),
|
Matcher::new(
|
||||||
self.partial(Self::inline_non_formatting_single),
|
&self.partial(Self::inline_single),
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
|
||||||
boxing_token(Token::Bold),
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&self.partial(Self::inline_non_formatting_single),
|
||||||
|
&collect_sequence(Token::Sequence, identity),
|
||||||
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: CommonMark flanking rules
|
// TODO: CommonMark flanking rules
|
||||||
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"__",
|
tag("__"),
|
||||||
"__",
|
tag("__"),
|
||||||
true,
|
true,
|
||||||
self.partial(Self::inline_single),
|
Matcher::new(
|
||||||
self.partial(Self::inline_non_formatting_single),
|
&self.partial(Self::inline_single),
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
|
||||||
boxing_token(Token::Bold),
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&self.partial(Self::inline_non_formatting_single),
|
||||||
|
&collect_sequence(Token::Sequence, identity),
|
||||||
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"<i>",
|
tag("<i>"),
|
||||||
"</i>",
|
tag("</i>"),
|
||||||
false,
|
false,
|
||||||
self.partial(Self::inline_single),
|
Matcher::new(
|
||||||
self.partial(Self::inline_non_formatting_single),
|
&self.partial(Self::inline_single),
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
|
||||||
boxing_token(Token::Italic),
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&self.partial(Self::inline_non_formatting_single),
|
||||||
|
&collect_sequence(Token::Sequence, identity),
|
||||||
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: CommonMark flanking rules
|
// TODO: CommonMark flanking rules
|
||||||
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"*",
|
tag("*"),
|
||||||
"*",
|
tag("*"),
|
||||||
true,
|
true,
|
||||||
self.partial(Self::inline_single),
|
Matcher::new(
|
||||||
self.partial(Self::inline_non_formatting_single),
|
&self.partial(Self::inline_single),
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
|
||||||
boxing_token(Token::Italic),
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&self.partial(Self::inline_non_formatting_single),
|
||||||
|
&collect_sequence(Token::Sequence, identity),
|
||||||
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: CommonMark flanking rules
|
// TODO: CommonMark flanking rules
|
||||||
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"_",
|
tag("_"),
|
||||||
"_",
|
tag("_"),
|
||||||
true,
|
true,
|
||||||
self.partial(Self::inline_single),
|
Matcher::new(
|
||||||
self.partial(Self::inline_non_formatting_single),
|
&self.partial(Self::inline_single),
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
|
||||||
boxing_token(Token::Italic),
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&self.partial(Self::inline_non_formatting_single),
|
||||||
|
&collect_sequence(Token::Sequence, identity),
|
||||||
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"<s>",
|
tag("<s>"),
|
||||||
"</s>",
|
tag("</s>"),
|
||||||
false,
|
false,
|
||||||
self.partial(Self::inline_single),
|
Matcher::new(
|
||||||
self.partial(Self::inline_non_formatting_single),
|
&self.partial(Self::inline_single),
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
|
||||||
boxing_token(Token::Strikethrough),
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&self.partial(Self::inline_non_formatting_single),
|
||||||
|
&collect_sequence(Token::Sequence, identity),
|
||||||
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: CommonMark flanking rules
|
// TODO: CommonMark flanking rules
|
||||||
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"~~",
|
tag("~~"),
|
||||||
"~~",
|
tag("~~"),
|
||||||
true,
|
true,
|
||||||
move |input| {
|
Matcher::new(
|
||||||
tuple((not_line_ending, self.partial(Self::inline_single)))(input)
|
&move |input| {
|
||||||
.map(|(i, t)| (i, t.1))
|
map(
|
||||||
|
tuple(((not(line_ending)), self.partial(Self::inline_single))),
|
||||||
|
|(_, captured)| captured,
|
||||||
|
)(input)
|
||||||
},
|
},
|
||||||
move |input| {
|
&collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
|
||||||
|
),
|
||||||
|
Matcher::new(
|
||||||
|
&move |input| {
|
||||||
|
map(
|
||||||
tuple((
|
tuple((
|
||||||
not_line_ending,
|
(not(line_ending)),
|
||||||
self.partial(Self::inline_non_formatting_single),
|
self.partial(Self::inline_non_formatting_single),
|
||||||
))(input)
|
)),
|
||||||
.map(|(i, t)| (i, t.1))
|
|(_, captured)| captured,
|
||||||
|
)(input)
|
||||||
},
|
},
|
||||||
Token::Sequence,
|
&collect_sequence(Token::Sequence, identity),
|
||||||
boxing_token(Token::Strikethrough),
|
),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"`",
|
tag("`"),
|
||||||
"",
|
|input| alt((tag("`"), tag("´")))(input),
|
||||||
true,
|
true,
|
||||||
move |input| {
|
Matcher::new(
|
||||||
tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar))(input)
|
&move |input| {
|
||||||
.map(|(i, (_skip, c))| (i, c))
|
map(
|
||||||
|
tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar)),
|
||||||
|
|(_, captured)| captured,
|
||||||
|
)(input)
|
||||||
},
|
},
|
||||||
fail,
|
&collect_char_sequence(Token::InlineCode),
|
||||||
collect_char_sequence(Token::InlineCode),
|
),
|
||||||
identity,
|
Matcher::reject(),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
self.tag_delimited(
|
self.tag_delimited(
|
||||||
"\\(",
|
tag("\\("),
|
||||||
"\\)",
|
tag("\\)"),
|
||||||
false,
|
false,
|
||||||
move |input| tuple((not(line_ending), anychar))(input).map(|(i, (_skip, c))| (i, c)),
|
Matcher::new(
|
||||||
fail,
|
&move |input| {
|
||||||
collect_char_sequence(Token::InlineMath),
|
map(tuple((not(line_ending), anychar)), |(_, captured)| captured)(input)
|
||||||
identity,
|
},
|
||||||
|
&collect_char_sequence(Token::InlineMath),
|
||||||
|
),
|
||||||
|
Matcher::reject(),
|
||||||
)(input)
|
)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -925,6 +1014,8 @@ impl Context {
|
||||||
return fail(input);
|
return fail(input);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let grapheme = grapheme.trim_end_matches(|c| c == '\u{200c}' || c == '\u{200d}');
|
||||||
|
|
||||||
let emoji = emojis::get(grapheme);
|
let emoji = emojis::get(grapheme);
|
||||||
|
|
||||||
if emoji.is_none() {
|
if emoji.is_none() {
|
||||||
|
@ -1059,10 +1150,13 @@ fn url_chars<'a, T: 'a>(
|
||||||
mod test {
|
mod test {
|
||||||
use crate::{url_chars, Context, Span, Token};
|
use crate::{url_chars, Context, Span, Token};
|
||||||
use nom::bytes::complete::tag;
|
use nom::bytes::complete::tag;
|
||||||
use nom::multi::many1;
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
fn parse_full(string: &str) -> Token {
|
||||||
|
Context.full(Span::new(string)).unwrap().1.merged().owned()
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_url_chars() {
|
fn parse_url_chars() {
|
||||||
let test1 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security))";
|
let test1 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security))";
|
||||||
|
@ -1111,9 +1205,92 @@ mod test {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn parse_formatting() {
|
||||||
|
assert_eq!(
|
||||||
|
Token::Strikethrough(Box::new(Token::PlainText("stikethrough".into()))),
|
||||||
|
parse_full(r#"~~stikethrough~~"#)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Token::Bold(Box::new(Token::PlainText("bold".into()))),
|
||||||
|
parse_full(r#"**bold**"#)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Token::Italic(Box::new(Token::PlainText("italic".into()))),
|
||||||
|
parse_full(r#"*italic*"#)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Token::Sequence(vec![
|
||||||
|
Token::PlainText("not code ".into()),
|
||||||
|
Token::InlineCode("code".into()),
|
||||||
|
Token::PlainText(" also not code".into())
|
||||||
|
]),
|
||||||
|
parse_full(r#"not code `code` also not code"#)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Token::Sequence(vec![
|
||||||
|
Token::PlainText("not code ".into()),
|
||||||
|
Token::InlineCode("code".into()),
|
||||||
|
Token::PlainText(" also `not code".into())
|
||||||
|
]),
|
||||||
|
parse_full(r#"not code `code` also `not code"#)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Token::Sequence(vec![
|
||||||
|
Token::PlainText("not code ".into()),
|
||||||
|
Token::InlineCode("*not bold*".into()),
|
||||||
|
Token::PlainText(" also not code".into())
|
||||||
|
]),
|
||||||
|
parse_full(r#"not code `*not bold*` also not code"#)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Token::BoldItalic(Box::new(Token::PlainText("bold italic".into()))),
|
||||||
|
parse_full(r#"***bold italic***"#)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
|
||||||
|
"bold italic".into()
|
||||||
|
))))),
|
||||||
|
parse_full(r#"<b><i>bold italic</i></b>"#)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_complex() {
|
fn parse_complex() {
|
||||||
let emoji = r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#;
|
assert_eq!(
|
||||||
|
Token::Center(Box::new(Token::Sequence(vec![
|
||||||
|
Token::PlainText("centered\n".into()),
|
||||||
|
Token::UnicodeEmoji("🦋".into()),
|
||||||
|
Token::UnicodeEmoji("🏳️⚧️".into()),
|
||||||
|
Token::PlainText("\ntext".into())
|
||||||
|
]))),
|
||||||
|
parse_full(
|
||||||
|
r#"<center>centered
|
||||||
|
🦋🏳️⚧️
|
||||||
|
text</center>"#
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
|
||||||
|
Token::PlainText("centered\n".into()),
|
||||||
|
Token::UnicodeEmoji("👩🏽🤝👩🏼".into()),
|
||||||
|
Token::PlainText("\ntext".into())
|
||||||
|
]))))),
|
||||||
|
parse_full(
|
||||||
|
r#"> <center>centered
|
||||||
|
> 👩🏽🤝👩🏼
|
||||||
|
> text</center>"#
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Token::Function {
|
Token::Function {
|
||||||
name: "x2".into(),
|
name: "x2".into(),
|
||||||
|
@ -1138,21 +1315,7 @@ mod test {
|
||||||
Token::UnicodeEmoji("🦊".into()),
|
Token::UnicodeEmoji("🦊".into()),
|
||||||
]))
|
]))
|
||||||
},
|
},
|
||||||
Context.full(Span::new(emoji)).unwrap().1.merged()
|
parse_full(r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#)
|
||||||
);
|
|
||||||
|
|
||||||
let bold_italic = r#"***bold italic***"#;
|
|
||||||
assert_eq!(
|
|
||||||
Token::BoldItalic(Box::new(Token::PlainText("bold italic".into()))),
|
|
||||||
Context.full(Span::new(bold_italic)).unwrap().1.merged()
|
|
||||||
);
|
|
||||||
|
|
||||||
let bold_italic_tag = r#"<b><i>bold italic</i></b>"#;
|
|
||||||
assert_eq!(
|
|
||||||
Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
|
|
||||||
"bold italic".into()
|
|
||||||
))))),
|
|
||||||
Context.full(Span::new(bold_italic_tag)).unwrap().1.merged()
|
|
||||||
);
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -1178,37 +1341,67 @@ mod test {
|
||||||
.merged()
|
.merged()
|
||||||
);
|
);
|
||||||
|
|
||||||
let quote = r#"
|
|
||||||
> test
|
|
||||||
> <i>
|
|
||||||
> italic
|
|
||||||
> </i>
|
|
||||||
>> Nested quote
|
|
||||||
"#;
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Token::Quote(Box::new(Token::Sequence(vec![
|
Token::Quote(Box::new(Token::Sequence(vec![
|
||||||
Token::PlainText("test\n".into()),
|
Token::PlainText("test\n".into()),
|
||||||
Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
|
Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
|
||||||
Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
|
Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
|
||||||
]))),
|
]))),
|
||||||
Context.full(Span::new(quote)).unwrap().1.merged()
|
parse_full(
|
||||||
|
r#"
|
||||||
|
> test
|
||||||
|
> <i>
|
||||||
|
> italic
|
||||||
|
> </i>
|
||||||
|
>> Nested quote
|
||||||
|
"#
|
||||||
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_emoji() {
|
fn parse_emoji() {
|
||||||
let test = "🥺💜❤️🦊";
|
|
||||||
let ctx = Context;
|
|
||||||
let tokens = many1(ctx.partial(Context::unicode_emoji))(Span::from(test)).unwrap();
|
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
Token::Sequence(
|
||||||
vec!["🥺", "💜", "❤️", "🦊"]
|
vec!["🥺", "💜", "❤️", "🦊"]
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(<&str as Into<Cow<_>>>::into)
|
.map(<&str as Into<Cow<_>>>::into)
|
||||||
.map(Token::UnicodeEmoji)
|
.map(Token::UnicodeEmoji)
|
||||||
.collect::<Vec<_>>(),
|
.collect::<Vec<_>>()
|
||||||
tokens.1
|
),
|
||||||
|
parse_full("🥺💜❤️🦊")
|
||||||
|
);
|
||||||
|
|
||||||
|
// Trans flag, ZWJ
|
||||||
|
assert_eq!(
|
||||||
|
Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}".into()),
|
||||||
|
parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}")
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Token::Sequence(vec![
|
||||||
|
Token::PlainText("\u{0200d}".into()), // ZWJ
|
||||||
|
Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
|
||||||
|
]),
|
||||||
|
parse_full("\u{0200d}\u{1f3f3}\u{0fe0f}")
|
||||||
|
);
|
||||||
|
|
||||||
|
// Trans flag, ZWNJ
|
||||||
|
assert_eq!(
|
||||||
|
Token::Sequence(vec![
|
||||||
|
Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
|
||||||
|
Token::PlainText("\u{0200c}".into()), // ZWNJ
|
||||||
|
Token::UnicodeEmoji("\u{026a7}\u{0fe0f}".into()) // Trans symbol
|
||||||
|
]),
|
||||||
|
parse_full("\u{1f3f3}\u{0fe0f}\u{0200c}\u{026a7}\u{0fe0f}")
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
Token::Sequence(vec![
|
||||||
|
Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
|
||||||
|
Token::PlainText("\u{0200d}\u{0200d}\u{0200d}".into()), // ZWJ
|
||||||
|
]),
|
||||||
|
parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{0200d}\u{0200d}")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue