1554 lines
51 KiB
Rust
1554 lines
51 KiB
Rust
use either::Either;
|
||
use nom::branch::alt;
|
||
use nom::bytes::complete::tag;
|
||
use nom::character::complete::{
|
||
alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of, space1,
|
||
tab,
|
||
};
|
||
use nom::combinator::{eof, fail, map, not, opt, recognize};
|
||
use nom::error::ErrorKind;
|
||
use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
|
||
use nom::sequence::tuple;
|
||
use nom::{IResult, Offset, Slice};
|
||
use nom_locate::LocatedSpan;
|
||
use std::borrow::Cow;
|
||
use std::collections::HashMap;
|
||
use std::convert::{identity, Infallible};
|
||
use unicode_segmentation::UnicodeSegmentation;
|
||
|
||
/// Kind of entity a mention points at.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum MentionType {
    /// Mention introduced by `!`.
    Community,
    /// Mention introduced by `@`.
    User,
}

impl MentionType {
    /// Returns the sigil character that introduces this kind of mention.
    pub fn to_char(&self) -> char {
        match *self {
            Self::User => '@',
            Self::Community => '!',
        }
    }
}
|
||
|
||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||
pub enum Token<'a> {
|
||
PlainText(Cow<'a, str>),
|
||
Sequence(Vec<Token<'a>>),
|
||
Quote(Box<Token<'a>>),
|
||
Small(Box<Token<'a>>),
|
||
BoldItalic(Box<Token<'a>>),
|
||
Bold(Box<Token<'a>>),
|
||
Italic(Box<Token<'a>>),
|
||
Center(Box<Token<'a>>),
|
||
Strikethrough(Box<Token<'a>>),
|
||
PlainTag(Cow<'a, str>),
|
||
InlineCode(Cow<'a, str>),
|
||
InlineMath(Cow<'a, str>),
|
||
UrlRaw(Cow<'a, str>),
|
||
UrlNoEmbed(Cow<'a, str>),
|
||
Link {
|
||
label: Box<Token<'a>>,
|
||
href: Cow<'a, str>,
|
||
embed: bool,
|
||
},
|
||
BlockCode {
|
||
lang: Option<Cow<'a, str>>,
|
||
inner: Cow<'a, str>,
|
||
},
|
||
BlockMath(Cow<'a, str>),
|
||
Function {
|
||
name: Cow<'a, str>,
|
||
params: HashMap<Cow<'a, str>, Option<Cow<'a, str>>>,
|
||
inner: Box<Token<'a>>,
|
||
},
|
||
Mention {
|
||
name: Cow<'a, str>,
|
||
host: Option<Cow<'a, str>>,
|
||
mention_type: MentionType,
|
||
},
|
||
UnicodeEmoji(Cow<'a, str>),
|
||
ShortcodeEmoji(Cow<'a, str>),
|
||
Hashtag(Cow<'a, str>),
|
||
}
|
||
|
||
impl Token<'_> {
|
||
fn owned(&self) -> Token<'static> {
|
||
match self {
|
||
Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())),
|
||
Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()),
|
||
Token::Quote(inner) => Token::Quote(Box::new(inner.owned())),
|
||
Token::Small(inner) => Token::Small(Box::new(inner.owned())),
|
||
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())),
|
||
Token::Bold(inner) => Token::Bold(Box::new(inner.owned())),
|
||
Token::Italic(inner) => Token::Italic(Box::new(inner.owned())),
|
||
Token::Center(inner) => Token::Center(Box::new(inner.owned())),
|
||
Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.owned())),
|
||
Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())),
|
||
Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())),
|
||
Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())),
|
||
Token::UrlRaw(url) => Token::UrlRaw(Cow::Owned(url.clone().into_owned())),
|
||
Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())),
|
||
Token::Link { embed, label, href } => Token::Link {
|
||
embed: *embed,
|
||
label: Box::new(label.owned()),
|
||
href: Cow::Owned(href.clone().into_owned()),
|
||
},
|
||
Token::BlockCode { inner, lang } => Token::BlockCode {
|
||
lang: lang.as_ref().map(|l| Cow::Owned(l.clone().into_owned())),
|
||
inner: Cow::Owned(inner.clone().into_owned()),
|
||
},
|
||
Token::BlockMath(math) => Token::BlockMath(Cow::Owned(math.clone().into_owned())),
|
||
Token::Function {
|
||
name,
|
||
params,
|
||
inner,
|
||
} => Token::Function {
|
||
name: Cow::Owned(name.clone().into_owned()),
|
||
params: params
|
||
.iter()
|
||
.map(|(k, v)| {
|
||
(
|
||
Cow::Owned(k.clone().into_owned()),
|
||
v.as_ref().map(|val| Cow::Owned(val.clone().into_owned())),
|
||
)
|
||
})
|
||
.collect(),
|
||
inner: Box::new(inner.owned()),
|
||
},
|
||
Token::Mention {
|
||
name,
|
||
host,
|
||
mention_type,
|
||
} => Token::Mention {
|
||
name: Cow::Owned(name.clone().into_owned()),
|
||
host: host.as_ref().map(|v| Cow::Owned(v.clone().into_owned())),
|
||
mention_type: *mention_type,
|
||
},
|
||
Token::UnicodeEmoji(code) => Token::UnicodeEmoji(Cow::Owned(code.clone().into_owned())),
|
||
Token::ShortcodeEmoji(shortcode) => {
|
||
Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
|
||
}
|
||
Token::Hashtag(url) => Token::Hashtag(Cow::Owned(url.clone().into_owned())),
|
||
}
|
||
}
|
||
|
||
fn merged(&self) -> Token {
|
||
match self {
|
||
Token::Sequence(tokens) => {
|
||
let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
|
||
if let Some(Token::PlainText(last)) = acc.last_mut() {
|
||
if let Token::PlainText(tok_text) = tok {
|
||
*last = Cow::from(last.to_string() + tok_text.as_ref());
|
||
|
||
return acc;
|
||
}
|
||
}
|
||
|
||
if let Token::Sequence(seq) = tok {
|
||
let items = seq.iter().map(Token::merged).flat_map(|t| match t {
|
||
Token::Sequence(seq) => Either::Left(seq.into_iter()),
|
||
other => Either::Right(std::iter::once(other)),
|
||
});
|
||
|
||
for item in items {
|
||
if let Some(Token::PlainText(last)) = acc.last_mut() {
|
||
if let Token::PlainText(tok_text) = item {
|
||
*last = Cow::from(last.to_string() + tok_text.as_ref());
|
||
|
||
continue;
|
||
}
|
||
}
|
||
|
||
acc.push(item);
|
||
}
|
||
|
||
return acc;
|
||
}
|
||
|
||
acc.push(tok.merged());
|
||
acc
|
||
});
|
||
|
||
if tokens_multi.len() == 1 {
|
||
return tokens_multi.into_iter().next().unwrap();
|
||
}
|
||
|
||
Token::Sequence(tokens_multi)
|
||
}
|
||
Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
|
||
Token::Small(inner) => Token::Small(Box::new(inner.merged())),
|
||
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())),
|
||
Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
|
||
Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
|
||
Token::Center(inner) => Token::Center(Box::new(inner.merged())),
|
||
Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
|
||
Token::Link { embed, label, href } => Token::Link {
|
||
label: Box::new(label.merged()),
|
||
href: href.clone(),
|
||
embed: *embed,
|
||
},
|
||
Token::Function {
|
||
name,
|
||
params,
|
||
inner,
|
||
} => Token::Function {
|
||
name: name.clone(),
|
||
params: params.clone(),
|
||
inner: Box::new(inner.merged()),
|
||
},
|
||
other => other.clone(),
|
||
}
|
||
}
|
||
}
|
||
|
||
type Span<'a> = LocatedSpan<&'a str>;
|
||
|
||
trait SliceOffset {
|
||
fn up_to(&self, other: &Self) -> Self;
|
||
|
||
fn fragment_between<'a>(&self, other: &Self) -> &'a str
|
||
where
|
||
Self: 'a;
|
||
}
|
||
|
||
impl SliceOffset for Span<'_> {
|
||
fn up_to(&self, other: &Self) -> Self {
|
||
self.slice(..self.offset(other))
|
||
}
|
||
|
||
fn fragment_between<'a>(&self, other: &Self) -> &'a str
|
||
where
|
||
Self: 'a,
|
||
{
|
||
self.up_to(other).into_fragment()
|
||
}
|
||
}
|
||
|
||
#[inline]
|
||
fn boxing_token<'a>(func: impl Fn(Box<Token<'a>>) -> Token<'a>) -> impl Fn(Token<'a>) -> Token<'a> {
|
||
move |tokens| func(Box::new(tokens))
|
||
}
|
||
|
||
#[inline]
|
||
fn collect_sequence<'a, T>(
|
||
func: impl Fn(Vec<T>) -> Token<'a>,
|
||
transform: impl Fn(Token<'a>) -> Token<'a>,
|
||
) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token<'a> {
|
||
move |tokens| transform(func(tokens.collect()))
|
||
}
|
||
|
||
#[inline]
|
||
fn collect_char_sequence<'a>(
|
||
func: impl Fn(Cow<'a, str>) -> Token<'a>,
|
||
) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token<'a> {
|
||
move |chars| func(Cow::Owned(chars.collect()))
|
||
}
|
||
|
||
fn spliced<'a>(
|
||
segments: &[Span<'a>],
|
||
func: impl Fn(Span) -> IResult<Span, Token>,
|
||
parent: Span<'a>,
|
||
) -> IResult<Span<'a>, Token<'static>, nom::error::Error<Span<'a>>> {
|
||
let combined = segments
|
||
.iter()
|
||
.copied()
|
||
.map(Span::into_fragment)
|
||
.collect::<Vec<_>>()
|
||
.join("\n");
|
||
let cum_offset_combined = segments
|
||
.iter()
|
||
.scan(0, |acc, &x| {
|
||
*acc += x.len();
|
||
Some(*acc)
|
||
})
|
||
.collect::<Vec<_>>();
|
||
let current_seg = |input: Span| {
|
||
cum_offset_combined
|
||
.iter()
|
||
.enumerate()
|
||
.take_while(|(_, &o)| o > input.location_offset())
|
||
.map(|(i, o)| (segments[i], o))
|
||
.last()
|
||
};
|
||
|
||
type NE<E> = nom::Err<E>;
|
||
type NomError<'x> = nom::error::Error<Span<'x>>;
|
||
|
||
let quote_span = Span::new(&combined);
|
||
let (input, inner) = match func(quote_span) {
|
||
Ok((input, token)) => (input, token.owned()),
|
||
Err(e) => {
|
||
return match e {
|
||
NE::Error(e) => {
|
||
let offset_new = e.input.location_offset();
|
||
if let Some((seg_parent, offset_seg_new)) = current_seg(e.input) {
|
||
let offset = offset_new - offset_seg_new;
|
||
let offset_orig = offset + seg_parent.location_offset();
|
||
Err(NE::Error(NomError::new(
|
||
Span::new(&parent.into_fragment()[offset_orig..]),
|
||
e.code,
|
||
)))
|
||
} else {
|
||
// ???
|
||
Err(NE::Failure(NomError::new(parent, ErrorKind::Fail)))
|
||
}
|
||
}
|
||
NE::Failure(e) => Err(NE::Error(NomError::new(parent, e.code))),
|
||
NE::Incomplete(i) => Err(NE::Incomplete(i)),
|
||
};
|
||
}
|
||
};
|
||
|
||
let out = if let Some((seg_parent, offset_seg_new)) = current_seg(input) {
|
||
let offset = input.location_offset() - offset_seg_new;
|
||
let offset_orig = offset + seg_parent.location_offset();
|
||
parent.slice(offset_orig..)
|
||
} else {
|
||
parent
|
||
};
|
||
|
||
Ok((out, inner.owned()))
|
||
}
|
||
|
||
fn space(input: Span) -> IResult<Span, Token> {
|
||
let (input, frag) = recognize(alt((one_char('\u{0020}'), one_char('\u{3000}'), tab)))(input)?;
|
||
Ok((input, Token::PlainText(frag.into_fragment().into())))
|
||
}
|
||
|
||
struct Matcher<'a, 'b, T> {
|
||
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
|
||
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
|
||
_phantom_closure: std::marker::PhantomData<&'a ()>,
|
||
_phantom_data: std::marker::PhantomData<&'b ()>,
|
||
_phantom_output: std::marker::PhantomData<fn() -> T>,
|
||
}
|
||
|
||
impl<'a, 'b, T> Matcher<'a, 'b, T> {
|
||
fn new(
|
||
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
|
||
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
|
||
) -> Self {
|
||
Self {
|
||
matcher_inner,
|
||
collector,
|
||
_phantom_closure: std::marker::PhantomData,
|
||
_phantom_data: std::marker::PhantomData,
|
||
_phantom_output: std::marker::PhantomData,
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<'a, 'b> Matcher<'a, 'b, Infallible> {
|
||
// Don't break this invariant, else a monster will come at night and eat all your socks
|
||
fn reject() -> Self {
|
||
Self {
|
||
matcher_inner: &fail::<_, Infallible, _>,
|
||
collector: &|_| unreachable!(),
|
||
_phantom_closure: std::marker::PhantomData,
|
||
_phantom_data: std::marker::PhantomData,
|
||
_phantom_output: std::marker::PhantomData,
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Parser context that every grammar rule is a method of; it currently holds no state.
struct Context;
|
||
|
||
impl Context {
|
||
#[inline]
|
||
const fn partial(
|
||
&self,
|
||
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
|
||
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
|
||
move |input| func(self, input)
|
||
}
|
||
|
||
fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
|
||
}
|
||
|
||
fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
|
||
}
|
||
|
||
fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
map(
|
||
many1(self.partial(Self::inline_label_safe_single)),
|
||
Token::Sequence,
|
||
)(input)
|
||
}
|
||
|
||
fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
alt((
|
||
self.partial(Self::tag_bold_italic_asterisk),
|
||
self.partial(Self::tag_bold_italic_underscore),
|
||
self.partial(Self::tag_bold_asterisk),
|
||
self.partial(Self::tag_italic_asterisk),
|
||
self.partial(Self::tag_bold_underscore),
|
||
self.partial(Self::tag_italic_underscore),
|
||
))(input)
|
||
}
|
||
|
||
fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let (input, token) = alt((
|
||
self.partial(Self::unicode_emoji),
|
||
alt((
|
||
self.partial(Self::tag_block_center),
|
||
self.partial(Self::tag_small),
|
||
self.partial(Self::tag_plain),
|
||
self.partial(Self::tag_bold),
|
||
self.partial(Self::tag_italic),
|
||
self.partial(Self::tag_strikethrough),
|
||
)),
|
||
self.partial(Self::url_no_embed),
|
||
self.partial(Self::base_bold_italic),
|
||
self.partial(Self::tag_block_code),
|
||
self.partial(Self::tag_inline_code),
|
||
self.partial(Self::tag_quote),
|
||
self.partial(Self::tag_block_math),
|
||
self.partial(Self::tag_inline_math),
|
||
self.partial(Self::tag_strikethrough_tilde),
|
||
self.partial(Self::tag_func),
|
||
self.partial(Self::tag_mention),
|
||
self.partial(Self::tag_hashtag),
|
||
self.partial(Self::shortcode_emoji),
|
||
self.partial(Self::link),
|
||
self.partial(Self::raw_url),
|
||
self.partial(Self::text),
|
||
))(input)?;
|
||
Ok((input, token))
|
||
}
|
||
|
||
fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let (input, token) = alt((
|
||
self.partial(Self::unicode_emoji),
|
||
self.partial(Self::tag_small),
|
||
self.partial(Self::tag_plain),
|
||
self.partial(Self::tag_bold),
|
||
self.partial(Self::tag_italic),
|
||
self.partial(Self::tag_strikethrough),
|
||
self.partial(Self::url_no_embed),
|
||
self.partial(Self::base_bold_italic),
|
||
self.partial(Self::tag_inline_code),
|
||
self.partial(Self::tag_inline_math),
|
||
self.partial(Self::tag_strikethrough_tilde),
|
||
self.partial(Self::tag_func),
|
||
self.partial(Self::tag_mention),
|
||
self.partial(Self::tag_hashtag),
|
||
self.partial(Self::shortcode_emoji),
|
||
self.partial(Self::link),
|
||
self.partial(Self::raw_url),
|
||
self.partial(Self::text),
|
||
))(input)?;
|
||
Ok((input, token))
|
||
}
|
||
|
||
fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let (input, token) = alt((
|
||
self.partial(Self::unicode_emoji),
|
||
self.partial(Self::url_no_embed),
|
||
self.partial(Self::tag_inline_code),
|
||
self.partial(Self::tag_inline_math),
|
||
self.partial(Self::tag_func),
|
||
self.partial(Self::tag_mention),
|
||
self.partial(Self::tag_hashtag),
|
||
self.partial(Self::shortcode_emoji),
|
||
self.partial(Self::raw_url),
|
||
self.partial(Self::text),
|
||
))(input)?;
|
||
Ok((input, token))
|
||
}
|
||
|
||
fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let (input, token) = alt((
|
||
self.partial(Self::unicode_emoji),
|
||
self.partial(Self::tag_small),
|
||
self.partial(Self::tag_plain),
|
||
self.partial(Self::tag_bold),
|
||
self.partial(Self::tag_italic),
|
||
self.partial(Self::tag_strikethrough),
|
||
self.partial(Self::base_bold_italic),
|
||
self.partial(Self::tag_strikethrough_tilde),
|
||
self.partial(Self::tag_func),
|
||
self.partial(Self::shortcode_emoji),
|
||
self.partial(Self::text),
|
||
))(input)?;
|
||
Ok((input, token))
|
||
}
|
||
|
||
fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
|
||
|
||
if let (None, None) = leading_spaces {
|
||
if input.get_column() != 1 {
|
||
return fail(input);
|
||
}
|
||
}
|
||
|
||
let quote_line = |input| tuple((tag(">"), opt(space), not_line_ending))(input);
|
||
|
||
let orig_input = input;
|
||
let (input, lines) = separated_list1(line_ending, quote_line)(input)?;
|
||
|
||
let quote_lines = lines
|
||
.into_iter()
|
||
.map(|(_, _, text)| text)
|
||
.collect::<Vec<_>>();
|
||
|
||
if quote_lines.len() == 1
|
||
&& quote_lines
|
||
.iter()
|
||
.map(Span::fragment)
|
||
.copied()
|
||
.any(&str::is_empty)
|
||
{
|
||
return fail(input);
|
||
}
|
||
|
||
let (_, inner) = spliced("e_lines, self.partial(Self::full), orig_input)?;
|
||
|
||
let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;
|
||
|
||
Ok((input, Token::Quote(Box::new(inner))))
|
||
}
|
||
|
||
fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let tag_start = &tag("<center>");
|
||
let tag_end = &tag("</center>");
|
||
|
||
let (input, _) = opt(line_ending)(input)?;
|
||
|
||
if input.get_column() != 1 {
|
||
return fail(input);
|
||
}
|
||
|
||
let (input, _) = tag_start(input)?;
|
||
let (input, _) = opt(line_ending)(input)?;
|
||
|
||
let (input, (center_seq, _)) = many_till(
|
||
self.partial(Self::inline_single),
|
||
tuple((opt(space1), opt(line_ending), tag_end)),
|
||
)(input)?;
|
||
|
||
Ok((
|
||
input,
|
||
boxing_token(Token::Center)(Token::Sequence(center_seq)),
|
||
))
|
||
}
|
||
|
||
fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let delim = &tag("```");
|
||
|
||
let (input, _) = opt(line_ending)(input)?;
|
||
|
||
if input.get_column() != 1 {
|
||
return fail(input);
|
||
}
|
||
|
||
let (input, _) = delim(input)?;
|
||
let (input, lang) = opt(map(
|
||
recognize(many1(tuple((not(delim), not_line_ending)))),
|
||
Span::into_fragment,
|
||
))(input)?;
|
||
let (input, _) = line_ending(input)?;
|
||
|
||
let (input, code) = map(
|
||
recognize(many1_count(tuple((
|
||
not(tuple((line_ending, delim))),
|
||
anychar,
|
||
)))),
|
||
Span::into_fragment,
|
||
)(input)?;
|
||
|
||
let (input, _) = line_ending(input)?;
|
||
let (input, _) = delim(input)?;
|
||
let (input, _) = many0(space)(input)?;
|
||
let (input, _) = not(not(line_ending))(input)?;
|
||
let (input, _) = opt(line_ending)(input)?;
|
||
|
||
Ok((
|
||
input,
|
||
Token::BlockCode {
|
||
lang: lang.map(<&str>::into),
|
||
inner: code.into(),
|
||
},
|
||
))
|
||
}
|
||
|
||
fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let start = &tag("\\[");
|
||
let end = &tag("\\]");
|
||
|
||
let (input, _) = opt(line_ending)(input)?;
|
||
|
||
if input.get_column() != 1 {
|
||
return fail(input);
|
||
}
|
||
|
||
let (input, _) = start(input)?;
|
||
let (input, _) = opt(line_ending)(input)?;
|
||
|
||
let (input, math_span) = recognize(many1_count(tuple((
|
||
not(tuple((opt(line_ending), end))),
|
||
not_line_ending,
|
||
))))(input)?;
|
||
|
||
let (input, _) = opt(line_ending)(input)?;
|
||
let (input, _) = end(input)?;
|
||
let (input, _) = many0(space)(input)?;
|
||
let (input, _) = not(not_line_ending)(input)?;
|
||
let (input, _) = opt(line_ending)(input)?;
|
||
|
||
Ok((
|
||
input,
|
||
Token::BlockMath(Cow::Borrowed(math_span.into_fragment())),
|
||
))
|
||
}
|
||
|
||
#[inline]
|
||
fn tag_delimited<'a, 'b: 'a, T, S>(
|
||
&'a self,
|
||
opening_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
||
closing_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
||
escape: bool,
|
||
matcher: Matcher<'a, 'b, T>,
|
||
fallback: Matcher<'a, 'b, S>,
|
||
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ {
|
||
move |input| {
|
||
if escape {
|
||
if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
|
||
return Ok((
|
||
input_escaped,
|
||
Token::PlainText(Cow::Borrowed(mark.fragment())),
|
||
));
|
||
}
|
||
}
|
||
|
||
let begin = input;
|
||
let (post_open, _) = opening_tag(input)?;
|
||
|
||
let res = tuple((
|
||
many1(tuple((not(&closing_tag), &matcher.matcher_inner))),
|
||
&closing_tag,
|
||
))(post_open);
|
||
|
||
if let Err(nom::Err::Error(nom::error::Error {
|
||
input: input_past_err,
|
||
..
|
||
})) = res
|
||
{
|
||
let res_fallback = tuple((
|
||
many1(tuple((not(&closing_tag), &fallback.matcher_inner))),
|
||
&closing_tag,
|
||
))(post_open);
|
||
|
||
if res_fallback.is_err() {
|
||
return Ok((
|
||
input_past_err,
|
||
Token::PlainText(begin.fragment_between(&input_past_err).into()),
|
||
));
|
||
}
|
||
|
||
let (input, (inner, closing)) = res_fallback.unwrap();
|
||
let mut inner = inner.into_iter().map(|(_, t)| t);
|
||
|
||
return Ok((
|
||
input,
|
||
Token::Sequence(vec![
|
||
Token::PlainText(begin.fragment_between(&post_open).into()),
|
||
((fallback.collector)(&mut inner)),
|
||
Token::PlainText(closing.into_fragment().into()),
|
||
]),
|
||
));
|
||
}
|
||
|
||
let (input, (inner, _)) = res?;
|
||
let mut inner = inner.into_iter().map(|(_, t)| t);
|
||
|
||
Ok((input, (matcher.collector)(&mut inner)))
|
||
}
|
||
}
|
||
|
||
fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let (input, _) = tag("$[")(input)?;
|
||
|
||
let func_ident = |input| {
|
||
recognize(tuple((
|
||
many1_count(alt((alpha1, tag("_")))),
|
||
many0_count(alt((alphanumeric1, tag("_")))),
|
||
)))(input)
|
||
};
|
||
|
||
let param_value = recognize(many1_count(alt((
|
||
alphanumeric1,
|
||
tag("."),
|
||
tag("-"),
|
||
tag("_"),
|
||
))));
|
||
|
||
let (input, func_name) = map(func_ident, Span::into_fragment)(input)?;
|
||
|
||
let arg = tuple((func_ident, opt(tuple((tag("="), param_value)))));
|
||
|
||
let (input, args) =
|
||
opt(tuple((one_char('.'), separated_list1(one_char(','), arg))))(input)?;
|
||
|
||
let args_out = args.map_or_else(HashMap::new, |(_, items)| {
|
||
items
|
||
.into_iter()
|
||
.map(|(k, v)| {
|
||
(
|
||
Cow::from(k.into_fragment()),
|
||
v.map(|(_, val)| Cow::from(val.into_fragment())),
|
||
)
|
||
})
|
||
.collect::<HashMap<_, _>>()
|
||
});
|
||
|
||
let (input, _) = opt(space)(input)?;
|
||
|
||
let (input, (inner, _)) = many_till(self.partial(Self::inline_single), tag("]"))(input)?;
|
||
|
||
Ok((
|
||
input,
|
||
Token::Function {
|
||
name: Cow::from(func_name),
|
||
params: args_out,
|
||
inner: Box::new(Token::Sequence(inner)),
|
||
},
|
||
))
|
||
}
|
||
|
||
fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let opening_tag = &tag("<small>");
|
||
let closing_tag = &tag("</small>");
|
||
|
||
let (input, _) = opening_tag(input)?;
|
||
let (input, text) = map(
|
||
recognize(many1(tuple((not_line_ending, not(closing_tag))))),
|
||
Span::into_fragment,
|
||
)(input)?;
|
||
let (input, _) = closing_tag(input)?;
|
||
|
||
Ok((input, Token::PlainTag(text.into())))
|
||
}
|
||
|
||
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("<small>"),
|
||
tag("</small>"),
|
||
false,
|
||
Matcher::new(
|
||
&self.partial(Self::inline_single),
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::Small)),
|
||
),
|
||
Matcher::new(
|
||
&self.partial(Self::inline_non_formatting_single),
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
// TODO: CommonMark flanking rules
|
||
fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("***"),
|
||
tag("***"),
|
||
true,
|
||
Matcher::new(
|
||
&self.partial(Self::inline_single),
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
|
||
),
|
||
Matcher::new(
|
||
&self.partial(Self::inline_non_formatting_single),
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
// TODO: CommonMark flanking rules
|
||
fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("___"),
|
||
tag("___"),
|
||
true,
|
||
Matcher::new(
|
||
&self.partial(Self::inline_single),
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
|
||
),
|
||
Matcher::new(
|
||
&self.partial(Self::inline_non_formatting_single),
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("<b>"),
|
||
tag("</b>"),
|
||
false,
|
||
Matcher::new(
|
||
&self.partial(Self::inline_single),
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
|
||
),
|
||
Matcher::new(
|
||
&self.partial(Self::inline_non_formatting_single),
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
// TODO: CommonMark flanking rules
|
||
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("**"),
|
||
tag("**"),
|
||
true,
|
||
Matcher::new(
|
||
&self.partial(Self::inline_single),
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
|
||
),
|
||
Matcher::new(
|
||
&self.partial(Self::inline_non_formatting_single),
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
// TODO: CommonMark flanking rules
|
||
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("__"),
|
||
tag("__"),
|
||
true,
|
||
Matcher::new(
|
||
&self.partial(Self::inline_single),
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
|
||
),
|
||
Matcher::new(
|
||
&self.partial(Self::inline_non_formatting_single),
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("<i>"),
|
||
tag("</i>"),
|
||
false,
|
||
Matcher::new(
|
||
&self.partial(Self::inline_single),
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
|
||
),
|
||
Matcher::new(
|
||
&self.partial(Self::inline_non_formatting_single),
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
// TODO: CommonMark flanking rules
|
||
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("*"),
|
||
tag("*"),
|
||
true,
|
||
Matcher::new(
|
||
&self.partial(Self::inline_single),
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
|
||
),
|
||
Matcher::new(
|
||
&self.partial(Self::inline_non_formatting_single),
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
// TODO: CommonMark flanking rules
|
||
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("_"),
|
||
tag("_"),
|
||
true,
|
||
Matcher::new(
|
||
&self.partial(Self::inline_single),
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
|
||
),
|
||
Matcher::new(
|
||
&self.partial(Self::inline_non_formatting_single),
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("<s>"),
|
||
tag("</s>"),
|
||
false,
|
||
Matcher::new(
|
||
&self.partial(Self::inline_single),
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
|
||
),
|
||
Matcher::new(
|
||
&self.partial(Self::inline_non_formatting_single),
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
// TODO: CommonMark flanking rules
|
||
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("~~"),
|
||
tag("~~"),
|
||
true,
|
||
Matcher::new(
|
||
&move |input| {
|
||
map(
|
||
tuple(((not(line_ending)), self.partial(Self::inline_single))),
|
||
|(_, captured)| captured,
|
||
)(input)
|
||
},
|
||
&collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
|
||
),
|
||
Matcher::new(
|
||
&move |input| {
|
||
map(
|
||
tuple((
|
||
(not(line_ending)),
|
||
self.partial(Self::inline_non_formatting_single),
|
||
)),
|
||
|(_, captured)| captured,
|
||
)(input)
|
||
},
|
||
&collect_sequence(Token::Sequence, identity),
|
||
),
|
||
)(input)
|
||
}
|
||
|
||
fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("`"),
|
||
|input| alt((tag("`"), tag("´")))(input),
|
||
true,
|
||
Matcher::new(
|
||
&move |input| {
|
||
map(
|
||
tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar)),
|
||
|(_, captured)| captured,
|
||
)(input)
|
||
},
|
||
&collect_char_sequence(Token::InlineCode),
|
||
),
|
||
Matcher::reject(),
|
||
)(input)
|
||
}
|
||
|
||
fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
self.tag_delimited(
|
||
tag("\\("),
|
||
tag("\\)"),
|
||
false,
|
||
Matcher::new(
|
||
&move |input| {
|
||
map(tuple((not(line_ending), anychar)), |(_, captured)| captured)(input)
|
||
},
|
||
&collect_char_sequence(Token::InlineMath),
|
||
),
|
||
Matcher::reject(),
|
||
)(input)
|
||
}
|
||
|
||
fn text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let (input, text) = map(recognize(anychar), Span::into_fragment)(input)?;
|
||
Ok((input, Token::PlainText(text.into())))
|
||
}
|
||
|
||
fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let (input, url_span) = recognize(tuple((
|
||
protocol,
|
||
url_chars(|input| not(url_chars_base)(input), false),
|
||
)))(input)?;
|
||
|
||
let url = url_span.into_fragment();
|
||
let url_bytes = url.as_bytes();
|
||
|
||
// Strip punctuation at the end of sentences that might have been consumed as a part of the URL
|
||
let final_url = if matches!(url_bytes.last(), Some(b'.' | b',' | b'?')) {
|
||
url.slice(..url.len() - 1)
|
||
} else {
|
||
url
|
||
};
|
||
|
||
Ok((input, Token::UrlRaw(Cow::from(final_url))))
|
||
}
|
||
|
||
fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let (input, _) = tag("<")(input)?;
|
||
let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
|
||
let (input, _) = tag(">")(input)?;
|
||
|
||
Ok((input, Token::UrlRaw(Cow::from(url_span.into_fragment()))))
|
||
}
|
||
|
||
fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let (input, no_embed) = opt(tag("?"))(input)?;
|
||
let (input, _) = tag("[")(input)?;
|
||
let (input, _) = not(tag("["))(input)?;
|
||
let (input, (label_tok, _)) =
|
||
many_till(self.partial(Self::inline_label_safe_single), tag("]("))(input)?;
|
||
let (input, url_span) = recognize(tuple((protocol, url_chars(tag(")"), true))))(input)?;
|
||
let (input, _) = tag(")")(input)?;
|
||
|
||
Ok((
|
||
input,
|
||
Token::Link {
|
||
label: Box::new(Token::Sequence(label_tok)),
|
||
href: url_span.into_fragment().into(),
|
||
embed: no_embed.is_none(),
|
||
},
|
||
))
|
||
}
|
||
|
||
fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
let frag = input.fragment();
|
||
let Some(grapheme) = frag.graphemes(true).next() else {
|
||
return fail(input);
|
||
};
|
||
|
||
let grapheme = grapheme.trim_end_matches(|c| c == '\u{200c}' || c == '\u{200d}');
|
||
|
||
let emoji = emojis::get(grapheme);
|
||
|
||
if emoji.is_none() {
|
||
return fail(input);
|
||
}
|
||
|
||
Ok((
|
||
input.slice(grapheme.len()..),
|
||
Token::UnicodeEmoji(grapheme.into()),
|
||
))
|
||
}
|
||
|
||
fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
// TODO: Fail when preceded by alphanumerics
|
||
let (input, _) = tag(":")(input)?;
|
||
let (input, shortcode) = map(
|
||
recognize(many1(alt((alphanumeric1, recognize(one_of("_+-")))))),
|
||
Span::into_fragment,
|
||
)(input)?;
|
||
let (input, _) = tag(":")(input)?;
|
||
let (input, _) = not(alphanumeric1)(input)?;
|
||
|
||
Ok((input, Token::ShortcodeEmoji(shortcode.into())))
|
||
}
|
||
|
||
fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
// TODO: Escaping and skip when preceded by alphanumerics
|
||
|
||
let tags = one_of("@!");
|
||
let (input, mention_type) = map(tags, |c| match c {
|
||
'@' => MentionType::User,
|
||
'!' => MentionType::Community,
|
||
_ => unreachable!(),
|
||
})(input)?;
|
||
|
||
let (input, name) = map(
|
||
recognize(many1(alt((alphanumeric1, recognize(one_of("-_")))))),
|
||
Span::into_fragment,
|
||
)(input)?;
|
||
|
||
let before = input;
|
||
let (_, host) = map(
|
||
opt(tuple((
|
||
tag("@"),
|
||
map(
|
||
recognize(many1(alt((alphanumeric1, recognize(one_of("-_.")))))),
|
||
Span::into_fragment,
|
||
),
|
||
))),
|
||
|maybe_tag_host| maybe_tag_host.map(|(_, host)| host),
|
||
)(input)?;
|
||
|
||
let host = host.map(|h| h.trim_end_matches(|c| matches!(c, '.' | '-' | '_')));
|
||
|
||
Ok((
|
||
host.map(|c| before.slice(c.len() + 1..)).unwrap_or(before),
|
||
Token::Mention {
|
||
mention_type,
|
||
name: name.into(),
|
||
host: host.map(|h| h.into()),
|
||
},
|
||
))
|
||
}
|
||
|
||
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||
// TODO: Skip when preceded by alphanumerics
|
||
|
||
let (input, _) = tag("#")(input)?;
|
||
|
||
let (input, hashtag_text) =
|
||
map(recognize(many1(hashtag_chars)), Span::into_fragment)(input)?;
|
||
|
||
Ok((input, Token::Hashtag(hashtag_text.into())))
|
||
}
|
||
}
|
||
|
||
#[inline]
|
||
fn hashtag_chars(input: Span) -> IResult<Span, Span> {
|
||
recognize(alt((
|
||
recognize(tuple((tag("("), hashtag_chars, tag(")")))),
|
||
recognize(tuple((tag("["), hashtag_chars, tag("]")))),
|
||
recognize(tuple((tag("「"), hashtag_chars, tag("」")))),
|
||
recognize(tuple((tag("("), hashtag_chars, tag(")")))),
|
||
recognize(tuple((
|
||
not(space1),
|
||
not_line_ending,
|
||
not(one_of(".,:;!?#?/[]【】()「」()<>")),
|
||
anychar,
|
||
))),
|
||
)))(input)
|
||
}
|
||
|
||
#[inline]
|
||
fn protocol(input: Span) -> IResult<Span, Span> {
|
||
alt((tag("https://"), tag("http://")))(input)
|
||
}
|
||
|
||
#[inline]
|
||
fn url_chars_base(input: Span) -> IResult<Span, Span> {
|
||
recognize(alt((alpha1, recognize(one_of(".,_/:%#$&?!~=+-()[]@")))))(input)
|
||
}
|
||
|
||
#[inline]
|
||
fn url_chars<'a, T: 'a>(
|
||
terminator: impl Fn(Span<'a>) -> IResult<Span<'a>, T> + 'a,
|
||
spaces: bool,
|
||
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'a {
|
||
let terminating = move |input| {
|
||
tuple((
|
||
&terminator,
|
||
alt((
|
||
space1,
|
||
line_ending,
|
||
eof,
|
||
recognize(one_of("([<'\"")),
|
||
recognize(tuple((
|
||
alt((alpha1, recognize(one_of("*")))),
|
||
alt((space1, line_ending, eof)),
|
||
))),
|
||
)),
|
||
))(input)
|
||
};
|
||
|
||
let chars = tuple((
|
||
not(tuple((space1, eof))),
|
||
not(tuple((space1, tag("\"")))),
|
||
not(tuple((opt(space1), terminating))),
|
||
alt((url_chars_base, if spaces { space1 } else { fail })),
|
||
));
|
||
|
||
recognize(many1_count(chars))
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod test {
|
||
use crate::{url_chars, Context, Span, Token};
|
||
use nom::bytes::complete::tag;
|
||
use std::borrow::Cow;
|
||
use std::collections::HashMap;
|
||
|
||
fn parse_full(string: &str) -> Token {
|
||
Context.full(Span::new(string)).unwrap().1.merged().owned()
|
||
}
|
||
|
||
/// Checks where `url_chars` ends a URL when `)` is the terminator and
/// whitespace inside the URL is allowed.
#[test]
fn parse_url_chars() {
    // (input, expected recognized URL)
    let cases = [
        (
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
        ),
        (
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))",
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
        ),
        (
            "https://en.wikipedia.org/wiki/(",
            "https://en.wikipedia.org/wiki/(",
        ),
        (
            "https://cs.wikipedia.org/wiki/Among_Us ",
            "https://cs.wikipedia.org/wiki/Among_Us",
        ),
        (
            "https://cs.wikipedia.org/wiki/Among Us )",
            "https://cs.wikipedia.org/wiki/Among Us",
        ),
    ];

    for (input, expected) in cases {
        let (_, recognized) = url_chars(tag(")"), true)(Span::new(input)).unwrap();
        assert_eq!(expected, recognized.into_fragment());
    }
}
|
||
|
||
/// Checks the basic inline formatting constructs: strikethrough, bold,
/// italic, bold-italic, inline code and HTML-like `<b>` / `<i>` tags.
#[test]
fn parse_formatting() {
    // Boxed plain-text token, the inner value of most expectations below.
    let boxed = |s: &'static str| Box::new(Token::PlainText(s.into()));
    // "not code " + inline code + trailing plain text.
    let around_code = |code: &'static str, tail: &'static str| {
        Token::Sequence(vec![
            Token::PlainText("not code ".into()),
            Token::InlineCode(code.into()),
            Token::PlainText(tail.into()),
        ])
    };

    assert_eq!(
        Token::Strikethrough(boxed("stikethrough")),
        parse_full("~~stikethrough~~")
    );
    assert_eq!(Token::Bold(boxed("bold")), parse_full("**bold**"));
    assert_eq!(Token::Italic(boxed("italic")), parse_full("*italic*"));

    assert_eq!(
        around_code("code", " also not code"),
        parse_full("not code `code` also not code")
    );
    // An unmatched backtick stays plain text.
    assert_eq!(
        around_code("code", " also `not code"),
        parse_full("not code `code` also `not code")
    );
    // Formatting markers inside inline code are not interpreted.
    assert_eq!(
        around_code("*not bold*", " also not code"),
        parse_full("not code `*not bold*` also not code")
    );

    assert_eq!(
        Token::BoldItalic(boxed("bold italic")),
        parse_full("***bold italic***")
    );
    assert_eq!(
        Token::Bold(Box::new(Token::Italic(boxed("bold italic")))),
        parse_full("<b><i>bold italic</i></b>")
    );
}
|
||
|
||
/// Checks nested and multi-line constructs: `<center>`, quotes, `$[...]`
/// functions, mentions inside unbalanced tags, and nested quotes.
///
/// Emoji that are ZWJ sequences are written with explicit escapes: the
/// expected single-token result depends on the invisible `U+200D` joiners,
/// which are easy to lose when the source is copied or normalized.
#[test]
fn parse_complex() {
    assert_eq!(
        Token::Center(Box::new(Token::Sequence(vec![
            Token::PlainText("centered\n".into()),
            Token::UnicodeEmoji("🦋".into()),
            // Transgender flag: white flag + ZWJ + transgender symbol.
            Token::UnicodeEmoji("\u{1f3f3}\u{fe0f}\u{200d}\u{26a7}\u{fe0f}".into()),
            Token::PlainText("\ntext".into())
        ]))),
        parse_full("<center>centered\n🦋\u{1f3f3}\u{fe0f}\u{200d}\u{26a7}\u{fe0f}\ntext</center>")
    );

    assert_eq!(
        Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
            Token::PlainText("centered\n".into()),
            // Women holding hands with two skin tones, joined by ZWJ.
            Token::UnicodeEmoji(
                "\u{1f469}\u{1f3fd}\u{200d}\u{1f91d}\u{200d}\u{1f469}\u{1f3fc}".into()
            ),
            Token::PlainText("\ntext".into())
        ]))))),
        parse_full(
            "> <center>centered\n> \u{1f469}\u{1f3fd}\u{200d}\u{1f91d}\u{200d}\u{1f469}\u{1f3fc}\n> text</center>"
        )
    );

    assert_eq!(
        Token::Function {
            name: "x2".into(),
            params: HashMap::new(),
            inner: Box::new(Token::Sequence(vec![
                Token::Function {
                    name: "sparkle".into(),
                    params: HashMap::new(),
                    inner: Box::new(Token::UnicodeEmoji("🥺".into())),
                },
                Token::UnicodeEmoji("💜".into()),
                Token::Function {
                    name: "spin".into(),
                    params: {
                        let mut params = HashMap::new();
                        params.insert("y".into(), None);
                        params.insert("speed".into(), Some("5s".into()));
                        params
                    },
                    inner: Box::new(Token::UnicodeEmoji("❤️".into())),
                },
                Token::UnicodeEmoji("🦊".into()),
            ]))
        },
        parse_full(r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#)
    );

    // Improperly nested tags fall back to plain text; mentions still parse.
    assert_eq!(
        Token::Sequence(vec![
            Token::PlainText("<b>bold ".into()),
            Token::Mention {
                mention_type: crate::MentionType::User,
                name: "tag1".into(),
                host: None
            },
            Token::PlainText(" <i> ".into()),
            Token::Mention {
                mention_type: crate::MentionType::User,
                name: "tag2".into(),
                host: None
            },
            Token::PlainText(" </b>italic</i>".into())
        ]),
        Context
            .full(Span::new(r#"<b>bold @tag1 <i> @tag2 </b>italic</i>"#))
            .unwrap()
            .1
            .merged()
    );

    assert_eq!(
        Token::Quote(Box::new(Token::Sequence(vec![
            Token::PlainText("test\n".into()),
            Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
            Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
        ]))),
        parse_full("\n> test\n> <i>\n> italic\n> </i>\n>> Nested quote\n")
    );
}
|
||
|
||
/// Checks `[label](url)` links, the `?` no-embed prefix, and the fallback
/// to plain text plus a raw URL when the closing parenthesis is missing.
#[test]
fn parse_link() {
    let lead_in = || Token::PlainText("Link test: ".into());
    let labelled = |href: &'static str, embed: bool| Token::Link {
        label: Box::new(Token::PlainText("label".into())),
        href: href.into(),
        embed,
    };

    assert_eq!(
        parse_full("Link test: [label](https://example.com)"),
        Token::Sequence(vec![lead_in(), labelled("https://example.com", true)])
    );

    assert_eq!(
        parse_full("Link test: ?[label](https://awawa.gay)"),
        Token::Sequence(vec![lead_in(), labelled("https://awawa.gay", false)])
    );

    // Missing closing parenthesis: no link token is produced.
    assert_eq!(
        parse_full("Link test: ?[label](https://awawa.gay"),
        Token::Sequence(vec![
            Token::PlainText("Link test: ?[label](".into()),
            Token::UrlRaw("https://awawa.gay".into()),
        ])
    );
}
|
||
|
||
/// Checks user and community mentions, with and without a host, and that
/// punctuation following a mention is left as plain text.
#[test]
fn parse_mention() {
    let mention = |mention_type: crate::MentionType, host: Option<&'static str>| Token::Mention {
        mention_type,
        name: "tag".into(),
        host: host.map(|h| h.into()),
    };

    // A lone mention is returned directly, not wrapped in a sequence.
    assert_eq!(parse_full("@tag"), mention(crate::MentionType::User, None));

    // (input, text before, mention type, host, text after)
    let cases = [
        (
            "hgsjlkdsa @tag fgahjsdkd",
            "hgsjlkdsa ",
            crate::MentionType::User,
            None,
            " fgahjsdkd",
        ),
        (
            "hgsjlkdsa @tag@ fgahjsdkd",
            "hgsjlkdsa ",
            crate::MentionType::User,
            None,
            "@ fgahjsdkd",
        ),
        (
            "aaaa @tag@domain bbbbb",
            "aaaa ",
            crate::MentionType::User,
            Some("domain"),
            " bbbbb",
        ),
        (
            "test @tag@domain, test",
            "test ",
            crate::MentionType::User,
            Some("domain"),
            ", test",
        ),
        (
            "test @tag@domain.gay. test",
            "test ",
            crate::MentionType::User,
            Some("domain.gay"),
            ". test",
        ),
        (
            "test @tag@domain? test",
            "test ",
            crate::MentionType::User,
            Some("domain"),
            "? test",
        ),
        (
            "test !tag@domain.com test",
            "test ",
            crate::MentionType::Community,
            Some("domain.com"),
            " test",
        ),
    ];

    for (input, before, mention_type, host, after) in cases {
        assert_eq!(
            parse_full(input),
            Token::Sequence(vec![
                Token::PlainText(before.into()),
                mention(mention_type, host),
                Token::PlainText(after.into())
            ])
        );
    }
}
|
||
|
||
/// Checks Unicode emoji tokenization, including how zero-width joiners
/// (ZWJ, `U+200D`) and non-joiners (ZWNJ, `U+200C`) around emoji are handled.
#[test]
fn parse_emoji() {
    let emoji = |s: &'static str| Token::UnicodeEmoji(Cow::from(s));
    let text = |s: &'static str| Token::PlainText(Cow::from(s));

    assert_eq!(
        Token::Sequence(vec![emoji("🥺"), emoji("💜"), emoji("❤️"), emoji("🦊")]),
        parse_full("🥺💜❤️🦊")
    );

    // Trans flag, ZWJ
    assert_eq!(
        emoji("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}"),
        parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}")
    );

    // A leading ZWJ is plain text; the white flag after it is still an emoji.
    assert_eq!(
        Token::Sequence(vec![
            text("\u{0200d}"),              // ZWJ
            emoji("\u{1f3f3}\u{0fe0f}"),    // White flag
        ]),
        parse_full("\u{0200d}\u{1f3f3}\u{0fe0f}")
    );

    // Trans flag, ZWNJ
    assert_eq!(
        Token::Sequence(vec![
            emoji("\u{1f3f3}\u{0fe0f}"),    // White flag
            text("\u{0200c}"),              // ZWNJ
            emoji("\u{026a7}\u{0fe0f}"),    // Trans symbol
        ]),
        parse_full("\u{1f3f3}\u{0fe0f}\u{0200c}\u{026a7}\u{0fe0f}")
    );

    // Trailing joiners are not part of the emoji token.
    assert_eq!(
        Token::Sequence(vec![
            emoji("\u{1f3f3}\u{0fe0f}"),                // White flag
            text("\u{0200d}\u{0200d}\u{0200d}"),        // ZWJ
        ]),
        parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{0200d}\u{0200d}")
    );
}
|
||
}
|