Connected it all
This commit is contained in:
parent
c45ec852dd
commit
453891ddf4
|
@ -6,7 +6,7 @@ use nom::character::complete::{
|
|||
};
|
||||
use nom::combinator::{eof, fail, map, not, opt, recognize};
|
||||
use nom::error::ErrorKind;
|
||||
use nom::multi::{many0, many0_count, many1, many1_count, separated_list1};
|
||||
use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
|
||||
use nom::sequence::tuple;
|
||||
use nom::{IResult, Offset, Slice};
|
||||
use nom_locate::LocatedSpan;
|
||||
|
@ -14,7 +14,7 @@ use std::borrow::Cow;
|
|||
use std::collections::HashMap;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
pub enum MentionType {
|
||||
Community,
|
||||
User,
|
||||
|
@ -29,7 +29,7 @@ impl MentionType {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum Token<'a> {
|
||||
PlainText(Cow<'a, str>),
|
||||
Sequence(Vec<Token<'a>>),
|
||||
|
@ -132,6 +132,49 @@ impl Token<'_> {
|
|||
Token::Hashtag(url) => Token::Hashtag(Cow::Owned(url.clone().into_owned())),
|
||||
}
|
||||
}
|
||||
|
||||
fn merged(&self) -> Token {
|
||||
match self {
|
||||
Token::Sequence(tokens) => {
|
||||
let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
|
||||
if let Some(Token::PlainText(last)) = acc.last_mut() {
|
||||
if let Token::PlainText(tok_text) = tok {
|
||||
*last = Cow::from(last.to_string() + tok_text.as_ref());
|
||||
|
||||
return acc;
|
||||
}
|
||||
}
|
||||
|
||||
acc.push(tok.merged());
|
||||
acc
|
||||
});
|
||||
|
||||
if tokens_multi.len() == 1 {
|
||||
return tokens_multi.into_iter().next().unwrap();
|
||||
}
|
||||
|
||||
Token::Sequence(tokens_multi)
|
||||
}
|
||||
Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
|
||||
Token::Small(inner) => Token::Small(Box::new(inner.merged())),
|
||||
Token::Big(inner) => Token::Big(Box::new(inner.merged())),
|
||||
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())),
|
||||
Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
|
||||
Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
|
||||
Token::Center(inner) => Token::Center(Box::new(inner.merged())),
|
||||
Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
|
||||
Token::Function {
|
||||
name,
|
||||
params,
|
||||
inner,
|
||||
} => Token::Function {
|
||||
name: name.clone(),
|
||||
params: params.clone(),
|
||||
inner: Box::new(inner.merged()),
|
||||
},
|
||||
other => other.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type Span<'a> = LocatedSpan<&'a str>;
|
||||
|
@ -244,25 +287,103 @@ struct Context;
|
|||
|
||||
impl Context {
|
||||
#[inline]
|
||||
const fn partial<'a>(
|
||||
const fn partial(
|
||||
&self,
|
||||
func: impl Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
|
||||
) -> impl Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
|
||||
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
|
||||
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
|
||||
move |input| func(self, input)
|
||||
}
|
||||
|
||||
fn root<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
let (input, token) = alt((self.partial(Self::tag_quote),))(input)?;
|
||||
Ok((input, token))
|
||||
fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
|
||||
}
|
||||
|
||||
fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?;
|
||||
map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
|
||||
}
|
||||
|
||||
fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
map(
|
||||
many1(self.partial(Self::inline_label_safe_single)),
|
||||
Token::Sequence,
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
alt((
|
||||
self.partial(Self::tag_bold_italic_asterisk),
|
||||
self.partial(Self::tag_bold_italic_underscore),
|
||||
self.partial(Self::tag_bold_asterisk),
|
||||
self.partial(Self::tag_italic_asterisk),
|
||||
self.partial(Self::tag_bold_underscore),
|
||||
self.partial(Self::tag_italic_underscore),
|
||||
))(input)
|
||||
}
|
||||
|
||||
fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
let (input, token) = alt((
|
||||
self.partial(Self::unicode_emoji),
|
||||
self.partial(Self::tag_block_center),
|
||||
self.partial(Self::tag_small),
|
||||
self.partial(Self::tag_plain),
|
||||
self.partial(Self::tag_bold),
|
||||
self.partial(Self::tag_italic),
|
||||
self.partial(Self::tag_strikethrough),
|
||||
self.partial(Self::url_no_embed),
|
||||
self.partial(Self::base_bold_italic),
|
||||
self.partial(Self::tag_block_code),
|
||||
self.partial(Self::tag_inline_code),
|
||||
self.partial(Self::tag_quote),
|
||||
self.partial(Self::tag_block_math),
|
||||
self.partial(Self::tag_inline_math),
|
||||
self.partial(Self::tag_strikethrough_tilde),
|
||||
self.partial(Self::tag_func),
|
||||
self.partial(Self::tag_mention),
|
||||
self.partial(Self::tag_hashtag),
|
||||
self.partial(Self::shortcode_emoji),
|
||||
self.partial(Self::raw_url),
|
||||
self.partial(Self::text),
|
||||
))(input)?;
|
||||
Ok((input, token))
|
||||
}
|
||||
|
||||
fn inline_no_link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?;
|
||||
fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
let (input, token) = alt((
|
||||
self.partial(Self::unicode_emoji),
|
||||
self.partial(Self::tag_small),
|
||||
self.partial(Self::tag_plain),
|
||||
self.partial(Self::tag_bold),
|
||||
self.partial(Self::tag_italic),
|
||||
self.partial(Self::tag_strikethrough),
|
||||
self.partial(Self::url_no_embed),
|
||||
self.partial(Self::base_bold_italic),
|
||||
self.partial(Self::tag_inline_code),
|
||||
self.partial(Self::tag_inline_math),
|
||||
self.partial(Self::tag_strikethrough_tilde),
|
||||
self.partial(Self::tag_func),
|
||||
self.partial(Self::tag_mention),
|
||||
self.partial(Self::tag_hashtag),
|
||||
self.partial(Self::shortcode_emoji),
|
||||
self.partial(Self::raw_url),
|
||||
self.partial(Self::text),
|
||||
))(input)?;
|
||||
Ok((input, token))
|
||||
}
|
||||
|
||||
fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
let (input, token) = alt((
|
||||
self.partial(Self::unicode_emoji),
|
||||
self.partial(Self::tag_small),
|
||||
self.partial(Self::tag_plain),
|
||||
self.partial(Self::tag_bold),
|
||||
self.partial(Self::tag_italic),
|
||||
self.partial(Self::tag_strikethrough),
|
||||
self.partial(Self::base_bold_italic),
|
||||
self.partial(Self::tag_strikethrough_tilde),
|
||||
self.partial(Self::tag_func),
|
||||
self.partial(Self::shortcode_emoji),
|
||||
self.partial(Self::text),
|
||||
))(input)?;
|
||||
Ok((input, token))
|
||||
}
|
||||
|
||||
|
@ -270,7 +391,7 @@ impl Context {
|
|||
let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
|
||||
|
||||
if let (None, None) = leading_spaces {
|
||||
if input.get_column() != 0 {
|
||||
if input.get_column() != 1 {
|
||||
return fail(input);
|
||||
}
|
||||
}
|
||||
|
@ -295,7 +416,12 @@ impl Context {
|
|||
return fail(input);
|
||||
}
|
||||
|
||||
let (_, inner) = spliced(&quote_lines, space, Token::Quote, orig_input)?;
|
||||
let (_, inner) = spliced(
|
||||
&quote_lines,
|
||||
self.partial(Self::full),
|
||||
Token::Quote,
|
||||
orig_input,
|
||||
)?;
|
||||
|
||||
let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;
|
||||
|
||||
|
@ -308,27 +434,23 @@ impl Context {
|
|||
|
||||
let (input, _) = opt(line_ending)(input)?;
|
||||
|
||||
if input.get_column() != 0 {
|
||||
if input.get_column() != 1 {
|
||||
return fail(input);
|
||||
}
|
||||
|
||||
let (input, _) = tag_start(input)?;
|
||||
let (input, _) = opt(line_ending)(input)?;
|
||||
|
||||
let (input, center_seq) = many0(tuple((
|
||||
not(tuple((opt(line_ending), tag_end))),
|
||||
self.partial(Self::inline),
|
||||
)))(input)?;
|
||||
let (input, (center_seq, _)) = many_till(
|
||||
self.partial(Self::inline_single),
|
||||
tuple((opt(line_ending), tag_end)),
|
||||
)(input)?;
|
||||
|
||||
let (input, _) = opt(line_ending)(input)?;
|
||||
let (input, _) = tag_end(input)?;
|
||||
let (input, _) = many0(space)(input)?;
|
||||
let (input, _) = not(not_line_ending)(input)?;
|
||||
let (input, _) = not(not(line_ending))(input)?;
|
||||
let (input, _) = opt(line_ending)(input)?;
|
||||
|
||||
let tokens = center_seq.into_iter().map(|(_, v)| v).collect::<Vec<_>>();
|
||||
|
||||
Ok((input, boxing_sequence(Token::Center)(tokens)))
|
||||
Ok((input, boxing_sequence(Token::Center)(center_seq)))
|
||||
}
|
||||
|
||||
fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
|
@ -336,7 +458,7 @@ impl Context {
|
|||
|
||||
let (input, _) = opt(line_ending)(input)?;
|
||||
|
||||
if input.get_column() != 0 {
|
||||
if input.get_column() != 1 {
|
||||
return fail(input);
|
||||
}
|
||||
|
||||
|
@ -358,7 +480,7 @@ impl Context {
|
|||
let (input, _) = line_ending(input)?;
|
||||
let (input, _) = delim(input)?;
|
||||
let (input, _) = many0(space)(input)?;
|
||||
let (input, _) = not(not_line_ending)(input)?;
|
||||
let (input, _) = not(not(line_ending))(input)?;
|
||||
let (input, _) = opt(line_ending)(input)?;
|
||||
|
||||
Ok((
|
||||
|
@ -376,7 +498,7 @@ impl Context {
|
|||
|
||||
let (input, _) = opt(line_ending)(input)?;
|
||||
|
||||
if input.get_column() != 0 {
|
||||
if input.get_column() != 1 {
|
||||
return fail(input);
|
||||
}
|
||||
|
||||
|
@ -458,8 +580,7 @@ impl Context {
|
|||
tag("_"),
|
||||
))));
|
||||
|
||||
let (input, func_name_span) = func_ident(input)?;
|
||||
let func_name = func_name_span.into_fragment();
|
||||
let (input, func_name) = map(func_ident, Span::into_fragment)(input)?;
|
||||
|
||||
let arg = tuple((func_ident, opt(tuple((tag("="), param_value)))));
|
||||
|
||||
|
@ -478,16 +599,16 @@ impl Context {
|
|||
.collect::<HashMap<_, _>>()
|
||||
});
|
||||
|
||||
let (input, inner) = self.partial(Self::inline)(input)?;
|
||||
let (input, _) = opt(space)(input)?;
|
||||
|
||||
let (input, _) = tag("]")(input)?;
|
||||
let (input, (inner, _)) = many_till(self.partial(Self::inline_single), tag("]"))(input)?;
|
||||
|
||||
Ok((
|
||||
input,
|
||||
Token::Function {
|
||||
name: Cow::from(func_name),
|
||||
params: args_out,
|
||||
inner: Box::new(inner),
|
||||
inner: Box::new(Token::Sequence(inner)),
|
||||
},
|
||||
))
|
||||
}
|
||||
|
@ -649,15 +770,11 @@ impl Context {
|
|||
}
|
||||
|
||||
fn text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
let before = input;
|
||||
let (input, _) = anychar(input)?;
|
||||
Ok((
|
||||
input,
|
||||
Token::PlainText(before.fragment_between(&input).into()),
|
||||
))
|
||||
let (input, text) = map(recognize(anychar), Span::into_fragment)(input)?;
|
||||
Ok((input, Token::PlainText(text.into())))
|
||||
}
|
||||
|
||||
fn url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
let (input, url_span) = recognize(tuple((
|
||||
protocol,
|
||||
url_chars(|input| not(url_chars_base)(input), false),
|
||||
|
@ -688,8 +805,10 @@ impl Context {
|
|||
let (input, no_embed) = opt(tag("?"))(input)?;
|
||||
let (input, _) = tag("[")(input)?;
|
||||
let (input, _) = not(tag("["))(input)?;
|
||||
let (input, label_span) =
|
||||
recognize(many1(tuple((not(tag("](")), not_line_ending))))(input)?;
|
||||
let (input, label_span) = recognize(many1(tuple((
|
||||
not(tag("](")),
|
||||
self.partial(Self::inline_label_safe_single),
|
||||
))))(input)?;
|
||||
let (input, _) = tag("]")(input)?;
|
||||
let (input, _) = tag("(")(input)?;
|
||||
let (input, url_span) = recognize(tuple((protocol, url_chars(tag("]"), true))))(input)?;
|
||||
|
@ -772,7 +891,7 @@ impl Context {
|
|||
))
|
||||
}
|
||||
|
||||
fn hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
// TODO: Skip when preceded by alphanumerics
|
||||
|
||||
let (input, _) = tag("#")(input)?;
|
||||
|
@ -843,9 +962,11 @@ fn url_chars<'a, T: 'a>(
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::{url_chars, Context, Span};
|
||||
use crate::{url_chars, Context, Span, Token};
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::multi::many1;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[test]
|
||||
fn parse_url_chars() {
|
||||
|
@ -895,12 +1016,50 @@ mod test {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
fn parse_complex() {
    // Nested `$[...]` functions interleaved with bare emoji.
    let source = r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#;

    let sparkle = Token::Function {
        name: "sparkle".into(),
        params: HashMap::new(),
        inner: Box::new(Token::UnicodeEmoji("🥺".into())),
    };

    // `spin` carries one flag parameter and one key=value parameter.
    let spin = Token::Function {
        name: "spin".into(),
        params: {
            let mut params = HashMap::new();
            params.insert("y".into(), None);
            params.insert("speed".into(), Some("5s".into()));
            params
        },
        inner: Box::new(Token::UnicodeEmoji("❤️".into())),
    };

    let expected = Token::Function {
        name: "x2".into(),
        params: HashMap::new(),
        inner: Box::new(Token::Sequence(vec![
            sparkle,
            Token::UnicodeEmoji("💜".into()),
            spin,
            Token::UnicodeEmoji("🦊".into()),
        ])),
    };

    // Keep the parsed token alive in a binding: `merged()` borrows from it.
    let (_, parsed) = Context.full(Span::new(source)).unwrap();
    let actual = parsed.merged();

    assert_eq!(expected, actual)
}
|
||||
|
||||
#[test]
|
||||
fn parse_emoji() {
|
||||
let test = "🥺💜❤️🦊";
|
||||
let ctx = Context;
|
||||
let tokens = many1(ctx.partial(Context::unicode_emoji))(Span::from(test)).unwrap();
|
||||
|
||||
println!("{:#?}", tokens.1)
|
||||
assert_eq!(
|
||||
vec!["🥺", "💜", "❤️", "🦊"]
|
||||
.into_iter()
|
||||
.map(<&str as Into<Cow<_>>>::into)
|
||||
.map(Token::UnicodeEmoji)
|
||||
.collect::<Vec<_>>(),
|
||||
tokens.1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue