Connected it all
parent c45ec852dd
commit 453891ddf4
@@ -6,7 +6,7 @@ use nom::character::complete::{
 };
 use nom::combinator::{eof, fail, map, not, opt, recognize};
 use nom::error::ErrorKind;
-use nom::multi::{many0, many0_count, many1, many1_count, separated_list1};
+use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
 use nom::{IResult, Offset, Slice};
 use nom_locate::LocatedSpan;
@@ -14,7 +14,7 @@ use std::borrow::Cow;
 use std::collections::HashMap;
 use unicode_segmentation::UnicodeSegmentation;

-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub enum MentionType {
     Community,
     User,
@@ -29,7 +29,7 @@ impl MentionType {
     }
 }

-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Eq, PartialEq)]
 pub enum Token<'a> {
     PlainText(Cow<'a, str>),
     Sequence(Vec<Token<'a>>),
@@ -132,6 +132,49 @@ impl Token<'_> {
             Token::Hashtag(url) => Token::Hashtag(Cow::Owned(url.clone().into_owned())),
         }
     }
+
+    fn merged(&self) -> Token {
+        match self {
+            Token::Sequence(tokens) => {
+                let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
+                    if let Some(Token::PlainText(last)) = acc.last_mut() {
+                        if let Token::PlainText(tok_text) = tok {
+                            *last = Cow::from(last.to_string() + tok_text.as_ref());
+
+                            return acc;
+                        }
+                    }
+
+                    acc.push(tok.merged());
+                    acc
+                });
+
+                if tokens_multi.len() == 1 {
+                    return tokens_multi.into_iter().next().unwrap();
+                }
+
+                Token::Sequence(tokens_multi)
+            }
+            Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
+            Token::Small(inner) => Token::Small(Box::new(inner.merged())),
+            Token::Big(inner) => Token::Big(Box::new(inner.merged())),
+            Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())),
+            Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
+            Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
+            Token::Center(inner) => Token::Center(Box::new(inner.merged())),
+            Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
+            Token::Function {
+                name,
+                params,
+                inner,
+            } => Token::Function {
+                name: name.clone(),
+                params: params.clone(),
+                inner: Box::new(inner.merged()),
+            },
+            other => other.clone(),
+        }
+    }
 }

 type Span<'a> = LocatedSpan<&'a str>;
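Note (illustrative, not part of the diff): the new merged() helper collapses adjacent PlainText tokens inside a Sequence and rebuilds wrapper variants with their inner token merged recursively. A minimal sketch of the expected behaviour, written as it could appear in the crate's own mod test; the input values are made up:

    #[test]
    fn merges_adjacent_plain_text() {
        // Two neighbouring PlainText tokens fold into one; the Bold wrapper is
        // rebuilt around its (already merged) inner token.
        let seq = Token::Sequence(vec![
            Token::PlainText("foo".into()),
            Token::PlainText("bar".into()),
            Token::Bold(Box::new(Token::PlainText("baz".into()))),
        ]);

        assert_eq!(
            Token::Sequence(vec![
                Token::PlainText("foobar".into()),
                Token::Bold(Box::new(Token::PlainText("baz".into()))),
            ]),
            seq.merged()
        );
    }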
@@ -244,25 +287,103 @@ struct Context;

 impl Context {
     #[inline]
-    const fn partial<'a>(
+    const fn partial(
         &self,
-        func: impl Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
-    ) -> impl Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
+        func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
+    ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
         move |input| func(self, input)
     }

-    fn root<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let (input, token) = alt((self.partial(Self::tag_quote),))(input)?;
-        Ok((input, token))
+    fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
     }

     fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?;
+        map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
+    }
+
+    fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        map(
+            many1(self.partial(Self::inline_label_safe_single)),
+            Token::Sequence,
+        )(input)
+    }
+
+    fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        alt((
+            self.partial(Self::tag_bold_italic_asterisk),
+            self.partial(Self::tag_bold_italic_underscore),
+            self.partial(Self::tag_bold_asterisk),
+            self.partial(Self::tag_italic_asterisk),
+            self.partial(Self::tag_bold_underscore),
+            self.partial(Self::tag_italic_underscore),
+        ))(input)
+    }
+
+    fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, token) = alt((
+            self.partial(Self::unicode_emoji),
+            self.partial(Self::tag_block_center),
+            self.partial(Self::tag_small),
+            self.partial(Self::tag_plain),
+            self.partial(Self::tag_bold),
+            self.partial(Self::tag_italic),
+            self.partial(Self::tag_strikethrough),
+            self.partial(Self::url_no_embed),
+            self.partial(Self::base_bold_italic),
+            self.partial(Self::tag_block_code),
+            self.partial(Self::tag_inline_code),
+            self.partial(Self::tag_quote),
+            self.partial(Self::tag_block_math),
+            self.partial(Self::tag_inline_math),
+            self.partial(Self::tag_strikethrough_tilde),
+            self.partial(Self::tag_func),
+            self.partial(Self::tag_mention),
+            self.partial(Self::tag_hashtag),
+            self.partial(Self::shortcode_emoji),
+            self.partial(Self::raw_url),
+            self.partial(Self::text),
+        ))(input)?;
         Ok((input, token))
     }

-    fn inline_no_link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?;
+    fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, token) = alt((
+            self.partial(Self::unicode_emoji),
+            self.partial(Self::tag_small),
+            self.partial(Self::tag_plain),
+            self.partial(Self::tag_bold),
+            self.partial(Self::tag_italic),
+            self.partial(Self::tag_strikethrough),
+            self.partial(Self::url_no_embed),
+            self.partial(Self::base_bold_italic),
+            self.partial(Self::tag_inline_code),
+            self.partial(Self::tag_inline_math),
+            self.partial(Self::tag_strikethrough_tilde),
+            self.partial(Self::tag_func),
+            self.partial(Self::tag_mention),
+            self.partial(Self::tag_hashtag),
+            self.partial(Self::shortcode_emoji),
+            self.partial(Self::raw_url),
+            self.partial(Self::text),
+        ))(input)?;
+        Ok((input, token))
+    }
+
+    fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, token) = alt((
+            self.partial(Self::unicode_emoji),
+            self.partial(Self::tag_small),
+            self.partial(Self::tag_plain),
+            self.partial(Self::tag_bold),
+            self.partial(Self::tag_italic),
+            self.partial(Self::tag_strikethrough),
+            self.partial(Self::base_bold_italic),
+            self.partial(Self::tag_strikethrough_tilde),
+            self.partial(Self::tag_func),
+            self.partial(Self::shortcode_emoji),
+            self.partial(Self::text),
+        ))(input)?;
         Ok((input, token))
     }

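Note (illustrative, not part of the diff): full is now the top-level entry point; it wraps many1(full_single) in a Token::Sequence, so callers will typically follow it with merged(). A usage sketch with a made-up input string, again as it could appear inside mod test:

    #[test]
    fn full_entry_point_sketch() {
        // Hypothetical input; any MFM-style string works here.
        let input = Span::new("plain text $[x2 scaled]");

        // `full` is a nom parser: it returns the remaining input and the parsed tree.
        let (_rest, tree) = Context.full(input).unwrap();

        // The tree is a Token::Sequence; merged() collapses adjacent PlainText runs.
        let merged = tree.merged();
        assert!(matches!(merged, Token::Sequence(_) | Token::PlainText(_)));
    }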
@@ -270,7 +391,7 @@ impl Context {
         let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;

         if let (None, None) = leading_spaces {
-            if input.get_column() != 0 {
+            if input.get_column() != 1 {
                 return fail(input);
             }
         }
@@ -295,7 +416,12 @@ impl Context {
             return fail(input);
         }

-        let (_, inner) = spliced(&quote_lines, space, Token::Quote, orig_input)?;
+        let (_, inner) = spliced(
+            &quote_lines,
+            self.partial(Self::full),
+            Token::Quote,
+            orig_input,
+        )?;

         let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;

@@ -308,27 +434,23 @@ impl Context {

         let (input, _) = opt(line_ending)(input)?;

-        if input.get_column() != 0 {
+        if input.get_column() != 1 {
             return fail(input);
         }

         let (input, _) = tag_start(input)?;
         let (input, _) = opt(line_ending)(input)?;

-        let (input, center_seq) = many0(tuple((
-            not(tuple((opt(line_ending), tag_end))),
-            self.partial(Self::inline),
-        )))(input)?;
+        let (input, (center_seq, _)) = many_till(
+            self.partial(Self::inline_single),
+            tuple((opt(line_ending), tag_end)),
+        )(input)?;

-        let (input, _) = opt(line_ending)(input)?;
-        let (input, _) = tag_end(input)?;
         let (input, _) = many0(space)(input)?;
-        let (input, _) = not(not_line_ending)(input)?;
+        let (input, _) = not(not(line_ending))(input)?;
         let (input, _) = opt(line_ending)(input)?;

-        let tokens = center_seq.into_iter().map(|(_, v)| v).collect::<Vec<_>>();
-
-        Ok((input, boxing_sequence(Token::Center)(tokens)))
+        Ok((input, boxing_sequence(Token::Center)(center_seq)))
     }

     fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
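Note (illustrative, not part of the diff): many_till applies its first parser repeatedly until the second one succeeds, returning both the collected results and the terminator, which is what replaces the many0(tuple((not(...), ...))) pattern above. A standalone nom sketch, where "</center>" only stands in for the crate's tag_end parser:

    use nom::bytes::complete::tag;
    use nom::character::complete::anychar;
    use nom::multi::many_till;
    use nom::IResult;

    // Collect characters until the closing-tag parser succeeds; the result is
    // the collected items plus whatever the terminator parser produced.
    fn until_close(input: &str) -> IResult<&str, (Vec<char>, &str)> {
        many_till(anychar, tag("</center>"))(input)
    }

    fn main() {
        assert_eq!(
            until_close("abc</center>rest"),
            Ok(("rest", (vec!['a', 'b', 'c'], "</center>")))
        );
    }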
@@ -336,7 +458,7 @@ impl Context {

         let (input, _) = opt(line_ending)(input)?;

-        if input.get_column() != 0 {
+        if input.get_column() != 1 {
             return fail(input);
         }

@@ -358,7 +480,7 @@ impl Context {
         let (input, _) = line_ending(input)?;
         let (input, _) = delim(input)?;
         let (input, _) = many0(space)(input)?;
-        let (input, _) = not(not_line_ending)(input)?;
+        let (input, _) = not(not(line_ending))(input)?;
         let (input, _) = opt(line_ending)(input)?;

         Ok((
@@ -376,7 +498,7 @@ impl Context {

         let (input, _) = opt(line_ending)(input)?;

-        if input.get_column() != 0 {
+        if input.get_column() != 1 {
             return fail(input);
         }

@@ -458,8 +580,7 @@ impl Context {
             tag("_"),
         ))));

-        let (input, func_name_span) = func_ident(input)?;
-        let func_name = func_name_span.into_fragment();
+        let (input, func_name) = map(func_ident, Span::into_fragment)(input)?;

         let arg = tuple((func_ident, opt(tuple((tag("="), param_value)))));

@@ -478,16 +599,16 @@ impl Context {
                 .collect::<HashMap<_, _>>()
         });

-        let (input, inner) = self.partial(Self::inline)(input)?;
+        let (input, _) = opt(space)(input)?;

-        let (input, _) = tag("]")(input)?;
+        let (input, (inner, _)) = many_till(self.partial(Self::inline_single), tag("]"))(input)?;

         Ok((
             input,
             Token::Function {
                 name: Cow::from(func_name),
                 params: args_out,
-                inner: Box::new(inner),
+                inner: Box::new(Token::Sequence(inner)),
             },
         ))
     }
@@ -649,15 +770,11 @@ impl Context {
     }

     fn text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let before = input;
-        let (input, _) = anychar(input)?;
-        Ok((
-            input,
-            Token::PlainText(before.fragment_between(&input).into()),
-        ))
+        let (input, text) = map(recognize(anychar), Span::into_fragment)(input)?;
+        Ok((input, Token::PlainText(text.into())))
     }

-    fn url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         let (input, url_span) = recognize(tuple((
             protocol,
             url_chars(|input| not(url_chars_base)(input), false),
@@ -688,8 +805,10 @@ impl Context {
         let (input, no_embed) = opt(tag("?"))(input)?;
         let (input, _) = tag("[")(input)?;
         let (input, _) = not(tag("["))(input)?;
-        let (input, label_span) =
-            recognize(many1(tuple((not(tag("](")), not_line_ending))))(input)?;
+        let (input, label_span) = recognize(many1(tuple((
+            not(tag("](")),
+            self.partial(Self::inline_label_safe_single),
+        ))))(input)?;
         let (input, _) = tag("]")(input)?;
         let (input, _) = tag("(")(input)?;
         let (input, url_span) = recognize(tuple((protocol, url_chars(tag("]"), true))))(input)?;
@@ -772,7 +891,7 @@ impl Context {
         ))
     }

-    fn hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         // TODO: Skip when preceded by alphanumerics

         let (input, _) = tag("#")(input)?;
@@ -843,9 +962,11 @@ fn url_chars<'a, T: 'a>(

 #[cfg(test)]
 mod test {
-    use crate::{url_chars, Context, Span};
+    use crate::{url_chars, Context, Span, Token};
     use nom::bytes::complete::tag;
     use nom::multi::many1;
+    use std::borrow::Cow;
+    use std::collections::HashMap;

     #[test]
     fn parse_url_chars() {
@@ -895,12 +1016,50 @@ mod test {
         );
     }

+    #[test]
+    fn parse_complex() {
+        let emoji = r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#;
+        assert_eq!(
+            Token::Function {
+                name: "x2".into(),
+                params: HashMap::new(),
+                inner: Box::new(Token::Sequence(vec![
+                    Token::Function {
+                        name: "sparkle".into(),
+                        params: HashMap::new(),
+                        inner: Box::new(Token::UnicodeEmoji("🥺".into())),
+                    },
+                    Token::UnicodeEmoji("💜".into()),
+                    Token::Function {
+                        name: "spin".into(),
+                        params: {
+                            let mut params = HashMap::new();
+                            params.insert("y".into(), None);
+                            params.insert("speed".into(), Some("5s".into()));
+                            params
+                        },
+                        inner: Box::new(Token::UnicodeEmoji("❤️".into())),
+                    },
+                    Token::UnicodeEmoji("🦊".into()),
+                ]))
+            },
+            Context.full(Span::new(emoji)).unwrap().1.merged()
+        )
+    }
+
     #[test]
     fn parse_emoji() {
         let test = "🥺💜❤️🦊";
         let ctx = Context;
         let tokens = many1(ctx.partial(Context::unicode_emoji))(Span::from(test)).unwrap();

-        println!("{:#?}", tokens.1)
+        assert_eq!(
+            vec!["🥺", "💜", "❤️", "🦊"]
+                .into_iter()
+                .map(<&str as Into<Cow<_>>>::into)
+                .map(Token::UnicodeEmoji)
+                .collect::<Vec<_>>(),
+            tokens.1
+        );
     }
 }