Connected it all

Natty 2023-10-06 00:17:52 +02:00
parent c45ec852dd
commit 453891ddf4
Signed by: natty
GPG Key ID: BF6CB659ADEE60EC
1 changed file with 204 additions and 45 deletions


@@ -6,7 +6,7 @@ use nom::character::complete::{
};
use nom::combinator::{eof, fail, map, not, opt, recognize};
use nom::error::ErrorKind;
use nom::multi::{many0, many0_count, many1, many1_count, separated_list1};
use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
use nom::sequence::tuple;
use nom::{IResult, Offset, Slice};
use nom_locate::LocatedSpan;
@@ -14,7 +14,7 @@ use std::borrow::Cow;
use std::collections::HashMap;
use unicode_segmentation::UnicodeSegmentation;
#[derive(Copy, Clone, Debug)]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum MentionType {
Community,
User,
@@ -29,7 +29,7 @@ impl MentionType {
}
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token<'a> {
PlainText(Cow<'a, str>),
Sequence(Vec<Token<'a>>),
@@ -132,6 +132,49 @@ impl Token<'_> {
Token::Hashtag(url) => Token::Hashtag(Cow::Owned(url.clone().into_owned())),
}
}
fn merged(&self) -> Token {
match self {
Token::Sequence(tokens) => {
let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
if let Some(Token::PlainText(last)) = acc.last_mut() {
if let Token::PlainText(tok_text) = tok {
*last = Cow::from(last.to_string() + tok_text.as_ref());
return acc;
}
}
acc.push(tok.merged());
acc
});
if tokens_multi.len() == 1 {
return tokens_multi.into_iter().next().unwrap();
}
Token::Sequence(tokens_multi)
}
Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
Token::Small(inner) => Token::Small(Box::new(inner.merged())),
Token::Big(inner) => Token::Big(Box::new(inner.merged())),
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())),
Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
Token::Center(inner) => Token::Center(Box::new(inner.merged())),
Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
Token::Function {
name,
params,
inner,
} => Token::Function {
name: name.clone(),
params: params.clone(),
inner: Box::new(inner.merged()),
},
other => other.clone(),
}
}
}
type Span<'a> = LocatedSpan<&'a str>;
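
The merged() pass added above folds adjacent PlainText nodes into a single node, recursively normalizes nested tokens, and collapses one-element sequences. A simplified, self-contained sketch of the same folding strategy, using a hypothetical two-variant Tok enum rather than this file's full Token type:

use std::borrow::Cow;

#[derive(Clone, Debug, PartialEq)]
enum Tok<'a> {
    PlainText(Cow<'a, str>),
    Sequence(Vec<Tok<'a>>),
}

fn merged<'a>(tok: &Tok<'a>) -> Tok<'a> {
    match tok {
        Tok::Sequence(tokens) => {
            let folded = tokens.iter().fold(Vec::new(), |mut acc, t| {
                // Append to the previous plain-text run, if there is one.
                if let Some(Tok::PlainText(last)) = acc.last_mut() {
                    if let Tok::PlainText(text) = t {
                        *last = Cow::from(last.to_string() + text.as_ref());
                        return acc;
                    }
                }
                acc.push(merged(t));
                acc
            });
            // A one-element sequence collapses into its only child.
            if folded.len() == 1 {
                return folded.into_iter().next().unwrap();
            }
            Tok::Sequence(folded)
        }
        other => other.clone(),
    }
}

fn main() {
    let seq = Tok::Sequence(vec![
        Tok::PlainText("Hello, ".into()),
        Tok::PlainText("world".into()),
        Tok::PlainText("!".into()),
    ]);
    assert_eq!(merged(&seq), Tok::PlainText("Hello, world!".into()));
}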
@@ -244,25 +287,103 @@ struct Context;
impl Context {
#[inline]
const fn partial<'a>(
const fn partial(
&self,
func: impl Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
) -> impl Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
move |input| func(self, input)
}
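
The reworked partial() above moves the span lifetime into a higher-ranked for<'a> bound, so the returned parser closure is no longer tied to a single caller-chosen lifetime and can be handed to alt/many_till and applied to spans of any lifetime. A minimal sketch of the same shape, with toy types standing in for Span and IResult (names here are illustrative, not from this commit):

struct Ctx;

impl Ctx {
    // Same pattern as partial(): the returned closure works for inputs of any lifetime.
    fn partial(
        &self,
        func: impl for<'a> Fn(&Self, &'a str) -> (&'a str, char) + 'static,
    ) -> impl for<'a> Fn(&'a str) -> (&'a str, char) + '_ {
        move |input| func(self, input)
    }

    // A toy "parser": split off the first character and return the rest.
    fn step<'a>(&self, input: &'a str) -> (&'a str, char) {
        let c = input.chars().next().unwrap();
        (&input[c.len_utf8()..], c)
    }
}

fn main() {
    let ctx = Ctx;
    let p = ctx.partial(Ctx::step);
    assert_eq!(p("abc"), ("bc", 'a'));
    let owned = String::from("xyz");
    // The same closure also accepts a span with a different, shorter lifetime.
    assert_eq!(p(&owned), ("yz", 'x'));
}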
fn root<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
let (input, token) = alt((self.partial(Self::tag_quote),))(input)?;
Ok((input, token))
fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
}
fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?;
map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
}
fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
map(
many1(self.partial(Self::inline_label_safe_single)),
Token::Sequence,
)(input)
}
fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
alt((
self.partial(Self::tag_bold_italic_asterisk),
self.partial(Self::tag_bold_italic_underscore),
self.partial(Self::tag_bold_asterisk),
self.partial(Self::tag_italic_asterisk),
self.partial(Self::tag_bold_underscore),
self.partial(Self::tag_italic_underscore),
))(input)
}
fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
let (input, token) = alt((
self.partial(Self::unicode_emoji),
self.partial(Self::tag_block_center),
self.partial(Self::tag_small),
self.partial(Self::tag_plain),
self.partial(Self::tag_bold),
self.partial(Self::tag_italic),
self.partial(Self::tag_strikethrough),
self.partial(Self::url_no_embed),
self.partial(Self::base_bold_italic),
self.partial(Self::tag_block_code),
self.partial(Self::tag_inline_code),
self.partial(Self::tag_quote),
self.partial(Self::tag_block_math),
self.partial(Self::tag_inline_math),
self.partial(Self::tag_strikethrough_tilde),
self.partial(Self::tag_func),
self.partial(Self::tag_mention),
self.partial(Self::tag_hashtag),
self.partial(Self::shortcode_emoji),
self.partial(Self::raw_url),
self.partial(Self::text),
))(input)?;
Ok((input, token))
}
fn inline_no_link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?;
fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
let (input, token) = alt((
self.partial(Self::unicode_emoji),
self.partial(Self::tag_small),
self.partial(Self::tag_plain),
self.partial(Self::tag_bold),
self.partial(Self::tag_italic),
self.partial(Self::tag_strikethrough),
self.partial(Self::url_no_embed),
self.partial(Self::base_bold_italic),
self.partial(Self::tag_inline_code),
self.partial(Self::tag_inline_math),
self.partial(Self::tag_strikethrough_tilde),
self.partial(Self::tag_func),
self.partial(Self::tag_mention),
self.partial(Self::tag_hashtag),
self.partial(Self::shortcode_emoji),
self.partial(Self::raw_url),
self.partial(Self::text),
))(input)?;
Ok((input, token))
}
fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
let (input, token) = alt((
self.partial(Self::unicode_emoji),
self.partial(Self::tag_small),
self.partial(Self::tag_plain),
self.partial(Self::tag_bold),
self.partial(Self::tag_italic),
self.partial(Self::tag_strikethrough),
self.partial(Self::base_bold_italic),
self.partial(Self::tag_strikethrough_tilde),
self.partial(Self::tag_func),
self.partial(Self::shortcode_emoji),
self.partial(Self::text),
))(input)?;
Ok((input, token))
}
@@ -270,7 +391,7 @@ impl Context {
let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
if let (None, None) = leading_spaces {
if input.get_column() != 0 {
if input.get_column() != 1 {
return fail(input);
}
}
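
The repeated change of the column check from != 0 to != 1 in the block-level parsers lines up with nom_locate reporting columns 1-based, so the start of a line is column 1 rather than 0. A quick standalone check of that assumption:

use nom_locate::LocatedSpan;

fn main() {
    // nom_locate's get_column() is 1-indexed: byte offset 0 sits in column 1.
    let span: LocatedSpan<&str> = LocatedSpan::new("block content");
    assert_eq!(span.get_column(), 1);
}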
@@ -295,7 +416,12 @@ impl Context {
return fail(input);
}
let (_, inner) = spliced(&quote_lines, space, Token::Quote, orig_input)?;
let (_, inner) = spliced(
&quote_lines,
self.partial(Self::full),
Token::Quote,
orig_input,
)?;
let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;
@@ -308,27 +434,23 @@ impl Context {
let (input, _) = opt(line_ending)(input)?;
if input.get_column() != 0 {
if input.get_column() != 1 {
return fail(input);
}
let (input, _) = tag_start(input)?;
let (input, _) = opt(line_ending)(input)?;
let (input, center_seq) = many0(tuple((
not(tuple((opt(line_ending), tag_end))),
self.partial(Self::inline),
)))(input)?;
let (input, (center_seq, _)) = many_till(
self.partial(Self::inline_single),
tuple((opt(line_ending), tag_end)),
)(input)?;
let (input, _) = opt(line_ending)(input)?;
let (input, _) = tag_end(input)?;
let (input, _) = many0(space)(input)?;
let (input, _) = not(not_line_ending)(input)?;
let (input, _) = not(not(line_ending))(input)?;
let (input, _) = opt(line_ending)(input)?;
let tokens = center_seq.into_iter().map(|(_, v)| v).collect::<Vec<_>>();
Ok((input, boxing_sequence(Token::Center)(tokens)))
Ok((input, boxing_sequence(Token::Center)(center_seq)))
}
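
tag_block_center above (and tag_func further down) now uses many_till in place of the earlier many0(tuple((not(terminator), item))) loop: many_till collects items until its second parser succeeds and returns that terminator alongside the collected items, so the per-item tuple unpacking that followed the old loop disappears. A small standalone illustration, with a hypothetical closing tag as the terminator:

use nom::bytes::complete::tag;
use nom::character::complete::anychar;
use nom::multi::many_till;
use nom::IResult;

// Collect characters until the closing tag, returning both the items and the tag.
fn until_close(input: &str) -> IResult<&str, (Vec<char>, &str)> {
    many_till(anychar, tag("</center>"))(input)
}

fn main() {
    let (rest, (chars, end)) = until_close("hi</center>tail").unwrap();
    assert_eq!(chars, vec!['h', 'i']);
    assert_eq!(end, "</center>");
    assert_eq!(rest, "tail");
}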
fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
@@ -336,7 +458,7 @@ impl Context {
let (input, _) = opt(line_ending)(input)?;
if input.get_column() != 0 {
if input.get_column() != 1 {
return fail(input);
}
@@ -358,7 +480,7 @@ impl Context {
let (input, _) = line_ending(input)?;
let (input, _) = delim(input)?;
let (input, _) = many0(space)(input)?;
let (input, _) = not(not_line_ending)(input)?;
let (input, _) = not(not(line_ending))(input)?;
let (input, _) = opt(line_ending)(input)?;
Ok((
@@ -376,7 +498,7 @@ impl Context {
let (input, _) = opt(line_ending)(input)?;
if input.get_column() != 0 {
if input.get_column() != 1 {
return fail(input);
}
@@ -458,8 +580,7 @@ impl Context {
tag("_"),
))));
let (input, func_name_span) = func_ident(input)?;
let func_name = func_name_span.into_fragment();
let (input, func_name) = map(func_ident, Span::into_fragment)(input)?;
let arg = tuple((func_ident, opt(tuple((tag("="), param_value)))));
@@ -478,16 +599,16 @@ impl Context {
.collect::<HashMap<_, _>>()
});
let (input, inner) = self.partial(Self::inline)(input)?;
let (input, _) = opt(space)(input)?;
let (input, _) = tag("]")(input)?;
let (input, (inner, _)) = many_till(self.partial(Self::inline_single), tag("]"))(input)?;
Ok((
input,
Token::Function {
name: Cow::from(func_name),
params: args_out,
inner: Box::new(inner),
inner: Box::new(Token::Sequence(inner)),
},
))
}
@@ -649,15 +770,11 @@ impl Context {
}
fn text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
let before = input;
let (input, _) = anychar(input)?;
Ok((
input,
Token::PlainText(before.fragment_between(&input).into()),
))
let (input, text) = map(recognize(anychar), Span::into_fragment)(input)?;
Ok((input, Token::PlainText(text.into())))
}
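
The rewritten text() above leans on recognize, which runs an inner parser (here a single anychar) and yields the exact slice of input it consumed, with map(..., Span::into_fragment) then unwrapping the LocatedSpan; this replaces the manual fragment_between bookkeeping. The combinator's behaviour, shown on a plain &str for brevity:

use nom::character::complete::anychar;
use nom::combinator::recognize;
use nom::IResult;

// recognize() discards the inner parser's output and instead returns the
// consumed slice of the original input.
fn one_char(input: &str) -> IResult<&str, &str> {
    recognize(anychar)(input)
}

fn main() {
    assert_eq!(one_char("ab"), Ok(("b", "a")));
}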
fn url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
let (input, url_span) = recognize(tuple((
protocol,
url_chars(|input| not(url_chars_base)(input), false),
@@ -688,8 +805,10 @@ impl Context {
let (input, no_embed) = opt(tag("?"))(input)?;
let (input, _) = tag("[")(input)?;
let (input, _) = not(tag("["))(input)?;
let (input, label_span) =
recognize(many1(tuple((not(tag("](")), not_line_ending))))(input)?;
let (input, label_span) = recognize(many1(tuple((
not(tag("](")),
self.partial(Self::inline_label_safe_single),
))))(input)?;
let (input, _) = tag("]")(input)?;
let (input, _) = tag("(")(input)?;
let (input, url_span) = recognize(tuple((protocol, url_chars(tag("]"), true))))(input)?;
@@ -772,7 +891,7 @@ impl Context {
))
}
fn hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
// TODO: Skip when preceded by alphanumerics
let (input, _) = tag("#")(input)?;
@@ -843,9 +962,11 @@ fn url_chars<'a, T: 'a>(
#[cfg(test)]
mod test {
use crate::{url_chars, Context, Span};
use crate::{url_chars, Context, Span, Token};
use nom::bytes::complete::tag;
use nom::multi::many1;
use std::borrow::Cow;
use std::collections::HashMap;
#[test]
fn parse_url_chars() {
@@ -895,12 +1016,50 @@ mod test {
);
}
#[test]
fn parse_complex() {
let emoji = r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#;
assert_eq!(
Token::Function {
name: "x2".into(),
params: HashMap::new(),
inner: Box::new(Token::Sequence(vec![
Token::Function {
name: "sparkle".into(),
params: HashMap::new(),
inner: Box::new(Token::UnicodeEmoji("🥺".into())),
},
Token::UnicodeEmoji("💜".into()),
Token::Function {
name: "spin".into(),
params: {
let mut params = HashMap::new();
params.insert("y".into(), None);
params.insert("speed".into(), Some("5s".into()));
params
},
inner: Box::new(Token::UnicodeEmoji("❤️".into())),
},
Token::UnicodeEmoji("🦊".into()),
]))
},
Context.full(Span::new(emoji)).unwrap().1.merged()
)
}
#[test]
fn parse_emoji() {
let test = "🥺💜❤️🦊";
let ctx = Context;
let tokens = many1(ctx.partial(Context::unicode_emoji))(Span::from(test)).unwrap();
println!("{:#?}", tokens.1)
assert_eq!(
vec!["🥺", "💜", "❤️", "🦊"]
.into_iter()
.map(<&str as Into<Cow<_>>>::into)
.map(Token::UnicodeEmoji)
.collect::<Vec<_>>(),
tokens.1
);
}
}