diff --git a/Cargo.lock b/Cargo.lock index 5b8dcd4..9abfe30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2499,18 +2499,18 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "serde" -version = "1.0.180" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.180" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs index 9ed647e..ee70239 100644 --- a/magnetar_mmm_parser/src/lib.rs +++ b/magnetar_mmm_parser/src/lib.rs @@ -1,16 +1,20 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete; -use nom::character::complete::{anychar, line_ending, not_line_ending, tab}; -use nom::combinator::{fail, not, opt}; +use nom::character::complete::{ + alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, tab, +}; +use nom::combinator::{fail, not, opt, recognize}; use nom::error::ErrorKind; -use nom::multi::{many1, separated_list1}; +use nom::multi::{many0, many0_count, many1, many1_count, separated_list1}; use nom::sequence::tuple; use nom::{IResult, Offset, Slice}; use nom_locate::LocatedSpan; use std::borrow::Cow; +use std::collections::HashMap; -enum Token<'a> { +#[derive(Clone, Debug)] +pub enum Token<'a> { PlainText(Cow<'a, str>), Sequence(Vec>), Quote(Box>), @@ -24,6 +28,16 @@ enum Token<'a> { PlainTag(Cow<'a, str>), InlineCode(Cow<'a, str>), InlineMath(Cow<'a, str>), + BlockCode { + lang: Option>, + inner: Cow<'a, str>, + }, + BlockMath(Cow<'a, str>), + Function { + name: Cow<'a, str>, + params: HashMap, Option>>, + inner: Box>, + }, } impl Token<'_> { @@ -42,6 +56,28 @@ impl Token<'_> { Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())), Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())), Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())), + Token::BlockCode { inner, lang } => Token::BlockCode { + lang: lang.as_ref().map(|l| Cow::Owned(l.clone().into_owned())), + inner: Cow::Owned(inner.clone().into_owned()), + }, + Token::BlockMath(math) => Token::BlockMath(Cow::Owned(math.clone().into_owned())), + Token::Function { + name, + params, + inner, + } => Token::Function { + name: Cow::Owned(name.clone().into_owned()), + params: params + .iter() + .map(|(k, v)| { + ( + Cow::Owned(k.clone().into_owned()), + v.as_ref().map(|val| Cow::Owned(val.clone().into_owned())), + ) + }) + .collect(), + inner: Box::new(inner.owned()), + }, } } } @@ -84,6 +120,7 @@ const fn collect_char_sequence<'a>( fn spliced<'a>( segments: &[Span<'a>], func: impl Fn(Span) -> IResult, + output_mapper: impl Fn(Box>) -> Token<'static>, parent: Span<'a>, ) -> IResult, Token<'static>, nom::error::Error>> { let combined = segments @@ -143,7 +180,7 @@ fn spliced<'a>( parent }; - Ok((out, Token::Quote(Box::new(inner.owned())))) + Ok((out, output_mapper(Box::new(inner.owned())))) } fn space(input: Span) -> IResult { @@ -204,13 +241,69 @@ impl Context { return fail(input); } - let (_, inner) = spliced("e_lines, space, orig_input)?; + let (_, inner) = spliced("e_lines, space, Token::Quote, orig_input)?; let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?; Ok((input, Token::Quote(Box::new(inner)))) } + fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + let tag_start = &tag("
"); + let tag_end = &tag("
"); + + let (input, _) = opt(line_ending)(input)?; + + if input.get_column() != 0 { + return fail(input); + } + + let (input, _) = tag_start(input)?; + let (input, _) = opt(line_ending)(input)?; + + let (input, center_seq) = many0(tuple(( + not(tuple((opt(line_ending), tag_end))), + self.partial(Self::inline), + )))(input)?; + + let (input, _) = opt(line_ending)(input)?; + let (input, _) = tag_end(input)?; + let (input, _) = many0(space)(input)?; + let (input, _) = not(not_line_ending)(input)?; + let (input, _) = opt(line_ending)(input)?; + + let tokens = center_seq.into_iter().map(|(_, v)| v).collect::>(); + + Ok((input, boxing_sequence(Token::Center)(tokens))) + } + + fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + let (input, _) = opt(line_ending)(input)?; + + if input.get_column() != 0 { + return fail(input); + } + + let (input, _) = tag("\\[")(input)?; + let (input, _) = opt(line_ending)(input)?; + + let (input, math_span) = recognize(many1_count(tuple(( + not(tuple((opt(line_ending), tag("\\]")))), + not_line_ending, + ))))(input)?; + + let (input, _) = opt(line_ending)(input)?; + let (input, _) = tag("\\]")(input)?; + let (input, _) = many0(space)(input)?; + let (input, _) = not(not_line_ending)(input)?; + let (input, _) = opt(line_ending)(input)?; + + Ok(( + input, + Token::BlockMath(Cow::Borrowed(math_span.into_fragment())), + )) + } + const fn tag_delimited<'a, 'b: 'a, T>( &'a self, start: &'b str, @@ -252,6 +345,57 @@ impl Context { } } + fn tag_func<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + let (input, _) = tag("$[")(input)?; + + let func_ident = |input| { + recognize(tuple(( + many1_count(alt((alpha1, tag("_")))), + many0_count(alt((alphanumeric1, tag("_")))), + )))(input) + }; + + let param_value = recognize(many1_count(alt(( + alphanumeric1, + tag("."), + tag("-"), + tag("_"), + )))); + + let (input, func_name_span) = func_ident(input)?; + let func_name = func_name_span.into_fragment(); + + let arg = tuple((func_ident, opt(tuple((tag("="), param_value))))); + + let (input, args) = + opt(tuple((one_char('.'), separated_list1(one_char(','), arg))))(input)?; + + let args_out = args.map_or_else(HashMap::new, |(_, items)| { + items + .into_iter() + .map(|(k, v)| { + ( + Cow::from(k.into_fragment()), + v.map(|(_, val)| Cow::from(val.into_fragment())), + ) + }) + .collect::>() + }); + + let (input, inner) = self.partial(Self::inline)(input)?; + + let (input, _) = tag("]")(input)?; + + Ok(( + input, + Token::Function { + name: Cow::from(func_name), + params: args_out, + inner: Box::new(inner), + }, + )) + } + fn tag_small<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "",