use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete; use nom::character::complete::{ alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, tab, }; use nom::combinator::{fail, not, opt, recognize}; use nom::error::ErrorKind; use nom::multi::{many0, many0_count, many1, many1_count, separated_list1}; use nom::sequence::tuple; use nom::{IResult, Offset, Slice}; use nom_locate::LocatedSpan; use std::borrow::Cow; use std::collections::HashMap; #[derive(Clone, Debug)] pub enum Token<'a> { PlainText(Cow<'a, str>), Sequence(Vec>), Quote(Box>), Small(Box>), Big(Box>), BoldItalic(Box>), Bold(Box>), Italic(Box>), Center(Box>), Strikethrough(Box>), PlainTag(Cow<'a, str>), InlineCode(Cow<'a, str>), InlineMath(Cow<'a, str>), BlockCode { lang: Option>, inner: Cow<'a, str>, }, BlockMath(Cow<'a, str>), Function { name: Cow<'a, str>, params: HashMap, Option>>, inner: Box>, }, } impl Token<'_> { fn owned(&self) -> Token<'static> { match self { Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())), Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()), Token::Quote(inner) => Token::Quote(Box::new(inner.owned())), Token::Small(inner) => Token::Small(Box::new(inner.owned())), Token::Big(inner) => Token::Big(Box::new(inner.owned())), Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())), Token::Bold(inner) => Token::Bold(Box::new(inner.owned())), Token::Italic(inner) => Token::Italic(Box::new(inner.owned())), Token::Center(inner) => Token::Center(Box::new(inner.owned())), Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.owned())), Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())), Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())), Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())), Token::BlockCode { inner, lang } => Token::BlockCode { lang: lang.as_ref().map(|l| Cow::Owned(l.clone().into_owned())), inner: Cow::Owned(inner.clone().into_owned()), }, Token::BlockMath(math) => Token::BlockMath(Cow::Owned(math.clone().into_owned())), Token::Function { name, params, inner, } => Token::Function { name: Cow::Owned(name.clone().into_owned()), params: params .iter() .map(|(k, v)| { ( Cow::Owned(k.clone().into_owned()), v.as_ref().map(|val| Cow::Owned(val.clone().into_owned())), ) }) .collect(), inner: Box::new(inner.owned()), }, } } } type Span<'a> = LocatedSpan<&'a str>; trait SliceOffset { fn up_to(&self, other: &Self) -> Self; fn fragment_between<'a>(&self, other: &Self) -> &'a str where Self: 'a; } impl SliceOffset for Span<'_> { fn up_to(&self, other: &Self) -> Self { self.slice(..self.offset(other)) } fn fragment_between<'a>(&self, other: &Self) -> &'a str where Self: 'a, { self.up_to(other).into_fragment() } } const fn boxing_sequence<'a>( func: impl Fn(Box>) -> Token<'a>, ) -> impl Fn(Vec>) -> Token<'a> { move |tokens| func(Box::new(Token::Sequence(tokens))) } const fn collect_char_sequence<'a>( func: impl Fn(Cow<'a, str>) -> Token<'a>, ) -> impl Fn(Vec) -> Token<'a> { move |chars| func(Cow::Owned(chars.into_iter().collect())) } fn spliced<'a>( segments: &[Span<'a>], func: impl Fn(Span) -> IResult, output_mapper: impl Fn(Box>) -> Token<'static>, parent: Span<'a>, ) -> IResult, Token<'static>, nom::error::Error>> { let combined = segments .iter() .copied() .map(Span::into_fragment) .collect::(); let cum_offset_combined = segments .iter() .scan(0, |acc, &x| { *acc += x.len(); Some(*acc) }) .collect::>(); let current_seg = |input: Span| { cum_offset_combined .iter() .enumerate() .filter(|(_, &o)| o >= input.location_offset()) .map(|(i, o)| (segments[i], o)) .last() }; type NE = nom::Err; type NomError<'x> = nom::error::Error>; let quote_span = Span::new(&combined); let (input, inner) = match func(quote_span) { Ok((input, token)) => (input, token.owned()), Err(e) => { return match e { NE::Error(e) => { let offset_new = e.input.location_offset(); if let Some((seg_parent, offset_seg_new)) = current_seg(e.input) { let offset = offset_new - offset_seg_new; let offset_orig = offset + seg_parent.location_offset(); Err(NE::Error(NomError::new( Span::new(&parent.into_fragment()[offset_orig..]), e.code, ))) } else { // ??? Err(NE::Failure(NomError::new(parent, ErrorKind::Fail))) } } NE::Failure(e) => Err(NE::Error(NomError::new(parent, e.code))), NE::Incomplete(i) => Err(NE::Incomplete(i)), }; } }; let out = if let Some((seg_parent, offset_seg_new)) = current_seg(input) { let offset = input.location_offset() - offset_seg_new; let offset_orig = offset + seg_parent.location_offset(); parent.slice(offset_orig..) } else { parent }; Ok((out, output_mapper(Box::new(inner.owned())))) } fn space(input: Span) -> IResult { let start = input; let (input, _) = alt((complete::char('\u{0020}'), complete::char('\u{3000}'), tab))(input)?; Ok(( input, Token::PlainText(start.fragment_between(&input).into()), )) } struct Context; impl Context { #[inline] const fn partial<'a>( &self, func: impl Fn(&Self, Span<'a>) -> IResult, Token<'a>> + 'static, ) -> impl Fn(Span<'a>) -> IResult, Token<'a>> + '_ { move |input| func(self, input) } fn root<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { let (input, token) = alt((self.partial(Self::tag_quote),))(input)?; Ok((input, token)) } fn inline<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?; Ok((input, token)) } fn tag_quote<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?; if let (None, None) = leading_spaces { if input.get_column() != 0 { return fail(input); } } let quote_line = |input| tuple((tag(">"), opt(space), not_line_ending))(input); let orig_input = input; let (input, lines) = separated_list1(line_ending, quote_line)(input)?; let quote_lines = lines .into_iter() .map(|(_, _, text)| text) .collect::>(); if quote_lines.len() == 1 && quote_lines .iter() .map(Span::fragment) .copied() .any(&str::is_empty) { return fail(input); } let (_, inner) = spliced("e_lines, space, Token::Quote, orig_input)?; let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?; Ok((input, Token::Quote(Box::new(inner)))) } fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { let tag_start = &tag("
"); let tag_end = &tag("
"); let (input, _) = opt(line_ending)(input)?; if input.get_column() != 0 { return fail(input); } let (input, _) = tag_start(input)?; let (input, _) = opt(line_ending)(input)?; let (input, center_seq) = many0(tuple(( not(tuple((opt(line_ending), tag_end))), self.partial(Self::inline), )))(input)?; let (input, _) = opt(line_ending)(input)?; let (input, _) = tag_end(input)?; let (input, _) = many0(space)(input)?; let (input, _) = not(not_line_ending)(input)?; let (input, _) = opt(line_ending)(input)?; let tokens = center_seq.into_iter().map(|(_, v)| v).collect::>(); Ok((input, boxing_sequence(Token::Center)(tokens))) } fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { let start = &tag("\\["); let end = &tag("\\]"); let (input, _) = opt(line_ending)(input)?; if input.get_column() != 0 { return fail(input); } let (input, _) = start(input)?; let (input, _) = opt(line_ending)(input)?; let (input, math_span) = recognize(many1_count(tuple(( not(tuple((opt(line_ending), end))), not_line_ending, ))))(input)?; let (input, _) = opt(line_ending)(input)?; let (input, _) = end(input)?; let (input, _) = many0(space)(input)?; let (input, _) = not(not_line_ending)(input)?; let (input, _) = opt(line_ending)(input)?; Ok(( input, Token::BlockMath(Cow::Borrowed(math_span.into_fragment())), )) } const fn tag_delimited<'a, 'b: 'a, T>( &'a self, start: &'b str, end: &'b str, escape: bool, matcher_inner: impl Fn(Span<'b>) -> IResult, T> + 'a, mapper: impl Fn(Vec) -> Token<'b> + 'a, ) -> impl Fn(Span<'b>) -> IResult, Token<'b>> + '_ { move |input| { let opening_tag = &tag(start); let closing_tag = &tag(end); if escape { if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), opening_tag))(input) { return Ok((input_escaped, Token::PlainText(Cow::Borrowed(&mark)))); } } let begin = input; let (post_open, _) = opening_tag(input)?; let res = tuple(( many1(tuple((not(closing_tag), &matcher_inner))), closing_tag, ))(post_open); if let Err(nom::Err::Error(nom::error::Error { .. })) = res { return Ok(( post_open, Token::PlainText(begin.fragment_between(&post_open).into()), )); } let (input, (inner, _)) = res?; let inner = inner.into_iter().map(|(_, t)| t).collect::>(); Ok((input, mapper(inner))) } } fn tag_func<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { let (input, _) = tag("$[")(input)?; let func_ident = |input| { recognize(tuple(( many1_count(alt((alpha1, tag("_")))), many0_count(alt((alphanumeric1, tag("_")))), )))(input) }; let param_value = recognize(many1_count(alt(( alphanumeric1, tag("."), tag("-"), tag("_"), )))); let (input, func_name_span) = func_ident(input)?; let func_name = func_name_span.into_fragment(); let arg = tuple((func_ident, opt(tuple((tag("="), param_value))))); let (input, args) = opt(tuple((one_char('.'), separated_list1(one_char(','), arg))))(input)?; let args_out = args.map_or_else(HashMap::new, |(_, items)| { items .into_iter() .map(|(k, v)| { ( Cow::from(k.into_fragment()), v.map(|(_, val)| Cow::from(val.into_fragment())), ) }) .collect::>() }); let (input, inner) = self.partial(Self::inline)(input)?; let (input, _) = tag("]")(input)?; Ok(( input, Token::Function { name: Cow::from(func_name), params: args_out, inner: Box::new(inner), }, )) } fn tag_small<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "", "", false, self.partial(Self::inline), boxing_sequence(Token::Small), )(input) } // TODO: CommonMark flanking rules fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "***", "***", true, self.partial(Self::inline), boxing_sequence(Token::BoldItalic), )(input) } // TODO: CommonMark flanking rules fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "___", "___", true, self.partial(Self::inline), boxing_sequence(Token::BoldItalic), )(input) } fn tag_bold<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "", "", false, self.partial(Self::inline), boxing_sequence(Token::Bold), )(input) } // TODO: CommonMark flanking rules fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "**", "**", true, self.partial(Self::inline), boxing_sequence(Token::Bold), )(input) } // TODO: CommonMark flanking rules fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "__", "__", true, self.partial(Self::inline), boxing_sequence(Token::Bold), )(input) } fn tag_italic<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "", "", false, self.partial(Self::inline), boxing_sequence(Token::Italic), )(input) } // TODO: CommonMark flanking rules fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "*", "*", true, self.partial(Self::inline), boxing_sequence(Token::Italic), )(input) } // TODO: CommonMark flanking rules fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "_", "_", true, self.partial(Self::inline), boxing_sequence(Token::Italic), )(input) } fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "", "", false, self.partial(Self::inline), boxing_sequence(Token::Strikethrough), )(input) } // TODO: CommonMark flanking rules fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "~~", "~~", true, move |input| { tuple((not_line_ending, self.partial(Self::inline)))(input).map(|(i, t)| (i, t.1)) }, boxing_sequence(Token::Strikethrough), )(input) } fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "`", "", true, move |input| { tuple((not(alt((tag("`"), tag("ยด"), line_ending))), anychar))(input) .map(|(i, (_skip, c))| (i, c)) }, collect_char_sequence(Token::InlineCode), )(input) } fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { self.tag_delimited( "\\(", "\\)", false, move |input| tuple((not_line_ending, anychar))(input).map(|(i, (_skip, c))| (i, c)), collect_char_sequence(Token::InlineMath), )(input) } fn text<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { let before = input; let (input, _) = anychar(input)?; Ok(( input, Token::PlainText(before.fragment_between(&input).into()), )) } }