diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs index 4806587..26661e6 100644 --- a/magnetar_mmm_parser/src/lib.rs +++ b/magnetar_mmm_parser/src/lib.rs @@ -6,7 +6,7 @@ use nom::character::complete::{ alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of, satisfy, space1, tab, }; -use nom::combinator::{eof, fail, map, not, opt, recognize}; +use nom::combinator::{eof, fail, map, not, opt, peek, recognize}; use nom::error::ErrorKind; use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1}; use nom::sequence::tuple; @@ -272,11 +272,6 @@ fn collect_char_sequence( move |chars| func(chars.collect()) } -#[inline] -fn alpha1_unicode(input: Span) -> IResult { - recognize(many1_count(satisfy(char::is_alphanumeric)))(input) -} - #[inline] fn space1_unicode(input: Span) -> IResult { recognize(many1_count(tuple(( @@ -778,6 +773,14 @@ impl Context { } } + if let FlankingRule::Strict = opening_rule { + let (input, pre) = + opt(recognize(tuple((alphanumeric1_unicode, &opening_tag))))(input)?; + if let Some(pre_text) = pre { + return Ok((input, Token::PlainText(pre_text.into_fragment().into()))); + } + } + let begin = input; let (post_open, _) = opening_tag(input)?; @@ -840,8 +843,12 @@ impl Context { true }; - // TODO: Unfinished flanking rules - let correct_flanking = correct_left_flanking && correct_right_flanking; + let (input, alphanum) = opt(peek(alphanumeric1_unicode))(input)?; + let correct_right_outer = + alphanum.is_none() || !matches!(closing_rule, FlankingRule::Strict); + + let correct_flanking = + correct_left_flanking && correct_right_flanking && correct_right_outer; if !correct_flanking { return Ok(( @@ -1514,6 +1521,20 @@ mod test { Token::PlainText("* italic *".into()) ); + assert_eq!( + parse_full("snake_case_variable"), + Token::PlainText("snake_case_variable".into()) + ); + + assert_eq!( + parse_full("intra*word*italic"), + Token::Sequence(vec![ + Token::PlainText("intra".into()), + Token::Italic(Box::new(Token::PlainText("word".into()))), + Token::PlainText("italic".into()) + ]) + ); + assert_eq!( parse_full(r#"_ italic *"#), Token::PlainText("_ italic *".into())