MMM: Janky outer flanking rules implementation

MMM: Fixed hashtag parsing
2023-10-24 00:27:54 +02:00 · 2023-10-23 23:52:02 +02:00
1 changed file with 57 additions and 9 deletions
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -6,8 +6,8 @@ use nom::character::complete::{
    alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
    satisfy, space1, tab,
 };
-use nom::combinator::{eof, fail, map, not, opt, recognize};
+use nom::combinator::{eof, fail, map, not, opt, peek, recognize};
-use nom::error::{ErrorKind, ParseError};
+use nom::error::ErrorKind;
 use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
 use nom::{IResult, Offset, Parser, Slice};
@@ -273,8 +273,11 @@ fn collect_char_sequence(
 }
 #[inline]
-fn alpha1_unicode(input: Span) -> IResult<Span, Span> {
+fn space1_unicode(input: Span) -> IResult<Span, Span> {
-    recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
+    recognize(many1_count(tuple((
        not(line_ending),
        satisfy(char::is_whitespace),
    ))))(input)
 }
 #[inline]
@@ -770,6 +773,14 @@ impl Context {
                }
            }
            if let FlankingRule::Strict = opening_rule {
                let (input, pre) =
                    opt(recognize(tuple((alphanumeric1_unicode, &opening_tag))))(input)?;
                if let Some(pre_text) = pre {
                    return Ok((input, Token::PlainText(pre_text.into_fragment().into())));
                }
            }
            let begin = input;
            let (post_open, _) = opening_tag(input)?;
@@ -832,8 +843,12 @@ impl Context {
                    true
                };
-            // TODO: Unfinished flanking rules
+            let (input, alphanum) = opt(peek(alphanumeric1_unicode))(input)?;
-            let correct_flanking = correct_left_flanking && correct_right_flanking;
+            let correct_right_outer =
                alphanum.is_none() || !matches!(closing_rule, FlankingRule::Strict);
            let correct_flanking =
                correct_left_flanking && correct_right_flanking && correct_right_outer;
            if !correct_flanking {
                return Ok((
@@ -1293,7 +1308,12 @@ impl Context {
    }
    fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-        // TODO: Skip when preceded by alphanumerics
+        let (input, maybe_preceded) =
            opt(recognize(tuple((alphanumeric1_unicode, tag("#")))))(input)?;
        if let Some(preceded) = maybe_preceded {
            return Ok((input, Token::PlainText(preceded.into_fragment().into())));
        }
        let (input, _) = tag("#")(input)?;
@@ -1347,8 +1367,8 @@ impl Context {
                tag("）"),
            ))),
            recognize(tuple((
-                not(space1),
+                not(space1_unicode),
-                not_line_ending,
+                not(line_ending),
                not(one_of(".,:;!?#?/[]【】()「」（）<>")),
                anychar,
            ))),
@@ -1501,6 +1521,20 @@ mod test {
            Token::PlainText("* italic *".into())
        );
        assert_eq!(
            parse_full("snake_case_variable"),
            Token::PlainText("snake_case_variable".into())
        );
        assert_eq!(
            parse_full("intra*word*italic"),
            Token::Sequence(vec![
                Token::PlainText("intra".into()),
                Token::Italic(Box::new(Token::PlainText("word".into()))),
                Token::PlainText("italic".into())
            ])
        );
        assert_eq!(
            parse_full(r#"_ italic *"#),
            Token::PlainText("_ italic *".into())
@@ -1699,6 +1733,20 @@ text</center>"#
            ])
        );
        assert_eq!(
            parse_full("test #hashtag tail"),
            Token::Sequence(vec![
                Token::PlainText("test ".into()),
                Token::Hashtag("hashtag".into()),
                Token::PlainText(" tail".into())
            ])
        );
        assert_eq!(
            parse_full("not#hashtag tail"),
            Token::PlainText("not#hashtag tail".into())
        );
        assert_eq!(
            parse_full("<https://example.com>"),
            Token::UrlNoEmbed("https://example.com".into())
Author	SHA1	Message	Date
Natty	c4a8ebebf3	MMM: Janky outer flanking rules implementation	2023-10-24 00:27:54 +02:00
Natty	42fa83c6e2	MMM: Fixed hashtag parsing	2023-10-23 23:52:02 +02:00