Compare commits
No commits in common. "c4a8ebebf38a766bfc41f9b303d3bb090140ea3c" and "86d5c87e9a20a05583d831bfef65fe6bcf6413bc" have entirely different histories.
c4a8ebebf3 ... 86d5c87e9a
@@ -6,8 +6,8 @@ use nom::character::complete::{
     alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
     satisfy, space1, tab,
 };
-use nom::combinator::{eof, fail, map, not, opt, peek, recognize};
-use nom::error::ErrorKind;
+use nom::combinator::{eof, fail, map, not, opt, recognize};
+use nom::error::{ErrorKind, ParseError};
 use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
 use nom::{IResult, Offset, Parser, Slice};
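One thing the newly imported `ParseError` trait makes possible is building a nom error by hand at the current input position via `from_error_kind`. A minimal sketch, assuming nom 7 and plain `&str` input; `reject_empty` is an illustrative helper, not part of this diff:

```rust
use nom::error::{ErrorKind, ParseError};
use nom::{Err, IResult};

// Fail with a hand-built error when the input is empty, otherwise consume it all.
fn reject_empty(input: &str) -> IResult<&str, &str> {
    if input.is_empty() {
        // `from_error_kind` comes from the `ParseError` trait, so the trait
        // must be in scope for this call to resolve.
        return Err(Err::Error(nom::error::Error::from_error_kind(
            input,
            ErrorKind::Eof,
        )));
    }
    Ok(("", input))
}

fn main() {
    assert!(reject_empty("").is_err());
    assert_eq!(reject_empty("x"), Ok(("", "x")));
}
```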
@@ -273,11 +273,8 @@ fn collect_char_sequence(
 }
 
 #[inline]
-fn space1_unicode(input: Span) -> IResult<Span, Span> {
-    recognize(many1_count(tuple((
-        not(line_ending),
-        satisfy(char::is_whitespace),
-    ))))(input)
+fn alpha1_unicode(input: Span) -> IResult<Span, Span> {
+    recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
 }
 
 #[inline]
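A minimal sketch of the character-class pattern on the right-hand side of this hunk, assuming nom 7 and plain `&str` input rather than the crate's `Span` type: `satisfy` tests one character, `many1_count` requires at least one match, and `recognize` returns the consumed slice instead of the count.

```rust
use nom::character::complete::satisfy;
use nom::combinator::recognize;
use nom::multi::many1_count;
use nom::IResult;

// Unlike nom's ASCII-only `alphanumeric1`, this accepts any Unicode
// alphanumeric character.
fn alpha1_unicode_demo(input: &str) -> IResult<&str, &str> {
    recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
}

fn main() {
    assert_eq!(alpha1_unicode_demo("héllo world"), Ok((" world", "héllo")));
}
```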
@@ -773,14 +770,6 @@ impl Context {
             }
         }
 
-        if let FlankingRule::Strict = opening_rule {
-            let (input, pre) =
-                opt(recognize(tuple((alphanumeric1_unicode, &opening_tag))))(input)?;
-            if let Some(pre_text) = pre {
-                return Ok((input, Token::PlainText(pre_text.into_fragment().into())));
-            }
-        }
-
         let begin = input;
         let (post_open, _) = opening_tag(input)?;
 
@@ -843,12 +832,8 @@ impl Context {
             true
         };
 
-        let (input, alphanum) = opt(peek(alphanumeric1_unicode))(input)?;
-        let correct_right_outer =
-            alphanum.is_none() || !matches!(closing_rule, FlankingRule::Strict);
-
-        let correct_flanking =
-            correct_left_flanking && correct_right_flanking && correct_right_outer;
+        // TODO: Unfinished flanking rules
+        let correct_flanking = correct_left_flanking && correct_right_flanking;
 
         if !correct_flanking {
             return Ok((
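A minimal sketch of the lookahead the removed `correct_right_outer` check relies on, assuming nom 7 and `&str` input, with nom's ASCII `alphanumeric1` standing in for the crate's `alphanumeric1_unicode`: `peek` runs a parser without consuming anything, and `opt` turns its failure into `None`, so the flanking decision can depend on what follows the tag without moving the cursor.

```rust
use nom::character::complete::alphanumeric1;
use nom::combinator::{opt, peek};
use nom::IResult;

// Report whether the next characters are alphanumeric, consuming nothing.
fn followed_by_alphanumeric(input: &str) -> IResult<&str, bool> {
    let (input, alnum) = opt(peek(alphanumeric1))(input)?;
    Ok((input, alnum.is_some()))
}

fn main() {
    assert_eq!(followed_by_alphanumeric("abc"), Ok(("abc", true)));
    assert_eq!(followed_by_alphanumeric(" abc"), Ok((" abc", false)));
}
```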
@@ -1308,12 +1293,7 @@ impl Context {
     }
 
     fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-        let (input, maybe_preceded) =
-            opt(recognize(tuple((alphanumeric1_unicode, tag("#")))))(input)?;
-
-        if let Some(preceded) = maybe_preceded {
-            return Ok((input, Token::PlainText(preceded.into_fragment().into())));
-        }
-
+        // TODO: Skip when preceded by alphanumerics
        let (input, _) = tag("#")(input)?;
 
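A minimal sketch of the bail-out pattern removed here (and in the strict-flanking hunk above), assuming nom 7, `&str` input, and a simplified stand-in for the crate's `Token` type: if alphanumerics are glued to the `#`, the whole run is consumed and emitted as plain text, so `thing#tag` never becomes a hashtag.

```rust
use nom::bytes::complete::tag;
use nom::character::complete::alphanumeric1;
use nom::combinator::{opt, recognize};
use nom::sequence::tuple;
use nom::IResult;

#[derive(Debug, PartialEq)]
enum Token {
    PlainText(String),
    Hashtag(String),
}

fn hashtag(input: &str) -> IResult<&str, Token> {
    // If alphanumerics immediately precede the "#", swallow them together
    // with the "#" and return the run as plain text instead of a hashtag.
    let (input, preceded) = opt(recognize(tuple((alphanumeric1, tag("#")))))(input)?;
    if let Some(pre) = preceded {
        return Ok((input, Token::PlainText(pre.to_string())));
    }
    let (input, _) = tag("#")(input)?;
    let (input, name) = alphanumeric1(input)?;
    Ok((input, Token::Hashtag(name.to_string())))
}

fn main() {
    assert_eq!(
        hashtag("#tag rest"),
        Ok((" rest", Token::Hashtag("tag".to_string())))
    );
    assert!(matches!(hashtag("not#tag"), Ok((_, Token::PlainText(_)))));
}
```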
@@ -1367,8 +1347,8 @@ impl Context {
                 tag(")"),
             ))),
             recognize(tuple((
-                not(space1_unicode),
-                not(line_ending),
+                not(space1),
+                not_line_ending,
                 not(one_of(".,:;!?#?/[]【】()「」()<>")),
                 anychar,
             ))),
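A minimal sketch of the negative-lookahead pattern in this hunk, assuming nom 7 and `&str` input: each `not(...)` succeeds without consuming anything only when its inner parser fails at the current position, and `anychar` then consumes the vetted character. Note that nom's `not_line_ending` (right-hand side) consumes a run of non-newline characters, unlike `not(line_ending)` (left-hand side), which consumes nothing.

```rust
use nom::character::complete::{anychar, char as one_char, space1};
use nom::combinator::{not, recognize};
use nom::multi::many1_count;
use nom::sequence::tuple;
use nom::IResult;

/// Consume one or more characters that are neither whitespace nor ')'.
fn word_chars(input: &str) -> IResult<&str, &str> {
    recognize(many1_count(tuple((
        not(space1),          // stop before whitespace
        not(one_char(')')),   // stop before ')'
        anychar,              // otherwise take exactly one character
    ))))(input)
}

fn main() {
    assert_eq!(word_chars("link)rest"), Ok((")rest", "link")));
    assert_eq!(word_chars("two words"), Ok((" words", "two")));
}
```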
@@ -1521,20 +1501,6 @@ mod test {
             Token::PlainText("* italic *".into())
         );
 
-        assert_eq!(
-            parse_full("snake_case_variable"),
-            Token::PlainText("snake_case_variable".into())
-        );
-
-        assert_eq!(
-            parse_full("intra*word*italic"),
-            Token::Sequence(vec![
-                Token::PlainText("intra".into()),
-                Token::Italic(Box::new(Token::PlainText("word".into()))),
-                Token::PlainText("italic".into())
-            ])
-        );
-
         assert_eq!(
             parse_full(r#"_ italic *"#),
             Token::PlainText("_ italic *".into())
@@ -1733,20 +1699,6 @@ text</center>"#
             ])
         );
 
-        assert_eq!(
-            parse_full("test #hashtag tail"),
-            Token::Sequence(vec![
-                Token::PlainText("test ".into()),
-                Token::Hashtag("hashtag".into()),
-                Token::PlainText(" tail".into())
-            ])
-        );
-
-        assert_eq!(
-            parse_full("not#hashtag tail"),
-            Token::PlainText("not#hashtag tail".into())
-        );
-
         assert_eq!(
             parse_full("<https://example.com>"),
             Token::UrlNoEmbed("https://example.com".into())