MMM: Fixed hashtag parsing

2023-10-23 23:52:02 +02:00 · 2023-10-23 23:52:02 +02:00 · 42fa83c6e2
parent 86d5c87e9a
commit 42fa83c6e2
1 changed file with 31 additions and 4 deletions
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@ -7,7 +7,7 @@ use nom::character::complete::{
    satisfy, space1, tab,
 };
 use nom::combinator::{eof, fail, map, not, opt, recognize};
-use nom::error::{ErrorKind, ParseError};
+use nom::error::ErrorKind;
 use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
 use nom::{IResult, Offset, Parser, Slice};
@ -277,6 +277,14 @@ fn alpha1_unicode(input: Span) -> IResult<Span, Span> {
    recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
 }
 /// Recognizes a run of one or more whitespace characters (as judged by
 /// `char::is_whitespace`) that does not run into a line ending.
 ///
 /// Every character is accepted only if `line_ending` does not match at that
 /// position, so the run stops right before a line break. The whole consumed
 /// run is returned as a single span; the parser fails when not even one
 /// such character is present.
 #[inline]
 fn space1_unicode(input: Span) -> IResult<Span, Span> {
    // One whitespace character, guarded by a negative lookahead for a line ending.
    let inline_whitespace_char = tuple((
        not(line_ending),
        satisfy(|c: char| c.is_whitespace()),
    ));

    recognize(many1_count(inline_whitespace_char))(input)
 }
 #[inline]
 fn alphanumeric1_unicode(input: Span) -> IResult<Span, Span> {
    recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
@ -1293,7 +1301,12 @@ impl Context {
    }
    fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-        // TODO: Skip when preceded by alphanumerics
+        let (input, maybe_preceded) =
            opt(recognize(tuple((alphanumeric1_unicode, tag("#")))))(input)?;
        if let Some(preceded) = maybe_preceded {
            return Ok((input, Token::PlainText(preceded.into_fragment().into())));
        }
        let (input, _) = tag("#")(input)?;
@ -1347,8 +1360,8 @@ impl Context {
                tag("）"),
            ))),
            recognize(tuple((
-                not(space1),
+                not(space1_unicode),
-                not_line_ending,
+                not(line_ending),
                not(one_of(".,:;!?#?/[]【】()「」（）<>")),
                anychar,
            ))),
@ -1699,6 +1712,20 @@ text</center>"#
            ])
        );
        assert_eq!(
            parse_full("test #hashtag tail"),
            Token::Sequence(vec![
                Token::PlainText("test ".into()),
                Token::Hashtag("hashtag".into()),
                Token::PlainText(" tail".into())
            ])
        );
        assert_eq!(
            parse_full("not#hashtag tail"),
            Token::PlainText("not#hashtag tail".into())
        );
        assert_eq!(
            parse_full("<https://example.com>"),
            Token::UrlNoEmbed("https://example.com".into())