MMM: Fixed hashtag parsing

This commit is contained in:
Natty 2023-10-23 23:52:02 +02:00
parent 86d5c87e9a
commit 42fa83c6e2
Signed by: natty
GPG Key ID: BF6CB659ADEE60EC
1 changed file with 31 additions and 4 deletions

View File

@ -7,7 +7,7 @@ use nom::character::complete::{
satisfy, space1, tab,
};
use nom::combinator::{eof, fail, map, not, opt, recognize};
use nom::error::{ErrorKind, ParseError};
use nom::error::ErrorKind;
use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
use nom::sequence::tuple;
use nom::{IResult, Offset, Parser, Slice};
@ -277,6 +277,14 @@ fn alpha1_unicode(input: Span) -> IResult<Span, Span> {
recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
}
/// Recognizes a run of one or more whitespace characters (as judged by
/// `char::is_whitespace`) that does not cross a line ending.
///
/// Before each character is consumed, `not(line_ending)` is checked, so the
/// run stops in front of anything `line_ending` would match. The whole run is
/// returned as a single span; the parser errors if not even one character
/// qualifies.
#[inline]
fn space1_unicode(input: Span) -> IResult<Span, Span> {
    // One whitespace character that is not the start of a line ending.
    let inline_whitespace = tuple((not(line_ending), satisfy(char::is_whitespace)));

    recognize(many1_count(inline_whitespace))(input)
}
#[inline]
fn alphanumeric1_unicode(input: Span) -> IResult<Span, Span> {
recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
@ -1293,7 +1301,12 @@ impl Context {
}
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
// TODO: Skip when preceded by alphanumerics
let (input, maybe_preceded) =
opt(recognize(tuple((alphanumeric1_unicode, tag("#")))))(input)?;
if let Some(preceded) = maybe_preceded {
return Ok((input, Token::PlainText(preceded.into_fragment().into())));
}
let (input, _) = tag("#")(input)?;
@ -1347,8 +1360,8 @@ impl Context {
tag(""),
))),
recognize(tuple((
not(space1),
not_line_ending,
not(space1_unicode),
not(line_ending),
not(one_of(".,:;!?#?/[]【】()「」()<>")),
anychar,
))),
@ -1699,6 +1712,20 @@ text</center>"#
])
);
assert_eq!(
parse_full("test #hashtag tail"),
Token::Sequence(vec![
Token::PlainText("test ".into()),
Token::Hashtag("hashtag".into()),
Token::PlainText(" tail".into())
])
);
assert_eq!(
parse_full("not#hashtag tail"),
Token::PlainText("not#hashtag tail".into())
);
assert_eq!(
parse_full("<https://example.com>"),
Token::UrlNoEmbed("https://example.com".into())