Hashtag parsing

This commit is contained in:
Natty 2023-10-05 22:02:46 +02:00
parent 8009546bfe
commit 7c8e65f556
Signed by: natty
GPG Key ID: BF6CB659ADEE60EC
1 changed file with 29 additions and 0 deletions

View File

@ -68,6 +68,7 @@ pub enum Token<'a> {
}, },
UnicodeEmoji(Cow<'a, str>), UnicodeEmoji(Cow<'a, str>),
ShortcodeEmoji(Cow<'a, str>), ShortcodeEmoji(Cow<'a, str>),
Hashtag(Cow<'a, str>),
} }
impl Token<'_> { impl Token<'_> {
@ -128,6 +129,7 @@ impl Token<'_> {
Token::ShortcodeEmoji(shortcode) => { Token::ShortcodeEmoji(shortcode) => {
Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned())) Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
} }
Token::Hashtag(url) => Token::Hashtag(Cow::Owned(url.clone().into_owned())),
} }
} }
} }
@ -703,6 +705,33 @@ impl Context {
}, },
)) ))
} }
fn hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
// TODO: Skip when preceded by alphanumerics
let (input, _) = tag("#")(input)?;
let (input, hashtag_text) =
map(recognize(many1(hashtag_chars)), Span::into_fragment)(input)?;
Ok((input, Token::Hashtag(hashtag_text.into())))
}
}
#[inline]
fn hashtag_chars(input: Span) -> IResult<Span, Span> {
recognize(alt((
recognize(tuple((tag("("), hashtag_chars, tag(")")))),
recognize(tuple((tag("["), hashtag_chars, tag("]")))),
recognize(tuple((tag(""), hashtag_chars, tag("")))),
recognize(tuple((tag(""), hashtag_chars, tag("")))),
recognize(tuple((
not(space1),
not_line_ending,
not(one_of(".,:;!?#?/[]【】()「」()<>")),
anychar,
))),
)))(input)
} }
#[inline] #[inline]