Fixed link parsing

This commit is contained in:
Natty 2023-10-07 20:40:01 +02:00
parent 154cc27c07
commit d2bc679740
Signed by: natty
GPG Key ID: BF6CB659ADEE60EC
1 changed file with 165 additions and 19 deletions

View File

@ -48,7 +48,7 @@ pub enum Token<'a> {
UrlRaw(Cow<'a, str>), UrlRaw(Cow<'a, str>),
UrlNoEmbed(Cow<'a, str>), UrlNoEmbed(Cow<'a, str>),
Link { Link {
label: Cow<'a, str>, label: Box<Token<'a>>,
href: Cow<'a, str>, href: Cow<'a, str>,
embed: bool, embed: bool,
}, },
@ -91,7 +91,7 @@ impl Token<'_> {
Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())), Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())),
Token::Link { embed, label, href } => Token::Link { Token::Link { embed, label, href } => Token::Link {
embed: *embed, embed: *embed,
label: Cow::Owned(label.clone().into_owned()), label: Box::new(label.owned()),
href: Cow::Owned(href.clone().into_owned()), href: Cow::Owned(href.clone().into_owned()),
}, },
Token::BlockCode { inner, lang } => Token::BlockCode { Token::BlockCode { inner, lang } => Token::BlockCode {
@ -183,6 +183,11 @@ impl Token<'_> {
Token::Italic(inner) => Token::Italic(Box::new(inner.merged())), Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
Token::Center(inner) => Token::Center(Box::new(inner.merged())), Token::Center(inner) => Token::Center(Box::new(inner.merged())),
Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())), Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
Token::Link { embed, label, href } => Token::Link {
label: Box::new(label.merged()),
href: href.clone(),
embed: *embed,
},
Token::Function { Token::Function {
name, name,
params, params,
@ -387,12 +392,14 @@ impl Context {
fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
let (input, token) = alt(( let (input, token) = alt((
self.partial(Self::unicode_emoji), self.partial(Self::unicode_emoji),
self.partial(Self::tag_block_center), alt((
self.partial(Self::tag_small), self.partial(Self::tag_block_center),
self.partial(Self::tag_plain), self.partial(Self::tag_small),
self.partial(Self::tag_bold), self.partial(Self::tag_plain),
self.partial(Self::tag_italic), self.partial(Self::tag_bold),
self.partial(Self::tag_strikethrough), self.partial(Self::tag_italic),
self.partial(Self::tag_strikethrough),
)),
self.partial(Self::url_no_embed), self.partial(Self::url_no_embed),
self.partial(Self::base_bold_italic), self.partial(Self::base_bold_italic),
self.partial(Self::tag_block_code), self.partial(Self::tag_block_code),
@ -405,6 +412,7 @@ impl Context {
self.partial(Self::tag_mention), self.partial(Self::tag_mention),
self.partial(Self::tag_hashtag), self.partial(Self::tag_hashtag),
self.partial(Self::shortcode_emoji), self.partial(Self::shortcode_emoji),
self.partial(Self::link),
self.partial(Self::raw_url), self.partial(Self::raw_url),
self.partial(Self::text), self.partial(Self::text),
))(input)?; ))(input)?;
@ -428,6 +436,7 @@ impl Context {
self.partial(Self::tag_mention), self.partial(Self::tag_mention),
self.partial(Self::tag_hashtag), self.partial(Self::tag_hashtag),
self.partial(Self::shortcode_emoji), self.partial(Self::shortcode_emoji),
self.partial(Self::link),
self.partial(Self::raw_url), self.partial(Self::raw_url),
self.partial(Self::text), self.partial(Self::text),
))(input)?; ))(input)?;
@ -989,19 +998,15 @@ impl Context {
let (input, no_embed) = opt(tag("?"))(input)?; let (input, no_embed) = opt(tag("?"))(input)?;
let (input, _) = tag("[")(input)?; let (input, _) = tag("[")(input)?;
let (input, _) = not(tag("["))(input)?; let (input, _) = not(tag("["))(input)?;
let (input, label_span) = recognize(many1(tuple(( let (input, (label_tok, _)) =
not(tag("](")), many_till(self.partial(Self::inline_label_safe_single), tag("]("))(input)?;
self.partial(Self::inline_label_safe_single), let (input, url_span) = recognize(tuple((protocol, url_chars(tag(")"), true))))(input)?;
))))(input)?;
let (input, _) = tag("]")(input)?;
let (input, _) = tag("(")(input)?;
let (input, url_span) = recognize(tuple((protocol, url_chars(tag("]"), true))))(input)?;
let (input, _) = tag(")")(input)?; let (input, _) = tag(")")(input)?;
Ok(( Ok((
input, input,
Token::Link { Token::Link {
label: label_span.into_fragment().into(), label: Box::new(Token::Sequence(label_tok)),
href: url_span.into_fragment().into(), href: url_span.into_fragment().into(),
embed: no_embed.is_none(), embed: no_embed.is_none(),
}, },
@ -1056,19 +1061,22 @@ impl Context {
Span::into_fragment, Span::into_fragment,
)(input)?; )(input)?;
let (input, host) = map( let before = input;
let (_, host) = map(
opt(tuple(( opt(tuple((
tag("@"), tag("@"),
map( map(
recognize(many1(alt((alphanumeric1, recognize(one_of("-_")))))), recognize(many1(alt((alphanumeric1, recognize(one_of("-_.")))))),
Span::into_fragment, Span::into_fragment,
), ),
))), ))),
|maybe_tag_host| maybe_tag_host.map(|(_, host)| host), |maybe_tag_host| maybe_tag_host.map(|(_, host)| host),
)(input)?; )(input)?;
let host = host.map(|h| h.trim_end_matches(|c| matches!(c, '.' | '-' | '_')));
Ok(( Ok((
input, host.map(|c| before.slice(c.len() + 1..)).unwrap_or(before),
Token::Mention { Token::Mention {
mention_type, mention_type,
name: name.into(), name: name.into(),
@ -1359,6 +1367,144 @@ text</center>"#
); );
} }
// Verifies that `[label](url)` links are tokenized as `Token::Link` with a
// token label, and that a malformed link degrades to plain text plus a raw URL.
#[test]
fn parse_link() {
// Regular link: the text before it stays `PlainText`, and without a leading
// `?` the link is expected to have `embed: true`.
assert_eq!(
parse_full("Link test: [label](https://example.com)"),
Token::Sequence(vec![
Token::PlainText("Link test: ".into()),
Token::Link {
label: Box::new(Token::PlainText("label".into())),
href: "https://example.com".into(),
embed: true
}
])
);
// A `?` directly before the opening `[` is consumed by the link and is
// expected to yield `embed: false`.
assert_eq!(
parse_full("Link test: ?[label](https://awawa.gay)"),
Token::Sequence(vec![
Token::PlainText("Link test: ".into()),
Token::Link {
label: Box::new(Token::PlainText("label".into())),
href: "https://awawa.gay".into(),
embed: false
}
])
);
// Unterminated link: no `Token::Link` is produced. Everything up to and
// including `(` is plain text and the URL is picked up as `UrlRaw`.
assert_eq!(
parse_full("Link test: ?[label](https://awawa.gay"), // Missing closing parenthesis
Token::Sequence(vec![
Token::PlainText("Link test: ?[label](".into()),
Token::UrlRaw("https://awawa.gay".into()),
])
);
}
// Verifies mention parsing (`@name`, `@name@host`, `!name@host`), with a focus
// on where the optional host part ends and what is left as trailing text.
#[test]
fn parse_mention() {
// A lone mention is the whole result (no `Sequence` wrapper) and has no host.
assert_eq!(
parse_full("@tag"),
Token::Mention {
mention_type: crate::MentionType::User,
name: "tag".into(),
host: None
}
);
// A mention embedded in text is split out from the surrounding plain text.
assert_eq!(
parse_full("hgsjlkdsa @tag fgahjsdkd"),
Token::Sequence(vec![
Token::PlainText("hgsjlkdsa ".into()),
Token::Mention {
mention_type: crate::MentionType::User,
name: "tag".into(),
host: None
},
Token::PlainText(" fgahjsdkd".into())
])
);
// A dangling `@` with no host after it: the host is `None` and the `@`
// remains part of the following plain text.
assert_eq!(
parse_full("hgsjlkdsa @tag@ fgahjsdkd"),
Token::Sequence(vec![
Token::PlainText("hgsjlkdsa ".into()),
Token::Mention {
mention_type: crate::MentionType::User,
name: "tag".into(),
host: None
},
Token::PlainText("@ fgahjsdkd".into())
])
);
// `@name@host`: the host is captured and ends at the following space.
assert_eq!(
parse_full("aaaa @tag@domain bbbbb"),
Token::Sequence(vec![
Token::PlainText("aaaa ".into()),
Token::Mention {
mention_type: crate::MentionType::User,
name: "tag".into(),
host: Some("domain".into())
},
Token::PlainText(" bbbbb".into())
])
);
// A comma right after the host is not part of the host.
assert_eq!(
parse_full("test @tag@domain, test"),
Token::Sequence(vec![
Token::PlainText("test ".into()),
Token::Mention {
mention_type: crate::MentionType::User,
name: "tag".into(),
host: Some("domain".into())
},
Token::PlainText(", test".into())
])
);
// Dots inside the host are kept, but a trailing dot (sentence punctuation)
// is excluded from the host and stays in the following text.
assert_eq!(
parse_full("test @tag@domain.gay. test"),
Token::Sequence(vec![
Token::PlainText("test ".into()),
Token::Mention {
mention_type: crate::MentionType::User,
name: "tag".into(),
host: Some("domain.gay".into())
},
Token::PlainText(". test".into())
])
);
// A question mark right after the host is not part of the host.
assert_eq!(
parse_full("test @tag@domain? test"),
Token::Sequence(vec![
Token::PlainText("test ".into()),
Token::Mention {
mention_type: crate::MentionType::User,
name: "tag".into(),
host: Some("domain".into())
},
Token::PlainText("? test".into())
])
);
// The `!` prefix is expected to produce a `Community` mention instead of a
// `User` mention; host handling is the same.
assert_eq!(
parse_full("test !tag@domain.com test"),
Token::Sequence(vec![
Token::PlainText("test ".into()),
Token::Mention {
mention_type: crate::MentionType::Community,
name: "tag".into(),
host: Some("domain.com".into())
},
Token::PlainText(" test".into())
])
);
}
#[test] #[test]
fn parse_emoji() { fn parse_emoji() {
assert_eq!( assert_eq!(