diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs index 8dccf96..63e55c5 100644 --- a/magnetar_mmm_parser/src/lib.rs +++ b/magnetar_mmm_parser/src/lib.rs @@ -48,7 +48,7 @@ pub enum Token<'a> { UrlRaw(Cow<'a, str>), UrlNoEmbed(Cow<'a, str>), Link { - label: Cow<'a, str>, + label: Box<Token<'a>>, href: Cow<'a, str>, embed: bool, }, @@ -91,7 +91,7 @@ impl Token<'_> { Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())), Token::Link { embed, label, href } => Token::Link { embed: *embed, - label: Cow::Owned(label.clone().into_owned()), + label: Box::new(label.owned()), href: Cow::Owned(href.clone().into_owned()), }, Token::BlockCode { inner, lang } => Token::BlockCode { @@ -183,6 +183,11 @@ impl Token<'_> { Token::Italic(inner) => Token::Italic(Box::new(inner.merged())), Token::Center(inner) => Token::Center(Box::new(inner.merged())), Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())), + Token::Link { embed, label, href } => Token::Link { + label: Box::new(label.merged()), + href: href.clone(), + embed: *embed, + }, Token::Function { name, params, @@ -387,12 +392,14 @@ impl Context { fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { let (input, token) = alt(( self.partial(Self::unicode_emoji), - self.partial(Self::tag_block_center), - self.partial(Self::tag_small), - self.partial(Self::tag_plain), - self.partial(Self::tag_bold), - self.partial(Self::tag_italic), - self.partial(Self::tag_strikethrough), + alt(( + self.partial(Self::tag_block_center), + self.partial(Self::tag_small), + self.partial(Self::tag_plain), + self.partial(Self::tag_bold), + self.partial(Self::tag_italic), + self.partial(Self::tag_strikethrough), + )), self.partial(Self::url_no_embed), self.partial(Self::base_bold_italic), self.partial(Self::tag_block_code), @@ -405,6 +412,7 @@ impl Context { self.partial(Self::tag_mention), self.partial(Self::tag_hashtag), self.partial(Self::shortcode_emoji), + 
self.partial(Self::link), self.partial(Self::raw_url), self.partial(Self::text), ))(input)?; @@ -428,6 +436,7 @@ impl Context { self.partial(Self::tag_mention), self.partial(Self::tag_hashtag), self.partial(Self::shortcode_emoji), + self.partial(Self::link), self.partial(Self::raw_url), self.partial(Self::text), ))(input)?; @@ -989,19 +998,15 @@ impl Context { let (input, no_embed) = opt(tag("?"))(input)?; let (input, _) = tag("[")(input)?; let (input, _) = not(tag("["))(input)?; - let (input, label_span) = recognize(many1(tuple(( - not(tag("](")), - self.partial(Self::inline_label_safe_single), - ))))(input)?; - let (input, _) = tag("]")(input)?; - let (input, _) = tag("(")(input)?; - let (input, url_span) = recognize(tuple((protocol, url_chars(tag("]"), true))))(input)?; + let (input, (label_tok, _)) = + many_till(self.partial(Self::inline_label_safe_single), tag("]("))(input)?; + let (input, url_span) = recognize(tuple((protocol, url_chars(tag(")"), true))))(input)?; let (input, _) = tag(")")(input)?; Ok(( input, Token::Link { - label: label_span.into_fragment().into(), + label: Box::new(Token::Sequence(label_tok)), href: url_span.into_fragment().into(), embed: no_embed.is_none(), }, @@ -1056,19 +1061,22 @@ impl Context { Span::into_fragment, )(input)?; - let (input, host) = map( + let before = input; + let (_, host) = map( opt(tuple(( tag("@"), map( - recognize(many1(alt((alphanumeric1, recognize(one_of("-_")))))), + recognize(many1(alt((alphanumeric1, recognize(one_of("-_.")))))), Span::into_fragment, ), ))), |maybe_tag_host| maybe_tag_host.map(|(_, host)| host), )(input)?; + let host = host.map(|h| h.trim_end_matches(|c| matches!(c, '.' 
| '-' | '_'))); + Ok(( - input, + host.map(|c| before.slice(c.len() + 1..)).unwrap_or(before), Token::Mention { mention_type, name: name.into(), @@ -1359,6 +1367,144 @@ text"# ); } + #[test] + fn parse_link() { + assert_eq!( + parse_full("Link test: [label](https://example.com)"), + Token::Sequence(vec![ + Token::PlainText("Link test: ".into()), + Token::Link { + label: Box::new(Token::PlainText("label".into())), + href: "https://example.com".into(), + embed: true + } + ]) + ); + + assert_eq!( + parse_full("Link test: ?[label](https://awawa.gay)"), + Token::Sequence(vec![ + Token::PlainText("Link test: ".into()), + Token::Link { + label: Box::new(Token::PlainText("label".into())), + href: "https://awawa.gay".into(), + embed: false + } + ]) + ); + + assert_eq!( + parse_full("Link test: ?[label](https://awawa.gay"), // Missing closing bracket + Token::Sequence(vec![ + Token::PlainText("Link test: ?[label](".into()), + Token::UrlRaw("https://awawa.gay".into()), + ]) + ); + } + + #[test] + fn parse_mention() { + assert_eq!( + parse_full("@tag"), + Token::Mention { + mention_type: crate::MentionType::User, + name: "tag".into(), + host: None + } + ); + + assert_eq!( + parse_full("hgsjlkdsa @tag fgahjsdkd"), + Token::Sequence(vec![ + Token::PlainText("hgsjlkdsa ".into()), + Token::Mention { + mention_type: crate::MentionType::User, + name: "tag".into(), + host: None + }, + Token::PlainText(" fgahjsdkd".into()) + ]) + ); + + assert_eq!( + parse_full("hgsjlkdsa @tag@ fgahjsdkd"), + Token::Sequence(vec![ + Token::PlainText("hgsjlkdsa ".into()), + Token::Mention { + mention_type: crate::MentionType::User, + name: "tag".into(), + host: None + }, + Token::PlainText("@ fgahjsdkd".into()) + ]) + ); + + assert_eq!( + parse_full("aaaa @tag@domain bbbbb"), + Token::Sequence(vec![ + Token::PlainText("aaaa ".into()), + Token::Mention { + mention_type: crate::MentionType::User, + name: "tag".into(), + host: Some("domain".into()) + }, + Token::PlainText(" bbbbb".into()) + ]) + ); + + 
assert_eq!( + parse_full("test @tag@domain, test"), + Token::Sequence(vec![ + Token::PlainText("test ".into()), + Token::Mention { + mention_type: crate::MentionType::User, + name: "tag".into(), + host: Some("domain".into()) + }, + Token::PlainText(", test".into()) + ]) + ); + + assert_eq!( + parse_full("test @tag@domain.gay. test"), + Token::Sequence(vec![ + Token::PlainText("test ".into()), + Token::Mention { + mention_type: crate::MentionType::User, + name: "tag".into(), + host: Some("domain.gay".into()) + }, + Token::PlainText(". test".into()) + ]) + ); + + assert_eq!( + parse_full("test @tag@domain? test"), + Token::Sequence(vec![ + Token::PlainText("test ".into()), + Token::Mention { + mention_type: crate::MentionType::User, + name: "tag".into(), + host: Some("domain".into()) + }, + Token::PlainText("? test".into()) + ]) + ); + + assert_eq!( + parse_full("test !tag@domain.com test"), + Token::Sequence(vec![ + Token::PlainText("test ".into()), + Token::Mention { + mention_type: crate::MentionType::Community, + name: "tag".into(), + host: Some("domain.com".into()) + }, + Token::PlainText(" test".into()) + ]) + ); + } + #[test] fn parse_emoji() { assert_eq!(