diff --git a/Cargo.lock b/Cargo.lock index e2d79c5..cb3905d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -462,6 +462,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663" +[[package]] +name = "castaway" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.0.81" @@ -584,6 +593,19 @@ dependencies = [ "tokio-util", ] +[[package]] +name = "compact_str" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "ryu", + "static_assertions", +] + [[package]] name = "const-oid" version = "0.9.4" @@ -1622,6 +1644,7 @@ dependencies = [ name = "mmm_parser" version = "0.2.1-alpha" dependencies = [ + "compact_str", "either", "emojis", "nom", diff --git a/Cargo.toml b/Cargo.toml index c5d0c4e..9828764 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ axum = "0.6" cached = "0.46" cfg-if = "1" chrono = "0.4" +compact_str = "0.7" dotenvy = "0.15" either = "1.9" emojis = "0.6" diff --git a/magnetar_mmm_parser/Cargo.toml b/magnetar_mmm_parser/Cargo.toml index 25faa6b..14e36f7 100644 --- a/magnetar_mmm_parser/Cargo.toml +++ b/magnetar_mmm_parser/Cargo.toml @@ -9,4 +9,5 @@ either = { workspace = true } emojis = { workspace = true } nom = { workspace = true } nom_locate = { workspace = true } +compact_str = { workspace = true } unicode-segmentation = { workspace = true } diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs index d270760..b940145 100644 --- a/magnetar_mmm_parser/src/lib.rs +++ b/magnetar_mmm_parser/src/lib.rs @@ -1,17 +1,17 @@ +use compact_str::{CompactString, ToCompactString}; use either::Either; use nom::branch::alt; use nom::bytes::complete::{tag, tag_no_case}; use nom::character::complete::{ - alpha1, alphanumeric1, anychar, char as one_char, char, line_ending, not_line_ending, one_of, + alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of, satisfy, space1, tab, }; use nom::combinator::{eof, fail, map, not, opt, recognize}; use nom::error::ErrorKind; use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1}; use nom::sequence::tuple; -use nom::{Compare, IResult, Offset, Slice}; +use nom::{IResult, Offset, Slice}; use nom_locate::LocatedSpan; -use std::borrow::Cow; use std::collections::HashMap; use std::convert::{identity, Infallible}; use std::marker::PhantomData; @@ -33,47 +33,47 @@ impl MentionType { } #[derive(Clone, Debug, Eq, PartialEq)] -pub enum Token<'a> { - PlainText(Cow<'a, str>), - Sequence(Vec>), - Quote(Box>), - Small(Box>), - BoldItalic(Box>), - Bold(Box>), - Italic(Box>), - Center(Box>), - Strikethrough(Box>), - PlainTag(Cow<'a, str>), - InlineCode(Cow<'a, str>), - InlineMath(Cow<'a, str>), - UrlRaw(Cow<'a, str>), - UrlNoEmbed(Cow<'a, str>), +pub enum Token { + PlainText(CompactString), + Sequence(Vec), + Quote(Box), + Small(Box), + BoldItalic(Box), + Bold(Box), + Italic(Box), + Center(Box), + Strikethrough(Box), + PlainTag(String), + InlineCode(String), + InlineMath(String), + UrlRaw(String), + UrlNoEmbed(String), Link { - label: Box>, - href: Cow<'a, str>, + label: Box, + href: String, embed: bool, }, BlockCode { - lang: Option>, - inner: Cow<'a, str>, + lang: Option, + inner: String, }, - BlockMath(Cow<'a, str>), + BlockMath(String), Function { - name: Cow<'a, str>, - params: HashMap, Option>>, - inner: Box>, + name: String, + params: HashMap>, + inner: Box, }, Mention { - name: Cow<'a, str>, - host: Option>, + name: String, + host: Option, mention_type: MentionType, }, - UnicodeEmoji(Cow<'a, str>), - ShortcodeEmoji(Cow<'a, str>), - Hashtag(Cow<'a, str>), + UnicodeEmoji(String), + ShortcodeEmoji(String), + Hashtag(String), } -impl Token<'_> { +impl Token { fn str_content_left(&self) -> Option<&str> { match self { Token::PlainText(text) => Some(text.as_ref()), @@ -132,79 +132,19 @@ impl Token<'_> { Token::Italic(inner) => inner.inner(), Token::Center(inner) => inner.inner(), Token::Strikethrough(inner) => inner.inner(), - Token::PlainTag(text) => Token::PlainText(text.clone()), - Token::InlineCode(code) => Token::PlainText(code.clone()), - Token::InlineMath(math) => Token::PlainText(math.clone()), - Token::UrlRaw(url) => Token::PlainText(url.clone()), - Token::UrlNoEmbed(url) => Token::PlainText(url.clone()), + Token::PlainTag(text) => Token::PlainText(text.clone().into()), + Token::InlineCode(code) => Token::PlainText(code.clone().into()), + Token::InlineMath(math) => Token::PlainText(math.clone().into()), + Token::UrlRaw(url) => Token::PlainText(url.clone().into()), + Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()), Token::Link { label, .. } => label.inner(), - Token::BlockCode { inner, .. } => Token::PlainText(inner.clone()), - Token::BlockMath(math) => Token::PlainText(math.clone()), + Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()), + Token::BlockMath(math) => Token::PlainText(math.clone().into()), Token::Function { inner, .. } => inner.inner(), - Token::Mention { name, .. } => Token::PlainText(name.clone()), - Token::UnicodeEmoji(code) => Token::PlainText(code.clone()), - Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone()), - Token::Hashtag(tag) => Token::PlainText(tag.clone()), - } - } - - fn owned(&self) -> Token<'static> { - match self { - Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())), - Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()), - Token::Quote(inner) => Token::Quote(Box::new(inner.owned())), - Token::Small(inner) => Token::Small(Box::new(inner.owned())), - Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())), - Token::Bold(inner) => Token::Bold(Box::new(inner.owned())), - Token::Italic(inner) => Token::Italic(Box::new(inner.owned())), - Token::Center(inner) => Token::Center(Box::new(inner.owned())), - Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.owned())), - Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())), - Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())), - Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())), - Token::UrlRaw(url) => Token::UrlRaw(Cow::Owned(url.clone().into_owned())), - Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())), - Token::Link { embed, label, href } => Token::Link { - embed: *embed, - label: Box::new(label.owned()), - href: Cow::Owned(href.clone().into_owned()), - }, - Token::BlockCode { inner, lang } => Token::BlockCode { - lang: lang.as_ref().map(|l| Cow::Owned(l.clone().into_owned())), - inner: Cow::Owned(inner.clone().into_owned()), - }, - Token::BlockMath(math) => Token::BlockMath(Cow::Owned(math.clone().into_owned())), - Token::Function { - name, - params, - inner, - } => Token::Function { - name: Cow::Owned(name.clone().into_owned()), - params: params - .iter() - .map(|(k, v)| { - ( - Cow::Owned(k.clone().into_owned()), - v.as_ref().map(|val| Cow::Owned(val.clone().into_owned())), - ) - }) - .collect(), - inner: Box::new(inner.owned()), - }, - Token::Mention { - name, - host, - mention_type, - } => Token::Mention { - name: Cow::Owned(name.clone().into_owned()), - host: host.as_ref().map(|v| Cow::Owned(v.clone().into_owned())), - mention_type: *mention_type, - }, - Token::UnicodeEmoji(code) => Token::UnicodeEmoji(Cow::Owned(code.clone().into_owned())), - Token::ShortcodeEmoji(shortcode) => { - Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned())) - } - Token::Hashtag(tag) => Token::Hashtag(Cow::Owned(tag.clone().into_owned())), + Token::Mention { name, .. } => Token::PlainText(name.clone().into()), + Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()), + Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone().into()), + Token::Hashtag(tag) => Token::PlainText(tag.clone().into()), } } @@ -214,7 +154,7 @@ impl Token<'_> { let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| { if let Some(Token::PlainText(last)) = acc.last_mut() { if let Token::PlainText(tok_text) = tok { - *last = Cow::from(last.to_string() + tok_text.as_ref()); + *last += tok_text.as_ref(); return acc; } @@ -229,7 +169,7 @@ impl Token<'_> { for item in items { if let Some(Token::PlainText(last)) = acc.last_mut() { if let Token::PlainText(tok_text) = item { - *last = Cow::from(last.to_string() + tok_text.as_ref()); + *last += tok_text.as_ref(); continue; } @@ -301,23 +241,23 @@ impl SliceOffset for Span<'_> { } #[inline] -fn boxing_token<'a>(func: impl Fn(Box>) -> Token<'a>) -> impl Fn(Token<'a>) -> Token<'a> { +fn boxing_token(func: impl Fn(Box) -> Token) -> impl Fn(Token) -> Token { move |tokens| func(Box::new(tokens)) } #[inline] -fn collect_sequence<'a, T>( - func: impl Fn(Vec) -> Token<'a>, - transform: impl Fn(Token<'a>) -> Token<'a>, -) -> impl Fn(&mut dyn Iterator) -> Token<'a> { +fn collect_sequence( + func: impl Fn(Vec) -> Token, + transform: impl Fn(Token) -> Token, +) -> impl Fn(&mut dyn Iterator) -> Token { move |tokens| transform(func(tokens.collect())) } #[inline] -fn collect_char_sequence<'a>( - func: impl Fn(Cow<'a, str>) -> Token<'a>, -) -> impl Fn(&mut dyn Iterator) -> Token<'a> { - move |chars| func(Cow::Owned(chars.collect())) +fn collect_char_sequence( + func: impl Fn(String) -> Token, +) -> impl Fn(&mut dyn Iterator) -> Token { + move |chars| func(chars.collect()) } #[inline] @@ -334,7 +274,7 @@ fn spliced<'a>( segments: &[Span<'a>], func: impl Fn(Span) -> IResult, parent: Span<'a>, -) -> IResult, Token<'static>, nom::error::Error>> { +) -> IResult, Token, nom::error::Error>> { let combined = segments .iter() .copied() @@ -362,7 +302,7 @@ fn spliced<'a>( let quote_span = Span::new(&combined); let (input, inner) = match func(quote_span) { - Ok((input, token)) => (input, token.owned()), + Ok(s) => s, Err(e) => { return match e { NE::Error(e) => { @@ -393,7 +333,7 @@ fn spliced<'a>( parent }; - Ok((out, inner.owned())) + Ok((out, inner)) } fn space(input: Span) -> IResult { @@ -404,7 +344,7 @@ fn space(input: Span) -> IResult { #[derive(Copy, Clone)] struct Matcher<'a, 'b, T: Clone> { matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult, T> + 'a), - collector: &'a (dyn Fn(&mut dyn Iterator) -> Token<'b> + 'a), + collector: &'a (dyn Fn(&mut dyn Iterator) -> Token + 'a), _phantom_closure: PhantomData<&'a ()>, _phantom_data: PhantomData<&'b ()>, _phantom_output: PhantomData T>, @@ -413,7 +353,7 @@ struct Matcher<'a, 'b, T: Clone> { impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> { fn new( matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult, T> + 'a), - collector: &'a (dyn Fn(&mut dyn Iterator) -> Token<'b> + 'a), + collector: &'a (dyn Fn(&mut dyn Iterator) -> Token + 'a), ) -> Self { Self { matcher_inner, @@ -471,27 +411,27 @@ impl Context { #[inline] fn partial( &self, - func: impl for<'a> Fn(&Self, Span<'a>) -> IResult, Token<'a>> + 'static, - ) -> impl for<'a> Fn(Span<'a>) -> IResult, Token<'a>> + '_ { + func: impl for<'a> Fn(&Self, Span<'a>) -> IResult, Token> + 'static, + ) -> impl for<'a> Fn(Span<'a>) -> IResult, Token> + '_ { move |input| func(self, input) } - pub fn full<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + pub fn full<'a>(&self, input: Span<'a>) -> IResult, Token> { map(many1(self.partial(Self::full_single)), Token::Sequence)(input) } - pub fn inline<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + pub fn inline<'a>(&self, input: Span<'a>) -> IResult, Token> { map(many1(self.partial(Self::inline_single)), Token::Sequence)(input) } - pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult, Token> { map( many1(self.partial(Self::inline_label_safe_single)), Token::Sequence, )(input) } - fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult, Token> { alt(( self.partial(Self::tag_bold_italic_asterisk), self.partial(Self::tag_bold_italic_underscore), @@ -502,7 +442,7 @@ impl Context { ))(input) } - fn full_single<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn full_single<'a>(&self, input: Span<'a>) -> IResult, Token> { let (input, token) = alt(( self.partial(Self::unicode_emoji), alt(( @@ -527,12 +467,12 @@ impl Context { self.partial(Self::shortcode_emoji), self.partial(Self::link), self.partial(Self::raw_url), - self.partial(Self::text), + self.partial(Self::tag_raw_text), ))(input)?; Ok((input, token)) } - fn inline_single<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn inline_single<'a>(&self, input: Span<'a>) -> IResult, Token> { let (input, token) = alt(( self.partial(Self::unicode_emoji), self.partial(Self::tag_small), @@ -551,12 +491,12 @@ impl Context { self.partial(Self::shortcode_emoji), self.partial(Self::link), self.partial(Self::raw_url), - self.partial(Self::text), + self.partial(Self::tag_raw_text), ))(input)?; Ok((input, token)) } - fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult, Token> { let (input, token) = alt(( self.partial(Self::unicode_emoji), self.partial(Self::url_no_embed), @@ -567,12 +507,12 @@ impl Context { self.partial(Self::tag_hashtag), self.partial(Self::shortcode_emoji), self.partial(Self::raw_url), - self.partial(Self::text), + self.partial(Self::tag_raw_text), ))(input)?; Ok((input, token)) } - fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult, Token> { let (input, token) = alt(( self.partial(Self::unicode_emoji), self.partial(Self::tag_small), @@ -584,12 +524,12 @@ impl Context { self.partial(Self::tag_strikethrough_tilde), self.partial(Self::tag_func), self.partial(Self::shortcode_emoji), - self.partial(Self::text), + self.partial(Self::tag_raw_text), ))(input)?; Ok((input, token)) } - fn tag_quote<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_quote<'a>(&self, input: Span<'a>) -> IResult, Token> { let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?; if let (None, None) = leading_spaces { @@ -625,7 +565,7 @@ impl Context { Ok((input, Token::Quote(Box::new(inner)))) } - fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult, Token> { let tag_start = &tag("
"); let tag_end = &tag("
"); @@ -649,7 +589,7 @@ impl Context { )) } - fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult, Token> { let delim = &tag("```"); let (input, _) = opt(line_ending)(input)?; @@ -688,7 +628,7 @@ impl Context { )) } - fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult, Token> { let start = &tag("\\["); let end = &tag("\\]"); @@ -714,7 +654,7 @@ impl Context { Ok(( input, - Token::BlockMath(Cow::Borrowed(math_span.into_fragment())), + Token::BlockMath(math_span.into_fragment().to_string()), )) } @@ -726,7 +666,7 @@ impl Context { escape: bool, matcher: Matcher<'a, 'b, T>, fallback: Matcher<'a, 'b, S>, - ) -> impl Fn(Span<'b>) -> IResult, Token<'b>> + '_ + ) -> impl Fn(Span<'b>) -> IResult, Token> + '_ where FOpen: Fn(Span<'b>) -> IResult, Span<'b>> + 'a, FClose: Fn(Span<'b>) -> IResult, Span<'b>> + 'a, @@ -739,7 +679,7 @@ impl Context { if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) { return Ok(( input_escaped, - Token::PlainText(Cow::Borrowed(mark.fragment())), + Token::PlainText(mark.fragment().to_string().into()), )); } } @@ -814,7 +754,7 @@ impl Context { input, Token::Sequence(vec![ Token::PlainText(begin.fragment_between(&post_open).into()), - inner_tok.inner().owned(), + inner_tok.inner(), Token::PlainText(closing.into_fragment().into()), ]), )); @@ -823,7 +763,7 @@ impl Context { } } - fn tag_func<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_func<'a>(&self, input: Span<'a>) -> IResult, Token> { let (input, _) = tag("$[")(input)?; let func_ident = |input| { @@ -852,8 +792,8 @@ impl Context { .into_iter() .map(|(k, v)| { ( - Cow::from(k.into_fragment()), - v.map(|(_, val)| Cow::from(val.into_fragment())), + k.into_fragment().to_string(), + v.map(|(_, val)| val.into_fragment().to_string()), ) }) .collect::>() @@ -866,14 +806,14 @@ impl Context { Ok(( input, Token::Function { - name: Cow::from(func_name), + name: func_name.to_string(), params: args_out, inner: Box::new(Token::Sequence(inner)), }, )) } - fn tag_plain<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_plain<'a>(&self, input: Span<'a>) -> IResult, Token> { let opening_tag = &tag(""); let closing_tag = &tag(""); @@ -887,7 +827,7 @@ impl Context { Ok((input, Token::PlainTag(text.into()))) } - fn tag_small<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_small<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( tag_no_case(""), tag_no_case(""), @@ -903,7 +843,7 @@ impl Context { )(input) } - fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( (tag("***"), FlankingRule::Lenient), (tag("***"), FlankingRule::Lenient), @@ -919,7 +859,7 @@ impl Context { )(input) } - fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( (tag("___"), FlankingRule::Strict), (tag("___"), FlankingRule::Strict), @@ -935,7 +875,7 @@ impl Context { )(input) } - fn tag_bold<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_bold<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( tag_no_case(""), tag_no_case(""), @@ -951,7 +891,7 @@ impl Context { )(input) } - fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( (tag("**"), FlankingRule::Lenient), (tag("**"), FlankingRule::Lenient), @@ -967,7 +907,7 @@ impl Context { )(input) } - fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( (tag("__"), FlankingRule::Strict), (tag("__"), FlankingRule::Strict), @@ -983,7 +923,7 @@ impl Context { )(input) } - fn tag_italic<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_italic<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( tag_no_case(""), tag_no_case(""), @@ -999,7 +939,7 @@ impl Context { )(input) } - fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( (tag("*"), FlankingRule::Lenient), (tag("*"), FlankingRule::Lenient), @@ -1015,7 +955,7 @@ impl Context { )(input) } - fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( (tag("_"), FlankingRule::Strict), (tag("_"), FlankingRule::Strict), @@ -1031,7 +971,7 @@ impl Context { )(input) } - fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( tag_no_case(""), tag_no_case(""), @@ -1047,7 +987,7 @@ impl Context { )(input) } - fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( (tag("~~"), FlankingRule::Lenient), (tag("~~"), FlankingRule::Lenient), @@ -1076,7 +1016,7 @@ impl Context { )(input) } - fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( tag("`"), |input| alt((tag("`"), tag("´")))(input), @@ -1094,7 +1034,7 @@ impl Context { )(input) } - fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult, Token> { self.tag_delimited( tag("\\("), tag("\\)"), @@ -1109,12 +1049,12 @@ impl Context { )(input) } - fn text<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { - let (input, text) = map(recognize(anychar), Span::into_fragment)(input)?; - Ok((input, Token::PlainText(text.into()))) + fn tag_raw_text<'a>(&self, input: Span<'a>) -> IResult, Token> { + let (input, text) = anychar(input)?; + Ok((input, Token::PlainText(text.to_compact_string()))) } - fn raw_url<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn raw_url<'a>(&self, input: Span<'a>) -> IResult, Token> { let (input, url_span) = recognize(tuple(( protocol, url_chars(|input| not(url_chars_base)(input), false), @@ -1130,21 +1070,21 @@ impl Context { url }; - Ok((input, Token::UrlRaw(Cow::from(final_url)))) + Ok((input, Token::UrlRaw(final_url.to_string()))) } - fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult, Token> { let (input, _) = tag("<")(input)?; let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?; let (input, _) = tag(">")(input)?; Ok(( input, - Token::UrlNoEmbed(Cow::from(url_span.into_fragment())), + Token::UrlNoEmbed(url_span.into_fragment().to_string()), )) } - fn link<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn link<'a>(&self, input: Span<'a>) -> IResult, Token> { let (input, no_embed) = opt(tag("?"))(input)?; let (input, _) = tag("[")(input)?; let (input, _) = not(tag("["))(input)?; @@ -1163,7 +1103,7 @@ impl Context { )) } - fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult, Token> { let frag = input.fragment(); let Some(grapheme) = frag.graphemes(true).next() else { return fail(input); @@ -1183,7 +1123,7 @@ impl Context { )) } - fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult, Token> { if let (plain_out, Some(plain)) = map( opt(recognize(tuple(( alphanumeric1_unicode, @@ -1209,7 +1149,7 @@ impl Context { Ok((input, Token::ShortcodeEmoji(shortcode.into()))) } - fn tag_mention<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_mention<'a>(&self, input: Span<'a>) -> IResult, Token> { if let (plain_out, Some(plain)) = map( opt(recognize(tuple(( alt((tag("\\"), alphanumeric1_unicode)), @@ -1257,7 +1197,7 @@ impl Context { )) } - fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult, Token<'a>> { + fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult, Token> { // TODO: Skip when preceded by alphanumerics let (input, _) = tag("#")(input)?; @@ -1319,11 +1259,10 @@ fn url_chars<'a, T: 'a>( mod test { use crate::{url_chars, Context, Span, Token}; use nom::bytes::complete::tag; - use std::borrow::Cow; use std::collections::HashMap; fn parse_full(string: &str) -> Token { - Context.full(Span::new(string)).unwrap().1.merged().owned() + Context.full(Span::new(string)).unwrap().1.merged() } #[test] @@ -1795,7 +1734,7 @@ text"# Token::Sequence( vec!["🥺", "💜", "❤️", "🦊"] .into_iter() - .map(<&str as Into>>::into) + .map(str::to_string) .map(Token::UnicodeEmoji) .collect::>() )