MMM: Made the parser always output owned tokens

This commit is contained in:
Natty 2023-10-14 21:41:36 +02:00
parent d0d977e6eb
commit 23a63f2fe9
Signed by: natty
GPG Key ID: BF6CB659ADEE60EC
4 changed files with 138 additions and 174 deletions

23
Cargo.lock generated
View File

@ -462,6 +462,15 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663" checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663"
[[package]]
name = "castaway"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc"
dependencies = [
"rustversion",
]
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.0.81" version = "1.0.81"
@ -584,6 +593,19 @@ dependencies = [
"tokio-util", "tokio-util",
] ]
[[package]]
name = "compact_str"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f"
dependencies = [
"castaway",
"cfg-if",
"itoa",
"ryu",
"static_assertions",
]
[[package]] [[package]]
name = "const-oid" name = "const-oid"
version = "0.9.4" version = "0.9.4"
@ -1622,6 +1644,7 @@ dependencies = [
name = "mmm_parser" name = "mmm_parser"
version = "0.2.1-alpha" version = "0.2.1-alpha"
dependencies = [ dependencies = [
"compact_str",
"either", "either",
"emojis", "emojis",
"nom", "nom",

View File

@ -28,6 +28,7 @@ axum = "0.6"
cached = "0.46" cached = "0.46"
cfg-if = "1" cfg-if = "1"
chrono = "0.4" chrono = "0.4"
compact_str = "0.7"
dotenvy = "0.15" dotenvy = "0.15"
either = "1.9" either = "1.9"
emojis = "0.6" emojis = "0.6"

View File

@ -9,4 +9,5 @@ either = { workspace = true }
emojis = { workspace = true } emojis = { workspace = true }
nom = { workspace = true } nom = { workspace = true }
nom_locate = { workspace = true } nom_locate = { workspace = true }
compact_str = { workspace = true }
unicode-segmentation = { workspace = true } unicode-segmentation = { workspace = true }

View File

@ -1,17 +1,17 @@
use compact_str::{CompactString, ToCompactString};
use either::Either; use either::Either;
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case}; use nom::bytes::complete::{tag, tag_no_case};
use nom::character::complete::{ use nom::character::complete::{
alpha1, alphanumeric1, anychar, char as one_char, char, line_ending, not_line_ending, one_of, alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
satisfy, space1, tab, satisfy, space1, tab,
}; };
use nom::combinator::{eof, fail, map, not, opt, recognize}; use nom::combinator::{eof, fail, map, not, opt, recognize};
use nom::error::ErrorKind; use nom::error::ErrorKind;
use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1}; use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
use nom::sequence::tuple; use nom::sequence::tuple;
use nom::{Compare, IResult, Offset, Slice}; use nom::{IResult, Offset, Slice};
use nom_locate::LocatedSpan; use nom_locate::LocatedSpan;
use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::{identity, Infallible}; use std::convert::{identity, Infallible};
use std::marker::PhantomData; use std::marker::PhantomData;
@ -33,47 +33,47 @@ impl MentionType {
} }
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token<'a> { pub enum Token {
PlainText(Cow<'a, str>), PlainText(CompactString),
Sequence(Vec<Token<'a>>), Sequence(Vec<Token>),
Quote(Box<Token<'a>>), Quote(Box<Token>),
Small(Box<Token<'a>>), Small(Box<Token>),
BoldItalic(Box<Token<'a>>), BoldItalic(Box<Token>),
Bold(Box<Token<'a>>), Bold(Box<Token>),
Italic(Box<Token<'a>>), Italic(Box<Token>),
Center(Box<Token<'a>>), Center(Box<Token>),
Strikethrough(Box<Token<'a>>), Strikethrough(Box<Token>),
PlainTag(Cow<'a, str>), PlainTag(String),
InlineCode(Cow<'a, str>), InlineCode(String),
InlineMath(Cow<'a, str>), InlineMath(String),
UrlRaw(Cow<'a, str>), UrlRaw(String),
UrlNoEmbed(Cow<'a, str>), UrlNoEmbed(String),
Link { Link {
label: Box<Token<'a>>, label: Box<Token>,
href: Cow<'a, str>, href: String,
embed: bool, embed: bool,
}, },
BlockCode { BlockCode {
lang: Option<Cow<'a, str>>, lang: Option<String>,
inner: Cow<'a, str>, inner: String,
}, },
BlockMath(Cow<'a, str>), BlockMath(String),
Function { Function {
name: Cow<'a, str>, name: String,
params: HashMap<Cow<'a, str>, Option<Cow<'a, str>>>, params: HashMap<String, Option<String>>,
inner: Box<Token<'a>>, inner: Box<Token>,
}, },
Mention { Mention {
name: Cow<'a, str>, name: String,
host: Option<Cow<'a, str>>, host: Option<String>,
mention_type: MentionType, mention_type: MentionType,
}, },
UnicodeEmoji(Cow<'a, str>), UnicodeEmoji(String),
ShortcodeEmoji(Cow<'a, str>), ShortcodeEmoji(String),
Hashtag(Cow<'a, str>), Hashtag(String),
} }
impl Token<'_> { impl Token {
fn str_content_left(&self) -> Option<&str> { fn str_content_left(&self) -> Option<&str> {
match self { match self {
Token::PlainText(text) => Some(text.as_ref()), Token::PlainText(text) => Some(text.as_ref()),
@ -132,79 +132,19 @@ impl Token<'_> {
Token::Italic(inner) => inner.inner(), Token::Italic(inner) => inner.inner(),
Token::Center(inner) => inner.inner(), Token::Center(inner) => inner.inner(),
Token::Strikethrough(inner) => inner.inner(), Token::Strikethrough(inner) => inner.inner(),
Token::PlainTag(text) => Token::PlainText(text.clone()), Token::PlainTag(text) => Token::PlainText(text.clone().into()),
Token::InlineCode(code) => Token::PlainText(code.clone()), Token::InlineCode(code) => Token::PlainText(code.clone().into()),
Token::InlineMath(math) => Token::PlainText(math.clone()), Token::InlineMath(math) => Token::PlainText(math.clone().into()),
Token::UrlRaw(url) => Token::PlainText(url.clone()), Token::UrlRaw(url) => Token::PlainText(url.clone().into()),
Token::UrlNoEmbed(url) => Token::PlainText(url.clone()), Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()),
Token::Link { label, .. } => label.inner(), Token::Link { label, .. } => label.inner(),
Token::BlockCode { inner, .. } => Token::PlainText(inner.clone()), Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()),
Token::BlockMath(math) => Token::PlainText(math.clone()), Token::BlockMath(math) => Token::PlainText(math.clone().into()),
Token::Function { inner, .. } => inner.inner(), Token::Function { inner, .. } => inner.inner(),
Token::Mention { name, .. } => Token::PlainText(name.clone()), Token::Mention { name, .. } => Token::PlainText(name.clone().into()),
Token::UnicodeEmoji(code) => Token::PlainText(code.clone()), Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()),
Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone()), Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone().into()),
Token::Hashtag(tag) => Token::PlainText(tag.clone()), Token::Hashtag(tag) => Token::PlainText(tag.clone().into()),
}
}
fn owned(&self) -> Token<'static> {
match self {
Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())),
Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()),
Token::Quote(inner) => Token::Quote(Box::new(inner.owned())),
Token::Small(inner) => Token::Small(Box::new(inner.owned())),
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())),
Token::Bold(inner) => Token::Bold(Box::new(inner.owned())),
Token::Italic(inner) => Token::Italic(Box::new(inner.owned())),
Token::Center(inner) => Token::Center(Box::new(inner.owned())),
Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.owned())),
Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())),
Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())),
Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())),
Token::UrlRaw(url) => Token::UrlRaw(Cow::Owned(url.clone().into_owned())),
Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())),
Token::Link { embed, label, href } => Token::Link {
embed: *embed,
label: Box::new(label.owned()),
href: Cow::Owned(href.clone().into_owned()),
},
Token::BlockCode { inner, lang } => Token::BlockCode {
lang: lang.as_ref().map(|l| Cow::Owned(l.clone().into_owned())),
inner: Cow::Owned(inner.clone().into_owned()),
},
Token::BlockMath(math) => Token::BlockMath(Cow::Owned(math.clone().into_owned())),
Token::Function {
name,
params,
inner,
} => Token::Function {
name: Cow::Owned(name.clone().into_owned()),
params: params
.iter()
.map(|(k, v)| {
(
Cow::Owned(k.clone().into_owned()),
v.as_ref().map(|val| Cow::Owned(val.clone().into_owned())),
)
})
.collect(),
inner: Box::new(inner.owned()),
},
Token::Mention {
name,
host,
mention_type,
} => Token::Mention {
name: Cow::Owned(name.clone().into_owned()),
host: host.as_ref().map(|v| Cow::Owned(v.clone().into_owned())),
mention_type: *mention_type,
},
Token::UnicodeEmoji(code) => Token::UnicodeEmoji(Cow::Owned(code.clone().into_owned())),
Token::ShortcodeEmoji(shortcode) => {
Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
}
Token::Hashtag(tag) => Token::Hashtag(Cow::Owned(tag.clone().into_owned())),
} }
} }
@ -214,7 +154,7 @@ impl Token<'_> {
let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| { let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
if let Some(Token::PlainText(last)) = acc.last_mut() { if let Some(Token::PlainText(last)) = acc.last_mut() {
if let Token::PlainText(tok_text) = tok { if let Token::PlainText(tok_text) = tok {
*last = Cow::from(last.to_string() + tok_text.as_ref()); *last += tok_text.as_ref();
return acc; return acc;
} }
@ -229,7 +169,7 @@ impl Token<'_> {
for item in items { for item in items {
if let Some(Token::PlainText(last)) = acc.last_mut() { if let Some(Token::PlainText(last)) = acc.last_mut() {
if let Token::PlainText(tok_text) = item { if let Token::PlainText(tok_text) = item {
*last = Cow::from(last.to_string() + tok_text.as_ref()); *last += tok_text.as_ref();
continue; continue;
} }
@ -301,23 +241,23 @@ impl SliceOffset for Span<'_> {
} }
#[inline] #[inline]
fn boxing_token<'a>(func: impl Fn(Box<Token<'a>>) -> Token<'a>) -> impl Fn(Token<'a>) -> Token<'a> { fn boxing_token(func: impl Fn(Box<Token>) -> Token) -> impl Fn(Token) -> Token {
move |tokens| func(Box::new(tokens)) move |tokens| func(Box::new(tokens))
} }
#[inline] #[inline]
fn collect_sequence<'a, T>( fn collect_sequence<T>(
func: impl Fn(Vec<T>) -> Token<'a>, func: impl Fn(Vec<T>) -> Token,
transform: impl Fn(Token<'a>) -> Token<'a>, transform: impl Fn(Token) -> Token,
) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token<'a> { ) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token {
move |tokens| transform(func(tokens.collect())) move |tokens| transform(func(tokens.collect()))
} }
#[inline] #[inline]
fn collect_char_sequence<'a>( fn collect_char_sequence(
func: impl Fn(Cow<'a, str>) -> Token<'a>, func: impl Fn(String) -> Token,
) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token<'a> { ) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token {
move |chars| func(Cow::Owned(chars.collect())) move |chars| func(chars.collect())
} }
#[inline] #[inline]
@ -334,7 +274,7 @@ fn spliced<'a>(
segments: &[Span<'a>], segments: &[Span<'a>],
func: impl Fn(Span) -> IResult<Span, Token>, func: impl Fn(Span) -> IResult<Span, Token>,
parent: Span<'a>, parent: Span<'a>,
) -> IResult<Span<'a>, Token<'static>, nom::error::Error<Span<'a>>> { ) -> IResult<Span<'a>, Token, nom::error::Error<Span<'a>>> {
let combined = segments let combined = segments
.iter() .iter()
.copied() .copied()
@ -362,7 +302,7 @@ fn spliced<'a>(
let quote_span = Span::new(&combined); let quote_span = Span::new(&combined);
let (input, inner) = match func(quote_span) { let (input, inner) = match func(quote_span) {
Ok((input, token)) => (input, token.owned()), Ok(s) => s,
Err(e) => { Err(e) => {
return match e { return match e {
NE::Error(e) => { NE::Error(e) => {
@ -393,7 +333,7 @@ fn spliced<'a>(
parent parent
}; };
Ok((out, inner.owned())) Ok((out, inner))
} }
fn space(input: Span) -> IResult<Span, Token> { fn space(input: Span) -> IResult<Span, Token> {
@ -404,7 +344,7 @@ fn space(input: Span) -> IResult<Span, Token> {
#[derive(Copy, Clone)] #[derive(Copy, Clone)]
struct Matcher<'a, 'b, T: Clone> { struct Matcher<'a, 'b, T: Clone> {
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a), matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a), collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
_phantom_closure: PhantomData<&'a ()>, _phantom_closure: PhantomData<&'a ()>,
_phantom_data: PhantomData<&'b ()>, _phantom_data: PhantomData<&'b ()>,
_phantom_output: PhantomData<fn() -> T>, _phantom_output: PhantomData<fn() -> T>,
@ -413,7 +353,7 @@ struct Matcher<'a, 'b, T: Clone> {
impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> { impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> {
fn new( fn new(
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a), matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a), collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
) -> Self { ) -> Self {
Self { Self {
matcher_inner, matcher_inner,
@ -471,27 +411,27 @@ impl Context {
#[inline] #[inline]
fn partial( fn partial(
&self, &self,
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static, func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token> + 'static,
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ { ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token> + '_ {
move |input| func(self, input) move |input| func(self, input)
} }
pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
map(many1(self.partial(Self::full_single)), Token::Sequence)(input) map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
} }
pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
map(many1(self.partial(Self::inline_single)), Token::Sequence)(input) map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
} }
pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
map( map(
many1(self.partial(Self::inline_label_safe_single)), many1(self.partial(Self::inline_label_safe_single)),
Token::Sequence, Token::Sequence,
)(input) )(input)
} }
fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
alt(( alt((
self.partial(Self::tag_bold_italic_asterisk), self.partial(Self::tag_bold_italic_asterisk),
self.partial(Self::tag_bold_italic_underscore), self.partial(Self::tag_bold_italic_underscore),
@ -502,7 +442,7 @@ impl Context {
))(input) ))(input)
} }
fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, token) = alt(( let (input, token) = alt((
self.partial(Self::unicode_emoji), self.partial(Self::unicode_emoji),
alt(( alt((
@ -527,12 +467,12 @@ impl Context {
self.partial(Self::shortcode_emoji), self.partial(Self::shortcode_emoji),
self.partial(Self::link), self.partial(Self::link),
self.partial(Self::raw_url), self.partial(Self::raw_url),
self.partial(Self::text), self.partial(Self::tag_raw_text),
))(input)?; ))(input)?;
Ok((input, token)) Ok((input, token))
} }
fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, token) = alt(( let (input, token) = alt((
self.partial(Self::unicode_emoji), self.partial(Self::unicode_emoji),
self.partial(Self::tag_small), self.partial(Self::tag_small),
@ -551,12 +491,12 @@ impl Context {
self.partial(Self::shortcode_emoji), self.partial(Self::shortcode_emoji),
self.partial(Self::link), self.partial(Self::link),
self.partial(Self::raw_url), self.partial(Self::raw_url),
self.partial(Self::text), self.partial(Self::tag_raw_text),
))(input)?; ))(input)?;
Ok((input, token)) Ok((input, token))
} }
fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, token) = alt(( let (input, token) = alt((
self.partial(Self::unicode_emoji), self.partial(Self::unicode_emoji),
self.partial(Self::url_no_embed), self.partial(Self::url_no_embed),
@ -567,12 +507,12 @@ impl Context {
self.partial(Self::tag_hashtag), self.partial(Self::tag_hashtag),
self.partial(Self::shortcode_emoji), self.partial(Self::shortcode_emoji),
self.partial(Self::raw_url), self.partial(Self::raw_url),
self.partial(Self::text), self.partial(Self::tag_raw_text),
))(input)?; ))(input)?;
Ok((input, token)) Ok((input, token))
} }
fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, token) = alt(( let (input, token) = alt((
self.partial(Self::unicode_emoji), self.partial(Self::unicode_emoji),
self.partial(Self::tag_small), self.partial(Self::tag_small),
@ -584,12 +524,12 @@ impl Context {
self.partial(Self::tag_strikethrough_tilde), self.partial(Self::tag_strikethrough_tilde),
self.partial(Self::tag_func), self.partial(Self::tag_func),
self.partial(Self::shortcode_emoji), self.partial(Self::shortcode_emoji),
self.partial(Self::text), self.partial(Self::tag_raw_text),
))(input)?; ))(input)?;
Ok((input, token)) Ok((input, token))
} }
fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?; let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
if let (None, None) = leading_spaces { if let (None, None) = leading_spaces {
@ -625,7 +565,7 @@ impl Context {
Ok((input, Token::Quote(Box::new(inner)))) Ok((input, Token::Quote(Box::new(inner))))
} }
fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let tag_start = &tag("<center>"); let tag_start = &tag("<center>");
let tag_end = &tag("</center>"); let tag_end = &tag("</center>");
@ -649,7 +589,7 @@ impl Context {
)) ))
} }
fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let delim = &tag("```"); let delim = &tag("```");
let (input, _) = opt(line_ending)(input)?; let (input, _) = opt(line_ending)(input)?;
@ -688,7 +628,7 @@ impl Context {
)) ))
} }
fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let start = &tag("\\["); let start = &tag("\\[");
let end = &tag("\\]"); let end = &tag("\\]");
@ -714,7 +654,7 @@ impl Context {
Ok(( Ok((
input, input,
Token::BlockMath(Cow::Borrowed(math_span.into_fragment())), Token::BlockMath(math_span.into_fragment().to_string()),
)) ))
} }
@ -726,7 +666,7 @@ impl Context {
escape: bool, escape: bool,
matcher: Matcher<'a, 'b, T>, matcher: Matcher<'a, 'b, T>,
fallback: Matcher<'a, 'b, S>, fallback: Matcher<'a, 'b, S>,
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token> + '_
where where
FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a, FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a, FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
@ -739,7 +679,7 @@ impl Context {
if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) { if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
return Ok(( return Ok((
input_escaped, input_escaped,
Token::PlainText(Cow::Borrowed(mark.fragment())), Token::PlainText(mark.fragment().to_string().into()),
)); ));
} }
} }
@ -814,7 +754,7 @@ impl Context {
input, input,
Token::Sequence(vec![ Token::Sequence(vec![
Token::PlainText(begin.fragment_between(&post_open).into()), Token::PlainText(begin.fragment_between(&post_open).into()),
inner_tok.inner().owned(), inner_tok.inner(),
Token::PlainText(closing.into_fragment().into()), Token::PlainText(closing.into_fragment().into()),
]), ]),
)); ));
@ -823,7 +763,7 @@ impl Context {
} }
} }
fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, _) = tag("$[")(input)?; let (input, _) = tag("$[")(input)?;
let func_ident = |input| { let func_ident = |input| {
@ -852,8 +792,8 @@ impl Context {
.into_iter() .into_iter()
.map(|(k, v)| { .map(|(k, v)| {
( (
Cow::from(k.into_fragment()), k.into_fragment().to_string(),
v.map(|(_, val)| Cow::from(val.into_fragment())), v.map(|(_, val)| val.into_fragment().to_string()),
) )
}) })
.collect::<HashMap<_, _>>() .collect::<HashMap<_, _>>()
@ -866,14 +806,14 @@ impl Context {
Ok(( Ok((
input, input,
Token::Function { Token::Function {
name: Cow::from(func_name), name: func_name.to_string(),
params: args_out, params: args_out,
inner: Box::new(Token::Sequence(inner)), inner: Box::new(Token::Sequence(inner)),
}, },
)) ))
} }
fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let opening_tag = &tag("<plain>"); let opening_tag = &tag("<plain>");
let closing_tag = &tag("</plain>"); let closing_tag = &tag("</plain>");
@ -887,7 +827,7 @@ impl Context {
Ok((input, Token::PlainTag(text.into()))) Ok((input, Token::PlainTag(text.into())))
} }
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
tag_no_case("<small>"), tag_no_case("<small>"),
tag_no_case("</small>"), tag_no_case("</small>"),
@ -903,7 +843,7 @@ impl Context {
)(input) )(input)
} }
fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
(tag("***"), FlankingRule::Lenient), (tag("***"), FlankingRule::Lenient),
(tag("***"), FlankingRule::Lenient), (tag("***"), FlankingRule::Lenient),
@ -919,7 +859,7 @@ impl Context {
)(input) )(input)
} }
fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
(tag("___"), FlankingRule::Strict), (tag("___"), FlankingRule::Strict),
(tag("___"), FlankingRule::Strict), (tag("___"), FlankingRule::Strict),
@ -935,7 +875,7 @@ impl Context {
)(input) )(input)
} }
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
tag_no_case("<b>"), tag_no_case("<b>"),
tag_no_case("</b>"), tag_no_case("</b>"),
@ -951,7 +891,7 @@ impl Context {
)(input) )(input)
} }
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
(tag("**"), FlankingRule::Lenient), (tag("**"), FlankingRule::Lenient),
(tag("**"), FlankingRule::Lenient), (tag("**"), FlankingRule::Lenient),
@ -967,7 +907,7 @@ impl Context {
)(input) )(input)
} }
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
(tag("__"), FlankingRule::Strict), (tag("__"), FlankingRule::Strict),
(tag("__"), FlankingRule::Strict), (tag("__"), FlankingRule::Strict),
@ -983,7 +923,7 @@ impl Context {
)(input) )(input)
} }
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
tag_no_case("<i>"), tag_no_case("<i>"),
tag_no_case("</i>"), tag_no_case("</i>"),
@ -999,7 +939,7 @@ impl Context {
)(input) )(input)
} }
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
(tag("*"), FlankingRule::Lenient), (tag("*"), FlankingRule::Lenient),
(tag("*"), FlankingRule::Lenient), (tag("*"), FlankingRule::Lenient),
@ -1015,7 +955,7 @@ impl Context {
)(input) )(input)
} }
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
(tag("_"), FlankingRule::Strict), (tag("_"), FlankingRule::Strict),
(tag("_"), FlankingRule::Strict), (tag("_"), FlankingRule::Strict),
@ -1031,7 +971,7 @@ impl Context {
)(input) )(input)
} }
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
tag_no_case("<s>"), tag_no_case("<s>"),
tag_no_case("</s>"), tag_no_case("</s>"),
@ -1047,7 +987,7 @@ impl Context {
)(input) )(input)
} }
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
(tag("~~"), FlankingRule::Lenient), (tag("~~"), FlankingRule::Lenient),
(tag("~~"), FlankingRule::Lenient), (tag("~~"), FlankingRule::Lenient),
@ -1076,7 +1016,7 @@ impl Context {
)(input) )(input)
} }
fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
tag("`"), tag("`"),
|input| alt((tag("`"), tag("´")))(input), |input| alt((tag("`"), tag("´")))(input),
@ -1094,7 +1034,7 @@ impl Context {
)(input) )(input)
} }
fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited( self.tag_delimited(
tag("\\("), tag("\\("),
tag("\\)"), tag("\\)"),
@ -1109,12 +1049,12 @@ impl Context {
)(input) )(input)
} }
fn text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_raw_text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, text) = map(recognize(anychar), Span::into_fragment)(input)?; let (input, text) = anychar(input)?;
Ok((input, Token::PlainText(text.into()))) Ok((input, Token::PlainText(text.to_compact_string())))
} }
fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, url_span) = recognize(tuple(( let (input, url_span) = recognize(tuple((
protocol, protocol,
url_chars(|input| not(url_chars_base)(input), false), url_chars(|input| not(url_chars_base)(input), false),
@ -1130,21 +1070,21 @@ impl Context {
url url
}; };
Ok((input, Token::UrlRaw(Cow::from(final_url)))) Ok((input, Token::UrlRaw(final_url.to_string())))
} }
fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, _) = tag("<")(input)?; let (input, _) = tag("<")(input)?;
let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?; let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
let (input, _) = tag(">")(input)?; let (input, _) = tag(">")(input)?;
Ok(( Ok((
input, input,
Token::UrlNoEmbed(Cow::from(url_span.into_fragment())), Token::UrlNoEmbed(url_span.into_fragment().to_string()),
)) ))
} }
fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, no_embed) = opt(tag("?"))(input)?; let (input, no_embed) = opt(tag("?"))(input)?;
let (input, _) = tag("[")(input)?; let (input, _) = tag("[")(input)?;
let (input, _) = not(tag("["))(input)?; let (input, _) = not(tag("["))(input)?;
@ -1163,7 +1103,7 @@ impl Context {
)) ))
} }
fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let frag = input.fragment(); let frag = input.fragment();
let Some(grapheme) = frag.graphemes(true).next() else { let Some(grapheme) = frag.graphemes(true).next() else {
return fail(input); return fail(input);
@ -1183,7 +1123,7 @@ impl Context {
)) ))
} }
fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
if let (plain_out, Some(plain)) = map( if let (plain_out, Some(plain)) = map(
opt(recognize(tuple(( opt(recognize(tuple((
alphanumeric1_unicode, alphanumeric1_unicode,
@ -1209,7 +1149,7 @@ impl Context {
Ok((input, Token::ShortcodeEmoji(shortcode.into()))) Ok((input, Token::ShortcodeEmoji(shortcode.into())))
} }
fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
if let (plain_out, Some(plain)) = map( if let (plain_out, Some(plain)) = map(
opt(recognize(tuple(( opt(recognize(tuple((
alt((tag("\\"), alphanumeric1_unicode)), alt((tag("\\"), alphanumeric1_unicode)),
@ -1257,7 +1197,7 @@ impl Context {
)) ))
} }
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> { fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
// TODO: Skip when preceded by alphanumerics // TODO: Skip when preceded by alphanumerics
let (input, _) = tag("#")(input)?; let (input, _) = tag("#")(input)?;
@ -1319,11 +1259,10 @@ fn url_chars<'a, T: 'a>(
mod test { mod test {
use crate::{url_chars, Context, Span, Token}; use crate::{url_chars, Context, Span, Token};
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
fn parse_full(string: &str) -> Token { fn parse_full(string: &str) -> Token {
Context.full(Span::new(string)).unwrap().1.merged().owned() Context.full(Span::new(string)).unwrap().1.merged()
} }
#[test] #[test]
@ -1795,7 +1734,7 @@ text</center>"#
Token::Sequence( Token::Sequence(
vec!["🥺", "💜", "❤️", "🦊"] vec!["🥺", "💜", "❤️", "🦊"]
.into_iter() .into_iter()
.map(<&str as Into<Cow<_>>>::into) .map(str::to_string)
.map(Token::UnicodeEmoji) .map(Token::UnicodeEmoji)
.collect::<Vec<_>>() .collect::<Vec<_>>()
) )