MMM: Made the parser always output owned tokens

This commit is contained in:
Natty 2023-10-14 21:41:36 +02:00
parent d0d977e6eb
commit 23a63f2fe9
Signed by: natty
GPG Key ID: BF6CB659ADEE60EC
4 changed files with 138 additions and 174 deletions

23
Cargo.lock generated
View File

@ -462,6 +462,15 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663"
[[package]]
name = "castaway"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc"
dependencies = [
"rustversion",
]
[[package]]
name = "cc"
version = "1.0.81"
@ -584,6 +593,19 @@ dependencies = [
"tokio-util",
]
[[package]]
name = "compact_str"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f"
dependencies = [
"castaway",
"cfg-if",
"itoa",
"ryu",
"static_assertions",
]
[[package]]
name = "const-oid"
version = "0.9.4"
@ -1622,6 +1644,7 @@ dependencies = [
name = "mmm_parser"
version = "0.2.1-alpha"
dependencies = [
"compact_str",
"either",
"emojis",
"nom",

View File

@ -28,6 +28,7 @@ axum = "0.6"
cached = "0.46"
cfg-if = "1"
chrono = "0.4"
compact_str = "0.7"
dotenvy = "0.15"
either = "1.9"
emojis = "0.6"

View File

@ -9,4 +9,5 @@ either = { workspace = true }
emojis = { workspace = true }
nom = { workspace = true }
nom_locate = { workspace = true }
compact_str = { workspace = true }
unicode-segmentation = { workspace = true }

View File

@ -1,17 +1,17 @@
use compact_str::{CompactString, ToCompactString};
use either::Either;
use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case};
use nom::character::complete::{
alpha1, alphanumeric1, anychar, char as one_char, char, line_ending, not_line_ending, one_of,
alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
satisfy, space1, tab,
};
use nom::combinator::{eof, fail, map, not, opt, recognize};
use nom::error::ErrorKind;
use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
use nom::sequence::tuple;
use nom::{Compare, IResult, Offset, Slice};
use nom::{IResult, Offset, Slice};
use nom_locate::LocatedSpan;
use std::borrow::Cow;
use std::collections::HashMap;
use std::convert::{identity, Infallible};
use std::marker::PhantomData;
@ -33,47 +33,47 @@ impl MentionType {
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token<'a> {
PlainText(Cow<'a, str>),
Sequence(Vec<Token<'a>>),
Quote(Box<Token<'a>>),
Small(Box<Token<'a>>),
BoldItalic(Box<Token<'a>>),
Bold(Box<Token<'a>>),
Italic(Box<Token<'a>>),
Center(Box<Token<'a>>),
Strikethrough(Box<Token<'a>>),
PlainTag(Cow<'a, str>),
InlineCode(Cow<'a, str>),
InlineMath(Cow<'a, str>),
UrlRaw(Cow<'a, str>),
UrlNoEmbed(Cow<'a, str>),
pub enum Token {
PlainText(CompactString),
Sequence(Vec<Token>),
Quote(Box<Token>),
Small(Box<Token>),
BoldItalic(Box<Token>),
Bold(Box<Token>),
Italic(Box<Token>),
Center(Box<Token>),
Strikethrough(Box<Token>),
PlainTag(String),
InlineCode(String),
InlineMath(String),
UrlRaw(String),
UrlNoEmbed(String),
Link {
label: Box<Token<'a>>,
href: Cow<'a, str>,
label: Box<Token>,
href: String,
embed: bool,
},
BlockCode {
lang: Option<Cow<'a, str>>,
inner: Cow<'a, str>,
lang: Option<String>,
inner: String,
},
BlockMath(Cow<'a, str>),
BlockMath(String),
Function {
name: Cow<'a, str>,
params: HashMap<Cow<'a, str>, Option<Cow<'a, str>>>,
inner: Box<Token<'a>>,
name: String,
params: HashMap<String, Option<String>>,
inner: Box<Token>,
},
Mention {
name: Cow<'a, str>,
host: Option<Cow<'a, str>>,
name: String,
host: Option<String>,
mention_type: MentionType,
},
UnicodeEmoji(Cow<'a, str>),
ShortcodeEmoji(Cow<'a, str>),
Hashtag(Cow<'a, str>),
UnicodeEmoji(String),
ShortcodeEmoji(String),
Hashtag(String),
}
impl Token<'_> {
impl Token {
fn str_content_left(&self) -> Option<&str> {
match self {
Token::PlainText(text) => Some(text.as_ref()),
@ -132,79 +132,19 @@ impl Token<'_> {
Token::Italic(inner) => inner.inner(),
Token::Center(inner) => inner.inner(),
Token::Strikethrough(inner) => inner.inner(),
Token::PlainTag(text) => Token::PlainText(text.clone()),
Token::InlineCode(code) => Token::PlainText(code.clone()),
Token::InlineMath(math) => Token::PlainText(math.clone()),
Token::UrlRaw(url) => Token::PlainText(url.clone()),
Token::UrlNoEmbed(url) => Token::PlainText(url.clone()),
Token::PlainTag(text) => Token::PlainText(text.clone().into()),
Token::InlineCode(code) => Token::PlainText(code.clone().into()),
Token::InlineMath(math) => Token::PlainText(math.clone().into()),
Token::UrlRaw(url) => Token::PlainText(url.clone().into()),
Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()),
Token::Link { label, .. } => label.inner(),
Token::BlockCode { inner, .. } => Token::PlainText(inner.clone()),
Token::BlockMath(math) => Token::PlainText(math.clone()),
Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()),
Token::BlockMath(math) => Token::PlainText(math.clone().into()),
Token::Function { inner, .. } => inner.inner(),
Token::Mention { name, .. } => Token::PlainText(name.clone()),
Token::UnicodeEmoji(code) => Token::PlainText(code.clone()),
Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone()),
Token::Hashtag(tag) => Token::PlainText(tag.clone()),
}
}
fn owned(&self) -> Token<'static> {
match self {
Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())),
Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()),
Token::Quote(inner) => Token::Quote(Box::new(inner.owned())),
Token::Small(inner) => Token::Small(Box::new(inner.owned())),
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())),
Token::Bold(inner) => Token::Bold(Box::new(inner.owned())),
Token::Italic(inner) => Token::Italic(Box::new(inner.owned())),
Token::Center(inner) => Token::Center(Box::new(inner.owned())),
Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.owned())),
Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())),
Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())),
Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())),
Token::UrlRaw(url) => Token::UrlRaw(Cow::Owned(url.clone().into_owned())),
Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())),
Token::Link { embed, label, href } => Token::Link {
embed: *embed,
label: Box::new(label.owned()),
href: Cow::Owned(href.clone().into_owned()),
},
Token::BlockCode { inner, lang } => Token::BlockCode {
lang: lang.as_ref().map(|l| Cow::Owned(l.clone().into_owned())),
inner: Cow::Owned(inner.clone().into_owned()),
},
Token::BlockMath(math) => Token::BlockMath(Cow::Owned(math.clone().into_owned())),
Token::Function {
name,
params,
inner,
} => Token::Function {
name: Cow::Owned(name.clone().into_owned()),
params: params
.iter()
.map(|(k, v)| {
(
Cow::Owned(k.clone().into_owned()),
v.as_ref().map(|val| Cow::Owned(val.clone().into_owned())),
)
})
.collect(),
inner: Box::new(inner.owned()),
},
Token::Mention {
name,
host,
mention_type,
} => Token::Mention {
name: Cow::Owned(name.clone().into_owned()),
host: host.as_ref().map(|v| Cow::Owned(v.clone().into_owned())),
mention_type: *mention_type,
},
Token::UnicodeEmoji(code) => Token::UnicodeEmoji(Cow::Owned(code.clone().into_owned())),
Token::ShortcodeEmoji(shortcode) => {
Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
}
Token::Hashtag(tag) => Token::Hashtag(Cow::Owned(tag.clone().into_owned())),
Token::Mention { name, .. } => Token::PlainText(name.clone().into()),
Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()),
Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone().into()),
Token::Hashtag(tag) => Token::PlainText(tag.clone().into()),
}
}
@ -214,7 +154,7 @@ impl Token<'_> {
let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
if let Some(Token::PlainText(last)) = acc.last_mut() {
if let Token::PlainText(tok_text) = tok {
*last = Cow::from(last.to_string() + tok_text.as_ref());
*last += tok_text.as_ref();
return acc;
}
@ -229,7 +169,7 @@ impl Token<'_> {
for item in items {
if let Some(Token::PlainText(last)) = acc.last_mut() {
if let Token::PlainText(tok_text) = item {
*last = Cow::from(last.to_string() + tok_text.as_ref());
*last += tok_text.as_ref();
continue;
}
@ -301,23 +241,23 @@ impl SliceOffset for Span<'_> {
}
#[inline]
fn boxing_token<'a>(func: impl Fn(Box<Token<'a>>) -> Token<'a>) -> impl Fn(Token<'a>) -> Token<'a> {
fn boxing_token(func: impl Fn(Box<Token>) -> Token) -> impl Fn(Token) -> Token {
move |tokens| func(Box::new(tokens))
}
#[inline]
fn collect_sequence<'a, T>(
func: impl Fn(Vec<T>) -> Token<'a>,
transform: impl Fn(Token<'a>) -> Token<'a>,
) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token<'a> {
fn collect_sequence<T>(
func: impl Fn(Vec<T>) -> Token,
transform: impl Fn(Token) -> Token,
) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token {
move |tokens| transform(func(tokens.collect()))
}
#[inline]
fn collect_char_sequence<'a>(
func: impl Fn(Cow<'a, str>) -> Token<'a>,
) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token<'a> {
move |chars| func(Cow::Owned(chars.collect()))
fn collect_char_sequence(
func: impl Fn(String) -> Token,
) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token {
move |chars| func(chars.collect())
}
#[inline]
@ -334,7 +274,7 @@ fn spliced<'a>(
segments: &[Span<'a>],
func: impl Fn(Span) -> IResult<Span, Token>,
parent: Span<'a>,
) -> IResult<Span<'a>, Token<'static>, nom::error::Error<Span<'a>>> {
) -> IResult<Span<'a>, Token, nom::error::Error<Span<'a>>> {
let combined = segments
.iter()
.copied()
@ -362,7 +302,7 @@ fn spliced<'a>(
let quote_span = Span::new(&combined);
let (input, inner) = match func(quote_span) {
Ok((input, token)) => (input, token.owned()),
Ok(s) => s,
Err(e) => {
return match e {
NE::Error(e) => {
@ -393,7 +333,7 @@ fn spliced<'a>(
parent
};
Ok((out, inner.owned()))
Ok((out, inner))
}
fn space(input: Span) -> IResult<Span, Token> {
@ -404,7 +344,7 @@ fn space(input: Span) -> IResult<Span, Token> {
#[derive(Copy, Clone)]
struct Matcher<'a, 'b, T: Clone> {
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
_phantom_closure: PhantomData<&'a ()>,
_phantom_data: PhantomData<&'b ()>,
_phantom_output: PhantomData<fn() -> T>,
@ -413,7 +353,7 @@ struct Matcher<'a, 'b, T: Clone> {
impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> {
fn new(
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
) -> Self {
Self {
matcher_inner,
@ -471,27 +411,27 @@ impl Context {
#[inline]
fn partial(
&self,
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token> + 'static,
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token> + '_ {
move |input| func(self, input)
}
pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
}
pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
}
pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
map(
many1(self.partial(Self::inline_label_safe_single)),
Token::Sequence,
)(input)
}
fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
alt((
self.partial(Self::tag_bold_italic_asterisk),
self.partial(Self::tag_bold_italic_underscore),
@ -502,7 +442,7 @@ impl Context {
))(input)
}
fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, token) = alt((
self.partial(Self::unicode_emoji),
alt((
@ -527,12 +467,12 @@ impl Context {
self.partial(Self::shortcode_emoji),
self.partial(Self::link),
self.partial(Self::raw_url),
self.partial(Self::text),
self.partial(Self::tag_raw_text),
))(input)?;
Ok((input, token))
}
fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, token) = alt((
self.partial(Self::unicode_emoji),
self.partial(Self::tag_small),
@ -551,12 +491,12 @@ impl Context {
self.partial(Self::shortcode_emoji),
self.partial(Self::link),
self.partial(Self::raw_url),
self.partial(Self::text),
self.partial(Self::tag_raw_text),
))(input)?;
Ok((input, token))
}
fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, token) = alt((
self.partial(Self::unicode_emoji),
self.partial(Self::url_no_embed),
@ -567,12 +507,12 @@ impl Context {
self.partial(Self::tag_hashtag),
self.partial(Self::shortcode_emoji),
self.partial(Self::raw_url),
self.partial(Self::text),
self.partial(Self::tag_raw_text),
))(input)?;
Ok((input, token))
}
fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, token) = alt((
self.partial(Self::unicode_emoji),
self.partial(Self::tag_small),
@ -584,12 +524,12 @@ impl Context {
self.partial(Self::tag_strikethrough_tilde),
self.partial(Self::tag_func),
self.partial(Self::shortcode_emoji),
self.partial(Self::text),
self.partial(Self::tag_raw_text),
))(input)?;
Ok((input, token))
}
fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
if let (None, None) = leading_spaces {
@ -625,7 +565,7 @@ impl Context {
Ok((input, Token::Quote(Box::new(inner))))
}
fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let tag_start = &tag("<center>");
let tag_end = &tag("</center>");
@ -649,7 +589,7 @@ impl Context {
))
}
fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let delim = &tag("```");
let (input, _) = opt(line_ending)(input)?;
@ -688,7 +628,7 @@ impl Context {
))
}
fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let start = &tag("\\[");
let end = &tag("\\]");
@ -714,7 +654,7 @@ impl Context {
Ok((
input,
Token::BlockMath(Cow::Borrowed(math_span.into_fragment())),
Token::BlockMath(math_span.into_fragment().to_string()),
))
}
@ -726,7 +666,7 @@ impl Context {
escape: bool,
matcher: Matcher<'a, 'b, T>,
fallback: Matcher<'a, 'b, S>,
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token> + '_
where
FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
@ -739,7 +679,7 @@ impl Context {
if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
return Ok((
input_escaped,
Token::PlainText(Cow::Borrowed(mark.fragment())),
Token::PlainText(mark.fragment().to_string().into()),
));
}
}
@ -814,7 +754,7 @@ impl Context {
input,
Token::Sequence(vec![
Token::PlainText(begin.fragment_between(&post_open).into()),
inner_tok.inner().owned(),
inner_tok.inner(),
Token::PlainText(closing.into_fragment().into()),
]),
));
@ -823,7 +763,7 @@ impl Context {
}
}
fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, _) = tag("$[")(input)?;
let func_ident = |input| {
@ -852,8 +792,8 @@ impl Context {
.into_iter()
.map(|(k, v)| {
(
Cow::from(k.into_fragment()),
v.map(|(_, val)| Cow::from(val.into_fragment())),
k.into_fragment().to_string(),
v.map(|(_, val)| val.into_fragment().to_string()),
)
})
.collect::<HashMap<_, _>>()
@ -866,14 +806,14 @@ impl Context {
Ok((
input,
Token::Function {
name: Cow::from(func_name),
name: func_name.to_string(),
params: args_out,
inner: Box::new(Token::Sequence(inner)),
},
))
}
fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let opening_tag = &tag("<plain>");
let closing_tag = &tag("</plain>");
@ -887,7 +827,7 @@ impl Context {
Ok((input, Token::PlainTag(text.into())))
}
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
tag_no_case("<small>"),
tag_no_case("</small>"),
@ -903,7 +843,7 @@ impl Context {
)(input)
}
fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
(tag("***"), FlankingRule::Lenient),
(tag("***"), FlankingRule::Lenient),
@ -919,7 +859,7 @@ impl Context {
)(input)
}
fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
(tag("___"), FlankingRule::Strict),
(tag("___"), FlankingRule::Strict),
@ -935,7 +875,7 @@ impl Context {
)(input)
}
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
tag_no_case("<b>"),
tag_no_case("</b>"),
@ -951,7 +891,7 @@ impl Context {
)(input)
}
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
(tag("**"), FlankingRule::Lenient),
(tag("**"), FlankingRule::Lenient),
@ -967,7 +907,7 @@ impl Context {
)(input)
}
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
(tag("__"), FlankingRule::Strict),
(tag("__"), FlankingRule::Strict),
@ -983,7 +923,7 @@ impl Context {
)(input)
}
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
tag_no_case("<i>"),
tag_no_case("</i>"),
@ -999,7 +939,7 @@ impl Context {
)(input)
}
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
(tag("*"), FlankingRule::Lenient),
(tag("*"), FlankingRule::Lenient),
@ -1015,7 +955,7 @@ impl Context {
)(input)
}
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
(tag("_"), FlankingRule::Strict),
(tag("_"), FlankingRule::Strict),
@ -1031,7 +971,7 @@ impl Context {
)(input)
}
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
tag_no_case("<s>"),
tag_no_case("</s>"),
@ -1047,7 +987,7 @@ impl Context {
)(input)
}
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
(tag("~~"), FlankingRule::Lenient),
(tag("~~"), FlankingRule::Lenient),
@ -1076,7 +1016,7 @@ impl Context {
)(input)
}
fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
tag("`"),
|input| alt((tag("`"), tag("´")))(input),
@ -1094,7 +1034,7 @@ impl Context {
)(input)
}
fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
self.tag_delimited(
tag("\\("),
tag("\\)"),
@ -1109,12 +1049,12 @@ impl Context {
)(input)
}
fn text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
let (input, text) = map(recognize(anychar), Span::into_fragment)(input)?;
Ok((input, Token::PlainText(text.into())))
fn tag_raw_text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, text) = anychar(input)?;
Ok((input, Token::PlainText(text.to_compact_string())))
}
fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, url_span) = recognize(tuple((
protocol,
url_chars(|input| not(url_chars_base)(input), false),
@ -1130,21 +1070,21 @@ impl Context {
url
};
Ok((input, Token::UrlRaw(Cow::from(final_url))))
Ok((input, Token::UrlRaw(final_url.to_string())))
}
fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, _) = tag("<")(input)?;
let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
let (input, _) = tag(">")(input)?;
Ok((
input,
Token::UrlNoEmbed(Cow::from(url_span.into_fragment())),
Token::UrlNoEmbed(url_span.into_fragment().to_string()),
))
}
fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let (input, no_embed) = opt(tag("?"))(input)?;
let (input, _) = tag("[")(input)?;
let (input, _) = not(tag("["))(input)?;
@ -1163,7 +1103,7 @@ impl Context {
))
}
fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
let frag = input.fragment();
let Some(grapheme) = frag.graphemes(true).next() else {
return fail(input);
@ -1183,7 +1123,7 @@ impl Context {
))
}
fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
if let (plain_out, Some(plain)) = map(
opt(recognize(tuple((
alphanumeric1_unicode,
@ -1209,7 +1149,7 @@ impl Context {
Ok((input, Token::ShortcodeEmoji(shortcode.into())))
}
fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
if let (plain_out, Some(plain)) = map(
opt(recognize(tuple((
alt((tag("\\"), alphanumeric1_unicode)),
@ -1257,7 +1197,7 @@ impl Context {
))
}
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
// TODO: Skip when preceded by alphanumerics
let (input, _) = tag("#")(input)?;
@ -1319,11 +1259,10 @@ fn url_chars<'a, T: 'a>(
mod test {
use crate::{url_chars, Context, Span, Token};
use nom::bytes::complete::tag;
use std::borrow::Cow;
use std::collections::HashMap;
fn parse_full(string: &str) -> Token {
Context.full(Span::new(string)).unwrap().1.merged().owned()
Context.full(Span::new(string)).unwrap().1.merged()
}
#[test]
@ -1795,7 +1734,7 @@ text</center>"#
Token::Sequence(
vec!["🥺", "💜", "❤️", "🦊"]
.into_iter()
.map(<&str as Into<Cow<_>>>::into)
.map(str::to_string)
.map(Token::UnicodeEmoji)
.collect::<Vec<_>>()
)