MMM: Made the parser always output owned tokens
This commit is contained in:
parent
d0d977e6eb
commit
23a63f2fe9
|
@ -462,6 +462,15 @@ version = "0.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663"
|
||||
|
||||
[[package]]
|
||||
name = "castaway"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc"
|
||||
dependencies = [
|
||||
"rustversion",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.81"
|
||||
|
@ -584,6 +593,19 @@ dependencies = [
|
|||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "compact_str"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f"
|
||||
dependencies = [
|
||||
"castaway",
|
||||
"cfg-if",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const-oid"
|
||||
version = "0.9.4"
|
||||
|
@ -1622,6 +1644,7 @@ dependencies = [
|
|||
name = "mmm_parser"
|
||||
version = "0.2.1-alpha"
|
||||
dependencies = [
|
||||
"compact_str",
|
||||
"either",
|
||||
"emojis",
|
||||
"nom",
|
||||
|
|
|
@ -28,6 +28,7 @@ axum = "0.6"
|
|||
cached = "0.46"
|
||||
cfg-if = "1"
|
||||
chrono = "0.4"
|
||||
compact_str = "0.7"
|
||||
dotenvy = "0.15"
|
||||
either = "1.9"
|
||||
emojis = "0.6"
|
||||
|
|
|
@ -9,4 +9,5 @@ either = { workspace = true }
|
|||
emojis = { workspace = true }
|
||||
nom = { workspace = true }
|
||||
nom_locate = { workspace = true }
|
||||
compact_str = { workspace = true }
|
||||
unicode-segmentation = { workspace = true }
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
use compact_str::{CompactString, ToCompactString};
|
||||
use either::Either;
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{tag, tag_no_case};
|
||||
use nom::character::complete::{
|
||||
alpha1, alphanumeric1, anychar, char as one_char, char, line_ending, not_line_ending, one_of,
|
||||
alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
|
||||
satisfy, space1, tab,
|
||||
};
|
||||
use nom::combinator::{eof, fail, map, not, opt, recognize};
|
||||
use nom::error::ErrorKind;
|
||||
use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
|
||||
use nom::sequence::tuple;
|
||||
use nom::{Compare, IResult, Offset, Slice};
|
||||
use nom::{IResult, Offset, Slice};
|
||||
use nom_locate::LocatedSpan;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::{identity, Infallible};
|
||||
use std::marker::PhantomData;
|
||||
|
@ -33,47 +33,47 @@ impl MentionType {
|
|||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum Token<'a> {
|
||||
PlainText(Cow<'a, str>),
|
||||
Sequence(Vec<Token<'a>>),
|
||||
Quote(Box<Token<'a>>),
|
||||
Small(Box<Token<'a>>),
|
||||
BoldItalic(Box<Token<'a>>),
|
||||
Bold(Box<Token<'a>>),
|
||||
Italic(Box<Token<'a>>),
|
||||
Center(Box<Token<'a>>),
|
||||
Strikethrough(Box<Token<'a>>),
|
||||
PlainTag(Cow<'a, str>),
|
||||
InlineCode(Cow<'a, str>),
|
||||
InlineMath(Cow<'a, str>),
|
||||
UrlRaw(Cow<'a, str>),
|
||||
UrlNoEmbed(Cow<'a, str>),
|
||||
pub enum Token {
|
||||
PlainText(CompactString),
|
||||
Sequence(Vec<Token>),
|
||||
Quote(Box<Token>),
|
||||
Small(Box<Token>),
|
||||
BoldItalic(Box<Token>),
|
||||
Bold(Box<Token>),
|
||||
Italic(Box<Token>),
|
||||
Center(Box<Token>),
|
||||
Strikethrough(Box<Token>),
|
||||
PlainTag(String),
|
||||
InlineCode(String),
|
||||
InlineMath(String),
|
||||
UrlRaw(String),
|
||||
UrlNoEmbed(String),
|
||||
Link {
|
||||
label: Box<Token<'a>>,
|
||||
href: Cow<'a, str>,
|
||||
label: Box<Token>,
|
||||
href: String,
|
||||
embed: bool,
|
||||
},
|
||||
BlockCode {
|
||||
lang: Option<Cow<'a, str>>,
|
||||
inner: Cow<'a, str>,
|
||||
lang: Option<String>,
|
||||
inner: String,
|
||||
},
|
||||
BlockMath(Cow<'a, str>),
|
||||
BlockMath(String),
|
||||
Function {
|
||||
name: Cow<'a, str>,
|
||||
params: HashMap<Cow<'a, str>, Option<Cow<'a, str>>>,
|
||||
inner: Box<Token<'a>>,
|
||||
name: String,
|
||||
params: HashMap<String, Option<String>>,
|
||||
inner: Box<Token>,
|
||||
},
|
||||
Mention {
|
||||
name: Cow<'a, str>,
|
||||
host: Option<Cow<'a, str>>,
|
||||
name: String,
|
||||
host: Option<String>,
|
||||
mention_type: MentionType,
|
||||
},
|
||||
UnicodeEmoji(Cow<'a, str>),
|
||||
ShortcodeEmoji(Cow<'a, str>),
|
||||
Hashtag(Cow<'a, str>),
|
||||
UnicodeEmoji(String),
|
||||
ShortcodeEmoji(String),
|
||||
Hashtag(String),
|
||||
}
|
||||
|
||||
impl Token<'_> {
|
||||
impl Token {
|
||||
fn str_content_left(&self) -> Option<&str> {
|
||||
match self {
|
||||
Token::PlainText(text) => Some(text.as_ref()),
|
||||
|
@ -132,79 +132,19 @@ impl Token<'_> {
|
|||
Token::Italic(inner) => inner.inner(),
|
||||
Token::Center(inner) => inner.inner(),
|
||||
Token::Strikethrough(inner) => inner.inner(),
|
||||
Token::PlainTag(text) => Token::PlainText(text.clone()),
|
||||
Token::InlineCode(code) => Token::PlainText(code.clone()),
|
||||
Token::InlineMath(math) => Token::PlainText(math.clone()),
|
||||
Token::UrlRaw(url) => Token::PlainText(url.clone()),
|
||||
Token::UrlNoEmbed(url) => Token::PlainText(url.clone()),
|
||||
Token::PlainTag(text) => Token::PlainText(text.clone().into()),
|
||||
Token::InlineCode(code) => Token::PlainText(code.clone().into()),
|
||||
Token::InlineMath(math) => Token::PlainText(math.clone().into()),
|
||||
Token::UrlRaw(url) => Token::PlainText(url.clone().into()),
|
||||
Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()),
|
||||
Token::Link { label, .. } => label.inner(),
|
||||
Token::BlockCode { inner, .. } => Token::PlainText(inner.clone()),
|
||||
Token::BlockMath(math) => Token::PlainText(math.clone()),
|
||||
Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()),
|
||||
Token::BlockMath(math) => Token::PlainText(math.clone().into()),
|
||||
Token::Function { inner, .. } => inner.inner(),
|
||||
Token::Mention { name, .. } => Token::PlainText(name.clone()),
|
||||
Token::UnicodeEmoji(code) => Token::PlainText(code.clone()),
|
||||
Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone()),
|
||||
Token::Hashtag(tag) => Token::PlainText(tag.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
fn owned(&self) -> Token<'static> {
|
||||
match self {
|
||||
Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())),
|
||||
Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()),
|
||||
Token::Quote(inner) => Token::Quote(Box::new(inner.owned())),
|
||||
Token::Small(inner) => Token::Small(Box::new(inner.owned())),
|
||||
Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())),
|
||||
Token::Bold(inner) => Token::Bold(Box::new(inner.owned())),
|
||||
Token::Italic(inner) => Token::Italic(Box::new(inner.owned())),
|
||||
Token::Center(inner) => Token::Center(Box::new(inner.owned())),
|
||||
Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.owned())),
|
||||
Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())),
|
||||
Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())),
|
||||
Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())),
|
||||
Token::UrlRaw(url) => Token::UrlRaw(Cow::Owned(url.clone().into_owned())),
|
||||
Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())),
|
||||
Token::Link { embed, label, href } => Token::Link {
|
||||
embed: *embed,
|
||||
label: Box::new(label.owned()),
|
||||
href: Cow::Owned(href.clone().into_owned()),
|
||||
},
|
||||
Token::BlockCode { inner, lang } => Token::BlockCode {
|
||||
lang: lang.as_ref().map(|l| Cow::Owned(l.clone().into_owned())),
|
||||
inner: Cow::Owned(inner.clone().into_owned()),
|
||||
},
|
||||
Token::BlockMath(math) => Token::BlockMath(Cow::Owned(math.clone().into_owned())),
|
||||
Token::Function {
|
||||
name,
|
||||
params,
|
||||
inner,
|
||||
} => Token::Function {
|
||||
name: Cow::Owned(name.clone().into_owned()),
|
||||
params: params
|
||||
.iter()
|
||||
.map(|(k, v)| {
|
||||
(
|
||||
Cow::Owned(k.clone().into_owned()),
|
||||
v.as_ref().map(|val| Cow::Owned(val.clone().into_owned())),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
inner: Box::new(inner.owned()),
|
||||
},
|
||||
Token::Mention {
|
||||
name,
|
||||
host,
|
||||
mention_type,
|
||||
} => Token::Mention {
|
||||
name: Cow::Owned(name.clone().into_owned()),
|
||||
host: host.as_ref().map(|v| Cow::Owned(v.clone().into_owned())),
|
||||
mention_type: *mention_type,
|
||||
},
|
||||
Token::UnicodeEmoji(code) => Token::UnicodeEmoji(Cow::Owned(code.clone().into_owned())),
|
||||
Token::ShortcodeEmoji(shortcode) => {
|
||||
Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
|
||||
}
|
||||
Token::Hashtag(tag) => Token::Hashtag(Cow::Owned(tag.clone().into_owned())),
|
||||
Token::Mention { name, .. } => Token::PlainText(name.clone().into()),
|
||||
Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()),
|
||||
Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone().into()),
|
||||
Token::Hashtag(tag) => Token::PlainText(tag.clone().into()),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -214,7 +154,7 @@ impl Token<'_> {
|
|||
let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
|
||||
if let Some(Token::PlainText(last)) = acc.last_mut() {
|
||||
if let Token::PlainText(tok_text) = tok {
|
||||
*last = Cow::from(last.to_string() + tok_text.as_ref());
|
||||
*last += tok_text.as_ref();
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
@ -229,7 +169,7 @@ impl Token<'_> {
|
|||
for item in items {
|
||||
if let Some(Token::PlainText(last)) = acc.last_mut() {
|
||||
if let Token::PlainText(tok_text) = item {
|
||||
*last = Cow::from(last.to_string() + tok_text.as_ref());
|
||||
*last += tok_text.as_ref();
|
||||
|
||||
continue;
|
||||
}
|
||||
|
@ -301,23 +241,23 @@ impl SliceOffset for Span<'_> {
|
|||
}
|
||||
|
||||
#[inline]
|
||||
fn boxing_token<'a>(func: impl Fn(Box<Token<'a>>) -> Token<'a>) -> impl Fn(Token<'a>) -> Token<'a> {
|
||||
fn boxing_token(func: impl Fn(Box<Token>) -> Token) -> impl Fn(Token) -> Token {
|
||||
move |tokens| func(Box::new(tokens))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn collect_sequence<'a, T>(
|
||||
func: impl Fn(Vec<T>) -> Token<'a>,
|
||||
transform: impl Fn(Token<'a>) -> Token<'a>,
|
||||
) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token<'a> {
|
||||
fn collect_sequence<T>(
|
||||
func: impl Fn(Vec<T>) -> Token,
|
||||
transform: impl Fn(Token) -> Token,
|
||||
) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token {
|
||||
move |tokens| transform(func(tokens.collect()))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn collect_char_sequence<'a>(
|
||||
func: impl Fn(Cow<'a, str>) -> Token<'a>,
|
||||
) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token<'a> {
|
||||
move |chars| func(Cow::Owned(chars.collect()))
|
||||
fn collect_char_sequence(
|
||||
func: impl Fn(String) -> Token,
|
||||
) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token {
|
||||
move |chars| func(chars.collect())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -334,7 +274,7 @@ fn spliced<'a>(
|
|||
segments: &[Span<'a>],
|
||||
func: impl Fn(Span) -> IResult<Span, Token>,
|
||||
parent: Span<'a>,
|
||||
) -> IResult<Span<'a>, Token<'static>, nom::error::Error<Span<'a>>> {
|
||||
) -> IResult<Span<'a>, Token, nom::error::Error<Span<'a>>> {
|
||||
let combined = segments
|
||||
.iter()
|
||||
.copied()
|
||||
|
@ -362,7 +302,7 @@ fn spliced<'a>(
|
|||
|
||||
let quote_span = Span::new(&combined);
|
||||
let (input, inner) = match func(quote_span) {
|
||||
Ok((input, token)) => (input, token.owned()),
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
return match e {
|
||||
NE::Error(e) => {
|
||||
|
@ -393,7 +333,7 @@ fn spliced<'a>(
|
|||
parent
|
||||
};
|
||||
|
||||
Ok((out, inner.owned()))
|
||||
Ok((out, inner))
|
||||
}
|
||||
|
||||
fn space(input: Span) -> IResult<Span, Token> {
|
||||
|
@ -404,7 +344,7 @@ fn space(input: Span) -> IResult<Span, Token> {
|
|||
#[derive(Copy, Clone)]
|
||||
struct Matcher<'a, 'b, T: Clone> {
|
||||
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
|
||||
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
|
||||
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
|
||||
_phantom_closure: PhantomData<&'a ()>,
|
||||
_phantom_data: PhantomData<&'b ()>,
|
||||
_phantom_output: PhantomData<fn() -> T>,
|
||||
|
@ -413,7 +353,7 @@ struct Matcher<'a, 'b, T: Clone> {
|
|||
impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> {
|
||||
fn new(
|
||||
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
|
||||
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
|
||||
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
|
||||
) -> Self {
|
||||
Self {
|
||||
matcher_inner,
|
||||
|
@ -471,27 +411,27 @@ impl Context {
|
|||
#[inline]
|
||||
fn partial(
|
||||
&self,
|
||||
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
|
||||
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
|
||||
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token> + 'static,
|
||||
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token> + '_ {
|
||||
move |input| func(self, input)
|
||||
}
|
||||
|
||||
pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
|
||||
}
|
||||
|
||||
pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
|
||||
}
|
||||
|
||||
pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
map(
|
||||
many1(self.partial(Self::inline_label_safe_single)),
|
||||
Token::Sequence,
|
||||
)(input)
|
||||
}
|
||||
|
||||
fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
alt((
|
||||
self.partial(Self::tag_bold_italic_asterisk),
|
||||
self.partial(Self::tag_bold_italic_underscore),
|
||||
|
@ -502,7 +442,7 @@ impl Context {
|
|||
))(input)
|
||||
}
|
||||
|
||||
fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let (input, token) = alt((
|
||||
self.partial(Self::unicode_emoji),
|
||||
alt((
|
||||
|
@ -527,12 +467,12 @@ impl Context {
|
|||
self.partial(Self::shortcode_emoji),
|
||||
self.partial(Self::link),
|
||||
self.partial(Self::raw_url),
|
||||
self.partial(Self::text),
|
||||
self.partial(Self::tag_raw_text),
|
||||
))(input)?;
|
||||
Ok((input, token))
|
||||
}
|
||||
|
||||
fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let (input, token) = alt((
|
||||
self.partial(Self::unicode_emoji),
|
||||
self.partial(Self::tag_small),
|
||||
|
@ -551,12 +491,12 @@ impl Context {
|
|||
self.partial(Self::shortcode_emoji),
|
||||
self.partial(Self::link),
|
||||
self.partial(Self::raw_url),
|
||||
self.partial(Self::text),
|
||||
self.partial(Self::tag_raw_text),
|
||||
))(input)?;
|
||||
Ok((input, token))
|
||||
}
|
||||
|
||||
fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let (input, token) = alt((
|
||||
self.partial(Self::unicode_emoji),
|
||||
self.partial(Self::url_no_embed),
|
||||
|
@ -567,12 +507,12 @@ impl Context {
|
|||
self.partial(Self::tag_hashtag),
|
||||
self.partial(Self::shortcode_emoji),
|
||||
self.partial(Self::raw_url),
|
||||
self.partial(Self::text),
|
||||
self.partial(Self::tag_raw_text),
|
||||
))(input)?;
|
||||
Ok((input, token))
|
||||
}
|
||||
|
||||
fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let (input, token) = alt((
|
||||
self.partial(Self::unicode_emoji),
|
||||
self.partial(Self::tag_small),
|
||||
|
@ -584,12 +524,12 @@ impl Context {
|
|||
self.partial(Self::tag_strikethrough_tilde),
|
||||
self.partial(Self::tag_func),
|
||||
self.partial(Self::shortcode_emoji),
|
||||
self.partial(Self::text),
|
||||
self.partial(Self::tag_raw_text),
|
||||
))(input)?;
|
||||
Ok((input, token))
|
||||
}
|
||||
|
||||
fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
|
||||
|
||||
if let (None, None) = leading_spaces {
|
||||
|
@ -625,7 +565,7 @@ impl Context {
|
|||
Ok((input, Token::Quote(Box::new(inner))))
|
||||
}
|
||||
|
||||
fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let tag_start = &tag("<center>");
|
||||
let tag_end = &tag("</center>");
|
||||
|
||||
|
@ -649,7 +589,7 @@ impl Context {
|
|||
))
|
||||
}
|
||||
|
||||
fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let delim = &tag("```");
|
||||
|
||||
let (input, _) = opt(line_ending)(input)?;
|
||||
|
@ -688,7 +628,7 @@ impl Context {
|
|||
))
|
||||
}
|
||||
|
||||
fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let start = &tag("\\[");
|
||||
let end = &tag("\\]");
|
||||
|
||||
|
@ -714,7 +654,7 @@ impl Context {
|
|||
|
||||
Ok((
|
||||
input,
|
||||
Token::BlockMath(Cow::Borrowed(math_span.into_fragment())),
|
||||
Token::BlockMath(math_span.into_fragment().to_string()),
|
||||
))
|
||||
}
|
||||
|
||||
|
@ -726,7 +666,7 @@ impl Context {
|
|||
escape: bool,
|
||||
matcher: Matcher<'a, 'b, T>,
|
||||
fallback: Matcher<'a, 'b, S>,
|
||||
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_
|
||||
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token> + '_
|
||||
where
|
||||
FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
||||
FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
||||
|
@ -739,7 +679,7 @@ impl Context {
|
|||
if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
|
||||
return Ok((
|
||||
input_escaped,
|
||||
Token::PlainText(Cow::Borrowed(mark.fragment())),
|
||||
Token::PlainText(mark.fragment().to_string().into()),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
@ -814,7 +754,7 @@ impl Context {
|
|||
input,
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText(begin.fragment_between(&post_open).into()),
|
||||
inner_tok.inner().owned(),
|
||||
inner_tok.inner(),
|
||||
Token::PlainText(closing.into_fragment().into()),
|
||||
]),
|
||||
));
|
||||
|
@ -823,7 +763,7 @@ impl Context {
|
|||
}
|
||||
}
|
||||
|
||||
fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let (input, _) = tag("$[")(input)?;
|
||||
|
||||
let func_ident = |input| {
|
||||
|
@ -852,8 +792,8 @@ impl Context {
|
|||
.into_iter()
|
||||
.map(|(k, v)| {
|
||||
(
|
||||
Cow::from(k.into_fragment()),
|
||||
v.map(|(_, val)| Cow::from(val.into_fragment())),
|
||||
k.into_fragment().to_string(),
|
||||
v.map(|(_, val)| val.into_fragment().to_string()),
|
||||
)
|
||||
})
|
||||
.collect::<HashMap<_, _>>()
|
||||
|
@ -866,14 +806,14 @@ impl Context {
|
|||
Ok((
|
||||
input,
|
||||
Token::Function {
|
||||
name: Cow::from(func_name),
|
||||
name: func_name.to_string(),
|
||||
params: args_out,
|
||||
inner: Box::new(Token::Sequence(inner)),
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let opening_tag = &tag("<plain>");
|
||||
let closing_tag = &tag("</plain>");
|
||||
|
||||
|
@ -887,7 +827,7 @@ impl Context {
|
|||
Ok((input, Token::PlainTag(text.into())))
|
||||
}
|
||||
|
||||
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
tag_no_case("<small>"),
|
||||
tag_no_case("</small>"),
|
||||
|
@ -903,7 +843,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
(tag("***"), FlankingRule::Lenient),
|
||||
(tag("***"), FlankingRule::Lenient),
|
||||
|
@ -919,7 +859,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
(tag("___"), FlankingRule::Strict),
|
||||
(tag("___"), FlankingRule::Strict),
|
||||
|
@ -935,7 +875,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
tag_no_case("<b>"),
|
||||
tag_no_case("</b>"),
|
||||
|
@ -951,7 +891,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
(tag("**"), FlankingRule::Lenient),
|
||||
(tag("**"), FlankingRule::Lenient),
|
||||
|
@ -967,7 +907,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
(tag("__"), FlankingRule::Strict),
|
||||
(tag("__"), FlankingRule::Strict),
|
||||
|
@ -983,7 +923,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
tag_no_case("<i>"),
|
||||
tag_no_case("</i>"),
|
||||
|
@ -999,7 +939,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
(tag("*"), FlankingRule::Lenient),
|
||||
(tag("*"), FlankingRule::Lenient),
|
||||
|
@ -1015,7 +955,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
(tag("_"), FlankingRule::Strict),
|
||||
(tag("_"), FlankingRule::Strict),
|
||||
|
@ -1031,7 +971,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
tag_no_case("<s>"),
|
||||
tag_no_case("</s>"),
|
||||
|
@ -1047,7 +987,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
(tag("~~"), FlankingRule::Lenient),
|
||||
(tag("~~"), FlankingRule::Lenient),
|
||||
|
@ -1076,7 +1016,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
tag("`"),
|
||||
|input| alt((tag("`"), tag("´")))(input),
|
||||
|
@ -1094,7 +1034,7 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
self.tag_delimited(
|
||||
tag("\\("),
|
||||
tag("\\)"),
|
||||
|
@ -1109,12 +1049,12 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
fn text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
let (input, text) = map(recognize(anychar), Span::into_fragment)(input)?;
|
||||
Ok((input, Token::PlainText(text.into())))
|
||||
fn tag_raw_text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let (input, text) = anychar(input)?;
|
||||
Ok((input, Token::PlainText(text.to_compact_string())))
|
||||
}
|
||||
|
||||
fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let (input, url_span) = recognize(tuple((
|
||||
protocol,
|
||||
url_chars(|input| not(url_chars_base)(input), false),
|
||||
|
@ -1130,21 +1070,21 @@ impl Context {
|
|||
url
|
||||
};
|
||||
|
||||
Ok((input, Token::UrlRaw(Cow::from(final_url))))
|
||||
Ok((input, Token::UrlRaw(final_url.to_string())))
|
||||
}
|
||||
|
||||
fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let (input, _) = tag("<")(input)?;
|
||||
let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
|
||||
let (input, _) = tag(">")(input)?;
|
||||
|
||||
Ok((
|
||||
input,
|
||||
Token::UrlNoEmbed(Cow::from(url_span.into_fragment())),
|
||||
Token::UrlNoEmbed(url_span.into_fragment().to_string()),
|
||||
))
|
||||
}
|
||||
|
||||
fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let (input, no_embed) = opt(tag("?"))(input)?;
|
||||
let (input, _) = tag("[")(input)?;
|
||||
let (input, _) = not(tag("["))(input)?;
|
||||
|
@ -1163,7 +1103,7 @@ impl Context {
|
|||
))
|
||||
}
|
||||
|
||||
fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
let frag = input.fragment();
|
||||
let Some(grapheme) = frag.graphemes(true).next() else {
|
||||
return fail(input);
|
||||
|
@ -1183,7 +1123,7 @@ impl Context {
|
|||
))
|
||||
}
|
||||
|
||||
fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
if let (plain_out, Some(plain)) = map(
|
||||
opt(recognize(tuple((
|
||||
alphanumeric1_unicode,
|
||||
|
@ -1209,7 +1149,7 @@ impl Context {
|
|||
Ok((input, Token::ShortcodeEmoji(shortcode.into())))
|
||||
}
|
||||
|
||||
fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
if let (plain_out, Some(plain)) = map(
|
||||
opt(recognize(tuple((
|
||||
alt((tag("\\"), alphanumeric1_unicode)),
|
||||
|
@ -1257,7 +1197,7 @@ impl Context {
|
|||
))
|
||||
}
|
||||
|
||||
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
|
||||
// TODO: Skip when preceded by alphanumerics
|
||||
|
||||
let (input, _) = tag("#")(input)?;
|
||||
|
@ -1319,11 +1259,10 @@ fn url_chars<'a, T: 'a>(
|
|||
mod test {
|
||||
use crate::{url_chars, Context, Span, Token};
|
||||
use nom::bytes::complete::tag;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
|
||||
fn parse_full(string: &str) -> Token {
|
||||
Context.full(Span::new(string)).unwrap().1.merged().owned()
|
||||
Context.full(Span::new(string)).unwrap().1.merged()
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1795,7 +1734,7 @@ text</center>"#
|
|||
Token::Sequence(
|
||||
vec!["🥺", "💜", "❤️", "🦊"]
|
||||
.into_iter()
|
||||
.map(<&str as Into<Cow<_>>>::into)
|
||||
.map(str::to_string)
|
||||
.map(Token::UnicodeEmoji)
|
||||
.collect::<Vec<_>>()
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue