Switch towards recursive ascent
This commit is contained in:
parent
f71429bfe0
commit
9f62c72f29
|
@ -435,12 +435,6 @@ dependencies = [
|
|||
"syn 1.0.109",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytecount"
|
||||
version = "0.6.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
|
@ -632,7 +626,6 @@ dependencies = [
|
|||
"itoa",
|
||||
"rustversion",
|
||||
"ryu",
|
||||
"serde",
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
|
@ -2054,13 +2047,11 @@ dependencies = [
|
|||
name = "magnetar_mmm_parser"
|
||||
version = "0.3.0-alpha"
|
||||
dependencies = [
|
||||
"compact_str",
|
||||
"either",
|
||||
"emojis",
|
||||
"nom",
|
||||
"nom_locate",
|
||||
"quick-xml",
|
||||
"serde",
|
||||
"smallvec",
|
||||
"strum",
|
||||
"tracing",
|
||||
"unicode-segmentation",
|
||||
|
@ -2325,17 +2316,6 @@ dependencies = [
|
|||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom_locate"
|
||||
version = "4.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3"
|
||||
dependencies = [
|
||||
"bytecount",
|
||||
"memchr",
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.46.0"
|
||||
|
|
|
@ -11,10 +11,8 @@ xml = ["dep:quick-xml"]
|
|||
[dependencies]
|
||||
either = { workspace = true }
|
||||
emojis = { workspace = true }
|
||||
nom = { workspace = true }
|
||||
nom_locate = { workspace = true }
|
||||
compact_str = { workspace = true, features = ["serde"] }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
smallvec = { workspace = true }
|
||||
strum = { workspace = true, features = ["derive"] }
|
||||
tracing = { workspace = true }
|
||||
unicode-segmentation = { workspace = true }
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,261 @@
|
|||
use either::Either;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use strum::IntoStaticStr;
|
||||
|
||||
/// A node of the parsed markup tree.
///
/// Container variants hold their children as a `Vec<Token>`; leaf variants hold their
/// textual payload as an owned `String`, except `PlainText`, which may borrow via `Cow`.
#[derive(Debug, Clone, Deserialize, Serialize, Eq, PartialEq)]
|
||||
pub enum Token<'a> {
|
||||
/// Unformatted text; the only variant whose payload can borrow for `'a`.
PlainText(Cow<'a, str>),
|
||||
/// An ordered list of sibling tokens without any formatting of its own.
Sequence(Vec<Token<'a>>),
|
||||
// Formatting containers: each wraps the list of tokens it applies to.
Quote(Vec<Token<'a>>),
|
||||
Small(Vec<Token<'a>>),
|
||||
BoldItalic(Vec<Token<'a>>),
|
||||
Bold(Vec<Token<'a>>),
|
||||
Italic(Vec<Token<'a>>),
|
||||
Center(Vec<Token<'a>>),
|
||||
Strikethrough(Vec<Token<'a>>),
|
||||
// Leaf variants carrying a single owned string.
PlainTag(String),
|
||||
InlineCode(String),
|
||||
InlineMath(String),
|
||||
UrlRaw(String),
|
||||
UrlNoEmbed(String),
|
||||
/// A labelled link: `label` is the token list shown, `href` the target.
Link {
|
||||
label: Vec<Token<'a>>,
|
||||
href: String,
|
||||
},
|
||||
/// Same fields as `Link`; the crate's tests expect this variant for the
/// `?[label](href)` form.
LinkNoEmbed {
|
||||
label: Vec<Token<'a>>,
|
||||
href: String,
|
||||
},
|
||||
/// A code block with an optional language tag and its raw contents.
BlockCode {
|
||||
lang: Option<String>,
|
||||
inner: String,
|
||||
},
|
||||
BlockMath(String),
|
||||
/// A named function applied to `inner`; each parameter maps a key to an optional value.
Function {
|
||||
name: String,
|
||||
params: HashMap<String, Option<String>>,
|
||||
inner: Vec<Token<'a>>,
|
||||
},
|
||||
/// A mention of `name`, optionally qualified by `host`; `mention_type` selects the kind.
Mention {
|
||||
name: String,
|
||||
host: Option<String>,
|
||||
mention_type: MentionType,
|
||||
},
|
||||
UnicodeEmoji(String),
|
||||
/// An emoji referenced by shortcode, optionally qualified by `host`.
ShortcodeEmoji {
|
||||
shortcode: String,
|
||||
host: Option<String>,
|
||||
},
|
||||
Hashtag(String),
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, IntoStaticStr)]
|
||||
// The alternative would be to implement a serde serializer for this one enum, but that's disgusting
|
||||
#[strum(serialize_all = "snake_case")]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum MentionType {
|
||||
Community,
|
||||
User,
|
||||
MatrixUser,
|
||||
}
|
||||
|
||||
impl MentionType {
|
||||
pub fn to_char(&self) -> char {
|
||||
match self {
|
||||
MentionType::Community => '!',
|
||||
MentionType::User => '@',
|
||||
MentionType::MatrixUser => ':',
|
||||
}
|
||||
}
|
||||
|
||||
pub fn separator(&self) -> char {
|
||||
match self {
|
||||
MentionType::Community | MentionType::User => '@',
|
||||
MentionType::MatrixUser => ':',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl Token<'_> {
|
||||
fn str_content_left(&self) -> Option<&str> {
|
||||
match self {
|
||||
Token::PlainText(text) => Some(text.as_ref()),
|
||||
Token::Sequence(tokens) => tokens.first().and_then(Token::str_content_left),
|
||||
Token::Quote(inner) => inner.str_content_left(),
|
||||
Token::Small(inner) => inner.str_content_left(),
|
||||
Token::Bold(inner) => inner.str_content_left(),
|
||||
Token::Italic(inner) => inner.str_content_left(),
|
||||
Token::Center(inner) => inner.str_content_left(),
|
||||
Token::Strikethrough(inner) => inner.str_content_left(),
|
||||
Token::PlainTag(tag) => Some(tag.as_ref()),
|
||||
Token::UrlRaw(url) => Some(url.as_ref()),
|
||||
Token::UrlNoEmbed(url) => Some(url.as_ref()),
|
||||
Token::Link { label, .. } => label.str_content_left(),
|
||||
Token::Function { inner, .. } => inner.str_content_left(),
|
||||
Token::Mention { name, .. } => Some(name.as_ref()),
|
||||
Token::UnicodeEmoji(code) => Some(code.as_ref()),
|
||||
Token::Hashtag(tag) => Some(tag.as_ref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn str_content_right(&self) -> Option<&str> {
|
||||
match self {
|
||||
Token::PlainText(text) => Some(text.as_ref()),
|
||||
Token::Sequence(tokens) => tokens.last().and_then(Token::str_content_right),
|
||||
Token::Quote(inner) => inner.str_content_right(),
|
||||
Token::Small(inner) => inner.str_content_right(),
|
||||
Token::Bold(inner) => inner.str_content_right(),
|
||||
Token::Italic(inner) => inner.str_content_right(),
|
||||
Token::Center(inner) => inner.str_content_right(),
|
||||
Token::Strikethrough(inner) => inner.str_content_right(),
|
||||
Token::PlainTag(tag) => Some(tag.as_ref()),
|
||||
Token::UrlRaw(url) => Some(url.as_ref()),
|
||||
Token::UrlNoEmbed(url) => Some(url.as_ref()),
|
||||
Token::Link { label, .. } => label.str_content_right(),
|
||||
Token::Function { inner, .. } => inner.str_content_right(),
|
||||
Token::Mention { name, .. } => Some(name.as_ref()),
|
||||
Token::UnicodeEmoji(code) => Some(code.as_ref()),
|
||||
Token::Hashtag(tag) => Some(tag.as_ref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn inner(&self) -> Token {
|
||||
match self {
|
||||
plain @ Token::PlainText(_) => plain.clone(),
|
||||
sequence @ Token::Sequence(_) => sequence.clone(),
|
||||
Token::Quote(inner) => inner.inner(),
|
||||
Token::Small(inner) => inner.inner(),
|
||||
Token::Bold(inner) => inner.inner(),
|
||||
Token::Italic(inner) => inner.inner(),
|
||||
Token::Center(inner) => inner.inner(),
|
||||
Token::Strikethrough(inner) => inner.inner(),
|
||||
Token::PlainTag(text) => Token::PlainText(text.clone().into()),
|
||||
Token::InlineCode(code) => Token::PlainText(code.clone().into()),
|
||||
Token::InlineMath(math) => Token::PlainText(math.clone().into()),
|
||||
Token::UrlRaw(url) => Token::PlainText(url.clone().into()),
|
||||
Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()),
|
||||
Token::Link { label, .. } => label.inner(),
|
||||
Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()),
|
||||
Token::BlockMath(math) => Token::PlainText(math.clone().into()),
|
||||
Token::Function { inner, .. } => inner.inner(),
|
||||
Token::Mention { name, .. } => Token::PlainText(name.clone().into()),
|
||||
Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()),
|
||||
Token::ShortcodeEmoji { shortcode, .. } => Token::PlainText(shortcode.clone().into()),
|
||||
Token::Hashtag(tag) => Token::PlainText(tag.clone().into()),
|
||||
}
|
||||
}
|
||||
|
||||
fn merged(&self) -> Token {
|
||||
match self {
|
||||
Token::Sequence(tokens) => {
|
||||
let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
|
||||
if let Some(Token::PlainText(last)) = acc.last_mut() {
|
||||
if let Token::PlainText(tok_text) = tok {
|
||||
*last += tok_text.as_ref();
|
||||
|
||||
return acc;
|
||||
}
|
||||
}
|
||||
|
||||
if let Token::Sequence(seq) = tok {
|
||||
let items = seq.iter().map(Token::merged).flat_map(|t| match t {
|
||||
Token::Sequence(seq) => Either::Left(seq.into_iter()),
|
||||
other => Either::Right(std::iter::once(other)),
|
||||
});
|
||||
|
||||
for item in items {
|
||||
if let Some(Token::PlainText(last)) = acc.last_mut() {
|
||||
if let Token::PlainText(tok_text) = item {
|
||||
*last += tok_text.as_ref();
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
acc.push(item);
|
||||
}
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
acc.push(tok.merged());
|
||||
acc
|
||||
});
|
||||
|
||||
if tokens_multi.len() == 1 {
|
||||
return tokens_multi.into_iter().next().unwrap();
|
||||
}
|
||||
|
||||
Token::Sequence(tokens_multi)
|
||||
}
|
||||
Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
|
||||
Token::Small(inner) => Token::Small(Box::new(inner.merged())),
|
||||
Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
|
||||
Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
|
||||
Token::Center(inner) => Token::Center(Box::new(inner.merged())),
|
||||
Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
|
||||
Token::Link { label, href } => Token::Link {
|
||||
label: Box::new(label.merged()),
|
||||
href: href.clone(),
|
||||
},
|
||||
Token::LinkNoEmbed { label, href } => Token::LinkNoEmbed {
|
||||
label: Box::new(label.merged()),
|
||||
href: href.clone(),
|
||||
},
|
||||
Token::Function {
|
||||
name,
|
||||
params,
|
||||
inner,
|
||||
} => Token::Function {
|
||||
name: name.clone(),
|
||||
params: params.clone(),
|
||||
inner: Box::new(inner.merged()),
|
||||
},
|
||||
other => other.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn walk_map_collect<T>(&self, func: &impl Fn(&Token) -> Option<T>, out: &mut Vec<T>) {
|
||||
if let Some(v) = func(self) {
|
||||
out.push(v)
|
||||
}
|
||||
|
||||
match self {
|
||||
Token::Sequence(items) => {
|
||||
items.iter().for_each(|tok| tok.walk_map_collect(func, out));
|
||||
}
|
||||
Token::Quote(inner)
|
||||
| Token::Small(inner)
|
||||
| Token::Bold(inner)
|
||||
| Token::Italic(inner)
|
||||
| Token::Center(inner)
|
||||
| Token::Function { inner, .. }
|
||||
| Token::Link { label: inner, .. }
|
||||
| Token::Strikethrough(inner) => inner.walk_map_collect(func, out),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn walk_speech_transform(&mut self, func: &impl Fn(&mut Cow<'_, str>)) {
|
||||
match self {
|
||||
Token::Sequence(items) => {
|
||||
items
|
||||
.iter_mut()
|
||||
.for_each(|tok| tok.walk_speech_transform(func));
|
||||
}
|
||||
Token::Small(inner)
|
||||
| Token::Bold(inner)
|
||||
| Token::Italic(inner)
|
||||
| Token::Center(inner)
|
||||
| Token::Function { inner, .. }
|
||||
| Token::Strikethrough(inner) => inner.walk_speech_transform(func),
|
||||
Token::PlainText(text) => func(text),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
use crate::types::{Effect, Input, Parser, ParserCont, ParserRet, State};
|
||||
|
||||
/// Parser step for the start of a line: inspects the leading bytes of `inp.view()` and
/// hands the matching follow-up parser(s) to `cont`.
///
/// Neither `state` nor `_output` is used, and no input is consumed here.
// NOTE(review): `quote`, `CodeBlock` and `BlockMath` are not defined in the visible part of
// this file — confirm where they are meant to come from.
fn line_start<'a>(
|
||||
state: &mut State,
|
||||
inp: &mut impl Input<'a>,
|
||||
_output: &'_ mut impl FnMut(Effect<'a>),
|
||||
cont: impl ParserCont,
|
||||
) -> ParserRet {
|
||||
match inp.view().as_bytes() {
|
||||
// "> " prefix: continue with a pair of parsers.
[b'>', b' ', ..] => cont.continue_with2((line_start, quote)),
|
||||
// Three backticks.
[b'`', b'`', b'`', ..] => cont.continue_with(CodeBlock {}),
|
||||
// Backslash followed by '['.
[b'\\', b'[', ..] => cont.continue_with(BlockMath {}),
|
||||
// Literal "<center>" opening tag.
[b'<', b'c', b'e', b'n', b't', b'e', b'r', b'>', ..] => cont.continue_with2((inline, center_tag_end)),
|
||||
// Anything else is handled by the inline parser.
_ => cont.continue_with(inline)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parser step for inline content: matches the leading bytes of `inp.view()` against the
/// known inline markers and continues with the chosen parser via `cont`.
///
/// A newline continues with `line_start`; every other listed marker currently continues
/// with `inline` itself.
// NOTE(review): the match has no fallback arm, so inputs starting with any other byte (or
// empty input) are not covered — looks unfinished; confirm the intended default.
fn inline<'a>(
|
||||
state: &mut State,
|
||||
inp: &mut impl Input<'a>,
|
||||
_output: &'_ mut impl FnMut(Effect<'a>),
|
||||
cont: impl ParserCont,
|
||||
) -> ParserRet {
|
||||
match inp.view().as_bytes() {
|
||||
[b'\n', ..] => return cont.continue_with(line_start),
|
||||
// HTML-like tags: <b>, <s>, <i>, <plain>, <small>.
[b'<', b'b', b'>', ..] => return cont.continue_with(inline),
|
||||
[b'<', b's', b'>', ..] => return cont.continue_with(inline),
|
||||
[b'<', b'i', b'>', ..] => return cont.continue_with(inline),
|
||||
[b'<', b'p', b'l', b'a', b'i', b'n', b'>', ..] => return cont.continue_with(inline),
|
||||
[b'<', b's', b'm', b'a', b'l', b'l', b'>', ..] => return cont.continue_with(inline),
|
||||
// Two-character markers are listed before their one-character prefixes so that they
// win the match.
[b'*', b'*', ..] => return cont.continue_with(inline),
|
||||
[b'_', b'_', ..] => return cont.continue_with(inline),
|
||||
[b'*', ..] => return cont.continue_with(inline),
|
||||
[b'_', ..] => return cont.continue_with(inline),
|
||||
[b'~', b'~', ..] => return cont.continue_with(inline),
|
||||
[b'`', ..] => return cont.continue_with(inline),
|
||||
// Backslash followed by '('.
[b'\\', b'(', ..] => return cont.continue_with(inline),
|
||||
};
|
||||
}
|
||||
|
||||
/// Takes the next item from `input` and emits it as an `Effect::Output`, using the item
/// with trailing U+200C / U+200D characters trimmed when `emojis::get` recognizes that
/// trimmed form.
///
/// Does nothing when `input` is exhausted.
// NOTE(review): the parameter is named `_output` but the body calls `output`; the bare
// `return;` statements and the missing final value do not produce a `ParserRet`; and
// `Effect::Output` is declared to carry an `OutTok`, not a `&str`. This looks like an
// unfinished sketch — confirm before relying on it.
fn text_or_emoji<'a>(
|
||||
state: &mut State,
|
||||
input: &mut impl Input<'a>,
|
||||
_output: &'_ mut impl FnMut(Effect<'a>),
|
||||
cont: impl ParserCont,
|
||||
) -> ParserRet {
|
||||
let Some(view) = input.next() else {
|
||||
return;
|
||||
};
|
||||
|
||||
// Strip zero-width (non-)joiners from the end before the emoji lookup.
let emoji_str = view.trim_end_matches(['\u{200c}', '\u{200d}']);
|
||||
if let Some(_) = emojis::get(emoji_str) {
|
||||
output(Effect::Output(emoji_str));
|
||||
return;
|
||||
};
|
||||
|
||||
// Not a known emoji: emit the item unchanged.
output(Effect::Output(view));
|
||||
}
|
||||
|
||||
fn block_quote_end<'a>(
|
||||
state: &mut State,
|
||||
inp: &mut impl Input<'a>,
|
||||
_output: &'_ mut impl FnMut(Effect<'a>),
|
||||
cont: impl ParserCont,
|
||||
) -> ParserRet {}
|
||||
|
||||
fn code_block_end<'a>(
|
||||
state: &mut State,
|
||||
inp: &mut impl Input<'a>,
|
||||
_output: &'_ mut impl FnMut(Effect<'a>),
|
||||
cont: impl ParserCont,
|
||||
) -> ParserRet {}
|
||||
|
||||
fn block_math_end<'a>(
|
||||
state: &mut State,
|
||||
inp: &mut impl Input<'a>,
|
||||
_output: &'_ mut impl FnMut(Effect<'a>),
|
||||
cont: impl ParserCont,
|
||||
) -> ParserRet {}
|
||||
|
||||
|
||||
fn center_tag_end<'a>(
|
||||
state: &mut State,
|
||||
inp: &mut impl Input<'a>,
|
||||
_output: &'_ mut impl FnMut(Effect<'a>),
|
||||
cont: impl ParserCont,
|
||||
) -> ParserRet {}
|
||||
|
||||
|
||||
/// The kinds of inline tag distinguished by `TagInline`.
// NOTE(review): the variant names line up with the `<small>`, `<plain>`, `<b>`, `<i>` and
// `<s>` markers matched in `inline` — presumably one variant per tag; confirm.
#[derive(Copy, Clone)]
|
||||
enum TagInlineKind {
|
||||
TagSmall,
|
||||
TagPlain,
|
||||
TagBold,
|
||||
TagItalic,
|
||||
TagStrikethrough,
|
||||
}
|
||||
|
||||
/// Parser value for an inline tag, parameterized by which tag it stands for.
struct TagInline {
|
||||
kind: TagInlineKind,
|
||||
}
|
||||
|
||||
// NOTE(review): the impl body is empty although `Parser` declares a required `take`
// method — looks like a placeholder; confirm.
impl Parser for TagInline {}
|
||||
|
||||
|
||||
fn inline_math_end<'a>(
|
||||
state: &mut State,
|
||||
inp: &mut impl Input<'a>,
|
||||
_output: &'_ mut impl FnMut(Effect<'a>),
|
||||
cont: impl ParserCont,
|
||||
) -> ParserRet {}
|
||||
|
||||
|
||||
fn inline_code_end<'a>(
|
||||
state: &mut State,
|
||||
inp: &mut impl Input<'a>,
|
||||
_output: &'_ mut impl FnMut(Effect<'a>),
|
||||
cont: impl ParserCont,
|
||||
) -> ParserRet {}
|
||||
|
||||
|
||||
/// Field-less parser value for URLs.
struct Url {}
|
||||
|
||||
// NOTE(review): this `take` does not match the `Parser::take` declaration in the types
// module (there `state` is `&mut State`, there is a fourth `visitor` parameter and the
// return type is `ParserRet`), and its body is empty — looks like a leftover from an
// earlier trait shape; confirm.
impl Parser for Url {
|
||||
fn take<'a>(
|
||||
&mut self,
|
||||
state: State,
|
||||
input: &mut impl Input<'a>,
|
||||
output: &'_ mut impl FnMut(Effect<'a>),
|
||||
) -> impl Parser {}
|
||||
}
|
||||
|
||||
/// Recognizes one "unit" of a URL and returns the matched span.
///
/// A unit is one of:
/// * a `[` … `]` group whose contents are themselves units (parsed recursively),
/// * a `(` … `)` group whose contents are themselves units (parsed recursively),
/// * a single character that is not a control character, not whitespace and not one of
///   `)`, `]`, `>`.
// NOTE(review): this function takes `&self` but is not inside an `impl` block here, and
// `Span`, `IResult` and the combinators (`alt`, `recognize`, `tuple`, `tag`, `many_till`,
// `not`, `satisfy`, `one_of`, `anychar`) are not imported in the visible part of this
// file — presumably carried over from the previous combinator-based parser; confirm
// whether it is still meant to be used.
#[inline]
|
||||
fn url_chars_base<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
|
||||
alt((
|
||||
// Square-bracket group; each nested unit goes through `increase_nesting`.
recognize(tuple((
|
||||
tag("["),
|
||||
many_till(
|
||||
self.increase_nesting(self.partial_span(Self::url_chars_base)),
|
||||
tag("]"),
|
||||
),
|
||||
))),
|
||||
// Parenthesized group, handled the same way.
recognize(tuple((
|
||||
tag("("),
|
||||
many_till(
|
||||
self.increase_nesting(self.partial_span(Self::url_chars_base)),
|
||||
tag(")"),
|
||||
),
|
||||
))),
|
||||
// Any single character that passes the three negative checks.
recognize(tuple((
|
||||
not(satisfy(char::is_control)),
|
||||
not(satisfy(char::is_whitespace)),
|
||||
not(one_of(")]>")),
|
||||
anychar,
|
||||
))),
|
||||
))(input)
|
||||
}
|
|
@ -1,17 +1,8 @@
|
|||
#![cfg(test)]
|
||||
use std::collections::HashMap;
|
||||
|
||||
use nom::bytes::complete::tag;
|
||||
|
||||
use crate::{xml_write::to_xml_string, Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT};
|
||||
|
||||
fn parse_full(string: &str) -> Token {
|
||||
Context::default()
|
||||
.full(Span::new_extra(string, SpanMeta::default()))
|
||||
.unwrap()
|
||||
.1
|
||||
.merged()
|
||||
}
|
||||
use crate::output_types::{MentionType, Token};
|
||||
use crate::{parse_full, xml_write::to_xml_string};
|
||||
|
||||
#[test]
|
||||
fn parse_empty() {
|
||||
|
@ -27,9 +18,9 @@ fn parse_url_chars() {
|
|||
"https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
|
||||
SpanMeta::default(),
|
||||
))
|
||||
.unwrap()
|
||||
.1
|
||||
.into_fragment(),
|
||||
.unwrap()
|
||||
.1
|
||||
.into_fragment(),
|
||||
"https://en.wikipedia.org/wiki/Sandbox_(computer_security)"
|
||||
);
|
||||
|
||||
|
@ -60,9 +51,9 @@ fn parse_url_chars() {
|
|||
"https://cs.wikipedia.org/wiki/Among Us )",
|
||||
SpanMeta::default(),
|
||||
))
|
||||
.unwrap()
|
||||
.1
|
||||
.into_fragment(),
|
||||
.unwrap()
|
||||
.1
|
||||
.into_fragment(),
|
||||
"https://cs.wikipedia.org/wiki/Among Us"
|
||||
);
|
||||
|
||||
|
@ -71,9 +62,9 @@ fn parse_url_chars() {
|
|||
"https://en.wikipedia.org/wiki/Among Us )",
|
||||
SpanMeta::default(),
|
||||
))
|
||||
.unwrap()
|
||||
.1
|
||||
.into_fragment(),
|
||||
.unwrap()
|
||||
.1
|
||||
.into_fragment(),
|
||||
"https://en.wikipedia.org/wiki/Among"
|
||||
);
|
||||
}
|
||||
|
@ -82,17 +73,17 @@ fn parse_url_chars() {
|
|||
fn parse_formatting() {
|
||||
assert_eq!(
|
||||
parse_full(r#"~~stikethrough~~"#),
|
||||
Token::Strikethrough(Box::new(Token::PlainText("stikethrough".into()))),
|
||||
Token::Strikethrough(vec![Token::PlainText("stikethrough".into())]),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"**bold**"#),
|
||||
Token::Bold(Box::new(Token::PlainText("bold".into()))),
|
||||
Token::Bold(vec![Token::PlainText("bold".into())]),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"*italic*"#),
|
||||
Token::Italic(Box::new(Token::PlainText("italic".into()))),
|
||||
Token::Italic(vec![Token::PlainText("italic".into())]),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
|
@ -109,7 +100,7 @@ fn parse_formatting() {
|
|||
parse_full("intra*word*italic"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("intra".into()),
|
||||
Token::Italic(Box::new(Token::PlainText("word".into()))),
|
||||
Token::Italic(vec![Token::PlainText("word".into())]),
|
||||
Token::PlainText("italic".into()),
|
||||
])
|
||||
);
|
||||
|
@ -123,13 +114,13 @@ fn parse_formatting() {
|
|||
parse_full(r#"long text with a *footnote <b>text</b>"#),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("long text with a *footnote ".into()),
|
||||
Token::Bold(Box::new(Token::PlainText("text".into()))),
|
||||
Token::Bold(vec![Token::PlainText("text".into())]),
|
||||
])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"*"italic"*"#),
|
||||
Token::Italic(Box::new(Token::PlainText("\"italic\"".into())))
|
||||
Token::Italic(vec![Token::PlainText("\"italic\"".into())])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
|
@ -161,23 +152,23 @@ fn parse_formatting() {
|
|||
|
||||
assert_eq!(
|
||||
parse_full(r#"***bold italic***"#),
|
||||
Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
|
||||
Token::Bold(vec![Token::Italic(vec![Token::PlainText(
|
||||
"bold italic".into()
|
||||
)))))
|
||||
)])])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"<b><i>bold italic</i></b>"#),
|
||||
Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
|
||||
Token::Bold(vec![Token::Italic(vec![Token::PlainText(
|
||||
"bold italic".into()
|
||||
)))))
|
||||
)])])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full("~~*hello\nworld*"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("~~".into()),
|
||||
Token::Italic(Box::new(Token::PlainText("hello\nworld".into()))),
|
||||
Token::Italic(vec![Token::PlainText("hello\nworld".into())]),
|
||||
])
|
||||
)
|
||||
}
|
||||
|
@ -188,7 +179,7 @@ fn parse_flanking() {
|
|||
parse_full(r#"aaa*iii*bbb"#),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("aaa".into()),
|
||||
Token::Italic(Box::new(Token::PlainText("iii".into()))),
|
||||
Token::Italic(vec![Token::PlainText("iii".into())]),
|
||||
Token::PlainText("bbb".into()),
|
||||
])
|
||||
);
|
||||
|
@ -202,33 +193,33 @@ fn parse_flanking() {
|
|||
parse_full("aaa\n_iii_\nbbb"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("aaa\n".into()),
|
||||
Token::Italic(Box::new(Token::PlainText("iii".into()))),
|
||||
Token::Italic(vec![Token::PlainText("iii".into())]),
|
||||
Token::PlainText("\nbbb".into()),
|
||||
])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"*iii*"#),
|
||||
Token::Italic(Box::new(Token::PlainText("iii".into())))
|
||||
Token::Italic(vec![Token::PlainText("iii".into())])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"_iii_"#),
|
||||
Token::Italic(Box::new(Token::PlainText("iii".into())))
|
||||
Token::Italic(vec![Token::PlainText("iii".into())])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"aaa*iii*"#),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("aaa".into()),
|
||||
Token::Italic(Box::new(Token::PlainText("iii".into()))),
|
||||
Token::Italic(vec![Token::PlainText("iii".into())]),
|
||||
])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"*iii*bbb"#),
|
||||
Token::Sequence(vec![
|
||||
Token::Italic(Box::new(Token::PlainText("iii".into()))),
|
||||
Token::Italic(vec![Token::PlainText("iii".into())]),
|
||||
Token::PlainText("bbb".into()),
|
||||
])
|
||||
);
|
||||
|
@ -309,12 +300,12 @@ a^2 + b^2 = c^2
|
|||
🦋🏳️⚧️
|
||||
text</center>"#
|
||||
),
|
||||
Token::Center(Box::new(Token::Sequence(vec![
|
||||
Token::Center(vec![
|
||||
Token::PlainText("centered\n".into()),
|
||||
Token::UnicodeEmoji("🦋".into()),
|
||||
Token::UnicodeEmoji("🏳️⚧️".into()),
|
||||
Token::PlainText("\ntext".into()),
|
||||
])))
|
||||
])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
|
@ -323,11 +314,11 @@ a^2 + b^2 = c^2
|
|||
> 👩🏽🤝👩🏼
|
||||
> text</center>"#
|
||||
),
|
||||
Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
|
||||
Token::Quote(vec![Token::Center(vec![
|
||||
Token::PlainText("centered\n".into()),
|
||||
Token::UnicodeEmoji("👩🏽🤝👩🏼".into()),
|
||||
Token::PlainText("\ntext".into())
|
||||
]))))),
|
||||
])]),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
|
@ -335,11 +326,11 @@ a^2 + b^2 = c^2
|
|||
Token::Function {
|
||||
name: "x2".into(),
|
||||
params: HashMap::new(),
|
||||
inner: Box::new(Token::Sequence(vec![
|
||||
inner: vec![
|
||||
Token::Function {
|
||||
name: "sparkle".into(),
|
||||
params: HashMap::new(),
|
||||
inner: Box::new(Token::UnicodeEmoji("🥺".into())),
|
||||
inner: vec![Token::UnicodeEmoji("🥺".into())],
|
||||
},
|
||||
Token::UnicodeEmoji("💜".into()),
|
||||
Token::Function {
|
||||
|
@ -350,10 +341,10 @@ a^2 + b^2 = c^2
|
|||
params.insert("speed".into(), Some("5s".into()));
|
||||
params
|
||||
},
|
||||
inner: Box::new(Token::UnicodeEmoji("❤️".into())),
|
||||
inner: vec![Token::UnicodeEmoji("❤️".into())],
|
||||
},
|
||||
Token::UnicodeEmoji("🦊".into()),
|
||||
]))
|
||||
]
|
||||
},
|
||||
);
|
||||
|
||||
|
@ -362,13 +353,13 @@ a^2 + b^2 = c^2
|
|||
Token::Sequence(vec![
|
||||
Token::PlainText("<b>bold ".into()),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::User,
|
||||
mention_type: MentionType::User,
|
||||
name: "tag1".into(),
|
||||
host: None
|
||||
},
|
||||
Token::PlainText(" <i> ".into()),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::User,
|
||||
mention_type: MentionType::User,
|
||||
name: "tag2".into(),
|
||||
host: None
|
||||
},
|
||||
|
@ -386,11 +377,11 @@ a^2 + b^2 = c^2
|
|||
>> Nested quote
|
||||
"#
|
||||
),
|
||||
Token::Quote(Box::new(Token::Sequence(vec![
|
||||
Token::Quote(vec![
|
||||
Token::PlainText("test\n".into()),
|
||||
Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
|
||||
Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
|
||||
]))),
|
||||
Token::Italic(vec![Token::PlainText("\nitalic\n".into())]),
|
||||
Token::Quote(vec![Token::PlainText("Nested quote".into())])
|
||||
]),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -442,9 +433,8 @@ fn parse_link() {
|
|||
Token::Sequence(vec![
|
||||
Token::PlainText("Link test: ".into()),
|
||||
Token::Link {
|
||||
label: Box::new(Token::PlainText("label".into())),
|
||||
href: "https://example.com".into(),
|
||||
embed: true,
|
||||
label: vec![Token::PlainText("label".into())],
|
||||
href: "https://example.com".into()
|
||||
},
|
||||
])
|
||||
);
|
||||
|
@ -481,10 +471,9 @@ fn parse_link() {
|
|||
parse_full("Link test: ?[label](https://awawa.gay)"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("Link test: ".into()),
|
||||
Token::Link {
|
||||
label: Box::new(Token::PlainText("label".into())),
|
||||
Token::LinkNoEmbed {
|
||||
label: vec![Token::PlainText("label".into())],
|
||||
href: "https://awawa.gay".into(),
|
||||
embed: false,
|
||||
},
|
||||
])
|
||||
);
|
||||
|
@ -493,10 +482,9 @@ fn parse_link() {
|
|||
parse_full("Link test: ?[label](https://awawa.gay)test"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("Link test: ".into()),
|
||||
Token::Link {
|
||||
label: Box::new(Token::PlainText("label".into())),
|
||||
Token::LinkNoEmbed {
|
||||
label: vec![Token::PlainText("label".into())],
|
||||
href: "https://awawa.gay".into(),
|
||||
embed: false,
|
||||
},
|
||||
Token::PlainText("test".into()),
|
||||
])
|
||||
|
@ -506,10 +494,9 @@ fn parse_link() {
|
|||
parse_full("Link test: (?[label](https://awawa.gay))"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("Link test: (".into()),
|
||||
Token::Link {
|
||||
label: Box::new(Token::PlainText("label".into())),
|
||||
Token::LinkNoEmbed {
|
||||
label: vec![Token::PlainText("label".into())],
|
||||
href: "https://awawa.gay".into(),
|
||||
embed: false,
|
||||
},
|
||||
Token::PlainText(")".into()),
|
||||
])
|
||||
|
@ -546,7 +533,7 @@ fn parse_mention() {
|
|||
assert_eq!(
|
||||
parse_full("@tag"),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::User,
|
||||
mention_type: MentionType::User,
|
||||
name: "tag".into(),
|
||||
host: None,
|
||||
}
|
||||
|
@ -562,7 +549,7 @@ fn parse_mention() {
|
|||
Token::Sequence(vec![
|
||||
Token::PlainText("hgsjlkdsa ".into()),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::User,
|
||||
mention_type: MentionType::User,
|
||||
name: "tag".into(),
|
||||
host: None,
|
||||
},
|
||||
|
@ -575,7 +562,7 @@ fn parse_mention() {
|
|||
Token::Sequence(vec![
|
||||
Token::PlainText("hgsjlkdsa ".into()),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::User,
|
||||
mention_type: MentionType::User,
|
||||
name: "tag".into(),
|
||||
host: None,
|
||||
},
|
||||
|
@ -588,7 +575,7 @@ fn parse_mention() {
|
|||
Token::Sequence(vec![
|
||||
Token::PlainText("aaaa ".into()),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::User,
|
||||
mention_type: MentionType::User,
|
||||
name: "tag".into(),
|
||||
host: Some("domain".into()),
|
||||
},
|
||||
|
@ -601,7 +588,7 @@ fn parse_mention() {
|
|||
Token::Sequence(vec![
|
||||
Token::PlainText("test ".into()),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::User,
|
||||
mention_type: MentionType::User,
|
||||
name: "tag".into(),
|
||||
host: Some("domain".into()),
|
||||
},
|
||||
|
@ -614,7 +601,7 @@ fn parse_mention() {
|
|||
Token::Sequence(vec![
|
||||
Token::PlainText("test ".into()),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::User,
|
||||
mention_type: MentionType::User,
|
||||
name: "tag".into(),
|
||||
host: Some("domain.gay".into()),
|
||||
},
|
||||
|
@ -627,7 +614,7 @@ fn parse_mention() {
|
|||
Token::Sequence(vec![
|
||||
Token::PlainText("test ".into()),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::User,
|
||||
mention_type: MentionType::User,
|
||||
name: "tag".into(),
|
||||
host: Some("domain".into()),
|
||||
},
|
||||
|
@ -640,7 +627,7 @@ fn parse_mention() {
|
|||
Token::Sequence(vec![
|
||||
Token::PlainText("test ".into()),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::Community,
|
||||
mention_type: MentionType::Community,
|
||||
name: "tag".into(),
|
||||
host: Some("domain.com".into()),
|
||||
},
|
||||
|
@ -651,7 +638,7 @@ fn parse_mention() {
|
|||
assert_eq!(
|
||||
parse_full("@tag:domain.com"),
|
||||
Token::Mention {
|
||||
mention_type: crate::MentionType::MatrixUser,
|
||||
mention_type: MentionType::MatrixUser,
|
||||
name: "tag".into(),
|
||||
host: Some("domain.com".into())
|
||||
},
|
||||
|
@ -758,20 +745,10 @@ fn xml_serialization() {
|
|||
&to_xml_string(&parse_full(
|
||||
"@natty $[spin.speed=0.5s 🥺]:cat_attack: <plain>test</plain>"
|
||||
))
|
||||
.unwrap(),
|
||||
.unwrap(),
|
||||
r#"<mmm><mention name="natty" type="user"/> <fn name="spin" arg-speed="0.5s"><ue>🥺</ue></fn><ee>cat_attack</ee> test</mmm>"#
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&to_xml_string(&parse_full(
|
||||
"Ring Galaxy AM 0644 741 from Hubble\nCredits: AURA, STScI, J. Higdon, Cornell, ESA, #NASA\n#nature #space #astrophotography"
|
||||
))
|
||||
.unwrap(),
|
||||
r#"<mmm>Ring Galaxy AM 0644 741 from Hubble
|
||||
Credits: AURA, STScI, J. Higdon, Cornell, ESA, <hashtag>NASA</hashtag>
|
||||
<hashtag>nature</hashtag> <hashtag>space</hashtag> <hashtag>astrophotography</hashtag></mmm>"#
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&to_xml_string(&parse_full(
|
||||
r#"
|
||||
|
@ -779,7 +756,7 @@ Credits: AURA, STScI, J. Higdon, Cornell, ESA, <hashtag>NASA</hashtag>
|
|||
var x = undefined;
|
||||
``` "#
|
||||
))
|
||||
.unwrap(),
|
||||
.unwrap(),
|
||||
"<mmm><code lang=\"js\">var x = undefined;</code></mmm>"
|
||||
);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,120 @@
|
|||
use unicode_segmentation::{Graphemes, UnicodeSegmentation};
|
||||
|
||||
/// A byte range (`offset`, `length`) inside a borrowed source string.
#[derive(Debug, Copy, Clone)]
pub(crate) struct ParseSpan<'a> {
    /// The complete string the span points into.
    pub(crate) source: &'a str,
    /// Byte offset of the first spanned byte within `source`.
    pub(crate) offset: usize,
    /// Number of bytes covered by the span.
    pub(crate) length: usize,
}

impl<'a> ParseSpan<'a> {
    /// Joins `self` with `other` when `other` starts exactly where `self` ends.
    ///
    /// Returns `None` if the two spans are not directly adjacent in that order.
    ///
    /// # Panics
    ///
    /// Panics if the two spans refer to sources with different contents.
    pub(crate) fn concat(self, other: Self) -> Option<Self> {
        assert!(
            self.source == other.source,
            "Attempted to concat slices from different strings"
        );

        // Built lazily so the summed length is only computed for adjacent spans.
        (self.offset + self.length == other.offset).then(|| ParseSpan {
            length: self.length + other.length,
            ..self
        })
    }

    /// Returns the part of the source covered by this span.
    ///
    /// The slice borrows from the source (`'a`), not from the span, so it stays usable
    /// after the span value itself is gone.
    ///
    /// # Panics
    ///
    /// Panics if the range is out of bounds or not on UTF-8 character boundaries.
    pub(crate) fn spanned_source(&self) -> &'a str {
        &self.source[self.offset..self.offset + self.length]
    }
}
|
||||
|
||||
|
||||
pub(crate) struct TokStream<'a>(ParseSpan<'a>, Graphemes<'a>);
|
||||
|
||||
impl<'a> From<&'a str> for TokStream<'a> {
|
||||
fn from(source: &'a str) -> Self {
|
||||
TokStream(
|
||||
ParseSpan {
|
||||
source,
|
||||
length: source.len(),
|
||||
offset: 0,
|
||||
},
|
||||
source.graphemes(true),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait Input<'a> {
|
||||
fn next(&mut self) -> Option<&'a str>;
|
||||
fn view(&self) -> &'a str;
|
||||
}
|
||||
|
||||
impl<'a> Input<'a> for TokStream<'a> {
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<&'a str> {
|
||||
if let Some(p) = self.1.next() {
|
||||
let length = p.len();
|
||||
self.0.offset += length;
|
||||
self.0.length -= length;
|
||||
return Some(p);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn view(&self) -> &'a str {
|
||||
&self.0.source[self.0.offset..self.0.offset + self.0.length]
|
||||
}
|
||||
}
|
||||
|
||||
/// A piece of token text together with the span it is associated with.
#[derive(Debug, Copy, Clone)]
|
||||
pub(crate) struct Lex<'a> {
|
||||
pub(crate) token: &'a str,
|
||||
pub(crate) span: ParseSpan<'a>,
|
||||
}
|
||||
|
||||
/// The payload type carried by `Effect::Output`; currently just an alias of `Lex`.
pub(crate) type OutTok<'a> = Lex<'a>;
|
||||
|
||||
// NOTE(review): presumably the upper bound for `State::depth`; nothing in the visible
// code enforces it yet — confirm.
pub(crate) const MAX_DEPTH: usize = 24;
|
||||
|
||||
/// Mutable state handed to every parser step; starts at depth 0 via `Default`.
#[derive(Debug, Default, Clone, Copy)]
|
||||
pub(crate) struct State {
|
||||
pub(crate) depth: usize,
|
||||
}
|
||||
|
||||
/// Events a parser step reports through its effect-handler callback.
pub(crate) enum Effect<'a> {
|
||||
/// A token was produced.
Output(OutTok<'a>)
|
||||
}
|
||||
|
||||
|
||||
/// Return value of every parser step.
///
/// The single private field means code outside this module cannot build one with a struct
/// literal; the visible parser steps obtain theirs from a `ParserCont` method.
#[must_use]
|
||||
pub(crate) struct ParserRet {
|
||||
_private: (),
|
||||
}
|
||||
|
||||
/// Continuation handed to a parser step; the step consumes it to name the parser(s) that
/// should run next and gets its `ParserRet` back in return.
pub(crate) trait ParserCont {
|
||||
/// Continues with a single parser.
fn continue_with(self, to: impl Parser) -> ParserRet;
|
||||
/// Continues with a pair of parsers.
// NOTE(review): the order in which the two are run is not visible here — confirm.
fn continue_with2(self, to: (impl Parser, impl Parser)) -> ParserRet;
|
||||
}
|
||||
|
||||
/// A single parser step.
pub(crate) trait Parser {
|
||||
/// Runs the step.
///
/// * `state` – shared mutable parser state.
/// * `input` – the input source to inspect or consume.
/// * `handler` – callback receiving the `Effect`s the step produces.
/// * `visitor` – continuation used to select what runs next; it yields the return value.
fn take<'a>(
|
||||
&mut self,
|
||||
state: &mut State,
|
||||
input: &mut impl Input<'a>,
|
||||
handler: &'_ mut impl FnMut(Effect<'a>),
|
||||
visitor: impl ParserCont,
|
||||
) -> ParserRet;
|
||||
}
|
||||
|
||||
/// Lets a plain function pointer with the parser-step signature act as a `Parser` by
/// forwarding `take` to a call of the pointer.
// NOTE(review): the impl-level `I`, `F` and `V` are fixed by the pointer type, while `take`
// receives its own anonymous `impl Input` / `impl FnMut` / `impl ParserCont` types, so the
// forwarded call likely does not type-check as written — confirm.
impl<I, F, V> Parser for fn(&mut State, &mut I, &'_ mut F, V) -> ParserRet {
|
||||
fn take<'a>(&mut self,
|
||||
state: &mut State,
|
||||
input: &mut impl Input<'a>,
|
||||
handler: &'_ mut impl FnMut(Effect<'a>),
|
||||
visitor: impl ParserCont) -> ParserRet {
|
||||
self(state, input, handler, visitor)
|
||||
}
|
||||
}
|
|
@ -1,9 +1,8 @@
|
|||
use std::io::{Cursor, Write};
|
||||
|
||||
use crate::output_types::Token;
|
||||
use quick_xml::events::{BytesText, Event};
|
||||
|
||||
use crate::Token;
|
||||
|
||||
impl Token {
|
||||
fn write<T: Write>(&self, writer: &mut quick_xml::Writer<T>) -> quick_xml::Result<()> {
|
||||
match self {
|
||||
|
|
Loading…
Reference in New Issue