Switch towards recursive ascent

parent f71429bfe0
commit 9f62c72f29
Cargo.lock
@@ -435,12 +435,6 @@ dependencies = [
  "syn 1.0.109",
 ]
 
-[[package]]
-name = "bytecount"
-version = "0.6.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
-
 [[package]]
 name = "byteorder"
 version = "1.5.0"
@@ -632,7 +626,6 @@ dependencies = [
  "itoa",
  "rustversion",
  "ryu",
- "serde",
  "static_assertions",
 ]
 
@@ -2054,13 +2047,11 @@ dependencies = [
 name = "magnetar_mmm_parser"
 version = "0.3.0-alpha"
 dependencies = [
- "compact_str",
  "either",
  "emojis",
- "nom",
- "nom_locate",
  "quick-xml",
  "serde",
+ "smallvec",
  "strum",
  "tracing",
  "unicode-segmentation",
@@ -2325,17 +2316,6 @@ dependencies = [
  "minimal-lexical",
 ]
 
-[[package]]
-name = "nom_locate"
-version = "4.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3"
-dependencies = [
- "bytecount",
- "memchr",
- "nom",
-]
 
 [[package]]
 name = "nu-ansi-term"
 version = "0.46.0"

Cargo.toml (magnetar_mmm_parser)
@@ -11,10 +11,8 @@ xml = ["dep:quick-xml"]
 
 [dependencies]
 either = { workspace = true }
 emojis = { workspace = true }
-nom = { workspace = true }
-nom_locate = { workspace = true }
-compact_str = { workspace = true, features = ["serde"] }
 serde = { workspace = true, features = ["derive"] }
+smallvec = { workspace = true }
 strum = { workspace = true, features = ["derive"] }
 tracing = { workspace = true }
 unicode-segmentation = { workspace = true }
(File diff suppressed because it is too large.)
output_types.rs (new file)
@@ -0,0 +1,261 @@
+use either::Either;
+use serde::{Deserialize, Serialize};
+use std::borrow::Cow;
+use std::collections::HashMap;
+use strum::IntoStaticStr;
+
+#[derive(Debug, Clone, Deserialize, Serialize, Eq, PartialEq)]
+pub enum Token<'a> {
+    PlainText(Cow<'a, str>),
+    Sequence(Vec<Token<'a>>),
+    Quote(Vec<Token<'a>>),
+    Small(Vec<Token<'a>>),
+    BoldItalic(Vec<Token<'a>>),
+    Bold(Vec<Token<'a>>),
+    Italic(Vec<Token<'a>>),
+    Center(Vec<Token<'a>>),
+    Strikethrough(Vec<Token<'a>>),
+    PlainTag(String),
+    InlineCode(String),
+    InlineMath(String),
+    UrlRaw(String),
+    UrlNoEmbed(String),
+    Link {
+        label: Vec<Token<'a>>,
+        href: String,
+    },
+    LinkNoEmbed {
+        label: Vec<Token<'a>>,
+        href: String,
+    },
+    BlockCode {
+        lang: Option<String>,
+        inner: String,
+    },
+    BlockMath(String),
+    Function {
+        name: String,
+        params: HashMap<String, Option<String>>,
+        inner: Vec<Token<'a>>,
+    },
+    Mention {
+        name: String,
+        host: Option<String>,
+        mention_type: MentionType,
+    },
+    UnicodeEmoji(String),
+    ShortcodeEmoji {
+        shortcode: String,
+        host: Option<String>,
+    },
+    Hashtag(String),
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, IntoStaticStr)]
+// The alternative would be to implement a serde serializer for this one enum, but that's disgusting
+#[strum(serialize_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum MentionType {
+    Community,
+    User,
+    MatrixUser,
+}
+
+impl MentionType {
+    pub fn to_char(&self) -> char {
+        match self {
+            MentionType::Community => '!',
+            MentionType::User => '@',
+            MentionType::MatrixUser => ':',
+        }
+    }
+
+    pub fn separator(&self) -> char {
+        match self {
+            MentionType::Community | MentionType::User => '@',
+            MentionType::MatrixUser => ':',
+        }
+    }
+}
+
+impl Token<'_> {
+    fn str_content_left(&self) -> Option<&str> {
+        match self {
+            Token::PlainText(text) => Some(text.as_ref()),
+            Token::Sequence(tokens) => tokens.first().and_then(Token::str_content_left),
+            Token::Quote(inner) => inner.str_content_left(),
+            Token::Small(inner) => inner.str_content_left(),
+            Token::Bold(inner) => inner.str_content_left(),
+            Token::Italic(inner) => inner.str_content_left(),
+            Token::Center(inner) => inner.str_content_left(),
+            Token::Strikethrough(inner) => inner.str_content_left(),
+            Token::PlainTag(tag) => Some(tag.as_ref()),
+            Token::UrlRaw(url) => Some(url.as_ref()),
+            Token::UrlNoEmbed(url) => Some(url.as_ref()),
+            Token::Link { label, .. } => label.str_content_left(),
+            Token::Function { inner, .. } => inner.str_content_left(),
+            Token::Mention { name, .. } => Some(name.as_ref()),
+            Token::UnicodeEmoji(code) => Some(code.as_ref()),
+            Token::Hashtag(tag) => Some(tag.as_ref()),
+            _ => None,
+        }
+    }
+
+    fn str_content_right(&self) -> Option<&str> {
+        match self {
+            Token::PlainText(text) => Some(text.as_ref()),
+            Token::Sequence(tokens) => tokens.last().and_then(Token::str_content_right),
+            Token::Quote(inner) => inner.str_content_right(),
+            Token::Small(inner) => inner.str_content_right(),
+            Token::Bold(inner) => inner.str_content_right(),
+            Token::Italic(inner) => inner.str_content_right(),
+            Token::Center(inner) => inner.str_content_right(),
+            Token::Strikethrough(inner) => inner.str_content_right(),
+            Token::PlainTag(tag) => Some(tag.as_ref()),
+            Token::UrlRaw(url) => Some(url.as_ref()),
+            Token::UrlNoEmbed(url) => Some(url.as_ref()),
+            Token::Link { label, .. } => label.str_content_right(),
+            Token::Function { inner, .. } => inner.str_content_right(),
+            Token::Mention { name, .. } => Some(name.as_ref()),
+            Token::UnicodeEmoji(code) => Some(code.as_ref()),
+            Token::Hashtag(tag) => Some(tag.as_ref()),
+            _ => None,
+        }
+    }
+
+    fn inner(&self) -> Token {
+        match self {
+            plain @ Token::PlainText(_) => plain.clone(),
+            sequence @ Token::Sequence(_) => sequence.clone(),
+            Token::Quote(inner) => inner.inner(),
+            Token::Small(inner) => inner.inner(),
+            Token::Bold(inner) => inner.inner(),
+            Token::Italic(inner) => inner.inner(),
+            Token::Center(inner) => inner.inner(),
+            Token::Strikethrough(inner) => inner.inner(),
+            Token::PlainTag(text) => Token::PlainText(text.clone().into()),
+            Token::InlineCode(code) => Token::PlainText(code.clone().into()),
+            Token::InlineMath(math) => Token::PlainText(math.clone().into()),
+            Token::UrlRaw(url) => Token::PlainText(url.clone().into()),
+            Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()),
+            Token::Link { label, .. } => label.inner(),
+            Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()),
+            Token::BlockMath(math) => Token::PlainText(math.clone().into()),
+            Token::Function { inner, .. } => inner.inner(),
+            Token::Mention { name, .. } => Token::PlainText(name.clone().into()),
+            Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()),
+            Token::ShortcodeEmoji { shortcode, .. } => Token::PlainText(shortcode.clone().into()),
+            Token::Hashtag(tag) => Token::PlainText(tag.clone().into()),
+        }
+    }
+
+    fn merged(&self) -> Token {
+        match self {
+            Token::Sequence(tokens) => {
+                let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
+                    if let Some(Token::PlainText(last)) = acc.last_mut() {
+                        if let Token::PlainText(tok_text) = tok {
+                            *last += tok_text.as_ref();
+
+                            return acc;
+                        }
+                    }
+
+                    if let Token::Sequence(seq) = tok {
+                        let items = seq.iter().map(Token::merged).flat_map(|t| match t {
+                            Token::Sequence(seq) => Either::Left(seq.into_iter()),
+                            other => Either::Right(std::iter::once(other)),
+                        });
+
+                        for item in items {
+                            if let Some(Token::PlainText(last)) = acc.last_mut() {
+                                if let Token::PlainText(tok_text) = item {
+                                    *last += tok_text.as_ref();
+
+                                    continue;
+                                }
+                            }
+
+                            acc.push(item);
+                        }
+
+                        return acc;
+                    }
+
+                    acc.push(tok.merged());
+                    acc
+                });
+
+                if tokens_multi.len() == 1 {
+                    return tokens_multi.into_iter().next().unwrap();
+                }
+
+                Token::Sequence(tokens_multi)
+            }
+            Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
+            Token::Small(inner) => Token::Small(Box::new(inner.merged())),
+            Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
+            Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
+            Token::Center(inner) => Token::Center(Box::new(inner.merged())),
+            Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
+            Token::Link { label, href } => Token::Link {
+                label: Box::new(label.merged()),
+                href: href.clone(),
+            },
+            Token::LinkNoEmbed { label, href } => Token::LinkNoEmbed {
+                label: Box::new(label.merged()),
+                href: href.clone(),
+            },
+            Token::Function {
+                name,
+                params,
+                inner,
+            } => Token::Function {
+                name: name.clone(),
+                params: params.clone(),
+                inner: Box::new(inner.merged()),
+            },
+            other => other.clone(),
+        }
+    }
+
+    pub fn walk_map_collect<T>(&self, func: &impl Fn(&Token) -> Option<T>, out: &mut Vec<T>) {
+        if let Some(v) = func(self) {
+            out.push(v)
+        }
+
+        match self {
+            Token::Sequence(items) => {
+                items.iter().for_each(|tok| tok.walk_map_collect(func, out));
+            }
+            Token::Quote(inner)
+            | Token::Small(inner)
+            | Token::Bold(inner)
+            | Token::Italic(inner)
+            | Token::Center(inner)
+            | Token::Function { inner, .. }
+            | Token::Link { label: inner, .. }
+            | Token::Strikethrough(inner) => inner.walk_map_collect(func, out),
+            _ => {}
+        }
+    }
+
+    pub fn walk_speech_transform(&mut self, func: &impl Fn(&mut Cow<'_, str>)) {
+        match self {
+            Token::Sequence(items) => {
+                items
+                    .iter_mut()
+                    .for_each(|tok| tok.walk_speech_transform(func));
+            }
+            Token::Small(inner)
+            | Token::Bold(inner)
+            | Token::Italic(inner)
+            | Token::Center(inner)
+            | Token::Function { inner, .. }
+            | Token::Strikethrough(inner) => inner.walk_speech_transform(func),
+            Token::PlainText(text) => func(text),
+            _ => {}
+        }
+    }
+}
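The one non-obvious operation above is merged(): it flattens nested Sequences and glues adjacent PlainText runs together so that parser output compares structurally in the tests further down. A self-contained sketch of that normalization over a deliberately simplified token type (illustrative only, not the commit's code):

#[derive(Debug, PartialEq)]
enum Tok {
    Text(String),
    Seq(Vec<Tok>),
}

// Push `tok` into `acc`, splicing sequences open and gluing adjacent text runs.
fn push_merged(acc: &mut Vec<Tok>, tok: Tok) {
    match tok {
        Tok::Seq(inner) => inner.into_iter().for_each(|t| push_merged(acc, t)),
        Tok::Text(t) => match acc.last_mut() {
            Some(Tok::Text(last)) => last.push_str(&t),
            _ => acc.push(Tok::Text(t)),
        },
    }
}

fn merged(toks: Vec<Tok>) -> Vec<Tok> {
    let mut acc = Vec::new();
    toks.into_iter().for_each(|t| push_merged(&mut acc, t));
    acc
}

fn main() {
    let toks = vec![
        Tok::Text("a".into()),
        Tok::Seq(vec![Tok::Text("b".into()), Tok::Text("c".into())]),
        Tok::Text("d".into()),
    ];
    // The nested sequence dissolves and all four runs merge into one.
    assert_eq!(merged(toks), vec![Tok::Text("abcd".into())]);
}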
(new file: parser state functions)
@@ -0,0 +1,157 @@
+use crate::types::{Effect, Input, Parser, ParserCont, ParserRet, State};
+
+fn line_start<'a>(
+    state: &mut State,
+    inp: &mut impl Input<'a>,
+    _output: &'_ mut impl FnMut(Effect<'a>),
+    cont: impl ParserCont,
+) -> ParserRet {
+    match inp.view().as_bytes() {
+        [b'>', b' ', ..] => cont.continue_with2((line_start, quote)),
+        [b'`', b'`', b'`', ..] => cont.continue_with(CodeBlock {}),
+        [b'\\', b'[', ..] => cont.continue_with(BlockMath {}),
+        [b'<', b'c', b'e', b'n', b't', b'e', b'r', b'>', ..] => cont.continue_with2((inline, center_tag_end)),
+        _ => cont.continue_with(inline),
+    }
+}
+
+fn inline<'a>(
+    state: &mut State,
+    inp: &mut impl Input<'a>,
+    _output: &'_ mut impl FnMut(Effect<'a>),
+    cont: impl ParserCont,
+) -> ParserRet {
+    match inp.view().as_bytes() {
+        [b'\n', ..] => return cont.continue_with(line_start),
+        [b'<', b'b', b'>', ..] => return cont.continue_with(inline),
+        [b'<', b's', b'>', ..] => return cont.continue_with(inline),
+        [b'<', b'i', b'>', ..] => return cont.continue_with(inline),
+        [b'<', b'p', b'l', b'a', b'i', b'n', b'>', ..] => return cont.continue_with(inline),
+        [b'<', b's', b'm', b'a', b'l', b'l', b'>', ..] => return cont.continue_with(inline),
+        [b'*', b'*', ..] => return cont.continue_with(inline),
+        [b'_', b'_', ..] => return cont.continue_with(inline),
+        [b'*', ..] => return cont.continue_with(inline),
+        [b'_', ..] => return cont.continue_with(inline),
+        [b'~', b'~', ..] => return cont.continue_with(inline),
+        [b'`', ..] => return cont.continue_with(inline),
+        [b'\\', b'(', ..] => return cont.continue_with(inline),
+    };
+}
+
+fn text_or_emoji<'a>(
+    state: &mut State,
+    input: &mut impl Input<'a>,
+    output: &'_ mut impl FnMut(Effect<'a>),
+    cont: impl ParserCont,
+) -> ParserRet {
+    let Some(view) = input.next() else {
+        return;
+    };
+
+    let emoji_str = view.trim_end_matches(['\u{200c}', '\u{200d}']);
+    if let Some(_) = emojis::get(emoji_str) {
+        output(Effect::Output(emoji_str));
+        return;
+    };
+
+    output(Effect::Output(view));
+}
+
+fn block_quote_end<'a>(
+    state: &mut State,
+    inp: &mut impl Input<'a>,
+    _output: &'_ mut impl FnMut(Effect<'a>),
+    cont: impl ParserCont,
+) -> ParserRet {}
+
+fn code_block_end<'a>(
+    state: &mut State,
+    inp: &mut impl Input<'a>,
+    _output: &'_ mut impl FnMut(Effect<'a>),
+    cont: impl ParserCont,
+) -> ParserRet {}
+
+fn block_math_end<'a>(
+    state: &mut State,
+    inp: &mut impl Input<'a>,
+    _output: &'_ mut impl FnMut(Effect<'a>),
+    cont: impl ParserCont,
+) -> ParserRet {}
+
+fn center_tag_end<'a>(
+    state: &mut State,
+    inp: &mut impl Input<'a>,
+    _output: &'_ mut impl FnMut(Effect<'a>),
+    cont: impl ParserCont,
+) -> ParserRet {}
+
+#[derive(Copy, Clone)]
+enum TagInlineKind {
+    TagSmall,
+    TagPlain,
+    TagBold,
+    TagItalic,
+    TagStrikethrough,
+}
+
+struct TagInline {
+    kind: TagInlineKind,
+}
+
+impl Parser for TagInline {}
+
+fn inline_math_end<'a>(
+    state: &mut State,
+    inp: &mut impl Input<'a>,
+    _output: &'_ mut impl FnMut(Effect<'a>),
+    cont: impl ParserCont,
+) -> ParserRet {}
+
+fn inline_code_end<'a>(
+    state: &mut State,
+    inp: &mut impl Input<'a>,
+    _output: &'_ mut impl FnMut(Effect<'a>),
+    cont: impl ParserCont,
+) -> ParserRet {}
+
+struct Url {}
+
+impl Parser for Url {
+    fn take<'a>(
+        &mut self,
+        state: State,
+        input: &mut impl Input<'a>,
+        output: &'_ mut impl FnMut(Effect<'a>),
+    ) -> impl Parser {}
+}
+
+#[inline]
+fn url_chars_base<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
+    alt((
+        recognize(tuple((
+            tag("["),
+            many_till(
+                self.increase_nesting(self.partial_span(Self::url_chars_base)),
+                tag("]"),
+            ),
+        ))),
+        recognize(tuple((
+            tag("("),
+            many_till(
+                self.increase_nesting(self.partial_span(Self::url_chars_base)),
+                tag(")"),
+            ),
+        ))),
+        recognize(tuple((
+            not(satisfy(char::is_control)),
+            not(satisfy(char::is_whitespace)),
+            not(one_of(")]>")),
+            anychar,
+        ))),
+    ))(input)
+}
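These state functions are the recursive-ascent skeleton the commit title refers to: instead of a nom-style combinator tree, each state inspects the head of the input and transfers control to the next state, and that transfer is only expressible through ParserCont::continue_with, whose opaque #[must_use] ParserRet makes silently falling off the end of a state a type error. A minimal self-contained sketch of the same control style, with a plain driver loop standing in for ParserCont (the names Next, line_start, and line here are illustrative, not from the commit):

use std::iter::Peekable;
use std::vec::IntoIter;

type In = Peekable<IntoIter<char>>;

// A state consumes input and names the state that runs next (None = done).
// Wrapping the function pointer in a struct lets the type refer to itself.
struct Next(Option<fn(&mut In, &mut Vec<String>) -> Next>);

fn line_start(inp: &mut In, out: &mut Vec<String>) -> Next {
    match inp.peek() {
        // '>' opens a quote line, like the [b'>', b' ', ..] arm above.
        Some('>') => {
            inp.next();
            out.push("quote:".into());
            Next(Some(line))
        }
        Some(_) => Next(Some(line)),
        None => Next(None),
    }
}

fn line(inp: &mut In, out: &mut Vec<String>) -> Next {
    let mut text = String::new();
    while let Some(c) = inp.next() {
        if c == '\n' {
            out.push(text);
            return Next(Some(line_start)); // "continue with" the line state
        }
        text.push(c);
    }
    if !text.is_empty() {
        out.push(text);
    }
    Next(None)
}

fn main() {
    let mut inp: In = "> quoted\nplain".chars().collect::<Vec<_>>().into_iter().peekable();
    let mut out = Vec::new();
    // The trampoline: the call stack never grows with input size.
    let mut state = Next(Some(line_start));
    while let Next(Some(f)) = state {
        state = f(&mut inp, &mut out);
    }
    assert_eq!(out, ["quote:", " quoted", "plain"]);
}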
(test module)
@@ -1,17 +1,8 @@
 #![cfg(test)]
 use std::collections::HashMap;
 
-use nom::bytes::complete::tag;
-
-use crate::{xml_write::to_xml_string, Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT};
-
-fn parse_full(string: &str) -> Token {
-    Context::default()
-        .full(Span::new_extra(string, SpanMeta::default()))
-        .unwrap()
-        .1
-        .merged()
-}
+use crate::output_types::{MentionType, Token};
+use crate::{parse_full, xml_write::to_xml_string};
 
 #[test]
 fn parse_empty() {
@@ -27,9 +18,9 @@ fn parse_url_chars() {
             "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
             SpanMeta::default(),
         ))
         .unwrap()
         .1
         .into_fragment(),
         "https://en.wikipedia.org/wiki/Sandbox_(computer_security)"
     );
@@ -60,9 +51,9 @@ fn parse_url_chars() {
             "https://cs.wikipedia.org/wiki/Among Us )",
             SpanMeta::default(),
         ))
         .unwrap()
         .1
         .into_fragment(),
         "https://cs.wikipedia.org/wiki/Among Us"
     );
@@ -71,9 +62,9 @@ fn parse_url_chars() {
             "https://en.wikipedia.org/wiki/Among Us )",
             SpanMeta::default(),
         ))
         .unwrap()
         .1
         .into_fragment(),
         "https://en.wikipedia.org/wiki/Among"
     );
 }
@@ -82,17 +73,17 @@
 fn parse_formatting() {
     assert_eq!(
         parse_full(r#"~~stikethrough~~"#),
-        Token::Strikethrough(Box::new(Token::PlainText("stikethrough".into()))),
+        Token::Strikethrough(vec![Token::PlainText("stikethrough".into())]),
     );
 
     assert_eq!(
         parse_full(r#"**bold**"#),
-        Token::Bold(Box::new(Token::PlainText("bold".into()))),
+        Token::Bold(vec![Token::PlainText("bold".into())]),
     );
 
     assert_eq!(
         parse_full(r#"*italic*"#),
-        Token::Italic(Box::new(Token::PlainText("italic".into()))),
+        Token::Italic(vec![Token::PlainText("italic".into())]),
     );
 
     assert_eq!(
@@ -109,7 +100,7 @@ fn parse_formatting() {
         parse_full("intra*word*italic"),
         Token::Sequence(vec![
             Token::PlainText("intra".into()),
-            Token::Italic(Box::new(Token::PlainText("word".into()))),
+            Token::Italic(vec![Token::PlainText("word".into())]),
             Token::PlainText("italic".into()),
         ])
     );
@@ -123,13 +114,13 @@ fn parse_formatting() {
         parse_full(r#"long text with a *footnote <b>text</b>"#),
         Token::Sequence(vec![
             Token::PlainText("long text with a *footnote ".into()),
-            Token::Bold(Box::new(Token::PlainText("text".into()))),
+            Token::Bold(vec![Token::PlainText("text".into())]),
         ])
     );
 
     assert_eq!(
         parse_full(r#"*"italic"*"#),
-        Token::Italic(Box::new(Token::PlainText("\"italic\"".into())))
+        Token::Italic(vec![Token::PlainText("\"italic\"".into())])
     );
 
     assert_eq!(
@@ -161,23 +152,23 @@ fn parse_formatting() {
 
     assert_eq!(
         parse_full(r#"***bold italic***"#),
-        Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
+        Token::Bold(vec![Token::Italic(vec![Token::PlainText(
             "bold italic".into()
-        )))))
+        )])])
     );
 
     assert_eq!(
         parse_full(r#"<b><i>bold italic</i></b>"#),
-        Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
+        Token::Bold(vec![Token::Italic(vec![Token::PlainText(
             "bold italic".into()
-        )))))
+        )])])
    );
 
     assert_eq!(
         parse_full("~~*hello\nworld*"),
         Token::Sequence(vec![
             Token::PlainText("~~".into()),
-            Token::Italic(Box::new(Token::PlainText("hello\nworld".into()))),
+            Token::Italic(vec![Token::PlainText("hello\nworld".into())]),
         ])
     )
 }
@@ -188,7 +179,7 @@ fn parse_flanking() {
         parse_full(r#"aaa*iii*bbb"#),
         Token::Sequence(vec![
             Token::PlainText("aaa".into()),
-            Token::Italic(Box::new(Token::PlainText("iii".into()))),
+            Token::Italic(vec![Token::PlainText("iii".into())]),
             Token::PlainText("bbb".into()),
         ])
     );
@@ -202,33 +193,33 @@ fn parse_flanking() {
         parse_full("aaa\n_iii_\nbbb"),
         Token::Sequence(vec![
             Token::PlainText("aaa\n".into()),
-            Token::Italic(Box::new(Token::PlainText("iii".into()))),
+            Token::Italic(vec![Token::PlainText("iii".into())]),
             Token::PlainText("\nbbb".into()),
         ])
     );
 
     assert_eq!(
         parse_full(r#"*iii*"#),
-        Token::Italic(Box::new(Token::PlainText("iii".into())))
+        Token::Italic(vec![Token::PlainText("iii".into())])
     );
 
     assert_eq!(
         parse_full(r#"_iii_"#),
-        Token::Italic(Box::new(Token::PlainText("iii".into())))
+        Token::Italic(vec![Token::PlainText("iii".into())])
     );
 
     assert_eq!(
         parse_full(r#"aaa*iii*"#),
         Token::Sequence(vec![
             Token::PlainText("aaa".into()),
-            Token::Italic(Box::new(Token::PlainText("iii".into()))),
+            Token::Italic(vec![Token::PlainText("iii".into())]),
         ])
     );
 
     assert_eq!(
         parse_full(r#"*iii*bbb"#),
         Token::Sequence(vec![
-            Token::Italic(Box::new(Token::PlainText("iii".into()))),
+            Token::Italic(vec![Token::PlainText("iii".into())]),
             Token::PlainText("bbb".into()),
         ])
     );
@@ -309,12 +300,12 @@ a^2 + b^2 = c^2
 🦋🏳️‍⚧️
 text</center>"#
         ),
-        Token::Center(Box::new(Token::Sequence(vec![
+        Token::Center(vec![
             Token::PlainText("centered\n".into()),
             Token::UnicodeEmoji("🦋".into()),
             Token::UnicodeEmoji("🏳️‍⚧️".into()),
             Token::PlainText("\ntext".into()),
-        ])))
+        ])
     );
 
     assert_eq!(
@@ -323,11 +314,11 @@ a^2 + b^2 = c^2
 > 👩🏽‍🤝‍👩🏼
 > text</center>"#
         ),
-        Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
+        Token::Quote(vec![Token::Center(vec![
             Token::PlainText("centered\n".into()),
             Token::UnicodeEmoji("👩🏽‍🤝‍👩🏼".into()),
             Token::PlainText("\ntext".into())
-        ]))))),
+        ])]),
     );
 
     assert_eq!(
@@ -335,11 +326,11 @@ a^2 + b^2 = c^2
         Token::Function {
             name: "x2".into(),
             params: HashMap::new(),
-            inner: Box::new(Token::Sequence(vec![
+            inner: vec![
                 Token::Function {
                     name: "sparkle".into(),
                     params: HashMap::new(),
-                    inner: Box::new(Token::UnicodeEmoji("🥺".into())),
+                    inner: vec![Token::UnicodeEmoji("🥺".into())],
                 },
                 Token::UnicodeEmoji("💜".into()),
                 Token::Function {
@@ -350,10 +341,10 @@ a^2 + b^2 = c^2
                         params.insert("speed".into(), Some("5s".into()));
                         params
                     },
-                    inner: Box::new(Token::UnicodeEmoji("❤️".into())),
+                    inner: vec![Token::UnicodeEmoji("❤️".into())],
                 },
                 Token::UnicodeEmoji("🦊".into()),
-            ]))
+            ]
         },
     );
 
@@ -362,13 +353,13 @@ a^2 + b^2 = c^2
         Token::Sequence(vec![
             Token::PlainText("<b>bold ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag1".into(),
                 host: None
             },
             Token::PlainText(" <i> ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag2".into(),
                 host: None
             },
@@ -386,11 +377,11 @@ a^2 + b^2 = c^2
 >> Nested quote
 "#
         ),
-        Token::Quote(Box::new(Token::Sequence(vec![
+        Token::Quote(vec![
             Token::PlainText("test\n".into()),
-            Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
-            Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
-        ]))),
+            Token::Italic(vec![Token::PlainText("\nitalic\n".into())]),
+            Token::Quote(vec![Token::PlainText("Nested quote".into())])
+        ]),
     );
 }
@@ -442,9 +433,8 @@ fn parse_link() {
         Token::Sequence(vec![
             Token::PlainText("Link test: ".into()),
             Token::Link {
-                label: Box::new(Token::PlainText("label".into())),
-                href: "https://example.com".into(),
-                embed: true,
+                label: vec![Token::PlainText("label".into())],
+                href: "https://example.com".into()
             },
         ])
     );
@@ -481,10 +471,9 @@ fn parse_link() {
         parse_full("Link test: ?[label](https://awawa.gay)"),
         Token::Sequence(vec![
             Token::PlainText("Link test: ".into()),
-            Token::Link {
-                label: Box::new(Token::PlainText("label".into())),
+            Token::LinkNoEmbed {
+                label: vec![Token::PlainText("label".into())],
                 href: "https://awawa.gay".into(),
-                embed: false,
             },
         ])
     );
@@ -493,10 +482,9 @@ fn parse_link() {
         parse_full("Link test: ?[label](https://awawa.gay)test"),
         Token::Sequence(vec![
             Token::PlainText("Link test: ".into()),
-            Token::Link {
-                label: Box::new(Token::PlainText("label".into())),
+            Token::LinkNoEmbed {
+                label: vec![Token::PlainText("label".into())],
                 href: "https://awawa.gay".into(),
-                embed: false,
             },
             Token::PlainText("test".into()),
         ])
@@ -506,10 +494,9 @@ fn parse_link() {
         parse_full("Link test: (?[label](https://awawa.gay))"),
         Token::Sequence(vec![
             Token::PlainText("Link test: (".into()),
-            Token::Link {
-                label: Box::new(Token::PlainText("label".into())),
+            Token::LinkNoEmbed {
+                label: vec![Token::PlainText("label".into())],
                 href: "https://awawa.gay".into(),
-                embed: false,
             },
             Token::PlainText(")".into()),
         ])
@@ -546,7 +533,7 @@ fn parse_mention() {
     assert_eq!(
         parse_full("@tag"),
         Token::Mention {
-            mention_type: crate::MentionType::User,
+            mention_type: MentionType::User,
             name: "tag".into(),
             host: None,
         }
@@ -562,7 +549,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("hgsjlkdsa ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: None,
             },
@@ -575,7 +562,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("hgsjlkdsa ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: None,
             },
@@ -588,7 +575,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("aaaa ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: Some("domain".into()),
             },
@@ -601,7 +588,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("test ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: Some("domain".into()),
             },
@@ -614,7 +601,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("test ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: Some("domain.gay".into()),
             },
@@ -627,7 +614,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("test ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::Community,
+                mention_type: MentionType::Community,
                 name: "tag".into(),
                 host: Some("domain.com".into()),
             },
@@ -651,7 +638,7 @@ fn parse_mention() {
     assert_eq!(
         parse_full("@tag:domain.com"),
         Token::Mention {
-            mention_type: crate::MentionType::MatrixUser,
+            mention_type: MentionType::MatrixUser,
             name: "tag".into(),
             host: Some("domain.com".into())
         },
@@ -758,20 +745,10 @@ fn xml_serialization() {
         &to_xml_string(&parse_full(
             "@natty $[spin.speed=0.5s 🥺]:cat_attack: <plain>test</plain>"
         ))
         .unwrap(),
         r#"<mmm><mention name="natty" type="user"/> <fn name="spin" arg-speed="0.5s"><ue>🥺</ue></fn><ee>cat_attack</ee> test</mmm>"#
     );
 
-    assert_eq!(
-        &to_xml_string(&parse_full(
-            "Ring Galaxy AM 0644 741 from Hubble\nCredits: AURA, STScI, J. Higdon, Cornell, ESA, #NASA\n#nature #space #astrophotography"
-        ))
-        .unwrap(),
-        r#"<mmm>Ring Galaxy AM 0644 741 from Hubble
-Credits: AURA, STScI, J. Higdon, Cornell, ESA, <hashtag>NASA</hashtag>
-<hashtag>nature</hashtag> <hashtag>space</hashtag> <hashtag>astrophotography</hashtag></mmm>"#
-    );
-
     assert_eq!(
         &to_xml_string(&parse_full(
             r#"
@@ -779,7 +756,7 @@ Credits: AURA, STScI, J. Higdon, Cornell, ESA, <hashtag>NASA</hashtag>
 var x = undefined;
 ``` "#
         ))
         .unwrap(),
         "<mmm><code lang=\"js\">var x = undefined;</code></mmm>"
     );
 }
types.rs (new file)
@@ -0,0 +1,120 @@
+use unicode_segmentation::{Graphemes, UnicodeSegmentation};
+
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct ParseSpan<'a> {
+    pub(crate) source: &'a str,
+    pub(crate) offset: usize,
+    pub(crate) length: usize,
+}
+
+impl ParseSpan<'_> {
+    pub(crate) fn concat(self, other: Self) -> Option<Self> {
+        if self.source != other.source {
+            panic!("Attempted to concat slices from different strings");
+        }
+
+        if self.offset + self.length != other.offset {
+            return None;
+        }
+
+        Some(ParseSpan {
+            source: self.source,
+            offset: self.offset,
+            length: self.length + other.length,
+        })
+    }
+
+    pub(crate) fn spanned_source(&self) -> &str {
+        &self.source[self.offset..self.offset + self.length]
+    }
+}
+
+pub(crate) struct TokStream<'a>(ParseSpan<'a>, Graphemes<'a>);
+
+impl<'a> From<&'a str> for TokStream<'a> {
+    fn from(source: &'a str) -> Self {
+        TokStream(
+            ParseSpan {
+                source,
+                length: source.len(),
+                offset: 0,
+            },
+            source.graphemes(true),
+        )
+    }
+}
+
+pub(crate) trait Input<'a> {
+    fn next(&mut self) -> Option<&'a str>;
+    fn view(&self) -> &'a str;
+}
+
+impl<'a> Input<'a> for TokStream<'a> {
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        if let Some(p) = self.1.next() {
+            let length = p.len();
+            self.0.offset += length;
+            self.0.length -= length;
+            return Some(p);
+        }
+
+        None
+    }
+
+    #[inline]
+    fn view(&self) -> &'a str {
+        &self.0.source[self.0.offset..self.0.offset + self.0.length]
+    }
+}
+
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct Lex<'a> {
+    pub(crate) token: &'a str,
+    pub(crate) span: ParseSpan<'a>,
+}
+
+pub(crate) type OutTok<'a> = Lex<'a>;
+
+pub(crate) const MAX_DEPTH: usize = 24;
+
+#[derive(Debug, Default, Clone, Copy)]
+pub(crate) struct State {
+    pub(crate) depth: usize,
+}
+
+pub(crate) enum Effect<'a> {
+    Output(OutTok<'a>),
+}
+
+#[must_use]
+pub(crate) struct ParserRet {
+    _private: (),
+}
+
+pub(crate) trait ParserCont {
+    fn continue_with(self, to: impl Parser) -> ParserRet;
+    fn continue_with2(self, to: (impl Parser, impl Parser)) -> ParserRet;
+}
+
+pub(crate) trait Parser {
+    fn take<'a>(
+        &mut self,
+        state: &mut State,
+        input: &mut impl Input<'a>,
+        handler: &'_ mut impl FnMut(Effect<'a>),
+        visitor: impl ParserCont,
+    ) -> ParserRet;
+}
+
+impl<I, F, V> Parser for fn(&mut State, &mut I, &'_ mut F, V) -> ParserRet {
+    fn take<'a>(
+        &mut self,
+        state: &mut State,
+        input: &mut impl Input<'a>,
+        handler: &'_ mut impl FnMut(Effect<'a>),
+        visitor: impl ParserCont,
+    ) -> ParserRet {
+        self(state, input, handler, visitor)
+    }
+}
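TokStream hands out grapheme clusters rather than chars, so a multi-codepoint emoji arrives as a single token; this is what text_or_emoji relies on when it trims ZWNJ/ZWJ and probes emojis::get. A quick self-contained illustration (assumes the unicode-segmentation crate used above; not part of the commit):

use unicode_segmentation::UnicodeSegmentation;

fn main() {
    let s = "a🏳️‍⚧️b";
    // chars() splits the flag's ZWJ sequence into five scalar values…
    assert_eq!(s.chars().count(), 7);
    // …while extended grapheme clusters keep it together, which is what
    // TokStream::next() yields one at a time.
    let graphemes: Vec<&str> = s.graphemes(true).collect();
    assert_eq!(graphemes, ["a", "🏳️‍⚧️", "b"]);
}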
xml_write.rs
@@ -1,9 +1,8 @@
 use std::io::{Cursor, Write};
 
+use crate::output_types::Token;
 use quick_xml::events::{BytesText, Event};
-
-use crate::Token;
 
 impl Token {
     fn write<T: Write>(&self, writer: &mut quick_xml::Writer<T>) -> quick_xml::Result<()> {
         match self {