Restructured MMM parser and updated URL parsing
This commit is contained in:
parent
82945279de
commit
f71429bfe0
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,785 @@
|
|||
#![cfg(test)]
|
||||
use std::collections::HashMap;
|
||||
|
||||
use nom::bytes::complete::tag;
|
||||
|
||||
use crate::{xml_write::to_xml_string, Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT};
|
||||
|
||||
fn parse_full(string: &str) -> Token {
|
||||
Context::default()
|
||||
.full(Span::new_extra(string, SpanMeta::default()))
|
||||
.unwrap()
|
||||
.1
|
||||
.merged()
|
||||
}
|
||||
|
||||
/// An empty input is expected to parse to an empty sequence.
#[test]
fn parse_empty() {
    let expected = Token::Sequence(Vec::new());
    assert_eq!(parse_full(""), expected);
}
|
||||
|
||||
/// Checks which prefix of each input `Context::url_chars` matches when `)` is
/// passed as its first (parser) argument.
#[test]
fn parse_url_chars() {
    let ctx = Context::default();

    // (input, second argument of `url_chars`, expected matched fragment).
    // NOTE(review): judging by the last two cases, the boolean appears to
    // control whether spaces may be part of the match — confirm against
    // `url_chars` itself.
    let cases = [
        (
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
            true,
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
        ),
        (
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))",
            true,
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
        ),
        (
            "https://cs.wikipedia.org/wiki/Among_Us ",
            true,
            "https://cs.wikipedia.org/wiki/Among_Us",
        ),
        (
            "https://cs.wikipedia.org/wiki/Among Us )",
            true,
            "https://cs.wikipedia.org/wiki/Among Us",
        ),
        (
            "https://en.wikipedia.org/wiki/Among Us )",
            false,
            "https://en.wikipedia.org/wiki/Among",
        ),
    ];

    for (input, flag, expected) in cases {
        let span = Span::new_extra(input, SpanMeta::default());
        assert_eq!(
            ctx.url_chars(tag(")"), flag)(span)
                .unwrap()
                .1
                .into_fragment(),
            expected
        );
    }
}
|
||||
|
||||
/// Covers inline formatting: strikethrough, bold, italic, inline code and the
/// HTML-like `<b>` / `<i>` tags, including inputs expected to stay plain text.
#[test]
fn parse_formatting() {
    // Small constructors to keep the expected trees readable.
    let plain = |text: &'static str| Token::PlainText(text.into());
    let code = |text: &'static str| Token::InlineCode(text.into());
    let bold = |inner: Token| Token::Bold(Box::new(inner));
    let italic = |inner: Token| Token::Italic(Box::new(inner));
    let strike = |inner: Token| Token::Strikethrough(Box::new(inner));

    let cases = vec![
        // Simple delimiter pairs.
        (r#"~~stikethrough~~"#, strike(plain("stikethrough"))),
        (r#"**bold**"#, bold(plain("bold"))),
        (r#"*italic*"#, italic(plain("italic"))),
        // Inputs expected to be left as plain text.
        (r#"* italic *"#, plain("* italic *")),
        ("snake_case_variable", plain("snake_case_variable")),
        // Asterisk italics inside a word.
        (
            "intra*word*italic",
            Token::Sequence(vec![
                plain("intra"),
                italic(plain("word")),
                plain("italic"),
            ]),
        ),
        // Mismatched delimiters.
        (r#"_ italic *"#, plain("_ italic *")),
        // A lone asterisk followed by a real bold tag.
        (
            r#"long text with a *footnote <b>text</b>"#,
            Token::Sequence(vec![
                plain("long text with a *footnote "),
                bold(plain("text")),
            ]),
        ),
        (r#"*"italic"*"#, italic(plain("\"italic\""))),
        // Inline code.
        (
            r#"not code `code` also not code"#,
            Token::Sequence(vec![
                plain("not code "),
                code("code"),
                plain(" also not code"),
            ]),
        ),
        (
            r#"not code `code` also `not code"#,
            Token::Sequence(vec![
                plain("not code "),
                code("code"),
                plain(" also `not code"),
            ]),
        ),
        (
            r#"not code `*not bold*` also not code"#,
            Token::Sequence(vec![
                plain("not code "),
                code("*not bold*"),
                plain(" also not code"),
            ]),
        ),
        // Combined bold + italic, via delimiters and via tags.
        (r#"***bold italic***"#, bold(italic(plain("bold italic")))),
        (
            r#"<b><i>bold italic</i></b>"#,
            bold(italic(plain("bold italic"))),
        ),
        // Unclosed strikethrough followed by italics spanning a newline.
        (
            "~~*hello\nworld*",
            Token::Sequence(vec![plain("~~"), italic(plain("hello\nworld"))]),
        ),
    ];

    for (input, expected) in cases {
        assert_eq!(parse_full(input), expected);
    }
}
|
||||
|
||||
/// Exercises `*` and `_` italics with and without adjacent word characters.
#[test]
fn parse_flanking() {
    let plain = |text: &'static str| Token::PlainText(text.into());
    let italic = |text: &'static str| Token::Italic(Box::new(Token::PlainText(text.into())));

    let cases = vec![
        (
            r#"aaa*iii*bbb"#,
            Token::Sequence(vec![plain("aaa"), italic("iii"), plain("bbb")]),
        ),
        (r#"aaa_nnn_bbb"#, plain("aaa_nnn_bbb")),
        (
            "aaa\n_iii_\nbbb",
            Token::Sequence(vec![plain("aaa\n"), italic("iii"), plain("\nbbb")]),
        ),
        (r#"*iii*"#, italic("iii")),
        (r#"_iii_"#, italic("iii")),
        (
            r#"aaa*iii*"#,
            Token::Sequence(vec![plain("aaa"), italic("iii")]),
        ),
        (
            r#"*iii*bbb"#,
            Token::Sequence(vec![italic("iii"), plain("bbb")]),
        ),
        (r#"aaa_nnn_"#, plain("aaa_nnn_")),
        (r#"_nnn_bbb"#, plain("_nnn_bbb")),
    ];

    for (input, expected) in cases {
        assert_eq!(parse_full(input), expected);
    }
}
|
||||
|
||||
/// Parses three long inputs, each a short unit repeated 20000 times; only
/// successful completion is checked, the resulting tokens are discarded.
#[test]
fn parse_long() {
    for unit in ["A", "*A", "@A"] {
        let input = unit.repeat(20000);
        parse_full(&input);
    }
}
|
||||
|
||||
/// Covers math, code blocks, centering, quotes, `$[...]` functions and
/// improperly nested tags.
#[test]
fn parse_complex() {
    // Small constructors to keep the expected trees readable.
    let plain = |text: &'static str| Token::PlainText(text.into());
    let emoji = |text: &'static str| Token::UnicodeEmoji(text.into());
    let user = |name: &'static str| Token::Mention {
        mention_type: crate::MentionType::User,
        name: name.into(),
        host: None,
    };

    // Inline math on one line; with a newline inside it is expected to stay plain text.
    let inline_math = Token::InlineMath(String::from(" nya^3 "));
    assert_eq!(parse_full(r"\( nya^3 \)"), inline_math);
    assert_eq!(
        parse_full("\\( nya^3 \n \\)"),
        plain("\\( nya^3 \n \\)")
    );

    // Inline code on one line; with a newline inside it is expected to stay plain text.
    let inline_code = Token::InlineCode(String::from("AbstractProxyFactoryBean"));
    assert_eq!(parse_full(r"`AbstractProxyFactoryBean`"), inline_code);
    assert_eq!(parse_full("`let x = \n 5;`"), plain("`let x = \n 5;`"));

    // Fenced code block with a language tag.
    let block_code = Token::BlockCode {
        lang: Some(String::from("js")),
        inner: String::from("var x = undefined;"),
    };
    assert_eq!(
        parse_full(
            r#"
```js
var x = undefined;
```"#
        ),
        block_code
    );

    // Block math, multi-line and single-line.
    assert_eq!(
        parse_full(
            r"
\[
a^2 + b^2 = c^2
\]"
        ),
        Token::BlockMath(String::from("a^2 + b^2 = c^2"))
    );
    assert_eq!(
        parse_full(r"\[ x^2 + y^2 = z^2 \]"),
        Token::BlockMath(String::from("x^2 + y^2 = z^2"))
    );

    // Centered text containing emoji.
    let centered = Token::Center(Box::new(Token::Sequence(vec![
        plain("centered\n"),
        emoji("🦋"),
        emoji("🏳️⚧️"),
        plain("\ntext"),
    ])));
    assert_eq!(
        parse_full(
            r#"<center>centered
🦋🏳️⚧️
text</center>"#
        ),
        centered
    );

    // The same kind of content inside a quote.
    let quoted_center = Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
        plain("centered\n"),
        emoji("👩🏽🤝👩🏼"),
        plain("\ntext"),
    ])))));
    assert_eq!(
        parse_full(
            r#"> <center>centered
> 👩🏽🤝👩🏼
> text</center>"#
        ),
        quoted_center
    );

    // Nested functions; `spin` carries one flag-style and one valued parameter.
    let mut spin_params = HashMap::new();
    spin_params.insert("y".into(), None);
    spin_params.insert("speed".into(), Some("5s".into()));

    let nested_functions = Token::Function {
        name: "x2".into(),
        params: HashMap::new(),
        inner: Box::new(Token::Sequence(vec![
            Token::Function {
                name: "sparkle".into(),
                params: HashMap::new(),
                inner: Box::new(emoji("🥺")),
            },
            emoji("💜"),
            Token::Function {
                name: "spin".into(),
                params: spin_params,
                inner: Box::new(emoji("❤️")),
            },
            emoji("🦊"),
        ])),
    };
    assert_eq!(
        parse_full(r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#),
        nested_functions
    );

    // Overlapping <b>/<i> tags: the tags are expected to remain plain text
    // while the mentions are still recognised.
    let overlapping = Token::Sequence(vec![
        plain("<b>bold "),
        user("tag1"),
        plain(" <i> "),
        user("tag2"),
        plain(" </b>italic</i>"),
    ]);
    assert_eq!(
        parse_full(r#"<b>bold @tag1 <i> @tag2 </b>italic</i>"#),
        overlapping
    );

    // Multi-line quote containing italics and a nested quote.
    let quote = Token::Quote(Box::new(Token::Sequence(vec![
        plain("test\n"),
        Token::Italic(Box::new(plain("\nitalic\n"))),
        Token::Quote(Box::new(plain("Nested quote"))),
    ])));
    assert_eq!(
        parse_full(
            r#"
> test
> <i>
> italic
> </i>
>> Nested quote
"#
        ),
        quote
    );
}
|
||||
|
||||
/// Covers bracketed and raw URLs, labelled links (with and without the `?`
/// prefix) and hashtags.
#[test]
fn parse_link() {
    // Small constructors to keep the expected trees readable.
    let plain = |text: &'static str| Token::PlainText(text.into());
    let no_embed = |url: &'static str| Token::UrlNoEmbed(url.into());
    let raw = |url: &'static str| Token::UrlRaw(url.into());
    let link = |href: &'static str, embed: bool| Token::Link {
        label: Box::new(Token::PlainText("label".into())),
        href: href.into(),
        embed,
    };

    let cases = vec![
        // IP-literal hosts.
        (
            "IPv4 test: <https://0>",
            Token::Sequence(vec![plain("IPv4 test: "), no_embed("https://0")]),
        ),
        (
            "IPv4 test: <https://127.0.0.1>",
            Token::Sequence(vec![plain("IPv4 test: "), no_embed("https://127.0.0.1")]),
        ),
        (
            "IPv6 test: <https://[::2f:1]/nya>",
            Token::Sequence(vec![
                plain("IPv6 test: "),
                no_embed("https://[::2f:1]/nya"),
            ]),
        ),
        (
            "IPv6 test: https://[::2f:1]/nya",
            Token::Sequence(vec![plain("IPv6 test: "), raw("https://[::2f:1]/nya")]),
        ),
        // Internationalized domain name.
        (
            "IDN test: https://www.háčkyčárky.cz/",
            Token::Sequence(vec![
                plain("IDN test: "),
                raw("https://www.háčkyčárky.cz/"),
            ]),
        ),
        // Labelled link.
        (
            "Link test: [label](https://example.com)",
            Token::Sequence(vec![
                plain("Link test: "),
                link("https://example.com", true),
            ]),
        ),
        // Hashtags need a boundary in front of the `#`.
        (
            "test #hashtag tail",
            Token::Sequence(vec![
                plain("test "),
                Token::Hashtag("hashtag".into()),
                plain(" tail"),
            ]),
        ),
        ("not#hashtag tail", plain("not#hashtag tail")),
        // Bracketed URL on its own.
        ("<https://example.com>", no_embed("https://example.com")),
        // Adjacent bracketed URLs are fine.
        (
            "<https://example.com/><https://awawa.gay/>",
            Token::Sequence(vec![
                no_embed("https://example.com/"),
                no_embed("https://awawa.gay/"),
            ]),
        ),
        // `?[label](url)` links are expected to have `embed: false`.
        (
            "Link test: ?[label](https://awawa.gay)",
            Token::Sequence(vec![
                plain("Link test: "),
                link("https://awawa.gay", false),
            ]),
        ),
        (
            "Link test: ?[label](https://awawa.gay)test",
            Token::Sequence(vec![
                plain("Link test: "),
                link("https://awawa.gay", false),
                plain("test"),
            ]),
        ),
        (
            "Link test: (?[label](https://awawa.gay))",
            Token::Sequence(vec![
                plain("Link test: ("),
                link("https://awawa.gay", false),
                plain(")"),
            ]),
        ),
        // Missing closing parenthesis: only the raw URL is recognised.
        (
            "Link test: ?[label](https://awawa.gay",
            Token::Sequence(vec![
                plain("Link test: ?[label]("),
                raw("https://awawa.gay"),
            ]),
        ),
    ];

    for (input, expected) in cases {
        assert_eq!(parse_full(input), expected);
    }
}
|
||||
|
||||
/// Wraps text in `DEFAULT_DEPTH_LIMIT` levels of `<b>` and expects the same
/// number of nested `Bold` tokens, with the innermost `<s><i>…</i></s>`
/// markup left as plain text.
#[test]
fn limit_nesting() {
    let innermost = Token::PlainText(" <s><i>test</i></s> ".into());
    let expected =
        (0..DEFAULT_DEPTH_LIMIT).fold(innermost, |inner, _| Token::Bold(Box::new(inner)));

    let mut input = "<b>".repeat(DEFAULT_DEPTH_LIMIT);
    input.push_str(" <s><i>test</i></s> ");
    input.push_str(&"</b>".repeat(DEFAULT_DEPTH_LIMIT));

    assert_eq!(parse_full(&input), expected);
}
|
||||
|
||||
/// Covers user, community (`!`) and Matrix-style (`@name:host`) mentions and
/// the punctuation that may follow them.
#[test]
fn parse_mention() {
    let plain = |text: &'static str| Token::PlainText(text.into());
    // Every mention in this test is named "tag"; only kind and host vary.
    let mention = |mention_type: crate::MentionType, host: Option<&'static str>| Token::Mention {
        mention_type,
        name: "tag".into(),
        host: host.map(Into::into),
    };

    let cases = vec![
        ("@tag", mention(crate::MentionType::User, None)),
        // An e-mail-like string is expected to stay plain text.
        (
            "email@notactuallyamenmtion.org",
            plain("email@notactuallyamenmtion.org"),
        ),
        (
            "hgsjlkdsa @tag fgahjsdkd",
            Token::Sequence(vec![
                plain("hgsjlkdsa "),
                mention(crate::MentionType::User, None),
                plain(" fgahjsdkd"),
            ]),
        ),
        // A trailing `@` without a host is left in the following text.
        (
            "hgsjlkdsa @tag@ fgahjsdkd",
            Token::Sequence(vec![
                plain("hgsjlkdsa "),
                mention(crate::MentionType::User, None),
                plain("@ fgahjsdkd"),
            ]),
        ),
        (
            "aaaa @tag@domain bbbbb",
            Token::Sequence(vec![
                plain("aaaa "),
                mention(crate::MentionType::User, Some("domain")),
                plain(" bbbbb"),
            ]),
        ),
        // Punctuation after the host is not part of the mention.
        (
            "test @tag@domain, test",
            Token::Sequence(vec![
                plain("test "),
                mention(crate::MentionType::User, Some("domain")),
                plain(", test"),
            ]),
        ),
        (
            "test @tag@domain.gay. test",
            Token::Sequence(vec![
                plain("test "),
                mention(crate::MentionType::User, Some("domain.gay")),
                plain(". test"),
            ]),
        ),
        (
            "test @tag@domain? test",
            Token::Sequence(vec![
                plain("test "),
                mention(crate::MentionType::User, Some("domain")),
                plain("? test"),
            ]),
        ),
        // `!name@host` is a community mention.
        (
            "test !tag@domain.com test",
            Token::Sequence(vec![
                plain("test "),
                mention(crate::MentionType::Community, Some("domain.com")),
                plain(" test"),
            ]),
        ),
        // `@name:host` is a Matrix user mention.
        (
            "@tag:domain.com",
            mention(crate::MentionType::MatrixUser, Some("domain.com")),
        ),
    ];

    for (input, expected) in cases {
        assert_eq!(parse_full(input), expected);
    }
}
|
||||
|
||||
/// Covers `:shortcode:` emoji, with and without a host, and inputs that are
/// expected to stay plain text.
#[test]
fn parse_shortcodes() {
    let plain = |text: &'static str| Token::PlainText(text.into());
    let shortcode = |code: &'static str, host: Option<&'static str>| Token::ShortcodeEmoji {
        shortcode: code.into(),
        host: host.map(Into::into),
    };

    let cases = vec![
        (":bottom:", shortcode("bottom", None)),
        (
            ":bottom::blobfox:",
            Token::Sequence(vec![shortcode("bottom", None), shortcode("blobfox", None)]),
        ),
        (
            ":bottom@magnetar.social:",
            shortcode("bottom", Some("magnetar.social")),
        ),
        // Shortcodes directly adjacent to other text are not recognised.
        (":bottom:blobfox", plain(":bottom:blobfox")),
        ("bottom:blobfox:", plain("bottom:blobfox:")),
    ];

    for (input, expected) in cases {
        assert_eq!(parse_full(input), expected);
    }
}
|
||||
|
||||
/// Covers Unicode emoji, including sequences joined by a zero-width joiner
/// and stray joiners / non-joiners around emoji.
#[test]
fn parse_emoji() {
    // Building blocks of the sequences under test.
    const WHITE_FLAG: &str = "\u{1f3f3}\u{0fe0f}";
    const TRANS_SYMBOL: &str = "\u{026a7}\u{0fe0f}";
    const ZWJ: &str = "\u{0200d}";
    const ZWNJ: &str = "\u{0200c}";

    let plain = |text: &'static str| Token::PlainText(text.into());
    let emoji = |text: &str| Token::UnicodeEmoji(text.to_string());

    // Several emoji in a row become one token each.
    let row = ["🥺", "💜", "❤️", "🦊"];
    assert_eq!(
        parse_full("🥺💜❤️🦊"),
        Token::Sequence(row.iter().map(|e| emoji(e)).collect::<Vec<_>>())
    );

    // Trans flag: white flag + ZWJ + trans symbol form a single emoji.
    let trans_flag = [WHITE_FLAG, ZWJ, TRANS_SYMBOL].concat();
    assert_eq!(parse_full(&trans_flag), emoji(&trans_flag));

    // A leading ZWJ is plain text in front of the flag.
    let leading_zwj = [ZWJ, WHITE_FLAG].concat();
    assert_eq!(
        parse_full(&leading_zwj),
        Token::Sequence(vec![plain("\u{0200d}"), emoji(WHITE_FLAG)])
    );

    // With a ZWNJ instead of a ZWJ the two halves stay separate emoji.
    let split_by_zwnj = [WHITE_FLAG, ZWNJ, TRANS_SYMBOL].concat();
    assert_eq!(
        parse_full(&split_by_zwnj),
        Token::Sequence(vec![
            emoji(WHITE_FLAG),
            plain("\u{0200c}"),
            emoji(TRANS_SYMBOL),
        ])
    );

    // Trailing ZWJs with nothing to join are plain text after the flag.
    let trailing_zwjs = [WHITE_FLAG, ZWJ, ZWJ, ZWJ].concat();
    assert_eq!(
        parse_full(&trailing_zwjs),
        Token::Sequence(vec![
            emoji(WHITE_FLAG),
            plain("\u{0200d}\u{0200d}\u{0200d}"),
        ])
    );
}
|
||||
|
||||
/// Parses each input and checks the exact XML string produced by
/// `to_xml_string`.
#[test]
fn xml_serialization() {
    let render = |source: &str| to_xml_string(&parse_full(source)).unwrap();

    let cases = [
        // Nested formatting.
        ("***nyaaa***", r#"<mmm><b><i>nyaaa</i></b></mmm>"#),
        // Mention, function with one parameter, shortcode emoji and a <plain> tag.
        (
            "@natty $[spin.speed=0.5s 🥺]:cat_attack: <plain>test</plain>",
            r#"<mmm><mention name="natty" type="user"/> <fn name="spin" arg-speed="0.5s"><ue>🥺</ue></fn><ee>cat_attack</ee> test</mmm>"#,
        ),
        // Multi-line text with hashtags.
        (
            "Ring Galaxy AM 0644 741 from Hubble\nCredits: AURA, STScI, J. Higdon, Cornell, ESA, #NASA\n#nature #space #astrophotography",
            r#"<mmm>Ring Galaxy AM 0644 741 from Hubble
Credits: AURA, STScI, J. Higdon, Cornell, ESA, <hashtag>NASA</hashtag>
<hashtag>nature</hashtag> <hashtag>space</hashtag> <hashtag>astrophotography</hashtag></mmm>"#,
        ),
        // Fenced code block with a language tag.
        (
            r#"
```js
var x = undefined;
``` "#,
            "<mmm><code lang=\"js\">var x = undefined;</code></mmm>",
        ),
    ];

    for (source, expected) in cases {
        assert_eq!(render(source), expected);
    }
}
|
|
@ -0,0 +1,156 @@
|
|||
use std::io::{Cursor, Write};
|
||||
|
||||
use quick_xml::events::{BytesText, Event};
|
||||
|
||||
use crate::Token;
|
||||
|
||||
impl Token {
|
||||
fn write<T: Write>(&self, writer: &mut quick_xml::Writer<T>) -> quick_xml::Result<()> {
|
||||
match self {
|
||||
Token::PlainText(plain) => {
|
||||
writer.write_event(Event::Text(BytesText::new(plain.as_str())))?;
|
||||
}
|
||||
Token::Sequence(sequence) => {
|
||||
sequence.iter().try_for_each(|item| item.write(writer))?;
|
||||
}
|
||||
Token::Quote(inner) => {
|
||||
writer
|
||||
.create_element("quote")
|
||||
.write_inner_content(|w| inner.write(w))?;
|
||||
}
|
||||
Token::Small(inner) => {
|
||||
writer
|
||||
.create_element("small")
|
||||
.write_inner_content(|w| inner.write(w))?;
|
||||
}
|
||||
Token::Bold(inner) => {
|
||||
writer
|
||||
.create_element("b")
|
||||
.write_inner_content(|w| inner.write(w))?;
|
||||
}
|
||||
Token::Italic(inner) => {
|
||||
writer
|
||||
.create_element("i")
|
||||
.write_inner_content(|w| inner.write(w))?;
|
||||
}
|
||||
Token::Center(inner) => {
|
||||
writer
|
||||
.create_element("center")
|
||||
.write_inner_content(|w| inner.write(w))?;
|
||||
}
|
||||
Token::Strikethrough(inner) => {
|
||||
writer
|
||||
.create_element("s")
|
||||
.write_inner_content(|w| inner.write(w))?;
|
||||
}
|
||||
Token::PlainTag(plain) => {
|
||||
writer.write_event(Event::Text(BytesText::new(plain.as_str())))?;
|
||||
}
|
||||
Token::InlineCode(code) => {
|
||||
writer
|
||||
.create_element("inline-code")
|
||||
.write_text_content(BytesText::new(code))?;
|
||||
}
|
||||
Token::InlineMath(math) => {
|
||||
writer
|
||||
.create_element("inline-math")
|
||||
.write_text_content(BytesText::new(math))?;
|
||||
}
|
||||
Token::UrlRaw(url) => {
|
||||
writer
|
||||
.create_element("a")
|
||||
.with_attribute(("href", url.as_str()))
|
||||
.write_text_content(BytesText::new(url))?;
|
||||
}
|
||||
Token::UrlNoEmbed(url) => {
|
||||
writer
|
||||
.create_element("a")
|
||||
.with_attribute(("href", url.as_str()))
|
||||
.with_attribute(("embed", "false"))
|
||||
.write_text_content(BytesText::new(url))?;
|
||||
}
|
||||
Token::Link { label, href, embed } => {
|
||||
writer
|
||||
.create_element("a")
|
||||
.with_attribute(("href", href.as_str()))
|
||||
.with_attribute(("embed", if *embed { "true" } else { "false" }))
|
||||
.write_inner_content(|w| label.write(w))?;
|
||||
}
|
||||
Token::BlockCode { inner, lang } => {
|
||||
let mut ew = writer.create_element("code");
|
||||
|
||||
if let Some(language) = lang {
|
||||
ew = ew.with_attribute(("lang", language.as_str()));
|
||||
}
|
||||
|
||||
ew.write_text_content(BytesText::new(inner))?;
|
||||
}
|
||||
Token::BlockMath(math) => {
|
||||
writer
|
||||
.create_element("math")
|
||||
.write_text_content(BytesText::new(math))?;
|
||||
}
|
||||
Token::Function {
|
||||
inner,
|
||||
name,
|
||||
params,
|
||||
} => {
|
||||
let mut ew = writer
|
||||
.create_element("fn")
|
||||
.with_attribute(("name", name.as_str()));
|
||||
|
||||
for (k, v) in params {
|
||||
ew = ew
|
||||
.with_attribute((format!("arg-{k}").as_str(), v.as_deref().unwrap_or("")));
|
||||
}
|
||||
|
||||
ew.write_inner_content(|w| inner.write(w))?;
|
||||
}
|
||||
Token::Mention {
|
||||
name,
|
||||
host,
|
||||
mention_type,
|
||||
} => {
|
||||
let mut ew = writer
|
||||
.create_element("mention")
|
||||
.with_attribute(("name", name.as_str()))
|
||||
.with_attribute(("type", mention_type.into()));
|
||||
|
||||
if let Some(host) = host {
|
||||
ew = ew.with_attribute(("host", host.as_str()));
|
||||
}
|
||||
|
||||
ew.write_empty()?;
|
||||
}
|
||||
Token::UnicodeEmoji(text) => {
|
||||
writer
|
||||
.create_element("ue")
|
||||
.write_text_content(BytesText::new(text))?;
|
||||
}
|
||||
Token::ShortcodeEmoji { shortcode, host } => {
|
||||
let mut ew = writer.create_element("ee");
|
||||
|
||||
if let Some(host) = host {
|
||||
ew = ew.with_attribute(("host", host.as_str()));
|
||||
}
|
||||
|
||||
ew.write_text_content(BytesText::new(shortcode))?;
|
||||
}
|
||||
Token::Hashtag(tag) => {
|
||||
writer
|
||||
.create_element("hashtag")
|
||||
.write_text_content(BytesText::new(tag.as_str()))?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xml_string(token: &Token) -> quick_xml::Result<String> {
|
||||
let mut writer = quick_xml::Writer::new(Cursor::new(Vec::new()));
|
||||
writer
|
||||
.create_element("mmm")
|
||||
.write_inner_content(|writer| token.write(writer))?;
|
||||
Ok(String::from_utf8(writer.into_inner().into_inner())?)
|
||||
}
|
Loading…
Reference in New Issue