Compare commits

..

4 Commits

Author SHA1 Message Date
Natty 216f4229fc
Merge branch 'development' 2023-10-26 19:29:43 +02:00
Natty c6a282c26e
MMM: Coverage and naming fix 2023-10-26 18:38:45 +02:00
Natty 91883c6b36
MMM: XML serialization and fixed block code parsing 2023-10-26 00:30:11 +02:00
Natty 6ed6066b1f
MMM: Reexport in the SDK 2023-10-25 19:45:59 +02:00
6 changed files with 295 additions and 29 deletions

41
Cargo.lock generated
View File

@ -603,6 +603,7 @@ dependencies = [
"cfg-if", "cfg-if",
"itoa", "itoa",
"ryu", "ryu",
"serde",
"static_assertions", "static_assertions",
] ]
@ -1507,6 +1508,22 @@ dependencies = [
"serde_json", "serde_json",
] ]
[[package]]
name = "magnetar_mmm_parser"
version = "0.2.1-alpha"
dependencies = [
"compact_str",
"either",
"emojis",
"nom",
"nom_locate",
"quick-xml",
"serde",
"strum",
"tracing",
"unicode-segmentation",
]
[[package]] [[package]]
name = "magnetar_nodeinfo" name = "magnetar_nodeinfo"
version = "0.2.1-alpha" version = "0.2.1-alpha"
@ -1521,6 +1538,7 @@ version = "0.2.1-alpha"
dependencies = [ dependencies = [
"chrono", "chrono",
"http", "http",
"magnetar_mmm_parser",
"magnetar_sdk_macros", "magnetar_sdk_macros",
"serde", "serde",
"serde_json", "serde_json",
@ -1640,19 +1658,6 @@ dependencies = [
"windows-sys", "windows-sys",
] ]
[[package]]
name = "mmm_parser"
version = "0.2.1-alpha"
dependencies = [
"compact_str",
"either",
"emojis",
"nom",
"nom_locate",
"tracing",
"unicode-segmentation",
]
[[package]] [[package]]
name = "nom" name = "nom"
version = "7.1.3" version = "7.1.3"
@ -2070,6 +2075,16 @@ dependencies = [
"syn 1.0.109", "syn 1.0.109",
] ]
[[package]]
name = "quick-xml"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33"
dependencies = [
"memchr",
"serde",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.32" version = "1.0.32"

View File

@ -43,6 +43,7 @@ miette = "5.9"
nom = "7" nom = "7"
nom_locate = "4" nom_locate = "4"
percent-encoding = "2.2" percent-encoding = "2.2"
quick-xml = "0.31"
redis = "0.23" redis = "0.23"
regex = "1.9" regex = "1.9"
reqwest = "0.11" reqwest = "0.11"

View File

@ -1,14 +1,21 @@
[package] [package]
name = "mmm_parser" name = "magnetar_mmm_parser"
version.workspace = true version.workspace = true
edition.workspace = true edition.workspace = true
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
[features]
default = ["xml"]
xml = ["dep:quick-xml"]
[dependencies] [dependencies]
either = { workspace = true } either = { workspace = true }
emojis = { workspace = true } emojis = { workspace = true }
nom = { workspace = true } nom = { workspace = true }
nom_locate = { workspace = true } nom_locate = { workspace = true }
compact_str = { workspace = true } compact_str = { workspace = true, features = ["serde"] }
serde = { workspace = true, features = ["derive"] }
strum = { workspace = true, features = ["derive"] }
tracing = { workspace = true } tracing = { workspace = true }
unicode-segmentation = { workspace = true } unicode-segmentation = { workspace = true }
quick-xml = { workspace = true, optional = true, features = ["serialize"] }

View File

@ -8,17 +8,24 @@ use nom::character::complete::{
}; };
use nom::combinator::{eof, fail, map, not, opt, peek, recognize}; use nom::combinator::{eof, fail, map, not, opt, peek, recognize};
use nom::error::ErrorKind; use nom::error::ErrorKind;
use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1}; use nom::multi::{many0_count, many1, many1_count, many_till, separated_list1};
use nom::sequence::tuple; use nom::sequence::tuple;
use nom::{IResult, Offset, Parser, Slice}; use nom::{IResult, Offset, Parser, Slice};
use nom_locate::LocatedSpan; use nom_locate::LocatedSpan;
use quick_xml::events::{BytesText, Event};
use serde::{Deserialize, Serialize};
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::{identity, Infallible}; use std::convert::{identity, Infallible};
use std::io::{Cursor, Write};
use std::marker::PhantomData; use std::marker::PhantomData;
use strum::IntoStaticStr;
use tracing::trace; use tracing::trace;
use unicode_segmentation::UnicodeSegmentation; use unicode_segmentation::UnicodeSegmentation;
#[derive(Copy, Clone, Debug, Eq, PartialEq)] #[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, IntoStaticStr)]
// The alternative would be to implement a serde serializer for this one enum, but that's disgusting
#[strum(serialize_all = "snake_case")]
#[serde(rename_all = "snake_case")]
pub enum MentionType { pub enum MentionType {
Community, Community,
User, User,
@ -33,7 +40,7 @@ impl MentionType {
} }
} }
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
pub enum Token { pub enum Token {
PlainText(CompactString), PlainText(CompactString),
Sequence(Vec<Token>), Sequence(Vec<Token>),
@ -216,6 +223,161 @@ impl Token {
other => other.clone(), other => other.clone(),
} }
} }
/// Recursively serializes this token (and any nested tokens) as XML into `writer`.
///
/// Mapping used by the arms below:
/// - `PlainText` / `PlainTag` become text nodes,
/// - `Sequence` writes each child in order without a wrapping element,
/// - formatting variants become elements wrapping their serialized children
///   (`BoldItalic` is written as `<b><i>…</i></b>`),
/// - code, math, URL and emoji variants become elements with text content,
/// - `Mention` and `Hashtag` become empty elements carrying attributes only.
///
/// # Errors
///
/// Propagates the first error returned by the underlying `quick_xml` writer.
fn write<T: Write>(&self, writer: &mut quick_xml::Writer<T>) -> quick_xml::Result<()> {
    match self {
        Token::PlainText(plain) => {
            writer.write_event(Event::Text(BytesText::new(plain.as_str())))?;
        }
        Token::Sequence(sequence) => {
            sequence.iter().try_for_each(|item| item.write(writer))?;
        }
        Token::Quote(inner) => {
            writer
                .create_element("quote")
                .write_inner_content(|w| inner.write(w))?;
        }
        Token::Small(inner) => {
            writer
                .create_element("small")
                .write_inner_content(|w| inner.write(w))?;
        }
        Token::BoldItalic(inner) => {
            // There is no dedicated element for bold-italic; nest <i> inside <b>.
            writer
                .create_element("b")
                .write_inner_content::<_, quick_xml::Error>(|w| {
                    w.create_element("i")
                        .write_inner_content(|w| inner.write(w))?;
                    Ok(())
                })?;
        }
        Token::Bold(inner) => {
            writer
                .create_element("b")
                .write_inner_content(|w| inner.write(w))?;
        }
        Token::Italic(inner) => {
            writer
                .create_element("i")
                .write_inner_content(|w| inner.write(w))?;
        }
        Token::Center(inner) => {
            writer
                .create_element("center")
                .write_inner_content(|w| inner.write(w))?;
        }
        Token::Strikethrough(inner) => {
            writer
                .create_element("s")
                .write_inner_content(|w| inner.write(w))?;
        }
        Token::PlainTag(plain) => {
            writer.write_event(Event::Text(BytesText::new(plain.as_str())))?;
        }
        Token::InlineCode(code) => {
            writer
                .create_element("inline-code")
                .write_text_content(BytesText::new(code))?;
        }
        Token::InlineMath(math) => {
            writer
                .create_element("inline-math")
                .write_text_content(BytesText::new(math))?;
        }
        Token::UrlRaw(url) => {
            writer
                .create_element("a")
                .with_attribute(("href", url.as_str()))
                .write_text_content(BytesText::new(url))?;
        }
        Token::UrlNoEmbed(url) => {
            writer
                .create_element("a")
                .with_attribute(("href", url.as_str()))
                .with_attribute(("embed", "false"))
                .write_text_content(BytesText::new(url))?;
        }
        Token::Link { label, href, embed } => {
            writer
                .create_element("a")
                .with_attribute(("href", href.as_str()))
                .with_attribute(("embed", if *embed { "true" } else { "false" }))
                .write_inner_content(|w| label.write(w))?;
        }
        Token::BlockCode { inner, lang } => {
            let mut ew = writer.create_element("code");
            // The `lang` attribute is only present when a language was given.
            if let Some(language) = lang {
                ew = ew.with_attribute(("lang", language.as_str()));
            }
            ew.write_text_content(BytesText::new(inner))?;
        }
        Token::BlockMath(math) => {
            writer
                .create_element("math")
                .write_text_content(BytesText::new(math))?;
        }
        Token::Function {
            inner,
            name,
            params,
        } => {
            // Emit arguments sorted by key so the serialized output stays stable
            // regardless of the order in which `params` happens to iterate.
            let mut args: Vec<_> = params.iter().collect();
            args.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));

            let mut ew = writer
                .create_element("fn")
                .with_attribute(("name", name.as_str()));
            for (k, v) in args {
                // Arguments without a value are written with an empty attribute value.
                ew = ew
                    .with_attribute((format!("arg-{k}").as_str(), v.as_deref().unwrap_or("")));
            }
            ew.write_inner_content(|w| inner.write(w))?;
        }
        Token::Mention {
            name,
            host,
            mention_type,
        } => {
            let mut ew = writer
                .create_element("mention")
                .with_attribute(("name", name.as_str()))
                .with_attribute(("type", mention_type.into()));
            if let Some(host) = host {
                ew = ew.with_attribute(("host", host.as_str()));
            }
            ew.write_empty()?;
        }
        Token::UnicodeEmoji(text) => {
            writer
                .create_element("ue")
                .write_text_content(BytesText::new(text))?;
        }
        Token::ShortcodeEmoji(shortcode) => {
            writer
                .create_element("ee")
                .write_text_content(BytesText::new(shortcode))?;
        }
        Token::Hashtag(tag) => {
            // The element writer is only a builder: without a terminal `write_*`
            // call nothing reaches the output, so the element must be flushed here.
            writer
                .create_element("hashtag")
                .with_attribute(("tag", tag.as_str()))
                .write_empty()?;
        }
    }

    Ok(())
}
}
pub fn to_xml_string(token: &Token) -> quick_xml::Result<String> {
let mut writer = quick_xml::Writer::new(Cursor::new(Vec::new()));
writer
.create_element("mmm")
.write_inner_content(|writer| token.write(writer))?;
Ok(String::from_utf8(writer.into_inner().into_inner())?)
} }
#[derive(Debug, Default, Copy, Clone)] #[derive(Debug, Default, Copy, Clone)]
@ -315,11 +477,11 @@ fn spliced<'a>(
type NE<E> = nom::Err<E>; type NE<E> = nom::Err<E>;
type NomError<'x> = nom::error::Error<Span<'x>>; type NomError<'x> = nom::error::Error<Span<'x>>;
let quote_span = Span::new_extra( let spliced_span = Span::new_extra(
&combined, &combined,
segments.first().map_or(SpanMeta::new(0), |s| s.extra), segments.first().map_or(SpanMeta::new(0), |s| s.extra),
); );
let (input, inner) = match func(quote_span) { let (input, inner) = match func(spliced_span) {
Ok(s) => s, Ok(s) => s,
Err(e) => { Err(e) => {
return match e { return match e {
@ -689,7 +851,7 @@ impl Context {
let (input, _) = delim(input)?; let (input, _) = delim(input)?;
let (input, lang) = opt(map( let (input, lang) = opt(map(
recognize(many1(tuple((not(delim), not_line_ending)))), recognize(many1(tuple((not(delim), not(line_ending), anychar)))),
Span::into_fragment, Span::into_fragment,
))(input)?; ))(input)?;
let (input, _) = line_ending(input)?; let (input, _) = line_ending(input)?;
@ -704,8 +866,10 @@ impl Context {
let (input, _) = line_ending(input)?; let (input, _) = line_ending(input)?;
let (input, _) = delim(input)?; let (input, _) = delim(input)?;
let (input, _) = many0(space)(input)?; // Trailing whitespace after the triple backtick
let (input, _) = not(not(line_ending))(input)?; let (input, _) = opt(space1_unicode)(input)?;
// If we got this far, the next character should be a line ending
let (input, _) = not(tuple((not(line_ending), anychar)))(input)?;
let (input, _) = opt(line_ending)(input)?; let (input, _) = opt(line_ending)(input)?;
Ok(( Ok((
@ -737,8 +901,10 @@ impl Context {
let (input, _) = opt(line_ending)(input)?; let (input, _) = opt(line_ending)(input)?;
let (input, _) = end(input)?; let (input, _) = end(input)?;
let (input, _) = many0(space)(input)?; // Trailing whitespace after the closing delim
let (input, _) = not(not_line_ending)(input)?; let (input, _) = opt(space1_unicode)(input)?;
// If we got this far, the next character should be a line ending
let (input, _) = not(tuple((not(line_ending), anychar)))(input)?;
let (input, _) = opt(line_ending)(input)?; let (input, _) = opt(line_ending)(input)?;
Ok(( Ok((
@ -874,7 +1040,7 @@ impl Context {
)))(input) )))(input)
}; };
let param_value = recognize(many1_count(alt(( let arg_value = recognize(many1_count(alt((
alphanumeric1, alphanumeric1,
tag("."), tag("."),
tag("-"), tag("-"),
@ -883,7 +1049,7 @@ impl Context {
let (input, func_name) = map(func_ident, Span::into_fragment)(input)?; let (input, func_name) = map(func_ident, Span::into_fragment)(input)?;
let arg = tuple((func_ident, opt(tuple((tag("="), param_value))))); let arg = tuple((func_ident, opt(tuple((tag("="), arg_value)))));
let (input, args) = let (input, args) =
opt(tuple((one_char('.'), separated_list1(one_char(','), arg))))(input)?; opt(tuple((one_char('.'), separated_list1(one_char(','), arg))))(input)?;
@ -1427,7 +1593,7 @@ impl Context {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::{Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT}; use crate::{to_xml_string, Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT};
use nom::bytes::complete::tag; use nom::bytes::complete::tag;
use std::collections::HashMap; use std::collections::HashMap;
@ -1583,10 +1749,58 @@ mod test {
"bold italic".into() "bold italic".into()
))))) )))))
); );
assert_eq!(
parse_full("~~*hello\nworld*"),
Token::PlainText("~~*hello\nworld*".into())
)
} }
#[test] #[test]
fn parse_complex() { fn parse_complex() {
assert_eq!(
parse_full(r"\( nya^3 \)"),
Token::InlineMath(" nya^3 ".to_string())
);
assert_eq!(
parse_full("\\( nya^3 \n \\)"),
Token::PlainText("\\( nya^3 \n \\)".into())
);
assert_eq!(
parse_full(r"`AbstractProxyFactoryBean`"),
Token::InlineCode("AbstractProxyFactoryBean".to_string())
);
assert_eq!(
parse_full("`let x = \n 5;`"),
Token::PlainText("`let x = \n 5;`".into())
);
assert_eq!(
parse_full(
r#"
```js
var x = undefined;
```"#
),
Token::BlockCode {
lang: Some("js".to_string()),
inner: "var x = undefined;".to_string(),
}
);
assert_eq!(
parse_full(
r"
\[
a^2 + b^2 = c^2
\]"
),
Token::BlockMath("a^2 + b^2 = c^2".to_string())
);
assert_eq!( assert_eq!(
parse_full( parse_full(
r#"<center>centered r#"<center>centered
@ -2004,4 +2218,31 @@ text</center>"#
]) ])
); );
} }
#[test]
fn xml_serialization() {
    // Fenced code block whose closing fence is followed by a trailing space.
    let fenced_js = r#"
```js
var x = undefined;
``` "#;

    // Each case pairs MMM source text with the XML expected from serializing
    // its parsed token tree.
    let cases = [
        ("***nyaaa***", r#"<mmm><b><i>nyaaa</i></b></mmm>"#),
        (
            "@natty $[spin.speed=0.5s 🥺]:cat_attack: <plain>test</plain>",
            r#"<mmm><mention name="natty" type="user"/> <fn name="spin" arg-speed="0.5s"><ue>🥺</ue></fn><ee>cat_attack</ee> test</mmm>"#,
        ),
        (
            fenced_js,
            "<mmm><code lang=\"js\">var x = undefined;</code></mmm>",
        ),
    ];

    for &(source, expected) in &cases {
        let rendered = to_xml_string(&parse_full(source)).unwrap();
        assert_eq!(rendered, expected);
    }
}
} }

View File

@ -5,6 +5,7 @@ edition.workspace = true
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
[dependencies] [dependencies]
magnetar_mmm_parser = { path = "../magnetar_mmm_parser" }
magnetar_sdk_macros = { path = "./macros" } magnetar_sdk_macros = { path = "./macros" }
chrono = { workspace = true, features = ["serde"] } chrono = { workspace = true, features = ["serde"] }
@ -15,4 +16,4 @@ serde_json = { workspace = true }
ts-rs = { workspace = true, features = ["chrono", "chrono-impl"] } ts-rs = { workspace = true, features = ["chrono", "chrono-impl"] }
unicode-segmentation = { workspace = true } unicode-segmentation = { workspace = true }

View File

@ -1,6 +1,7 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use ts_rs::TS; use ts_rs::TS;
pub use magnetar_mmm_parser as mmm;
pub mod endpoints; pub mod endpoints;
pub mod types; pub mod types;
pub mod util_types; pub mod util_types;