Switch towards recursive ascent

Author: Natty, 2024-12-08 00:04:48 +01:00
parent f71429bfe0
commit 9f62c72f29
Signed by: natty (GPG Key ID: BF6CB659ADEE60EC)
8 changed files with 633 additions and 1091 deletions

Cargo.lock (generated)

@@ -435,12 +435,6 @@ dependencies = [
  "syn 1.0.109",
 ]
-
-[[package]]
-name = "bytecount"
-version = "0.6.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
 
 [[package]]
 name = "byteorder"
 version = "1.5.0"
@@ -632,7 +626,6 @@ dependencies = [
  "itoa",
  "rustversion",
  "ryu",
- "serde",
  "static_assertions",
 ]
@@ -2054,13 +2047,11 @@ dependencies = [
 name = "magnetar_mmm_parser"
 version = "0.3.0-alpha"
 dependencies = [
- "compact_str",
  "either",
  "emojis",
- "nom",
- "nom_locate",
  "quick-xml",
  "serde",
- "smallvec",
  "strum",
  "tracing",
  "unicode-segmentation",
@@ -2325,17 +2316,6 @@ dependencies = [
  "minimal-lexical",
 ]
-
-[[package]]
-name = "nom_locate"
-version = "4.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3"
-dependencies = [
- "bytecount",
- "memchr",
- "nom",
-]
 
 [[package]]
 name = "nu-ansi-term"
 version = "0.46.0"

Cargo.toml (magnetar_mmm_parser)

@@ -11,10 +11,8 @@ xml = ["dep:quick-xml"]
 [dependencies]
 either = { workspace = true }
 emojis = { workspace = true }
-nom = { workspace = true }
-nom_locate = { workspace = true }
-compact_str = { workspace = true, features = ["serde"] }
 serde = { workspace = true, features = ["derive"] }
-smallvec = { workspace = true }
 strum = { workspace = true, features = ["derive"] }
 tracing = { workspace = true }
 unicode-segmentation = { workspace = true }

File diff suppressed because it is too large.

View File

@ -0,0 +1,261 @@
use either::Either;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::collections::HashMap;
use strum::IntoStaticStr;
#[derive(Debug, Clone, Deserialize, Serialize, Eq, PartialEq)]
pub enum Token<'a> {
PlainText(Cow<'a, str>),
Sequence(Vec<Token<'a>>),
Quote(Vec<Token<'a>>),
Small(Vec<Token<'a>>),
BoldItalic(Vec<Token<'a>>),
Bold(Vec<Token<'a>>),
Italic(Vec<Token<'a>>),
Center(Vec<Token<'a>>),
Strikethrough(Vec<Token<'a>>),
PlainTag(String),
InlineCode(String),
InlineMath(String),
UrlRaw(String),
UrlNoEmbed(String),
Link {
label: Vec<Token<'a>>,
href: String,
},
LinkNoEmbed {
label: Vec<Token<'a>>,
href: String,
},
BlockCode {
lang: Option<String>,
inner: String,
},
BlockMath(String),
Function {
name: String,
params: HashMap<String, Option<String>>,
inner: Vec<Token<'a>>,
},
Mention {
name: String,
host: Option<String>,
mention_type: MentionType,
},
UnicodeEmoji(String),
ShortcodeEmoji {
shortcode: String,
host: Option<String>,
},
Hashtag(String),
}
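To make the shape of the new Vec-based tree concrete, here is a hand-built value (an illustrative sketch only, not something produced by running the parser):

// Illustrative only: a token tree of the shape this enum describes,
// built by hand for the input "hi **there** #mmm".
fn example_tree() -> Token<'static> {
    Token::Sequence(vec![
        Token::PlainText("hi ".into()),
        Token::Bold(vec![Token::PlainText("there".into())]),
        Token::PlainText(" ".into()),
        Token::Hashtag("mmm".to_string()),
    ])
}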
#[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, IntoStaticStr)]
// The alternative would be to implement a serde serializer for this one enum, but that's disgusting
#[strum(serialize_all = "snake_case")]
#[serde(rename_all = "snake_case")]
pub enum MentionType {
Community,
User,
MatrixUser,
}
impl MentionType {
pub fn to_char(&self) -> char {
match self {
MentionType::Community => '!',
MentionType::User => '@',
MentionType::MatrixUser => ':',
}
}
pub fn separator(&self) -> char {
match self {
MentionType::Community | MentionType::User => '@',
MentionType::MatrixUser => ':',
}
}
}
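A small sketch of how the variants map to strings and characters, assuming the strum and serde attributes above behave as configured (illustrative, not part of the committed file):

// Illustrative only: expected behaviour of the derives and accessors above.
fn mention_type_demo() {
    // strum's IntoStaticStr with serialize_all = "snake_case"
    let tag: &'static str = MentionType::MatrixUser.into();
    assert_eq!(tag, "matrix_user");

    // Leading sigil and name/host separator per variant.
    assert_eq!(MentionType::User.to_char(), '@');
    assert_eq!(MentionType::Community.to_char(), '!');
    assert_eq!(MentionType::User.separator(), '@');
    assert_eq!(MentionType::MatrixUser.separator(), ':');
}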
impl Token<'_> {
fn str_content_left(&self) -> Option<&str> {
match self {
Token::PlainText(text) => Some(text.as_ref()),
Token::Sequence(tokens) => tokens.first().and_then(Token::str_content_left),
Token::Quote(inner) => inner.first().and_then(Token::str_content_left),
Token::Small(inner) => inner.first().and_then(Token::str_content_left),
Token::Bold(inner) => inner.first().and_then(Token::str_content_left),
Token::Italic(inner) => inner.first().and_then(Token::str_content_left),
Token::Center(inner) => inner.first().and_then(Token::str_content_left),
Token::Strikethrough(inner) => inner.first().and_then(Token::str_content_left),
Token::PlainTag(tag) => Some(tag.as_ref()),
Token::UrlRaw(url) => Some(url.as_ref()),
Token::UrlNoEmbed(url) => Some(url.as_ref()),
Token::Link { label, .. } => label.first().and_then(Token::str_content_left),
Token::Function { inner, .. } => inner.first().and_then(Token::str_content_left),
Token::Mention { name, .. } => Some(name.as_ref()),
Token::UnicodeEmoji(code) => Some(code.as_ref()),
Token::Hashtag(tag) => Some(tag.as_ref()),
_ => None,
}
}
fn str_content_right(&self) -> Option<&str> {
match self {
Token::PlainText(text) => Some(text.as_ref()),
Token::Sequence(tokens) => tokens.last().and_then(Token::str_content_right),
Token::Quote(inner) => inner.last().and_then(Token::str_content_right),
Token::Small(inner) => inner.last().and_then(Token::str_content_right),
Token::Bold(inner) => inner.last().and_then(Token::str_content_right),
Token::Italic(inner) => inner.last().and_then(Token::str_content_right),
Token::Center(inner) => inner.last().and_then(Token::str_content_right),
Token::Strikethrough(inner) => inner.last().and_then(Token::str_content_right),
Token::PlainTag(tag) => Some(tag.as_ref()),
Token::UrlRaw(url) => Some(url.as_ref()),
Token::UrlNoEmbed(url) => Some(url.as_ref()),
Token::Link { label, .. } => label.last().and_then(Token::str_content_right),
Token::Function { inner, .. } => inner.last().and_then(Token::str_content_right),
Token::Mention { name, .. } => Some(name.as_ref()),
Token::UnicodeEmoji(code) => Some(code.as_ref()),
Token::Hashtag(tag) => Some(tag.as_ref()),
_ => None,
}
}
fn inner(&self) -> Token {
match self {
plain @ Token::PlainText(_) => plain.clone(),
sequence @ Token::Sequence(_) => sequence.clone(),
Token::Quote(inner) => Token::Sequence(inner.iter().map(Token::inner).collect()),
Token::Small(inner) => Token::Sequence(inner.iter().map(Token::inner).collect()),
Token::Bold(inner) => Token::Sequence(inner.iter().map(Token::inner).collect()),
Token::Italic(inner) => Token::Sequence(inner.iter().map(Token::inner).collect()),
Token::Center(inner) => Token::Sequence(inner.iter().map(Token::inner).collect()),
Token::Strikethrough(inner) => Token::Sequence(inner.iter().map(Token::inner).collect()),
Token::PlainTag(text) => Token::PlainText(text.clone().into()),
Token::InlineCode(code) => Token::PlainText(code.clone().into()),
Token::InlineMath(math) => Token::PlainText(math.clone().into()),
Token::UrlRaw(url) => Token::PlainText(url.clone().into()),
Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()),
Token::Link { label, .. } => Token::Sequence(label.iter().map(Token::inner).collect()),
Token::LinkNoEmbed { label, .. } => Token::Sequence(label.iter().map(Token::inner).collect()),
Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()),
Token::BlockMath(math) => Token::PlainText(math.clone().into()),
Token::Function { inner, .. } => Token::Sequence(inner.iter().map(Token::inner).collect()),
Token::Mention { name, .. } => Token::PlainText(name.clone().into()),
Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()),
Token::ShortcodeEmoji { shortcode, .. } => Token::PlainText(shortcode.clone().into()),
Token::Hashtag(tag) => Token::PlainText(tag.clone().into()),
}
}
fn merged(&self) -> Token {
match self {
Token::Sequence(tokens) => {
let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
if let Some(Token::PlainText(last)) = acc.last_mut() {
if let Token::PlainText(tok_text) = tok {
*last += tok_text.as_ref();
return acc;
}
}
if let Token::Sequence(seq) = tok {
let items = seq.iter().map(Token::merged).flat_map(|t| match t {
Token::Sequence(seq) => Either::Left(seq.into_iter()),
other => Either::Right(std::iter::once(other)),
});
for item in items {
if let Some(Token::PlainText(last)) = acc.last_mut() {
if let Token::PlainText(tok_text) = item {
*last += tok_text.as_ref();
continue;
}
}
acc.push(item);
}
return acc;
}
acc.push(tok.merged());
acc
});
if tokens_multi.len() == 1 {
return tokens_multi.into_iter().next().unwrap();
}
Token::Sequence(tokens_multi)
}
Token::Quote(inner) => Token::Quote(inner.iter().map(Token::merged).collect()),
Token::Small(inner) => Token::Small(inner.iter().map(Token::merged).collect()),
Token::Bold(inner) => Token::Bold(inner.iter().map(Token::merged).collect()),
Token::Italic(inner) => Token::Italic(inner.iter().map(Token::merged).collect()),
Token::Center(inner) => Token::Center(inner.iter().map(Token::merged).collect()),
Token::Strikethrough(inner) => Token::Strikethrough(inner.iter().map(Token::merged).collect()),
Token::Link { label, href } => Token::Link {
label: label.iter().map(Token::merged).collect(),
href: href.clone(),
},
Token::LinkNoEmbed { label, href } => Token::LinkNoEmbed {
label: label.iter().map(Token::merged).collect(),
href: href.clone(),
},
Token::Function {
name,
params,
inner,
} => Token::Function {
name: name.clone(),
params: params.clone(),
inner: inner.iter().map(Token::merged).collect(),
},
other => other.clone(),
}
}
pub fn walk_map_collect<T>(&self, func: &impl Fn(&Token) -> Option<T>, out: &mut Vec<T>) {
if let Some(v) = func(self) {
out.push(v)
}
match self {
Token::Sequence(items) => {
items.iter().for_each(|tok| tok.walk_map_collect(func, out));
}
Token::Quote(inner)
| Token::Small(inner)
| Token::Bold(inner)
| Token::Italic(inner)
| Token::Center(inner)
| Token::Function { inner, .. }
| Token::Link { label: inner, .. }
| Token::Strikethrough(inner) => {
inner.iter().for_each(|tok| tok.walk_map_collect(func, out))
}
_ => {}
}
}
pub fn walk_speech_transform(&mut self, func: &impl Fn(&mut Cow<'_, str>)) {
match self {
Token::Sequence(items) => {
items
.iter_mut()
.for_each(|tok| tok.walk_speech_transform(func));
}
Token::Small(inner)
| Token::Bold(inner)
| Token::Italic(inner)
| Token::Center(inner)
| Token::Function { inner, .. }
| Token::Strikethrough(inner) => {
inner.iter_mut().for_each(|tok| tok.walk_speech_transform(func))
}
Token::PlainText(text) => func(text),
_ => {}
}
}
}
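The two public walkers are the intended read and rewrite entry points for consumers of a parsed tree. A minimal sketch of their use, written as if it lived inside this crate (illustrative only, not part of the commit):

// Illustrative only: collect every hashtag, then upper-case all plain text in place.
fn collect_hashtags_and_shout(tree: &mut Token) -> Vec<String> {
    let mut tags = Vec::new();
    tree.walk_map_collect(
        &|tok| match tok {
            Token::Hashtag(tag) => Some(tag.clone()),
            _ => None,
        },
        &mut tags,
    );

    // walk_speech_transform visits every PlainText leaf mutably.
    tree.walk_speech_transform(&|text| {
        *text = Cow::Owned(text.to_uppercase());
    });

    tags
}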


@@ -0,0 +1,157 @@
use crate::types::{Effect, Input, Parser, ParserCont, ParserRet, State};
fn line_start<'a>(
state: &mut State,
inp: &mut impl Input<'a>,
_output: &'_ mut impl FnMut(Effect<'a>),
cont: impl ParserCont,
) -> ParserRet {
match inp.view().as_bytes() {
[b'>', b' ', ..] => cont.continue_with2((line_start, quote)),
[b'`', b'`', b'`', ..] => cont.continue_with(CodeBlock {}),
[b'\\', b'[', ..] => cont.continue_with(BlockMath {}),
[b'<', b'c', b'e', b'n', b't', b'e', b'r', b'>', ..] => cont.continue_with2((inline, center_tag_end)),
_ => cont.continue_with(inline)
}
}
fn inline<'a>(
state: &mut State,
inp: &mut impl Input<'a>,
_output: &'_ mut impl FnMut(Effect<'a>),
cont: impl ParserCont,
) -> ParserRet {
match inp.view().as_bytes() {
[b'\n', ..] => return cont.continue_with(line_start),
[b'<', b'b', b'>', ..] => return cont.continue_with(inline),
[b'<', b's', b'>', ..] => return cont.continue_with(inline),
[b'<', b'i', b'>', ..] => return cont.continue_with(inline),
[b'<', b'p', b'l', b'a', b'i', b'n', b'>', ..] => return cont.continue_with(inline),
[b'<', b's', b'm', b'a', b'l', b'l', b'>', ..] => return cont.continue_with(inline),
[b'*', b'*', ..] => return cont.continue_with(inline),
[b'_', b'_', ..] => return cont.continue_with(inline),
[b'*', ..] => return cont.continue_with(inline),
[b'_', ..] => return cont.continue_with(inline),
[b'~', b'~', ..] => return cont.continue_with(inline),
[b'`', ..] => return cont.continue_with(inline),
[b'\\', b'(', ..] => return cont.continue_with(inline),
_ => cont.continue_with(text_or_emoji),
}
}
fn text_or_emoji<'a>(
state: &mut State,
input: &mut impl Input<'a>,
output: &'_ mut impl FnMut(Effect<'a>),
cont: impl ParserCont,
) -> ParserRet {
let Some(view) = input.next() else {
return;
};
let emoji_str = view.trim_end_matches(['\u{200c}', '\u{200d}']);
if let Some(_) = emojis::get(emoji_str) {
output(Effect::Output(emoji_str));
return;
};
output(Effect::Output(view));
}
fn block_quote_end<'a>(
state: &mut State,
inp: &mut impl Input<'a>,
_output: &'_ mut impl FnMut(Effect<'a>),
cont: impl ParserCont,
) -> ParserRet { todo!() }
fn code_block_end<'a>(
state: &mut State,
inp: &mut impl Input<'a>,
_output: &'_ mut impl FnMut(Effect<'a>),
cont: impl ParserCont,
) -> ParserRet { todo!() }
fn block_math_end<'a>(
state: &mut State,
inp: &mut impl Input<'a>,
_output: &'_ mut impl FnMut(Effect<'a>),
cont: impl ParserCont,
) -> ParserRet { todo!() }
fn center_tag_end<'a>(
state: &mut State,
inp: &mut impl Input<'a>,
_output: &'_ mut impl FnMut(Effect<'a>),
cont: impl ParserCont,
) -> ParserRet { todo!() }
#[derive(Copy, Clone)]
enum TagInlineKind {
TagSmall,
TagPlain,
TagBold,
TagItalic,
TagStrikethrough,
}
struct TagInline {
kind: TagInlineKind,
}
impl Parser for TagInline {}
fn inline_math_end<'a>(
state: &mut State,
inp: &mut impl Input<'a>,
_output: &'_ mut impl FnMut(Effect<'a>),
cont: impl ParserCont,
) -> ParserRet { todo!() }
fn inline_code_end<'a>(
state: &mut State,
inp: &mut impl Input<'a>,
_output: &'_ mut impl FnMut(Effect<'a>),
cont: impl ParserCont,
) -> ParserRet { todo!() }
struct Url {}
impl Parser for Url {
fn take<'a>(
&mut self,
state: &mut State,
input: &mut impl Input<'a>,
output: &'_ mut impl FnMut(Effect<'a>),
cont: impl ParserCont,
) -> ParserRet {
todo!()
}
}
#[inline]
fn url_chars_base<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
alt((
recognize(tuple((
tag("["),
many_till(
self.increase_nesting(self.partial_span(Self::url_chars_base)),
tag("]"),
),
))),
recognize(tuple((
tag("("),
many_till(
self.increase_nesting(self.partial_span(Self::url_chars_base)),
tag(")"),
),
))),
recognize(tuple((
not(satisfy(char::is_control)),
not(satisfy(char::is_whitespace)),
not(one_of(")]>")),
anychar,
))),
))(input)
}


@@ -1,17 +1,8 @@
 #![cfg(test)]
 use std::collections::HashMap;
-use nom::bytes::complete::tag;
-use crate::{xml_write::to_xml_string, Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT};
-fn parse_full(string: &str) -> Token {
-    Context::default()
-        .full(Span::new_extra(string, SpanMeta::default()))
-        .unwrap()
-        .1
-        .merged()
-}
+use crate::output_types::{MentionType, Token};
+use crate::{parse_full, xml_write::to_xml_string};
 #[test]
 fn parse_empty() {
@@ -27,9 +18,9 @@ fn parse_url_chars() {
             "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
             SpanMeta::default(),
         ))
         .unwrap()
         .1
         .into_fragment(),
         "https://en.wikipedia.org/wiki/Sandbox_(computer_security)"
     );
@@ -60,9 +51,9 @@ fn parse_url_chars() {
             "https://cs.wikipedia.org/wiki/Among Us )",
             SpanMeta::default(),
         ))
         .unwrap()
         .1
         .into_fragment(),
         "https://cs.wikipedia.org/wiki/Among Us"
     );
@@ -71,9 +62,9 @@ fn parse_url_chars() {
             "https://en.wikipedia.org/wiki/Among Us )",
             SpanMeta::default(),
         ))
         .unwrap()
         .1
         .into_fragment(),
         "https://en.wikipedia.org/wiki/Among"
     );
 }
@@ -82,17 +73,17 @@ fn parse_url_chars() {
 fn parse_formatting() {
     assert_eq!(
         parse_full(r#"~~stikethrough~~"#),
-        Token::Strikethrough(Box::new(Token::PlainText("stikethrough".into()))),
+        Token::Strikethrough(vec![Token::PlainText("stikethrough".into())]),
     );
     assert_eq!(
         parse_full(r#"**bold**"#),
-        Token::Bold(Box::new(Token::PlainText("bold".into()))),
+        Token::Bold(vec![Token::PlainText("bold".into())]),
     );
     assert_eq!(
         parse_full(r#"*italic*"#),
-        Token::Italic(Box::new(Token::PlainText("italic".into()))),
+        Token::Italic(vec![Token::PlainText("italic".into())]),
     );
     assert_eq!(
@@ -109,7 +100,7 @@ fn parse_formatting() {
         parse_full("intra*word*italic"),
         Token::Sequence(vec![
             Token::PlainText("intra".into()),
-            Token::Italic(Box::new(Token::PlainText("word".into()))),
+            Token::Italic(vec![Token::PlainText("word".into())]),
             Token::PlainText("italic".into()),
         ])
     );
@@ -123,13 +114,13 @@ fn parse_formatting() {
         parse_full(r#"long text with a *footnote <b>text</b>"#),
         Token::Sequence(vec![
             Token::PlainText("long text with a *footnote ".into()),
-            Token::Bold(Box::new(Token::PlainText("text".into()))),
+            Token::Bold(vec![Token::PlainText("text".into())]),
         ])
     );
     assert_eq!(
         parse_full(r#"*"italic"*"#),
-        Token::Italic(Box::new(Token::PlainText("\"italic\"".into())))
+        Token::Italic(vec![Token::PlainText("\"italic\"".into())])
     );
     assert_eq!(
@@ -161,23 +152,23 @@ fn parse_formatting() {
     assert_eq!(
         parse_full(r#"***bold italic***"#),
-        Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
+        Token::Bold(vec![Token::Italic(vec![Token::PlainText(
             "bold italic".into()
-        )))))
+        )])])
     );
     assert_eq!(
         parse_full(r#"<b><i>bold italic</i></b>"#),
-        Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
+        Token::Bold(vec![Token::Italic(vec![Token::PlainText(
             "bold italic".into()
-        )))))
+        )])])
     );
     assert_eq!(
         parse_full("~~*hello\nworld*"),
         Token::Sequence(vec![
             Token::PlainText("~~".into()),
-            Token::Italic(Box::new(Token::PlainText("hello\nworld".into()))),
+            Token::Italic(vec![Token::PlainText("hello\nworld".into())]),
         ])
     )
 }
@@ -188,7 +179,7 @@ fn parse_flanking() {
         parse_full(r#"aaa*iii*bbb"#),
         Token::Sequence(vec![
             Token::PlainText("aaa".into()),
-            Token::Italic(Box::new(Token::PlainText("iii".into()))),
+            Token::Italic(vec![Token::PlainText("iii".into())]),
             Token::PlainText("bbb".into()),
         ])
     );
@@ -202,33 +193,33 @@ fn parse_flanking() {
         parse_full("aaa\n_iii_\nbbb"),
         Token::Sequence(vec![
             Token::PlainText("aaa\n".into()),
-            Token::Italic(Box::new(Token::PlainText("iii".into()))),
+            Token::Italic(vec![Token::PlainText("iii".into())]),
             Token::PlainText("\nbbb".into()),
         ])
     );
     assert_eq!(
         parse_full(r#"*iii*"#),
-        Token::Italic(Box::new(Token::PlainText("iii".into())))
+        Token::Italic(vec![Token::PlainText("iii".into())])
     );
     assert_eq!(
         parse_full(r#"_iii_"#),
-        Token::Italic(Box::new(Token::PlainText("iii".into())))
+        Token::Italic(vec![Token::PlainText("iii".into())])
     );
     assert_eq!(
         parse_full(r#"aaa*iii*"#),
         Token::Sequence(vec![
             Token::PlainText("aaa".into()),
-            Token::Italic(Box::new(Token::PlainText("iii".into()))),
+            Token::Italic(vec![Token::PlainText("iii".into())]),
         ])
     );
     assert_eq!(
         parse_full(r#"*iii*bbb"#),
         Token::Sequence(vec![
-            Token::Italic(Box::new(Token::PlainText("iii".into()))),
+            Token::Italic(vec![Token::PlainText("iii".into())]),
             Token::PlainText("bbb".into()),
         ])
     );
@@ -309,12 +300,12 @@ a^2 + b^2 = c^2
 🦋🏳️‍⚧️
 text</center>"#
         ),
-        Token::Center(Box::new(Token::Sequence(vec![
+        Token::Center(vec![
             Token::PlainText("centered\n".into()),
             Token::UnicodeEmoji("🦋".into()),
             Token::UnicodeEmoji("🏳️‍⚧️".into()),
             Token::PlainText("\ntext".into()),
-        ])))
+        ])
     );
     assert_eq!(
@@ -323,11 +314,11 @@ a^2 + b^2 = c^2
 > 👩🏽‍🤝‍👩🏼
 > text</center>"#
         ),
-        Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
+        Token::Quote(vec![Token::Center(vec![
             Token::PlainText("centered\n".into()),
             Token::UnicodeEmoji("👩🏽‍🤝‍👩🏼".into()),
             Token::PlainText("\ntext".into())
-        ]))))),
+        ])]),
     );
     assert_eq!(
@@ -335,11 +326,11 @@ a^2 + b^2 = c^2
         Token::Function {
             name: "x2".into(),
             params: HashMap::new(),
-            inner: Box::new(Token::Sequence(vec![
+            inner: vec![
                 Token::Function {
                     name: "sparkle".into(),
                     params: HashMap::new(),
-                    inner: Box::new(Token::UnicodeEmoji("🥺".into())),
+                    inner: vec![Token::UnicodeEmoji("🥺".into())],
                 },
                 Token::UnicodeEmoji("💜".into()),
                 Token::Function {
@@ -350,10 +341,10 @@ a^2 + b^2 = c^2
                         params.insert("speed".into(), Some("5s".into()));
                         params
                     },
-                    inner: Box::new(Token::UnicodeEmoji("❤️".into())),
+                    inner: vec![Token::UnicodeEmoji("❤️".into())],
                 },
                 Token::UnicodeEmoji("🦊".into()),
-            ]))
+            ]
         },
     );
@@ -362,13 +353,13 @@ a^2 + b^2 = c^2
         Token::Sequence(vec![
             Token::PlainText("<b>bold ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag1".into(),
                 host: None
             },
             Token::PlainText(" <i> ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag2".into(),
                 host: None
             },
@@ -386,11 +377,11 @@ a^2 + b^2 = c^2
 >> Nested quote
 "#
         ),
-        Token::Quote(Box::new(Token::Sequence(vec![
+        Token::Quote(vec![
             Token::PlainText("test\n".into()),
-            Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
-            Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
-        ]))),
+            Token::Italic(vec![Token::PlainText("\nitalic\n".into())]),
+            Token::Quote(vec![Token::PlainText("Nested quote".into())])
+        ]),
     );
 }
@@ -442,9 +433,8 @@ fn parse_link() {
         Token::Sequence(vec![
             Token::PlainText("Link test: ".into()),
             Token::Link {
-                label: Box::new(Token::PlainText("label".into())),
-                href: "https://example.com".into(),
-                embed: true,
+                label: vec![Token::PlainText("label".into())],
+                href: "https://example.com".into()
             },
         ])
     );
@@ -481,10 +471,9 @@ fn parse_link() {
         parse_full("Link test: ?[label](https://awawa.gay)"),
         Token::Sequence(vec![
             Token::PlainText("Link test: ".into()),
-            Token::Link {
-                label: Box::new(Token::PlainText("label".into())),
+            Token::LinkNoEmbed {
+                label: vec![Token::PlainText("label".into())],
                 href: "https://awawa.gay".into(),
-                embed: false,
             },
         ])
     );
@@ -493,10 +482,9 @@ fn parse_link() {
         parse_full("Link test: ?[label](https://awawa.gay)test"),
         Token::Sequence(vec![
             Token::PlainText("Link test: ".into()),
-            Token::Link {
-                label: Box::new(Token::PlainText("label".into())),
+            Token::LinkNoEmbed {
+                label: vec![Token::PlainText("label".into())],
                 href: "https://awawa.gay".into(),
-                embed: false,
             },
             Token::PlainText("test".into()),
         ])
@@ -506,10 +494,9 @@ fn parse_link() {
         parse_full("Link test: (?[label](https://awawa.gay))"),
         Token::Sequence(vec![
             Token::PlainText("Link test: (".into()),
-            Token::Link {
-                label: Box::new(Token::PlainText("label".into())),
+            Token::LinkNoEmbed {
+                label: vec![Token::PlainText("label".into())],
                 href: "https://awawa.gay".into(),
-                embed: false,
             },
             Token::PlainText(")".into()),
         ])
@@ -546,7 +533,7 @@ fn parse_mention() {
     assert_eq!(
         parse_full("@tag"),
         Token::Mention {
-            mention_type: crate::MentionType::User,
+            mention_type: MentionType::User,
             name: "tag".into(),
             host: None,
         }
@@ -562,7 +549,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("hgsjlkdsa ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: None,
             },
@@ -575,7 +562,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("hgsjlkdsa ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: None,
             },
@@ -588,7 +575,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("aaaa ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: Some("domain".into()),
             },
@@ -601,7 +588,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("test ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: Some("domain".into()),
             },
@@ -614,7 +601,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("test ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: Some("domain.gay".into()),
             },
@@ -627,7 +614,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("test ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::User,
+                mention_type: MentionType::User,
                 name: "tag".into(),
                 host: Some("domain".into()),
             },
@@ -640,7 +627,7 @@ fn parse_mention() {
         Token::Sequence(vec![
             Token::PlainText("test ".into()),
             Token::Mention {
-                mention_type: crate::MentionType::Community,
+                mention_type: MentionType::Community,
                 name: "tag".into(),
                 host: Some("domain.com".into()),
             },
@@ -651,7 +638,7 @@ fn parse_mention() {
     assert_eq!(
         parse_full("@tag:domain.com"),
         Token::Mention {
-            mention_type: crate::MentionType::MatrixUser,
+            mention_type: MentionType::MatrixUser,
            name: "tag".into(),
            host: Some("domain.com".into())
         },
@@ -758,20 +745,10 @@ fn xml_serialization() {
         &to_xml_string(&parse_full(
             "@natty $[spin.speed=0.5s 🥺]:cat_attack: <plain>test</plain>"
         ))
         .unwrap(),
         r#"<mmm><mention name="natty" type="user"/> <fn name="spin" arg-speed="0.5s"><ue>🥺</ue></fn><ee>cat_attack</ee> test</mmm>"#
     );
-    assert_eq!(
-        &to_xml_string(&parse_full(
-            "Ring Galaxy AM 0644 741 from Hubble\nCredits: AURA, STScI, J. Higdon, Cornell, ESA, #NASA\n#nature #space #astrophotography"
-        ))
-        .unwrap(),
-        r#"<mmm>Ring Galaxy AM 0644 741 from Hubble
-Credits: AURA, STScI, J. Higdon, Cornell, ESA, <hashtag>NASA</hashtag>
-<hashtag>nature</hashtag> <hashtag>space</hashtag> <hashtag>astrophotography</hashtag></mmm>"#
-    );
     assert_eq!(
         &to_xml_string(&parse_full(
             r#"
@@ -779,7 +756,7 @@ Credits: AURA, STScI, J. Higdon, Cornell, ESA, <hashtag>NASA</hashtag>
 var x = undefined;
 ``` "#
         ))
         .unwrap(),
         "<mmm><code lang=\"js\">var x = undefined;</code></mmm>"
     );
 }

types.rs (new file)

@@ -0,0 +1,120 @@
use unicode_segmentation::{Graphemes, UnicodeSegmentation};
#[derive(Debug, Copy, Clone)]
pub(crate) struct ParseSpan<'a> {
pub(crate) source: &'a str,
pub(crate) offset: usize,
pub(crate) length: usize,
}
impl ParseSpan<'_> {
pub(crate) fn concat(self, other: Self) -> Option<Self> {
if self.source != other.source {
panic!("Attempted to concat slices from different strings");
}
if self.offset + self.length != other.offset {
return None;
}
Some(ParseSpan {
source: self.source,
offset: self.offset,
length: self.length + other.length,
})
}
pub(crate) fn spanned_source(&self) -> &str {
&self.source[self.offset..self.offset + self.length]
}
}
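A brief sketch of the intended concat contract (illustrative, not part of the committed file): spans over the same source join only when they touch end to start.

// Illustrative only: adjacent spans concatenate, gapped spans do not.
fn concat_demo() {
    let src = "hello world";
    let hello = ParseSpan { source: src, offset: 0, length: 5 };
    let space = ParseSpan { source: src, offset: 5, length: 1 };
    let world = ParseSpan { source: src, offset: 6, length: 5 };

    let joined = hello.concat(space).expect("adjacent spans concatenate");
    assert_eq!(joined.spanned_source(), "hello ");

    // `world` does not start where `hello` ends (6 != 0 + 5), so this is None.
    assert!(hello.concat(world).is_none());
}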
pub(crate) struct TokStream<'a>(ParseSpan<'a>, Graphemes<'a>);
impl<'a> From<&'a str> for TokStream<'a> {
fn from(source: &'a str) -> Self {
TokStream(
ParseSpan {
source,
length: source.len(),
offset: 0,
},
source.graphemes(true),
)
}
}
pub(crate) trait Input<'a> {
fn next(&mut self) -> Option<&'a str>;
fn view(&self) -> &'a str;
}
impl<'a> Input<'a> for TokStream<'a> {
#[inline]
fn next(&mut self) -> Option<&'a str> {
if let Some(p) = self.1.next() {
let length = p.len();
self.0.offset += length;
self.0.length -= length;
return Some(p);
}
None
}
#[inline]
fn view(&self) -> &'a str {
&self.0.source[self.0.offset..self.0.offset + self.0.length]
}
}
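The stream hands out one grapheme cluster at a time while view() always shows the unconsumed rest of the input; a short sketch of that contract (illustrative, not part of the committed file):

// Illustrative only: TokStream yields grapheme clusters and keeps view() in sync.
fn tok_stream_demo() {
    let mut stream = TokStream::from("a🏳️‍⚧️b");

    assert_eq!(stream.next(), Some("a"));
    // The flag is a single extended grapheme cluster, so it comes out in one piece.
    assert_eq!(stream.next(), Some("🏳️‍⚧️"));
    // Everything not yet consumed stays visible through view().
    assert_eq!(stream.view(), "b");
    assert_eq!(stream.next(), Some("b"));
    assert_eq!(stream.next(), None);
}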
#[derive(Debug, Copy, Clone)]
pub(crate) struct Lex<'a> {
pub(crate) token: &'a str,
pub(crate) span: ParseSpan<'a>,
}
pub(crate) type OutTok<'a> = Lex<'a>;
pub(crate) const MAX_DEPTH: usize = 24;
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct State {
pub(crate) depth: usize,
}
pub(crate) enum Effect<'a> {
Output(OutTok<'a>)
}
#[must_use]
pub(crate) struct ParserRet {
_private: (),
}
pub(crate) trait ParserCont {
fn continue_with(self, to: impl Parser) -> ParserRet;
fn continue_with2(self, to: (impl Parser, impl Parser)) -> ParserRet;
}
pub(crate) trait Parser {
fn take<'a>(
&mut self,
state: &mut State,
input: &mut impl Input<'a>,
handler: &'_ mut impl FnMut(Effect<'a>),
visitor: impl ParserCont,
) -> ParserRet;
}
impl<I, F, V> Parser for fn(&mut State, &mut I, &'_ mut F, V) -> ParserRet {
fn take<'a>(
&mut self,
state: &mut State,
input: &mut impl Input<'a>,
handler: &'_ mut impl FnMut(Effect<'a>),
visitor: impl ParserCont,
) -> ParserRet {
self(state, input, handler, visitor)
}
}

xml_write.rs

@@ -1,9 +1,8 @@
 use std::io::{Cursor, Write};
+use crate::output_types::Token;
 use quick_xml::events::{BytesText, Event};
-use crate::Token;
 impl Token {
     fn write<T: Write>(&self, writer: &mut quick_xml::Writer<T>) -> quick_xml::Result<()> {
         match self {