From 95141388faacc3e165d71f6235b44a9a292b7499 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Fri, 29 Sep 2023 16:56:59 +0200
Subject: [PATCH 01/23] Created a project for the MMM parser

---
 Cargo.lock                     | 8 ++++++++
 Cargo.toml                     | 2 ++
 magnetar_mmm_parser/Cargo.toml | 9 +++++++++
 magnetar_mmm_parser/README.md  | 5 +++++
 magnetar_mmm_parser/src/lib.rs | 0
 5 files changed, 24 insertions(+)
 create mode 100644 magnetar_mmm_parser/Cargo.toml
 create mode 100644 magnetar_mmm_parser/README.md
 create mode 100644 magnetar_mmm_parser/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 95e3419..83cf326 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1603,6 +1603,14 @@ dependencies = [
  "windows-sys",
 ]
 
+[[package]]
+name = "mmm_parser"
+version = "0.2.1-alpha"
+dependencies = [
+ "nom",
+ "thiserror",
+]
+
 [[package]]
 name = "nom"
 version = "7.1.3"
diff --git a/Cargo.toml b/Cargo.toml
index 95a4fe6..76333e2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,7 @@ members = [
     "fe_calckey",
     "magnetar_common",
     "magnetar_sdk",
+    "magnetar_mmm_parser",
     "core"
 ]
 
@@ -36,6 +37,7 @@ hyper = "0.14"
 js-sys = "0.3"
 log = "0.4"
 miette = "5.9"
+nom = "7"
 percent-encoding = "2.2"
 redis = "0.23"
 reqwest = "0.11"
diff --git a/magnetar_mmm_parser/Cargo.toml b/magnetar_mmm_parser/Cargo.toml
new file mode 100644
index 0000000..3620931
--- /dev/null
+++ b/magnetar_mmm_parser/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "mmm_parser"
+version.workspace = true
+edition.workspace = true
+license = "MIT OR Apache-2.0"
+
+[dependencies]
+thiserror = { workspace = true }
+nom = { workspace = true }
\ No newline at end of file
diff --git a/magnetar_mmm_parser/README.md b/magnetar_mmm_parser/README.md
new file mode 100644
index 0000000..92dff9b
--- /dev/null
+++ b/magnetar_mmm_parser/README.md
@@ -0,0 +1,5 @@
+# MMM
+
+Magnetar {marinated, modified} Markdown?
+
+#TODO: Finish docs
\ No newline at end of file
diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
new file mode 100644
index 0000000..e69de29

From 1af8f4e213207d010fdceb43a4732bb76bb2ef66 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Sun, 1 Oct 2023 23:04:32 +0200
Subject: [PATCH 02/23] Basic inline tag parsing

---
 Cargo.lock                     |  19 +-
 Cargo.toml                     |   1 +
 magnetar_mmm_parser/Cargo.toml |   4 +-
 magnetar_mmm_parser/src/lib.rs | 405 +++++++++++++++++++++++++++++++++
 4 files changed, 426 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 83cf326..5b8dcd4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -411,6 +411,12 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "bytecount"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
+
 [[package]]
 name = "byteorder"
 version = "1.4.3"
@@ -1608,7 +1614,7 @@ name = "mmm_parser"
 version = "0.2.1-alpha"
 dependencies = [
  "nom",
- "thiserror",
+ "nom_locate",
 ]
 
 [[package]]
@@ -1621,6 +1627,17 @@ dependencies = [
  "minimal-lexical",
 ]
 
+[[package]]
+name = "nom_locate"
+version = "4.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3"
+dependencies = [
+ "bytecount",
+ "memchr",
+ "nom",
+]
+
 [[package]]
 name = "nu-ansi-term"
 version = "0.46.0"
diff --git a/Cargo.toml b/Cargo.toml
index 76333e2..a7a960f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,6 +38,7 @@ js-sys = "0.3"
 log = "0.4"
 miette = "5.9"
 nom = "7"
+nom_locate = "4"
 percent-encoding = "2.2"
 redis = "0.23"
 reqwest = "0.11"
diff --git a/magnetar_mmm_parser/Cargo.toml b/magnetar_mmm_parser/Cargo.toml
index 3620931..8a07618 100644
--- a/magnetar_mmm_parser/Cargo.toml
+++ b/magnetar_mmm_parser/Cargo.toml
@@ -5,5 +5,5 @@ edition.workspace = true
 license = "MIT OR Apache-2.0"
 
 [dependencies]
-thiserror = { workspace = true }
-nom = { workspace = true }
\ No newline at end of file
+nom = { workspace = true }
+nom_locate = { workspace = true }
\ No newline at end of file
diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index e69de29..9ed647e 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -0,0 +1,405 @@
+use nom::branch::alt;
+use nom::bytes::complete::tag;
+use nom::character::complete;
+use nom::character::complete::{anychar, line_ending, not_line_ending, tab};
+use nom::combinator::{fail, not, opt};
+use nom::error::ErrorKind;
+use nom::multi::{many1, separated_list1};
+use nom::sequence::tuple;
+use nom::{IResult, Offset, Slice};
+use nom_locate::LocatedSpan;
+use std::borrow::Cow;
+
+enum Token<'a> {
+    PlainText(Cow<'a, str>),
+    Sequence(Vec<Token<'a>>),
+    Quote(Box<Token<'a>>),
+    Small(Box<Token<'a>>),
+    Big(Box<Token<'a>>),
+    BoldItalic(Box<Token<'a>>),
+    Bold(Box<Token<'a>>),
+    Italic(Box<Token<'a>>),
+    Center(Box<Token<'a>>),
+    Strikethrough(Box<Token<'a>>),
+    PlainTag(Cow<'a, str>),
+    InlineCode(Cow<'a, str>),
+    InlineMath(Cow<'a, str>),
+}
+
+impl Token<'_> {
+    fn owned(&self) -> Token<'static> {
+        match self {
+            Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())),
+            Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()),
+            Token::Quote(inner) => Token::Quote(Box::new(inner.owned())),
+            Token::Small(inner) => Token::Small(Box::new(inner.owned())),
+            Token::Big(inner) => Token::Big(Box::new(inner.owned())),
+            Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())),
+            Token::Bold(inner) => Token::Bold(Box::new(inner.owned())),
+            Token::Italic(inner) => Token::Italic(Box::new(inner.owned())),
+            Token::Center(inner) => Token::Center(Box::new(inner.owned())),
+            Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.owned())),
+            Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())),
+            Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())),
+            Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())),
+        }
+    }
+}
+
+type Span<'a> = LocatedSpan<&'a str>;
+
+trait SliceOffset {
+    fn up_to(&self, other: &Self) -> Self;
+
+    fn fragment_between<'a>(&self, other: &Self) -> &'a str
+    where
+        Self: 'a;
+}
+
+impl SliceOffset for Span<'_> {
+    fn up_to(&self, other: &Self) -> Self {
+        self.slice(..self.offset(other))
+    }
+
+    fn fragment_between<'a>(&self, other: &Self) -> &'a str
+    where
+        Self: 'a,
+    {
+        self.up_to(other).into_fragment()
+    }
+}
+
+const fn boxing_sequence<'a>(
+    func: impl Fn(Box<Token<'a>>) -> Token<'a>,
+) -> impl Fn(Vec<Token<'a>>) -> Token<'a> {
+    move |tokens| func(Box::new(Token::Sequence(tokens)))
+}
+
+const fn collect_char_sequence<'a>(
+    func: impl Fn(Cow<'a, str>) -> Token<'a>,
+) -> impl Fn(Vec<char>) -> Token<'a> {
+    move |chars| func(Cow::Owned(chars.into_iter().collect()))
+}
+
+fn spliced<'a>(
+    segments: &[Span<'a>],
+    func: impl Fn(Span) -> IResult<Span, Token>,
+    parent: Span<'a>,
+) -> IResult<Span<'a>, Token<'static>, nom::error::Error<Span<'a>>> {
+    let combined = segments
+        .iter()
+        .copied()
+        .map(Span::into_fragment)
+        .collect::<String>();
+    let cum_offset_combined = segments
+        .iter()
+        .scan(0, |acc, &x| {
+            *acc += x.len();
+            Some(*acc)
+        })
+        .collect::<Vec<_>>();
+    let current_seg = |input: Span| {
+        cum_offset_combined
+            .iter()
+            .enumerate()
+            .filter(|(_, &o)| o >= input.location_offset())
+            .map(|(i, o)| (segments[i], o))
+            .last()
+    };
+
+    type NE<E> = nom::Err<E>;
+    type NomError<'x> = nom::error::Error<Span<'x>>;
+
+    let quote_span = Span::new(&combined);
+    let (input, inner) = match func(quote_span) {
+        Ok((input, token)) => (input, token.owned()),
+        Err(e) => {
+            return match e {
+                NE::Error(e) => {
+                    let offset_new = e.input.location_offset();
+                    if let Some((seg_parent, offset_seg_new)) = current_seg(e.input) {
+                        let offset = offset_new - offset_seg_new;
+                        let offset_orig = offset + seg_parent.location_offset();
+                        Err(NE::Error(NomError::new(
+                            Span::new(&parent.into_fragment()[offset_orig..]),
+                            e.code,
+                        )))
+                    } else {
+                        // ???
+                        Err(NE::Failure(NomError::new(parent, ErrorKind::Fail)))
+                    }
+                }
+                NE::Failure(e) => Err(NE::Error(NomError::new(parent, e.code))),
+                NE::Incomplete(i) => Err(NE::Incomplete(i)),
+            };
+        }
+    };
+
+    let out = if let Some((seg_parent, offset_seg_new)) = current_seg(input) {
+        let offset = input.location_offset() - offset_seg_new;
+        let offset_orig = offset + seg_parent.location_offset();
+        parent.slice(offset_orig..)
+    } else {
+        parent
+    };
+
+    Ok((out, Token::Quote(Box::new(inner.owned()))))
+}
+
+fn space(input: Span) -> IResult<Span, Token> {
+    let start = input;
+    let (input, _) = alt((complete::char('\u{0020}'), complete::char('\u{3000}'), tab))(input)?;
+    Ok((
+        input,
+        Token::PlainText(start.fragment_between(&input).into()),
+    ))
+}
+
+struct Context;
+
+impl Context {
+    const fn partial<'a>(
+        &self,
+        func: impl Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
+    ) -> impl Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
+        move |input| func(self, input)
+    }
+
+    fn root<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, token) = alt((self.partial(Self::tag_quote),))(input)?;
+        Ok((input, token))
+    }
+
+    fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?;
+        Ok((input, token))
+    }
+
+    fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
+
+        if let (None, None) = leading_spaces {
+            if input.get_column() != 0 {
+                return fail(input);
+            }
+        }
+
+        let quote_line = |input| tuple((tag(">"), opt(space), not_line_ending))(input);
+
+        let orig_input = input;
+        let (input, lines) = separated_list1(line_ending, quote_line)(input)?;
+
+        let quote_lines = lines
+            .into_iter()
+            .map(|(_, _, text)| text)
+            .collect::<Vec<_>>();
+
+        if quote_lines.len() == 1
+            && quote_lines
+                .iter()
+                .map(Span::fragment)
+                .copied()
+                .any(&str::is_empty)
+        {
+            return fail(input);
+        }
+
+        let (_, inner) = spliced(&quote_lines, space, orig_input)?;
+
+        let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;
+
+        Ok((input, Token::Quote(Box::new(inner))))
+    }
+
+    const fn tag_delimited<'a, 'b: 'a, T>(
+        &'a self,
+        start: &'b str,
+        end: &'b str,
+        escape: bool,
+        matcher_inner: impl Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a,
+        mapper: impl Fn(Vec<T>) -> Token<'b> + 'a,
+    ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ {
+        move |input| {
+            let opening_tag = &tag(start);
+            let closing_tag = &tag(end);
+
+            if escape {
+                if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), opening_tag))(input) {
+                    return Ok((input_escaped, Token::PlainText(Cow::Borrowed(&mark))));
+                }
+            }
+
+            let begin = input;
+            let (post_open, _) = opening_tag(input)?;
+
+            let res = tuple((
+                many1(tuple((not(closing_tag), &matcher_inner))),
+                closing_tag,
+            ))(post_open);
+
+            if let Err(nom::Err::Error(nom::error::Error { .. })) = res {
+                return Ok((
+                    post_open,
+                    Token::PlainText(begin.fragment_between(&post_open).into()),
+                ));
+            }
+
+            let (input, (inner, _)) = res?;
+
+            let inner = inner.into_iter().map(|(_, t)| t).collect::<Vec<_>>();
+
+            Ok((input, mapper(inner)))
+        }
+    }
+
+    fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "<small>",
+            "</small>",
+            false,
+            self.partial(Self::inline),
+            boxing_sequence(Token::Small),
+        )(input)
+    }
+
+    // TODO: CommonMark flanking rules
+    fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "***",
+            "***",
+            true,
+            self.partial(Self::inline),
+            boxing_sequence(Token::BoldItalic),
+        )(input)
+    }
+
+    // TODO: CommonMark flanking rules
+    fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "___",
+            "___",
+            true,
+            self.partial(Self::inline),
+            boxing_sequence(Token::BoldItalic),
+        )(input)
+    }
+
+    fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "<b>",
+            "</b>",
+            false,
+            self.partial(Self::inline),
+            boxing_sequence(Token::Bold),
+        )(input)
+    }
+
+    // TODO: CommonMark flanking rules
+    fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "**",
+            "**",
+            true,
+            self.partial(Self::inline),
+            boxing_sequence(Token::Bold),
+        )(input)
+    }
+
+    // TODO: CommonMark flanking rules
+    fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "__",
+            "__",
+            true,
+            self.partial(Self::inline),
+            boxing_sequence(Token::Bold),
+        )(input)
+    }
+
+    fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "<i>",
+            "</i>",
+            false,
+            self.partial(Self::inline),
+            boxing_sequence(Token::Italic),
+        )(input)
+    }
+
+    // TODO: CommonMark flanking rules
+    fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "*",
+            "*",
+            true,
+            self.partial(Self::inline),
+            boxing_sequence(Token::Italic),
+        )(input)
+    }
+
+    // TODO: CommonMark flanking rules
+    fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "_",
+            "_",
+            true,
+            self.partial(Self::inline),
+            boxing_sequence(Token::Italic),
+        )(input)
+    }
+
+    fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "<s>",
+            "</s>",
+            false,
+            self.partial(Self::inline),
+            boxing_sequence(Token::Strikethrough),
+        )(input)
+    }
+
+    // TODO: CommonMark flanking rules
+    fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "~~",
+            "~~",
+            true,
+            move |input| {
+                tuple((not_line_ending, self.partial(Self::inline)))(input).map(|(i, t)| (i, t.1))
+            },
+            boxing_sequence(Token::Strikethrough),
+        )(input)
+    }
+
+    fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "`",
+            "",
+            true,
+            move |input| {
+                tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar))(input)
+                    .map(|(i, (_skip, c))| (i, c))
+            },
+            collect_char_sequence(Token::InlineCode),
+        )(input)
+    }
+
+    fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        self.tag_delimited(
+            "\\(",
+            "\\)",
+            false,
+            move |input| tuple((not_line_ending, anychar))(input).map(|(i, (_skip, c))| (i, c)),
+            collect_char_sequence(Token::InlineMath),
+        )(input)
+    }
+
+    fn text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let before = input;
+        let (input, _) = anychar(input)?;
+        Ok((
+            input,
+            Token::PlainText(before.fragment_between(&input).into()),
+        ))
+    }
+}

From 46e0766a36d0c84ff6f3a56785014e3695e32879 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Wed, 4 Oct 2023 19:31:03 +0200
Subject: [PATCH 03/23] Implemented MFM functions and math and center blocks

---
 Cargo.lock                     |   8 +-
 magnetar_mmm_parser/src/lib.rs | 156 +++++++++++++++++++++++++++++++--
 2 files changed, 154 insertions(+), 10 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5b8dcd4..9abfe30 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2499,18 +2499,18 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
 
 [[package]]
 name = "serde"
-version = "1.0.180"
+version = "1.0.188"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed"
+checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.180"
+version = "1.0.188"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036"
+checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 9ed647e..ee70239 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -1,16 +1,20 @@
 use nom::branch::alt;
 use nom::bytes::complete::tag;
 use nom::character::complete;
-use nom::character::complete::{anychar, line_ending, not_line_ending, tab};
-use nom::combinator::{fail, not, opt};
+use nom::character::complete::{
+    alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, tab,
+};
+use nom::combinator::{fail, not, opt, recognize};
 use nom::error::ErrorKind;
-use nom::multi::{many1, separated_list1};
+use nom::multi::{many0, many0_count, many1, many1_count, separated_list1};
 use nom::sequence::tuple;
 use nom::{IResult, Offset, Slice};
 use nom_locate::LocatedSpan;
 use std::borrow::Cow;
+use std::collections::HashMap;
 
-enum Token<'a> {
+#[derive(Clone, Debug)]
+pub enum Token<'a> {
     PlainText(Cow<'a, str>),
     Sequence(Vec<Token<'a>>),
     Quote(Box<Token<'a>>),
@@ -24,6 +28,16 @@ enum Token<'a> {
     PlainTag(Cow<'a, str>),
     InlineCode(Cow<'a, str>),
     InlineMath(Cow<'a, str>),
+    BlockCode {
+        lang: Option<Cow<'a, str>>,
+        inner: Cow<'a, str>,
+    },
+    BlockMath(Cow<'a, str>),
+    Function {
+        name: Cow<'a, str>,
+        params: HashMap<Cow<'a, str>, Option<Cow<'a, str>>>,
+        inner: Box<Token<'a>>,
+    },
 }
 
 impl Token<'_> {
@@ -42,6 +56,28 @@ impl Token<'_> {
             Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())),
             Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())),
             Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())),
+            Token::BlockCode { inner, lang } => Token::BlockCode {
+                lang: lang.as_ref().map(|l| Cow::Owned(l.clone().into_owned())),
+                inner: Cow::Owned(inner.clone().into_owned()),
+            },
+            Token::BlockMath(math) => Token::BlockMath(Cow::Owned(math.clone().into_owned())),
+            Token::Function {
+                name,
+                params,
+                inner,
+            } => Token::Function {
+                name: Cow::Owned(name.clone().into_owned()),
+                params: params
+                    .iter()
+                    .map(|(k, v)| {
+                        (
+                            Cow::Owned(k.clone().into_owned()),
+                            v.as_ref().map(|val| Cow::Owned(val.clone().into_owned())),
+                        )
+                    })
+                    .collect(),
+                inner: Box::new(inner.owned()),
+            },
         }
     }
 }
@@ -84,6 +120,7 @@ const fn collect_char_sequence<'a>(
 fn spliced<'a>(
     segments: &[Span<'a>],
     func: impl Fn(Span) -> IResult<Span, Token>,
+    output_mapper: impl Fn(Box<Token<'static>>) -> Token<'static>,
     parent: Span<'a>,
 ) -> IResult<Span<'a>, Token<'static>, nom::error::Error<Span<'a>>> {
     let combined = segments
@@ -143,7 +180,7 @@ fn spliced<'a>(
         parent
     };
 
-    Ok((out, Token::Quote(Box::new(inner.owned()))))
+    Ok((out, output_mapper(Box::new(inner.owned()))))
 }
 
 fn space(input: Span) -> IResult<Span, Token> {
@@ -204,13 +241,69 @@ impl Context {
             return fail(input);
         }
 
-        let (_, inner) = spliced(&quote_lines, space, orig_input)?;
+        let (_, inner) = spliced(&quote_lines, space, Token::Quote, orig_input)?;
 
         let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;
 
         Ok((input, Token::Quote(Box::new(inner))))
     }
 
+    fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let tag_start = &tag("<center>");
+        let tag_end = &tag("</center>");
+
+        let (input, _) = opt(line_ending)(input)?;
+
+        if input.get_column() != 0 {
+            return fail(input);
+        }
+
+        let (input, _) = tag_start(input)?;
+        let (input, _) = opt(line_ending)(input)?;
+
+        let (input, center_seq) = many0(tuple((
+            not(tuple((opt(line_ending), tag_end))),
+            self.partial(Self::inline),
+        )))(input)?;
+
+        let (input, _) = opt(line_ending)(input)?;
+        let (input, _) = tag_end(input)?;
+        let (input, _) = many0(space)(input)?;
+        let (input, _) = not(not_line_ending)(input)?;
+        let (input, _) = opt(line_ending)(input)?;
+
+        let tokens = center_seq.into_iter().map(|(_, v)| v).collect::<Vec<_>>();
+
+        Ok((input, boxing_sequence(Token::Center)(tokens)))
+    }
+
+    fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, _) = opt(line_ending)(input)?;
+
+        if input.get_column() != 0 {
+            return fail(input);
+        }
+
+        let (input, _) = tag("\\[")(input)?;
+        let (input, _) = opt(line_ending)(input)?;
+
+        let (input, math_span) = recognize(many1_count(tuple((
+            not(tuple((opt(line_ending), tag("\\]")))),
+            not_line_ending,
+        ))))(input)?;
+
+        let (input, _) = opt(line_ending)(input)?;
+        let (input, _) = tag("\\]")(input)?;
+        let (input, _) = many0(space)(input)?;
+        let (input, _) = not(not_line_ending)(input)?;
+        let (input, _) = opt(line_ending)(input)?;
+
+        Ok((
+            input,
+            Token::BlockMath(Cow::Borrowed(math_span.into_fragment())),
+        ))
+    }
+
     const fn tag_delimited<'a, 'b: 'a, T>(
         &'a self,
         start: &'b str,
@@ -252,6 +345,57 @@ impl Context {
         }
     }
 
+    fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, _) = tag("$[")(input)?;
+
+        let func_ident = |input| {
+            recognize(tuple((
+                many1_count(alt((alpha1, tag("_")))),
+                many0_count(alt((alphanumeric1, tag("_")))),
+            )))(input)
+        };
+
+        let param_value = recognize(many1_count(alt((
+            alphanumeric1,
+            tag("."),
+            tag("-"),
+            tag("_"),
+        ))));
+
+        let (input, func_name_span) = func_ident(input)?;
+        let func_name = func_name_span.into_fragment();
+
+        let arg = tuple((func_ident, opt(tuple((tag("="), param_value)))));
+
+        let (input, args) =
+            opt(tuple((one_char('.'), separated_list1(one_char(','), arg))))(input)?;
+
+        let args_out = args.map_or_else(HashMap::new, |(_, items)| {
+            items
+                .into_iter()
+                .map(|(k, v)| {
+                    (
+                        Cow::from(k.into_fragment()),
+                        v.map(|(_, val)| Cow::from(val.into_fragment())),
+                    )
+                })
+                .collect::<HashMap<_, _>>()
+        });
+
+        let (input, inner) = self.partial(Self::inline)(input)?;
+
+        let (input, _) = tag("]")(input)?;
+
+        Ok((
+            input,
+            Token::Function {
+                name: Cow::from(func_name),
+                params: args_out,
+                inner: Box::new(inner),
+            },
+        ))
+    }
+
     fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
             "<small>",

From 24d44632e04a613e2595276a9046d46f507a6094 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Wed, 4 Oct 2023 19:44:27 +0200
Subject: [PATCH 04/23] Minor cleanup

---
 magnetar_mmm_parser/src/lib.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index ee70239..48a9e17 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -195,6 +195,7 @@ fn space(input: Span) -> IResult<Span, Token> {
 struct Context;
 
 impl Context {
+    #[inline]
     const fn partial<'a>(
         &self,
         func: impl Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
@@ -278,22 +279,25 @@ impl Context {
     }
 
     fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let start = &tag("\\[");
+        let end = &tag("\\]");
+
         let (input, _) = opt(line_ending)(input)?;
 
         if input.get_column() != 0 {
             return fail(input);
         }
 
-        let (input, _) = tag("\\[")(input)?;
+        let (input, _) = start(input)?;
         let (input, _) = opt(line_ending)(input)?;
 
         let (input, math_span) = recognize(many1_count(tuple((
-            not(tuple((opt(line_ending), tag("\\]")))),
+            not(tuple((opt(line_ending), end))),
             not_line_ending,
         ))))(input)?;
 
         let (input, _) = opt(line_ending)(input)?;
-        let (input, _) = tag("\\]")(input)?;
+        let (input, _) = end(input)?;
         let (input, _) = many0(space)(input)?;
         let (input, _) = not(not_line_ending)(input)?;
         let (input, _) = opt(line_ending)(input)?;

From 9b26691ff41e3418041691b2025bcca847390159 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Thu, 5 Oct 2023 19:09:26 +0200
Subject: [PATCH 05/23] Implemented URL parsing

---
 magnetar_mmm_parser/src/lib.rs | 176 +++++++++++++++++++++++++++++++--
 1 file changed, 167 insertions(+), 9 deletions(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 48a9e17..62d7116 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -1,10 +1,10 @@
 use nom::branch::alt;
 use nom::bytes::complete::tag;
-use nom::character::complete;
 use nom::character::complete::{
-    alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, tab,
+    alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of, space1,
+    tab,
 };
-use nom::combinator::{fail, not, opt, recognize};
+use nom::combinator::{eof, fail, not, opt, recognize};
 use nom::error::ErrorKind;
 use nom::multi::{many0, many0_count, many1, many1_count, separated_list1};
 use nom::sequence::tuple;
@@ -28,6 +28,13 @@ pub enum Token<'a> {
     PlainTag(Cow<'a, str>),
     InlineCode(Cow<'a, str>),
     InlineMath(Cow<'a, str>),
+    UrlRaw(Cow<'a, str>),
+    UrlNoEmbed(Cow<'a, str>),
+    Link {
+        label: Cow<'a, str>,
+        href: Cow<'a, str>,
+        embed: bool,
+    },
     BlockCode {
         lang: Option<Cow<'a, str>>,
         inner: Cow<'a, str>,
@@ -56,6 +63,13 @@ impl Token<'_> {
             Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())),
             Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())),
             Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())),
+            Token::UrlRaw(url) => Token::UrlRaw(Cow::Owned(url.clone().into_owned())),
+            Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())),
+            Token::Link { embed, label, href } => Token::Link {
+                embed: *embed,
+                label: Cow::Owned(label.clone().into_owned()),
+                href: Cow::Owned(href.clone().into_owned()),
+            },
             Token::BlockCode { inner, lang } => Token::BlockCode {
                 lang: lang.as_ref().map(|l| Cow::Owned(l.clone().into_owned())),
                 inner: Cow::Owned(inner.clone().into_owned()),
@@ -184,12 +198,8 @@ fn spliced<'a>(
 }
 
 fn space(input: Span) -> IResult<Span, Token> {
-    let start = input;
-    let (input, _) = alt((complete::char('\u{0020}'), complete::char('\u{3000}'), tab))(input)?;
-    Ok((
-        input,
-        Token::PlainText(start.fragment_between(&input).into()),
-    ))
+    let (input, frag) = recognize(alt((one_char('\u{0020}'), one_char('\u{3000}'), tab)))(input)?;
+    Ok((input, Token::PlainText(frag.into_fragment().into())))
 }
 
 struct Context;
@@ -213,6 +223,11 @@ impl Context {
         Ok((input, token))
     }
 
+    fn inline_no_link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?;
+        Ok((input, token))
+    }
+
     fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
 
@@ -550,4 +565,147 @@ impl Context {
             Token::PlainText(before.fragment_between(&input).into()),
         ))
     }
+
+    fn url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, url_span) = recognize(tuple((
+            protocol,
+            url_chars(|input| not(url_chars_base)(input), false),
+        )))(input)?;
+
+        let url = url_span.into_fragment();
+        let url_bytes = url.as_bytes();
+
+        // Strip punctuation at the end of sentences that might have been consumed as a part of the URL
+        let final_url = if matches!(url_bytes.last(), Some(b'.' | b',' | b'?')) {
+            url.slice(..url.len() - 1)
+        } else {
+            url
+        };
+
+        Ok((input, Token::UrlRaw(Cow::from(final_url))))
+    }
+
+    fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, _) = tag("<")(input)?; // `<https://...>` form of a URL
+        let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
+        let (input, _) = tag(">")(input)?;
+
+        Ok((input, Token::UrlNoEmbed(Cow::from(url_span.into_fragment()))))
+    }
+
+    fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, no_embed) = opt(tag("?"))(input)?;
+        let (input, _) = tag("[")(input)?;
+        let (input, _) = not(tag("["))(input)?;
+        let (input, label_span) =
+            recognize(many1(tuple((not(tag("](")), not_line_ending))))(input)?;
+        let (input, _) = tag("]")(input)?;
+        let (input, _) = tag("(")(input)?;
+        let (input, url_span) = recognize(tuple((protocol, url_chars(tag("]"), true))))(input)?;
+        let (input, _) = tag(")")(input)?;
+
+        Ok((
+            input,
+            Token::Link {
+                label: label_span.into_fragment().into(),
+                href: url_span.into_fragment().into(),
+                embed: no_embed.is_none(),
+            },
+        ))
+    }
+}
+
+#[inline]
+fn protocol(input: Span) -> IResult<Span, Span> {
+    alt((tag("https://"), tag("http://")))(input)
+}
+
+#[inline]
+fn url_chars_base(input: Span) -> IResult<Span, Span> { // letters/digits run or one URL symbol
+    recognize(alt((alphanumeric1, recognize(one_of(".,_/:%#$&?!~=+-()[]@")))))(input)
+}
+
+#[inline]
+fn url_chars<'a, T: 'a>( // builds a parser that recognizes a non-empty run of URL characters
+    terminator: impl Fn(Span<'a>) -> IResult<Span<'a>, T> + 'a, // delimiter that may close the URL
+    spaces: bool, // whether whitespace runs (`space1`) may appear inside the URL
+) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'a {
+    let terminating = move |input| { // `terminator` immediately followed by a boundary
+        tuple((
+            &terminator,
+            alt((
+                space1,
+                line_ending,
+                eof,
+                recognize(one_of("([<'\"")),
+                recognize(tuple((
+                    alt((alpha1, recognize(one_of("*")))),
+                    alt((space1, line_ending, eof)),
+                ))),
+            )),
+        ))(input)
+    };
+
+    let chars = tuple((
+        not(tuple((space1, eof))), // do not swallow whitespace that runs to end of input
+        not(tuple((space1, tag("\"")))), // nor whitespace followed by a double quote
+        not(tuple((opt(space1), terminating))), // stop right before the closing terminator
+        alt((url_chars_base, if spaces { space1 } else { fail })), // whitespace only if `spaces`
+    ));
+
+    recognize(many1_count(chars)) // at least one step must match
+}
+
+#[cfg(test)]
+mod test {
+    use crate::{url_chars, Span};
+    use nom::bytes::complete::tag;
+
+    #[test]
+    fn parse_url_chars() {
+        let test1 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security))";
+        assert_eq!(
+            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
+            url_chars(tag(")"), true)(Span::new(test1))
+                .unwrap()
+                .1
+                .into_fragment()
+        );
+
+        let test2 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))";
+        assert_eq!(
+            "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
+            url_chars(tag(")"), true)(Span::new(test2))
+                .unwrap()
+                .1
+                .into_fragment()
+        );
+
+        let test3 = "https://en.wikipedia.org/wiki/(";
+        assert_eq!(
+            test3,
+            url_chars(tag(")"), true)(Span::new(test3))
+                .unwrap()
+                .1
+                .into_fragment()
+        );
+
+        let test4 = "https://cs.wikipedia.org/wiki/Among_Us  ";
+        assert_eq!(
+            "https://cs.wikipedia.org/wiki/Among_Us",
+            url_chars(tag(")"), true)(Span::new(test4))
+                .unwrap()
+                .1
+                .into_fragment()
+        );
+
+        let test5 = "https://cs.wikipedia.org/wiki/Among Us  )";
+        assert_eq!(
+            "https://cs.wikipedia.org/wiki/Among Us",
+            url_chars(tag(")"), true)(Span::new(test5))
+                .unwrap()
+                .1
+                .into_fragment()
+        );
+    }
 }

From 52dc491a47cd8c5a62a24b4ad03dda2e928df513 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Thu, 5 Oct 2023 20:05:03 +0200
Subject: [PATCH 06/23] Mention parsing

---
 magnetar_mmm_parser/src/lib.rs | 67 +++++++++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 62d7116..d4e9d6e 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -4,7 +4,7 @@ use nom::character::complete::{
     alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of, space1,
     tab,
 };
-use nom::combinator::{eof, fail, not, opt, recognize};
+use nom::combinator::{eof, fail, map, not, opt, recognize};
 use nom::error::ErrorKind;
 use nom::multi::{many0, many0_count, many1, many1_count, separated_list1};
 use nom::sequence::tuple;
@@ -13,6 +13,21 @@ use nom_locate::LocatedSpan;
 use std::borrow::Cow;
 use std::collections::HashMap;
 
+#[derive(Copy, Clone, Debug)]
+pub enum MentionType {
+    Community,
+    User,
+}
+
+impl MentionType {
+    pub fn to_char(&self) -> char {
+        match self {
+            MentionType::Community => '!',
+            MentionType::User => '@',
+        }
+    }
+}
+
 #[derive(Clone, Debug)]
 pub enum Token<'a> {
     PlainText(Cow<'a, str>),
@@ -45,6 +60,11 @@ pub enum Token<'a> {
         params: HashMap<Cow<'a, str>, Option<Cow<'a, str>>>,
         inner: Box<Token<'a>>,
     },
+    Mention {
+        name: Cow<'a, str>,
+        host: Option<Cow<'a, str>>,
+        mention_type: MentionType,
+    },
 }
 
 impl Token<'_> {
@@ -92,6 +112,15 @@ impl Token<'_> {
                     .collect(),
                 inner: Box::new(inner.owned()),
             },
+            Token::Mention {
+                name,
+                host,
+                mention_type,
+            } => Token::Mention {
+                name: Cow::Owned(name.clone().into_owned()),
+                host: host.as_ref().map(|v| Cow::Owned(v.clone().into_owned())),
+                mention_type: *mention_type,
+            },
         }
     }
 }
@@ -613,6 +642,42 @@ impl Context {
             },
         ))
     }
+
+    fn mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        // TODO: Escaping and skip when preceded by alphanumerics
+
+        let tags = one_of("@!");
+        let (input, mention_type) = map(tags, |c| match c {
+            '@' => MentionType::User,
+            '!' => MentionType::Community,
+            _ => unreachable!(),
+        })(input)?;
+
+        let (input, name) = map(
+            recognize(many1(alt((alphanumeric1, recognize(one_of("-_")))))),
+            Span::into_fragment,
+        )(input)?;
+
+        let (input, host) = map(
+            opt(tuple((
+                tag("@"),
+                map(
+                    recognize(many1(alt((alphanumeric1, recognize(one_of("-_")))))),
+                    Span::into_fragment,
+                ),
+            ))),
+            |maybe_tag_host| maybe_tag_host.map(|(_, host)| host),
+        )(input)?;
+
+        Ok((
+            input,
+            Token::Mention {
+                mention_type,
+                name: name.into(),
+                host: host.map(|h| h.into()),
+            },
+        ))
+    }
 }
 
 #[inline]

From 8009546bfe31e7442a4374a28ff013113ee5a10b Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Thu, 5 Oct 2023 21:21:23 +0200
Subject: [PATCH 07/23] Emoji parsing

---
 Cargo.lock                     | 11 ++++++++++
 Cargo.toml                     |  3 ++-
 magnetar_mmm_parser/Cargo.toml |  4 +++-
 magnetar_mmm_parser/src/lib.rs | 37 +++++++++++++++++++++++++++++++++-
 4 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 9abfe30..aa58d5f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -748,6 +748,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "emojis"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ee61eb945bff65ee7d19d157d39c67c33290ff0742907413fd5eefd29edc979"
+dependencies = [
+ "phf",
+]
+
 [[package]]
 name = "equivalent"
 version = "1.0.1"
@@ -1613,8 +1622,10 @@ dependencies = [
 name = "mmm_parser"
 version = "0.2.1-alpha"
 dependencies = [
+ "emojis",
  "nom",
  "nom_locate",
+ "unicode-segmentation",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index a7a960f..f504d67 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,6 +29,7 @@ cached = "0.46"
 cfg-if = "1"
 chrono = "0.4"
 dotenvy = "0.15"
+emojis = "0.6"
 futures-core = "0.3"
 futures-util = "0.3"
 headers = "0.3"
@@ -101,4 +102,4 @@ toml = { workspace = true }
 unicode-segmentation = { workspace = true }
 
 [profile.release]
-lto = true
\ No newline at end of file
+lto = true
diff --git a/magnetar_mmm_parser/Cargo.toml b/magnetar_mmm_parser/Cargo.toml
index 8a07618..30c2bad 100644
--- a/magnetar_mmm_parser/Cargo.toml
+++ b/magnetar_mmm_parser/Cargo.toml
@@ -5,5 +5,7 @@ edition.workspace = true
 license = "MIT OR Apache-2.0"
 
 [dependencies]
+emojis = { workspace = true }
 nom = { workspace = true }
-nom_locate = { workspace = true }
\ No newline at end of file
+nom_locate = { workspace = true }
+unicode-segmentation = { workspace = true }
diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index d4e9d6e..a3ddcd1 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -12,6 +12,7 @@ use nom::{IResult, Offset, Slice};
 use nom_locate::LocatedSpan;
 use std::borrow::Cow;
 use std::collections::HashMap;
+use unicode_segmentation::UnicodeSegmentation;
 
 #[derive(Copy, Clone, Debug)]
 pub enum MentionType {
@@ -65,6 +66,8 @@ pub enum Token<'a> {
         host: Option<Cow<'a, str>>,
         mention_type: MentionType,
     },
+    UnicodeEmoji(Cow<'a, str>),
+    ShortcodeEmoji(Cow<'a, str>),
 }
 
 impl Token<'_> {
@@ -121,6 +124,10 @@ impl Token<'_> {
                 host: host.as_ref().map(|v| Cow::Owned(v.clone().into_owned())),
                 mention_type: *mention_type,
             },
+            Token::UnicodeEmoji(code) => Token::UnicodeEmoji(Cow::Owned(code.clone().into_owned())),
+            Token::ShortcodeEmoji(shortcode) => {
+                Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
+            }
         }
     }
 }
@@ -643,6 +650,24 @@ impl Context {
         ))
     }
 
+    fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let frag = input.fragment();
+        let Some(grapheme) = frag.graphemes(true).next() else { // first extended grapheme cluster
+            return fail(input); // empty input
+        };
+
+        let emoji = emojis::get(grapheme); // look the cluster up in the `emojis` crate
+
+        if emoji.is_none() {
+            return fail(input); // the cluster is not a known emoji
+        }
+
+        Ok((
+            input.slice(grapheme.len()..), // advance by the cluster's length in bytes
+            Token::UnicodeEmoji(grapheme.into()),
+        ))
+    }
+
     fn mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         // TODO: Escaping and skip when preceded by alphanumerics
 
@@ -723,8 +748,9 @@ fn url_chars<'a, T: 'a>(
 
 #[cfg(test)]
 mod test {
-    use crate::{url_chars, Span};
+    use crate::{url_chars, Context, Span};
     use nom::bytes::complete::tag;
+    use nom::multi::many1;
 
     #[test]
     fn parse_url_chars() {
@@ -773,4 +799,13 @@ mod test {
                 .into_fragment()
         );
     }
+
+    #[test]
+    fn parse_emoji() {
+        let test = "🥺💜❤️🦊";
+        let ctx = Context;
+        let tokens = many1(ctx.partial(Context::unicode_emoji))(Span::from(test)).unwrap();
+
+        println!("{:#?}", tokens.1)
+    }
 }

From 7c8e65f5562d22455c7c45b0029af13d61db335a Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Thu, 5 Oct 2023 22:02:46 +0200
Subject: [PATCH 08/23] Hashtag parsing

---
 magnetar_mmm_parser/src/lib.rs | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index a3ddcd1..93f4da1 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -68,6 +68,7 @@ pub enum Token<'a> {
     },
     UnicodeEmoji(Cow<'a, str>),
     ShortcodeEmoji(Cow<'a, str>),
+    Hashtag(Cow<'a, str>),
 }
 
 impl Token<'_> {
@@ -128,6 +129,7 @@ impl Token<'_> {
             Token::ShortcodeEmoji(shortcode) => {
                 Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
             }
+            Token::Hashtag(url) => Token::Hashtag(Cow::Owned(url.clone().into_owned())),
         }
     }
 }
@@ -703,6 +705,33 @@ impl Context {
             },
         ))
     }
+
+    fn hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        // TODO: Skip when preceded by alphanumerics
+
+        let (input, _) = tag("#")(input)?;
+
+        let (input, hashtag_text) =
+            map(recognize(many1(hashtag_chars)), Span::into_fragment)(input)?;
+
+        Ok((input, Token::Hashtag(hashtag_text.into())))
+    }
+}
+
+#[inline]
+fn hashtag_chars(input: Span) -> IResult<Span, Span> { // one hashtag "character" or bracket pair
+    recognize(alt((
+        recognize(tuple((tag("("), hashtag_chars, tag(")")))),
+        recognize(tuple((tag("["), hashtag_chars, tag("]")))),
+        recognize(tuple((tag("「"), hashtag_chars, tag("」")))),
+        recognize(tuple((tag("（"), hashtag_chars, tag("）")))),
+        recognize(tuple((
+            not(space1),
+            not(line_ending), // lookahead only; `not_line_ending` would consume the whole line
+            not(one_of(".,:;!?#?/[]【】()「」（）<>")),
+            anychar,
+        ))),
+    )))(input)
 }
 
 #[inline]

From a6ee6bfbde032f2b46c8a603e68ed119f5b8fdd1 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Thu, 5 Oct 2023 22:12:51 +0200
Subject: [PATCH 09/23] Plain tag parsing

---
 magnetar_mmm_parser/src/lib.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 93f4da1..b08ae5b 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -453,6 +453,20 @@ impl Context {
         ))
     }
 
+    fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let opening_tag = &tag("<plain>"); // `<small>` is handled by `tag_small`
+        let closing_tag = &tag("</plain>");
+
+        let (input, _) = opening_tag(input)?;
+        let (input, text) = map( // char by char, up to the closing tag or the end of the line
+            recognize(many1(tuple((not(line_ending), not(closing_tag), anychar)))),
+            Span::into_fragment,
+        )(input)?;
+        let (input, _) = closing_tag(input)?;
+
+        Ok((input, Token::PlainTag(text.into())))
+    }
+
     fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
             "<small>",

From 4431a3ad6274e8f608a2bafbb8f9962599ea87d7 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Thu, 5 Oct 2023 22:25:29 +0200
Subject: [PATCH 10/23] Code block parsing

---
 magnetar_mmm_parser/src/lib.rs | 39 ++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index b08ae5b..6cc9cb7 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -331,6 +331,45 @@ impl Context {
         Ok((input, boxing_sequence(Token::Center)(tokens)))
     }
 
+    fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let delim = &tag("```"); // fence used for both the opening and the closing line
+
+        let (input, _) = opt(line_ending)(input)?;
+
+        if input.get_column() != 0 {
+            return fail(input);
+        }
+
+        let (input, _) = delim(input)?;
+        let (input, lang) = opt(map( // optional language: the rest of the opening fence line
+            recognize(many1(tuple((not(delim), not(line_ending), anychar)))),
+            Span::into_fragment,
+        ))(input)?;
+        let (input, _) = line_ending(input)?;
+
+        let (input, code) = map( // body: everything before a line break followed by a fence
+            recognize(many1_count(tuple((
+                not(tuple((line_ending, delim))),
+                anychar,
+            )))),
+            Span::into_fragment,
+        )(input)?;
+
+        let (input, _) = line_ending(input)?;
+        let (input, _) = delim(input)?;
+        let (input, _) = many0(space)(input)?;
+        let (input, _) = not(not_line_ending)(input)?;
+        let (input, _) = opt(line_ending)(input)?;
+
+        Ok((
+            input,
+            Token::BlockCode {
+                lang: lang.map(<&str>::into),
+                inner: code.into(),
+            },
+        ))
+    }
+
     fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         let start = &tag("\\[");
         let end = &tag("\\]");

From c45ec852dd57d7b9299b3130f995ef3cd056abdd Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Thu, 5 Oct 2023 22:32:53 +0200
Subject: [PATCH 11/23] Shortcode emoji parsing

---
 magnetar_mmm_parser/src/lib.rs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 6cc9cb7..4fbb9ef 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -723,7 +723,20 @@ impl Context {
         ))
     }
 
-    fn mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        // TODO: Fail when preceded by alphanumerics
+        let (input, _) = tag(":")(input)?; // opening colon of `:shortcode:`
+        let (input, shortcode) = map( // one or more of: alphanumerics, `_`, `+`, `-`
+            recognize(many1(alt((alphanumeric1, recognize(one_of("_+-")))))),
+            Span::into_fragment,
+        )(input)?;
+        let (input, _) = tag(":")(input)?; // closing colon
+        let (input, _) = not(alphanumeric1)(input)?; // reject when alphanumerics follow directly
+
+        Ok((input, Token::ShortcodeEmoji(shortcode.into()))) // the colons are not part of the code
+    }
+
+    fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         // TODO: Escaping and skip when preceded by alphanumerics
 
         let tags = one_of("@!");

From 453891ddf418744c1b649097227dad9b5af9924a Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Fri, 6 Oct 2023 00:17:52 +0200
Subject: [PATCH 12/23] Connected it all

---
 magnetar_mmm_parser/src/lib.rs | 249 +++++++++++++++++++++++++++------
 1 file changed, 204 insertions(+), 45 deletions(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 4fbb9ef..ed90585 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -6,7 +6,7 @@ use nom::character::complete::{
 };
 use nom::combinator::{eof, fail, map, not, opt, recognize};
 use nom::error::ErrorKind;
-use nom::multi::{many0, many0_count, many1, many1_count, separated_list1};
+use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
 use nom::{IResult, Offset, Slice};
 use nom_locate::LocatedSpan;
@@ -14,7 +14,7 @@ use std::borrow::Cow;
 use std::collections::HashMap;
 use unicode_segmentation::UnicodeSegmentation;
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
 pub enum MentionType {
     Community,
     User,
@@ -29,7 +29,7 @@ impl MentionType {
     }
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Eq, PartialEq)]
 pub enum Token<'a> {
     PlainText(Cow<'a, str>),
     Sequence(Vec<Token<'a>>),
@@ -132,6 +132,49 @@ impl Token<'_> {
             Token::Hashtag(url) => Token::Hashtag(Cow::Owned(url.clone().into_owned())),
         }
     }
+
+    fn merged(&self) -> Token { // copy of the tree with adjacent `PlainText` tokens joined
+        match self {
+            Token::Sequence(tokens) => {
+                let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
+                    if let Some(Token::PlainText(last)) = acc.last_mut() {
+                        if let Token::PlainText(tok_text) = tok {
+                            last.to_mut().push_str(tok_text); // grow in place, no re-copy
+
+                            return acc;
+                        }
+                    }
+
+                    acc.push(tok.merged());
+                    acc
+                });
+
+                if tokens_multi.len() == 1 { // a one-element sequence collapses to its element
+                    return tokens_multi.into_iter().next().unwrap();
+                }
+
+                Token::Sequence(tokens_multi)
+            }
+            Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
+            Token::Small(inner) => Token::Small(Box::new(inner.merged())),
+            Token::Big(inner) => Token::Big(Box::new(inner.merged())),
+            Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())),
+            Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
+            Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
+            Token::Center(inner) => Token::Center(Box::new(inner.merged())),
+            Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
+            Token::Function {
+                name,
+                params,
+                inner,
+            } => Token::Function {
+                name: name.clone(),
+                params: params.clone(),
+                inner: Box::new(inner.merged()),
+            },
+            other => other.clone(), // tokens without nested tokens are copied as they are
+        }
+    }
 }
 
 type Span<'a> = LocatedSpan<&'a str>;
@@ -244,25 +287,103 @@ struct Context;
 
 impl Context {
     #[inline]
-    const fn partial<'a>(
+    const fn partial(
         &self,
-        func: impl Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
-    ) -> impl Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
+        func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
+    ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
         move |input| func(self, input)
     }
 
-    fn root<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let (input, token) = alt((self.partial(Self::tag_quote),))(input)?;
-        Ok((input, token))
+    fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
     }
 
     fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?;
+        map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
+    }
+
+    fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        map(
+            many1(self.partial(Self::inline_label_safe_single)),
+            Token::Sequence,
+        )(input)
+    }
+
+    fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        alt((
+            self.partial(Self::tag_bold_italic_asterisk),
+            self.partial(Self::tag_bold_italic_underscore),
+            self.partial(Self::tag_bold_asterisk),
+            self.partial(Self::tag_italic_asterisk),
+            self.partial(Self::tag_bold_underscore),
+            self.partial(Self::tag_italic_underscore),
+        ))(input)
+    }
+
+    fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, token) = alt((
+            self.partial(Self::unicode_emoji),
+            self.partial(Self::tag_block_center),
+            self.partial(Self::tag_small),
+            self.partial(Self::tag_plain),
+            self.partial(Self::tag_bold),
+            self.partial(Self::tag_italic),
+            self.partial(Self::tag_strikethrough),
+            self.partial(Self::url_no_embed),
+            self.partial(Self::base_bold_italic),
+            self.partial(Self::tag_block_code),
+            self.partial(Self::tag_inline_code),
+            self.partial(Self::tag_quote),
+            self.partial(Self::tag_block_math),
+            self.partial(Self::tag_inline_math),
+            self.partial(Self::tag_strikethrough_tilde),
+            self.partial(Self::tag_func),
+            self.partial(Self::tag_mention),
+            self.partial(Self::tag_hashtag),
+            self.partial(Self::shortcode_emoji),
+            self.partial(Self::raw_url),
+            self.partial(Self::text),
+        ))(input)?;
         Ok((input, token))
     }
 
-    fn inline_no_link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let (input, token) = alt((self.partial(Self::tag_small), self.partial(Self::text)))(input)?;
+    fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, token) = alt((
+            self.partial(Self::unicode_emoji),
+            self.partial(Self::tag_small),
+            self.partial(Self::tag_plain),
+            self.partial(Self::tag_bold),
+            self.partial(Self::tag_italic),
+            self.partial(Self::tag_strikethrough),
+            self.partial(Self::url_no_embed),
+            self.partial(Self::base_bold_italic),
+            self.partial(Self::tag_inline_code),
+            self.partial(Self::tag_inline_math),
+            self.partial(Self::tag_strikethrough_tilde),
+            self.partial(Self::tag_func),
+            self.partial(Self::tag_mention),
+            self.partial(Self::tag_hashtag),
+            self.partial(Self::shortcode_emoji),
+            self.partial(Self::raw_url),
+            self.partial(Self::text),
+        ))(input)?;
+        Ok((input, token))
+    }
+
+    fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, token) = alt((
+            self.partial(Self::unicode_emoji),
+            self.partial(Self::tag_small),
+            self.partial(Self::tag_plain),
+            self.partial(Self::tag_bold),
+            self.partial(Self::tag_italic),
+            self.partial(Self::tag_strikethrough),
+            self.partial(Self::base_bold_italic),
+            self.partial(Self::tag_strikethrough_tilde),
+            self.partial(Self::tag_func),
+            self.partial(Self::shortcode_emoji),
+            self.partial(Self::text),
+        ))(input)?;
         Ok((input, token))
     }
 
@@ -270,7 +391,7 @@ impl Context {
         let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
 
         if let (None, None) = leading_spaces {
-            if input.get_column() != 0 {
+            if input.get_column() != 1 {
                 return fail(input);
             }
         }
@@ -295,7 +416,12 @@ impl Context {
             return fail(input);
         }
 
-        let (_, inner) = spliced(&quote_lines, space, Token::Quote, orig_input)?;
+        let (_, inner) = spliced(
+            &quote_lines,
+            self.partial(Self::full),
+            Token::Quote,
+            orig_input,
+        )?;
 
         let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;
 
@@ -308,27 +434,23 @@ impl Context {
 
         let (input, _) = opt(line_ending)(input)?;
 
-        if input.get_column() != 0 {
+        if input.get_column() != 1 {
             return fail(input);
         }
 
         let (input, _) = tag_start(input)?;
         let (input, _) = opt(line_ending)(input)?;
 
-        let (input, center_seq) = many0(tuple((
-            not(tuple((opt(line_ending), tag_end))),
-            self.partial(Self::inline),
-        )))(input)?;
+        let (input, (center_seq, _)) = many_till(
+            self.partial(Self::inline_single),
+            tuple((opt(line_ending), tag_end)),
+        )(input)?;
 
-        let (input, _) = opt(line_ending)(input)?;
-        let (input, _) = tag_end(input)?;
         let (input, _) = many0(space)(input)?;
-        let (input, _) = not(not_line_ending)(input)?;
+        let (input, _) = not(not(line_ending))(input)?;
         let (input, _) = opt(line_ending)(input)?;
 
-        let tokens = center_seq.into_iter().map(|(_, v)| v).collect::<Vec<_>>();
-
-        Ok((input, boxing_sequence(Token::Center)(tokens)))
+        Ok((input, boxing_sequence(Token::Center)(center_seq)))
     }
 
     fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
@@ -336,7 +458,7 @@ impl Context {
 
         let (input, _) = opt(line_ending)(input)?;
 
-        if input.get_column() != 0 {
+        if input.get_column() != 1 {
             return fail(input);
         }
 
@@ -358,7 +480,7 @@ impl Context {
         let (input, _) = line_ending(input)?;
         let (input, _) = delim(input)?;
         let (input, _) = many0(space)(input)?;
-        let (input, _) = not(not_line_ending)(input)?;
+        let (input, _) = not(not(line_ending))(input)?;
         let (input, _) = opt(line_ending)(input)?;
 
         Ok((
@@ -376,7 +498,7 @@ impl Context {
 
         let (input, _) = opt(line_ending)(input)?;
 
-        if input.get_column() != 0 {
+        if input.get_column() != 1 {
             return fail(input);
         }
 
@@ -458,8 +580,7 @@ impl Context {
             tag("_"),
         ))));
 
-        let (input, func_name_span) = func_ident(input)?;
-        let func_name = func_name_span.into_fragment();
+        let (input, func_name) = map(func_ident, Span::into_fragment)(input)?;
 
         let arg = tuple((func_ident, opt(tuple((tag("="), param_value)))));
 
@@ -478,16 +599,16 @@ impl Context {
                 .collect::<HashMap<_, _>>()
         });
 
-        let (input, inner) = self.partial(Self::inline)(input)?;
+        let (input, _) = opt(space)(input)?;
 
-        let (input, _) = tag("]")(input)?;
+        let (input, (inner, _)) = many_till(self.partial(Self::inline_single), tag("]"))(input)?;
 
         Ok((
             input,
             Token::Function {
                 name: Cow::from(func_name),
                 params: args_out,
-                inner: Box::new(inner),
+                inner: Box::new(Token::Sequence(inner)),
             },
         ))
     }
@@ -649,15 +770,11 @@ impl Context {
     }
 
     fn text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let before = input;
-        let (input, _) = anychar(input)?;
-        Ok((
-            input,
-            Token::PlainText(before.fragment_between(&input).into()),
-        ))
+        let (input, text) = map(recognize(anychar), Span::into_fragment)(input)?;
+        Ok((input, Token::PlainText(text.into())))
     }
 
-    fn url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         let (input, url_span) = recognize(tuple((
             protocol,
             url_chars(|input| not(url_chars_base)(input), false),
@@ -688,8 +805,10 @@ impl Context {
         let (input, no_embed) = opt(tag("?"))(input)?;
         let (input, _) = tag("[")(input)?;
         let (input, _) = not(tag("["))(input)?;
-        let (input, label_span) =
-            recognize(many1(tuple((not(tag("](")), not_line_ending))))(input)?;
+        let (input, label_span) = recognize(many1(tuple((
+            not(tag("](")),
+            self.partial(Self::inline_label_safe_single),
+        ))))(input)?;
         let (input, _) = tag("]")(input)?;
         let (input, _) = tag("(")(input)?;
         let (input, url_span) = recognize(tuple((protocol, url_chars(tag("]"), true))))(input)?;
@@ -772,7 +891,7 @@ impl Context {
         ))
     }
 
-    fn hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         // TODO: Skip when preceded by alphanumerics
 
         let (input, _) = tag("#")(input)?;
@@ -843,9 +962,11 @@ fn url_chars<'a, T: 'a>(
 
 #[cfg(test)]
 mod test {
-    use crate::{url_chars, Context, Span};
+    use crate::{url_chars, Context, Span, Token};
     use nom::bytes::complete::tag;
     use nom::multi::many1;
+    use std::borrow::Cow;
+    use std::collections::HashMap;
 
     #[test]
     fn parse_url_chars() {
@@ -895,12 +1016,50 @@ mod test {
         );
     }
 
+    #[test]
+    fn parse_complex() {
+        let emoji = r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#;
+        assert_eq!(
+            Token::Function {
+                name: "x2".into(),
+                params: HashMap::new(),
+                inner: Box::new(Token::Sequence(vec![
+                    Token::Function {
+                        name: "sparkle".into(),
+                        params: HashMap::new(),
+                        inner: Box::new(Token::UnicodeEmoji("🥺".into())),
+                    },
+                    Token::UnicodeEmoji("💜".into()),
+                    Token::Function {
+                        name: "spin".into(),
+                        params: {
+                            let mut params = HashMap::new();
+                            params.insert("y".into(), None);
+                            params.insert("speed".into(), Some("5s".into()));
+                            params
+                        },
+                        inner: Box::new(Token::UnicodeEmoji("❤️".into())),
+                    },
+                    Token::UnicodeEmoji("🦊".into()),
+                ]))
+            },
+            Context.full(Span::new(emoji)).unwrap().1.merged()
+        )
+    }
+
     #[test]
     fn parse_emoji() {
         let test = "🥺💜❤️🦊";
         let ctx = Context;
         let tokens = many1(ctx.partial(Context::unicode_emoji))(Span::from(test)).unwrap();
 
-        println!("{:#?}", tokens.1)
+        assert_eq!(
+            vec!["🥺", "💜", "❤️", "🦊"]
+                .into_iter()
+                .map(<&str as Into<Cow<_>>>::into)
+                .map(Token::UnicodeEmoji)
+                .collect::<Vec<_>>(),
+            tokens.1
+        );
     }
 }

From 703e1191c2820ca39e7a35a8f320bf4924671421 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Sat, 7 Oct 2023 01:46:20 +0200
Subject: [PATCH 13/23] Janky sequence unnesting and attempting to salvage
 nested parsing in incorrect formatting tags

---
 Cargo.lock                     |   1 +
 Cargo.toml                     |   1 +
 magnetar_mmm_parser/Cargo.toml |   1 +
 magnetar_mmm_parser/src/lib.rs | 245 ++++++++++++++++++++++++++-------
 4 files changed, 200 insertions(+), 48 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index aa58d5f..e2d79c5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1622,6 +1622,7 @@ dependencies = [
 name = "mmm_parser"
 version = "0.2.1-alpha"
 dependencies = [
+ "either",
  "emojis",
  "nom",
  "nom_locate",
diff --git a/Cargo.toml b/Cargo.toml
index f504d67..c326183 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,6 +29,7 @@ cached = "0.46"
 cfg-if = "1"
 chrono = "0.4"
 dotenvy = "0.15"
+either = "1.9"
 emojis = "0.6"
 futures-core = "0.3"
 futures-util = "0.3"
diff --git a/magnetar_mmm_parser/Cargo.toml b/magnetar_mmm_parser/Cargo.toml
index 30c2bad..25faa6b 100644
--- a/magnetar_mmm_parser/Cargo.toml
+++ b/magnetar_mmm_parser/Cargo.toml
@@ -5,6 +5,7 @@ edition.workspace = true
 license = "MIT OR Apache-2.0"
 
 [dependencies]
+either = { workspace = true }
 emojis = { workspace = true }
 nom = { workspace = true }
 nom_locate = { workspace = true }
diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index ed90585..74d98ea 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -1,3 +1,4 @@
+use either::Either;
 use nom::branch::alt;
 use nom::bytes::complete::tag;
 use nom::character::complete::{
@@ -12,6 +13,7 @@ use nom::{IResult, Offset, Slice};
 use nom_locate::LocatedSpan;
 use std::borrow::Cow;
 use std::collections::HashMap;
+use std::convert::identity;
 use unicode_segmentation::UnicodeSegmentation;
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
@@ -145,6 +147,27 @@ impl Token<'_> {
                         }
                     }
 
+                    if let Token::Sequence(seq) = tok {
+                        let items = seq.iter().map(Token::merged).flat_map(|t| match t {
+                            Token::Sequence(seq) => Either::Left(seq.into_iter()),
+                            other => Either::Right(std::iter::once(other)),
+                        });
+
+                        for item in items {
+                            if let Some(Token::PlainText(last)) = acc.last_mut() {
+                                if let Token::PlainText(tok_text) = item {
+                                    *last = Cow::from(last.to_string() + tok_text.as_ref());
+
+                                    continue;
+                                }
+                            }
+
+                            acc.push(item);
+                        }
+
+                        return acc;
+                    }
+
                     acc.push(tok.merged());
                     acc
                 });
@@ -200,13 +223,13 @@ impl SliceOffset for Span<'_> {
     }
 }
 
-const fn boxing_sequence<'a>(
-    func: impl Fn(Box<Token<'a>>) -> Token<'a>,
-) -> impl Fn(Vec<Token<'a>>) -> Token<'a> {
-    move |tokens| func(Box::new(Token::Sequence(tokens)))
+#[inline]
+fn boxing_token<'a>(func: impl Fn(Box<Token<'a>>) -> Token<'a>) -> impl Fn(Token<'a>) -> Token<'a> {
+    move |tokens| func(Box::new(tokens))
 }
 
-const fn collect_char_sequence<'a>(
+#[inline]
+fn collect_char_sequence<'a>(
     func: impl Fn(Cow<'a, str>) -> Token<'a>,
 ) -> impl Fn(Vec<char>) -> Token<'a> {
     move |chars| func(Cow::Owned(chars.into_iter().collect()))
@@ -215,14 +238,14 @@ const fn collect_char_sequence<'a>(
 fn spliced<'a>(
     segments: &[Span<'a>],
     func: impl Fn(Span) -> IResult<Span, Token>,
-    output_mapper: impl Fn(Box<Token<'static>>) -> Token<'static>,
     parent: Span<'a>,
 ) -> IResult<Span<'a>, Token<'static>, nom::error::Error<Span<'a>>> {
     let combined = segments
         .iter()
         .copied()
         .map(Span::into_fragment)
-        .collect::<String>();
+        .collect::<Vec<_>>()
+        .join("\n");
     let cum_offset_combined = segments
         .iter()
         .scan(0, |acc, &x| {
@@ -234,7 +257,7 @@ fn spliced<'a>(
         cum_offset_combined
             .iter()
             .enumerate()
-            .filter(|(_, &o)| o >= input.location_offset())
+            .take_while(|(_, &o)| o > input.location_offset())
             .map(|(i, o)| (segments[i], o))
             .last()
     };
@@ -275,7 +298,7 @@ fn spliced<'a>(
         parent
     };
 
-    Ok((out, output_mapper(Box::new(inner.owned()))))
+    Ok((out, inner.owned()))
 }
 
 fn space(input: Span) -> IResult<Span, Token> {
@@ -370,6 +393,22 @@ impl Context {
         Ok((input, token))
     }
 
+    fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+        let (input, token) = alt((
+            self.partial(Self::unicode_emoji),
+            self.partial(Self::url_no_embed),
+            self.partial(Self::tag_inline_code),
+            self.partial(Self::tag_inline_math),
+            self.partial(Self::tag_func),
+            self.partial(Self::tag_mention),
+            self.partial(Self::tag_hashtag),
+            self.partial(Self::shortcode_emoji),
+            self.partial(Self::raw_url),
+            self.partial(Self::text),
+        ))(input)?;
+        Ok((input, token))
+    }
+
     fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         let (input, token) = alt((
             self.partial(Self::unicode_emoji),
@@ -416,12 +455,7 @@ impl Context {
             return fail(input);
         }
 
-        let (_, inner) = spliced(
-            &quote_lines,
-            self.partial(Self::full),
-            Token::Quote,
-            orig_input,
-        )?;
+        let (_, inner) = spliced(&quote_lines, self.partial(Self::full), orig_input)?;
 
         let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;
 
@@ -450,7 +484,10 @@ impl Context {
         let (input, _) = not(not(line_ending))(input)?;
         let (input, _) = opt(line_ending)(input)?;
 
-        Ok((input, boxing_sequence(Token::Center)(center_seq)))
+        Ok((
+            input,
+            boxing_token(Token::Center)(Token::Sequence(center_seq)),
+        ))
     }
 
     fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
@@ -522,13 +559,16 @@ impl Context {
         ))
     }
 
-    const fn tag_delimited<'a, 'b: 'a, T>(
+    #[inline]
+    fn tag_delimited<'a, 'b: 'a, T>(
         &'a self,
         start: &'b str,
         end: &'b str,
         escape: bool,
         matcher_inner: impl Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a,
-        mapper: impl Fn(Vec<T>) -> Token<'b> + 'a,
+        matcher_inner_fallback: impl Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a,
+        collector: impl Fn(Vec<T>) -> Token<'b> + 'a,
+        mapper: impl Fn(Token<'b>) -> Token<'b> + 'a,
     ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ {
         move |input| {
             let opening_tag = &tag(start);
@@ -548,18 +588,40 @@ impl Context {
                 closing_tag,
             ))(post_open);
 
-            if let Err(nom::Err::Error(nom::error::Error { .. })) = res {
+            if let Err(nom::Err::Error(nom::error::Error {
+                input: input_past_err,
+                ..
+            })) = res
+            {
+                let res_fallback = tuple((
+                    many1(tuple((not(closing_tag), &matcher_inner_fallback))),
+                    closing_tag,
+                ))(post_open);
+
+                if res_fallback.is_err() {
+                    return Ok((
+                        input_past_err,
+                        Token::PlainText(begin.fragment_between(&input_past_err).into()),
+                    ));
+                }
+
+                let (input, (inner, closing)) = res_fallback.unwrap();
+                let inner = inner.into_iter().map(|(_, t)| t).collect::<Vec<_>>();
+
                 return Ok((
-                    post_open,
-                    Token::PlainText(begin.fragment_between(&post_open).into()),
+                    input,
+                    Token::Sequence(vec![
+                        Token::PlainText(begin.fragment_between(&post_open).into()),
+                        collector(inner),
+                        Token::PlainText(closing.into_fragment().into()),
+                    ]),
                 ));
             }
 
             let (input, (inner, _)) = res?;
-
             let inner = inner.into_iter().map(|(_, t)| t).collect::<Vec<_>>();
 
-            Ok((input, mapper(inner)))
+            Ok((input, mapper(collector(inner))))
         }
     }
 
@@ -632,8 +694,10 @@ impl Context {
             "<small>",
             "</small>",
             false,
-            self.partial(Self::inline),
-            boxing_sequence(Token::Small),
+            self.partial(Self::inline_single),
+            self.partial(Self::inline_non_formatting_single),
+            Token::Sequence,
+            boxing_token(Token::Small),
         )(input)
     }
 
@@ -643,8 +707,10 @@ impl Context {
             "***",
             "***",
             true,
-            self.partial(Self::inline),
-            boxing_sequence(Token::BoldItalic),
+            self.partial(Self::inline_single),
+            self.partial(Self::inline_non_formatting_single),
+            Token::Sequence,
+            boxing_token(Token::BoldItalic),
         )(input)
     }
 
@@ -654,8 +720,10 @@ impl Context {
             "___",
             "___",
             true,
-            self.partial(Self::inline),
-            boxing_sequence(Token::BoldItalic),
+            self.partial(Self::inline_single),
+            self.partial(Self::inline_non_formatting_single),
+            Token::Sequence,
+            boxing_token(Token::BoldItalic),
         )(input)
     }
 
@@ -664,8 +732,10 @@ impl Context {
             "<b>",
             "</b>",
             false,
-            self.partial(Self::inline),
-            boxing_sequence(Token::Bold),
+            self.partial(Self::inline_single),
+            self.partial(Self::inline_non_formatting_single),
+            Token::Sequence,
+            boxing_token(Token::Bold),
         )(input)
     }
 
@@ -675,8 +745,10 @@ impl Context {
             "**",
             "**",
             true,
-            self.partial(Self::inline),
-            boxing_sequence(Token::Bold),
+            self.partial(Self::inline_single),
+            self.partial(Self::inline_non_formatting_single),
+            Token::Sequence,
+            boxing_token(Token::Bold),
         )(input)
     }
 
@@ -686,8 +758,10 @@ impl Context {
             "__",
             "__",
             true,
-            self.partial(Self::inline),
-            boxing_sequence(Token::Bold),
+            self.partial(Self::inline_single),
+            self.partial(Self::inline_non_formatting_single),
+            Token::Sequence,
+            boxing_token(Token::Bold),
         )(input)
     }
 
@@ -696,8 +770,10 @@ impl Context {
             "<i>",
             "</i>",
             false,
-            self.partial(Self::inline),
-            boxing_sequence(Token::Italic),
+            self.partial(Self::inline_single),
+            self.partial(Self::inline_non_formatting_single),
+            Token::Sequence,
+            boxing_token(Token::Italic),
         )(input)
     }
 
@@ -707,8 +783,10 @@ impl Context {
             "*",
             "*",
             true,
-            self.partial(Self::inline),
-            boxing_sequence(Token::Italic),
+            self.partial(Self::inline_single),
+            self.partial(Self::inline_non_formatting_single),
+            Token::Sequence,
+            boxing_token(Token::Italic),
         )(input)
     }
 
@@ -718,8 +796,10 @@ impl Context {
             "_",
             "_",
             true,
-            self.partial(Self::inline),
-            boxing_sequence(Token::Italic),
+            self.partial(Self::inline_single),
+            self.partial(Self::inline_non_formatting_single),
+            Token::Sequence,
+            boxing_token(Token::Italic),
         )(input)
     }
 
@@ -728,8 +808,10 @@ impl Context {
             "<s>",
             "</s>",
             false,
-            self.partial(Self::inline),
-            boxing_sequence(Token::Strikethrough),
+            self.partial(Self::inline_single),
+            self.partial(Self::inline_non_formatting_single),
+            Token::Sequence,
+            boxing_token(Token::Strikethrough),
         )(input)
     }
 
@@ -740,9 +822,18 @@ impl Context {
             "~~",
             true,
             move |input| {
-                tuple((not_line_ending, self.partial(Self::inline)))(input).map(|(i, t)| (i, t.1))
+                tuple((not_line_ending, self.partial(Self::inline_single)))(input)
+                    .map(|(i, t)| (i, t.1))
             },
-            boxing_sequence(Token::Strikethrough),
+            move |input| {
+                tuple((
+                    not_line_ending,
+                    self.partial(Self::inline_non_formatting_single),
+                ))(input)
+                .map(|(i, t)| (i, t.1))
+            },
+            Token::Sequence,
+            boxing_token(Token::Strikethrough),
         )(input)
     }
 
@@ -755,7 +846,9 @@ impl Context {
                 tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar))(input)
                     .map(|(i, (_skip, c))| (i, c))
             },
+            fail,
             collect_char_sequence(Token::InlineCode),
+            identity,
         )(input)
     }
 
@@ -764,8 +857,10 @@ impl Context {
             "\\(",
             "\\)",
             false,
-            move |input| tuple((not_line_ending, anychar))(input).map(|(i, (_skip, c))| (i, c)),
-            collect_char_sequence(Token::InlineMath),
+            move |input| tuple((not(line_ending), anychar))(input).map(|(i, (_skip, c))| (i, c)),
+            fail,
+            collect_char_sequence(Token::InlineCode),
+            identity,
         )(input)
     }
 
@@ -1044,7 +1139,61 @@ mod test {
                 ]))
             },
             Context.full(Span::new(emoji)).unwrap().1.merged()
-        )
+        );
+
+        let bold_italic = r#"***bold italic***"#;
+        assert_eq!(
+            Token::BoldItalic(Box::new(Token::PlainText("bold italic".into()))),
+            Context.full(Span::new(bold_italic)).unwrap().1.merged()
+        );
+
+        let bold_italic_tag = r#"<b><i>bold italic</i></b>"#;
+        assert_eq!(
+            Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
+                "bold italic".into()
+            ))))),
+            Context.full(Span::new(bold_italic_tag)).unwrap().1.merged()
+        );
+
+        assert_eq!(
+            Token::Sequence(vec![
+                Token::PlainText("<b>bold ".into()),
+                Token::Mention {
+                    mention_type: crate::MentionType::User,
+                    name: "tag1".into(),
+                    host: None
+                },
+                Token::PlainText(" <i> ".into()),
+                Token::Mention {
+                    mention_type: crate::MentionType::User,
+                    name: "tag2".into(),
+                    host: None
+                },
+                Token::PlainText(" </b>italic</i>".into())
+            ]),
+            Context
+                .full(Span::new(r#"<b>bold @tag1 <i> @tag2 </b>italic</i>"#))
+                .unwrap()
+                .1
+                .merged()
+        );
+
+        let quote = r#"
+> test
+> <i>
+> italic
+> </i>
+>> Nested quote
+"#;
+
+        assert_eq!(
+            Token::Quote(Box::new(Token::Sequence(vec![
+                Token::PlainText("test\n".into()),
+                Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
+                Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
+            ]))),
+            Context.full(Span::new(quote)).unwrap().1.merged()
+        );
     }
 
     #[test]

From 95bce443be543c298f5676a84c31fb08839fd992 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Sat, 7 Oct 2023 01:53:03 +0200
Subject: [PATCH 14/23] Fixed a code typo

---
 magnetar_mmm_parser/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 74d98ea..fbcfeb6 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -859,7 +859,7 @@ impl Context {
             false,
             move |input| tuple((not(line_ending), anychar))(input).map(|(i, (_skip, c))| (i, c)),
             fail,
-            collect_char_sequence(Token::InlineCode),
+            collect_char_sequence(Token::InlineMath),
             identity,
         )(input)
     }

From 154cc27c07e6c72f359ca5c045eb173062588602 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Sat, 7 Oct 2023 19:44:39 +0200
Subject: [PATCH 15/23] More precise emoji extraction and fixed center tag
 parsing

---
 Cargo.toml                     |   1 +
 magnetar_mmm_parser/src/lib.rs | 505 +++++++++++++++++++++++----------
 2 files changed, 350 insertions(+), 156 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index c326183..c5d0c4e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,6 +43,7 @@ nom = "7"
 nom_locate = "4"
 percent-encoding = "2.2"
 redis = "0.23"
+regex = "1.9"
 reqwest = "0.11"
 sea-orm = "0.12"
 sea-orm-migration = "0.12"
diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index fbcfeb6..8dccf96 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -13,7 +13,7 @@ use nom::{IResult, Offset, Slice};
 use nom_locate::LocatedSpan;
 use std::borrow::Cow;
 use std::collections::HashMap;
-use std::convert::identity;
+use std::convert::{identity, Infallible};
 use unicode_segmentation::UnicodeSegmentation;
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
@@ -37,7 +37,6 @@ pub enum Token<'a> {
     Sequence(Vec<Token<'a>>),
     Quote(Box<Token<'a>>),
     Small(Box<Token<'a>>),
-    Big(Box<Token<'a>>),
     BoldItalic(Box<Token<'a>>),
     Bold(Box<Token<'a>>),
     Italic(Box<Token<'a>>),
@@ -80,7 +79,6 @@ impl Token<'_> {
             Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()),
             Token::Quote(inner) => Token::Quote(Box::new(inner.owned())),
             Token::Small(inner) => Token::Small(Box::new(inner.owned())),
-            Token::Big(inner) => Token::Big(Box::new(inner.owned())),
             Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())),
             Token::Bold(inner) => Token::Bold(Box::new(inner.owned())),
             Token::Italic(inner) => Token::Italic(Box::new(inner.owned())),
@@ -180,7 +178,6 @@ impl Token<'_> {
             }
             Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
             Token::Small(inner) => Token::Small(Box::new(inner.merged())),
-            Token::Big(inner) => Token::Big(Box::new(inner.merged())),
             Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())),
             Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
             Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
@@ -228,11 +225,19 @@ fn boxing_token<'a>(func: impl Fn(Box<Token<'a>>) -> Token<'a>) -> impl Fn(Token
     move |tokens| func(Box::new(tokens))
 }
 
+#[inline]
+fn collect_sequence<'a, T>(
+    func: impl Fn(Vec<T>) -> Token<'a>,
+    transform: impl Fn(Token<'a>) -> Token<'a>,
+) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token<'a> {
+    move |tokens| transform(func(tokens.collect()))
+}
+
 #[inline]
 fn collect_char_sequence<'a>(
     func: impl Fn(Cow<'a, str>) -> Token<'a>,
-) -> impl Fn(Vec<char>) -> Token<'a> {
-    move |chars| func(Cow::Owned(chars.into_iter().collect()))
+) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token<'a> {
+    move |chars| func(Cow::Owned(chars.collect()))
 }
 
 fn spliced<'a>(
@@ -306,6 +311,42 @@ fn space(input: Span) -> IResult<Span, Token> {
     Ok((input, Token::PlainText(frag.into_fragment().into())))
 }
 
+struct Matcher<'a, 'b, T> {
+    matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
+    collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
+    _phantom_closure: std::marker::PhantomData<&'a ()>,
+    _phantom_data: std::marker::PhantomData<&'b ()>,
+    _phantom_output: std::marker::PhantomData<fn() -> T>,
+}
+
+impl<'a, 'b, T> Matcher<'a, 'b, T> {
+    fn new(
+        matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
+        collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
+    ) -> Self {
+        Self {
+            matcher_inner,
+            collector,
+            _phantom_closure: std::marker::PhantomData,
+            _phantom_data: std::marker::PhantomData,
+            _phantom_output: std::marker::PhantomData,
+        }
+    }
+}
+
+impl<'a, 'b> Matcher<'a, 'b, Infallible> {
+    // Don't break this invariant, else a monster will come at night and eat all your socks
+    fn reject() -> Self {
+        Self {
+            matcher_inner: &fail::<_, Infallible, _>,
+            collector: &|_| unreachable!(),
+            _phantom_closure: std::marker::PhantomData,
+            _phantom_data: std::marker::PhantomData,
+            _phantom_output: std::marker::PhantomData,
+        }
+    }
+}
+
 struct Context;
 
 impl Context {
@@ -477,13 +518,9 @@ impl Context {
 
         let (input, (center_seq, _)) = many_till(
             self.partial(Self::inline_single),
-            tuple((opt(line_ending), tag_end)),
+            tuple((opt(space1), opt(line_ending), tag_end)),
         )(input)?;
 
-        let (input, _) = many0(space)(input)?;
-        let (input, _) = not(not(line_ending))(input)?;
-        let (input, _) = opt(line_ending)(input)?;
-
         Ok((
             input,
             boxing_token(Token::Center)(Token::Sequence(center_seq)),
@@ -560,23 +597,21 @@ impl Context {
     }
 
     #[inline]
-    fn tag_delimited<'a, 'b: 'a, T>(
+    fn tag_delimited<'a, 'b: 'a, T, S>(
         &'a self,
-        start: &'b str,
-        end: &'b str,
+        opening_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
+        closing_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
         escape: bool,
-        matcher_inner: impl Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a,
-        matcher_inner_fallback: impl Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a,
-        collector: impl Fn(Vec<T>) -> Token<'b> + 'a,
-        mapper: impl Fn(Token<'b>) -> Token<'b> + 'a,
+        matcher: Matcher<'a, 'b, T>,
+        fallback: Matcher<'a, 'b, S>,
     ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ {
         move |input| {
-            let opening_tag = &tag(start);
-            let closing_tag = &tag(end);
-
             if escape {
-                if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), opening_tag))(input) {
-                    return Ok((input_escaped, Token::PlainText(Cow::Borrowed(&mark))));
+                if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
+                    return Ok((
+                        input_escaped,
+                        Token::PlainText(Cow::Borrowed(mark.fragment())),
+                    ));
                 }
             }
 
@@ -584,8 +619,8 @@ impl Context {
             let (post_open, _) = opening_tag(input)?;
 
             let res = tuple((
-                many1(tuple((not(closing_tag), &matcher_inner))),
-                closing_tag,
+                many1(tuple((not(&closing_tag), &matcher.matcher_inner))),
+                &closing_tag,
             ))(post_open);
 
             if let Err(nom::Err::Error(nom::error::Error {
@@ -594,8 +629,8 @@ impl Context {
             })) = res
             {
                 let res_fallback = tuple((
-                    many1(tuple((not(closing_tag), &matcher_inner_fallback))),
-                    closing_tag,
+                    many1(tuple((not(&closing_tag), &fallback.matcher_inner))),
+                    &closing_tag,
                 ))(post_open);
 
                 if res_fallback.is_err() {
@@ -606,22 +641,22 @@ impl Context {
                 }
 
                 let (input, (inner, closing)) = res_fallback.unwrap();
-                let inner = inner.into_iter().map(|(_, t)| t).collect::<Vec<_>>();
+                let mut inner = inner.into_iter().map(|(_, t)| t);
 
                 return Ok((
                     input,
                     Token::Sequence(vec![
                         Token::PlainText(begin.fragment_between(&post_open).into()),
-                        collector(inner),
+                        ((fallback.collector)(&mut inner)),
                         Token::PlainText(closing.into_fragment().into()),
                     ]),
                 ));
             }
 
             let (input, (inner, _)) = res?;
-            let inner = inner.into_iter().map(|(_, t)| t).collect::<Vec<_>>();
+            let mut inner = inner.into_iter().map(|(_, t)| t);
 
-            Ok((input, mapper(collector(inner))))
+            Ok((input, (matcher.collector)(&mut inner)))
         }
     }
 
@@ -691,176 +726,230 @@ impl Context {
 
     fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "<small>",
-            "</small>",
+            tag("<small>"),
+            tag("</small>"),
             false,
-            self.partial(Self::inline_single),
-            self.partial(Self::inline_non_formatting_single),
-            Token::Sequence,
-            boxing_token(Token::Small),
+            Matcher::new(
+                &self.partial(Self::inline_single),
+                &collect_sequence(Token::Sequence, boxing_token(Token::Small)),
+            ),
+            Matcher::new(
+                &self.partial(Self::inline_non_formatting_single),
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     // TODO: CommonMark flanking rules
     fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "***",
-            "***",
+            tag("***"),
+            tag("***"),
             true,
-            self.partial(Self::inline_single),
-            self.partial(Self::inline_non_formatting_single),
-            Token::Sequence,
-            boxing_token(Token::BoldItalic),
+            Matcher::new(
+                &self.partial(Self::inline_single),
+                &collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
+            ),
+            Matcher::new(
+                &self.partial(Self::inline_non_formatting_single),
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     // TODO: CommonMark flanking rules
     fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "___",
-            "___",
+            tag("___"),
+            tag("___"),
             true,
-            self.partial(Self::inline_single),
-            self.partial(Self::inline_non_formatting_single),
-            Token::Sequence,
-            boxing_token(Token::BoldItalic),
+            Matcher::new(
+                &self.partial(Self::inline_single),
+                &collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
+            ),
+            Matcher::new(
+                &self.partial(Self::inline_non_formatting_single),
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "<b>",
-            "</b>",
+            tag("<b>"),
+            tag("</b>"),
             false,
-            self.partial(Self::inline_single),
-            self.partial(Self::inline_non_formatting_single),
-            Token::Sequence,
-            boxing_token(Token::Bold),
+            Matcher::new(
+                &self.partial(Self::inline_single),
+                &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
+            ),
+            Matcher::new(
+                &self.partial(Self::inline_non_formatting_single),
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     // TODO: CommonMark flanking rules
     fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "**",
-            "**",
+            tag("**"),
+            tag("**"),
             true,
-            self.partial(Self::inline_single),
-            self.partial(Self::inline_non_formatting_single),
-            Token::Sequence,
-            boxing_token(Token::Bold),
+            Matcher::new(
+                &self.partial(Self::inline_single),
+                &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
+            ),
+            Matcher::new(
+                &self.partial(Self::inline_non_formatting_single),
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     // TODO: CommonMark flanking rules
     fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "__",
-            "__",
+            tag("__"),
+            tag("__"),
             true,
-            self.partial(Self::inline_single),
-            self.partial(Self::inline_non_formatting_single),
-            Token::Sequence,
-            boxing_token(Token::Bold),
+            Matcher::new(
+                &self.partial(Self::inline_single),
+                &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
+            ),
+            Matcher::new(
+                &self.partial(Self::inline_non_formatting_single),
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "<i>",
-            "</i>",
+            tag("<i>"),
+            tag("</i>"),
             false,
-            self.partial(Self::inline_single),
-            self.partial(Self::inline_non_formatting_single),
-            Token::Sequence,
-            boxing_token(Token::Italic),
+            Matcher::new(
+                &self.partial(Self::inline_single),
+                &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
+            ),
+            Matcher::new(
+                &self.partial(Self::inline_non_formatting_single),
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     // TODO: CommonMark flanking rules
     fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "*",
-            "*",
+            tag("*"),
+            tag("*"),
             true,
-            self.partial(Self::inline_single),
-            self.partial(Self::inline_non_formatting_single),
-            Token::Sequence,
-            boxing_token(Token::Italic),
+            Matcher::new(
+                &self.partial(Self::inline_single),
+                &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
+            ),
+            Matcher::new(
+                &self.partial(Self::inline_non_formatting_single),
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     // TODO: CommonMark flanking rules
     fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "_",
-            "_",
+            tag("_"),
+            tag("_"),
             true,
-            self.partial(Self::inline_single),
-            self.partial(Self::inline_non_formatting_single),
-            Token::Sequence,
-            boxing_token(Token::Italic),
+            Matcher::new(
+                &self.partial(Self::inline_single),
+                &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
+            ),
+            Matcher::new(
+                &self.partial(Self::inline_non_formatting_single),
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "<s>",
-            "</s>",
+            tag("<s>"),
+            tag("</s>"),
             false,
-            self.partial(Self::inline_single),
-            self.partial(Self::inline_non_formatting_single),
-            Token::Sequence,
-            boxing_token(Token::Strikethrough),
+            Matcher::new(
+                &self.partial(Self::inline_single),
+                &collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
+            ),
+            Matcher::new(
+                &self.partial(Self::inline_non_formatting_single),
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     // TODO: CommonMark flanking rules
     fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "~~",
-            "~~",
+            tag("~~"),
+            tag("~~"),
             true,
-            move |input| {
-                tuple((not_line_ending, self.partial(Self::inline_single)))(input)
-                    .map(|(i, t)| (i, t.1))
-            },
-            move |input| {
-                tuple((
-                    not_line_ending,
-                    self.partial(Self::inline_non_formatting_single),
-                ))(input)
-                .map(|(i, t)| (i, t.1))
-            },
-            Token::Sequence,
-            boxing_token(Token::Strikethrough),
+            Matcher::new(
+                &move |input| {
+                    map(
+                        tuple(((not(line_ending)), self.partial(Self::inline_single))),
+                        |(_, captured)| captured,
+                    )(input)
+                },
+                &collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
+            ),
+            Matcher::new(
+                &move |input| {
+                    map(
+                        tuple((
+                            (not(line_ending)),
+                            self.partial(Self::inline_non_formatting_single),
+                        )),
+                        |(_, captured)| captured,
+                    )(input)
+                },
+                &collect_sequence(Token::Sequence, identity),
+            ),
         )(input)
     }
 
     fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "`",
-            "",
+            tag("`"),
+            |input| alt((tag("`"), tag("´")))(input),
             true,
-            move |input| {
-                tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar))(input)
-                    .map(|(i, (_skip, c))| (i, c))
-            },
-            fail,
-            collect_char_sequence(Token::InlineCode),
-            identity,
+            Matcher::new(
+                &move |input| {
+                    map(
+                        tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar)),
+                        |(_, captured)| captured,
+                    )(input)
+                },
+                &collect_char_sequence(Token::InlineCode),
+            ),
+            Matcher::reject(),
         )(input)
     }
 
     fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            "\\(",
-            "\\)",
+            tag("\\("),
+            tag("\\)"),
             false,
-            move |input| tuple((not(line_ending), anychar))(input).map(|(i, (_skip, c))| (i, c)),
-            fail,
-            collect_char_sequence(Token::InlineMath),
-            identity,
+            Matcher::new(
+                &move |input| {
+                    map(tuple((not(line_ending), anychar)), |(_, captured)| captured)(input)
+                },
+                &collect_char_sequence(Token::InlineMath),
+            ),
+            Matcher::reject(),
         )(input)
     }
 
@@ -925,6 +1014,8 @@ impl Context {
             return fail(input);
         };
 
+        let grapheme = grapheme.trim_end_matches(|c| c == '\u{200c}' || c == '\u{200d}');
+
         let emoji = emojis::get(grapheme);
 
         if emoji.is_none() {
@@ -1059,10 +1150,13 @@ fn url_chars<'a, T: 'a>(
 mod test {
     use crate::{url_chars, Context, Span, Token};
     use nom::bytes::complete::tag;
-    use nom::multi::many1;
     use std::borrow::Cow;
     use std::collections::HashMap;
 
+    fn parse_full(string: &str) -> Token {
+        Context.full(Span::new(string)).unwrap().1.merged().owned()
+    }
+
     #[test]
     fn parse_url_chars() {
         let test1 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security))";
@@ -1111,9 +1205,92 @@ mod test {
         );
     }
 
+    #[test]
+    fn parse_formatting() {
+        assert_eq!(
+            Token::Strikethrough(Box::new(Token::PlainText("stikethrough".into()))),
+            parse_full(r#"~~stikethrough~~"#)
+        );
+
+        assert_eq!(
+            Token::Bold(Box::new(Token::PlainText("bold".into()))),
+            parse_full(r#"**bold**"#)
+        );
+
+        assert_eq!(
+            Token::Italic(Box::new(Token::PlainText("italic".into()))),
+            parse_full(r#"*italic*"#)
+        );
+
+        assert_eq!(
+            Token::Sequence(vec![
+                Token::PlainText("not code ".into()),
+                Token::InlineCode("code".into()),
+                Token::PlainText(" also not code".into())
+            ]),
+            parse_full(r#"not code `code` also not code"#)
+        );
+
+        assert_eq!(
+            Token::Sequence(vec![
+                Token::PlainText("not code ".into()),
+                Token::InlineCode("code".into()),
+                Token::PlainText(" also `not code".into())
+            ]),
+            parse_full(r#"not code `code` also `not code"#)
+        );
+
+        assert_eq!(
+            Token::Sequence(vec![
+                Token::PlainText("not code ".into()),
+                Token::InlineCode("*not bold*".into()),
+                Token::PlainText(" also not code".into())
+            ]),
+            parse_full(r#"not code `*not bold*` also not code"#)
+        );
+
+        assert_eq!(
+            Token::BoldItalic(Box::new(Token::PlainText("bold italic".into()))),
+            parse_full(r#"***bold italic***"#)
+        );
+
+        assert_eq!(
+            Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
+                "bold italic".into()
+            ))))),
+            parse_full(r#"<b><i>bold italic</i></b>"#)
+        );
+    }
+
     #[test]
     fn parse_complex() {
-        let emoji = r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#;
+        assert_eq!(
+            Token::Center(Box::new(Token::Sequence(vec![
+                Token::PlainText("centered\n".into()),
+                Token::UnicodeEmoji("🦋".into()),
+                Token::UnicodeEmoji("🏳️‍⚧️".into()),
+                Token::PlainText("\ntext".into())
+            ]))),
+            parse_full(
+                r#"<center>centered
+🦋🏳️‍⚧️
+text</center>"#
+            )
+        );
+
+        assert_eq!(
+            Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
+                Token::PlainText("centered\n".into()),
+                Token::UnicodeEmoji("👩🏽‍🤝‍👩🏼".into()),
+                Token::PlainText("\ntext".into())
+            ]))))),
+            parse_full(
+                r#"> <center>centered
+> 👩🏽‍🤝‍👩🏼
+> text</center>"#
+            )
+        );
+
         assert_eq!(
             Token::Function {
                 name: "x2".into(),
@@ -1138,21 +1315,7 @@ mod test {
                     Token::UnicodeEmoji("🦊".into()),
                 ]))
             },
-            Context.full(Span::new(emoji)).unwrap().1.merged()
-        );
-
-        let bold_italic = r#"***bold italic***"#;
-        assert_eq!(
-            Token::BoldItalic(Box::new(Token::PlainText("bold italic".into()))),
-            Context.full(Span::new(bold_italic)).unwrap().1.merged()
-        );
-
-        let bold_italic_tag = r#"<b><i>bold italic</i></b>"#;
-        assert_eq!(
-            Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
-                "bold italic".into()
-            ))))),
-            Context.full(Span::new(bold_italic_tag)).unwrap().1.merged()
+            parse_full(r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#)
         );
 
         assert_eq!(
@@ -1178,37 +1341,67 @@ mod test {
                 .merged()
         );
 
-        let quote = r#"
-> test
-> <i>
-> italic
-> </i>
->> Nested quote
-"#;
-
         assert_eq!(
             Token::Quote(Box::new(Token::Sequence(vec![
                 Token::PlainText("test\n".into()),
                 Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
                 Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
             ]))),
-            Context.full(Span::new(quote)).unwrap().1.merged()
+            parse_full(
+                r#"
+> test
+> <i>
+> italic
+> </i>
+>> Nested quote
+"#
+            )
         );
     }
 
     #[test]
     fn parse_emoji() {
-        let test = "🥺💜❤️🦊";
-        let ctx = Context;
-        let tokens = many1(ctx.partial(Context::unicode_emoji))(Span::from(test)).unwrap();
+        assert_eq!(
+            Token::Sequence(
+                vec!["🥺", "💜", "❤️", "🦊"]
+                    .into_iter()
+                    .map(<&str as Into<Cow<_>>>::into)
+                    .map(Token::UnicodeEmoji)
+                    .collect::<Vec<_>>()
+            ),
+            parse_full("🥺💜❤️🦊")
+        );
+
+        // Trans flag, ZWJ
+        assert_eq!(
+            Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}".into()),
+            parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}")
+        );
 
         assert_eq!(
-            vec!["🥺", "💜", "❤️", "🦊"]
-                .into_iter()
-                .map(<&str as Into<Cow<_>>>::into)
-                .map(Token::UnicodeEmoji)
-                .collect::<Vec<_>>(),
-            tokens.1
+            Token::Sequence(vec![
+                Token::PlainText("\u{0200d}".into()),             // ZWJ
+                Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
+            ]),
+            parse_full("\u{0200d}\u{1f3f3}\u{0fe0f}")
+        );
+
+        // Trans flag, ZWNJ
+        assert_eq!(
+            Token::Sequence(vec![
+                Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
+                Token::PlainText("\u{0200c}".into()),             // ZWNJ
+                Token::UnicodeEmoji("\u{026a7}\u{0fe0f}".into())  // Trans symbol
+            ]),
+            parse_full("\u{1f3f3}\u{0fe0f}\u{0200c}\u{026a7}\u{0fe0f}")
+        );
+
+        assert_eq!(
+            Token::Sequence(vec![
+                Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
+                Token::PlainText("\u{0200d}\u{0200d}\u{0200d}".into()), // ZWJ
+            ]),
+            parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{0200d}\u{0200d}")
         );
     }
 }

From d2bc67974055b9f10d0dae7a1dad2d292a9ca783 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Sat, 7 Oct 2023 20:40:01 +0200
Subject: [PATCH 16/23] Fixed link parsing

---
 magnetar_mmm_parser/src/lib.rs | 184 +++++++++++++++++++++++++++++----
 1 file changed, 165 insertions(+), 19 deletions(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 8dccf96..63e55c5 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -48,7 +48,7 @@ pub enum Token<'a> {
     UrlRaw(Cow<'a, str>),
     UrlNoEmbed(Cow<'a, str>),
     Link {
-        label: Cow<'a, str>,
+        label: Box<Token<'a>>,
         href: Cow<'a, str>,
         embed: bool,
     },
@@ -91,7 +91,7 @@ impl Token<'_> {
             Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())),
             Token::Link { embed, label, href } => Token::Link {
                 embed: *embed,
-                label: Cow::Owned(label.clone().into_owned()),
+                label: Box::new(label.owned()),
                 href: Cow::Owned(href.clone().into_owned()),
             },
             Token::BlockCode { inner, lang } => Token::BlockCode {
@@ -183,6 +183,11 @@ impl Token<'_> {
             Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
             Token::Center(inner) => Token::Center(Box::new(inner.merged())),
             Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
+            Token::Link { embed, label, href } => Token::Link {
+                label: Box::new(label.merged()),
+                href: href.clone(),
+                embed: *embed,
+            },
             Token::Function {
                 name,
                 params,
@@ -387,12 +392,14 @@ impl Context {
     fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         let (input, token) = alt((
             self.partial(Self::unicode_emoji),
-            self.partial(Self::tag_block_center),
-            self.partial(Self::tag_small),
-            self.partial(Self::tag_plain),
-            self.partial(Self::tag_bold),
-            self.partial(Self::tag_italic),
-            self.partial(Self::tag_strikethrough),
+            alt((
+                self.partial(Self::tag_block_center),
+                self.partial(Self::tag_small),
+                self.partial(Self::tag_plain),
+                self.partial(Self::tag_bold),
+                self.partial(Self::tag_italic),
+                self.partial(Self::tag_strikethrough),
+            )),
             self.partial(Self::url_no_embed),
             self.partial(Self::base_bold_italic),
             self.partial(Self::tag_block_code),
@@ -405,6 +412,7 @@ impl Context {
             self.partial(Self::tag_mention),
             self.partial(Self::tag_hashtag),
             self.partial(Self::shortcode_emoji),
+            self.partial(Self::link),
             self.partial(Self::raw_url),
             self.partial(Self::text),
         ))(input)?;
@@ -428,6 +436,7 @@ impl Context {
             self.partial(Self::tag_mention),
             self.partial(Self::tag_hashtag),
             self.partial(Self::shortcode_emoji),
+            self.partial(Self::link),
             self.partial(Self::raw_url),
             self.partial(Self::text),
         ))(input)?;
@@ -989,19 +998,15 @@ impl Context {
         let (input, no_embed) = opt(tag("?"))(input)?;
         let (input, _) = tag("[")(input)?;
         let (input, _) = not(tag("["))(input)?;
-        let (input, label_span) = recognize(many1(tuple((
-            not(tag("](")),
-            self.partial(Self::inline_label_safe_single),
-        ))))(input)?;
-        let (input, _) = tag("]")(input)?;
-        let (input, _) = tag("(")(input)?;
-        let (input, url_span) = recognize(tuple((protocol, url_chars(tag("]"), true))))(input)?;
+        let (input, (label_tok, _)) =
+            many_till(self.partial(Self::inline_label_safe_single), tag("]("))(input)?;
+        let (input, url_span) = recognize(tuple((protocol, url_chars(tag(")"), true))))(input)?;
         let (input, _) = tag(")")(input)?;
 
         Ok((
             input,
             Token::Link {
-                label: label_span.into_fragment().into(),
+                label: Box::new(Token::Sequence(label_tok)),
                 href: url_span.into_fragment().into(),
                 embed: no_embed.is_none(),
             },
@@ -1056,19 +1061,22 @@ impl Context {
             Span::into_fragment,
         )(input)?;
 
-        let (input, host) = map(
+        let before = input;
+        let (_, host) = map(
             opt(tuple((
                 tag("@"),
                 map(
-                    recognize(many1(alt((alphanumeric1, recognize(one_of("-_")))))),
+                    recognize(many1(alt((alphanumeric1, recognize(one_of("-_.")))))),
                     Span::into_fragment,
                 ),
             ))),
             |maybe_tag_host| maybe_tag_host.map(|(_, host)| host),
         )(input)?;
 
+        let host = host.map(|h| h.trim_end_matches(|c| matches!(c, '.' | '-' | '_')));
+
         Ok((
-            input,
+            host.map(|c| before.slice(c.len() + 1..)).unwrap_or(before),
             Token::Mention {
                 mention_type,
                 name: name.into(),
@@ -1359,6 +1367,144 @@ text</center>"#
         );
     }
 
+    #[test]
+    fn parse_link() {
+        assert_eq!(
+            parse_full("Link test: [label](https://example.com)"),
+            Token::Sequence(vec![
+                Token::PlainText("Link test: ".into()),
+                Token::Link {
+                    label: Box::new(Token::PlainText("label".into())),
+                    href: "https://example.com".into(),
+                    embed: true
+                }
+            ])
+        );
+
+        assert_eq!(
+            parse_full("Link test: ?[label](https://awawa.gay)"),
+            Token::Sequence(vec![
+                Token::PlainText("Link test: ".into()),
+                Token::Link {
+                    label: Box::new(Token::PlainText("label".into())),
+                    href: "https://awawa.gay".into(),
+                    embed: false
+                }
+            ])
+        );
+
+        assert_eq!(
+            parse_full("Link test: ?[label](https://awawa.gay"), // Missing closing bracket
+            Token::Sequence(vec![
+                Token::PlainText("Link test: ?[label](".into()),
+                Token::UrlRaw("https://awawa.gay".into()),
+            ])
+        );
+    }
+
+    #[test]
+    fn parse_mention() {
+        assert_eq!(
+            parse_full("@tag"),
+            Token::Mention {
+                mention_type: crate::MentionType::User,
+                name: "tag".into(),
+                host: None
+            }
+        );
+
+        assert_eq!(
+            parse_full("hgsjlkdsa @tag fgahjsdkd"),
+            Token::Sequence(vec![
+                Token::PlainText("hgsjlkdsa ".into()),
+                Token::Mention {
+                    mention_type: crate::MentionType::User,
+                    name: "tag".into(),
+                    host: None
+                },
+                Token::PlainText(" fgahjsdkd".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("hgsjlkdsa @tag@ fgahjsdkd"),
+            Token::Sequence(vec![
+                Token::PlainText("hgsjlkdsa ".into()),
+                Token::Mention {
+                    mention_type: crate::MentionType::User,
+                    name: "tag".into(),
+                    host: None
+                },
+                Token::PlainText("@ fgahjsdkd".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("aaaa @tag@domain bbbbb"),
+            Token::Sequence(vec![
+                Token::PlainText("aaaa ".into()),
+                Token::Mention {
+                    mention_type: crate::MentionType::User,
+                    name: "tag".into(),
+                    host: Some("domain".into())
+                },
+                Token::PlainText(" bbbbb".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("test @tag@domain, test"),
+            Token::Sequence(vec![
+                Token::PlainText("test ".into()),
+                Token::Mention {
+                    mention_type: crate::MentionType::User,
+                    name: "tag".into(),
+                    host: Some("domain".into())
+                },
+                Token::PlainText(", test".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("test @tag@domain.gay. test"),
+            Token::Sequence(vec![
+                Token::PlainText("test ".into()),
+                Token::Mention {
+                    mention_type: crate::MentionType::User,
+                    name: "tag".into(),
+                    host: Some("domain.gay".into())
+                },
+                Token::PlainText(". test".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("test @tag@domain? test"),
+            Token::Sequence(vec![
+                Token::PlainText("test ".into()),
+                Token::Mention {
+                    mention_type: crate::MentionType::User,
+                    name: "tag".into(),
+                    host: Some("domain".into())
+                },
+                Token::PlainText("? test".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("test !tag@domain.com test"),
+            Token::Sequence(vec![
+                Token::PlainText("test ".into()),
+                Token::Mention {
+                    mention_type: crate::MentionType::Community,
+                    name: "tag".into(),
+                    host: Some("domain.com".into())
+                },
+                Token::PlainText(" test".into())
+            ])
+        );
+    }
+
     #[test]
     fn parse_emoji() {
         assert_eq!(

From c4fd99fa45990cd68dfa4465c4635e4744a7efd1 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Sat, 7 Oct 2023 21:22:21 +0200
Subject: [PATCH 17/23] Stricter URL parsing

---
 magnetar_mmm_parser/src/lib.rs | 129 ++++++++++++++++++++-------------
 1 file changed, 80 insertions(+), 49 deletions(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 63e55c5..6f1bf94 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -991,7 +991,10 @@ impl Context {
         let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
         let (input, _) = tag(">")(input)?;
 
-        Ok((input, Token::UrlRaw(Cow::from(url_span.into_fragment()))))
+        Ok((
+            input,
+            Token::UrlNoEmbed(Cow::from(url_span.into_fragment())),
+        ))
     }
 
     fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
@@ -1120,7 +1123,12 @@ fn protocol(input: Span) -> IResult<Span, Span> {
 
 #[inline]
 fn url_chars_base(input: Span) -> IResult<Span, Span> {
-    recognize(alt((alpha1, recognize(one_of(".,_/:%#$&?!~=+-()[]@")))))(input)
+    recognize(alt((
+        alpha1,
+        recognize(tuple((tag("["), many_till(url_chars_base, tag("]"))))),
+        recognize(tuple((tag("("), many_till(url_chars_base, tag(")"))))),
+        recognize(one_of(".,_/:%#$&?!~=+-@")),
+    )))(input)
 }
 
 #[inline]
@@ -1128,26 +1136,10 @@ fn url_chars<'a, T: 'a>(
     terminator: impl Fn(Span<'a>) -> IResult<Span<'a>, T> + 'a,
     spaces: bool,
 ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'a {
-    let terminating = move |input| {
-        tuple((
-            &terminator,
-            alt((
-                space1,
-                line_ending,
-                eof,
-                recognize(one_of("([<'\"")),
-                recognize(tuple((
-                    alt((alpha1, recognize(one_of("*")))),
-                    alt((space1, line_ending, eof)),
-                ))),
-            )),
-        ))(input)
-    };
-
     let chars = tuple((
         not(tuple((space1, eof))),
         not(tuple((space1, tag("\"")))),
-        not(tuple((opt(space1), terminating))),
+        not(tuple((opt(space1), terminator))),
         alt((url_chars_base, if spaces { space1 } else { fail })),
     ));
 
@@ -1167,49 +1159,48 @@ mod test {
 
     #[test]
     fn parse_url_chars() {
-        let test1 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security))";
         assert_eq!(
+            url_chars(tag(")"), true)(Span::new(
+                "https://en.wikipedia.org/wiki/Sandbox_(computer_security))"
+            ))
+            .unwrap()
+            .1
+            .into_fragment(),
+            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)"
+        );
+
+        assert_eq!(
+            url_chars(tag(")"), true)(Span::new(
+                "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))"
+            ))
+            .unwrap()
+            .1
+            .into_fragment(),
             "https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
-            url_chars(tag(")"), true)(Span::new(test1))
-                .unwrap()
-                .1
-                .into_fragment()
         );
 
-        let test2 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))";
         assert_eq!(
-            "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
-            url_chars(tag(")"), true)(Span::new(test2))
+            url_chars(tag(")"), true)(Span::new("https://cs.wikipedia.org/wiki/Among_Us  "))
                 .unwrap()
                 .1
-                .into_fragment()
-        );
-
-        let test3 = "https://en.wikipedia.org/wiki/(";
-        assert_eq!(
-            test3,
-            url_chars(tag(")"), true)(Span::new(test3))
-                .unwrap()
-                .1
-                .into_fragment()
-        );
-
-        let test4 = "https://cs.wikipedia.org/wiki/Among_Us  ";
-        assert_eq!(
+                .into_fragment(),
             "https://cs.wikipedia.org/wiki/Among_Us",
-            url_chars(tag(")"), true)(Span::new(test4))
-                .unwrap()
-                .1
-                .into_fragment()
         );
 
-        let test5 = "https://cs.wikipedia.org/wiki/Among Us  )";
         assert_eq!(
-            "https://cs.wikipedia.org/wiki/Among Us",
-            url_chars(tag(")"), true)(Span::new(test5))
+            url_chars(tag(")"), true)(Span::new("https://cs.wikipedia.org/wiki/Among Us  )"))
                 .unwrap()
                 .1
-                .into_fragment()
+                .into_fragment(),
+            "https://cs.wikipedia.org/wiki/Among Us"
+        );
+
+        assert_eq!(
+            url_chars(tag(")"), false)(Span::new("https://en.wikipedia.org/wiki/Among Us  )"))
+                .unwrap()
+                .1
+                .into_fragment(),
+            "https://en.wikipedia.org/wiki/Among"
         );
     }
 
@@ -1381,6 +1372,20 @@ text</center>"#
             ])
         );
 
+        assert_eq!(
+            parse_full("<https://example.com>"),
+            Token::UrlNoEmbed("https://example.com".into())
+        );
+
+        // Adjacent links okay
+        assert_eq!(
+            parse_full("<https://example.com/><https://awawa.gay/>"),
+            Token::Sequence(vec![
+                Token::UrlNoEmbed("https://example.com/".into()),
+                Token::UrlNoEmbed("https://awawa.gay/".into())
+            ])
+        );
+
         assert_eq!(
             parse_full("Link test: ?[label](https://awawa.gay)"),
             Token::Sequence(vec![
@@ -1393,6 +1398,32 @@ text</center>"#
             ])
         );
 
+        assert_eq!(
+            parse_full("Link test: ?[label](https://awawa.gay)test"),
+            Token::Sequence(vec![
+                Token::PlainText("Link test: ".into()),
+                Token::Link {
+                    label: Box::new(Token::PlainText("label".into())),
+                    href: "https://awawa.gay".into(),
+                    embed: false
+                },
+                Token::PlainText("test".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("Link test: (?[label](https://awawa.gay))"),
+            Token::Sequence(vec![
+                Token::PlainText("Link test: (".into()),
+                Token::Link {
+                    label: Box::new(Token::PlainText("label".into())),
+                    href: "https://awawa.gay".into(),
+                    embed: false
+                },
+                Token::PlainText(")".into())
+            ])
+        );
+
         assert_eq!(
             parse_full("Link test: ?[label](https://awawa.gay"), // Missing closing bracket
             Token::Sequence(vec![

From 26bd6fe4b2ee893d5325c19d7208b40dbf320cb8 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Sat, 7 Oct 2023 21:26:25 +0200
Subject: [PATCH 18/23] Normalized tests

---
 magnetar_mmm_parser/src/lib.rs | 82 ++++++++++++++++------------------
 1 file changed, 39 insertions(+), 43 deletions(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 6f1bf94..7385d65 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -1207,90 +1207,91 @@ mod test {
     #[test]
     fn parse_formatting() {
         assert_eq!(
+            parse_full(r#"~~stikethrough~~"#),
             Token::Strikethrough(Box::new(Token::PlainText("stikethrough".into()))),
-            parse_full(r#"~~stikethrough~~"#)
         );
 
         assert_eq!(
+            parse_full(r#"**bold**"#),
             Token::Bold(Box::new(Token::PlainText("bold".into()))),
-            parse_full(r#"**bold**"#)
         );
 
         assert_eq!(
+            parse_full(r#"*italic*"#),
             Token::Italic(Box::new(Token::PlainText("italic".into()))),
-            parse_full(r#"*italic*"#)
         );
 
         assert_eq!(
+            parse_full(r#"not code `code` also not code"#),
             Token::Sequence(vec![
                 Token::PlainText("not code ".into()),
                 Token::InlineCode("code".into()),
                 Token::PlainText(" also not code".into())
             ]),
-            parse_full(r#"not code `code` also not code"#)
         );
 
         assert_eq!(
+            parse_full(r#"not code `code` also `not code"#),
             Token::Sequence(vec![
                 Token::PlainText("not code ".into()),
                 Token::InlineCode("code".into()),
                 Token::PlainText(" also `not code".into())
             ]),
-            parse_full(r#"not code `code` also `not code"#)
         );
 
         assert_eq!(
+            parse_full(r#"not code `*not bold*` also not code"#),
             Token::Sequence(vec![
                 Token::PlainText("not code ".into()),
                 Token::InlineCode("*not bold*".into()),
                 Token::PlainText(" also not code".into())
             ]),
-            parse_full(r#"not code `*not bold*` also not code"#)
         );
 
         assert_eq!(
-            Token::BoldItalic(Box::new(Token::PlainText("bold italic".into()))),
-            parse_full(r#"***bold italic***"#)
+            parse_full(r#"***bold italic***"#),
+            Token::BoldItalic(Box::new(Token::PlainText("bold italic".into())))
         );
 
         assert_eq!(
+            parse_full(r#"<b><i>bold italic</i></b>"#),
             Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
                 "bold italic".into()
-            ))))),
-            parse_full(r#"<b><i>bold italic</i></b>"#)
+            )))))
         );
     }
 
     #[test]
     fn parse_complex() {
         assert_eq!(
+            parse_full(
+                r#"<center>centered
+🦋🏳️‍⚧️
+text</center>"#
+            ),
             Token::Center(Box::new(Token::Sequence(vec![
                 Token::PlainText("centered\n".into()),
                 Token::UnicodeEmoji("🦋".into()),
                 Token::UnicodeEmoji("🏳️‍⚧️".into()),
                 Token::PlainText("\ntext".into())
-            ]))),
-            parse_full(
-                r#"<center>centered
-🦋🏳️‍⚧️
-text</center>"#
-            )
+            ])))
         );
 
         assert_eq!(
+            parse_full(
+                r#"> <center>centered
+> 👩🏽‍🤝‍👩🏼
+> text</center>"#
+            ),
             Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
                 Token::PlainText("centered\n".into()),
                 Token::UnicodeEmoji("👩🏽‍🤝‍👩🏼".into()),
                 Token::PlainText("\ntext".into())
             ]))))),
-            parse_full(
-                r#"> <center>centered
-> 👩🏽‍🤝‍👩🏼
-> text</center>"#
-            )
         );
 
         assert_eq!(
+            parse_full(r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#),
             Token::Function {
                 name: "x2".into(),
                 params: HashMap::new(),
@@ -1314,10 +1315,10 @@ text</center>"#
                     Token::UnicodeEmoji("🦊".into()),
                 ]))
             },
-            parse_full(r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#)
         );
 
         assert_eq!(
+            parse_full(r#"<b>bold @tag1 <i> @tag2 </b>italic</i>"#),
             Token::Sequence(vec![
                 Token::PlainText("<b>bold ".into()),
                 Token::Mention {
@@ -1333,19 +1334,9 @@ text</center>"#
                 },
                 Token::PlainText(" </b>italic</i>".into())
             ]),
-            Context
-                .full(Span::new(r#"<b>bold @tag1 <i> @tag2 </b>italic</i>"#))
-                .unwrap()
-                .1
-                .merged()
         );
 
         assert_eq!(
-            Token::Quote(Box::new(Token::Sequence(vec![
-                Token::PlainText("test\n".into()),
-                Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
-                Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
-            ]))),
             parse_full(
                 r#"
 > test
@@ -1354,7 +1345,12 @@ text</center>"#
 > </i>
 >> Nested quote
 "#
-            )
+            ),
+            Token::Quote(Box::new(Token::Sequence(vec![
+                Token::PlainText("test\n".into()),
+                Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
+                Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
+            ]))),
         );
     }
 
@@ -1539,46 +1535,46 @@ text</center>"#
     #[test]
     fn parse_emoji() {
         assert_eq!(
+            parse_full("🥺💜❤️🦊"),
             Token::Sequence(
                 vec!["🥺", "💜", "❤️", "🦊"]
                     .into_iter()
                     .map(<&str as Into<Cow<_>>>::into)
                     .map(Token::UnicodeEmoji)
                     .collect::<Vec<_>>()
-            ),
-            parse_full("🥺💜❤️🦊")
+            )
         );
 
         // Trans flag, ZWJ
         assert_eq!(
-            Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}".into()),
-            parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}")
+            parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}"),
+            Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}".into())
         );
 
         assert_eq!(
+            parse_full("\u{0200d}\u{1f3f3}\u{0fe0f}"),
             Token::Sequence(vec![
                 Token::PlainText("\u{0200d}".into()),             // ZWJ
                 Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
-            ]),
-            parse_full("\u{0200d}\u{1f3f3}\u{0fe0f}")
+            ])
         );
 
         // Trans flag, ZWNJ
         assert_eq!(
+            parse_full("\u{1f3f3}\u{0fe0f}\u{0200c}\u{026a7}\u{0fe0f}"),
             Token::Sequence(vec![
                 Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
                 Token::PlainText("\u{0200c}".into()),             // ZWNJ
                 Token::UnicodeEmoji("\u{026a7}\u{0fe0f}".into())  // Trans symbol
-            ]),
-            parse_full("\u{1f3f3}\u{0fe0f}\u{0200c}\u{026a7}\u{0fe0f}")
+            ])
         );
 
         assert_eq!(
+            parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{0200d}\u{0200d}"),
             Token::Sequence(vec![
                 Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
                 Token::PlainText("\u{0200d}\u{0200d}\u{0200d}".into()), // ZWJ
-            ]),
-            parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{0200d}\u{0200d}")
+            ])
         );
     }
 }

From d0d977e6ebbf7676b26c1a76ab5890baa2389910 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Sun, 8 Oct 2023 22:15:55 +0200
Subject: [PATCH 19/23] Fixed URL parsing and initial flanking rules
 implementation

---
 magnetar_mmm_parser/src/lib.rs | 388 +++++++++++++++++++++++++++------
 1 file changed, 322 insertions(+), 66 deletions(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 7385d65..d270760 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -1,19 +1,20 @@
 use either::Either;
 use nom::branch::alt;
-use nom::bytes::complete::tag;
+use nom::bytes::complete::{tag, tag_no_case};
 use nom::character::complete::{
-    alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of, space1,
-    tab,
+    alpha1, alphanumeric1, anychar, char as one_char, char, line_ending, not_line_ending, one_of,
+    satisfy, space1, tab,
 };
 use nom::combinator::{eof, fail, map, not, opt, recognize};
 use nom::error::ErrorKind;
 use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
-use nom::{IResult, Offset, Slice};
+use nom::{Compare, IResult, Offset, Slice};
 use nom_locate::LocatedSpan;
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::convert::{identity, Infallible};
+use std::marker::PhantomData;
 use unicode_segmentation::UnicodeSegmentation;
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
@@ -73,6 +74,80 @@ pub enum Token<'a> {
 }
 
 impl Token<'_> {
+    fn str_content_left(&self) -> Option<&str> { // text of the leftmost text-bearing leaf, if any
+        match self {
+            Token::PlainText(text) => Some(text.as_ref()),
+            Token::Sequence(tokens) => tokens.first().and_then(Token::str_content_left),
+            Token::Quote(inner) => inner.str_content_left(), // wrappers delegate to their content
+            Token::Small(inner) => inner.str_content_left(),
+            Token::BoldItalic(inner) => inner.str_content_left(),
+            Token::Bold(inner) => inner.str_content_left(),
+            Token::Italic(inner) => inner.str_content_left(),
+            Token::Center(inner) => inner.str_content_left(),
+            Token::Strikethrough(inner) => inner.str_content_left(),
+            Token::PlainTag(tag) => Some(tag.as_ref()),
+            Token::UrlRaw(url) => Some(url.as_ref()),
+            Token::UrlNoEmbed(url) => Some(url.as_ref()),
+            Token::Link { label, .. } => label.str_content_left(), // the label, not the href
+            Token::Function { inner, .. } => inner.str_content_left(),
+            Token::Mention { name, .. } => Some(name.as_ref()),
+            Token::UnicodeEmoji(code) => Some(code.as_ref()),
+            Token::ShortcodeEmoji(_) => None, // same result as the catch-all below
+            Token::Hashtag(tag) => Some(tag.as_ref()),
+            _ => None, // every variant not listed above reports no text
+        }
+    }
+
+    fn str_content_right(&self) -> Option<&str> { // text of the rightmost text-bearing leaf, if any
+        match self {
+            Token::PlainText(text) => Some(text.as_ref()),
+            Token::Sequence(tokens) => tokens.last().and_then(Token::str_content_right),
+            Token::Quote(inner) => inner.str_content_right(), // wrappers delegate to their content
+            Token::Small(inner) => inner.str_content_right(),
+            Token::BoldItalic(inner) => inner.str_content_right(),
+            Token::Bold(inner) => inner.str_content_right(),
+            Token::Italic(inner) => inner.str_content_right(),
+            Token::Center(inner) => inner.str_content_right(),
+            Token::Strikethrough(inner) => inner.str_content_right(),
+            Token::PlainTag(tag) => Some(tag.as_ref()),
+            Token::UrlRaw(url) => Some(url.as_ref()),
+            Token::UrlNoEmbed(url) => Some(url.as_ref()),
+            Token::Link { label, .. } => label.str_content_right(), // the label, not the href
+            Token::Function { inner, .. } => inner.str_content_right(),
+            Token::Mention { name, .. } => Some(name.as_ref()),
+            Token::UnicodeEmoji(code) => Some(code.as_ref()),
+            Token::Hashtag(tag) => Some(tag.as_ref()),
+            _ => None, // every variant not listed above reports no text
+        }
+    }
+
+    fn inner(&self) -> Token { // this token's content with its outer formatting wrappers removed
+        match self {
+            plain @ Token::PlainText(_) => plain.clone(),
+            sequence @ Token::Sequence(_) => sequence.clone(), // children are kept unchanged
+            Token::Quote(inner) => inner.inner(), // wrappers recurse into their content
+            Token::Small(inner) => inner.inner(),
+            Token::BoldItalic(inner) => inner.inner(),
+            Token::Bold(inner) => inner.inner(),
+            Token::Italic(inner) => inner.inner(),
+            Token::Center(inner) => inner.inner(),
+            Token::Strikethrough(inner) => inner.inner(),
+            Token::PlainTag(text) => Token::PlainText(text.clone()), // leaves become plain text
+            Token::InlineCode(code) => Token::PlainText(code.clone()),
+            Token::InlineMath(math) => Token::PlainText(math.clone()),
+            Token::UrlRaw(url) => Token::PlainText(url.clone()),
+            Token::UrlNoEmbed(url) => Token::PlainText(url.clone()),
+            Token::Link { label, .. } => label.inner(), // NOTE(review): href is dropped here
+            Token::BlockCode { inner, .. } => Token::PlainText(inner.clone()),
+            Token::BlockMath(math) => Token::PlainText(math.clone()),
+            Token::Function { inner, .. } => inner.inner(),
+            Token::Mention { name, .. } => Token::PlainText(name.clone()), // only `name` is kept
+            Token::UnicodeEmoji(code) => Token::PlainText(code.clone()),
+            Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone()),
+            Token::Hashtag(tag) => Token::PlainText(tag.clone()),
+        }
+    }
+
     fn owned(&self) -> Token<'static> {
         match self {
             Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())),
@@ -129,7 +204,7 @@ impl Token<'_> {
             Token::ShortcodeEmoji(shortcode) => {
                 Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
             }
-            Token::Hashtag(url) => Token::Hashtag(Cow::Owned(url.clone().into_owned())),
+            Token::Hashtag(tag) => Token::Hashtag(Cow::Owned(tag.clone().into_owned())),
         }
     }
 
@@ -245,6 +320,16 @@ fn collect_char_sequence<'a>(
     move |chars| func(Cow::Owned(chars.collect()))
 }
 
+#[inline]
+fn alpha1_unicode(input: Span) -> IResult<Span, Span> {
+    recognize(many1_count(satisfy(char::is_alphabetic)))(input) // 1+ Unicode letters, no digits
+}
+
+#[inline]
+fn alphanumeric1_unicode(input: Span) -> IResult<Span, Span> {
+    recognize(many1_count(satisfy(char::is_alphanumeric)))(input) // 1+ Unicode letters/digits
+}
+
 fn spliced<'a>(
     segments: &[Span<'a>],
     func: impl Fn(Span) -> IResult<Span, Token>,
@@ -316,15 +401,16 @@ fn space(input: Span) -> IResult<Span, Token> {
     Ok((input, Token::PlainText(frag.into_fragment().into())))
 }
 
-struct Matcher<'a, 'b, T> {
+#[derive(Copy, Clone)]
+struct Matcher<'a, 'b, T: Clone> {
     matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
     collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
-    _phantom_closure: std::marker::PhantomData<&'a ()>,
-    _phantom_data: std::marker::PhantomData<&'b ()>,
-    _phantom_output: std::marker::PhantomData<fn() -> T>,
+    _phantom_closure: PhantomData<&'a ()>,
+    _phantom_data: PhantomData<&'b ()>,
+    _phantom_output: PhantomData<fn() -> T>,
 }
 
-impl<'a, 'b, T> Matcher<'a, 'b, T> {
+impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> {
     fn new(
         matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
         collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
@@ -332,9 +418,9 @@ impl<'a, 'b, T> Matcher<'a, 'b, T> {
         Self {
             matcher_inner,
             collector,
-            _phantom_closure: std::marker::PhantomData,
-            _phantom_data: std::marker::PhantomData,
-            _phantom_output: std::marker::PhantomData,
+            _phantom_closure: PhantomData,
+            _phantom_data: PhantomData,
+            _phantom_output: PhantomData,
         }
     }
 }
@@ -345,33 +431,60 @@ impl<'a, 'b> Matcher<'a, 'b, Infallible> {
         Self {
             matcher_inner: &fail::<_, Infallible, _>,
             collector: &|_| unreachable!(),
-            _phantom_closure: std::marker::PhantomData,
-            _phantom_data: std::marker::PhantomData,
-            _phantom_output: std::marker::PhantomData,
+            _phantom_closure: PhantomData,
+            _phantom_data: PhantomData,
+            _phantom_output: PhantomData,
         }
     }
 }
 
-struct Context;
+#[derive(Copy, Clone, Debug)]
+enum FlankingRule {
+    Lenient, // passed for the `*` and `~~` delimiters
+    Strict, // passed for the `_` delimiters; `tag_delimited` treats it like `Lenient` for now
+    DontCare, // no whitespace check; used when a delimiter is given without a rule
+}
+
+struct FlankingDelim<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>>(
+    T, // parser that recognizes the delimiter
+    FlankingRule, // rule checked against the content next to this delimiter
+    PhantomData<&'a ()>, // uses `'a`, which otherwise only appears in the bound on `T`
+);
+
+impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<(T, FlankingRule)>
+    for FlankingDelim<'a, T>
+{
+    fn from((func, rule): (T, FlankingRule)) -> Self { // delimiter parser with an explicit rule
+        FlankingDelim(func, rule, PhantomData)
+    }
+}
+
+impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<T> for FlankingDelim<'a, T> {
+    fn from(func: T) -> Self { // a bare delimiter parser defaults to `DontCare`
+        FlankingDelim(func, FlankingRule::DontCare, PhantomData)
+    }
+}
+
+pub struct Context;
 
 impl Context {
     #[inline]
-    const fn partial(
+    fn partial(
         &self,
         func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
     ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
         move |input| func(self, input)
     }
 
-    fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
     }
 
-    fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
     }
 
-    fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         map(
             many1(self.partial(Self::inline_label_safe_single)),
             Token::Sequence,
@@ -606,14 +719,21 @@ impl Context {
     }
 
     #[inline]
-    fn tag_delimited<'a, 'b: 'a, T, S>(
+    fn tag_delimited<'a, 'b: 'a, T: Clone, S: Clone, FOpen, FClose>(
         &'a self,
-        opening_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
-        closing_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
+        opening_tag: impl Into<FlankingDelim<'b, FOpen>> + 'a,
+        closing_tag: impl Into<FlankingDelim<'b, FClose>> + 'a,
         escape: bool,
         matcher: Matcher<'a, 'b, T>,
         fallback: Matcher<'a, 'b, S>,
-    ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ {
+    ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_
+    where
+        FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
+        FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
+    {
+        let FlankingDelim(opening_tag, opening_rule, ..) = opening_tag.into();
+        let FlankingDelim(closing_tag, closing_rule, ..) = closing_tag.into();
+
         move |input| {
             if escape {
                 if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
@@ -662,10 +782,44 @@ impl Context {
                 ));
             }
 
-            let (input, (inner, _)) = res?;
+            let (input, (inner, closing)) = res?;
             let mut inner = inner.into_iter().map(|(_, t)| t);
 
-            Ok((input, (matcher.collector)(&mut inner)))
+            let inner_tok = (matcher.collector)(&mut inner);
+
+            let correct_left_flanking =
+                if let FlankingRule::Lenient | FlankingRule::Strict = opening_rule {
+                    let text_left = inner_tok.str_content_left();
+
+                    !(text_left.is_some_and(|s| s.starts_with(char::is_whitespace))
+                        || text_left.is_none())
+                } else {
+                    true
+                };
+
+            let correct_right_flanking =
+                if let FlankingRule::Lenient | FlankingRule::Strict = closing_rule {
+                    let text_right = inner_tok.str_content_right();
+                    !(text_right.is_some_and(|s| s.ends_with(char::is_whitespace))
+                        || text_right.is_none())
+                } else {
+                    true
+                };
+
+            // TODO: Unfinished flanking rules
+            let correct_flanking = correct_left_flanking && correct_right_flanking;
+
+            if !correct_flanking {
+                return Ok((
+                    input,
+                    Token::Sequence(vec![
+                        Token::PlainText(begin.fragment_between(&post_open).into()),
+                        inner_tok.inner().owned(),
+                        Token::PlainText(closing.into_fragment().into()),
+                    ]),
+                ));
+            }
+            Ok((input, Token::Sequence(vec![inner_tok])))
         }
     }
 
@@ -720,12 +874,12 @@ impl Context {
     }
 
     fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let opening_tag = &tag("<small>");
-        let closing_tag = &tag("</small>");
+        let opening_tag = &tag("<plain>");
+        let closing_tag = &tag("</plain>");
 
         let (input, _) = opening_tag(input)?;
         let (input, text) = map(
-            recognize(many1(tuple((not_line_ending, not(closing_tag))))),
+            recognize(many1(tuple((not(line_ending), not(closing_tag), anychar)))),
             Span::into_fragment,
         )(input)?;
         let (input, _) = closing_tag(input)?;
@@ -735,8 +889,8 @@ impl Context {
 
     fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("<small>"),
-            tag("</small>"),
+            tag_no_case("<small>"),
+            tag_no_case("</small>"),
             false,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -749,11 +903,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("***"),
-            tag("***"),
+            (tag("***"), FlankingRule::Lenient),
+            (tag("***"), FlankingRule::Lenient),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -766,11 +919,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("___"),
-            tag("___"),
+            (tag("___"), FlankingRule::Strict),
+            (tag("___"), FlankingRule::Strict),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -785,8 +937,8 @@ impl Context {
 
     fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("<b>"),
-            tag("</b>"),
+            tag_no_case("<b>"),
+            tag_no_case("</b>"),
             false,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -799,11 +951,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("**"),
-            tag("**"),
+            (tag("**"), FlankingRule::Lenient),
+            (tag("**"), FlankingRule::Lenient),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -816,11 +967,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("__"),
-            tag("__"),
+            (tag("__"), FlankingRule::Strict),
+            (tag("__"), FlankingRule::Strict),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -835,8 +985,8 @@ impl Context {
 
     fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("<i>"),
-            tag("</i>"),
+            tag_no_case("<i>"),
+            tag_no_case("</i>"),
             false,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -849,11 +999,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("*"),
-            tag("*"),
+            (tag("*"), FlankingRule::Lenient),
+            (tag("*"), FlankingRule::Lenient),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -866,11 +1015,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("_"),
-            tag("_"),
+            (tag("_"), FlankingRule::Strict),
+            (tag("_"), FlankingRule::Strict),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -885,8 +1033,8 @@ impl Context {
 
     fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("<s>"),
-            tag("</s>"),
+            tag_no_case("<s>"),
+            tag_no_case("</s>"),
             false,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -899,11 +1047,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("~~"),
-            tag("~~"),
+            (tag("~~"), FlankingRule::Lenient),
+            (tag("~~"), FlankingRule::Lenient),
             true,
             Matcher::new(
                 &move |input| {
@@ -1037,20 +1184,42 @@ impl Context {
     }
 
     fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        // TODO: Fail when preceded by alphanumerics
+        if let (plain_out, Some(plain)) = map(
+            opt(recognize(tuple((
+                alphanumeric1_unicode,
+                self.partial(Self::shortcode_emoji),
+            )))),
+            |o| o.map(Span::into_fragment),
+        )(input)?
+        {
+            return Ok((plain_out, Token::PlainText(plain.into())));
+        }
+
         let (input, _) = tag(":")(input)?;
         let (input, shortcode) = map(
-            recognize(many1(alt((alphanumeric1, recognize(one_of("_+-")))))),
+            recognize(many1(alt((
+                alphanumeric1_unicode,
+                recognize(one_of("_+-")),
+            )))),
             Span::into_fragment,
         )(input)?;
         let (input, _) = tag(":")(input)?;
-        let (input, _) = not(alphanumeric1)(input)?;
+        let (input, _) = not(alphanumeric1_unicode)(input)?;
 
         Ok((input, Token::ShortcodeEmoji(shortcode.into())))
     }
 
     fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        // TODO: Escaping and skip when preceded by alphanumerics
+        if let (plain_out, Some(plain)) = map(
+            opt(recognize(tuple((
+                alt((tag("\\"), alphanumeric1_unicode)),
+                self.partial(Self::tag_mention),
+            )))),
+            |o| o.map(Span::into_fragment),
+        )(input)?
+        {
+            return Ok((plain_out, Token::PlainText(plain.into())));
+        }
 
         let tags = one_of("@!");
         let (input, mention_type) = map(tags, |c| match c {
@@ -1123,12 +1292,12 @@ fn protocol(input: Span) -> IResult<Span, Span> {
 
 #[inline]
 fn url_chars_base(input: Span) -> IResult<Span, Span> {
-    recognize(alt((
-        alpha1,
+    alt((
+        alphanumeric1_unicode,
         recognize(tuple((tag("["), many_till(url_chars_base, tag("]"))))),
         recognize(tuple((tag("("), many_till(url_chars_base, tag(")"))))),
         recognize(one_of(".,_/:%#$&?!~=+-@")),
-    )))(input)
+    ))(input)
 }
 
 #[inline]
@@ -1221,6 +1390,21 @@ mod test {
             Token::Italic(Box::new(Token::PlainText("italic".into()))),
         );
 
+        assert_eq!(
+            parse_full(r#"* italic *"#),
+            Token::PlainText("* italic *".into())
+        );
+
+        assert_eq!(
+            parse_full(r#"_ italic *"#),
+            Token::PlainText("_ italic *".into())
+        );
+
+        assert_eq!(
+            parse_full(r#"*"italic"*"#),
+            Token::Italic(Box::new(Token::PlainText("\"italic\"".into())))
+        );
+
         assert_eq!(
             parse_full(r#"not code `code` also not code"#),
             Token::Sequence(vec![
@@ -1356,6 +1540,47 @@ text</center>"#
 
     #[test]
     fn parse_link() {
+        assert_eq!(
+            parse_full("IPv4 test: <https://0>"),
+            Token::Sequence(vec![
+                Token::PlainText("IPv4 test: ".into()),
+                Token::UrlNoEmbed("https://0".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("IPv4 test: <https://127.0.0.1>"),
+            Token::Sequence(vec![
+                Token::PlainText("IPv4 test: ".into()),
+                Token::UrlNoEmbed("https://127.0.0.1".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("IPv6 test: <https://[::2f:1]/nya>"),
+            Token::Sequence(vec![
+                Token::PlainText("IPv6 test: ".into()),
+                Token::UrlNoEmbed("https://[::2f:1]/nya".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("IPv6 test: https://[::2f:1]/nya"),
+            Token::Sequence(vec![
+                Token::PlainText("IPv6 test: ".into()),
+                Token::UrlRaw("https://[::2f:1]/nya".into())
+            ])
+        );
+
+        // IDNs
+        assert_eq!(
+            parse_full("IDN test: https://www.háčkyčárky.cz/"),
+            Token::Sequence(vec![
+                Token::PlainText("IDN test: ".into()),
+                Token::UrlRaw("https://www.háčkyčárky.cz/".into())
+            ])
+        );
+
         assert_eq!(
             parse_full("Link test: [label](https://example.com)"),
             Token::Sequence(vec![
@@ -1440,6 +1665,11 @@ text</center>"#
             }
         );
 
+        assert_eq!(
+            parse_full("email@notactuallyamenmtion.org"),
+            Token::PlainText("email@notactuallyamenmtion.org".into())
+        );
+
         assert_eq!(
             parse_full("hgsjlkdsa @tag fgahjsdkd"),
             Token::Sequence(vec![
@@ -1532,6 +1762,32 @@ text</center>"#
         );
     }
 
+    #[test]
+    fn parse_shortcodes() {
+        assert_eq!(
+            parse_full(":bottom:"),
+            Token::ShortcodeEmoji("bottom".into())
+        );
+
+        assert_eq!(
+            parse_full(":bottom::blobfox:"),
+            Token::Sequence(vec![
+                Token::ShortcodeEmoji("bottom".into()),
+                Token::ShortcodeEmoji("blobfox".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full(":bottom:blobfox"),
+            Token::PlainText(":bottom:blobfox".into())
+        );
+
+        assert_eq!(
+            parse_full("bottom:blobfox:"),
+            Token::PlainText("bottom:blobfox:".into())
+        );
+    }
+
     #[test]
     fn parse_emoji() {
         assert_eq!(

From 23a63f2fe926e6297dd55e2a097015a1c7130f77 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Sat, 14 Oct 2023 21:41:36 +0200
Subject: [PATCH 20/23] MMM: Made the parser always output owned tokens

---
 Cargo.lock                     |  23 +++
 Cargo.toml                     |   1 +
 magnetar_mmm_parser/Cargo.toml |   1 +
 magnetar_mmm_parser/src/lib.rs | 287 +++++++++++++--------------------
 4 files changed, 138 insertions(+), 174 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index e2d79c5..cb3905d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -462,6 +462,15 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663"
 
+[[package]]
+name = "castaway"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc"
+dependencies = [
+ "rustversion",
+]
+
 [[package]]
 name = "cc"
 version = "1.0.81"
@@ -584,6 +593,19 @@ dependencies = [
  "tokio-util",
 ]
 
+[[package]]
+name = "compact_str"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f"
+dependencies = [
+ "castaway",
+ "cfg-if",
+ "itoa",
+ "ryu",
+ "static_assertions",
+]
+
 [[package]]
 name = "const-oid"
 version = "0.9.4"
@@ -1622,6 +1644,7 @@ dependencies = [
 name = "mmm_parser"
 version = "0.2.1-alpha"
 dependencies = [
+ "compact_str",
  "either",
  "emojis",
  "nom",
diff --git a/Cargo.toml b/Cargo.toml
index c5d0c4e..9828764 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,6 +28,7 @@ axum = "0.6"
 cached = "0.46"
 cfg-if = "1"
 chrono = "0.4"
+compact_str = "0.7"
 dotenvy = "0.15"
 either = "1.9"
 emojis = "0.6"
diff --git a/magnetar_mmm_parser/Cargo.toml b/magnetar_mmm_parser/Cargo.toml
index 25faa6b..14e36f7 100644
--- a/magnetar_mmm_parser/Cargo.toml
+++ b/magnetar_mmm_parser/Cargo.toml
@@ -9,4 +9,5 @@ either = { workspace = true }
 emojis = { workspace = true }
 nom = { workspace = true }
 nom_locate = { workspace = true }
+compact_str = { workspace = true }
 unicode-segmentation = { workspace = true }
diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index d270760..b940145 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -1,17 +1,17 @@
+use compact_str::{CompactString, ToCompactString};
 use either::Either;
 use nom::branch::alt;
 use nom::bytes::complete::{tag, tag_no_case};
 use nom::character::complete::{
-    alpha1, alphanumeric1, anychar, char as one_char, char, line_ending, not_line_ending, one_of,
+    alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
     satisfy, space1, tab,
 };
 use nom::combinator::{eof, fail, map, not, opt, recognize};
 use nom::error::ErrorKind;
 use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
-use nom::{Compare, IResult, Offset, Slice};
+use nom::{IResult, Offset, Slice};
 use nom_locate::LocatedSpan;
-use std::borrow::Cow;
 use std::collections::HashMap;
 use std::convert::{identity, Infallible};
 use std::marker::PhantomData;
@@ -33,47 +33,47 @@ impl MentionType {
 }
 
 #[derive(Clone, Debug, Eq, PartialEq)]
-pub enum Token<'a> {
-    PlainText(Cow<'a, str>),
-    Sequence(Vec<Token<'a>>),
-    Quote(Box<Token<'a>>),
-    Small(Box<Token<'a>>),
-    BoldItalic(Box<Token<'a>>),
-    Bold(Box<Token<'a>>),
-    Italic(Box<Token<'a>>),
-    Center(Box<Token<'a>>),
-    Strikethrough(Box<Token<'a>>),
-    PlainTag(Cow<'a, str>),
-    InlineCode(Cow<'a, str>),
-    InlineMath(Cow<'a, str>),
-    UrlRaw(Cow<'a, str>),
-    UrlNoEmbed(Cow<'a, str>),
+pub enum Token {
+    PlainText(CompactString),
+    Sequence(Vec<Token>),
+    Quote(Box<Token>),
+    Small(Box<Token>),
+    BoldItalic(Box<Token>),
+    Bold(Box<Token>),
+    Italic(Box<Token>),
+    Center(Box<Token>),
+    Strikethrough(Box<Token>),
+    PlainTag(String),
+    InlineCode(String),
+    InlineMath(String),
+    UrlRaw(String),
+    UrlNoEmbed(String),
     Link {
-        label: Box<Token<'a>>,
-        href: Cow<'a, str>,
+        label: Box<Token>,
+        href: String,
         embed: bool,
     },
     BlockCode {
-        lang: Option<Cow<'a, str>>,
-        inner: Cow<'a, str>,
+        lang: Option<String>,
+        inner: String,
     },
-    BlockMath(Cow<'a, str>),
+    BlockMath(String),
     Function {
-        name: Cow<'a, str>,
-        params: HashMap<Cow<'a, str>, Option<Cow<'a, str>>>,
-        inner: Box<Token<'a>>,
+        name: String,
+        params: HashMap<String, Option<String>>,
+        inner: Box<Token>,
     },
     Mention {
-        name: Cow<'a, str>,
-        host: Option<Cow<'a, str>>,
+        name: String,
+        host: Option<String>,
         mention_type: MentionType,
     },
-    UnicodeEmoji(Cow<'a, str>),
-    ShortcodeEmoji(Cow<'a, str>),
-    Hashtag(Cow<'a, str>),
+    UnicodeEmoji(String),
+    ShortcodeEmoji(String),
+    Hashtag(String),
 }
 
-impl Token<'_> {
+impl Token {
     fn str_content_left(&self) -> Option<&str> {
         match self {
             Token::PlainText(text) => Some(text.as_ref()),
@@ -132,79 +132,19 @@ impl Token<'_> {
             Token::Italic(inner) => inner.inner(),
             Token::Center(inner) => inner.inner(),
             Token::Strikethrough(inner) => inner.inner(),
-            Token::PlainTag(text) => Token::PlainText(text.clone()),
-            Token::InlineCode(code) => Token::PlainText(code.clone()),
-            Token::InlineMath(math) => Token::PlainText(math.clone()),
-            Token::UrlRaw(url) => Token::PlainText(url.clone()),
-            Token::UrlNoEmbed(url) => Token::PlainText(url.clone()),
+            Token::PlainTag(text) => Token::PlainText(text.clone().into()),
+            Token::InlineCode(code) => Token::PlainText(code.clone().into()),
+            Token::InlineMath(math) => Token::PlainText(math.clone().into()),
+            Token::UrlRaw(url) => Token::PlainText(url.clone().into()),
+            Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()),
             Token::Link { label, .. } => label.inner(),
-            Token::BlockCode { inner, .. } => Token::PlainText(inner.clone()),
-            Token::BlockMath(math) => Token::PlainText(math.clone()),
+            Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()),
+            Token::BlockMath(math) => Token::PlainText(math.clone().into()),
             Token::Function { inner, .. } => inner.inner(),
-            Token::Mention { name, .. } => Token::PlainText(name.clone()),
-            Token::UnicodeEmoji(code) => Token::PlainText(code.clone()),
-            Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone()),
-            Token::Hashtag(tag) => Token::PlainText(tag.clone()),
-        }
-    }
-
-    fn owned(&self) -> Token<'static> {
-        match self {
-            Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())),
-            Token::Sequence(tokens) => Token::Sequence(tokens.iter().map(Token::owned).collect()),
-            Token::Quote(inner) => Token::Quote(Box::new(inner.owned())),
-            Token::Small(inner) => Token::Small(Box::new(inner.owned())),
-            Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.owned())),
-            Token::Bold(inner) => Token::Bold(Box::new(inner.owned())),
-            Token::Italic(inner) => Token::Italic(Box::new(inner.owned())),
-            Token::Center(inner) => Token::Center(Box::new(inner.owned())),
-            Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.owned())),
-            Token::PlainTag(tag) => Token::PlainTag(Cow::Owned(tag.clone().into_owned())),
-            Token::InlineCode(code) => Token::InlineCode(Cow::Owned(code.clone().into_owned())),
-            Token::InlineMath(math) => Token::InlineMath(Cow::Owned(math.clone().into_owned())),
-            Token::UrlRaw(url) => Token::UrlRaw(Cow::Owned(url.clone().into_owned())),
-            Token::UrlNoEmbed(url) => Token::UrlNoEmbed(Cow::Owned(url.clone().into_owned())),
-            Token::Link { embed, label, href } => Token::Link {
-                embed: *embed,
-                label: Box::new(label.owned()),
-                href: Cow::Owned(href.clone().into_owned()),
-            },
-            Token::BlockCode { inner, lang } => Token::BlockCode {
-                lang: lang.as_ref().map(|l| Cow::Owned(l.clone().into_owned())),
-                inner: Cow::Owned(inner.clone().into_owned()),
-            },
-            Token::BlockMath(math) => Token::BlockMath(Cow::Owned(math.clone().into_owned())),
-            Token::Function {
-                name,
-                params,
-                inner,
-            } => Token::Function {
-                name: Cow::Owned(name.clone().into_owned()),
-                params: params
-                    .iter()
-                    .map(|(k, v)| {
-                        (
-                            Cow::Owned(k.clone().into_owned()),
-                            v.as_ref().map(|val| Cow::Owned(val.clone().into_owned())),
-                        )
-                    })
-                    .collect(),
-                inner: Box::new(inner.owned()),
-            },
-            Token::Mention {
-                name,
-                host,
-                mention_type,
-            } => Token::Mention {
-                name: Cow::Owned(name.clone().into_owned()),
-                host: host.as_ref().map(|v| Cow::Owned(v.clone().into_owned())),
-                mention_type: *mention_type,
-            },
-            Token::UnicodeEmoji(code) => Token::UnicodeEmoji(Cow::Owned(code.clone().into_owned())),
-            Token::ShortcodeEmoji(shortcode) => {
-                Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
-            }
-            Token::Hashtag(tag) => Token::Hashtag(Cow::Owned(tag.clone().into_owned())),
+            Token::Mention { name, .. } => Token::PlainText(name.clone().into()),
+            Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()),
+            Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone().into()),
+            Token::Hashtag(tag) => Token::PlainText(tag.clone().into()),
         }
     }
 
@@ -214,7 +154,7 @@ impl Token<'_> {
                 let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
                     if let Some(Token::PlainText(last)) = acc.last_mut() {
                         if let Token::PlainText(tok_text) = tok {
-                            *last = Cow::from(last.to_string() + tok_text.as_ref());
+                            *last += tok_text.as_ref();
 
                             return acc;
                         }
@@ -229,7 +169,7 @@ impl Token<'_> {
                         for item in items {
                             if let Some(Token::PlainText(last)) = acc.last_mut() {
                                 if let Token::PlainText(tok_text) = item {
-                                    *last = Cow::from(last.to_string() + tok_text.as_ref());
+                                    *last += tok_text.as_ref();
 
                                     continue;
                                 }
@@ -301,23 +241,23 @@ impl SliceOffset for Span<'_> {
 }
 
 #[inline]
-fn boxing_token<'a>(func: impl Fn(Box<Token<'a>>) -> Token<'a>) -> impl Fn(Token<'a>) -> Token<'a> {
+fn boxing_token(func: impl Fn(Box<Token>) -> Token) -> impl Fn(Token) -> Token {
     move |tokens| func(Box::new(tokens))
 }
 
 #[inline]
-fn collect_sequence<'a, T>(
-    func: impl Fn(Vec<T>) -> Token<'a>,
-    transform: impl Fn(Token<'a>) -> Token<'a>,
-) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token<'a> {
+fn collect_sequence<T>(
+    func: impl Fn(Vec<T>) -> Token,
+    transform: impl Fn(Token) -> Token,
+) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token {
     move |tokens| transform(func(tokens.collect()))
 }
 
 #[inline]
-fn collect_char_sequence<'a>(
-    func: impl Fn(Cow<'a, str>) -> Token<'a>,
-) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token<'a> {
-    move |chars| func(Cow::Owned(chars.collect()))
+fn collect_char_sequence(
+    func: impl Fn(String) -> Token,
+) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token {
+    move |chars| func(chars.collect())
 }
 
 #[inline]
@@ -334,7 +274,7 @@ fn spliced<'a>(
     segments: &[Span<'a>],
     func: impl Fn(Span) -> IResult<Span, Token>,
     parent: Span<'a>,
-) -> IResult<Span<'a>, Token<'static>, nom::error::Error<Span<'a>>> {
+) -> IResult<Span<'a>, Token, nom::error::Error<Span<'a>>> {
     let combined = segments
         .iter()
         .copied()
@@ -362,7 +302,7 @@ fn spliced<'a>(
 
     let quote_span = Span::new(&combined);
     let (input, inner) = match func(quote_span) {
-        Ok((input, token)) => (input, token.owned()),
+        Ok(s) => s,
         Err(e) => {
             return match e {
                 NE::Error(e) => {
@@ -393,7 +333,7 @@ fn spliced<'a>(
         parent
     };
 
-    Ok((out, inner.owned()))
+    Ok((out, inner))
 }
 
 fn space(input: Span) -> IResult<Span, Token> {
@@ -404,7 +344,7 @@ fn space(input: Span) -> IResult<Span, Token> {
 #[derive(Copy, Clone)]
 struct Matcher<'a, 'b, T: Clone> {
     matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
-    collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
+    collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
     _phantom_closure: PhantomData<&'a ()>,
     _phantom_data: PhantomData<&'b ()>,
     _phantom_output: PhantomData<fn() -> T>,
@@ -413,7 +353,7 @@ struct Matcher<'a, 'b, T: Clone> {
 impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> {
     fn new(
         matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
-        collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
+        collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
     ) -> Self {
         Self {
             matcher_inner,
@@ -471,27 +411,27 @@ impl Context {
     #[inline]
     fn partial(
         &self,
-        func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
-    ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
+        func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token> + 'static,
+    ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token> + '_ {
         move |input| func(self, input)
     }
 
-    pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
     }
 
-    pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
     }
 
-    pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         map(
             many1(self.partial(Self::inline_label_safe_single)),
             Token::Sequence,
         )(input)
     }
 
-    fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         alt((
             self.partial(Self::tag_bold_italic_asterisk),
             self.partial(Self::tag_bold_italic_underscore),
@@ -502,7 +442,7 @@ impl Context {
         ))(input)
     }
 
-    fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, token) = alt((
             self.partial(Self::unicode_emoji),
             alt((
@@ -527,12 +467,12 @@ impl Context {
             self.partial(Self::shortcode_emoji),
             self.partial(Self::link),
             self.partial(Self::raw_url),
-            self.partial(Self::text),
+            self.partial(Self::tag_raw_text),
         ))(input)?;
         Ok((input, token))
     }
 
-    fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, token) = alt((
             self.partial(Self::unicode_emoji),
             self.partial(Self::tag_small),
@@ -551,12 +491,12 @@ impl Context {
             self.partial(Self::shortcode_emoji),
             self.partial(Self::link),
             self.partial(Self::raw_url),
-            self.partial(Self::text),
+            self.partial(Self::tag_raw_text),
         ))(input)?;
         Ok((input, token))
     }
 
-    fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, token) = alt((
             self.partial(Self::unicode_emoji),
             self.partial(Self::url_no_embed),
@@ -567,12 +507,12 @@ impl Context {
             self.partial(Self::tag_hashtag),
             self.partial(Self::shortcode_emoji),
             self.partial(Self::raw_url),
-            self.partial(Self::text),
+            self.partial(Self::tag_raw_text),
         ))(input)?;
         Ok((input, token))
     }
 
-    fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, token) = alt((
             self.partial(Self::unicode_emoji),
             self.partial(Self::tag_small),
@@ -584,12 +524,12 @@ impl Context {
             self.partial(Self::tag_strikethrough_tilde),
             self.partial(Self::tag_func),
             self.partial(Self::shortcode_emoji),
-            self.partial(Self::text),
+            self.partial(Self::tag_raw_text),
         ))(input)?;
         Ok((input, token))
     }
 
-    fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
 
         if let (None, None) = leading_spaces {
@@ -625,7 +565,7 @@ impl Context {
         Ok((input, Token::Quote(Box::new(inner))))
     }
 
-    fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let tag_start = &tag("<center>");
         let tag_end = &tag("</center>");
 
@@ -649,7 +589,7 @@ impl Context {
         ))
     }
 
-    fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let delim = &tag("```");
 
         let (input, _) = opt(line_ending)(input)?;
@@ -688,7 +628,7 @@ impl Context {
         ))
     }
 
-    fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let start = &tag("\\[");
         let end = &tag("\\]");
 
@@ -714,7 +654,7 @@ impl Context {
 
         Ok((
             input,
-            Token::BlockMath(Cow::Borrowed(math_span.into_fragment())),
+            Token::BlockMath(math_span.into_fragment().to_string()),
         ))
     }
 
@@ -726,7 +666,7 @@ impl Context {
         escape: bool,
         matcher: Matcher<'a, 'b, T>,
         fallback: Matcher<'a, 'b, S>,
-    ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_
+    ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token> + '_
     where
         FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
         FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
@@ -739,7 +679,7 @@ impl Context {
                 if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
                     return Ok((
                         input_escaped,
-                        Token::PlainText(Cow::Borrowed(mark.fragment())),
+                        Token::PlainText(mark.fragment().to_string().into()),
                     ));
                 }
             }
@@ -814,7 +754,7 @@ impl Context {
                     input,
                     Token::Sequence(vec![
                         Token::PlainText(begin.fragment_between(&post_open).into()),
-                        inner_tok.inner().owned(),
+                        inner_tok.inner(),
                         Token::PlainText(closing.into_fragment().into()),
                     ]),
                 ));
@@ -823,7 +763,7 @@ impl Context {
         }
     }
 
-    fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, _) = tag("$[")(input)?;
 
         let func_ident = |input| {
@@ -852,8 +792,8 @@ impl Context {
                 .into_iter()
                 .map(|(k, v)| {
                     (
-                        Cow::from(k.into_fragment()),
-                        v.map(|(_, val)| Cow::from(val.into_fragment())),
+                        k.into_fragment().to_string(),
+                        v.map(|(_, val)| val.into_fragment().to_string()),
                     )
                 })
                 .collect::<HashMap<_, _>>()
@@ -866,14 +806,14 @@ impl Context {
         Ok((
             input,
             Token::Function {
-                name: Cow::from(func_name),
+                name: func_name.to_string(),
                 params: args_out,
                 inner: Box::new(Token::Sequence(inner)),
             },
         ))
     }
 
-    fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let opening_tag = &tag("<plain>");
         let closing_tag = &tag("</plain>");
 
@@ -887,7 +827,7 @@ impl Context {
         Ok((input, Token::PlainTag(text.into())))
     }
 
-    fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             tag_no_case("<small>"),
             tag_no_case("</small>"),
@@ -903,7 +843,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             (tag("***"), FlankingRule::Lenient),
             (tag("***"), FlankingRule::Lenient),
@@ -919,7 +859,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             (tag("___"), FlankingRule::Strict),
             (tag("___"), FlankingRule::Strict),
@@ -935,7 +875,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             tag_no_case("<b>"),
             tag_no_case("</b>"),
@@ -951,7 +891,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             (tag("**"), FlankingRule::Lenient),
             (tag("**"), FlankingRule::Lenient),
@@ -967,7 +907,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             (tag("__"), FlankingRule::Strict),
             (tag("__"), FlankingRule::Strict),
@@ -983,7 +923,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             tag_no_case("<i>"),
             tag_no_case("</i>"),
@@ -999,7 +939,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             (tag("*"), FlankingRule::Lenient),
             (tag("*"), FlankingRule::Lenient),
@@ -1015,7 +955,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             (tag("_"), FlankingRule::Strict),
             (tag("_"), FlankingRule::Strict),
@@ -1031,7 +971,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             tag_no_case("<s>"),
             tag_no_case("</s>"),
@@ -1047,7 +987,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             (tag("~~"), FlankingRule::Lenient),
             (tag("~~"), FlankingRule::Lenient),
@@ -1076,7 +1016,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             tag("`"),
             |input| alt((tag("`"), tag("´")))(input),
@@ -1094,7 +1034,7 @@ impl Context {
         )(input)
     }
 
-    fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         self.tag_delimited(
             tag("\\("),
             tag("\\)"),
@@ -1109,12 +1049,12 @@ impl Context {
         )(input)
     }
 
-    fn text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let (input, text) = map(recognize(anychar), Span::into_fragment)(input)?;
-        Ok((input, Token::PlainText(text.into())))
+    fn tag_raw_text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
+        let (input, text) = anychar(input)?;
+        Ok((input, Token::PlainText(text.to_compact_string())))
     }
 
-    fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, url_span) = recognize(tuple((
             protocol,
             url_chars(|input| not(url_chars_base)(input), false),
@@ -1130,21 +1070,21 @@ impl Context {
             url
         };
 
-        Ok((input, Token::UrlRaw(Cow::from(final_url))))
+        Ok((input, Token::UrlRaw(final_url.to_string())))
     }
 
-    fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, _) = tag("<")(input)?;
         let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
         let (input, _) = tag(">")(input)?;
 
         Ok((
             input,
-            Token::UrlNoEmbed(Cow::from(url_span.into_fragment())),
+            Token::UrlNoEmbed(url_span.into_fragment().to_string()),
         ))
     }
 
-    fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, no_embed) = opt(tag("?"))(input)?;
         let (input, _) = tag("[")(input)?;
         let (input, _) = not(tag("["))(input)?;
@@ -1163,7 +1103,7 @@ impl Context {
         ))
     }
 
-    fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let frag = input.fragment();
         let Some(grapheme) = frag.graphemes(true).next() else {
             return fail(input);
@@ -1183,7 +1123,7 @@ impl Context {
         ))
     }
 
-    fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         if let (plain_out, Some(plain)) = map(
             opt(recognize(tuple((
                 alphanumeric1_unicode,
@@ -1209,7 +1149,7 @@ impl Context {
         Ok((input, Token::ShortcodeEmoji(shortcode.into())))
     }
 
-    fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         if let (plain_out, Some(plain)) = map(
             opt(recognize(tuple((
                 alt((tag("\\"), alphanumeric1_unicode)),
@@ -1257,7 +1197,7 @@ impl Context {
         ))
     }
 
-    fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         // TODO: Skip when preceded by alphanumerics
 
         let (input, _) = tag("#")(input)?;
@@ -1319,11 +1259,10 @@ fn url_chars<'a, T: 'a>(
 mod test {
     use crate::{url_chars, Context, Span, Token};
     use nom::bytes::complete::tag;
-    use std::borrow::Cow;
     use std::collections::HashMap;
 
     fn parse_full(string: &str) -> Token {
-        Context.full(Span::new(string)).unwrap().1.merged().owned()
+        Context.full(Span::new(string)).unwrap().1.merged()
     }
 
     #[test]
@@ -1795,7 +1734,7 @@ text</center>"#
             Token::Sequence(
                 vec!["🥺", "💜", "❤️", "🦊"]
                     .into_iter()
-                    .map(<&str as Into<Cow<_>>>::into)
+                    .map(str::to_string)
                     .map(Token::UnicodeEmoji)
                     .collect::<Vec<_>>()
             )

From 86d5c87e9a20a05583d831bfef65fe6bcf6413bc Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Mon, 16 Oct 2023 23:45:45 +0200
Subject: [PATCH 21/23] MMM: Nesting-limited parsing

---
 Cargo.lock                     |   1 +
 magnetar_mmm_parser/Cargo.toml |   1 +
 magnetar_mmm_parser/src/lib.rs | 438 +++++++++++++++++++++++----------
 3 files changed, 313 insertions(+), 127 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index cb3905d..35e50cd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1649,6 +1649,7 @@ dependencies = [
  "emojis",
  "nom",
  "nom_locate",
+ "tracing",
  "unicode-segmentation",
 ]
 
diff --git a/magnetar_mmm_parser/Cargo.toml b/magnetar_mmm_parser/Cargo.toml
index 14e36f7..d7b9b2d 100644
--- a/magnetar_mmm_parser/Cargo.toml
+++ b/magnetar_mmm_parser/Cargo.toml
@@ -10,4 +10,5 @@ emojis = { workspace = true }
 nom = { workspace = true }
 nom_locate = { workspace = true }
 compact_str = { workspace = true }
+tracing = { workspace = true }
 unicode-segmentation = { workspace = true }
diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index b940145..2f76532 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -7,14 +7,15 @@ use nom::character::complete::{
     satisfy, space1, tab,
 };
 use nom::combinator::{eof, fail, map, not, opt, recognize};
-use nom::error::ErrorKind;
+use nom::error::{ErrorKind, ParseError};
 use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
-use nom::{IResult, Offset, Slice};
+use nom::{IResult, Offset, Parser, Slice};
 use nom_locate::LocatedSpan;
 use std::collections::HashMap;
 use std::convert::{identity, Infallible};
 use std::marker::PhantomData;
+use tracing::trace;
 use unicode_segmentation::UnicodeSegmentation;
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
@@ -217,7 +218,18 @@ impl Token {
     }
 }
 
-type Span<'a> = LocatedSpan<&'a str>;
+#[derive(Debug, Default, Copy, Clone)]
+pub struct SpanMeta {
+    depth: usize,
+}
+
+impl SpanMeta {
+    fn new(depth: usize) -> Self {
+        Self { depth }
+    }
+}
+
+type Span<'a> = LocatedSpan<&'a str, SpanMeta>;
 
 trait SliceOffset {
     fn up_to(&self, other: &Self) -> Self;
@@ -300,7 +312,10 @@ fn spliced<'a>(
     type NE<E> = nom::Err<E>;
     type NomError<'x> = nom::error::Error<Span<'x>>;
 
-    let quote_span = Span::new(&combined);
+    let quote_span = Span::new_extra(
+        &combined,
+        segments.first().map_or(SpanMeta::new(0), |s| s.extra),
+    );
     let (input, inner) = match func(quote_span) {
         Ok(s) => s,
         Err(e) => {
@@ -311,7 +326,10 @@ fn spliced<'a>(
                         let offset = offset_new - offset_seg_new;
                         let offset_orig = offset + seg_parent.location_offset();
                         Err(NE::Error(NomError::new(
-                            Span::new(&parent.into_fragment()[offset_orig..]),
+                            Span::new_extra(
+                                &parent.into_fragment()[offset_orig..],
+                                seg_parent.extra,
+                            ),
                             e.code,
                         )))
                     } else {
@@ -405,9 +423,70 @@ impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<T> for FlankingDel
     }
 }
 
-pub struct Context;
+/// Parser context holding the configuration shared by the parsing entry points.
+pub struct Context {
+    /// Nesting depth at which `increase_nesting` makes the wrapped parser fail.
+    depth_limit: usize,
+}
+
+/// Nesting depth limit used by the `Default` implementation of `Context`.
+const DEFAULT_DEPTH_LIMIT: usize = 24;
+
+impl Default for Context {
+    fn default() -> Self {
+        Context::new(DEFAULT_DEPTH_LIMIT)
+    }
+}
 
 impl Context {
+    /// Creates a context with the given nesting depth limit.
+    pub fn new(depth_limit: usize) -> Self {
+        Self { depth_limit }
+    }
+
+    /// Parses `input` with the `full` grammar and returns the merged token.
+    ///
+    /// When the parser fails, the failure is traced and `input` is returned
+    /// unchanged as plain text, so the error text never reaches the output.
+    pub fn parse_full(&self, input: &str) -> Token {
+        match self.full(Span::new_extra(input, SpanMeta::default())) {
+            Ok((_, t)) => t.merged(),
+            Err(e) => {
+                trace!(input = input, "Full parser fail: {:?}", e);
+                Token::PlainText(input.into())
+            }
+        }
+    }
+
+    /// Parses `input` with the `inline` grammar and returns the merged token.
+    ///
+    /// When the parser fails, the failure is traced and `input` is returned
+    /// unchanged as plain text.
+    pub fn parse_inline(&self, input: &str) -> Token {
+        match self.inline(Span::new_extra(input, SpanMeta::default())) {
+            Ok((_, t)) => t.merged(),
+            Err(e) => {
+                trace!(input = input, "Inline parser fail: {:?}", e);
+                Token::PlainText(input.into())
+            }
+        }
+    }
+
+    /// Parses `input` with the `inline_ui` grammar (Unicode emoji, shortcode
+    /// emoji and raw text only) and returns the merged token.
+    ///
+    /// When the parser fails, the failure is traced and `input` is returned
+    /// unchanged as plain text.
+    pub fn parse_ui(&self, input: &str) -> Token {
+        match self.inline_ui(Span::new_extra(input, SpanMeta::default())) {
+            Ok((_, t)) => t.merged(),
+            Err(e) => {
+                trace!(input = input, "UI parser fail: {:?}", e);
+                Token::PlainText(input.into())
+            }
+        }
+    }
+
     #[inline]
     fn partial(
         &self,
@@ -416,6 +478,14 @@ impl Context {
         move |input| func(self, input)
     }
 
+    #[inline]
+    fn partial_span(
+        &self,
+        func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'static,
+    ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>> + '_ {
+        move |input| func(self, input)
+    }
+
     pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
     }
@@ -431,6 +501,17 @@ impl Context {
         )(input)
     }
 
+    fn inline_ui<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
+        map(
+            many1(alt((
+                self.partial(Self::unicode_emoji),
+                self.partial(Self::shortcode_emoji),
+                self.partial(Self::tag_raw_text),
+            ))),
+            Token::Sequence,
+        )(input)
+    }
+
     fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         alt((
             self.partial(Self::tag_bold_italic_asterisk),
@@ -444,69 +525,72 @@ impl Context {
 
     fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, token) = alt((
-            self.partial(Self::unicode_emoji),
-            alt((
+            self.increase_nesting(alt((
+                self.partial(Self::unicode_emoji),
                 self.partial(Self::tag_block_center),
                 self.partial(Self::tag_small),
                 self.partial(Self::tag_plain),
                 self.partial(Self::tag_bold),
                 self.partial(Self::tag_italic),
                 self.partial(Self::tag_strikethrough),
-            )),
-            self.partial(Self::url_no_embed),
-            self.partial(Self::base_bold_italic),
-            self.partial(Self::tag_block_code),
-            self.partial(Self::tag_inline_code),
-            self.partial(Self::tag_quote),
-            self.partial(Self::tag_block_math),
-            self.partial(Self::tag_inline_math),
-            self.partial(Self::tag_strikethrough_tilde),
-            self.partial(Self::tag_func),
-            self.partial(Self::tag_mention),
-            self.partial(Self::tag_hashtag),
-            self.partial(Self::shortcode_emoji),
-            self.partial(Self::link),
-            self.partial(Self::raw_url),
+                self.partial(Self::url_no_embed),
+                self.partial(Self::base_bold_italic),
+                self.partial(Self::tag_block_code),
+                self.partial(Self::tag_inline_code),
+                self.partial(Self::tag_quote),
+                self.partial(Self::tag_block_math),
+                self.partial(Self::tag_inline_math),
+                self.partial(Self::tag_strikethrough_tilde),
+                self.partial(Self::tag_func),
+                self.partial(Self::tag_mention),
+                self.partial(Self::tag_hashtag),
+                self.partial(Self::shortcode_emoji),
+                self.partial(Self::link),
+                self.partial(Self::raw_url),
+            ))),
             self.partial(Self::tag_raw_text),
         ))(input)?;
         Ok((input, token))
     }
 
     fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-        let (input, token) = alt((
-            self.partial(Self::unicode_emoji),
-            self.partial(Self::tag_small),
-            self.partial(Self::tag_plain),
-            self.partial(Self::tag_bold),
-            self.partial(Self::tag_italic),
-            self.partial(Self::tag_strikethrough),
-            self.partial(Self::url_no_embed),
-            self.partial(Self::base_bold_italic),
-            self.partial(Self::tag_inline_code),
-            self.partial(Self::tag_inline_math),
-            self.partial(Self::tag_strikethrough_tilde),
-            self.partial(Self::tag_func),
-            self.partial(Self::tag_mention),
-            self.partial(Self::tag_hashtag),
-            self.partial(Self::shortcode_emoji),
-            self.partial(Self::link),
-            self.partial(Self::raw_url),
+        alt((
+            self.increase_nesting(alt((
+                self.partial(Self::unicode_emoji),
+                self.partial(Self::tag_small),
+                self.partial(Self::tag_plain),
+                self.partial(Self::tag_bold),
+                self.partial(Self::tag_italic),
+                self.partial(Self::tag_strikethrough),
+                self.partial(Self::url_no_embed),
+                self.partial(Self::base_bold_italic),
+                self.partial(Self::tag_inline_code),
+                self.partial(Self::tag_inline_math),
+                self.partial(Self::tag_strikethrough_tilde),
+                self.partial(Self::tag_func),
+                self.partial(Self::tag_mention),
+                self.partial(Self::tag_hashtag),
+                self.partial(Self::shortcode_emoji),
+                self.partial(Self::link),
+                self.partial(Self::raw_url),
+            ))),
             self.partial(Self::tag_raw_text),
-        ))(input)?;
-        Ok((input, token))
+        ))(input)
     }
 
     fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, token) = alt((
-            self.partial(Self::unicode_emoji),
-            self.partial(Self::url_no_embed),
-            self.partial(Self::tag_inline_code),
-            self.partial(Self::tag_inline_math),
-            self.partial(Self::tag_func),
-            self.partial(Self::tag_mention),
-            self.partial(Self::tag_hashtag),
-            self.partial(Self::shortcode_emoji),
-            self.partial(Self::raw_url),
+            self.increase_nesting(alt((
+                self.partial(Self::unicode_emoji),
+                self.partial(Self::url_no_embed),
+                self.partial(Self::tag_inline_code),
+                self.partial(Self::tag_inline_math),
+                self.partial(Self::tag_func),
+                self.partial(Self::tag_mention),
+                self.partial(Self::tag_hashtag),
+                self.partial(Self::shortcode_emoji),
+                self.partial(Self::raw_url),
+            ))),
             self.partial(Self::tag_raw_text),
         ))(input)?;
         Ok((input, token))
@@ -514,16 +598,18 @@ impl Context {
 
     fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, token) = alt((
-            self.partial(Self::unicode_emoji),
-            self.partial(Self::tag_small),
-            self.partial(Self::tag_plain),
-            self.partial(Self::tag_bold),
-            self.partial(Self::tag_italic),
-            self.partial(Self::tag_strikethrough),
-            self.partial(Self::base_bold_italic),
-            self.partial(Self::tag_strikethrough_tilde),
-            self.partial(Self::tag_func),
-            self.partial(Self::shortcode_emoji),
+            self.increase_nesting(alt((
+                self.partial(Self::unicode_emoji),
+                self.partial(Self::tag_small),
+                self.partial(Self::tag_plain),
+                self.partial(Self::tag_bold),
+                self.partial(Self::tag_italic),
+                self.partial(Self::tag_strikethrough),
+                self.partial(Self::base_bold_italic),
+                self.partial(Self::tag_strikethrough_tilde),
+                self.partial(Self::tag_func),
+                self.partial(Self::shortcode_emoji),
+            ))),
             self.partial(Self::tag_raw_text),
         ))(input)?;
         Ok((input, token))
@@ -1056,8 +1142,11 @@ impl Context {
 
     fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, url_span) = recognize(tuple((
-            protocol,
-            url_chars(|input| not(url_chars_base)(input), false),
+            self.partial_span(Self::protocol),
+            self.url_chars(
+                |input| recognize(not(self.partial_span(Self::url_chars_base)))(input),
+                false,
+            ),
         )))(input)?;
 
         let url = url_span.into_fragment();
@@ -1075,7 +1164,10 @@ impl Context {
 
     fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, _) = tag("<")(input)?;
-        let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
+        let (input, url_span) = recognize(tuple((
+            self.partial_span(Self::protocol),
+            self.url_chars(tag(">"), true),
+        )))(input)?;
         let (input, _) = tag(">")(input)?;
 
         Ok((
@@ -1090,7 +1182,10 @@ impl Context {
         let (input, _) = not(tag("["))(input)?;
         let (input, (label_tok, _)) =
             many_till(self.partial(Self::inline_label_safe_single), tag("]("))(input)?;
-        let (input, url_span) = recognize(tuple((protocol, url_chars(tag(")"), true))))(input)?;
+        let (input, url_span) = recognize(tuple((
+            self.partial_span(Self::protocol),
+            self.url_chars(tag(")"), true),
+        )))(input)?;
         let (input, _) = tag(")")(input)?;
 
         Ok((
@@ -1202,74 +1297,136 @@ impl Context {
 
         let (input, _) = tag("#")(input)?;
 
-        let (input, hashtag_text) =
-            map(recognize(many1(hashtag_chars)), Span::into_fragment)(input)?;
+        let (input, hashtag_text) = map(
+            recognize(many1(self.partial_span(Self::hashtag_chars))),
+            Span::into_fragment,
+        )(input)?;
 
         Ok((input, Token::Hashtag(hashtag_text.into())))
     }
-}
 
-#[inline]
-fn hashtag_chars(input: Span) -> IResult<Span, Span> {
-    recognize(alt((
-        recognize(tuple((tag("("), hashtag_chars, tag(")")))),
-        recognize(tuple((tag("["), hashtag_chars, tag("]")))),
-        recognize(tuple((tag("「"), hashtag_chars, tag("」")))),
-        recognize(tuple((tag("（"), hashtag_chars, tag("）")))),
-        recognize(tuple((
-            not(space1),
-            not_line_ending,
-            not(one_of(".,:;!?#?/[]【】()「」（）<>")),
-            anychar,
-        ))),
-    )))(input)
-}
+    #[inline]
+    fn increase_nesting<'a, 'b, O, F>(
+        &'b self,
+        mut func: F,
+    ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O> + 'b
+    where
+        F: Parser<Span<'a>, O, nom::error::Error<Span<'a>>> + 'b,
+    {
+        move |mut input| {
+            if input.extra.depth >= self.depth_limit {
+                return fail(input);
+            }
 
-#[inline]
-fn protocol(input: Span) -> IResult<Span, Span> {
-    alt((tag("https://"), tag("http://")))(input)
-}
+            input.extra.depth += 1;
+            func.parse(input)
+        }
+    }
 
-#[inline]
-fn url_chars_base(input: Span) -> IResult<Span, Span> {
-    alt((
-        alphanumeric1_unicode,
-        recognize(tuple((tag("["), many_till(url_chars_base, tag("]"))))),
-        recognize(tuple((tag("("), many_till(url_chars_base, tag(")"))))),
-        recognize(one_of(".,_/:%#$&?!~=+-@")),
-    ))(input)
-}
+    #[inline]
+    fn hashtag_chars<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
+        recognize(alt((
+            recognize(tuple((
+                tag("("),
+                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
+                tag(")"),
+            ))),
+            recognize(tuple((
+                tag("["),
+                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
+                tag("]"),
+            ))),
+            recognize(tuple((
+                tag("「"),
+                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
+                tag("」"),
+            ))),
+            recognize(tuple((
+                tag("（"),
+                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
+                tag("）"),
+            ))),
+            recognize(tuple((
+                not(space1),
+                not_line_ending,
+                not(one_of(".,:;!?#?/[]【】()「」（）<>")),
+                anychar,
+            ))),
+        )))(input)
+    }
 
-#[inline]
-fn url_chars<'a, T: 'a>(
-    terminator: impl Fn(Span<'a>) -> IResult<Span<'a>, T> + 'a,
-    spaces: bool,
-) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'a {
-    let chars = tuple((
-        not(tuple((space1, eof))),
-        not(tuple((space1, tag("\"")))),
-        not(tuple((opt(space1), terminator))),
-        alt((url_chars_base, if spaces { space1 } else { fail })),
-    ));
+    #[inline]
+    fn protocol<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
+        alt((tag("https://"), tag("http://")))(input)
+    }
 
-    recognize(many1_count(chars))
+    #[inline]
+    fn url_chars_base<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
+        alt((
+            alphanumeric1_unicode,
+            recognize(tuple((
+                tag("["),
+                many_till(
+                    self.increase_nesting(self.partial_span(Self::url_chars_base)),
+                    tag("]"),
+                ),
+            ))),
+            recognize(tuple((
+                tag("("),
+                many_till(
+                    self.increase_nesting(self.partial_span(Self::url_chars_base)),
+                    tag(")"),
+                ),
+            ))),
+            recognize(one_of(".,_/:%#$&?!~=+-@")),
+        ))(input)
+    }
+
+    #[inline]
+    fn url_chars<'a, 'b, F>(
+        &'b self,
+        mut terminator: F,
+        spaces: bool,
+    ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'b
+    where
+        F: Parser<Span<'a>, Span<'a>, nom::error::Error<Span<'a>>> + 'b,
+    {
+        move |input| {
+            recognize(many1_count(tuple((
+                not(tuple((space1, eof))),
+                not(tuple((space1, tag("\"")))),
+                not(tuple((opt(space1), |input| terminator.parse(input)))),
+                alt((
+                    |input| self.url_chars_base(input),
+                    if spaces { space1 } else { fail },
+                )),
+            ))))(input)
+        }
+    }
 }
 
 #[cfg(test)]
 mod test {
-    use crate::{url_chars, Context, Span, Token};
+    use crate::{Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT};
     use nom::bytes::complete::tag;
     use std::collections::HashMap;
 
     fn parse_full(string: &str) -> Token {
-        Context.full(Span::new(string)).unwrap().1.merged()
+        Context::default()
+            .full(Span::new_extra(string, SpanMeta::default()))
+            .unwrap()
+            .1
+            .merged()
     }
 
     #[test]
     fn parse_url_chars() {
+        let ctx = Context::default();
+
         assert_eq!(
-            url_chars(tag(")"), true)(Span::new(
-                "https://en.wikipedia.org/wiki/Sandbox_(computer_security))"
+            ctx.url_chars(tag(")"), true)(Span::new_extra(
+                "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
+                SpanMeta::default()
             ))
             .unwrap()
             .1
@@ -1278,8 +1435,9 @@ mod test {
         );
 
         assert_eq!(
-            url_chars(tag(")"), true)(Span::new(
-                "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))"
+            ctx.url_chars(tag(")"), true)(Span::new_extra(
+                "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))",
+                SpanMeta::default()
             ))
             .unwrap()
             .1
@@ -1288,26 +1446,35 @@ mod test {
         );
 
         assert_eq!(
-            url_chars(tag(")"), true)(Span::new("https://cs.wikipedia.org/wiki/Among_Us  "))
-                .unwrap()
-                .1
-                .into_fragment(),
+            ctx.url_chars(tag(")"), true)(Span::new_extra(
+                "https://cs.wikipedia.org/wiki/Among_Us  ",
+                SpanMeta::default()
+            ))
+            .unwrap()
+            .1
+            .into_fragment(),
             "https://cs.wikipedia.org/wiki/Among_Us",
         );
 
         assert_eq!(
-            url_chars(tag(")"), true)(Span::new("https://cs.wikipedia.org/wiki/Among Us  )"))
-                .unwrap()
-                .1
-                .into_fragment(),
+            ctx.url_chars(tag(")"), true)(Span::new_extra(
+                "https://cs.wikipedia.org/wiki/Among Us  )",
+                SpanMeta::default()
+            ))
+            .unwrap()
+            .1
+            .into_fragment(),
             "https://cs.wikipedia.org/wiki/Among Us"
         );
 
         assert_eq!(
-            url_chars(tag(")"), false)(Span::new("https://en.wikipedia.org/wiki/Among Us  )"))
-                .unwrap()
-                .1
-                .into_fragment(),
+            ctx.url_chars(tag(")"), false)(Span::new_extra(
+                "https://en.wikipedia.org/wiki/Among Us  )",
+                SpanMeta::default()
+            ))
+            .unwrap()
+            .1
+            .into_fragment(),
             "https://en.wikipedia.org/wiki/Among"
         );
     }
@@ -1593,6 +1760,23 @@ text</center>"#
         );
     }
 
+    #[test]
+    fn limit_nesting() {
+        let mut tok = Token::PlainText(" <s><i>test</i></s> ".into());
+        for _ in 0..DEFAULT_DEPTH_LIMIT {
+            tok = Token::Bold(Box::new(tok));
+        }
+
+        assert_eq!(
+            parse_full(
+                &("<b>".repeat(DEFAULT_DEPTH_LIMIT)
+                    + " <s><i>test</i></s> "
+                    + &*"</b>".repeat(DEFAULT_DEPTH_LIMIT))
+            ),
+            tok
+        );
+    }
+
     #[test]
     fn parse_mention() {
         assert_eq!(

From 42fa83c6e248070cc8cd31ef03ce9310b1b87410 Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Mon, 23 Oct 2023 23:52:02 +0200
Subject: [PATCH 22/23] MMM: Fixed hashtag parsing

---
 magnetar_mmm_parser/src/lib.rs | 35 ++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 2f76532..4806587 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -7,7 +7,7 @@ use nom::character::complete::{
     satisfy, space1, tab,
 };
 use nom::combinator::{eof, fail, map, not, opt, recognize};
-use nom::error::{ErrorKind, ParseError};
+use nom::error::ErrorKind;
 use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
 use nom::{IResult, Offset, Parser, Slice};
@@ -277,6 +277,14 @@ fn alpha1_unicode(input: Span) -> IResult<Span, Span> {
     recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
 }
 
+#[inline]
+fn space1_unicode(input: Span) -> IResult<Span, Span> {
+    recognize(many1_count(tuple((
+        not(line_ending),
+        satisfy(char::is_whitespace),
+    ))))(input)
+}
+
 #[inline]
 fn alphanumeric1_unicode(input: Span) -> IResult<Span, Span> {
     recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
@@ -1293,7 +1301,12 @@ impl Context {
     }
 
     fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-        // TODO: Skip when preceded by alphanumerics
+        let (input, maybe_preceded) =
+            opt(recognize(tuple((alphanumeric1_unicode, tag("#")))))(input)?;
+
+        if let Some(preceded) = maybe_preceded {
+            return Ok((input, Token::PlainText(preceded.into_fragment().into())));
+        }
 
         let (input, _) = tag("#")(input)?;
 
@@ -1347,8 +1360,8 @@ impl Context {
                 tag("）"),
             ))),
             recognize(tuple((
-                not(space1),
-                not_line_ending,
+                not(space1_unicode),
+                not(line_ending),
                 not(one_of(".,:;!?#?/[]【】()「」（）<>")),
                 anychar,
             ))),
@@ -1699,6 +1712,20 @@ text</center>"#
             ])
         );
 
+        assert_eq!(
+            parse_full("test #hashtag tail"),
+            Token::Sequence(vec![
+                Token::PlainText("test ".into()),
+                Token::Hashtag("hashtag".into()),
+                Token::PlainText(" tail".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("not#hashtag tail"),
+            Token::PlainText("not#hashtag tail".into())
+        );
+
         assert_eq!(
             parse_full("<https://example.com>"),
             Token::UrlNoEmbed("https://example.com".into())

From c4a8ebebf38a766bfc41f9b303d3bb090140ea3c Mon Sep 17 00:00:00 2001
From: Natty <natty.sh.git@gmail.com>
Date: Tue, 24 Oct 2023 00:27:54 +0200
Subject: [PATCH 23/23] MMM: Janky outer flanking rules implementation

---
 magnetar_mmm_parser/src/lib.rs | 37 ++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/magnetar_mmm_parser/src/lib.rs b/magnetar_mmm_parser/src/lib.rs
index 4806587..26661e6 100644
--- a/magnetar_mmm_parser/src/lib.rs
+++ b/magnetar_mmm_parser/src/lib.rs
@@ -6,7 +6,7 @@ use nom::character::complete::{
     alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
     satisfy, space1, tab,
 };
-use nom::combinator::{eof, fail, map, not, opt, recognize};
+use nom::combinator::{eof, fail, map, not, opt, peek, recognize};
 use nom::error::ErrorKind;
 use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
@@ -272,11 +272,6 @@ fn collect_char_sequence(
     move |chars| func(chars.collect())
 }
 
-#[inline]
-fn alpha1_unicode(input: Span) -> IResult<Span, Span> {
-    recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
-}
-
 #[inline]
 fn space1_unicode(input: Span) -> IResult<Span, Span> {
     recognize(many1_count(tuple((
@@ -778,6 +773,14 @@ impl Context {
                 }
             }
 
+            if let FlankingRule::Strict = opening_rule {
+                let (input, pre) =
+                    opt(recognize(tuple((alphanumeric1_unicode, &opening_tag))))(input)?;
+                if let Some(pre_text) = pre {
+                    return Ok((input, Token::PlainText(pre_text.into_fragment().into())));
+                }
+            }
+
             let begin = input;
             let (post_open, _) = opening_tag(input)?;
 
@@ -840,8 +843,12 @@ impl Context {
                     true
                 };
 
-            // TODO: Unfinished flanking rules
-            let correct_flanking = correct_left_flanking && correct_right_flanking;
+            let (input, alphanum) = opt(peek(alphanumeric1_unicode))(input)?;
+            let correct_right_outer =
+                alphanum.is_none() || !matches!(closing_rule, FlankingRule::Strict);
+
+            let correct_flanking =
+                correct_left_flanking && correct_right_flanking && correct_right_outer;
 
             if !correct_flanking {
                 return Ok((
@@ -1514,6 +1521,20 @@ mod test {
             Token::PlainText("* italic *".into())
         );
 
+        assert_eq!(
+            parse_full("snake_case_variable"),
+            Token::PlainText("snake_case_variable".into())
+        );
+
+        assert_eq!(
+            parse_full("intra*word*italic"),
+            Token::Sequence(vec![
+                Token::PlainText("intra".into()),
+                Token::Italic(Box::new(Token::PlainText("word".into()))),
+                Token::PlainText("italic".into())
+            ])
+        );
+
         assert_eq!(
             parse_full(r#"_ italic *"#),
             Token::PlainText("_ italic *".into())