magnetar/magnetar_mmm_parser/src/lib.rs

use compact_str::{CompactString, ToCompactString};
use either::Either;
use nom::branch::alt;
use nom::bytes::complete::{tag, tag_no_case};
use nom::character::complete::{
    alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
    satisfy, space1, tab,
};
use nom::combinator::{eof, fail, map, not, opt, peek, recognize};
use nom::error::ErrorKind;
use nom::multi::{many0_count, many1, many1_count, many_till, separated_list1};
use nom::sequence::tuple;
use nom::{IResult, Offset, Parser, Slice};
use nom_locate::LocatedSpan;
use quick_xml::events::{BytesText, Event};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::convert::{identity, Infallible};
use std::io::{Cursor, Write};
use std::marker::PhantomData;
use strum::IntoStaticStr;
use tracing::trace;
use unicode_segmentation::UnicodeSegmentation;

/// The kind of entity a [`Token::Mention`] points at.
///
/// Both the serde representation and the `&'static str` produced through
/// strum's `IntoStaticStr` use `snake_case` variant names.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, IntoStaticStr)]
// The alternative would be to implement a serde serializer for this one enum, but that's disgusting
#[strum(serialize_all = "snake_case")]
#[serde(rename_all = "snake_case")]
pub enum MentionType {
    /// Community mention; `to_char` yields `!`.
    Community,
    /// User mention; `to_char` yields `@`.
    User,
    /// Matrix user mention; `to_char` and `separator` both yield `:`.
    MatrixUser,
}

impl MentionType {
    pub fn to_char(&self) -> char {
        match self {
            MentionType::Community => '!',
            MentionType::User => '@',
            MentionType::MatrixUser => ':',
        }
    }

    pub fn separator(&self) -> char {
        match self {
            MentionType::Community | MentionType::User => '@',
            MentionType::MatrixUser => ':',
        }
    }
}

/// A node of the parsed markup tree.
///
/// The element names mentioned on the variants below are the ones emitted by
/// `Token::write` when the tree is serialized to XML.
#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
pub enum Token {
    /// Literal text; serialized as an XML text node.
    PlainText(CompactString),
    /// An ordered list of child tokens, serialized one after another.
    Sequence(Vec<Token>),
    /// Serialized as `<quote>`.
    Quote(Box<Token>),
    /// Serialized as `<small>`.
    Small(Box<Token>),
    /// Serialized as `<b><i>…</i></b>`.
    BoldItalic(Box<Token>),
    /// Serialized as `<b>`.
    Bold(Box<Token>),
    /// Serialized as `<i>`.
    Italic(Box<Token>),
    /// Serialized as `<center>`.
    Center(Box<Token>),
    /// Serialized as `<s>`.
    Strikethrough(Box<Token>),
    /// Text serialized as a bare XML text node, without a wrapping element.
    PlainTag(String),
    /// Serialized as `<inline-code>`.
    InlineCode(String),
    /// Serialized as `<inline-math>`.
    InlineMath(String),
    /// Serialized as `<a href="…">` with the URL as its text.
    UrlRaw(String),
    /// Like [`Token::UrlRaw`], but serialized with `embed="false"`.
    UrlNoEmbed(String),
    /// Serialized as `<a href="…" embed="true|false">` wrapping the label.
    Link {
        label: Box<Token>,
        href: String,
        embed: bool,
    },
    /// Serialized as `<code>` with an optional `lang` attribute.
    BlockCode {
        lang: Option<String>,
        inner: String,
    },
    /// Serialized as `<math>`.
    BlockMath(String),
    /// Serialized as `<fn name="…">`; every parameter becomes an `arg-<key>`
    /// attribute, with an empty value when the parameter has none.
    Function {
        name: String,
        params: HashMap<String, Option<String>>,
        inner: Box<Token>,
    },
    /// Serialized as an empty `<mention>` element with `name`, `type` and an
    /// optional `host` attribute.
    Mention {
        name: String,
        host: Option<String>,
        mention_type: MentionType,
    },
    /// Serialized as `<ue>`.
    UnicodeEmoji(String),
    /// Serialized as `<ee>` with an optional `host` attribute.
    ShortcodeEmoji {
        shortcode: String,
        host: Option<String>,
    },
    /// Serialized as `<hashtag>`.
    Hashtag(String),
}

impl Token {
    /// Returns the string content found at the left edge of this token, if any.
    ///
    /// Containers delegate to their first (or only) child; string-carrying
    /// tokens return their own string; code, math and shortcode-emoji tokens
    /// yield `None`.
    fn str_content_left(&self) -> Option<&str> {
        match self {
            Token::PlainText(text) => Some(text.as_str()),
            Token::Sequence(children) => children.first().and_then(Token::str_content_left),
            Token::Quote(child)
            | Token::Small(child)
            | Token::BoldItalic(child)
            | Token::Bold(child)
            | Token::Italic(child)
            | Token::Center(child)
            | Token::Strikethrough(child)
            | Token::Link { label: child, .. }
            | Token::Function { inner: child, .. } => child.str_content_left(),
            Token::PlainTag(s)
            | Token::UrlRaw(s)
            | Token::UrlNoEmbed(s)
            | Token::Mention { name: s, .. }
            | Token::UnicodeEmoji(s)
            | Token::Hashtag(s) => Some(s.as_str()),
            Token::InlineCode(_)
            | Token::InlineMath(_)
            | Token::BlockCode { .. }
            | Token::BlockMath(_)
            | Token::ShortcodeEmoji { .. } => None,
        }
    }

    /// Returns the string content found at the right edge of this token, if any.
    ///
    /// Containers delegate to their last (or only) child; string-carrying
    /// tokens return their own string; code, math and shortcode-emoji tokens
    /// yield `None`.
    fn str_content_right(&self) -> Option<&str> {
        match self {
            Token::PlainText(text) => Some(text.as_str()),
            Token::Sequence(children) => children.last().and_then(Token::str_content_right),
            Token::Quote(child)
            | Token::Small(child)
            | Token::BoldItalic(child)
            | Token::Bold(child)
            | Token::Italic(child)
            | Token::Center(child)
            | Token::Strikethrough(child)
            | Token::Link { label: child, .. }
            | Token::Function { inner: child, .. } => child.str_content_right(),
            Token::PlainTag(s)
            | Token::UrlRaw(s)
            | Token::UrlNoEmbed(s)
            | Token::Mention { name: s, .. }
            | Token::UnicodeEmoji(s)
            | Token::Hashtag(s) => Some(s.as_str()),
            Token::InlineCode(_)
            | Token::InlineMath(_)
            | Token::BlockCode { .. }
            | Token::BlockMath(_)
            | Token::ShortcodeEmoji { .. } => None,
        }
    }

    fn inner(&self) -> Token {
        match self {
            plain @ Token::PlainText(_) => plain.clone(),
            sequence @ Token::Sequence(_) => sequence.clone(),
            Token::Quote(inner) => inner.inner(),
            Token::Small(inner) => inner.inner(),
            Token::BoldItalic(inner) => inner.inner(),
            Token::Bold(inner) => inner.inner(),
            Token::Italic(inner) => inner.inner(),
            Token::Center(inner) => inner.inner(),
            Token::Strikethrough(inner) => inner.inner(),
            Token::PlainTag(text) => Token::PlainText(text.clone().into()),
            Token::InlineCode(code) => Token::PlainText(code.clone().into()),
            Token::InlineMath(math) => Token::PlainText(math.clone().into()),
            Token::UrlRaw(url) => Token::PlainText(url.clone().into()),
            Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()),
            Token::Link { label, .. } => label.inner(),
            Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()),
            Token::BlockMath(math) => Token::PlainText(math.clone().into()),
            Token::Function { inner, .. } => inner.inner(),
            Token::Mention { name, .. } => Token::PlainText(name.clone().into()),
            Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()),
            Token::ShortcodeEmoji { shortcode, .. } => Token::PlainText(shortcode.clone().into()),
            Token::Hashtag(tag) => Token::PlainText(tag.clone().into()),
        }
    }

    /// Returns a normalized copy of this token tree.
    ///
    /// Inside a sequence, directly nested sequences are spliced into the
    /// parent and adjacent plain-text tokens are concatenated. A sequence that
    /// ends up with exactly one element is replaced by that element. Wrapper
    /// tokens are rebuilt around their merged child; everything else is cloned.
    fn merged(&self) -> Token {
        // Pushes `item`, unless both it and the current tail are plain text,
        // in which case the text is appended to the tail instead.
        fn push_coalescing(acc: &mut Vec<Token>, item: Token) {
            if let Token::PlainText(text) = &item {
                if let Some(Token::PlainText(tail)) = acc.last_mut() {
                    tail.push_str(text.as_str());
                    return;
                }
            }

            acc.push(item);
        }

        // Merges a wrapper's child and boxes it again.
        fn rebox(child: &Token) -> Box<Token> {
            Box::new(child.merged())
        }

        match self {
            Token::Sequence(tokens) => {
                let mut flat = Vec::new();

                for tok in tokens {
                    match tok {
                        Token::Sequence(nested) => {
                            // Merge every nested element first, then splice the
                            // results (one level deep) into the parent list.
                            nested
                                .iter()
                                .map(Token::merged)
                                .flat_map(|merged| match merged {
                                    Token::Sequence(children) => {
                                        Either::Left(children.into_iter())
                                    }
                                    single => Either::Right(std::iter::once(single)),
                                })
                                .for_each(|item| push_coalescing(&mut flat, item));
                        }
                        other => push_coalescing(&mut flat, other.merged()),
                    }
                }

                if flat.len() == 1 {
                    flat.swap_remove(0)
                } else {
                    Token::Sequence(flat)
                }
            }
            Token::Quote(child) => Token::Quote(rebox(child)),
            Token::Small(child) => Token::Small(rebox(child)),
            Token::BoldItalic(child) => Token::BoldItalic(rebox(child)),
            Token::Bold(child) => Token::Bold(rebox(child)),
            Token::Italic(child) => Token::Italic(rebox(child)),
            Token::Center(child) => Token::Center(rebox(child)),
            Token::Strikethrough(child) => Token::Strikethrough(rebox(child)),
            Token::Link { embed, label, href } => Token::Link {
                label: rebox(label),
                href: href.clone(),
                embed: *embed,
            },
            Token::Function {
                name,
                params,
                inner,
            } => Token::Function {
                name: name.clone(),
                params: params.clone(),
                inner: rebox(inner),
            },
            leaf => leaf.clone(),
        }
    }

    /// Walks the tree in pre-order, calling `func` on every visited token and
    /// appending each `Some` result to `out`.
    ///
    /// Children of sequences, formatting wrappers, quotes, functions and link
    /// labels are visited; all other tokens are leaves.
    pub fn walk_map_collect<T>(&self, func: &impl Fn(&Token) -> Option<T>, out: &mut Vec<T>) {
        // The node itself comes before any of its children.
        out.extend(func(self));

        match self {
            Token::Sequence(children) => {
                for child in children {
                    child.walk_map_collect(func, out);
                }
            }
            Token::Quote(child)
            | Token::Small(child)
            | Token::BoldItalic(child)
            | Token::Bold(child)
            | Token::Italic(child)
            | Token::Center(child)
            | Token::Strikethrough(child)
            | Token::Function { inner: child, .. }
            | Token::Link { label: child, .. } => child.walk_map_collect(func, out),
            _ => {}
        }
    }

    /// Applies `func` in place to every plain-text token reachable through
    /// sequences, formatting wrappers and functions.
    ///
    /// Quotes, link labels and all other tokens are left untouched.
    pub fn walk_speech_transform(&mut self, func: &impl Fn(&mut CompactString)) {
        match self {
            Token::PlainText(text) => func(text),
            Token::Sequence(children) => {
                for child in children.iter_mut() {
                    child.walk_speech_transform(func);
                }
            }
            Token::Small(child)
            | Token::BoldItalic(child)
            | Token::Bold(child)
            | Token::Italic(child)
            | Token::Center(child)
            | Token::Strikethrough(child)
            | Token::Function { inner: child, .. } => child.walk_speech_transform(func),
            _ => {}
        }
    }

    /// Serializes this token (and, recursively, its children) as XML events
    /// into `writer`.
    ///
    /// Returns the first error reported by the underlying `quick_xml` writer.
    fn write<T: Write>(&self, writer: &mut quick_xml::Writer<T>) -> quick_xml::Result<()> {
        match self {
            // Plain text becomes a bare text node
            Token::PlainText(plain) => {
                writer.write_event(Event::Text(BytesText::new(plain.as_str())))?;
            }
            // A sequence has no element of its own; children are written back to back
            Token::Sequence(sequence) => {
                sequence.iter().try_for_each(|item| item.write(writer))?;
            }
            Token::Quote(inner) => {
                writer
                    .create_element("quote")
                    .write_inner_content(|w| inner.write(w))?;
            }
            Token::Small(inner) => {
                writer
                    .create_element("small")
                    .write_inner_content(|w| inner.write(w))?;
            }
            // Bold-italic is expressed as <i> nested inside <b>
            Token::BoldItalic(inner) => {
                writer
                    .create_element("b")
                    .write_inner_content::<_, quick_xml::Error>(|w| {
                        w.create_element("i")
                            .write_inner_content(|w| inner.write(w))?;

                        Ok(())
                    })?;
            }
            Token::Bold(inner) => {
                writer
                    .create_element("b")
                    .write_inner_content(|w| inner.write(w))?;
            }
            Token::Italic(inner) => {
                writer
                    .create_element("i")
                    .write_inner_content(|w| inner.write(w))?;
            }
            Token::Center(inner) => {
                writer
                    .create_element("center")
                    .write_inner_content(|w| inner.write(w))?;
            }
            Token::Strikethrough(inner) => {
                writer
                    .create_element("s")
                    .write_inner_content(|w| inner.write(w))?;
            }
            // Written exactly like plain text: a text node without a wrapper
            Token::PlainTag(plain) => {
                writer.write_event(Event::Text(BytesText::new(plain.as_str())))?;
            }
            Token::InlineCode(code) => {
                writer
                    .create_element("inline-code")
                    .write_text_content(BytesText::new(code))?;
            }
            Token::InlineMath(math) => {
                writer
                    .create_element("inline-math")
                    .write_text_content(BytesText::new(math))?;
            }
            // Raw URLs use the URL itself as both the href and the link text
            Token::UrlRaw(url) => {
                writer
                    .create_element("a")
                    .with_attribute(("href", url.as_str()))
                    .write_text_content(BytesText::new(url))?;
            }
            Token::UrlNoEmbed(url) => {
                writer
                    .create_element("a")
                    .with_attribute(("href", url.as_str()))
                    .with_attribute(("embed", "false"))
                    .write_text_content(BytesText::new(url))?;
            }
            Token::Link { label, href, embed } => {
                writer
                    .create_element("a")
                    .with_attribute(("href", href.as_str()))
                    .with_attribute(("embed", if *embed { "true" } else { "false" }))
                    .write_inner_content(|w| label.write(w))?;
            }
            Token::BlockCode { inner, lang } => {
                let mut ew = writer.create_element("code");

                // The `lang` attribute is only emitted when a language was given
                if let Some(language) = lang {
                    ew = ew.with_attribute(("lang", language.as_str()));
                }

                ew.write_text_content(BytesText::new(inner))?;
            }
            Token::BlockMath(math) => {
                writer
                    .create_element("math")
                    .write_text_content(BytesText::new(math))?;
            }
            Token::Function {
                inner,
                name,
                params,
            } => {
                let mut ew = writer
                    .create_element("fn")
                    .with_attribute(("name", name.as_str()));

                // Each parameter becomes an `arg-<key>` attribute; valueless
                // parameters get an empty string. `params` is a `HashMap`, so the
                // attribute order follows its iteration order.
                for (k, v) in params {
                    ew = ew
                        .with_attribute((format!("arg-{k}").as_str(), v.as_deref().unwrap_or("")));
                }

                ew.write_inner_content(|w| inner.write(w))?;
            }
            Token::Mention {
                name,
                host,
                mention_type,
            } => {
                // `mention_type.into()` yields the snake_case variant name via `IntoStaticStr`
                let mut ew = writer
                    .create_element("mention")
                    .with_attribute(("name", name.as_str()))
                    .with_attribute(("type", mention_type.into()));

                if let Some(host) = host {
                    ew = ew.with_attribute(("host", host.as_str()));
                }

                // Mentions carry no content, only attributes
                ew.write_empty()?;
            }
            Token::UnicodeEmoji(text) => {
                writer
                    .create_element("ue")
                    .write_text_content(BytesText::new(text))?;
            }
            Token::ShortcodeEmoji { shortcode, host } => {
                let mut ew = writer.create_element("ee");

                if let Some(host) = host {
                    ew = ew.with_attribute(("host", host.as_str()));
                }

                ew.write_text_content(BytesText::new(shortcode))?;
            }
            Token::Hashtag(tag) => {
                writer
                    .create_element("hashtag")
                    .write_text_content(BytesText::new(tag.as_str()))?;
            }
        }

        Ok(())
    }
}

/// Serializes `token` into an XML string wrapped in a root `<mmm>` element.
///
/// # Errors
///
/// Returns an error if writing any XML event fails or if the produced bytes
/// are not valid UTF-8.
pub fn to_xml_string(token: &Token) -> quick_xml::Result<String> {
    let sink = Cursor::new(Vec::new());
    let mut xml = quick_xml::Writer::new(sink);

    xml.create_element("mmm")
        .write_inner_content(|w| token.write(w))?;

    // Unwrap the writer, then the cursor, to get at the raw buffer.
    let bytes = xml.into_inner().into_inner();
    let rendered = String::from_utf8(bytes)?;
    Ok(rendered)
}

/// Extra data carried by every [`Span`] through the parser.
#[derive(Debug, Default, Copy, Clone)]
pub struct SpanMeta {
    // Defaults to 0.
    // NOTE(review): presumably the current nesting depth, checked against
    // `Context::depth_limit` — the code that does so is not visible here.
    depth: usize,
}

impl SpanMeta {
    fn new(depth: usize) -> Self {
        Self { depth }
    }
}

/// Input type of all parsers in this file: a located string slice carrying [`SpanMeta`].
type Span<'a> = LocatedSpan<&'a str, SpanMeta>;

/// Helpers for extracting the part of an input that lies before another,
/// later position.
trait SliceOffset {
    /// Returns the prefix of `self` that ends where `other` begins.
    fn up_to(&self, other: &Self) -> Self;

    /// Like [`SliceOffset::up_to`], but returns the prefix as a plain string slice.
    fn fragment_between<'a>(&self, other: &Self) -> &'a str
    where
        Self: 'a;
}

impl SliceOffset for Span<'_> {
    /// Slices `self` from its start up to the offset at which `other` begins.
    fn up_to(&self, other: &Self) -> Self {
        let end = self.offset(other);
        self.slice(..end)
    }

    /// Returns the text between the start of `self` and the start of `other`.
    fn fragment_between<'a>(&self, other: &Self) -> &'a str
    where
        Self: 'a,
    {
        let head = self.up_to(other);
        head.into_fragment()
    }
}

/// Adapts a constructor taking a boxed token into one taking the token by
/// value; the argument is boxed before `func` is called.
#[inline]
fn boxing_token(func: impl Fn(Box<Token>) -> Token) -> impl Fn(Token) -> Token {
    move |token| {
        let boxed = Box::new(token);
        func(boxed)
    }
}

/// Builds a collector that gathers all items of an iterator into a `Vec`,
/// turns the `Vec` into a token with `func`, and finally passes that token
/// through `transform`.
#[inline]
fn collect_sequence<T>(
    func: impl Fn(Vec<T>) -> Token,
    transform: impl Fn(Token) -> Token,
) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token {
    move |items| {
        let sequence = func(items.collect());
        transform(sequence)
    }
}

/// Builds a collector that concatenates an iterator of characters into a
/// `String` and hands it to `func`.
#[inline]
fn collect_char_sequence(
    func: impl Fn(String) -> Token,
) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token {
    move |chars| {
        let text: String = chars.collect();
        func(text)
    }
}

/// Recognizes one or more Unicode whitespace characters, stopping in front of
/// a line ending.
#[inline]
fn space1_unicode(input: Span) -> IResult<Span, Span> {
    // A single whitespace character that does not start a line ending
    let inline_whitespace = tuple((not(line_ending), satisfy(char::is_whitespace)));

    recognize(many1_count(inline_whitespace))(input)
}

/// Recognizes one or more characters for which `char::is_alphanumeric` holds.
#[inline]
fn alphanumeric1_unicode(input: Span) -> IResult<Span, Span> {
    let alphanumeric_run = many1_count(satisfy(char::is_alphanumeric));

    recognize(alphanumeric_run)(input)
}

/// Runs `func` over the text of `segments` joined with `\n`, then translates
/// the resulting position back onto `parent`.
///
/// * `segments` - spans whose fragments are concatenated (newline-separated)
///   into a temporary string that `func` parses.
/// * `func` - the parser applied to the concatenated text.
/// * `parent` - the span that positions in the result are expressed against.
///
/// On success, returns the token produced by `func` together with a slice of
/// `parent` (or `parent` itself when no segment could be matched to the
/// remaining input). Errors from `func` are re-anchored onto `parent`, see the
/// comments in the body.
fn spliced<'a>(
    segments: &[Span<'a>],
    func: impl Fn(Span) -> IResult<Span, Token>,
    parent: Span<'a>,
) -> IResult<Span<'a>, Token, nom::error::Error<Span<'a>>> {
    // Owned buffer that `func` actually parses
    let combined = segments
        .iter()
        .copied()
        .map(Span::into_fragment)
        .collect::<Vec<_>>()
        .join("\n");
    // Running totals of the segment lengths. The `\n` separators inserted by
    // `join` above are not included in these sums.
    let cum_offset_combined = segments
        .iter()
        .scan(0, |acc, &x| {
            *acc += x.len();
            Some(*acc)
        })
        .collect::<Vec<_>>();
    // Picks a segment (and its cumulative offset) for a position in `combined`.
    // NOTE(review): `take_while(o > offset)` followed by `last()` — confirm this
    // selects the intended segment.
    let current_seg = |input: Span| {
        cum_offset_combined
            .iter()
            .enumerate()
            .take_while(|(_, &o)| o > input.location_offset())
            .map(|(i, o)| (segments[i], o))
            .last()
    };

    type NE<E> = nom::Err<E>;
    type NomError<'x> = nom::error::Error<Span<'x>>;

    // The temporary span inherits the extra data of the first segment
    let spliced_span = Span::new_extra(
        &combined,
        segments.first().map_or(SpanMeta::new(0), |s| s.extra),
    );
    let (input, inner) = match func(spliced_span) {
        Ok(s) => s,
        Err(e) => {
            // The error refers to `combined`, which does not outlive this
            // function, so it has to be rebuilt on top of `parent`.
            return match e {
                NE::Error(e) => {
                    let offset_new = e.input.location_offset();
                    if let Some((seg_parent, offset_seg_new)) = current_seg(e.input) {
                        let offset = offset_new - offset_seg_new;
                        let offset_orig = offset + seg_parent.location_offset();
                        Err(NE::Error(NomError::new(
                            Span::new_extra(
                                &parent.into_fragment()[offset_orig..],
                                seg_parent.extra,
                            ),
                            e.code,
                        )))
                    } else {
                        // ???
                        Err(NE::Failure(NomError::new(parent, ErrorKind::Fail)))
                    }
                }
                // A failure of the inner parser is reported as a recoverable
                // error located at `parent`
                NE::Failure(e) => Err(NE::Error(NomError::new(parent, e.code))),
                NE::Incomplete(i) => Err(NE::Incomplete(i)),
            };
        }
    };

    // Translate the remaining input back into a slice of `parent`
    let out = if let Some((seg_parent, offset_seg_new)) = current_seg(input) {
        let offset = input.location_offset() - offset_seg_new;
        let offset_orig = offset + seg_parent.location_offset();
        parent.slice(offset_orig..)
    } else {
        parent
    };

    Ok((out, inner))
}

fn space(input: Span) -> IResult<Span, Token> {
    let (input, frag) = recognize(alt((one_char('\u{0020}'), one_char('\u{3000}'), tab)))(input)?;
    Ok((input, Token::PlainText(frag.into_fragment().into())))
}

/// A borrowed pair of an item parser and a collector that folds the parsed
/// items into a single [`Token`].
///
/// `'a` is the lifetime of the borrowed closures, `'b` the lifetime of the
/// parsed input, and `T` the item type produced by the parser.
#[derive(Copy, Clone)]
struct Matcher<'a, 'b, T: Clone> {
    // Parser producing one item of type `T`
    matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
    // Turns an iterator of parsed items into the resulting token
    collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
    _phantom_closure: PhantomData<&'a ()>,
    _phantom_data: PhantomData<&'b ()>,
    _phantom_output: PhantomData<fn() -> T>,
}

impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> {
    fn new(
        matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
        collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
    ) -> Self {
        Self {
            matcher_inner,
            collector,
            _phantom_closure: PhantomData,
            _phantom_data: PhantomData,
            _phantom_output: PhantomData,
        }
    }
}

impl<'a, 'b> Matcher<'a, 'b, Infallible> {
    /// Creates a matcher that never matches: its parser is nom's `fail`, and
    /// since the item type is `Infallible` no item can ever be produced, so the
    /// collector is marked unreachable.
    // Don't break this invariant, else a monster will come at night and eat all your socks
    fn reject() -> Self {
        Self {
            matcher_inner: &fail::<_, Infallible, _>,
            collector: &|_| unreachable!(),
            _phantom_closure: PhantomData,
            _phantom_data: PhantomData,
            _phantom_output: PhantomData,
        }
    }
}

/// Rule attached to a [`FlankingDelim`].
///
/// NOTE(review): the code interpreting these variants is not visible here;
/// presumably they control how strictly a delimiter's surroundings are checked
/// — confirm against the delimiter-matching code.
#[derive(Copy, Clone, Debug)]
enum FlankingRule {
    Lenient,
    Strict,
    /// Used when a [`FlankingDelim`] is created from a bare parser function.
    DontCare,
}

/// A delimiter parser paired with its [`FlankingRule`].
///
/// Field 0 is the parser recognizing the delimiter, field 1 the rule; the
/// `PhantomData` only ties the input lifetime `'a` to the type.
struct FlankingDelim<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>>(
    T,
    FlankingRule,
    PhantomData<&'a ()>,
);

impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<(T, FlankingRule)>
    for FlankingDelim<'a, T>
{
    fn from((func, rule): (T, FlankingRule)) -> Self {
        FlankingDelim(func, rule, PhantomData)
    }
}

impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<T> for FlankingDelim<'a, T> {
    fn from(func: T) -> Self {
        FlankingDelim(func, FlankingRule::DontCare, PhantomData)
    }
}

/// Parser configuration; all parsing entry points are methods on this type.
pub struct Context {
    // NOTE(review): presumably the maximum nesting depth enforced while
    // parsing — the code that checks it is not visible here.
    depth_limit: usize,
}

/// Depth limit used by `Context::default()`.
const DEFAULT_DEPTH_LIMIT: usize = 24;

impl Default for Context {
    /// Creates a context whose depth limit is `DEFAULT_DEPTH_LIMIT`.
    fn default() -> Self {
        Self::new(DEFAULT_DEPTH_LIMIT)
    }
}

impl Context {
    pub fn new(depth_limit: usize) -> Self {
        Self { depth_limit }
    }

    /// Parses `input` with the full parser and returns the merged token tree.
    ///
    /// This never fails: if the parser reports an error, the error is traced
    /// and its textual form is returned as a single plain-text token.
    pub fn parse_full(&self, input: &str) -> Token {
        let span = Span::new_extra(input, SpanMeta::default());

        self.full(span).map_or_else(
            |e| {
                trace!(input = input, "Full parser fail: {:?}", e);
                Token::PlainText(e.to_compact_string())
            },
            |(_, parsed)| parsed.merged(),
        )
    }

    /// Parses `input` with the inline parser and returns the merged token tree.
    ///
    /// Only the constructs accepted by [`Context::inline`] are recognized;
    /// block-level constructs that are exclusive to the full parser are not.
    ///
    /// This never fails: if the parser reports an error, the error is traced
    /// and its textual form is returned as a single plain-text token.
    pub fn parse_inline(&self, input: &str) -> Token {
        // Use the inline grammar here; `self.full` would also accept
        // block-level constructs, which is what `parse_full` is for.
        match self.inline(Span::new_extra(input, SpanMeta::default())) {
            Ok((_, t)) => t.merged(),
            Err(e) => {
                trace!(input = input, "Inline parser fail: {:?}", e);
                Token::PlainText(e.to_compact_string())
            }
        }
    }

    /// Parses `input` with the UI parser (Unicode emoji, shortcode emoji and
    /// raw text only) and returns the merged token tree.
    ///
    /// This never fails: if the parser reports an error, the error is traced
    /// and its textual form is returned as a single plain-text token.
    pub fn parse_ui(&self, input: &str) -> Token {
        match self.inline_ui(Span::new_extra(input, SpanMeta::default())) {
            Ok((_, t)) => t.merged(),
            Err(e) => {
                // Label the failure with the parser that actually ran, so it can
                // be told apart from `parse_inline` failures in the trace output.
                trace!(input = input, "UI parser fail: {:?}", e);
                Token::PlainText(e.to_compact_string())
            }
        }
    }

    /// Parses `input` with the profile-field parser and returns the merged
    /// token tree.
    ///
    /// This never fails: if the parser reports an error, the error is traced
    /// and its textual form is returned as a single plain-text token.
    pub fn parse_profile_fields(&self, input: &str) -> Token {
        let span = Span::new_extra(input, SpanMeta::default());
        let outcome = self.inline_profile_fields(span);

        match outcome {
            Err(e) => {
                trace!(input = input, "Profile field parser fail: {:?}", e);
                Token::PlainText(e.to_compact_string())
            }
            Ok((_, parsed)) => parsed.merged(),
        }
    }

    /// Binds `self` to a token-producing parser method, yielding a plain
    /// `Span -> IResult` function usable with nom combinators.
    #[inline]
    fn partial(
        &self,
        func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token> + 'static,
    ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token> + '_ {
        move |span| func(self, span)
    }

    /// Binds `self` to a span-producing parser method, yielding a plain
    /// `Span -> IResult` function usable with nom combinators.
    #[inline]
    fn partial_span(
        &self,
        func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'static,
    ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>> + '_ {
        move |span| func(self, span)
    }

    /// Parses one or more full-grammar tokens and wraps them in a sequence.
    pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let (rest, tokens) = many1(self.partial(Self::full_single))(input)?;

        Ok((rest, Token::Sequence(tokens)))
    }

    /// Parses one or more inline-grammar tokens and wraps them in a sequence.
    pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let (rest, tokens) = many1(self.partial(Self::inline_single))(input)?;

        Ok((rest, Token::Sequence(tokens)))
    }

    /// Parses one or more label-safe inline tokens and wraps them in a sequence.
    pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let (rest, tokens) = many1(self.partial(Self::inline_label_safe_single))(input)?;

        Ok((rest, Token::Sequence(tokens)))
    }

    /// Parses one or more tokens of the restricted profile-field grammar:
    /// Unicode emoji, mentions, hashtags, raw URLs and raw text, tried in that
    /// order.
    fn inline_profile_fields<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let single = alt((
            self.partial(Self::unicode_emoji),
            self.partial(Self::tag_mention),
            self.partial(Self::tag_hashtag),
            self.partial(Self::raw_url),
            self.partial(Self::tag_raw_text),
        ));

        let (rest, tokens) = many1(single)(input)?;
        Ok((rest, Token::Sequence(tokens)))
    }

    /// Parses one or more tokens of the restricted UI grammar: Unicode emoji,
    /// shortcode emoji and raw text, tried in that order.
    fn inline_ui<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let single = alt((
            self.partial(Self::unicode_emoji),
            self.partial(Self::shortcode_emoji),
            self.partial(Self::tag_raw_text),
        ));

        let (rest, tokens) = many1(single)(input)?;
        Ok((rest, Token::Sequence(tokens)))
    }

    /// Tries the asterisk/underscore emphasis parsers; the first one that
    /// succeeds wins. Combined bold-italic forms are tried before the plain
    /// bold and italic ones.
    fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let mut emphasis = alt((
            self.partial(Self::tag_bold_italic_asterisk),
            self.partial(Self::tag_bold_italic_underscore),
            self.partial(Self::tag_bold_asterisk),
            self.partial(Self::tag_italic_asterisk),
            self.partial(Self::tag_bold_underscore),
            self.partial(Self::tag_italic_underscore),
        ));

        emphasis(input)
    }

    fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let (input, token) = alt((
            self.increase_nesting(alt((
                self.partial(Self::unicode_emoji),
                self.partial(Self::tag_block_center),
                self.partial(Self::tag_small),
                self.partial(Self::tag_plain),
                self.partial(Self::tag_bold),
                self.partial(Self::tag_italic),
                self.partial(Self::tag_strikethrough),
                self.partial(Self::url_no_embed),
                self.partial(Self::base_bold_italic),
                self.partial(Self::tag_block_code),
                self.partial(Self::tag_inline_code),
                self.partial(Self::tag_quote),
                self.partial(Self::tag_block_math),
                self.partial(Self::tag_inline_math),
                self.partial(Self::tag_strikethrough_tilde),
                self.partial(Self::tag_func),
                self.partial(Self::tag_mention),
                self.partial(Self::tag_hashtag),
                self.partial(Self::shortcode_emoji),
                self.partial(Self::link),
                self.partial(Self::raw_url),
            ))),
            self.partial(Self::tag_raw_text),
        ))(input)?;
        Ok((input, token))
    }

    /// Parses a single token of the inline grammar.
    ///
    /// All structured inline parsers are tried first, wrapped in
    /// `increase_nesting`; if none of them matches, the input falls back to
    /// raw text.
    fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let structured = self.increase_nesting(alt((
            self.partial(Self::unicode_emoji),
            self.partial(Self::tag_small),
            self.partial(Self::tag_plain),
            self.partial(Self::tag_bold),
            self.partial(Self::tag_italic),
            self.partial(Self::tag_strikethrough),
            self.partial(Self::url_no_embed),
            self.partial(Self::base_bold_italic),
            self.partial(Self::tag_inline_code),
            self.partial(Self::tag_inline_math),
            self.partial(Self::tag_strikethrough_tilde),
            self.partial(Self::tag_func),
            self.partial(Self::tag_mention),
            self.partial(Self::tag_hashtag),
            self.partial(Self::shortcode_emoji),
            self.partial(Self::link),
            self.partial(Self::raw_url),
        )));
        // Fallback when no structured parser applies
        let raw_text = self.partial(Self::tag_raw_text);

        alt((structured, raw_text))(input)
    }

    /// Parses a single inline token while skipping all text-formatting
    /// parsers (bold, italic, small, strikethrough, links, …).
    ///
    /// The structured parsers are wrapped in `increase_nesting`; if none of
    /// them matches, the input falls back to raw text.
    fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        alt((
            self.increase_nesting(alt((
                self.partial(Self::unicode_emoji),
                self.partial(Self::url_no_embed),
                self.partial(Self::tag_inline_code),
                self.partial(Self::tag_inline_math),
                self.partial(Self::tag_func),
                self.partial(Self::tag_mention),
                self.partial(Self::tag_hashtag),
                self.partial(Self::shortcode_emoji),
                self.partial(Self::raw_url),
            ))),
            // Fallback when no structured parser applies
            self.partial(Self::tag_raw_text),
        ))(input)
    }

    /// Parses a single token of the label-safe grammar: formatting, functions
    /// and emoji only, with no URL, link, mention, hashtag, code or math
    /// parsers.
    ///
    /// The structured parsers are wrapped in `increase_nesting`; if none of
    /// them matches, the input falls back to raw text.
    fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        alt((
            self.increase_nesting(alt((
                self.partial(Self::unicode_emoji),
                self.partial(Self::tag_small),
                self.partial(Self::tag_plain),
                self.partial(Self::tag_bold),
                self.partial(Self::tag_italic),
                self.partial(Self::tag_strikethrough),
                self.partial(Self::base_bold_italic),
                self.partial(Self::tag_strikethrough_tilde),
                self.partial(Self::tag_func),
                self.partial(Self::shortcode_emoji),
            ))),
            // Fallback when no structured parser applies
            self.partial(Self::tag_raw_text),
        ))(input)
    }

    /// Parses a block quote: one or more consecutive lines starting with `>`.
    ///
    /// Up to two line endings are consumed on either side of the quote. When
    /// no leading line ending is present the quote must start in column 1. A
    /// quote consisting of a single empty line is rejected. The quoted lines
    /// are re-parsed with the full parser via `spliced`.
    fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let (input, (nl_first, nl_second)) = tuple((opt(line_ending), opt(line_ending)))(input)?;

        // Without a preceding line ending the quote has to open the line
        if nl_first.is_none() && nl_second.is_none() && input.get_column() != 1 {
            return fail(input);
        }

        let quote_line = |line| tuple((tag(">"), opt(space), not_line_ending))(line);

        let quote_start = input;
        let (input, lines) = separated_list1(line_ending, quote_line)(input)?;

        // Keep only the text after the `>` marker and the optional space
        let quote_lines: Vec<_> = lines
            .into_iter()
            .map(|(_marker, _padding, text)| text)
            .collect();

        if let [only] = quote_lines.as_slice() {
            if only.fragment().is_empty() {
                return fail(input);
            }
        }

        let (_, inner) = spliced(&quote_lines, self.partial(Self::full), quote_start)?;

        let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;

        Ok((input, Token::Quote(Box::new(inner))))
    }

    /// Parses a `<center>…</center>` block.
    ///
    /// One optional line ending is consumed in front of the opening tag, which
    /// must then sit in column 1. The content is parsed token by token with
    /// the inline grammar until the closing tag, which may be preceded by
    /// spaces and one line ending.
    fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let open = &tag("<center>");
        let close = &tag("</center>");

        let (input, _) = opt(line_ending)(input)?;
        if input.get_column() != 1 {
            return fail(input);
        }

        // Opening tag plus the line ending that may directly follow it
        let (input, _) = tuple((open, opt(line_ending)))(input)?;

        let terminator = tuple((opt(space1), opt(line_ending), close));
        let (input, (children, _)) =
            many_till(self.partial(Self::inline_single), terminator)(input)?;

        let centered = boxing_token(Token::Center)(Token::Sequence(children));
        Ok((input, centered))
    }

    fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let delim = &tag("```");

        let (input, _) = opt(line_ending)(input)?;

        if input.get_column() != 1 {
            return fail(input);
        }

        let (input, _) = delim(input)?;
        let (input, lang) = opt(map(
            recognize(many1(tuple((not(delim), not(line_ending), anychar)))),
            Span::into_fragment,
        ))(input)?;
        let (input, _) = line_ending(input)?;

        let (input, code) = map(
            recognize(many1_count(tuple((
                not(tuple((line_ending, delim))),
                anychar,
            )))),
            Span::into_fragment,
        )(input)?;

        let (input, _) = line_ending(input)?;
        let (input, _) = delim(input)?;
        // Trailing whitespace after the triple backtick
        let (input, _) = opt(space1_unicode)(input)?;
        // If we got this far, the next character should be a line ending
        let (input, _) = not(tuple((not(line_ending), anychar)))(input)?;
        let (input, _) = opt(line_ending)(input)?;

        Ok((
            input,
            Token::BlockCode {
                lang: lang.map(<&str>::into),
                inner: code.into(),
            },
        ))
    }

    /// Parses a `\[ … \]` math block into [`Token::BlockMath`].
    ///
    /// One optional line ending may precede the opening delimiter, which must start in
    /// column 1. A single line ending directly after `\[` and directly before `\]` is
    /// dropped from the formula. The formula itself must be at least one character long
    /// and may span several lines. After the closing delimiter only whitespace may
    /// follow on the same line, and one trailing line ending is consumed.
    fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let start = &tag("\\[");
        let end = &tag("\\]");

        let (input, _) = opt(line_ending)(input)?;

        if input.get_column() != 1 {
            return fail(input);
        }

        let (input, _) = start(input)?;
        let (input, _) = opt(line_ending)(input)?;

        // Consume the formula one character at a time, stopping in front of the closing
        // delimiter (or the line ending that immediately precedes it). Matching whole
        // lines here would swallow a closing delimiter on the same line and would match
        // empty at every line break, which `many1_count` reports as an error, so
        // multi-line formulas could never be parsed.
        let (input, math_span) = recognize(many1_count(tuple((
            not(tuple((opt(line_ending), end))),
            anychar,
        ))))(input)?;

        let (input, _) = opt(line_ending)(input)?;
        let (input, _) = end(input)?;
        // Trailing whitespace after the closing delim
        let (input, _) = opt(space1_unicode)(input)?;
        // If we got this far, the next character should be a line ending
        let (input, _) = not(tuple((not(line_ending), anychar)))(input)?;
        let (input, _) = opt(line_ending)(input)?;

        Ok((
            input,
            Token::BlockMath(math_span.into_fragment().to_string()),
        ))
    }

    /// Builds a parser for a span enclosed by an opening and a closing delimiter.
    ///
    /// # Parameters
    /// * `opening_tag` / `closing_tag` – delimiter parsers, each convertible into a
    ///   [`FlankingDelim`], which pairs the parser with a [`FlankingRule`].
    /// * `escape` – when `true`, a backslash directly followed by the opening delimiter
    ///   is consumed and the delimiter alone is returned as [`Token::PlainText`].
    /// * `matcher` – parses the content of the span (one or more items, none of which
    ///   may start at the closing delimiter) and collects it into the final token.
    /// * `fallback` – retried from just after the opening delimiter when `matcher`
    ///   fails with a recoverable error.
    ///
    /// # Behaviour of the returned parser
    /// * With a `Strict` opening rule, an alphanumeric run directly followed by the
    ///   opening delimiter is returned as plain text instead of opening a span.
    /// * If `matcher` fails and `fallback` succeeds, the result is a sequence of the
    ///   opening delimiter as text, the fallback's collected content, and the closing
    ///   delimiter as text.
    /// * If both fail, the text from the start of the span up to the position where
    ///   `matcher` reported its error is returned as plain text.
    /// * If `matcher` succeeds but the flanking checks fail, the delimiters are kept as
    ///   plain text around the token's `inner()` content.
    /// * Otherwise the collected token is returned inside a one-element sequence.
    #[inline]
    fn tag_delimited<'a, 'b: 'a, T: Clone, S: Clone, FOpen, FClose>(
        &'a self,
        opening_tag: impl Into<FlankingDelim<'b, FOpen>> + 'a,
        closing_tag: impl Into<FlankingDelim<'b, FClose>> + 'a,
        escape: bool,
        matcher: Matcher<'a, 'b, T>,
        fallback: Matcher<'a, 'b, S>,
    ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token> + '_
    where
        FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
        FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
    {
        let FlankingDelim(opening_tag, opening_rule, ..) = opening_tag.into();
        let FlankingDelim(closing_tag, closing_rule, ..) = closing_tag.into();

        move |input| {
            // An escaped opening delimiter is emitted verbatim, without the backslash.
            if escape {
                if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
                    return Ok((
                        input_escaped,
                        Token::PlainText(mark.fragment().to_string().into()),
                    ));
                }
            }

            // Strict delimiters do not open a span when glued to a preceding word.
            if let FlankingRule::Strict = opening_rule {
                let (input, pre) =
                    opt(recognize(tuple((alphanumeric1_unicode, &opening_tag))))(input)?;
                if let Some(pre_text) = pre {
                    return Ok((input, Token::PlainText(pre_text.into_fragment().into())));
                }
            }

            let begin = input;
            let (post_open, _) = opening_tag(input)?;

            let res = tuple((
                many1(tuple((not(&closing_tag), &matcher.matcher_inner))),
                &closing_tag,
            ))(post_open);

            if let Err(nom::Err::Error(nom::error::Error {
                input: input_past_err,
                ..
            })) = res
            {
                let res_fallback = tuple((
                    many1(tuple((not(&closing_tag), &fallback.matcher_inner))),
                    &closing_tag,
                ))(post_open);

                // Neither matcher found a closed span: give up and emit everything up to
                // the primary matcher's error position as plain text.
                let Ok((input, (inner, closing))) = res_fallback else {
                    return Ok((
                        input_past_err,
                        Token::PlainText(begin.fragment_between(&input_past_err).into()),
                    ));
                };

                let mut inner = inner.into_iter().map(|(_, t)| t);

                return Ok((
                    input,
                    Token::Sequence(vec![
                        Token::PlainText(begin.fragment_between(&post_open).into()),
                        ((fallback.collector)(&mut inner)),
                        Token::PlainText(closing.into_fragment().into()),
                    ]),
                ));
            }

            // Any other kind of error from the primary matcher is propagated as is.
            let (input, (inner, closing)) = res?;
            let mut inner = inner.into_iter().map(|(_, t)| t);

            let inner_tok = (matcher.collector)(&mut inner);

            // Left flanking: the content must start with text that is not whitespace.
            let correct_left_flanking =
                if let FlankingRule::Lenient | FlankingRule::Strict = opening_rule {
                    inner_tok
                        .str_content_left()
                        .is_some_and(|s| !s.starts_with(char::is_whitespace))
                } else {
                    true
                };

            // Right flanking: the content must end with text that is not whitespace.
            let correct_right_flanking =
                if let FlankingRule::Lenient | FlankingRule::Strict = closing_rule {
                    inner_tok
                        .str_content_right()
                        .is_some_and(|s| !s.ends_with(char::is_whitespace))
                } else {
                    true
                };

            // A strict closing delimiter must not be directly followed by a word.
            let (input, alphanum) = opt(peek(alphanumeric1_unicode))(input)?;
            let correct_right_outer =
                alphanum.is_none() || !matches!(closing_rule, FlankingRule::Strict);

            let correct_flanking =
                correct_left_flanking && correct_right_flanking && correct_right_outer;

            if !correct_flanking {
                return Ok((
                    input,
                    Token::Sequence(vec![
                        Token::PlainText(begin.fragment_between(&post_open).into()),
                        inner_tok.inner(),
                        Token::PlainText(closing.into_fragment().into()),
                    ]),
                ));
            }
            Ok((input, Token::Sequence(vec![inner_tok])))
        }
    }

    fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let (input, _) = tag("$[")(input)?;

        let func_ident = |input| {
            recognize(tuple((
                many1_count(alt((alpha1, tag("_")))),
                many0_count(alt((alphanumeric1, tag("_")))),
            )))(input)
        };

        let arg_value = recognize(many1_count(alt((
            alphanumeric1,
            tag("."),
            tag("-"),
            tag("_"),
        ))));

        let (input, func_name) = map(func_ident, Span::into_fragment)(input)?;

        let arg = tuple((func_ident, opt(tuple((tag("="), arg_value)))));

        let (input, args) =
            opt(tuple((one_char('.'), separated_list1(one_char(','), arg))))(input)?;

        let args_out = args.map_or_else(HashMap::new, |(_, items)| {
            items
                .into_iter()
                .map(|(k, v)| {
                    (
                        k.into_fragment().to_string(),
                        v.map(|(_, val)| val.into_fragment().to_string()),
                    )
                })
                .collect::<HashMap<_, _>>()
        });

        let (input, _) = opt(space)(input)?;

        let (input, (inner, _)) = many_till(self.partial(Self::inline_single), tag("]"))(input)?;

        Ok((
            input,
            Token::Function {
                name: func_name.to_string(),
                params: args_out,
                inner: Box::new(Token::Sequence(inner)),
            },
        ))
    }

    /// Parses `<plain>…</plain>` into [`Token::PlainTag`].
    ///
    /// The content is taken verbatim; it must be at least one character long and may
    /// not contain a line ending.
    fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let open = &tag("<plain>");
        let close = &tag("</plain>");

        let (input, _) = open(input)?;
        let (input, body) =
            recognize(many1(tuple((not(line_ending), not(close), anychar))))(input)?;
        let (input, _) = close(input)?;

        Ok((input, Token::PlainTag(body.into_fragment().into())))
    }

    /// Parses `<small>…</small>` (tag names matched case-insensitively) through
    /// [`Self::tag_delimited`], with backslash escaping disabled.
    ///
    /// The primary matcher parses the content with [`Self::inline_single`] and its
    /// collector is built from `Token::Sequence` and `boxing_token(Token::Small)`.
    /// The fallback matcher uses [`Self::inline_non_formatting_single`] and a
    /// collector built from `Token::Sequence` and `identity`.
    fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            tag_no_case("<small>"),
            tag_no_case("</small>"),
            false,
            Matcher::new(
                &self.partial(Self::inline_single),
                &collect_sequence(Token::Sequence, boxing_token(Token::Small)),
            ),
            Matcher::new(
                &self.partial(Self::inline_non_formatting_single),
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses `***…***` through [`Self::tag_delimited`], using
    /// [`FlankingRule::Lenient`] for both delimiters and with backslash escaping
    /// enabled (so `\***` yields the literal delimiter).
    ///
    /// The primary matcher parses the content with [`Self::inline_single`] and its
    /// collector is built from `Token::Sequence` and `boxing_token(Token::BoldItalic)`.
    /// The fallback matcher uses [`Self::inline_non_formatting_single`] and a
    /// collector built from `Token::Sequence` and `identity`.
    fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            (tag("***"), FlankingRule::Lenient),
            (tag("***"), FlankingRule::Lenient),
            true,
            Matcher::new(
                &self.partial(Self::inline_single),
                &collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
            ),
            Matcher::new(
                &self.partial(Self::inline_non_formatting_single),
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses `___…___` through [`Self::tag_delimited`], using
    /// [`FlankingRule::Strict`] for both delimiters and with backslash escaping
    /// enabled (so `\___` yields the literal delimiter).
    ///
    /// The primary matcher parses the content with [`Self::inline_single`] and its
    /// collector is built from `Token::Sequence` and `boxing_token(Token::BoldItalic)`.
    /// The fallback matcher uses [`Self::inline_non_formatting_single`] and a
    /// collector built from `Token::Sequence` and `identity`.
    fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            (tag("___"), FlankingRule::Strict),
            (tag("___"), FlankingRule::Strict),
            true,
            Matcher::new(
                &self.partial(Self::inline_single),
                &collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
            ),
            Matcher::new(
                &self.partial(Self::inline_non_formatting_single),
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses `<b>…</b>` (tag names matched case-insensitively) through
    /// [`Self::tag_delimited`], with backslash escaping disabled.
    ///
    /// The primary matcher parses the content with [`Self::inline_single`] and its
    /// collector is built from `Token::Sequence` and `boxing_token(Token::Bold)`.
    /// The fallback matcher uses [`Self::inline_non_formatting_single`] and a
    /// collector built from `Token::Sequence` and `identity`.
    fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            tag_no_case("<b>"),
            tag_no_case("</b>"),
            false,
            Matcher::new(
                &self.partial(Self::inline_single),
                &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
            ),
            Matcher::new(
                &self.partial(Self::inline_non_formatting_single),
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses `**…**` through [`Self::tag_delimited`], using
    /// [`FlankingRule::Lenient`] for both delimiters and with backslash escaping
    /// enabled (so `\**` yields the literal delimiter).
    ///
    /// The primary matcher parses the content with [`Self::inline_single`] and its
    /// collector is built from `Token::Sequence` and `boxing_token(Token::Bold)`.
    /// The fallback matcher uses [`Self::inline_non_formatting_single`] and a
    /// collector built from `Token::Sequence` and `identity`.
    fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            (tag("**"), FlankingRule::Lenient),
            (tag("**"), FlankingRule::Lenient),
            true,
            Matcher::new(
                &self.partial(Self::inline_single),
                &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
            ),
            Matcher::new(
                &self.partial(Self::inline_non_formatting_single),
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses `__…__` through [`Self::tag_delimited`], using
    /// [`FlankingRule::Strict`] for both delimiters and with backslash escaping
    /// enabled (so `\__` yields the literal delimiter).
    ///
    /// The primary matcher parses the content with [`Self::inline_single`] and its
    /// collector is built from `Token::Sequence` and `boxing_token(Token::Bold)`.
    /// The fallback matcher uses [`Self::inline_non_formatting_single`] and a
    /// collector built from `Token::Sequence` and `identity`.
    fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            (tag("__"), FlankingRule::Strict),
            (tag("__"), FlankingRule::Strict),
            true,
            Matcher::new(
                &self.partial(Self::inline_single),
                &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
            ),
            Matcher::new(
                &self.partial(Self::inline_non_formatting_single),
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses `<i>…</i>` (tag names matched case-insensitively) through
    /// [`Self::tag_delimited`], with backslash escaping disabled.
    ///
    /// The primary matcher parses the content with [`Self::inline_single`] and its
    /// collector is built from `Token::Sequence` and `boxing_token(Token::Italic)`.
    /// The fallback matcher uses [`Self::inline_non_formatting_single`] and a
    /// collector built from `Token::Sequence` and `identity`.
    fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            tag_no_case("<i>"),
            tag_no_case("</i>"),
            false,
            Matcher::new(
                &self.partial(Self::inline_single),
                &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
            ),
            Matcher::new(
                &self.partial(Self::inline_non_formatting_single),
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses `*…*` through [`Self::tag_delimited`], using
    /// [`FlankingRule::Lenient`] for both delimiters and with backslash escaping
    /// enabled (so `\*` yields a literal asterisk).
    ///
    /// The primary matcher parses the content with [`Self::inline_single`] and its
    /// collector is built from `Token::Sequence` and `boxing_token(Token::Italic)`.
    /// The fallback matcher uses [`Self::inline_non_formatting_single`] and a
    /// collector built from `Token::Sequence` and `identity`.
    fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            (tag("*"), FlankingRule::Lenient),
            (tag("*"), FlankingRule::Lenient),
            true,
            Matcher::new(
                &self.partial(Self::inline_single),
                &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
            ),
            Matcher::new(
                &self.partial(Self::inline_non_formatting_single),
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses `_…_` through [`Self::tag_delimited`], using
    /// [`FlankingRule::Strict`] for both delimiters and with backslash escaping
    /// enabled (so `\_` yields a literal underscore).
    ///
    /// The primary matcher parses the content with [`Self::inline_single`] and its
    /// collector is built from `Token::Sequence` and `boxing_token(Token::Italic)`.
    /// The fallback matcher uses [`Self::inline_non_formatting_single`] and a
    /// collector built from `Token::Sequence` and `identity`.
    fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            (tag("_"), FlankingRule::Strict),
            (tag("_"), FlankingRule::Strict),
            true,
            Matcher::new(
                &self.partial(Self::inline_single),
                &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
            ),
            Matcher::new(
                &self.partial(Self::inline_non_formatting_single),
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses `<s>…</s>` (tag names matched case-insensitively) through
    /// [`Self::tag_delimited`], with backslash escaping disabled.
    ///
    /// The primary matcher parses the content with [`Self::inline_single`] and its
    /// collector is built from `Token::Sequence` and
    /// `boxing_token(Token::Strikethrough)`. The fallback matcher uses
    /// [`Self::inline_non_formatting_single`] and a collector built from
    /// `Token::Sequence` and `identity`.
    fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            tag_no_case("<s>"),
            tag_no_case("</s>"),
            false,
            Matcher::new(
                &self.partial(Self::inline_single),
                &collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
            ),
            Matcher::new(
                &self.partial(Self::inline_non_formatting_single),
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses `~~…~~` through [`Self::tag_delimited`], using
    /// [`FlankingRule::Lenient`] for both delimiters and with backslash escaping
    /// enabled (so `\~~` yields the literal delimiter).
    ///
    /// Unlike the other formatting spans, both the primary and the fallback matcher
    /// refuse to start an item at a line ending. The primary matcher otherwise
    /// delegates to [`Self::inline_single`] with a collector built from
    /// `Token::Sequence` and `boxing_token(Token::Strikethrough)`; the fallback
    /// delegates to [`Self::inline_non_formatting_single`] with a collector built from
    /// `Token::Sequence` and `identity`.
    fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            (tag("~~"), FlankingRule::Lenient),
            (tag("~~"), FlankingRule::Lenient),
            true,
            Matcher::new(
                &move |input| {
                    map(
                        // Reject line endings before handing over to the inline parser.
                        tuple(((not(line_ending)), self.partial(Self::inline_single))),
                        |(_, captured)| captured,
                    )(input)
                },
                &collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
            ),
            Matcher::new(
                &move |input| {
                    map(
                        tuple((
                            // Same line-ending restriction for the fallback path.
                            (not(line_ending)),
                            self.partial(Self::inline_non_formatting_single),
                        )),
                        |(_, captured)| captured,
                    )(input)
                },
                &collect_sequence(Token::Sequence, identity),
            ),
        )(input)
    }

    /// Parses inline code through [`Self::tag_delimited`], with backslash escaping
    /// enabled.
    ///
    /// The span opens with a backtick and closes with either a backtick or an acute
    /// accent (`´`). The content is matched character by character and may contain
    /// neither of those two characters nor a line ending; the characters are collected
    /// with `collect_char_sequence(Token::InlineCode)`. The fallback is
    /// `Matcher::reject()`.
    fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            tag("`"),
            |input| alt((tag("`"), tag("´")))(input),
            true,
            Matcher::new(
                &move |input| {
                    map(
                        // Any single character except a closing mark or a line ending.
                        tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar)),
                        |(_, captured)| captured,
                    )(input)
                },
                &collect_char_sequence(Token::InlineCode),
            ),
            Matcher::reject(),
        )(input)
    }

    /// Parses inline math delimited by `\(` and `\)` through [`Self::tag_delimited`],
    /// with backslash escaping disabled.
    ///
    /// The content is matched character by character and may not contain a line
    /// ending; the characters are collected with
    /// `collect_char_sequence(Token::InlineMath)`. The fallback is `Matcher::reject()`.
    fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        self.tag_delimited(
            tag("\\("),
            tag("\\)"),
            false,
            Matcher::new(
                &move |input| {
                    // Any single character that is not the start of a line ending.
                    map(tuple((not(line_ending), anychar)), |(_, captured)| captured)(input)
                },
                &collect_char_sequence(Token::InlineMath),
            ),
            Matcher::reject(),
        )(input)
    }

    /// Consumes exactly one character and emits it as [`Token::PlainText`].
    fn tag_raw_text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        anychar(input).map(|(rest, ch)| (rest, Token::PlainText(ch.to_compact_string())))
    }

    /// Parses a bare URL (protocol followed by URL characters, no spaces allowed) into
    /// [`Token::UrlRaw`].
    ///
    /// The run of URL characters ends at the first position where
    /// [`Self::url_chars_base`] no longer matches. A single trailing `.`, `,` or `?` is
    /// dropped from the stored URL text; the consumed input is not affected by that.
    fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let (input, matched) = recognize(tuple((
            self.partial_span(Self::protocol),
            self.url_chars(
                |input| recognize(not(self.partial_span(Self::url_chars_base)))(input),
                false,
            ),
        )))(input)?;

        let raw = matched.into_fragment();

        // Sentence punctuation right after a URL is most likely not part of it.
        let trimmed = raw
            .strip_suffix(|c: char| matches!(c, '.' | ',' | '?'))
            .unwrap_or(raw);

        Ok((input, Token::UrlRaw(trimmed.to_string())))
    }

    /// Parses a URL wrapped in angle brackets (`<https://…>`) into
    /// [`Token::UrlNoEmbed`].
    ///
    /// The URL consists of a protocol followed by URL characters, with spaces allowed,
    /// up to the closing `>`; the brackets themselves are not part of the stored text.
    fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let url = recognize(tuple((
            self.partial_span(Self::protocol),
            self.url_chars(tag(">"), true),
        )));

        let (input, (_, url_span, _)) = tuple((tag("<"), url, tag(">")))(input)?;

        let token = Token::UrlNoEmbed(url_span.into_fragment().to_string());
        Ok((input, token))
    }

    /// Parses a labelled link `[label](url)` or `?[label](url)` into [`Token::Link`].
    ///
    /// A leading `?` sets `embed` to `false`. The label may not start with a second
    /// `[` and is parsed as a run of [`Self::inline_label_safe_single`] tokens up to
    /// `](`. The URL is a protocol followed by URL characters, with spaces allowed, up
    /// to the closing `)`.
    fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let (input, silent_marker) = opt(tag("?"))(input)?;
        let (input, _) = tuple((tag("["), not(tag("["))))(input)?;

        let (input, (label, _)) =
            many_till(self.partial(Self::inline_label_safe_single), tag("]("))(input)?;

        let (input, href) = recognize(tuple((
            self.partial_span(Self::protocol),
            self.url_chars(tag(")"), true),
        )))(input)?;
        let (input, _) = tag(")")(input)?;

        let link = Token::Link {
            label: Box::new(Token::Sequence(label)),
            href: href.into_fragment().into(),
            embed: silent_marker.is_none(),
        };

        Ok((input, link))
    }

    /// Parses a single Unicode emoji into [`Token::UnicodeEmoji`].
    ///
    /// The first extended grapheme cluster of the input is taken, trailing U+200C and
    /// U+200D characters are stripped from it, and the remainder is looked up with
    /// `emojis::get`. The parser fails on empty input or when the lookup finds nothing.
    /// Only the stripped cluster is consumed.
    fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let first_cluster = input.fragment().graphemes(true).next();

        let candidate = match first_cluster {
            Some(cluster) => {
                cluster.trim_end_matches(|c: char| matches!(c, '\u{200c}' | '\u{200d}'))
            }
            None => return fail(input),
        };

        if emojis::get(candidate).is_none() {
            return fail(input);
        }

        let rest = input.slice(candidate.len()..);
        Ok((rest, Token::UnicodeEmoji(candidate.into())))
    }

    fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        if let (plain_out, Some(plain)) = map(
            opt(recognize(tuple((
                alphanumeric1_unicode,
                self.partial(Self::shortcode_emoji),
            )))),
            |o| o.map(Span::into_fragment),
        )(input)?
        {
            return Ok((plain_out, Token::PlainText(plain.into())));
        }

        let (input, _) = tag(":")(input)?;
        let (input, shortcode) = map(
            recognize(many1(alt((
                alphanumeric1_unicode,
                recognize(one_of("_+-")),
            )))),
            Span::into_fragment,
        )(input)?;
        let (input, host) = opt(map(
            tuple((
                tag("@"),
                map(
                    recognize(many1(alt((alphanumeric1, recognize(one_of("-.")))))),
                    Span::into_fragment,
                ),
            )),
            |(_at, host)| host,
        ))(input)?;
        let (input, _) = tag(":")(input)?;
        let (input, _) = not(alphanumeric1_unicode)(input)?;

        Ok((
            input,
            Token::ShortcodeEmoji {
                shortcode: shortcode.into(),
                host: host.map(str::to_string),
            },
        ))
    }

    /// Parses a mention (`@user`, `@user@host`, `@user:host`, `!community`,
    /// `!community@host`) into [`Token::Mention`].
    ///
    /// A mention glued to a preceding backslash or alphanumeric run is returned
    /// together with that prefix as [`Token::PlainText`]. The name consists of ASCII
    /// alphanumerics, `-` and `_`. The host may additionally contain `.`; trailing
    /// `.`, `-` and `_` are stripped from it and left unconsumed. A user mention whose
    /// host is introduced by `:` is reported as [`MentionType::MatrixUser`].
    fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let (after_glued, glued) = opt(recognize(tuple((
            alt((tag("\\"), alphanumeric1_unicode)),
            self.partial(Self::tag_mention),
        ))))(input)?;

        if let Some(glued) = glued {
            return Ok((after_glued, Token::PlainText(glued.into_fragment().into())));
        }

        let (input, sigil) = one_of("@!")(input)?;
        let base_type = match sigil {
            '@' => MentionType::User,
            '!' => MentionType::Community,
            _ => unreachable!(),
        };

        let (input, name_span) =
            recognize(many1(alt((alphanumeric1, recognize(one_of("-_"))))))(input)?;
        let name = name_span.into_fragment();

        // Only user mentions accept the colon separator.
        let separators = if matches!(base_type, MentionType::User) {
            "@:"
        } else {
            "@"
        };

        // The host is only peeked at here; how much of it is consumed is decided below.
        let (_, host_opt) = opt(tuple((
            one_of(separators),
            recognize(many1(alt((alphanumeric1, recognize(one_of("-_.")))))),
        )))(input)?;

        let (mention_type, host, rest) = match host_opt {
            Some((separator, host_span)) => {
                let host = host_span
                    .into_fragment()
                    .trim_end_matches(|c| matches!(c, '.' | '-' | '_'));
                // Promote tags with a colon separator to Matrix handles
                let kind = if separator == ':' {
                    MentionType::MatrixUser
                } else {
                    base_type
                };
                // Skip the one-byte separator plus the trimmed host.
                (kind, Some(host), input.slice(host.len() + 1..))
            }
            None => (base_type, None, input),
        };

        Ok((
            rest,
            Token::Mention {
                mention_type,
                name: name.into(),
                host: host.map(|h| h.into()),
            },
        ))
    }

    /// Parses `#hashtag` into [`Token::Hashtag`].
    ///
    /// A `#` glued to a preceding alphanumeric run does not start a hashtag: the run
    /// and the `#` are returned as [`Token::PlainText`]. The hashtag text is one or
    /// more [`Self::hashtag_chars`] matches.
    fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
        let (input, glued) = opt(recognize(tuple((alphanumeric1_unicode, tag("#")))))(input)?;

        match glued {
            Some(prefix) => Ok((input, Token::PlainText(prefix.into_fragment().into()))),
            None => {
                let (input, _) = tag("#")(input)?;
                let (input, body) =
                    recognize(many1(self.partial_span(Self::hashtag_chars)))(input)?;

                Ok((input, Token::Hashtag(body.into_fragment().into())))
            }
        }
    }

    /// Wraps `func` so that it runs one nesting level deeper.
    ///
    /// The returned parser fails without calling `func` when the span's
    /// `extra.depth` has already reached `self.depth_limit`; otherwise it passes `func`
    /// a copy of the span whose depth is incremented by one.
    #[inline]
    fn increase_nesting<'a, 'b, O, F>(
        &'b self,
        mut func: F,
    ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O> + 'b
    where
        F: Parser<Span<'a>, O, nom::error::Error<Span<'a>>> + 'b,
    {
        move |input| {
            let depth = input.extra.depth;
            if depth >= self.depth_limit {
                return fail(input);
            }

            let mut nested = input;
            nested.extra.depth = depth + 1;
            func.parse(nested)
        }
    }

    /// Matches one unit of hashtag text and returns the span it covers.
    ///
    /// A unit is either a bracketed group (`()`, `[]`, `「」` or `（）`) whose interior is
    /// a single nested `hashtag_chars` match, run through [`Self::increase_nesting`] so
    /// the recursion is bounded by the depth limit, or a single character that is not
    /// whitespace (per `space1_unicode`), not a line ending and not one of the
    /// punctuation/bracket characters listed below.
    #[inline]
    fn hashtag_chars<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
        recognize(alt((
            // ASCII parentheses
            recognize(tuple((
                tag("("),
                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
                tag(")"),
            ))),
            // Square brackets
            recognize(tuple((
                tag("["),
                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
                tag("]"),
            ))),
            // Corner brackets
            recognize(tuple((
                tag("「"),
                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
                tag("」"),
            ))),
            // Full-width parentheses
            recognize(tuple((
                tag("（"),
                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
                tag("）"),
            ))),
            // Any other single character that is allowed inside a hashtag
            recognize(tuple((
                not(space1_unicode),
                not(line_ending),
                not(one_of(".,:;!?#?/[]【】()「」（）<>")),
                anychar,
            ))),
        )))(input)
    }

    /// Matches one of the two accepted URL schemes, `http://` or `https://`.
    #[inline]
    fn protocol<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
        // The two prefixes are mutually exclusive, so the order of the alternatives
        // has no effect on the result.
        let mut scheme = alt((tag("http://"), tag("https://")));
        scheme(input)
    }

    /// Matches one unit of URL text and returns the span it covers.
    ///
    /// The alternatives are tried in this order: an alphanumeric run (per
    /// `alphanumeric1_unicode`); a `[`…`]` group; a `(`…`)` group; or a single
    /// character out of the punctuation set listed in the last alternative. The
    /// bracketed groups contain zero or more nested `url_chars_base` matches, each run
    /// through [`Self::increase_nesting`] so the recursion is bounded by the depth
    /// limit.
    #[inline]
    fn url_chars_base<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
        alt((
            alphanumeric1_unicode,
            // Square-bracket group, consumed up to and including the closing `]`
            recognize(tuple((
                tag("["),
                many_till(
                    self.increase_nesting(self.partial_span(Self::url_chars_base)),
                    tag("]"),
                ),
            ))),
            // Parenthesised group, consumed up to and including the closing `)`
            recognize(tuple((
                tag("("),
                many_till(
                    self.increase_nesting(self.partial_span(Self::url_chars_base)),
                    tag(")"),
                ),
            ))),
            recognize(one_of(".,_/:%#$&?!~=+-@")),
        ))(input)
    }

    /// Builds a parser that recognizes a run of one or more URL units and returns the
    /// span it covers.
    ///
    /// * `terminator` – parser marking the end of the URL; the run stops in front of
    ///   it, including any spaces that directly precede it.
    /// * `spaces` – when `true`, runs of spaces (`space1`) are accepted inside the URL
    ///   in addition to [`Self::url_chars_base`] units.
    ///
    /// Independently of `terminator`, the run also stops in front of spaces that are
    /// followed by the end of input or by a double quote.
    #[inline]
    fn url_chars<'a, 'b, F>(
        &'b self,
        mut terminator: F,
        spaces: bool,
    ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'b
    where
        F: Parser<Span<'a>, Span<'a>, nom::error::Error<Span<'a>>> + 'b,
    {
        move |input| {
            recognize(many1_count(tuple((
                // Trailing spaces at the end of input are not part of the URL
                not(tuple((space1, eof))),
                // Neither are spaces followed by a double quote
                not(tuple((space1, tag("\"")))),
                // Stop before the terminator and any spaces leading up to it
                not(tuple((opt(space1), |input| terminator.parse(input)))),
                alt((
                    |input| self.url_chars_base(input),
                    // Spaces are only accepted when the caller asked for them
                    if spaces { space1 } else { fail },
                )),
            ))))(input)
        }
    }
}

#[cfg(test)]
mod test {
    use crate::{to_xml_string, Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT};
    use nom::bytes::complete::tag;
    use std::collections::HashMap;

    /// Test helper: runs the `full` parser of a default [`Context`] over `string` and
    /// returns the resulting token after `merged()`. Panics if parsing fails.
    fn parse_full(string: &str) -> Token {
        let span = Span::new_extra(string, SpanMeta::default());
        let (_rest, token) = Context::default().full(span).unwrap();
        token.merged()
    }

    /// Exercises `Context::url_chars` with `)` as the terminator.
    #[test]
    fn parse_url_chars() {
        let ctx = Context::default();

        // A balanced parenthesised group belongs to the URL; the extra `)` ends it.
        assert_eq!(
            ctx.url_chars(tag(")"), true)(Span::new_extra(
                "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
                SpanMeta::default()
            ))
            .unwrap()
            .1
            .into_fragment(),
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)"
        );

        // Several surplus closing parentheses give the same result.
        assert_eq!(
            ctx.url_chars(tag(")"), true)(Span::new_extra(
                "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))",
                SpanMeta::default()
            ))
            .unwrap()
            .1
            .into_fragment(),
            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
        );

        // Spaces at the very end of the input are excluded even when spaces are allowed.
        assert_eq!(
            ctx.url_chars(tag(")"), true)(Span::new_extra(
                "https://cs.wikipedia.org/wiki/Among_Us  ",
                SpanMeta::default()
            ))
            .unwrap()
            .1
            .into_fragment(),
            "https://cs.wikipedia.org/wiki/Among_Us",
        );

        // Interior spaces are kept, but spaces leading up to the terminator are not.
        assert_eq!(
            ctx.url_chars(tag(")"), true)(Span::new_extra(
                "https://cs.wikipedia.org/wiki/Among Us  )",
                SpanMeta::default()
            ))
            .unwrap()
            .1
            .into_fragment(),
            "https://cs.wikipedia.org/wiki/Among Us"
        );

        // With spaces disabled the URL stops at the first space.
        assert_eq!(
            ctx.url_chars(tag(")"), false)(Span::new_extra(
                "https://en.wikipedia.org/wiki/Among Us  )",
                SpanMeta::default()
            ))
            .unwrap()
            .1
            .into_fragment(),
            "https://en.wikipedia.org/wiki/Among"
        );
    }

    /// Exercises the inline formatting spans through the full parser.
    #[test]
    fn parse_formatting() {
        // Tilde-delimited strikethrough
        assert_eq!(
            parse_full(r#"~~stikethrough~~"#),
            Token::Strikethrough(Box::new(Token::PlainText("stikethrough".into()))),
        );

        // Double asterisks produce bold
        assert_eq!(
            parse_full(r#"**bold**"#),
            Token::Bold(Box::new(Token::PlainText("bold".into()))),
        );

        // Single asterisks produce italic
        assert_eq!(
            parse_full(r#"*italic*"#),
            Token::Italic(Box::new(Token::PlainText("italic".into()))),
        );

        // Whitespace just inside the delimiters prevents formatting
        assert_eq!(
            parse_full(r#"* italic *"#),
            Token::PlainText("* italic *".into())
        );

        // Underscores inside a word are left alone
        assert_eq!(
            parse_full("snake_case_variable"),
            Token::PlainText("snake_case_variable".into())
        );

        // Asterisks, unlike underscores, do format in the middle of a word
        assert_eq!(
            parse_full("intra*word*italic"),
            Token::Sequence(vec![
                Token::PlainText("intra".into()),
                Token::Italic(Box::new(Token::PlainText("word".into()))),
                Token::PlainText("italic".into())
            ])
        );

        // Mismatched delimiters stay plain text
        assert_eq!(
            parse_full(r#"_ italic *"#),
            Token::PlainText("_ italic *".into())
        );

        // Punctuation directly inside the delimiters is fine
        assert_eq!(
            parse_full(r#"*"italic"*"#),
            Token::Italic(Box::new(Token::PlainText("\"italic\"".into())))
        );

        // Inline code surrounded by plain text
        assert_eq!(
            parse_full(r#"not code `code` also not code"#),
            Token::Sequence(vec![
                Token::PlainText("not code ".into()),
                Token::InlineCode("code".into()),
                Token::PlainText(" also not code".into())
            ]),
        );

        // A backtick without a closing partner stays plain text
        assert_eq!(
            parse_full(r#"not code `code` also `not code"#),
            Token::Sequence(vec![
                Token::PlainText("not code ".into()),
                Token::InlineCode("code".into()),
                Token::PlainText(" also `not code".into())
            ]),
        );

        // Formatting markers inside inline code are not interpreted
        assert_eq!(
            parse_full(r#"not code `*not bold*` also not code"#),
            Token::Sequence(vec![
                Token::PlainText("not code ".into()),
                Token::InlineCode("*not bold*".into()),
                Token::PlainText(" also not code".into())
            ]),
        );

        // Triple asterisks produce a single bold-italic token
        assert_eq!(
            parse_full(r#"***bold italic***"#),
            Token::BoldItalic(Box::new(Token::PlainText("bold italic".into())))
        );

        // Nested HTML-style tags nest the corresponding tokens
        assert_eq!(
            parse_full(r#"<b><i>bold italic</i></b>"#),
            Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
                "bold italic".into()
            )))))
        );

        // An unclosed `~~` in front of an asterisk span containing a line break is
        // expected to come out entirely as plain text
        assert_eq!(
            parse_full("~~*hello\nworld*"),
            Token::PlainText("~~*hello\nworld*".into())
        )
    }

    #[test]
    fn parse_complex() {
        // Covers math, code, multi-line centered/quoted text, nested `$[...]`
        // functions, and mis-nested or quoted formatting tags.

        // Shorthand constructors for the leaf tokens used in the expectations.
        let plain = |text: &str| Token::PlainText(text.into());
        let emoji = |code: &str| Token::UnicodeEmoji(code.to_string());
        let user = |name: &str| Token::Mention {
            mention_type: crate::MentionType::User,
            name: name.to_string(),
            host: None,
        };

        // Inline math on a single line.
        let parsed = parse_full(r"\( nya^3 \)");
        assert_eq!(parsed, Token::InlineMath(" nya^3 ".to_string()));

        // The same delimiters with a line break inside are expected to stay plain text.
        let parsed = parse_full("\\( nya^3 \n \\)");
        assert_eq!(parsed, plain("\\( nya^3 \n \\)"));

        // Inline code on a single line.
        let parsed = parse_full(r"`AbstractProxyFactoryBean`");
        assert_eq!(
            parsed,
            Token::InlineCode("AbstractProxyFactoryBean".to_string())
        );

        // Backticks around a line break are expected to stay plain text.
        let parsed = parse_full("`let x = \n 5;`");
        assert_eq!(parsed, plain("`let x = \n 5;`"));

        // Fenced code block with a language tag.
        let fenced_code = r#"
```js
var x = undefined;
```"#;
        assert_eq!(
            parse_full(fenced_code),
            Token::BlockCode {
                lang: Some("js".to_string()),
                inner: "var x = undefined;".to_string(),
            }
        );

        // Block math.
        let block_math = r"
\[
a^2 + b^2 = c^2
\]";
        assert_eq!(
            parse_full(block_math),
            Token::BlockMath("a^2 + b^2 = c^2".to_string())
        );

        // Centered text spanning several lines, with emoji in the middle.
        let centered = r#"<center>centered
🦋🏳️‍⚧️
text</center>"#;
        let expected = Token::Center(Box::new(Token::Sequence(vec![
            plain("centered\n"),
            emoji("🦋"),
            emoji("🏳️‍⚧️"),
            plain("\ntext"),
        ])));
        assert_eq!(parse_full(centered), expected);

        // The same kind of content inside a quote.
        let quoted_center = r#"> <center>centered
> 👩🏽‍🤝‍👩🏼
> text</center>"#;
        let expected = Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
            plain("centered\n"),
            emoji("👩🏽‍🤝‍👩🏼"),
            plain("\ntext"),
        ])))));
        assert_eq!(parse_full(quoted_center), expected);

        // Nested functions, one of them carrying a flag and a key=value parameter.
        let spin_params = HashMap::from([
            ("y".into(), None),
            ("speed".into(), Some("5s".into())),
        ]);
        let expected = Token::Function {
            name: "x2".into(),
            params: HashMap::new(),
            inner: Box::new(Token::Sequence(vec![
                Token::Function {
                    name: "sparkle".into(),
                    params: HashMap::new(),
                    inner: Box::new(emoji("🥺")),
                },
                emoji("💜"),
                Token::Function {
                    name: "spin".into(),
                    params: spin_params,
                    inner: Box::new(emoji("❤️")),
                },
                emoji("🦊"),
            ])),
        };
        assert_eq!(
            parse_full(r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#),
            expected
        );

        // Overlapping <b>/<i> tags: the tags are expected to remain text while
        // the mentions between them are still recognized.
        let expected = Token::Sequence(vec![
            plain("<b>bold "),
            user("tag1"),
            plain(" <i> "),
            user("tag2"),
            plain(" </b>italic</i>"),
        ]);
        assert_eq!(
            parse_full(r#"<b>bold @tag1 <i> @tag2 </b>italic</i>"#),
            expected
        );

        // Multi-line quote containing an italic span and a nested quote.
        let quoted = r#"
> test
> <i>
> italic
> </i>
>> Nested quote
"#;
        let expected = Token::Quote(Box::new(Token::Sequence(vec![
            plain("test\n"),
            Token::Italic(Box::new(plain("\nitalic\n"))),
            Token::Quote(Box::new(plain("Nested quote"))),
        ])));
        assert_eq!(parse_full(quoted), expected);
    }

    #[test]
    fn parse_link() {
        // Table of (input, expected token tree) pairs for URL, link and
        // hashtag parsing; every row is checked the same way at the end.
        let plain = |text: &str| Token::PlainText(text.into());
        let no_embed = |url: &str| Token::UrlNoEmbed(url.to_string());
        let raw = |url: &str| Token::UrlRaw(url.to_string());
        // All labelled links in this test use the literal label "label".
        let labelled = |href: &str, embed: bool| Token::Link {
            label: Box::new(Token::PlainText("label".into())),
            href: href.to_string(),
            embed,
        };

        let cases: Vec<(&str, Token)> = vec![
            // Angle-bracketed URLs with IPv4-style hosts.
            (
                "IPv4 test: <https://0>",
                Token::Sequence(vec![plain("IPv4 test: "), no_embed("https://0")]),
            ),
            (
                "IPv4 test: <https://127.0.0.1>",
                Token::Sequence(vec![plain("IPv4 test: "), no_embed("https://127.0.0.1")]),
            ),
            // IPv6 literal hosts, bracketed and bare.
            (
                "IPv6 test: <https://[::2f:1]/nya>",
                Token::Sequence(vec![
                    plain("IPv6 test: "),
                    no_embed("https://[::2f:1]/nya"),
                ]),
            ),
            (
                "IPv6 test: https://[::2f:1]/nya",
                Token::Sequence(vec![plain("IPv6 test: "), raw("https://[::2f:1]/nya")]),
            ),
            // Internationalized domain name.
            (
                "IDN test: https://www.háčkyčárky.cz/",
                Token::Sequence(vec![
                    plain("IDN test: "),
                    raw("https://www.háčkyčárky.cz/"),
                ]),
            ),
            // Labelled link with embedding enabled.
            (
                "Link test: [label](https://example.com)",
                Token::Sequence(vec![
                    plain("Link test: "),
                    labelled("https://example.com", true),
                ]),
            ),
            // Hashtag surrounded by spaces.
            (
                "test #hashtag tail",
                Token::Sequence(vec![
                    plain("test "),
                    Token::Hashtag("hashtag".into()),
                    plain(" tail"),
                ]),
            ),
            // A `#` glued to the preceding word is not a hashtag.
            ("not#hashtag tail", plain("not#hashtag tail")),
            // A lone angle-bracketed URL.
            ("<https://example.com>", no_embed("https://example.com")),
            // Two angle-bracketed URLs back to back.
            (
                "<https://example.com/><https://awawa.gay/>",
                Token::Sequence(vec![
                    no_embed("https://example.com/"),
                    no_embed("https://awawa.gay/"),
                ]),
            ),
            // `?[...]` links have embedding disabled.
            (
                "Link test: ?[label](https://awawa.gay)",
                Token::Sequence(vec![
                    plain("Link test: "),
                    labelled("https://awawa.gay", false),
                ]),
            ),
            // Text directly after the link.
            (
                "Link test: ?[label](https://awawa.gay)test",
                Token::Sequence(vec![
                    plain("Link test: "),
                    labelled("https://awawa.gay", false),
                    plain("test"),
                ]),
            ),
            // Link wrapped in parentheses.
            (
                "Link test: (?[label](https://awawa.gay))",
                Token::Sequence(vec![
                    plain("Link test: ("),
                    labelled("https://awawa.gay", false),
                    plain(")"),
                ]),
            ),
            // Missing closing parenthesis: only the bare URL is recognized.
            (
                "Link test: ?[label](https://awawa.gay",
                Token::Sequence(vec![
                    plain("Link test: ?[label]("),
                    raw("https://awawa.gay"),
                ]),
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(parse_full(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn limit_nesting() {
        // Content placed inside DEFAULT_DEPTH_LIMIT levels of <b>; the expected
        // tree keeps this innermost part as unparsed plain text.
        let core = " <s><i>test</i></s> ";

        let input = format!(
            "{}{}{}",
            "<b>".repeat(DEFAULT_DEPTH_LIMIT),
            core,
            "</b>".repeat(DEFAULT_DEPTH_LIMIT)
        );

        // Wrap the plain text in one Bold per nesting level.
        let expected = (0..DEFAULT_DEPTH_LIMIT).fold(Token::PlainText(core.into()), |inner, _| {
            Token::Bold(Box::new(inner))
        });

        assert_eq!(parse_full(&input), expected);
    }

    #[test]
    fn parse_mention() {
        /// Builds the expected mention token for the given kind, name and optional host.
        fn mention(mention_type: crate::MentionType, name: &str, host: Option<&str>) -> Token {
            Token::Mention {
                mention_type,
                name: name.to_owned(),
                host: host.map(str::to_owned),
            }
        }

        /// Expected `@name[@host]` user mention.
        fn user(name: &str, host: Option<&str>) -> Token {
            mention(crate::MentionType::User, name, host)
        }

        /// Expected plain-text token.
        fn plain(text: &str) -> Token {
            Token::PlainText(text.into())
        }

        // A bare local mention.
        assert_eq!(parse_full("@tag"), user("tag", None));

        // An e-mail-like string is not a mention.
        let email = "email@notactuallyamenmtion.org";
        assert_eq!(parse_full(email), plain(email));

        // Local mention in the middle of text.
        let expected = vec![plain("hgsjlkdsa "), user("tag", None), plain(" fgahjsdkd")];
        assert_eq!(
            parse_full("hgsjlkdsa @tag fgahjsdkd"),
            Token::Sequence(expected)
        );

        // A trailing separator with no host stays in the following text.
        let expected = vec![
            plain("hgsjlkdsa "),
            user("tag", None),
            plain("@ fgahjsdkd"),
        ];
        assert_eq!(
            parse_full("hgsjlkdsa @tag@ fgahjsdkd"),
            Token::Sequence(expected)
        );

        // Mention with a host.
        let expected = vec![
            plain("aaaa "),
            user("tag", Some("domain")),
            plain(" bbbbb"),
        ];
        assert_eq!(
            parse_full("aaaa @tag@domain bbbbb"),
            Token::Sequence(expected)
        );

        // Punctuation right after the host is not part of it: comma...
        let expected = vec![
            plain("test "),
            user("tag", Some("domain")),
            plain(", test"),
        ];
        assert_eq!(
            parse_full("test @tag@domain, test"),
            Token::Sequence(expected)
        );

        // ...a sentence-ending dot after a dotted host...
        let expected = vec![
            plain("test "),
            user("tag", Some("domain.gay")),
            plain(". test"),
        ];
        assert_eq!(
            parse_full("test @tag@domain.gay. test"),
            Token::Sequence(expected)
        );

        // ...and a question mark.
        let expected = vec![
            plain("test "),
            user("tag", Some("domain")),
            plain("? test"),
        ];
        assert_eq!(
            parse_full("test @tag@domain? test"),
            Token::Sequence(expected)
        );

        // `!name@host` is a community mention.
        let expected = vec![
            plain("test "),
            mention(crate::MentionType::Community, "tag", Some("domain.com")),
            plain(" test"),
        ];
        assert_eq!(
            parse_full("test !tag@domain.com test"),
            Token::Sequence(expected)
        );

        // `@name:host` is a Matrix user mention.
        assert_eq!(
            parse_full("@tag:domain.com"),
            mention(crate::MentionType::MatrixUser, "tag", Some("domain.com")),
        );
    }

    #[test]
    fn parse_shortcodes() {
        /// Builds the expected shortcode emoji token.
        fn shortcode(code: &str, host: Option<&str>) -> Token {
            Token::ShortcodeEmoji {
                shortcode: code.to_owned(),
                host: host.map(str::to_owned),
            }
        }

        let cases: Vec<(&str, Token)> = vec![
            // A single shortcode.
            (":bottom:", shortcode("bottom", None)),
            // Two shortcodes directly next to each other.
            (
                ":bottom::blobfox:",
                Token::Sequence(vec![shortcode("bottom", None), shortcode("blobfox", None)]),
            ),
            // Shortcode qualified with a host.
            (
                ":bottom@magnetar.social:",
                shortcode("bottom", Some("magnetar.social")),
            ),
            // Shortcode-like text glued to a word on either side stays plain text.
            (
                ":bottom:blobfox",
                Token::PlainText(":bottom:blobfox".into()),
            ),
            (
                "bottom:blobfox:",
                Token::PlainText("bottom:blobfox:".into()),
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(parse_full(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn parse_emoji() {
        let emoji = |code: &str| Token::UnicodeEmoji(code.to_string());
        let plain = |text: &str| Token::PlainText(text.into());

        // A run of adjacent emoji yields one token per emoji.
        let expected = ["🥺", "💜", "❤️", "🦊"]
            .iter()
            .map(|&code| emoji(code))
            .collect::<Vec<_>>();
        assert_eq!(parse_full("🥺💜❤️🦊"), Token::Sequence(expected));

        // Trans flag: white flag + ZWJ + trans symbol is a single emoji.
        let parsed = parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}");
        assert_eq!(
            parsed,
            emoji("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}")
        );

        // A leading ZWJ is plain text; the white flag after it is an emoji.
        let parsed = parse_full("\u{0200d}\u{1f3f3}\u{0fe0f}");
        let expected = Token::Sequence(vec![
            plain("\u{0200d}"),
            emoji("\u{1f3f3}\u{0fe0f}"),
        ]);
        assert_eq!(parsed, expected);

        // With a ZWNJ between them, the flag and the trans symbol stay separate.
        let parsed = parse_full("\u{1f3f3}\u{0fe0f}\u{0200c}\u{026a7}\u{0fe0f}");
        let expected = Token::Sequence(vec![
            emoji("\u{1f3f3}\u{0fe0f}"),
            plain("\u{0200c}"),
            emoji("\u{026a7}\u{0fe0f}"),
        ]);
        assert_eq!(parsed, expected);

        // Trailing ZWJs with nothing to join become plain text after the flag.
        let parsed = parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{0200d}\u{0200d}");
        let expected = Token::Sequence(vec![
            emoji("\u{1f3f3}\u{0fe0f}"),
            plain("\u{0200d}\u{0200d}\u{0200d}"),
        ]);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn xml_serialization() {
        // Parses the input and serializes the resulting token tree to XML.
        let render = |input: &str| to_xml_string(&parse_full(input)).unwrap();

        // Bold-italic text.
        let xml = render("***nyaaa***");
        assert_eq!(&xml, r#"<mmm><b><i>nyaaa</i></b></mmm>"#);

        // Mention, function with a parameter, shortcode emoji and a <plain> tag.
        let xml = render("@natty $[spin.speed=0.5s 🥺]:cat_attack: <plain>test</plain>");
        assert_eq!(
            &xml,
            r#"<mmm><mention name="natty" type="user"/> <fn name="spin" arg-speed="0.5s"><ue>🥺</ue></fn><ee>cat_attack</ee> test</mmm>"#
        );

        // Multi-line text with hashtags.
        let xml = render(
            "Ring Galaxy AM 0644 741 from Hubble\nCredits: AURA, STScI, J. Higdon, Cornell, ESA, #NASA\n#nature #space #astrophotography",
        );
        let expected = r#"<mmm>Ring Galaxy AM 0644 741 from Hubble
Credits: AURA, STScI, J. Higdon, Cornell, ESA, <hashtag>NASA</hashtag>
<hashtag>nature</hashtag> <hashtag>space</hashtag> <hashtag>astrophotography</hashtag></mmm>"#;
        assert_eq!(&xml, expected);

        // Fenced code block followed by trailing spaces.
        let fenced_code = r#"
```js
var x = undefined;
```         "#;
        let xml = render(fenced_code);
        assert_eq!(
            &xml,
            "<mmm><code lang=\"js\">var x = undefined;</code></mmm>"
        );
    }
}
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								use compact_str::{CompactString, ToCompactString};
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								use either::Either;
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								use nom::branch::alt;
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								use nom::bytes::complete::{tag, tag_no_case};
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								use nom::character::complete::{
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								    satisfy, space1, tab,
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								};
-												MMM: Janky outer flanking rules implementation

											
										
										
											2023-10-23 22:27:54 +00:00
+								use nom::combinator::{eof, fail, map, not, opt, peek, recognize};
-												MMM: Fixed hashtag parsing

											
										
										
											2023-10-23 21:52:02 +00:00
+								use nom::error::ErrorKind;
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								use nom::multi::{many0_count, many1, many1_count, many_till, separated_list1};
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								use nom::sequence::tuple;
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								use nom::{IResult, Offset, Parser, Slice};
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								use nom_locate::LocatedSpan;
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								use quick_xml::events::{BytesText, Event};
-												MMM: Reexport in the SDK

											
										
										
											2023-10-25 17:45:59 +00:00
+								use serde::{Deserialize, Serialize};
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								use std::collections::HashMap;
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								use std::convert::{identity, Infallible};
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								use std::io::{Cursor, Write};
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								use std::marker::PhantomData;
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								use strum::IntoStaticStr;
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								use tracing::trace;
-												Emoji parsing

											
										
										
											2023-10-05 19:21:23 +00:00
+								use unicode_segmentation::UnicodeSegmentation;
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								#[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize, IntoStaticStr)]
 								// The alternative would be to implement a serde serializer for this one enum, but that's disgusting
 								#[strum(serialize_all = "snake_case")]
 								#[serde(rename_all = "snake_case")]
-												Mention parsing

											
										
										
											2023-10-05 18:05:03 +00:00
+								pub enum MentionType {
 								    Community,
 								    User,
-												MMM: Matrix handle parsing

											
										
										
											2023-10-26 19:23:59 +00:00
+								    MatrixUser,
-												Mention parsing

											
										
										
											2023-10-05 18:05:03 +00:00
+								}
 								impl MentionType {
 								    pub fn to_char(&self) -> char {
 								        match self {
 								            MentionType::Community => '!',
 								            MentionType::User => '@',
-												MMM: Matrix handle parsing

											
										
										
											2023-10-26 19:23:59 +00:00
+								            MentionType::MatrixUser => ':',
 								        }
 								    }
 								    pub fn separator(&self) -> char {
 								        match self {
 								            MentionType::Community | MentionType::User => '@',
 								            MentionType::MatrixUser => ':',
-												Mention parsing

											
										
										
											2023-10-05 18:05:03 +00:00
+								        }
 								    }
 								}
-												MMM: Reexport in the SDK

											
										
										
											2023-10-25 17:45:59 +00:00
+								#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								pub enum Token {
 								    PlainText(CompactString),
 								    Sequence(Vec<Token>),
 								    Quote(Box<Token>),
 								    Small(Box<Token>),
 								    BoldItalic(Box<Token>),
 								    Bold(Box<Token>),
 								    Italic(Box<Token>),
 								    Center(Box<Token>),
 								    Strikethrough(Box<Token>),
 								    PlainTag(String),
 								    InlineCode(String),
 								    InlineMath(String),
 								    UrlRaw(String),
 								    UrlNoEmbed(String),
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								    Link {
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								        label: Box<Token>,
 								        href: String,
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        embed: bool,
 								    },
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								    BlockCode {
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								        lang: Option<String>,
 								        inner: String,
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								    },
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    BlockMath(String),
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								    Function {
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								        name: String,
 								        params: HashMap<String, Option<String>>,
 								        inner: Box<Token>,
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								    },
-												Mention parsing

											
										
										
											2023-10-05 18:05:03 +00:00
+								    Mention {
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								        name: String,
 								        host: Option<String>,
-												Mention parsing

											
										
										
											2023-10-05 18:05:03 +00:00
+								        mention_type: MentionType,
 								    },
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    UnicodeEmoji(String),
-												User fetching with reactions and a user by tag endpoint

											
										
										
											2023-10-30 22:00:46 +00:00
+								    ShortcodeEmoji {
 								        shortcode: String,
 								        host: Option<String>,
 								    },
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    Hashtag(String),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								}
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								impl Token {
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								    fn str_content_left(&self) -> Option<&str> {
 								        match self {
 								            Token::PlainText(text) => Some(text.as_ref()),
 								            Token::Sequence(tokens) => tokens.first().and_then(Token::str_content_left),
 								            Token::Quote(inner) => inner.str_content_left(),
 								            Token::Small(inner) => inner.str_content_left(),
 								            Token::BoldItalic(inner) => inner.str_content_left(),
 								            Token::Bold(inner) => inner.str_content_left(),
 								            Token::Italic(inner) => inner.str_content_left(),
 								            Token::Center(inner) => inner.str_content_left(),
 								            Token::Strikethrough(inner) => inner.str_content_left(),
 								            Token::PlainTag(tag) => Some(tag.as_ref()),
 								            Token::UrlRaw(url) => Some(url.as_ref()),
 								            Token::UrlNoEmbed(url) => Some(url.as_ref()),
 								            Token::Link { label, .. } => label.str_content_left(),
 								            Token::Function { inner, .. } => inner.str_content_left(),
 								            Token::Mention { name, .. } => Some(name.as_ref()),
 								            Token::UnicodeEmoji(code) => Some(code.as_ref()),
 								            Token::Hashtag(tag) => Some(tag.as_ref()),
 								            _ => None,
 								        }
 								    }
 								    fn str_content_right(&self) -> Option<&str> {
 								        match self {
 								            Token::PlainText(text) => Some(text.as_ref()),
 								            Token::Sequence(tokens) => tokens.last().and_then(Token::str_content_right),
 								            Token::Quote(inner) => inner.str_content_right(),
 								            Token::Small(inner) => inner.str_content_right(),
 								            Token::BoldItalic(inner) => inner.str_content_right(),
 								            Token::Bold(inner) => inner.str_content_right(),
 								            Token::Italic(inner) => inner.str_content_right(),
 								            Token::Center(inner) => inner.str_content_right(),
 								            Token::Strikethrough(inner) => inner.str_content_right(),
 								            Token::PlainTag(tag) => Some(tag.as_ref()),
 								            Token::UrlRaw(url) => Some(url.as_ref()),
 								            Token::UrlNoEmbed(url) => Some(url.as_ref()),
 								            Token::Link { label, .. } => label.str_content_right(),
 								            Token::Function { inner, .. } => inner.str_content_right(),
 								            Token::Mention { name, .. } => Some(name.as_ref()),
 								            Token::UnicodeEmoji(code) => Some(code.as_ref()),
 								            Token::Hashtag(tag) => Some(tag.as_ref()),
 								            _ => None,
 								        }
 								    }
 								    fn inner(&self) -> Token {
 								        match self {
 								            plain @ Token::PlainText(_) => plain.clone(),
 								            sequence @ Token::Sequence(_) => sequence.clone(),
 								            Token::Quote(inner) => inner.inner(),
 								            Token::Small(inner) => inner.inner(),
 								            Token::BoldItalic(inner) => inner.inner(),
 								            Token::Bold(inner) => inner.inner(),
 								            Token::Italic(inner) => inner.inner(),
 								            Token::Center(inner) => inner.inner(),
 								            Token::Strikethrough(inner) => inner.inner(),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								            Token::PlainTag(text) => Token::PlainText(text.clone().into()),
 								            Token::InlineCode(code) => Token::PlainText(code.clone().into()),
 								            Token::InlineMath(math) => Token::PlainText(math.clone().into()),
 								            Token::UrlRaw(url) => Token::PlainText(url.clone().into()),
 								            Token::UrlNoEmbed(url) => Token::PlainText(url.clone().into()),
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            Token::Link { label, .. } => label.inner(),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								            Token::BlockCode { inner, .. } => Token::PlainText(inner.clone().into()),
 								            Token::BlockMath(math) => Token::PlainText(math.clone().into()),
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            Token::Function { inner, .. } => inner.inner(),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								            Token::Mention { name, .. } => Token::PlainText(name.clone().into()),
 								            Token::UnicodeEmoji(code) => Token::PlainText(code.clone().into()),
-												User fetching with reactions and a user by tag endpoint

											
										
										
											2023-10-30 22:00:46 +00:00
+								            Token::ShortcodeEmoji { shortcode, .. } => Token::PlainText(shortcode.clone().into()),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								            Token::Hashtag(tag) => Token::PlainText(tag.clone().into()),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        }
 								    }
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
 								    fn merged(&self) -> Token {
 								        match self {
 								            Token::Sequence(tokens) => {
 								                let tokens_multi = tokens.iter().fold(Vec::new(), |mut acc, tok| {
 								                    if let Some(Token::PlainText(last)) = acc.last_mut() {
 								                        if let Token::PlainText(tok_text) = tok {
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								                            *last += tok_text.as_ref();
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
 								                            return acc;
 								                        }
 								                    }
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								                    if let Token::Sequence(seq) = tok {
 								                        let items = seq.iter().map(Token::merged).flat_map(|t| match t {
 								                            Token::Sequence(seq) => Either::Left(seq.into_iter()),
 								                            other => Either::Right(std::iter::once(other)),
 								                        });
 								                        for item in items {
 								                            if let Some(Token::PlainText(last)) = acc.last_mut() {
 								                                if let Token::PlainText(tok_text) = item {
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								                                    *last += tok_text.as_ref();
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
 								                                    continue;
 								                                }
 								                            }
 								                            acc.push(item);
 								                        }
 								                        return acc;
 								                    }
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								                    acc.push(tok.merged());
 								                    acc
 								                });
 								                if tokens_multi.len() == 1 {
 								                    return tokens_multi.into_iter().next().unwrap();
 								                }
 								                Token::Sequence(tokens_multi)
 								            }
 								            Token::Quote(inner) => Token::Quote(Box::new(inner.merged())),
 								            Token::Small(inner) => Token::Small(Box::new(inner.merged())),
 								            Token::BoldItalic(inner) => Token::BoldItalic(Box::new(inner.merged())),
 								            Token::Bold(inner) => Token::Bold(Box::new(inner.merged())),
 								            Token::Italic(inner) => Token::Italic(Box::new(inner.merged())),
 								            Token::Center(inner) => Token::Center(Box::new(inner.merged())),
 								            Token::Strikethrough(inner) => Token::Strikethrough(Box::new(inner.merged())),
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								            Token::Link { embed, label, href } => Token::Link {
 								                label: Box::new(label.merged()),
 								                href: href.clone(),
 								                embed: *embed,
 								            },
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								            Token::Function {
 								                name,
 								                params,
 								                inner,
 								            } => Token::Function {
 								                name: name.clone(),
 								                params: params.clone(),
 								                inner: Box::new(inner.merged()),
 								            },
 								            other => other.clone(),
 								        }
 								    }
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
-												Generic caching and basic user fetching in the backend

											
										
										
											2023-10-27 22:33:09 +00:00
+								    pub fn walk_map_collect<T>(&self, func: &impl Fn(&Token) -> Option<T>, out: &mut Vec<T>) {
-												Instance meta cache and initial pack processing for users

											
										
										
											2023-10-26 23:41:48 +00:00
+								        if let Some(v) = func(self) {
 								            out.push(v)
 								        }
 								        match self {
 								            Token::Sequence(items) => {
-												Generic caching and basic user fetching in the backend

											
										
										
											2023-10-27 22:33:09 +00:00
+								                items.iter().for_each(|tok| tok.walk_map_collect(func, out));
-												Instance meta cache and initial pack processing for users

											
										
										
											2023-10-26 23:41:48 +00:00
+								            }
 								            Token::Quote(inner)
 								            | Token::Small(inner)
 								            | Token::BoldItalic(inner)
 								            | Token::Bold(inner)
 								            | Token::Italic(inner)
 								            | Token::Center(inner)
 								            | Token::Function { inner, .. }
 								            | Token::Link { label: inner, .. }
 								            | Token::Strikethrough(inner) => inner.walk_map_collect(func, out),
 								            _ => {}
 								        }
 								    }
-												Note by ID testing endpoint

											
										
										
											2023-10-29 01:10:48 +00:00
+								    pub fn walk_speech_transform(&mut self, func: &impl Fn(&mut CompactString)) {
 								        match self {
 								            Token::Sequence(items) => {
 								                items
 								                    .iter_mut()
 								                    .for_each(|tok| tok.walk_speech_transform(func));
 								            }
 								            Token::Small(inner)
 								            | Token::BoldItalic(inner)
 								            | Token::Bold(inner)
 								            | Token::Italic(inner)
 								            | Token::Center(inner)
 								            | Token::Function { inner, .. }
 								            | Token::Strikethrough(inner) => inner.walk_speech_transform(func),
 								            Token::PlainText(text) => func(text),
 								            _ => {}
 								        }
 								    }
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								    fn write<T: Write>(&self, writer: &mut quick_xml::Writer<T>) -> quick_xml::Result<()> {
 								        match self {
 								            Token::PlainText(plain) => {
 								                writer.write_event(Event::Text(BytesText::new(plain.as_str())))?;
 								            }
 								            Token::Sequence(sequence) => {
 								                sequence.iter().try_for_each(|item| item.write(writer))?;
 								            }
 								            Token::Quote(inner) => {
 								                writer
 								                    .create_element("quote")
 								                    .write_inner_content(|w| inner.write(w))?;
 								            }
 								            Token::Small(inner) => {
 								                writer
 								                    .create_element("small")
 								                    .write_inner_content(|w| inner.write(w))?;
 								            }
 								            Token::BoldItalic(inner) => {
 								                writer
 								                    .create_element("b")
 								                    .write_inner_content::<_, quick_xml::Error>(|w| {
 								                        w.create_element("i")
 								                            .write_inner_content(|w| inner.write(w))?;
 								                        Ok(())
 								                    })?;
 								            }
 								            Token::Bold(inner) => {
 								                writer
 								                    .create_element("b")
 								                    .write_inner_content(|w| inner.write(w))?;
 								            }
 								            Token::Italic(inner) => {
 								                writer
 								                    .create_element("i")
 								                    .write_inner_content(|w| inner.write(w))?;
 								            }
 								            Token::Center(inner) => {
 								                writer
 								                    .create_element("center")
 								                    .write_inner_content(|w| inner.write(w))?;
 								            }
 								            Token::Strikethrough(inner) => {
 								                writer
 								                    .create_element("s")
 								                    .write_inner_content(|w| inner.write(w))?;
 								            }
 								            Token::PlainTag(plain) => {
 								                writer.write_event(Event::Text(BytesText::new(plain.as_str())))?;
 								            }
 								            Token::InlineCode(code) => {
 								                writer
 								                    .create_element("inline-code")
 								                    .write_text_content(BytesText::new(code))?;
 								            }
 								            Token::InlineMath(math) => {
 								                writer
 								                    .create_element("inline-math")
 								                    .write_text_content(BytesText::new(math))?;
 								            }
 								            Token::UrlRaw(url) => {
 								                writer
 								                    .create_element("a")
 								                    .with_attribute(("href", url.as_str()))
 								                    .write_text_content(BytesText::new(url))?;
 								            }
 								            Token::UrlNoEmbed(url) => {
 								                writer
 								                    .create_element("a")
 								                    .with_attribute(("href", url.as_str()))
 								                    .with_attribute(("embed", "false"))
 								                    .write_text_content(BytesText::new(url))?;
 								            }
 								            Token::Link { label, href, embed } => {
 								                writer
 								                    .create_element("a")
 								                    .with_attribute(("href", href.as_str()))
 								                    .with_attribute(("embed", if *embed { "true" } else { "false" }))
 								                    .write_inner_content(|w| label.write(w))?;
 								            }
 								            Token::BlockCode { inner, lang } => {
 								                let mut ew = writer.create_element("code");
 								                if let Some(language) = lang {
 								                    ew = ew.with_attribute(("lang", language.as_str()));
 								                }
 								                ew.write_text_content(BytesText::new(inner))?;
 								            }
 								            Token::BlockMath(math) => {
 								                writer
 								                    .create_element("math")
 								                    .write_text_content(BytesText::new(math))?;
 								            }
 								            Token::Function {
 								                inner,
 								                name,
 								                params,
 								            } => {
 								                let mut ew = writer
 								                    .create_element("fn")
 								                    .with_attribute(("name", name.as_str()));
 								                for (k, v) in params {
 								                    ew = ew
 								                        .with_attribute((format!("arg-{k}").as_str(), v.as_deref().unwrap_or("")));
 								                }
 								                ew.write_inner_content(|w| inner.write(w))?;
 								            }
 								            Token::Mention {
 								                name,
 								                host,
 								                mention_type,
 								            } => {
 								                let mut ew = writer
 								                    .create_element("mention")
 								                    .with_attribute(("name", name.as_str()))
 								                    .with_attribute(("type", mention_type.into()));
 								                if let Some(host) = host {
 								                    ew = ew.with_attribute(("host", host.as_str()));
 								                }
 								                ew.write_empty()?;
 								            }
 								            Token::UnicodeEmoji(text) => {
 								                writer
 								                    .create_element("ue")
 								                    .write_text_content(BytesText::new(text))?;
 								            }
-												User fetching with reactions and a user by tag endpoint

											
										
										
											2023-10-30 22:00:46 +00:00
+								            Token::ShortcodeEmoji { shortcode, host } => {
 								                let mut ew = writer.create_element("ee");
 								                if let Some(host) = host {
 								                    ew = ew.with_attribute(("host", host.as_str()));
 								                }
 								                ew.write_text_content(BytesText::new(shortcode))?;
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								            }
 								            Token::Hashtag(tag) => {
 								                writer
 								                    .create_element("hashtag")
-												MMM: Fixed hashtag serialization

											
										
										
											2023-10-29 11:52:20 +00:00
+								                    .write_text_content(BytesText::new(tag.as_str()))?;
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								            }
 								        }
 								        Ok(())
 								    }
 								}
 								/// Serializes `token` to an XML string wrapped in a root `<mmm>` element.
 								///
 								/// The document is written into an in-memory buffer and then converted to
 								/// a `String`.
 								///
 								/// # Errors
 								///
 								/// Returns an error if writing any token fails or if the produced bytes
 								/// are not valid UTF-8.
 								pub fn to_xml_string(token: &Token) -> quick_xml::Result<String> {
 								    let mut writer = quick_xml::Writer::new(Cursor::new(Vec::new()));
 								    writer
 								        .create_element("mmm")
 								        .write_inner_content(|writer| token.write(writer))?;
 								    // First `into_inner` yields the `Cursor`, the second its byte buffer.
 								    Ok(String::from_utf8(writer.into_inner().into_inner())?)
 								}
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								#[derive(Debug, Default, Copy, Clone)]
 								pub struct SpanMeta {
 								    depth: usize,
 								}
 								impl SpanMeta {
 								    fn new(depth: usize) -> Self {
 								        Self { depth }
 								    }
 								}
 								// Input type used by the parsers in this file: a located `&str` slice
 								// that carries a `SpanMeta` as its extra data.
 								type Span<'a> = LocatedSpan<&'a str, SpanMeta>;
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
 								/// Helpers for taking the part of an input that precedes another position
 								/// in the same input.
 								trait SliceOffset {
 								    /// Returns the part of `self` that comes before `other`.
 								    fn up_to(&self, other: &Self) -> Self;
 								    /// Like [`SliceOffset::up_to`], but returns the underlying string slice.
 								    fn fragment_between<'a>(&self, other: &Self) -> &'a str
 								    where
 								        Self: 'a;
 								}
 								impl SliceOffset for Span<'_> {
 								    /// Slices `self` from its start up to the offset at which `other` begins,
 								    /// as reported by `Offset::offset`.
 								    fn up_to(&self, other: &Self) -> Self {
 								        let end = self.offset(other);
 								        self.slice(..end)
 								    }
 								    /// Returns the text of `self` that precedes `other` as a plain `&str`.
 								    fn fragment_between<'a>(&self, other: &Self) -> &'a str
 								    where
 								        Self: 'a,
 								    {
 								        let prefix = self.up_to(other);
 								        prefix.into_fragment()
 								    }
 								}
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								#[inline]
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								fn boxing_token(func: impl Fn(Box<Token>) -> Token) -> impl Fn(Token) -> Token {
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								    move |tokens| func(Box::new(tokens))
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								}
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								#[inline]
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								fn collect_sequence<T>(
 								    func: impl Fn(Vec<T>) -> Token,
 								    transform: impl Fn(Token) -> Token,
 								) -> impl Fn(&mut dyn Iterator<Item = T>) -> Token {
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								    move |tokens| transform(func(tokens.collect()))
 								}
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								#[inline]
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								fn collect_char_sequence(
 								    func: impl Fn(String) -> Token,
 								) -> impl Fn(&mut dyn Iterator<Item = char>) -> Token {
 								    move |chars| func(chars.collect())
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								}
-												MMM: Fixed hashtag parsing

											
										
										
											2023-10-23 21:52:02 +00:00
+								#[inline]
 								fn space1_unicode(input: Span) -> IResult<Span, Span> {
 								    recognize(many1_count(tuple((
 								        not(line_ending),
 								        satisfy(char::is_whitespace),
 								    ))))(input)
 								}
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								#[inline]
 								fn alphanumeric1_unicode(input: Span) -> IResult<Span, Span> {
 								    recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
 								}
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								fn spliced<'a>(
 								    segments: &[Span<'a>],
 								    func: impl Fn(Span) -> IResult<Span, Token>,
 								    parent: Span<'a>,
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								) -> IResult<Span<'a>, Token, nom::error::Error<Span<'a>>> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								    let combined = segments
 								        .iter()
 								        .copied()
 								        .map(Span::into_fragment)
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								        .collect::<Vec<_>>()
 								        .join("\n");
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								    let cum_offset_combined = segments
 								        .iter()
 								        .scan(0, |acc, &x| {
 								            *acc += x.len();
 								            Some(*acc)
 								        })
 								        .collect::<Vec<_>>();
 								    let current_seg = |input: Span| {
 								        cum_offset_combined
 								            .iter()
 								            .enumerate()
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								            .take_while(|(_, &o)| o > input.location_offset())
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            .map(|(i, o)| (segments[i], o))
 								            .last()
 								    };
 								    type NE<E> = nom::Err<E>;
 								    type NomError<'x> = nom::error::Error<Span<'x>>;
-												MMM: Coverage and naming fix

											
										
										
											2023-10-26 16:38:45 +00:00
+								    let spliced_span = Span::new_extra(
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								        &combined,
 								        segments.first().map_or(SpanMeta::new(0), |s| s.extra),
 								    );
-												MMM: Coverage and naming fix

											
										
										
											2023-10-26 16:38:45 +00:00
+								    let (input, inner) = match func(spliced_span) {
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								        Ok(s) => s,
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        Err(e) => {
 								            return match e {
 								                NE::Error(e) => {
 								                    let offset_new = e.input.location_offset();
 								                    if let Some((seg_parent, offset_seg_new)) = current_seg(e.input) {
 								                        let offset = offset_new - offset_seg_new;
 								                        let offset_orig = offset + seg_parent.location_offset();
 								                        Err(NE::Error(NomError::new(
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								                            Span::new_extra(
 								                                &parent.into_fragment()[offset_orig..],
 								                                seg_parent.extra,
 								                            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								                            e.code,
 								                        )))
 								                    } else {
 								                        // ???
 								                        Err(NE::Failure(NomError::new(parent, ErrorKind::Fail)))
 								                    }
 								                }
 								                NE::Failure(e) => Err(NE::Error(NomError::new(parent, e.code))),
 								                NE::Incomplete(i) => Err(NE::Incomplete(i)),
 								            };
 								        }
 								    };
 								    let out = if let Some((seg_parent, offset_seg_new)) = current_seg(input) {
 								        let offset = input.location_offset() - offset_seg_new;
 								        let offset_orig = offset + seg_parent.location_offset();
 								        parent.slice(offset_orig..)
 								    } else {
 								        parent
 								    };
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    Ok((out, inner))
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								}
 								fn space(input: Span) -> IResult<Span, Token> {
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								    let (input, frag) = recognize(alt((one_char('\u{0020}'), one_char('\u{3000}'), tab)))(input)?;
 								    Ok((input, Token::PlainText(frag.into_fragment().into())))
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								}
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								#[derive(Copy, Clone)]
 								struct Matcher<'a, 'b, T: Clone> {
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								    matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								    _phantom_closure: PhantomData<&'a ()>,
 								    _phantom_data: PhantomData<&'b ()>,
 								    _phantom_output: PhantomData<fn() -> T>,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								}
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> {
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								    fn new(
 								        matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								        collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token + 'a),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								    ) -> Self {
 								        Self {
 								            matcher_inner,
 								            collector,
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            _phantom_closure: PhantomData,
 								            _phantom_data: PhantomData,
 								            _phantom_output: PhantomData,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        }
 								    }
 								}
 								impl<'a, 'b> Matcher<'a, 'b, Infallible> {
 								    /// Creates a matcher whose parser is nom's `fail`, so it never matches.
 								    ///
 								    /// Its collector is `unreachable!()` and panics if it is ever called.
 								    // Don't break this invariant, else a monster will come at night and eat all your socks
 								    fn reject() -> Self {
 								        Self {
 								            matcher_inner: &fail::<_, Infallible, _>,
 								            collector: &|_| unreachable!(),
 								            _phantom_closure: PhantomData,
 								            _phantom_data: PhantomData,
 								            _phantom_output: PhantomData,
 								        }
 								    }
 								}
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								#[derive(Copy, Clone, Debug)]
 								enum FlankingRule {
 								    Lenient,
 								    Strict,
 								    DontCare,
 								}
 								/// A delimiter parser together with the flanking rule that applies to it.
 								struct FlankingDelim<'a, T>(T, FlankingRule, PhantomData<&'a ()>)
 								where
 								    T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>;
 								/// Builds a delimiter from an explicit `(parser, rule)` pair.
 								impl<'a, T> From<(T, FlankingRule)> for FlankingDelim<'a, T>
 								where
 								    T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>,
 								{
 								    fn from(pair: (T, FlankingRule)) -> Self {
 								        let (func, rule) = pair;
 								        Self(func, rule, PhantomData)
 								    }
 								}
 								/// Builds a delimiter from a bare parser, using `FlankingRule::DontCare`.
 								impl<'a, T> From<T> for FlankingDelim<'a, T>
 								where
 								    T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>,
 								{
 								    fn from(func: T) -> Self {
 								        Self(func, FlankingRule::DontCare, PhantomData)
 								    }
 								}
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								pub struct Context {
 								    depth_limit: usize,
 								}
 								const DEFAULT_DEPTH_LIMIT: usize = 24;
 								impl Default for Context {
 								    fn default() -> Self {
 								        Context::new(DEFAULT_DEPTH_LIMIT)
 								    }
 								}
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
 								impl Context {
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								    pub fn new(depth_limit: usize) -> Self {
 								        Self { depth_limit }
 								    }
 								    /// Parses `input` with the full grammar and returns the merged token tree.
 								    ///
 								    /// If the parser fails, the failure is traced and the error's text is
 								    /// returned as a single `Token::PlainText`.
 								    pub fn parse_full(&self, input: &str) -> Token {
 								        let span = Span::new_extra(input, SpanMeta::default());
 								        self.full(span)
 								            .map(|(_, parsed)| parsed.merged())
 								            .unwrap_or_else(|err| {
 								                trace!(input = input, "Full parser fail: {:?}", err);
 								                Token::PlainText(err.to_compact_string())
 								            })
 								    }
 								    /// Parses `input` with the inline grammar and returns the merged token
 								    /// tree.
 								    ///
 								    /// If the parser fails, the failure is traced and the error's text is
 								    /// returned as a single `Token::PlainText`.
 								    pub fn parse_inline(&self, input: &str) -> Token {
 								        // Use the inline parser here; calling `self.full` made this method
 								        // behave exactly like `parse_full`.
 								        match self.inline(Span::new_extra(input, SpanMeta::default())) {
 								            Ok((_, t)) => t.merged(),
 								            Err(e) => {
 								                trace!(input = input, "Inline parser fail: {:?}", e);
 								                Token::PlainText(e.to_compact_string())
 								            }
 								        }
 								    }
 								    /// Parses `input` with the `inline_ui` parser and returns the merged token
 								    /// tree.
 								    ///
 								    /// If the parser fails, the failure is traced and the error's text is
 								    /// returned as a single `Token::PlainText`.
 								    pub fn parse_ui(&self, input: &str) -> Token {
 								        let span = Span::new_extra(input, SpanMeta::default());
 								        self.inline_ui(span)
 								            .map(|(_, parsed)| parsed.merged())
 								            .unwrap_or_else(|err| {
 								                trace!(input = input, "Inline parser fail: {:?}", err);
 								                Token::PlainText(err.to_compact_string())
 								            })
 								    }
-												MMM: Profile field parsing and skipping Matrix handles

											
										
										
											2023-10-26 19:08:51 +00:00
+								    pub fn parse_profile_fields(&self, input: &str) -> Token {
 								        match self.inline_profile_fields(Span::new_extra(input, SpanMeta::default())) {
 								            Ok((_, t)) => t.merged(),
 								            Err(e) => {
 								                trace!(input = input, "Profile field parser fail: {:?}", e);
 								                Token::PlainText(e.to_compact_string())
 								            }
 								        }
 								    }
-												Minor cleanup

											
										
										
											2023-10-04 17:44:27 +00:00
+								    #[inline]
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								    fn partial(
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        &self,
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								        func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token> + 'static,
 								    ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token> + '_ {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        move |input| func(self, input)
 								    }
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								    #[inline]
 								    fn partial_span(
 								        &self,
 								        func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'static,
 								    ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>> + '_ {
 								        move |input| func(self, input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        map(
 								            many1(self.partial(Self::inline_label_safe_single)),
 								            Token::Sequence,
 								        )(input)
 								    }
-												MMM: Profile field parsing and skipping Matrix handles

											
										
										
											2023-10-26 19:08:51 +00:00
+								    fn inline_profile_fields<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
 								        map(
 								            many1(alt((
 								                self.partial(Self::unicode_emoji),
 								                self.partial(Self::tag_mention),
 								                self.partial(Self::tag_hashtag),
 								                self.partial(Self::raw_url),
 								                self.partial(Self::tag_raw_text),
 								            ))),
 								            Token::Sequence,
 								        )(input)
 								    }
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								    fn inline_ui<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
 								        map(
 								            many1(alt((
 								                self.partial(Self::unicode_emoji),
 								                self.partial(Self::shortcode_emoji),
 								                self.partial(Self::tag_raw_text),
 								            ))),
 								            Token::Sequence,
 								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        alt((
 								            self.partial(Self::tag_bold_italic_asterisk),
 								            self.partial(Self::tag_bold_italic_underscore),
 								            self.partial(Self::tag_bold_asterisk),
 								            self.partial(Self::tag_italic_asterisk),
 								            self.partial(Self::tag_bold_underscore),
 								            self.partial(Self::tag_italic_underscore),
 								        ))(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        let (input, token) = alt((
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								            self.increase_nesting(alt((
 								                self.partial(Self::unicode_emoji),
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								                self.partial(Self::tag_block_center),
 								                self.partial(Self::tag_small),
 								                self.partial(Self::tag_plain),
 								                self.partial(Self::tag_bold),
 								                self.partial(Self::tag_italic),
 								                self.partial(Self::tag_strikethrough),
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								                self.partial(Self::url_no_embed),
 								                self.partial(Self::base_bold_italic),
 								                self.partial(Self::tag_block_code),
 								                self.partial(Self::tag_inline_code),
 								                self.partial(Self::tag_quote),
 								                self.partial(Self::tag_block_math),
 								                self.partial(Self::tag_inline_math),
 								                self.partial(Self::tag_strikethrough_tilde),
 								                self.partial(Self::tag_func),
 								                self.partial(Self::tag_mention),
 								                self.partial(Self::tag_hashtag),
 								                self.partial(Self::shortcode_emoji),
 								                self.partial(Self::link),
 								                self.partial(Self::raw_url),
 								            ))),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								            self.partial(Self::tag_raw_text),
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        ))(input)?;
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        Ok((input, token))
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								        alt((
 								            self.increase_nesting(alt((
 								                self.partial(Self::unicode_emoji),
 								                self.partial(Self::tag_small),
 								                self.partial(Self::tag_plain),
 								                self.partial(Self::tag_bold),
 								                self.partial(Self::tag_italic),
 								                self.partial(Self::tag_strikethrough),
 								                self.partial(Self::url_no_embed),
 								                self.partial(Self::base_bold_italic),
 								                self.partial(Self::tag_inline_code),
 								                self.partial(Self::tag_inline_math),
 								                self.partial(Self::tag_strikethrough_tilde),
 								                self.partial(Self::tag_func),
 								                self.partial(Self::tag_mention),
 								                self.partial(Self::tag_hashtag),
 								                self.partial(Self::shortcode_emoji),
 								                self.partial(Self::link),
 								                self.partial(Self::raw_url),
 								            ))),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								            self.partial(Self::tag_raw_text),
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								        ))(input)
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								        let (input, token) = alt((
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								            self.increase_nesting(alt((
 								                self.partial(Self::unicode_emoji),
 								                self.partial(Self::url_no_embed),
 								                self.partial(Self::tag_inline_code),
 								                self.partial(Self::tag_inline_math),
 								                self.partial(Self::tag_func),
 								                self.partial(Self::tag_mention),
 								                self.partial(Self::tag_hashtag),
 								                self.partial(Self::shortcode_emoji),
 								                self.partial(Self::raw_url),
 								            ))),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								            self.partial(Self::tag_raw_text),
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								        ))(input)?;
 								        Ok((input, token))
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        let (input, token) = alt((
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								            self.increase_nesting(alt((
 								                self.partial(Self::unicode_emoji),
 								                self.partial(Self::tag_small),
 								                self.partial(Self::tag_plain),
 								                self.partial(Self::tag_bold),
 								                self.partial(Self::tag_italic),
 								                self.partial(Self::tag_strikethrough),
 								                self.partial(Self::base_bold_italic),
 								                self.partial(Self::tag_strikethrough_tilde),
 								                self.partial(Self::tag_func),
 								                self.partial(Self::shortcode_emoji),
 								            ))),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								            self.partial(Self::tag_raw_text),
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        ))(input)?;
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        Ok((input, token))
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_quote<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        let (input, leading_spaces) = tuple((opt(line_ending), opt(line_ending)))(input)?;
 								        if let (None, None) = leading_spaces {
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								            if input.get_column() != 1 {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								                return fail(input);
 								            }
 								        }
 								        let quote_line = |input| tuple((tag(">"), opt(space), not_line_ending))(input);
 								        let orig_input = input;
 								        let (input, lines) = separated_list1(line_ending, quote_line)(input)?;
 								        let quote_lines = lines
 								            .into_iter()
 								            .map(|(_, _, text)| text)
 								            .collect::<Vec<_>>();
 								        if quote_lines.len() == 1
 								            && quote_lines
 								                .iter()
 								                .map(Span::fragment)
 								                .copied()
 								                .any(&str::is_empty)
 								        {
 								            return fail(input);
 								        }
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								        let (_, inner) = spliced(&quote_lines, self.partial(Self::full), orig_input)?;
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
 								        let (input, _) = tuple((opt(line_ending), opt(line_ending)))(input)?;
 								        Ok((input, Token::Quote(Box::new(inner))))
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_block_center<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								        let tag_start = &tag("<center>");
 								        let tag_end = &tag("</center>");
 								        let (input, _) = opt(line_ending)(input)?;
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        if input.get_column() != 1 {
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								            return fail(input);
 								        }
 								        let (input, _) = tag_start(input)?;
 								        let (input, _) = opt(line_ending)(input)?;
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        let (input, (center_seq, _)) = many_till(
 								            self.partial(Self::inline_single),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            tuple((opt(space1), opt(line_ending), tag_end)),
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        )(input)?;
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								        Ok((
 								            input,
 								            boxing_token(Token::Center)(Token::Sequence(center_seq)),
 								        ))
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_block_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Code block parsing

											
										
										
											2023-10-05 20:25:29 +00:00
+								        let delim = &tag("```");
 								        let (input, _) = opt(line_ending)(input)?;
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        if input.get_column() != 1 {
-												Code block parsing

											
										
										
											2023-10-05 20:25:29 +00:00
+								            return fail(input);
 								        }
 								        let (input, _) = delim(input)?;
 								        let (input, lang) = opt(map(
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								            recognize(many1(tuple((not(delim), not(line_ending), anychar)))),
-												Code block parsing

											
										
										
											2023-10-05 20:25:29 +00:00
+								            Span::into_fragment,
 								        ))(input)?;
 								        let (input, _) = line_ending(input)?;
 								        let (input, code) = map(
 								            recognize(many1_count(tuple((
 								                not(tuple((line_ending, delim))),
 								                anychar,
 								            )))),
 								            Span::into_fragment,
 								        )(input)?;
 								        let (input, _) = line_ending(input)?;
 								        let (input, _) = delim(input)?;
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								        // Trailing whitespace after the triple backtick
 								        let (input, _) = opt(space1_unicode)(input)?;
 								        // If we got this far, the next character should be a line ending
 								        let (input, _) = not(tuple((not(line_ending), anychar)))(input)?;
-												Code block parsing

											
										
										
											2023-10-05 20:25:29 +00:00
+								        let (input, _) = opt(line_ending)(input)?;
 								        Ok((
 								            input,
 								            Token::BlockCode {
 								                lang: lang.map(<&str>::into),
 								                inner: code.into(),
 								            },
 								        ))
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_block_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Minor cleanup

											
										
										
											2023-10-04 17:44:27 +00:00
+								        let start = &tag("\\[");
 								        let end = &tag("\\]");
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								        let (input, _) = opt(line_ending)(input)?;
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        if input.get_column() != 1 {
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								            return fail(input);
 								        }
-												Minor cleanup

											
										
										
											2023-10-04 17:44:27 +00:00
+								        let (input, _) = start(input)?;
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								        let (input, _) = opt(line_ending)(input)?;
 								        let (input, math_span) = recognize(many1_count(tuple((
-												Minor cleanup

											
										
										
											2023-10-04 17:44:27 +00:00
+								            not(tuple((opt(line_ending), end))),
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								            not_line_ending,
 								        ))))(input)?;
 								        let (input, _) = opt(line_ending)(input)?;
-												Minor cleanup

											
										
										
											2023-10-04 17:44:27 +00:00
+								        let (input, _) = end(input)?;
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								        // Trailing whitespace after the closing delim
 								        let (input, _) = opt(space1_unicode)(input)?;
 								        // If we got this far, the next character should be a line ending
 								        let (input, _) = not(tuple((not(line_ending), anychar)))(input)?;
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								        let (input, _) = opt(line_ending)(input)?;
 								        Ok((
 								            input,
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								            Token::BlockMath(math_span.into_fragment().to_string()),
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								        ))
 								    }
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								    #[inline]
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								    fn tag_delimited<'a, 'b: 'a, T: Clone, S: Clone, FOpen, FClose>(
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        &'a self,
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        opening_tag: impl Into<FlankingDelim<'b, FOpen>> + 'a,
 								        closing_tag: impl Into<FlankingDelim<'b, FClose>> + 'a,
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        escape: bool,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        matcher: Matcher<'a, 'b, T>,
 								        fallback: Matcher<'a, 'b, S>,
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token> + '_
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								    where
 								        FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
 								        FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
 								    {
 								        let FlankingDelim(opening_tag, opening_rule, ..) = opening_tag.into();
 								        let FlankingDelim(closing_tag, closing_rule, ..) = closing_tag.into();
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        move |input| {
 								            if escape {
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								                if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
 								                    return Ok((
 								                        input_escaped,
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								                        Token::PlainText(mark.fragment().to_string().into()),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								                    ));
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								                }
 								            }
-												MMM: Janky outer flanking rules implementation

											
										
										
											2023-10-23 22:27:54 +00:00
+								            if let FlankingRule::Strict = opening_rule {
 								                let (input, pre) =
 								                    opt(recognize(tuple((alphanumeric1_unicode, &opening_tag))))(input)?;
 								                if let Some(pre_text) = pre {
 								                    return Ok((input, Token::PlainText(pre_text.into_fragment().into())));
 								                }
 								            }
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            let begin = input;
 								            let (post_open, _) = opening_tag(input)?;
 								            let res = tuple((
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								                many1(tuple((not(&closing_tag), &matcher.matcher_inner))),
 								                &closing_tag,
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            ))(post_open);
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								            if let Err(nom::Err::Error(nom::error::Error {
 								                input: input_past_err,
 								                ..
 								            })) = res
 								            {
 								                let res_fallback = tuple((
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								                    many1(tuple((not(&closing_tag), &fallback.matcher_inner))),
 								                    &closing_tag,
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								                ))(post_open);
 								                if res_fallback.is_err() {
 								                    return Ok((
 								                        input_past_err,
 								                        Token::PlainText(begin.fragment_between(&input_past_err).into()),
 								                    ));
 								                }
 								                let (input, (inner, closing)) = res_fallback.unwrap();
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								                let mut inner = inner.into_iter().map(|(_, t)| t);
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								                return Ok((
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								                    input,
 								                    Token::Sequence(vec![
 								                        Token::PlainText(begin.fragment_between(&post_open).into()),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								                        ((fallback.collector)(&mut inner)),
-												Janky sequence unnesting and attempting to salvage nested parsing in incorrect formatting tags

											
										
										
											2023-10-06 23:46:20 +00:00
+								                        Token::PlainText(closing.into_fragment().into()),
 								                    ]),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								                ));
 								            }
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            let (input, (inner, closing)) = res?;
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            let mut inner = inner.into_iter().map(|(_, t)| t);
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            let inner_tok = (matcher.collector)(&mut inner);
 								            let correct_left_flanking =
 								                if let FlankingRule::Lenient | FlankingRule::Strict = opening_rule {
 								                    let text_left = inner_tok.str_content_left();
 								                    !(text_left.is_some_and(|s| s.starts_with(char::is_whitespace))
 								                        || text_left.is_none())
 								                } else {
 								                    true
 								                };
 								            let correct_right_flanking =
 								                if let FlankingRule::Lenient | FlankingRule::Strict = closing_rule {
 								                    let text_right = inner_tok.str_content_right();
 								                    !(text_right.is_some_and(|s| s.ends_with(char::is_whitespace))
 								                        || text_right.is_none())
 								                } else {
 								                    true
 								                };
-												MMM: Janky outer flanking rules implementation

											
										
										
											2023-10-23 22:27:54 +00:00
+								            let (input, alphanum) = opt(peek(alphanumeric1_unicode))(input)?;
 								            let correct_right_outer =
 								                alphanum.is_none() || !matches!(closing_rule, FlankingRule::Strict);
 								            let correct_flanking =
 								                correct_left_flanking && correct_right_flanking && correct_right_outer;
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
 								            if !correct_flanking {
 								                return Ok((
 								                    input,
 								                    Token::Sequence(vec![
 								                        Token::PlainText(begin.fragment_between(&post_open).into()),
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								                        inner_tok.inner(),
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								                        Token::PlainText(closing.into_fragment().into()),
 								                    ]),
 								                ));
 								            }
 								            Ok((input, Token::Sequence(vec![inner_tok])))
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        }
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_func<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								        let (input, _) = tag("$[")(input)?;
 								        let func_ident = |input| {
 								            recognize(tuple((
 								                many1_count(alt((alpha1, tag("_")))),
 								                many0_count(alt((alphanumeric1, tag("_")))),
 								            )))(input)
 								        };
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								        let arg_value = recognize(many1_count(alt((
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								            alphanumeric1,
 								            tag("."),
 								            tag("-"),
 								            tag("_"),
 								        ))));
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        let (input, func_name) = map(func_ident, Span::into_fragment)(input)?;
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								        let arg = tuple((func_ident, opt(tuple((tag("="), arg_value)))));
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
 								        let (input, args) =
 								            opt(tuple((one_char('.'), separated_list1(one_char(','), arg))))(input)?;
 								        let args_out = args.map_or_else(HashMap::new, |(_, items)| {
 								            items
 								                .into_iter()
 								                .map(|(k, v)| {
 								                    (
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								                        k.into_fragment().to_string(),
 								                        v.map(|(_, val)| val.into_fragment().to_string()),
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								                    )
 								                })
 								                .collect::<HashMap<_, _>>()
 								        });
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        let (input, _) = opt(space)(input)?;
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        let (input, (inner, _)) = many_till(self.partial(Self::inline_single), tag("]"))(input)?;
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
 								        Ok((
 								            input,
 								            Token::Function {
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								                name: func_name.to_string(),
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								                params: args_out,
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								                inner: Box::new(Token::Sequence(inner)),
-												Implemented MFM functions and math and center blocks

											
										
										
											2023-10-04 17:31:03 +00:00
+								            },
 								        ))
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        let opening_tag = &tag("<plain>");
 								        let closing_tag = &tag("</plain>");
-												Plain tag parsing

											
										
										
											2023-10-05 20:12:51 +00:00
 								        let (input, _) = opening_tag(input)?;
 								        let (input, text) = map(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            recognize(many1(tuple((not(line_ending), not(closing_tag), anychar)))),
-												Plain tag parsing

											
										
										
											2023-10-05 20:12:51 +00:00
+								            Span::into_fragment,
 								        )(input)?;
 								        let (input, _) = closing_tag(input)?;
 								        Ok((input, Token::PlainTag(text.into())))
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            tag_no_case("<small>"),
 								            tag_no_case("</small>"),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            false,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &self.partial(Self::inline_single),
 								                &collect_sequence(Token::Sequence, boxing_token(Token::Small)),
 								            ),
 								            Matcher::new(
 								                &self.partial(Self::inline_non_formatting_single),
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            (tag("***"), FlankingRule::Lenient),
 								            (tag("***"), FlankingRule::Lenient),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            true,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &self.partial(Self::inline_single),
 								                &collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
 								            ),
 								            Matcher::new(
 								                &self.partial(Self::inline_non_formatting_single),
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            (tag("___"), FlankingRule::Strict),
 								            (tag("___"), FlankingRule::Strict),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            true,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &self.partial(Self::inline_single),
 								                &collect_sequence(Token::Sequence, boxing_token(Token::BoldItalic)),
 								            ),
 								            Matcher::new(
 								                &self.partial(Self::inline_non_formatting_single),
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            tag_no_case("<b>"),
 								            tag_no_case("</b>"),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            false,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &self.partial(Self::inline_single),
 								                &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
 								            ),
 								            Matcher::new(
 								                &self.partial(Self::inline_non_formatting_single),
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            (tag("**"), FlankingRule::Lenient),
 								            (tag("**"), FlankingRule::Lenient),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            true,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &self.partial(Self::inline_single),
 								                &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
 								            ),
 								            Matcher::new(
 								                &self.partial(Self::inline_non_formatting_single),
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            (tag("__"), FlankingRule::Strict),
 								            (tag("__"), FlankingRule::Strict),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            true,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &self.partial(Self::inline_single),
 								                &collect_sequence(Token::Sequence, boxing_token(Token::Bold)),
 								            ),
 								            Matcher::new(
 								                &self.partial(Self::inline_non_formatting_single),
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            tag_no_case("<i>"),
 								            tag_no_case("</i>"),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            false,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &self.partial(Self::inline_single),
 								                &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
 								            ),
 								            Matcher::new(
 								                &self.partial(Self::inline_non_formatting_single),
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            (tag("*"), FlankingRule::Lenient),
 								            (tag("*"), FlankingRule::Lenient),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            true,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &self.partial(Self::inline_single),
 								                &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
 								            ),
 								            Matcher::new(
 								                &self.partial(Self::inline_non_formatting_single),
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            (tag("_"), FlankingRule::Strict),
 								            (tag("_"), FlankingRule::Strict),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            true,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &self.partial(Self::inline_single),
 								                &collect_sequence(Token::Sequence, boxing_token(Token::Italic)),
 								            ),
 								            Matcher::new(
 								                &self.partial(Self::inline_non_formatting_single),
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            tag_no_case("<s>"),
 								            tag_no_case("</s>"),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            false,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &self.partial(Self::inline_single),
 								                &collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
 								            ),
 								            Matcher::new(
 								                &self.partial(Self::inline_non_formatting_single),
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            (tag("~~"), FlankingRule::Lenient),
 								            (tag("~~"), FlankingRule::Lenient),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            true,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &move |input| {
 								                    map(
 								                        tuple(((not(line_ending)), self.partial(Self::inline_single))),
 								                        |(_, captured)| captured,
 								                    )(input)
 								                },
 								                &collect_sequence(Token::Sequence, boxing_token(Token::Strikethrough)),
 								            ),
 								            Matcher::new(
 								                &move |input| {
 								                    map(
 								                        tuple((
 								                            (not(line_ending)),
 								                            self.partial(Self::inline_non_formatting_single),
 								                        )),
 								                        |(_, captured)| captured,
 								                    )(input)
 								                },
 								                &collect_sequence(Token::Sequence, identity),
 								            ),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_inline_code<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            tag("`"),
 								            |input| alt((tag("`"), tag("´")))(input),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            true,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &move |input| {
 								                    map(
 								                        tuple((not(alt((tag("`"), tag("´"), line_ending))), anychar)),
 								                        |(_, captured)| captured,
 								                    )(input)
 								                },
 								                &collect_char_sequence(Token::InlineCode),
 								            ),
 								            Matcher::reject(),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_inline_math<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        self.tag_delimited(
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            tag("\\("),
 								            tag("\\)"),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								            false,
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Matcher::new(
 								                &move |input| {
 								                    map(tuple((not(line_ending), anychar)), |(_, captured)| captured)(input)
 								                },
 								                &collect_char_sequence(Token::InlineMath),
 								            ),
 								            Matcher::reject(),
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								        )(input)
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_raw_text<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
 								        let (input, text) = anychar(input)?;
 								        Ok((input, Token::PlainText(text.to_compact_string())))
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								    }
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        let (input, url_span) = recognize(tuple((
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								            self.partial_span(Self::protocol),
 								            self.url_chars(
 								                |input| recognize(not(self.partial_span(Self::url_chars_base)))(input),
 								                false,
 								            ),
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        )))(input)?;
 								        let url = url_span.into_fragment();
 								        let url_bytes = url.as_bytes();
 								        // Strip punctuation at the end of sentences that might have been consumed as a part of the URL
 								        let final_url = if matches!(url_bytes.last(), Some(b'.' | b',' | b'?')) {
 								            url.slice(..url.len() - 1)
 								        } else {
 								            url
 								        };
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								        Ok((input, Token::UrlRaw(final_url.to_string())))
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        let (input, _) = tag("<")(input)?;
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								        let (input, url_span) = recognize(tuple((
 								            self.partial_span(Self::protocol),
 								            self.url_chars(tag(">"), true),
 								        )))(input)?;
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        let (input, _) = tag(">")(input)?;
-												Stricter URL parsing

											
										
										
											2023-10-07 19:22:21 +00:00
+								        Ok((
 								            input,
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								            Token::UrlNoEmbed(url_span.into_fragment().to_string()),
-												Stricter URL parsing

											
										
										
											2023-10-07 19:22:21 +00:00
+								        ))
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        let (input, no_embed) = opt(tag("?"))(input)?;
 								        let (input, _) = tag("[")(input)?;
 								        let (input, _) = not(tag("["))(input)?;
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								        let (input, (label_tok, _)) =
 								            many_till(self.partial(Self::inline_label_safe_single), tag("]("))(input)?;
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								        let (input, url_span) = recognize(tuple((
 								            self.partial_span(Self::protocol),
 								            self.url_chars(tag(")"), true),
 								        )))(input)?;
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        let (input, _) = tag(")")(input)?;
 								        Ok((
 								            input,
 								            Token::Link {
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								                label: Box::new(Token::Sequence(label_tok)),
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								                href: url_span.into_fragment().into(),
 								                embed: no_embed.is_none(),
 								            },
 								        ))
 								    }
-												Mention parsing

											
										
										
											2023-10-05 18:05:03 +00:00
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn unicode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Emoji parsing

											
										
										
											2023-10-05 19:21:23 +00:00
+								        let frag = input.fragment();
 								        let Some(grapheme) = frag.graphemes(true).next() else {
 								            return fail(input);
 								        };
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        let grapheme = grapheme.trim_end_matches(|c| c == '\u{200c}' || c == '\u{200d}');
-												Emoji parsing

											
										
										
											2023-10-05 19:21:23 +00:00
+								        let emoji = emojis::get(grapheme);
 								        if emoji.is_none() {
 								            return fail(input);
 								        }
 								        Ok((
 								            input.slice(grapheme.len()..),
 								            Token::UnicodeEmoji(grapheme.into()),
 								        ))
 								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        if let (plain_out, Some(plain)) = map(
 								            opt(recognize(tuple((
 								                alphanumeric1_unicode,
 								                self.partial(Self::shortcode_emoji),
 								            )))),
 								            |o| o.map(Span::into_fragment),
 								        )(input)?
 								        {
 								            return Ok((plain_out, Token::PlainText(plain.into())));
 								        }
-												Shortcode emoji parsing

											
										
										
											2023-10-05 20:32:53 +00:00
+								        let (input, _) = tag(":")(input)?;
 								        let (input, shortcode) = map(
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            recognize(many1(alt((
 								                alphanumeric1_unicode,
 								                recognize(one_of("_+-")),
 								            )))),
-												Shortcode emoji parsing

											
										
										
											2023-10-05 20:32:53 +00:00
+								            Span::into_fragment,
 								        )(input)?;
-												User fetching with reactions and a user by tag endpoint

											
										
										
											2023-10-30 22:00:46 +00:00
+								        let (input, host) = opt(map(
 								            tuple((
 								                tag("@"),
 								                map(
 								                    recognize(many1(alt((alphanumeric1, recognize(one_of("-.")))))),
 								                    Span::into_fragment,
 								                ),
 								            )),
 								            |(_at, host)| host,
 								        ))(input)?;
-												Shortcode emoji parsing

											
										
										
											2023-10-05 20:32:53 +00:00
+								        let (input, _) = tag(":")(input)?;
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        let (input, _) = not(alphanumeric1_unicode)(input)?;
-												Shortcode emoji parsing

											
										
										
											2023-10-05 20:32:53 +00:00
-												User fetching with reactions and a user by tag endpoint

											
										
										
											2023-10-30 22:00:46 +00:00
+								        Ok((
 								            input,
 								            Token::ShortcodeEmoji {
 								                shortcode: shortcode.into(),
 								                host: host.map(str::to_string),
 								            },
 								        ))
-												Shortcode emoji parsing

											
										
										
											2023-10-05 20:32:53 +00:00
+								    }
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        if let (plain_out, Some(plain)) = map(
 								            opt(recognize(tuple((
 								                alt((tag("\\"), alphanumeric1_unicode)),
 								                self.partial(Self::tag_mention),
 								            )))),
 								            |o| o.map(Span::into_fragment),
 								        )(input)?
 								        {
 								            return Ok((plain_out, Token::PlainText(plain.into())));
 								        }
-												Mention parsing

											
										
										
											2023-10-05 18:05:03 +00:00
 								        let tags = one_of("@!");
 								        let (input, mention_type) = map(tags, |c| match c {
 								            '@' => MentionType::User,
 								            '!' => MentionType::Community,
 								            _ => unreachable!(),
 								        })(input)?;
 								        let (input, name) = map(
 								            recognize(many1(alt((alphanumeric1, recognize(one_of("-_")))))),
 								            Span::into_fragment,
 								        )(input)?;
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								        let before = input;
-												MMM: Matrix handle parsing

											
										
										
											2023-10-26 19:23:59 +00:00
+								        let (_, host_opt) = opt(tuple((
 								            one_of(if matches!(mention_type, MentionType::User) {
 								                "@:"
 								            } else {
 								                "@"
 								            }),
 								            map(
 								                recognize(many1(alt((alphanumeric1, recognize(one_of("-_.")))))),
 								                Span::into_fragment,
 								            ),
 								        )))(input)?;
-												Mention parsing

											
										
										
											2023-10-05 18:05:03 +00:00
-												MMM: Matrix handle parsing

											
										
										
											2023-10-26 19:23:59 +00:00
+								        // Promote tags with a colon separator to Matrix handles
 								        let mention_type = if let Some((':', _)) = host_opt {
 								            MentionType::MatrixUser
 								        } else {
 								            mention_type
 								        };
 								        let host =
 								            host_opt.map(|(_, name)| name.trim_end_matches(|c| matches!(c, '.' | '-' | '_')));
-												MMM: Profile field parsing and skipping Matrix handles

											
										
										
											2023-10-26 19:08:51 +00:00
+								        let input = host.map(|c| before.slice(c.len() + 1..)).unwrap_or(before);
-												Mention parsing

											
										
										
											2023-10-05 18:05:03 +00:00
+								        Ok((
-												MMM: Profile field parsing and skipping Matrix handles

											
										
										
											2023-10-26 19:08:51 +00:00
+								            input,
-												Mention parsing

											
										
										
											2023-10-05 18:05:03 +00:00
+								            Token::Mention {
 								                mention_type,
 								                name: name.into(),
 								                host: host.map(|h| h.into()),
 								            },
 								        ))
 								    }
-												Hashtag parsing

											
										
										
											2023-10-05 20:02:46 +00:00
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								    fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-												MMM: Fixed hashtag parsing

											
										
										
											2023-10-23 21:52:02 +00:00
+								        let (input, maybe_preceded) =
 								            opt(recognize(tuple((alphanumeric1_unicode, tag("#")))))(input)?;
 								        if let Some(preceded) = maybe_preceded {
 								            return Ok((input, Token::PlainText(preceded.into_fragment().into())));
 								        }
-												Hashtag parsing

											
										
										
											2023-10-05 20:02:46 +00:00
 								        let (input, _) = tag("#")(input)?;
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								        let (input, hashtag_text) = map(
 								            recognize(many1(self.partial_span(Self::hashtag_chars))),
 								            Span::into_fragment,
 								        )(input)?;
-												Hashtag parsing

											
										
										
											2023-10-05 20:02:46 +00:00
 								        Ok((input, Token::Hashtag(hashtag_text.into())))
 								    }
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								    #[inline]
 								    fn increase_nesting<'a, 'b, O, F>(
 								        &'b self,
 								        mut func: F,
 								    ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O> + 'b
 								    where
 								        F: Parser<Span<'a>, O, nom::error::Error<Span<'a>>> + 'b,
 								    {
 								        move |mut input| {
 								            if input.extra.depth >= self.depth_limit {
 								                return fail(input);
 								            }
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								            input.extra.depth += 1;
 								            func.parse(input)
 								        }
 								    }
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								    #[inline]
 								    fn hashtag_chars<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
 								        recognize(alt((
 								            recognize(tuple((
 								                tag("("),
 								                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
 								                tag(")"),
 								            ))),
 								            recognize(tuple((
 								                tag("["),
 								                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
 								                tag("]"),
 								            ))),
 								            recognize(tuple((
 								                tag("「"),
 								                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
 								                tag("」"),
 								            ))),
 								            recognize(tuple((
 								                tag("（"),
 								                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
 								                tag("）"),
 								            ))),
 								            recognize(tuple((
-												MMM: Fixed hashtag parsing

											
										
										
											2023-10-23 21:52:02 +00:00
+								                not(space1_unicode),
 								                not(line_ending),
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								                not(one_of(".,:;!?#?/[]【】()「」（）<>")),
 								                anychar,
 								            ))),
 								        )))(input)
 								    }
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								    #[inline]
 								    fn protocol<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
 								        alt((tag("https://"), tag("http://")))(input)
 								    }
 								    #[inline]
 								    fn url_chars_base<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
 								        alt((
 								            alphanumeric1_unicode,
 								            recognize(tuple((
 								                tag("["),
 								                many_till(
 								                    self.increase_nesting(self.partial_span(Self::url_chars_base)),
 								                    tag("]"),
 								                ),
 								            ))),
 								            recognize(tuple((
 								                tag("("),
 								                many_till(
 								                    self.increase_nesting(self.partial_span(Self::url_chars_base)),
 								                    tag(")"),
 								                ),
 								            ))),
 								            recognize(one_of(".,_/:%#$&?!~=+-@")),
 								        ))(input)
 								    }
 								    #[inline]
 								    fn url_chars<'a, 'b, F>(
 								        &'b self,
 								        mut terminator: F,
 								        spaces: bool,
 								    ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'b
 								    where
 								        F: Parser<Span<'a>, Span<'a>, nom::error::Error<Span<'a>>> + 'b,
 								    {
 								        move |input| {
 								            recognize(many1_count(tuple((
 								                not(tuple((space1, eof))),
 								                not(tuple((space1, tag("\"")))),
 								                not(tuple((opt(space1), |input| terminator.parse(input)))),
 								                alt((
 								                    |input| self.url_chars_base(input),
 								                    if spaces { space1 } else { fail },
 								                )),
 								            ))))(input)
 								        }
 								    }
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								}
 								#[cfg(test)]
 								mod test {
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
+								    use crate::{to_xml_string, Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT};
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								    use nom::bytes::complete::tag;
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								    use std::collections::HashMap;
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								    fn parse_full(string: &str) -> Token {
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								        Context::default()
 								            .full(Span::new_extra(string, SpanMeta::default()))
 								            .unwrap()
 								            .1
 								            .merged()
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								    }
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								    #[test]
 								    fn parse_url_chars() {
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								        let ctx = Context::default();
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        assert_eq!(
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								            ctx.url_chars(tag(")"), true)(Span::new_extra(
 								                "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
 								                SpanMeta::default()
-												Stricter URL parsing

											
										
										
											2023-10-07 19:22:21 +00:00
+								            ))
 								            .unwrap()
 								            .1
 								            .into_fragment(),
 								            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)"
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        );
 								        assert_eq!(
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								            ctx.url_chars(tag(")"), true)(Span::new_extra(
 								                "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))",
 								                SpanMeta::default()
-												Stricter URL parsing

											
										
										
											2023-10-07 19:22:21 +00:00
+								            ))
 								            .unwrap()
 								            .1
 								            .into_fragment(),
 								            "https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        );
 								        assert_eq!(
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								            ctx.url_chars(tag(")"), true)(Span::new_extra(
 								                "https://cs.wikipedia.org/wiki/Among_Us  ",
 								                SpanMeta::default()
 								            ))
 								            .unwrap()
 								            .1
 								            .into_fragment(),
-												Stricter URL parsing

											
										
										
											2023-10-07 19:22:21 +00:00
+								            "https://cs.wikipedia.org/wiki/Among_Us",
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        );
 								        assert_eq!(
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								            ctx.url_chars(tag(")"), true)(Span::new_extra(
 								                "https://cs.wikipedia.org/wiki/Among Us  )",
 								                SpanMeta::default()
 								            ))
 								            .unwrap()
 								            .1
 								            .into_fragment(),
-												Stricter URL parsing

											
										
										
											2023-10-07 19:22:21 +00:00
+								            "https://cs.wikipedia.org/wiki/Among Us"
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        );
 								        assert_eq!(
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								            ctx.url_chars(tag(")"), false)(Span::new_extra(
 								                "https://en.wikipedia.org/wiki/Among Us  )",
 								                SpanMeta::default()
 								            ))
 								            .unwrap()
 								            .1
 								            .into_fragment(),
-												Stricter URL parsing

											
										
										
											2023-10-07 19:22:21 +00:00
+								            "https://en.wikipedia.org/wiki/Among"
-												Implemented URL parsing

											
										
										
											2023-10-05 17:09:26 +00:00
+								        );
 								    }
-												Emoji parsing

											
										
										
											2023-10-05 19:21:23 +00:00
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								    #[test]
 								    fn parse_formatting() {
 								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full(r#"~~stikethrough~~"#),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Strikethrough(Box::new(Token::PlainText("stikethrough".into()))),
 								        );
 								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full(r#"**bold**"#),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Bold(Box::new(Token::PlainText("bold".into()))),
 								        );
 								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full(r#"*italic*"#),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Italic(Box::new(Token::PlainText("italic".into()))),
 								        );
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        assert_eq!(
 								            parse_full(r#"* italic *"#),
 								            Token::PlainText("* italic *".into())
 								        );
-												MMM: Janky outer flanking rules implementation

											
										
										
											2023-10-23 22:27:54 +00:00
+								        assert_eq!(
 								            parse_full("snake_case_variable"),
 								            Token::PlainText("snake_case_variable".into())
 								        );
 								        assert_eq!(
 								            parse_full("intra*word*italic"),
 								            Token::Sequence(vec![
 								                Token::PlainText("intra".into()),
 								                Token::Italic(Box::new(Token::PlainText("word".into()))),
 								                Token::PlainText("italic".into())
 								            ])
 								        );
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        assert_eq!(
 								            parse_full(r#"_ italic *"#),
 								            Token::PlainText("_ italic *".into())
 								        );
 								        assert_eq!(
 								            parse_full(r#"*"italic"*"#),
 								            Token::Italic(Box::new(Token::PlainText("\"italic\"".into())))
 								        );
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full(r#"not code `code` also not code"#),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Sequence(vec![
 								                Token::PlainText("not code ".into()),
 								                Token::InlineCode("code".into()),
 								                Token::PlainText(" also not code".into())
 								            ]),
 								        );
 								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full(r#"not code `code` also `not code"#),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Sequence(vec![
 								                Token::PlainText("not code ".into()),
 								                Token::InlineCode("code".into()),
 								                Token::PlainText(" also `not code".into())
 								            ]),
 								        );
 								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full(r#"not code `*not bold*` also not code"#),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Sequence(vec![
 								                Token::PlainText("not code ".into()),
 								                Token::InlineCode("*not bold*".into()),
 								                Token::PlainText(" also not code".into())
 								            ]),
 								        );
 								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full(r#"***bold italic***"#),
 								            Token::BoldItalic(Box::new(Token::PlainText("bold italic".into())))
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        );
 								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full(r#"<b><i>bold italic</i></b>"#),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Bold(Box::new(Token::Italic(Box::new(Token::PlainText(
 								                "bold italic".into()
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            )))))
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        );
-												MMM: Coverage and naming fix

											
										
										
											2023-10-26 16:38:45 +00:00
 								        assert_eq!(
 								            parse_full("~~*hello\nworld*"),
 								            Token::PlainText("~~*hello\nworld*".into())
 								        )
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								    }
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
    /// Covers math and code spans and blocks, centered and quoted content with
    /// multi-codepoint emoji, nested `$[...]` functions, and mis-nested tags.
    #[test]
    fn parse_complex() {
        // Inline math on a single line.
        assert_eq!(
            parse_full(r"\( nya^3 \)"),
            Token::InlineMath(" nya^3 ".to_string())
        );
        // A line break between the inline math delimiters yields plain text.
        assert_eq!(
            parse_full("\\( nya^3 \n \\)"),
            Token::PlainText("\\( nya^3 \n \\)".into())
        );
        // Inline code on a single line.
        assert_eq!(
            parse_full(r"`AbstractProxyFactoryBean`"),
            Token::InlineCode("AbstractProxyFactoryBean".to_string())
        );
        // A line break between the backticks yields plain text.
        assert_eq!(
            parse_full("`let x = \n 5;`"),
            Token::PlainText("`let x = \n 5;`".into())
        );
        // Fenced code block with a language tag; the fences start at column 0.
        assert_eq!(
            parse_full(
                r#"
```js
var x = undefined;
```"#
            ),
            Token::BlockCode {
                lang: Some("js".to_string()),
                inner: "var x = undefined;".to_string(),
            }
        );
        // Block math; the delimiters start at column 0.
        assert_eq!(
            parse_full(
                r"
\[
a^2 + b^2 = c^2
\]"
            ),
            Token::BlockMath("a^2 + b^2 = c^2".to_string())
        );
        // Centered text containing two adjacent emoji, one of them a ZWJ sequence.
        assert_eq!(
            parse_full(
                r#"<center>centered
🦋🏳️‍⚧️
text</center>"#
            ),
            Token::Center(Box::new(Token::Sequence(vec![
                Token::PlainText("centered\n".into()),
                Token::UnicodeEmoji("🦋".into()),
                Token::UnicodeEmoji("🏳️‍⚧️".into()),
                Token::PlainText("\ntext".into())
            ])))
        );
        // The same construct inside a quote, with a multi-person ZWJ emoji.
        assert_eq!(
            parse_full(
                r#"> <center>centered
> 👩🏽‍🤝‍👩🏼
> text</center>"#
            ),
            Token::Quote(Box::new(Token::Center(Box::new(Token::Sequence(vec![
                Token::PlainText("centered\n".into()),
                Token::UnicodeEmoji("👩🏽‍🤝‍👩🏼".into()),
                Token::PlainText("\ntext".into())
            ]))))),
        );
        // Nested functions, including one with a flag and a key=value parameter.
        assert_eq!(
            parse_full(r#"$[x2 $[sparkle 🥺]💜$[spin.y,speed=5s ❤️]🦊]"#),
            Token::Function {
                name: "x2".into(),
                params: HashMap::new(),
                inner: Box::new(Token::Sequence(vec![
                    Token::Function {
                        name: "sparkle".into(),
                        params: HashMap::new(),
                        inner: Box::new(Token::UnicodeEmoji("🥺".into())),
                    },
                    Token::UnicodeEmoji("💜".into()),
                    Token::Function {
                        name: "spin".into(),
                        params: {
                            let mut params = HashMap::new();
                            params.insert("y".into(), None);
                            params.insert("speed".into(), Some("5s".into()));
                            params
                        },
                        inner: Box::new(Token::UnicodeEmoji("❤️".into())),
                    },
                    Token::UnicodeEmoji("🦊".into()),
                ]))
            },
        );
        // Mis-nested tags stay plain text, but the mentions inside are still parsed.
        assert_eq!(
            parse_full(r#"<b>bold @tag1 <i> @tag2 </b>italic</i>"#),
            Token::Sequence(vec![
                Token::PlainText("<b>bold ".into()),
                Token::Mention {
                    mention_type: crate::MentionType::User,
                    name: "tag1".into(),
                    host: None
                },
                Token::PlainText(" <i> ".into()),
                Token::Mention {
                    mention_type: crate::MentionType::User,
                    name: "tag2".into(),
                    host: None
                },
                Token::PlainText(" </b>italic</i>".into())
            ]),
        );
        // Multi-line quote containing an italic tag and a nested quote.
        assert_eq!(
            parse_full(
                r#"
> test
> <i>
> italic
> </i>
>> Nested quote
"#
            ),
            Token::Quote(Box::new(Token::Sequence(vec![
                Token::PlainText("test\n".into()),
                Token::Italic(Box::new(Token::PlainText("\nitalic\n".into()))),
                Token::Quote(Box::new(Token::PlainText("Nested quote".into())))
            ]))),
        );
    }
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								    #[test]
 								    fn parse_link() {
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        assert_eq!(
 								            parse_full("IPv4 test: <https://0>"),
 								            Token::Sequence(vec![
 								                Token::PlainText("IPv4 test: ".into()),
 								                Token::UrlNoEmbed("https://0".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("IPv4 test: <https://127.0.0.1>"),
 								            Token::Sequence(vec![
 								                Token::PlainText("IPv4 test: ".into()),
 								                Token::UrlNoEmbed("https://127.0.0.1".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("IPv6 test: <https://[::2f:1]/nya>"),
 								            Token::Sequence(vec![
 								                Token::PlainText("IPv6 test: ".into()),
 								                Token::UrlNoEmbed("https://[::2f:1]/nya".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("IPv6 test: https://[::2f:1]/nya"),
 								            Token::Sequence(vec![
 								                Token::PlainText("IPv6 test: ".into()),
 								                Token::UrlRaw("https://[::2f:1]/nya".into())
 								            ])
 								        );
 								        // IDNs
 								        assert_eq!(
 								            parse_full("IDN test: https://www.háčkyčárky.cz/"),
 								            Token::Sequence(vec![
 								                Token::PlainText("IDN test: ".into()),
 								                Token::UrlRaw("https://www.háčkyčárky.cz/".into())
 								            ])
 								        );
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								        assert_eq!(
 								            parse_full("Link test: [label](https://example.com)"),
 								            Token::Sequence(vec![
 								                Token::PlainText("Link test: ".into()),
 								                Token::Link {
 								                    label: Box::new(Token::PlainText("label".into())),
 								                    href: "https://example.com".into(),
 								                    embed: true
 								                }
 								            ])
 								        );
-												MMM: Fixed hashtag parsing

											
										
										
											2023-10-23 21:52:02 +00:00
+								        assert_eq!(
 								            parse_full("test #hashtag tail"),
 								            Token::Sequence(vec![
 								                Token::PlainText("test ".into()),
 								                Token::Hashtag("hashtag".into()),
 								                Token::PlainText(" tail".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("not#hashtag tail"),
 								            Token::PlainText("not#hashtag tail".into())
 								        );
-												Stricter URL parsing

											
										
										
											2023-10-07 19:22:21 +00:00
+								        assert_eq!(
 								            parse_full("<https://example.com>"),
 								            Token::UrlNoEmbed("https://example.com".into())
 								        );
 								        // Adjacent links okay
 								        assert_eq!(
 								            parse_full("<https://example.com/><https://awawa.gay/>"),
 								            Token::Sequence(vec![
 								                Token::UrlNoEmbed("https://example.com/".into()),
 								                Token::UrlNoEmbed("https://awawa.gay/".into())
 								            ])
 								        );
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								        assert_eq!(
 								            parse_full("Link test: ?[label](https://awawa.gay)"),
 								            Token::Sequence(vec![
 								                Token::PlainText("Link test: ".into()),
 								                Token::Link {
 								                    label: Box::new(Token::PlainText("label".into())),
 								                    href: "https://awawa.gay".into(),
 								                    embed: false
 								                }
 								            ])
 								        );
-												Stricter URL parsing

											
										
										
											2023-10-07 19:22:21 +00:00
+								        assert_eq!(
 								            parse_full("Link test: ?[label](https://awawa.gay)test"),
 								            Token::Sequence(vec![
 								                Token::PlainText("Link test: ".into()),
 								                Token::Link {
 								                    label: Box::new(Token::PlainText("label".into())),
 								                    href: "https://awawa.gay".into(),
 								                    embed: false
 								                },
 								                Token::PlainText("test".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("Link test: (?[label](https://awawa.gay))"),
 								            Token::Sequence(vec![
 								                Token::PlainText("Link test: (".into()),
 								                Token::Link {
 								                    label: Box::new(Token::PlainText("label".into())),
 								                    href: "https://awawa.gay".into(),
 								                    embed: false
 								                },
 								                Token::PlainText(")".into())
 								            ])
 								        );
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								        assert_eq!(
 								            parse_full("Link test: ?[label](https://awawa.gay"), // Missing closing bracket
 								            Token::Sequence(vec![
 								                Token::PlainText("Link test: ?[label](".into()),
 								                Token::UrlRaw("https://awawa.gay".into()),
 								            ])
 								        );
 								    }
-												MMM: Nesting-limited parsing

											
										
										
											2023-10-16 21:45:45 +00:00
+								    #[test]
 								    fn limit_nesting() {
 								        // Content placed inside `DEFAULT_DEPTH_LIMIT` levels of `<b>`; the test expects it
 								        // back verbatim as plain text, i.e. its own `<s>`/`<i>` tags are not parsed.
 								        let innermost = " <s><i>test</i></s> ";
 								        // Expected tree: the plain text wrapped in `DEFAULT_DEPTH_LIMIT` nested `Bold` tokens.
 								        let expected = (0..DEFAULT_DEPTH_LIMIT)
 								            .fold(Token::PlainText(innermost.into()), |wrapped, _| {
 								                Token::Bold(Box::new(wrapped))
 								            });
 								        let input = format!(
 								            "{}{}{}",
 								            "<b>".repeat(DEFAULT_DEPTH_LIMIT),
 								            innermost,
 								            "</b>".repeat(DEFAULT_DEPTH_LIMIT)
 								        );
 								        assert_eq!(parse_full(&input), expected);
 								    }
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								    #[test]
 								    fn parse_mention() {
 								        assert_eq!(
 								            parse_full("@tag"),
 								            Token::Mention {
 								                mention_type: crate::MentionType::User,
 								                name: "tag".into(),
 								                host: None
 								            }
 								        );
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        assert_eq!(
 								            parse_full("email@notactuallyamenmtion.org"),
 								            Token::PlainText("email@notactuallyamenmtion.org".into())
 								        );
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								        assert_eq!(
 								            parse_full("hgsjlkdsa @tag fgahjsdkd"),
 								            Token::Sequence(vec![
 								                Token::PlainText("hgsjlkdsa ".into()),
 								                Token::Mention {
 								                    mention_type: crate::MentionType::User,
 								                    name: "tag".into(),
 								                    host: None
 								                },
 								                Token::PlainText(" fgahjsdkd".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("hgsjlkdsa @tag@ fgahjsdkd"),
 								            Token::Sequence(vec![
 								                Token::PlainText("hgsjlkdsa ".into()),
 								                Token::Mention {
 								                    mention_type: crate::MentionType::User,
 								                    name: "tag".into(),
 								                    host: None
 								                },
 								                Token::PlainText("@ fgahjsdkd".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("aaaa @tag@domain bbbbb"),
 								            Token::Sequence(vec![
 								                Token::PlainText("aaaa ".into()),
 								                Token::Mention {
 								                    mention_type: crate::MentionType::User,
 								                    name: "tag".into(),
 								                    host: Some("domain".into())
 								                },
 								                Token::PlainText(" bbbbb".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("test @tag@domain, test"),
 								            Token::Sequence(vec![
 								                Token::PlainText("test ".into()),
 								                Token::Mention {
 								                    mention_type: crate::MentionType::User,
 								                    name: "tag".into(),
 								                    host: Some("domain".into())
 								                },
 								                Token::PlainText(", test".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("test @tag@domain.gay. test"),
 								            Token::Sequence(vec![
 								                Token::PlainText("test ".into()),
 								                Token::Mention {
 								                    mention_type: crate::MentionType::User,
 								                    name: "tag".into(),
 								                    host: Some("domain.gay".into())
 								                },
 								                Token::PlainText(". test".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("test @tag@domain? test"),
 								            Token::Sequence(vec![
 								                Token::PlainText("test ".into()),
 								                Token::Mention {
 								                    mention_type: crate::MentionType::User,
 								                    name: "tag".into(),
 								                    host: Some("domain".into())
 								                },
 								                Token::PlainText("? test".into())
 								            ])
 								        );
 								        assert_eq!(
 								            parse_full("test !tag@domain.com test"),
 								            Token::Sequence(vec![
 								                Token::PlainText("test ".into()),
 								                Token::Mention {
 								                    mention_type: crate::MentionType::Community,
 								                    name: "tag".into(),
 								                    host: Some("domain.com".into())
 								                },
 								                Token::PlainText(" test".into())
 								            ])
 								        );
-												MMM: Profile field parsing and skipping Matrix handles

											
										
										
											2023-10-26 19:08:51 +00:00
 								        assert_eq!(
 								            parse_full("@tag:domain.com"),
-												MMM: Matrix handle parsing

											
										
										
											2023-10-26 19:23:59 +00:00
+								            Token::Mention {
 								                mention_type: crate::MentionType::MatrixUser,
 								                name: "tag".into(),
 								                host: Some("domain.com".into())
 								            },
-												MMM: Profile field parsing and skipping Matrix handles

											
										
										
											2023-10-26 19:08:51 +00:00
+								        );
-												Fixed link parsing

											
										
										
											2023-10-07 18:40:01 +00:00
+								    }
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								    #[test]
 								    fn parse_shortcodes() {
 								        assert_eq!(
 								            parse_full(":bottom:"),
-												User fetching with reactions and a user by tag endpoint

											
										
										
											2023-10-30 22:00:46 +00:00
+								            Token::ShortcodeEmoji {
 								                shortcode: "bottom".into(),
 								                host: None
 								            }
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        );
 								        assert_eq!(
 								            parse_full(":bottom::blobfox:"),
 								            Token::Sequence(vec![
-												User fetching with reactions and a user by tag endpoint

											
										
										
											2023-10-30 22:00:46 +00:00
+								                Token::ShortcodeEmoji {
 								                    shortcode: "bottom".into(),
 								                    host: None
 								                },
 								                Token::ShortcodeEmoji {
 								                    shortcode: "blobfox".into(),
 								                    host: None
 								                }
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								            ])
 								        );
-												User fetching with reactions and a user by tag endpoint

											
										
										
											2023-10-30 22:00:46 +00:00
+								        assert_eq!(
 								            parse_full(":bottom@magnetar.social:"),
 								            Token::ShortcodeEmoji {
 								                shortcode: "bottom".into(),
 								                host: Some("magnetar.social".into())
 								            }
 								        );
-												Fixed URL parsing and initial flanking rules implementation

											
										
										
											2023-10-08 20:15:55 +00:00
+								        assert_eq!(
 								            parse_full(":bottom:blobfox"),
 								            Token::PlainText(":bottom:blobfox".into())
 								        );
 								        assert_eq!(
 								            parse_full("bottom:blobfox:"),
 								            Token::PlainText("bottom:blobfox:".into())
 								        );
 								    }
-												Emoji parsing

											
										
										
											2023-10-05 19:21:23 +00:00
+								    #[test]
 								    fn parse_emoji() {
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full("🥺💜❤️🦊"),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Sequence(
 								                vec!["🥺", "💜", "❤️", "🦊"]
 								                    .into_iter()
-												MMM: Made the parser always output owned tokens

											
										
										
											2023-10-14 19:41:36 +00:00
+								                    .map(str::to_string)
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								                    .map(Token::UnicodeEmoji)
 								                    .collect::<Vec<_>>()
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            )
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        );
-												Emoji parsing

											
										
										
											2023-10-05 19:21:23 +00:00
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        // Trans flag, ZWJ
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}"),
 								            Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}\u{0200d}\u{026a7}\u{0fe0f}".into())
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        );
 								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full("\u{0200d}\u{1f3f3}\u{0fe0f}"),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Sequence(vec![
 								                Token::PlainText("\u{0200d}".into()),             // ZWJ
 								                Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            ])
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        );
 								        // Trans flag, ZWNJ
 								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full("\u{1f3f3}\u{0fe0f}\u{0200c}\u{026a7}\u{0fe0f}"),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Sequence(vec![
 								                Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
 								                Token::PlainText("\u{0200c}".into()),             // ZWNJ
 								                Token::UnicodeEmoji("\u{026a7}\u{0fe0f}".into())  // Trans symbol
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            ])
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								        );
 								        assert_eq!(
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            parse_full("\u{1f3f3}\u{0fe0f}\u{0200d}\u{0200d}\u{0200d}"),
-												More precise emoji extraction and fixed center tag parsing

											
										
										
											2023-10-07 17:44:39 +00:00
+								            Token::Sequence(vec![
 								                Token::UnicodeEmoji("\u{1f3f3}\u{0fe0f}".into()), // White flag
 								                Token::PlainText("\u{0200d}\u{0200d}\u{0200d}".into()), // ZWJ
-												Normalized tests

											
										
										
											2023-10-07 19:26:25 +00:00
+								            ])
-												Connected it all

											
										
										
											2023-10-05 22:17:52 +00:00
+								        );
-												Emoji parsing

											
										
										
											2023-10-05 19:21:23 +00:00
+								    }
-												MMM: XML serialization and fixed block code parsing

											
										
										
											2023-10-25 22:30:11 +00:00
 								    #[test]
 								    fn xml_serialization() {
 								        // Parses the input and serializes the resulting token tree to XML,
 								        // panicking (via `unwrap`) if serialization returns an error.
 								        let render = |input: &str| to_xml_string(&parse_full(input)).unwrap();
 								        let bold_italic = render("***nyaaa***");
 								        assert_eq!(&bold_italic, r#"<mmm><b><i>nyaaa</i></b></mmm>"#);
 								        // Mention, function tag with an argument, Unicode emoji, shortcode and `<plain>` in one input.
 								        let mixed = render("@natty $[spin.speed=0.5s 🥺]:cat_attack: <plain>test</plain>");
 								        assert_eq!(
 								            &mixed,
 								            r#"<mmm><mention name="natty" type="user"/> <fn name="spin" arg-speed="0.5s"><ue>🥺</ue></fn><ee>cat_attack</ee> test</mmm>"#
 								        );
 								        // Hashtags after punctuation and at the start of a line, across a multi-line input.
 								        let hashtags = render(
 								            "Ring Galaxy AM 0644 741 from Hubble\nCredits: AURA, STScI, J. Higdon, Cornell, ESA, #NASA\n#nature #space #astrophotography",
 								        );
 								        assert_eq!(
 								            &hashtags,
 								            r#"<mmm>Ring Galaxy AM 0644 741 from Hubble
 								Credits: AURA, STScI, J. Higdon, Cornell, ESA, <hashtag>NASA</hashtag>
 								<hashtag>nature</hashtag> <hashtag>space</hashtag> <hashtag>astrophotography</hashtag></mmm>"#
 								        );
 								        // Fenced code block with a language tag, followed by trailing spaces.
 								        let code_block = render(
 								            r#"
 								```js
 								var x = undefined;
 								```         "#,
 								        );
 								        assert_eq!(
 								            &code_block,
 								            "<mmm><code lang=\"js\">var x = undefined;</code></mmm>"
 								        );
 								    }
-												Basic inline tag parsing

											
										
										
											2023-10-01 21:04:32 +00:00
+								}