MMM: Nesting-limited parsing
parent 23a63f2fe9
commit 86d5c87e9a
@@ -1649,6 +1649,7 @@ dependencies = [
  "emojis",
  "nom",
  "nom_locate",
+ "tracing",
  "unicode-segmentation",
 ]
@@ -10,4 +10,5 @@ emojis = { workspace = true }
 nom = { workspace = true }
 nom_locate = { workspace = true }
 compact_str = { workspace = true }
+tracing = { workspace = true }
 unicode-segmentation = { workspace = true }
@@ -7,14 +7,15 @@ use nom::character::complete::{
     satisfy, space1, tab,
 };
 use nom::combinator::{eof, fail, map, not, opt, recognize};
-use nom::error::ErrorKind;
+use nom::error::{ErrorKind, ParseError};
 use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
-use nom::{IResult, Offset, Slice};
+use nom::{IResult, Offset, Parser, Slice};
 use nom_locate::LocatedSpan;
 use std::collections::HashMap;
 use std::convert::{identity, Infallible};
 use std::marker::PhantomData;
+use tracing::trace;
 use unicode_segmentation::UnicodeSegmentation;
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
@@ -217,7 +218,18 @@ impl Token {
     }
 }
 
-type Span<'a> = LocatedSpan<&'a str>;
+#[derive(Debug, Default, Copy, Clone)]
+pub struct SpanMeta {
+    depth: usize,
+}
+
+impl SpanMeta {
+    fn new(depth: usize) -> Self {
+        Self { depth }
+    }
+}
+
+type Span<'a> = LocatedSpan<&'a str, SpanMeta>;
 
 trait SliceOffset {
     fn up_to(&self, other: &Self) -> Self;
@@ -300,7 +312,10 @@ fn spliced<'a>(
     type NE<E> = nom::Err<E>;
     type NomError<'x> = nom::error::Error<Span<'x>>;
 
-    let quote_span = Span::new(&combined);
+    let quote_span = Span::new_extra(
+        &combined,
+        segments.first().map_or(SpanMeta::new(0), |s| s.extra),
+    );
     let (input, inner) = match func(quote_span) {
         Ok(s) => s,
         Err(e) => {
@@ -311,7 +326,10 @@ fn spliced<'a>(
             let offset = offset_new - offset_seg_new;
             let offset_orig = offset + seg_parent.location_offset();
             Err(NE::Error(NomError::new(
-                Span::new(&parent.into_fragment()[offset_orig..]),
+                Span::new_extra(
+                    &parent.into_fragment()[offset_orig..],
+                    seg_parent.extra,
+                ),
                 e.code,
             )))
         } else {
@@ -405,9 +423,53 @@ impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<T> for FlankingDel
     }
 }
 
-pub struct Context;
+pub struct Context {
+    depth_limit: usize,
+}
+
+const DEFAULT_DEPTH_LIMIT: usize = 24;
+
+impl Default for Context {
+    fn default() -> Self {
+        Context::new(DEFAULT_DEPTH_LIMIT)
+    }
+}
 
 impl Context {
+    pub fn new(depth_limit: usize) -> Self {
+        Self { depth_limit }
+    }
+
+    pub fn parse_full(&self, input: &str) -> Token {
+        match self.full(Span::new_extra(input, SpanMeta::default())) {
+            Ok((_, t)) => t.merged(),
+            Err(e) => {
+                trace!(input = input, "Full parser fail: {:?}", e);
+                Token::PlainText(e.to_compact_string())
+            }
+        }
+    }
+
+    pub fn parse_inline(&self, input: &str) -> Token {
+        match self.full(Span::new_extra(input, SpanMeta::default())) {
+            Ok((_, t)) => t.merged(),
+            Err(e) => {
+                trace!(input = input, "Inline parser fail: {:?}", e);
+                Token::PlainText(e.to_compact_string())
+            }
+        }
+    }
+
+    pub fn parse_ui(&self, input: &str) -> Token {
+        match self.inline_ui(Span::new_extra(input, SpanMeta::default())) {
+            Ok((_, t)) => t.merged(),
+            Err(e) => {
+                trace!(input = input, "Inline parser fail: {:?}", e);
+                Token::PlainText(e.to_compact_string())
+            }
+        }
+    }
+
     #[inline]
     fn partial(
         &self,
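A minimal usage sketch of the `Context` API added above (not part of the commit; the wrapper name and the limit of 8 are illustrative):

// Assumes `Context` and `Token` from this crate are in scope.
fn render_with_limit(body: &str) -> Token {
    // Context::default() uses DEFAULT_DEPTH_LIMIT (24); any other limit works too.
    let ctx = Context::new(8);
    // Markup nested deeper than the limit is left as plain text instead of
    // recursing without bound.
    ctx.parse_full(body)
}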
@@ -416,6 +478,14 @@ impl Context {
         move |input| func(self, input)
     }
 
+    #[inline]
+    fn partial_span(
+        &self,
+        func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'static,
+    ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>> + '_ {
+        move |input| func(self, input)
+    }
+
     pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
     }
@@ -431,6 +501,17 @@ impl Context {
         )(input)
     }
 
+    fn inline_ui<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
+        map(
+            many1(alt((
+                self.partial(Self::unicode_emoji),
+                self.partial(Self::shortcode_emoji),
+                self.partial(Self::tag_raw_text),
+            ))),
+            Token::Sequence,
+        )(input)
+    }
+
     fn base_bold_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         alt((
             self.partial(Self::tag_bold_italic_asterisk),
@@ -444,69 +525,72 @@ impl Context {
 
     fn full_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, token) = alt((
-            self.partial(Self::unicode_emoji),
-            alt((
+            self.increase_nesting(alt((
+                self.partial(Self::unicode_emoji),
                 self.partial(Self::tag_block_center),
                 self.partial(Self::tag_small),
                 self.partial(Self::tag_plain),
                 self.partial(Self::tag_bold),
                 self.partial(Self::tag_italic),
                 self.partial(Self::tag_strikethrough),
-            )),
-            self.partial(Self::url_no_embed),
-            self.partial(Self::base_bold_italic),
-            self.partial(Self::tag_block_code),
-            self.partial(Self::tag_inline_code),
-            self.partial(Self::tag_quote),
-            self.partial(Self::tag_block_math),
-            self.partial(Self::tag_inline_math),
-            self.partial(Self::tag_strikethrough_tilde),
-            self.partial(Self::tag_func),
-            self.partial(Self::tag_mention),
-            self.partial(Self::tag_hashtag),
-            self.partial(Self::shortcode_emoji),
-            self.partial(Self::link),
-            self.partial(Self::raw_url),
+                self.partial(Self::url_no_embed),
+                self.partial(Self::base_bold_italic),
+                self.partial(Self::tag_block_code),
+                self.partial(Self::tag_inline_code),
+                self.partial(Self::tag_quote),
+                self.partial(Self::tag_block_math),
+                self.partial(Self::tag_inline_math),
+                self.partial(Self::tag_strikethrough_tilde),
+                self.partial(Self::tag_func),
+                self.partial(Self::tag_mention),
+                self.partial(Self::tag_hashtag),
+                self.partial(Self::shortcode_emoji),
+                self.partial(Self::link),
+                self.partial(Self::raw_url),
+            ))),
             self.partial(Self::tag_raw_text),
         ))(input)?;
         Ok((input, token))
     }
 
     fn inline_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
-        let (input, token) = alt((
-            self.partial(Self::unicode_emoji),
-            self.partial(Self::tag_small),
-            self.partial(Self::tag_plain),
-            self.partial(Self::tag_bold),
-            self.partial(Self::tag_italic),
-            self.partial(Self::tag_strikethrough),
-            self.partial(Self::url_no_embed),
-            self.partial(Self::base_bold_italic),
-            self.partial(Self::tag_inline_code),
-            self.partial(Self::tag_inline_math),
-            self.partial(Self::tag_strikethrough_tilde),
-            self.partial(Self::tag_func),
-            self.partial(Self::tag_mention),
-            self.partial(Self::tag_hashtag),
-            self.partial(Self::shortcode_emoji),
-            self.partial(Self::link),
-            self.partial(Self::raw_url),
+        alt((
+            self.increase_nesting(alt((
+                self.partial(Self::unicode_emoji),
+                self.partial(Self::tag_small),
+                self.partial(Self::tag_plain),
+                self.partial(Self::tag_bold),
+                self.partial(Self::tag_italic),
+                self.partial(Self::tag_strikethrough),
+                self.partial(Self::url_no_embed),
+                self.partial(Self::base_bold_italic),
+                self.partial(Self::tag_inline_code),
+                self.partial(Self::tag_inline_math),
+                self.partial(Self::tag_strikethrough_tilde),
+                self.partial(Self::tag_func),
+                self.partial(Self::tag_mention),
+                self.partial(Self::tag_hashtag),
+                self.partial(Self::shortcode_emoji),
+                self.partial(Self::link),
+                self.partial(Self::raw_url),
+            ))),
             self.partial(Self::tag_raw_text),
-        ))(input)?;
-        Ok((input, token))
+        ))(input)
     }
 
     fn inline_non_formatting_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, token) = alt((
-            self.partial(Self::unicode_emoji),
-            self.partial(Self::url_no_embed),
-            self.partial(Self::tag_inline_code),
-            self.partial(Self::tag_inline_math),
-            self.partial(Self::tag_func),
-            self.partial(Self::tag_mention),
-            self.partial(Self::tag_hashtag),
-            self.partial(Self::shortcode_emoji),
-            self.partial(Self::raw_url),
+            self.increase_nesting(alt((
+                self.partial(Self::unicode_emoji),
+                self.partial(Self::url_no_embed),
+                self.partial(Self::tag_inline_code),
+                self.partial(Self::tag_inline_math),
+                self.partial(Self::tag_func),
+                self.partial(Self::tag_mention),
+                self.partial(Self::tag_hashtag),
+                self.partial(Self::shortcode_emoji),
+                self.partial(Self::raw_url),
+            ))),
             self.partial(Self::tag_raw_text),
         ))(input)?;
         Ok((input, token))
@@ -514,16 +598,18 @@ impl Context {
 
     fn inline_label_safe_single<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, token) = alt((
-            self.partial(Self::unicode_emoji),
-            self.partial(Self::tag_small),
-            self.partial(Self::tag_plain),
-            self.partial(Self::tag_bold),
-            self.partial(Self::tag_italic),
-            self.partial(Self::tag_strikethrough),
-            self.partial(Self::base_bold_italic),
-            self.partial(Self::tag_strikethrough_tilde),
-            self.partial(Self::tag_func),
-            self.partial(Self::shortcode_emoji),
+            self.increase_nesting(alt((
+                self.partial(Self::unicode_emoji),
+                self.partial(Self::tag_small),
+                self.partial(Self::tag_plain),
+                self.partial(Self::tag_bold),
+                self.partial(Self::tag_italic),
+                self.partial(Self::tag_strikethrough),
+                self.partial(Self::base_bold_italic),
+                self.partial(Self::tag_strikethrough_tilde),
+                self.partial(Self::tag_func),
+                self.partial(Self::shortcode_emoji),
+            ))),
             self.partial(Self::tag_raw_text),
         ))(input)?;
         Ok((input, token))
@@ -1056,8 +1142,11 @@ impl Context {
 
     fn raw_url<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, url_span) = recognize(tuple((
-            protocol,
-            url_chars(|input| not(url_chars_base)(input), false),
+            self.partial_span(Self::protocol),
+            self.url_chars(
+                |input| recognize(not(self.partial_span(Self::url_chars_base)))(input),
+                false,
+            ),
         )))(input)?;
 
         let url = url_span.into_fragment();
@@ -1075,7 +1164,10 @@ impl Context {
 
     fn url_no_embed<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
         let (input, _) = tag("<")(input)?;
-        let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
+        let (input, url_span) = recognize(tuple((
+            self.partial_span(Self::protocol),
+            self.url_chars(tag(">"), true),
+        )))(input)?;
         let (input, _) = tag(">")(input)?;
 
         Ok((
@@ -1090,7 +1182,10 @@ impl Context {
         let (input, _) = not(tag("["))(input)?;
         let (input, (label_tok, _)) =
             many_till(self.partial(Self::inline_label_safe_single), tag("]("))(input)?;
-        let (input, url_span) = recognize(tuple((protocol, url_chars(tag(")"), true))))(input)?;
+        let (input, url_span) = recognize(tuple((
+            self.partial_span(Self::protocol),
+            self.url_chars(tag(")"), true),
+        )))(input)?;
         let (input, _) = tag(")")(input)?;
 
         Ok((
@@ -1202,74 +1297,136 @@ impl Context {
 
         let (input, _) = tag("#")(input)?;
 
-        let (input, hashtag_text) =
-            map(recognize(many1(hashtag_chars)), Span::into_fragment)(input)?;
+        let (input, hashtag_text) = map(
+            recognize(many1(self.partial_span(Self::hashtag_chars))),
+            Span::into_fragment,
+        )(input)?;
 
         Ok((input, Token::Hashtag(hashtag_text.into())))
     }
-}
 
-#[inline]
-fn hashtag_chars(input: Span) -> IResult<Span, Span> {
-    recognize(alt((
-        recognize(tuple((tag("("), hashtag_chars, tag(")")))),
-        recognize(tuple((tag("["), hashtag_chars, tag("]")))),
-        recognize(tuple((tag("「"), hashtag_chars, tag("」")))),
-        recognize(tuple((tag("("), hashtag_chars, tag(")")))),
-        recognize(tuple((
-            not(space1),
-            not_line_ending,
-            not(one_of(".,:;!?#?/[]【】()「」()<>")),
-            anychar,
-        ))),
-    )))(input)
-}
+    #[inline]
+    fn increase_nesting<'a, 'b, O, F>(
+        &'b self,
+        mut func: F,
+    ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, O> + 'b
+    where
+        F: Parser<Span<'a>, O, nom::error::Error<Span<'a>>> + 'b,
+    {
+        move |mut input| {
+            if input.extra.depth >= self.depth_limit {
+                return fail(input);
+            }
+
+            input.extra.depth += 1;
+            func.parse(input)
+        }
+    }
 
-#[inline]
-fn protocol(input: Span) -> IResult<Span, Span> {
-    alt((tag("https://"), tag("http://")))(input)
-}
+    #[inline]
+    fn hashtag_chars<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
+        recognize(alt((
+            recognize(tuple((
+                tag("("),
+                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
+                tag(")"),
+            ))),
+            recognize(tuple((
+                tag("["),
+                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
+                tag("]"),
+            ))),
+            recognize(tuple((
+                tag("「"),
+                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
+                tag("」"),
+            ))),
+            recognize(tuple((
+                tag("("),
+                self.increase_nesting(self.partial_span(Self::hashtag_chars)),
+                tag(")"),
+            ))),
+            recognize(tuple((
+                not(space1),
+                not_line_ending,
+                not(one_of(".,:;!?#?/[]【】()「」()<>")),
+                anychar,
+            ))),
+        )))(input)
+    }
 
-#[inline]
-fn url_chars_base(input: Span) -> IResult<Span, Span> {
-    alt((
-        alphanumeric1_unicode,
-        recognize(tuple((tag("["), many_till(url_chars_base, tag("]"))))),
-        recognize(tuple((tag("("), many_till(url_chars_base, tag(")"))))),
-        recognize(one_of(".,_/:%#$&?!~=+-@")),
-    ))(input)
-}
+    #[inline]
+    fn protocol<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
+        alt((tag("https://"), tag("http://")))(input)
+    }
 
-#[inline]
-fn url_chars<'a, T: 'a>(
-    terminator: impl Fn(Span<'a>) -> IResult<Span<'a>, T> + 'a,
-    spaces: bool,
-) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'a {
-    let chars = tuple((
-        not(tuple((space1, eof))),
-        not(tuple((space1, tag("\"")))),
-        not(tuple((opt(space1), terminator))),
-        alt((url_chars_base, if spaces { space1 } else { fail })),
-    ));
-
-    recognize(many1_count(chars))
+    #[inline]
+    fn url_chars_base<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
+        alt((
+            alphanumeric1_unicode,
+            recognize(tuple((
+                tag("["),
+                many_till(
+                    self.increase_nesting(self.partial_span(Self::url_chars_base)),
+                    tag("]"),
+                ),
+            ))),
+            recognize(tuple((
+                tag("("),
+                many_till(
+                    self.increase_nesting(self.partial_span(Self::url_chars_base)),
+                    tag(")"),
+                ),
+            ))),
+            recognize(one_of(".,_/:%#$&?!~=+-@")),
+        ))(input)
+    }
+
+    #[inline]
+    fn url_chars<'a, 'b, F>(
+        &'b self,
+        mut terminator: F,
+        spaces: bool,
+    ) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'b
+    where
+        F: Parser<Span<'a>, Span<'a>, nom::error::Error<Span<'a>>> + 'b,
+    {
+        move |input| {
+            recognize(many1_count(tuple((
+                not(tuple((space1, eof))),
+                not(tuple((space1, tag("\"")))),
+                not(tuple((opt(space1), |input| terminator.parse(input)))),
+                alt((
+                    |input| self.url_chars_base(input),
+                    if spaces { space1 } else { fail },
+                )),
+            ))))(input)
+        }
+    }
 }
 
 #[cfg(test)]
 mod test {
-    use crate::{url_chars, Context, Span, Token};
+    use crate::{Context, Span, SpanMeta, Token, DEFAULT_DEPTH_LIMIT};
     use nom::bytes::complete::tag;
     use std::collections::HashMap;
 
     fn parse_full(string: &str) -> Token {
-        Context.full(Span::new(string)).unwrap().1.merged()
+        Context::default()
+            .full(Span::new_extra(string, SpanMeta::default()))
+            .unwrap()
+            .1
+            .merged()
     }
 
     #[test]
     fn parse_url_chars() {
+        let ctx = Context::default();
+
         assert_eq!(
-            url_chars(tag(")"), true)(Span::new(
-                "https://en.wikipedia.org/wiki/Sandbox_(computer_security))"
+            ctx.url_chars(tag(")"), true)(Span::new_extra(
+                "https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
+                SpanMeta::default()
             ))
             .unwrap()
             .1
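The recursion guard above is the heart of the change: every span carries its current depth in `SpanMeta`, and `increase_nesting` bumps that counter on the way down, bailing out with `fail` once `depth_limit` is reached. Below is a stripped-down sketch of the same pattern using plain functions and `Option` in place of nom's `Parser`/`IResult` machinery; all names in it are illustrative, not from the commit:

// Input with a depth counter attached, mirroring `LocatedSpan<&str, SpanMeta>`.
#[derive(Copy, Clone)]
struct DepthInput<'a> {
    rest: &'a str,
    depth: usize,
}

// Wrap a parser so it refuses to run once `depth_limit` levels of nesting are
// reached; this mirrors `Context::increase_nesting`, with `Option` standing in
// for `IResult`.
fn with_depth_limit<'a, O>(
    depth_limit: usize,
    mut inner: impl FnMut(DepthInput<'a>) -> Option<(DepthInput<'a>, O)>,
) -> impl FnMut(DepthInput<'a>) -> Option<(DepthInput<'a>, O)> {
    move |mut input| {
        if input.depth >= depth_limit {
            return None; // corresponds to `return fail(input);`
        }
        input.depth += 1; // only this copy of the input carries the deeper count
        inner(input)
    }
}

fn main() {
    // A trivial inner parser that consumes a single character.
    fn take1(i: DepthInput<'_>) -> Option<(DepthInput<'_>, char)> {
        let mut chars = i.rest.chars();
        chars
            .next()
            .map(|c| (DepthInput { rest: chars.as_str(), depth: i.depth }, c))
    }

    let mut guarded = with_depth_limit(2, take1);
    assert!(guarded(DepthInput { rest: "ab", depth: 0 }).is_some());
    assert!(guarded(DepthInput { rest: "ab", depth: 2 }).is_none()); // limit hit
}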
@@ -1278,8 +1435,9 @@ mod test {
         );
 
         assert_eq!(
-            url_chars(tag(")"), true)(Span::new(
-                "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))"
+            ctx.url_chars(tag(")"), true)(Span::new_extra(
+                "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))",
+                SpanMeta::default()
             ))
             .unwrap()
             .1
@@ -1288,26 +1446,35 @@ mod test {
         );
 
         assert_eq!(
-            url_chars(tag(")"), true)(Span::new("https://cs.wikipedia.org/wiki/Among_Us "))
-                .unwrap()
-                .1
-                .into_fragment(),
+            ctx.url_chars(tag(")"), true)(Span::new_extra(
+                "https://cs.wikipedia.org/wiki/Among_Us ",
+                SpanMeta::default()
+            ))
+            .unwrap()
+            .1
+            .into_fragment(),
             "https://cs.wikipedia.org/wiki/Among_Us",
         );
 
         assert_eq!(
-            url_chars(tag(")"), true)(Span::new("https://cs.wikipedia.org/wiki/Among Us )"))
-                .unwrap()
-                .1
-                .into_fragment(),
+            ctx.url_chars(tag(")"), true)(Span::new_extra(
+                "https://cs.wikipedia.org/wiki/Among Us )",
+                SpanMeta::default()
+            ))
+            .unwrap()
+            .1
+            .into_fragment(),
             "https://cs.wikipedia.org/wiki/Among Us"
         );
 
         assert_eq!(
-            url_chars(tag(")"), false)(Span::new("https://en.wikipedia.org/wiki/Among Us )"))
-                .unwrap()
-                .1
-                .into_fragment(),
+            ctx.url_chars(tag(")"), false)(Span::new_extra(
+                "https://en.wikipedia.org/wiki/Among Us )",
+                SpanMeta::default()
+            ))
+            .unwrap()
+            .1
+            .into_fragment(),
             "https://en.wikipedia.org/wiki/Among"
         );
     }
@@ -1593,6 +1760,23 @@ text</center>"#
         );
     }
 
+    #[test]
+    fn limit_nesting() {
+        let mut tok = Token::PlainText(" <s><i>test</i></s> ".into());
+        for _ in 0..DEFAULT_DEPTH_LIMIT {
+            tok = Token::Bold(Box::new(tok));
+        }
+
+        assert_eq!(
+            parse_full(
+                &("<b>".repeat(DEFAULT_DEPTH_LIMIT)
+                    + " <s><i>test</i></s> "
+                    + &*"</b>".repeat(DEFAULT_DEPTH_LIMIT))
+            ),
+            tok
+        );
+    }
+
     #[test]
     fn parse_mention() {
         assert_eq!(