Fixed URL parsing and initial flanking rules implementation
parent 26bd6fe4b2
commit d0d977e6eb

@@ -1,19 +1,20 @@
 use either::Either;
 use nom::branch::alt;
-use nom::bytes::complete::tag;
+use nom::bytes::complete::{tag, tag_no_case};
 use nom::character::complete::{
-    alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of, space1,
-    tab,
+    alpha1, alphanumeric1, anychar, char as one_char, char, line_ending, not_line_ending, one_of,
+    satisfy, space1, tab,
 };
 use nom::combinator::{eof, fail, map, not, opt, recognize};
 use nom::error::ErrorKind;
 use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
 use nom::sequence::tuple;
-use nom::{IResult, Offset, Slice};
+use nom::{Compare, IResult, Offset, Slice};
 use nom_locate::LocatedSpan;
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::convert::{identity, Infallible};
+use std::marker::PhantomData;
 use unicode_segmentation::UnicodeSegmentation;
 
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
@@ -73,6 +74,80 @@ pub enum Token<'a> {
 }
 
 impl Token<'_> {
+    fn str_content_left(&self) -> Option<&str> {
+        match self {
+            Token::PlainText(text) => Some(text.as_ref()),
+            Token::Sequence(tokens) => tokens.first().and_then(Token::str_content_left),
+            Token::Quote(inner) => inner.str_content_left(),
+            Token::Small(inner) => inner.str_content_left(),
+            Token::BoldItalic(inner) => inner.str_content_left(),
+            Token::Bold(inner) => inner.str_content_left(),
+            Token::Italic(inner) => inner.str_content_left(),
+            Token::Center(inner) => inner.str_content_left(),
+            Token::Strikethrough(inner) => inner.str_content_left(),
+            Token::PlainTag(tag) => Some(tag.as_ref()),
+            Token::UrlRaw(url) => Some(url.as_ref()),
+            Token::UrlNoEmbed(url) => Some(url.as_ref()),
+            Token::Link { label, .. } => label.str_content_left(),
+            Token::Function { inner, .. } => inner.str_content_left(),
+            Token::Mention { name, .. } => Some(name.as_ref()),
+            Token::UnicodeEmoji(code) => Some(code.as_ref()),
+            Token::ShortcodeEmoji(_) => None,
+            Token::Hashtag(tag) => Some(tag.as_ref()),
+            _ => None,
+        }
+    }
+
+    fn str_content_right(&self) -> Option<&str> {
+        match self {
+            Token::PlainText(text) => Some(text.as_ref()),
+            Token::Sequence(tokens) => tokens.last().and_then(Token::str_content_right),
+            Token::Quote(inner) => inner.str_content_right(),
+            Token::Small(inner) => inner.str_content_right(),
+            Token::BoldItalic(inner) => inner.str_content_right(),
+            Token::Bold(inner) => inner.str_content_right(),
+            Token::Italic(inner) => inner.str_content_right(),
+            Token::Center(inner) => inner.str_content_right(),
+            Token::Strikethrough(inner) => inner.str_content_right(),
+            Token::PlainTag(tag) => Some(tag.as_ref()),
+            Token::UrlRaw(url) => Some(url.as_ref()),
+            Token::UrlNoEmbed(url) => Some(url.as_ref()),
+            Token::Link { label, .. } => label.str_content_right(),
+            Token::Function { inner, .. } => inner.str_content_right(),
+            Token::Mention { name, .. } => Some(name.as_ref()),
+            Token::UnicodeEmoji(code) => Some(code.as_ref()),
+            Token::Hashtag(tag) => Some(tag.as_ref()),
+            _ => None,
+        }
+    }
+
+    fn inner(&self) -> Token {
+        match self {
+            plain @ Token::PlainText(_) => plain.clone(),
+            sequence @ Token::Sequence(_) => sequence.clone(),
+            Token::Quote(inner) => inner.inner(),
+            Token::Small(inner) => inner.inner(),
+            Token::BoldItalic(inner) => inner.inner(),
+            Token::Bold(inner) => inner.inner(),
+            Token::Italic(inner) => inner.inner(),
+            Token::Center(inner) => inner.inner(),
+            Token::Strikethrough(inner) => inner.inner(),
+            Token::PlainTag(text) => Token::PlainText(text.clone()),
+            Token::InlineCode(code) => Token::PlainText(code.clone()),
+            Token::InlineMath(math) => Token::PlainText(math.clone()),
+            Token::UrlRaw(url) => Token::PlainText(url.clone()),
+            Token::UrlNoEmbed(url) => Token::PlainText(url.clone()),
+            Token::Link { label, .. } => label.inner(),
+            Token::BlockCode { inner, .. } => Token::PlainText(inner.clone()),
+            Token::BlockMath(math) => Token::PlainText(math.clone()),
+            Token::Function { inner, .. } => inner.inner(),
+            Token::Mention { name, .. } => Token::PlainText(name.clone()),
+            Token::UnicodeEmoji(code) => Token::PlainText(code.clone()),
+            Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone()),
+            Token::Hashtag(tag) => Token::PlainText(tag.clone()),
+        }
+    }
+
     fn owned(&self) -> Token<'static> {
         match self {
             Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())),
@@ -129,7 +204,7 @@ impl Token<'_> {
             Token::ShortcodeEmoji(shortcode) => {
                 Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
             }
-            Token::Hashtag(url) => Token::Hashtag(Cow::Owned(url.clone().into_owned())),
+            Token::Hashtag(tag) => Token::Hashtag(Cow::Owned(tag.clone().into_owned())),
         }
     }
 
@@ -245,6 +320,16 @@ fn collect_char_sequence<'a>(
     move |chars| func(Cow::Owned(chars.collect()))
 }
 
+#[inline]
+fn alpha1_unicode(input: Span) -> IResult<Span, Span> {
+    recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
+}
+
+#[inline]
+fn alphanumeric1_unicode(input: Span) -> IResult<Span, Span> {
+    recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
+}
+
 fn spliced<'a>(
     segments: &[Span<'a>],
     func: impl Fn(Span) -> IResult<Span, Token>,
@@ -316,15 +401,16 @@ fn space(input: Span) -> IResult<Span, Token> {
     Ok((input, Token::PlainText(frag.into_fragment().into())))
 }
 
-struct Matcher<'a, 'b, T> {
+#[derive(Copy, Clone)]
+struct Matcher<'a, 'b, T: Clone> {
     matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
     collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
-    _phantom_closure: std::marker::PhantomData<&'a ()>,
-    _phantom_data: std::marker::PhantomData<&'b ()>,
-    _phantom_output: std::marker::PhantomData<fn() -> T>,
+    _phantom_closure: PhantomData<&'a ()>,
+    _phantom_data: PhantomData<&'b ()>,
+    _phantom_output: PhantomData<fn() -> T>,
 }
 
-impl<'a, 'b, T> Matcher<'a, 'b, T> {
+impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> {
     fn new(
         matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
         collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
@@ -332,9 +418,9 @@ impl<'a, 'b, T> Matcher<'a, 'b, T> {
         Self {
             matcher_inner,
             collector,
-            _phantom_closure: std::marker::PhantomData,
-            _phantom_data: std::marker::PhantomData,
-            _phantom_output: std::marker::PhantomData,
+            _phantom_closure: PhantomData,
+            _phantom_data: PhantomData,
+            _phantom_output: PhantomData,
         }
     }
 }
@@ -345,33 +431,60 @@ impl<'a, 'b> Matcher<'a, 'b, Infallible> {
         Self {
             matcher_inner: &fail::<_, Infallible, _>,
             collector: &|_| unreachable!(),
-            _phantom_closure: std::marker::PhantomData,
-            _phantom_data: std::marker::PhantomData,
-            _phantom_output: std::marker::PhantomData,
+            _phantom_closure: PhantomData,
+            _phantom_data: PhantomData,
+            _phantom_output: PhantomData,
         }
     }
 }
 
-struct Context;
+#[derive(Copy, Clone, Debug)]
+enum FlankingRule {
+    Lenient,
+    Strict,
+    DontCare,
+}
+
+struct FlankingDelim<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>>(
+    T,
+    FlankingRule,
+    PhantomData<&'a ()>,
+);
+
+impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<(T, FlankingRule)>
+    for FlankingDelim<'a, T>
+{
+    fn from((func, rule): (T, FlankingRule)) -> Self {
+        FlankingDelim(func, rule, PhantomData)
+    }
+}
+
+impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<T> for FlankingDelim<'a, T> {
+    fn from(func: T) -> Self {
+        FlankingDelim(func, FlankingRule::DontCare, PhantomData)
+    }
+}
+
+pub struct Context;
 
 impl Context {
     #[inline]
-    const fn partial(
+    fn partial(
         &self,
         func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
     ) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
         move |input| func(self, input)
     }
 
-    fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
     }
 
-    fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
     }
 
-    fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
+    pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         map(
             many1(self.partial(Self::inline_label_safe_single)),
             Token::Sequence,
@@ -606,14 +719,21 @@ impl Context {
     }
 
     #[inline]
-    fn tag_delimited<'a, 'b: 'a, T, S>(
+    fn tag_delimited<'a, 'b: 'a, T: Clone, S: Clone, FOpen, FClose>(
         &'a self,
-        opening_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
-        closing_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
+        opening_tag: impl Into<FlankingDelim<'b, FOpen>> + 'a,
+        closing_tag: impl Into<FlankingDelim<'b, FClose>> + 'a,
         escape: bool,
         matcher: Matcher<'a, 'b, T>,
         fallback: Matcher<'a, 'b, S>,
-    ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ {
+    ) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_
+    where
+        FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
+        FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
+    {
+        let FlankingDelim(opening_tag, opening_rule, ..) = opening_tag.into();
+        let FlankingDelim(closing_tag, closing_rule, ..) = closing_tag.into();
+
         move |input| {
             if escape {
                 if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
@@ -662,10 +782,44 @@ impl Context {
                 ));
             }
 
-            let (input, (inner, _)) = res?;
+            let (input, (inner, closing)) = res?;
             let mut inner = inner.into_iter().map(|(_, t)| t);
 
-            Ok((input, (matcher.collector)(&mut inner)))
+            let inner_tok = (matcher.collector)(&mut inner);
+
+            let correct_left_flanking =
+                if let FlankingRule::Lenient | FlankingRule::Strict = opening_rule {
+                    let text_left = inner_tok.str_content_left();
+
+                    !(text_left.is_some_and(|s| s.starts_with(char::is_whitespace))
+                        || text_left.is_none())
+                } else {
+                    true
+                };
+
+            let correct_right_flanking =
+                if let FlankingRule::Lenient | FlankingRule::Strict = closing_rule {
+                    let text_right = inner_tok.str_content_right();
+                    !(text_right.is_some_and(|s| s.ends_with(char::is_whitespace))
+                        || text_right.is_none())
+                } else {
+                    true
+                };
+
+            // TODO: Unfinished flanking rules
+            let correct_flanking = correct_left_flanking && correct_right_flanking;
+
+            if !correct_flanking {
+                return Ok((
+                    input,
+                    Token::Sequence(vec![
+                        Token::PlainText(begin.fragment_between(&post_open).into()),
+                        inner_tok.inner().owned(),
+                        Token::PlainText(closing.into_fragment().into()),
+                    ]),
+                ));
+            }
+            Ok((input, Token::Sequence(vec![inner_tok])))
         }
     }
 
@@ -720,12 +874,12 @@ impl Context {
     }
 
     fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        let opening_tag = &tag("<small>");
-        let closing_tag = &tag("</small>");
+        let opening_tag = &tag("<plain>");
+        let closing_tag = &tag("</plain>");
 
         let (input, _) = opening_tag(input)?;
         let (input, text) = map(
-            recognize(many1(tuple((not_line_ending, not(closing_tag))))),
+            recognize(many1(tuple((not(line_ending), not(closing_tag), anychar)))),
             Span::into_fragment,
         )(input)?;
         let (input, _) = closing_tag(input)?;
@@ -735,8 +889,8 @@ impl Context {
 
     fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("<small>"),
-            tag("</small>"),
+            tag_no_case("<small>"),
+            tag_no_case("</small>"),
             false,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -749,11 +903,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("***"),
-            tag("***"),
+            (tag("***"), FlankingRule::Lenient),
+            (tag("***"), FlankingRule::Lenient),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -766,11 +919,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
    fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("___"),
-            tag("___"),
+            (tag("___"), FlankingRule::Strict),
+            (tag("___"), FlankingRule::Strict),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -785,8 +937,8 @@ impl Context {
 
     fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("<b>"),
-            tag("</b>"),
+            tag_no_case("<b>"),
+            tag_no_case("</b>"),
             false,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -799,11 +951,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("**"),
-            tag("**"),
+            (tag("**"), FlankingRule::Lenient),
+            (tag("**"), FlankingRule::Lenient),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -816,11 +967,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("__"),
-            tag("__"),
+            (tag("__"), FlankingRule::Strict),
+            (tag("__"), FlankingRule::Strict),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -835,8 +985,8 @@ impl Context {
 
     fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("<i>"),
-            tag("</i>"),
+            tag_no_case("<i>"),
+            tag_no_case("</i>"),
             false,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -849,11 +999,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("*"),
-            tag("*"),
+            (tag("*"), FlankingRule::Lenient),
+            (tag("*"), FlankingRule::Lenient),
            true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -866,11 +1015,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("_"),
-            tag("_"),
+            (tag("_"), FlankingRule::Strict),
+            (tag("_"), FlankingRule::Strict),
             true,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -885,8 +1033,8 @@ impl Context {
 
     fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("<s>"),
-            tag("</s>"),
+            tag_no_case("<s>"),
+            tag_no_case("</s>"),
             false,
             Matcher::new(
                 &self.partial(Self::inline_single),
@@ -899,11 +1047,10 @@ impl Context {
         )(input)
     }
 
-    // TODO: CommonMark flanking rules
     fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
         self.tag_delimited(
-            tag("~~"),
-            tag("~~"),
+            (tag("~~"), FlankingRule::Lenient),
+            (tag("~~"), FlankingRule::Lenient),
             true,
             Matcher::new(
                 &move |input| {
@@ -1037,20 +1184,42 @@ impl Context {
     }
 
     fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        // TODO: Fail when preceded by alphanumerics
+        if let (plain_out, Some(plain)) = map(
+            opt(recognize(tuple((
+                alphanumeric1_unicode,
+                self.partial(Self::shortcode_emoji),
+            )))),
+            |o| o.map(Span::into_fragment),
+        )(input)?
+        {
+            return Ok((plain_out, Token::PlainText(plain.into())));
+        }
+
         let (input, _) = tag(":")(input)?;
         let (input, shortcode) = map(
-            recognize(many1(alt((alphanumeric1, recognize(one_of("_+-")))))),
+            recognize(many1(alt((
+                alphanumeric1_unicode,
+                recognize(one_of("_+-")),
+            )))),
             Span::into_fragment,
         )(input)?;
         let (input, _) = tag(":")(input)?;
-        let (input, _) = not(alphanumeric1)(input)?;
+        let (input, _) = not(alphanumeric1_unicode)(input)?;
 
         Ok((input, Token::ShortcodeEmoji(shortcode.into())))
     }
 
     fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
-        // TODO: Escaping and skip when preceded by alphanumerics
+        if let (plain_out, Some(plain)) = map(
+            opt(recognize(tuple((
+                alt((tag("\\"), alphanumeric1_unicode)),
+                self.partial(Self::tag_mention),
+            )))),
+            |o| o.map(Span::into_fragment),
+        )(input)?
+        {
+            return Ok((plain_out, Token::PlainText(plain.into())));
+        }
+
         let tags = one_of("@!");
         let (input, mention_type) = map(tags, |c| match c {
@@ -1123,12 +1292,12 @@ fn protocol(input: Span) -> IResult<Span, Span> {
 
 #[inline]
 fn url_chars_base(input: Span) -> IResult<Span, Span> {
-    recognize(alt((
-        alpha1,
+    alt((
+        alphanumeric1_unicode,
         recognize(tuple((tag("["), many_till(url_chars_base, tag("]"))))),
         recognize(tuple((tag("("), many_till(url_chars_base, tag(")"))))),
         recognize(one_of(".,_/:%#$&?!~=+-@")),
-    )))(input)
+    ))(input)
 }
 
 #[inline]
@@ -1221,6 +1390,21 @@ mod test {
             Token::Italic(Box::new(Token::PlainText("italic".into()))),
         );
 
+        assert_eq!(
+            parse_full(r#"* italic *"#),
+            Token::PlainText("* italic *".into())
+        );
+
+        assert_eq!(
+            parse_full(r#"_ italic *"#),
+            Token::PlainText("_ italic *".into())
+        );
+
+        assert_eq!(
+            parse_full(r#"*"italic"*"#),
+            Token::Italic(Box::new(Token::PlainText("\"italic\"".into())))
+        );
+
         assert_eq!(
             parse_full(r#"not code `code` also not code"#),
             Token::Sequence(vec![
@@ -1356,6 +1540,47 @@ text</center>"#
 
     #[test]
     fn parse_link() {
+        assert_eq!(
+            parse_full("IPv4 test: <https://0>"),
+            Token::Sequence(vec![
+                Token::PlainText("IPv4 test: ".into()),
+                Token::UrlNoEmbed("https://0".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("IPv4 test: <https://127.0.0.1>"),
+            Token::Sequence(vec![
+                Token::PlainText("IPv4 test: ".into()),
+                Token::UrlNoEmbed("https://127.0.0.1".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("IPv6 test: <https://[::2f:1]/nya>"),
+            Token::Sequence(vec![
+                Token::PlainText("IPv6 test: ".into()),
+                Token::UrlNoEmbed("https://[::2f:1]/nya".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full("IPv6 test: https://[::2f:1]/nya"),
+            Token::Sequence(vec![
+                Token::PlainText("IPv6 test: ".into()),
+                Token::UrlRaw("https://[::2f:1]/nya".into())
+            ])
+        );
+
+        // IDNs
+        assert_eq!(
+            parse_full("IDN test: https://www.háčkyčárky.cz/"),
+            Token::Sequence(vec![
+                Token::PlainText("IDN test: ".into()),
+                Token::UrlRaw("https://www.háčkyčárky.cz/".into())
+            ])
+        );
+
         assert_eq!(
             parse_full("Link test: [label](https://example.com)"),
             Token::Sequence(vec![
@@ -1440,6 +1665,11 @@ text</center>"#
             }
         );
 
+        assert_eq!(
+            parse_full("email@notactuallyamenmtion.org"),
+            Token::PlainText("email@notactuallyamenmtion.org".into())
+        );
+
         assert_eq!(
             parse_full("hgsjlkdsa @tag fgahjsdkd"),
             Token::Sequence(vec![
@@ -1532,6 +1762,32 @@ text</center>"#
         );
     }
 
+    #[test]
+    fn parse_shortcodes() {
+        assert_eq!(
+            parse_full(":bottom:"),
+            Token::ShortcodeEmoji("bottom".into())
+        );
+
+        assert_eq!(
+            parse_full(":bottom::blobfox:"),
+            Token::Sequence(vec![
+                Token::ShortcodeEmoji("bottom".into()),
+                Token::ShortcodeEmoji("blobfox".into())
+            ])
+        );
+
+        assert_eq!(
+            parse_full(":bottom:blobfox"),
+            Token::PlainText(":bottom:blobfox".into())
+        );
+
+        assert_eq!(
+            parse_full("bottom:blobfox:"),
+            Token::PlainText("bottom:blobfox:".into())
+        );
+    }
+
     #[test]
     fn parse_emoji() {
         assert_eq!(
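Design note on the delimiter API above: `tag_delimited` now takes `impl Into<FlankingDelim<…>>` for each delimiter, with `From` impls covering both a bare parser and a `(parser, FlankingRule)` tuple, so existing call sites that pass a plain `tag(…)` keep compiling while the emphasis parsers opt into a rule. A minimal standalone sketch of that conversion pattern follows (illustrative names only, not the commit's types):

// Sketch of the Into-based optional-rule pattern used by tag_delimited:
// a bare parser converts with a DontCare default, while a (parser, rule)
// tuple carries an explicit rule.
#[derive(Copy, Clone, Debug, PartialEq)]
enum Rule {
    Lenient,
    Strict,
    DontCare,
}

struct Delim<F: Fn(&str) -> bool>(F, Rule);

// Bare parser: no rule given, so the default applies.
impl<F: Fn(&str) -> bool> From<F> for Delim<F> {
    fn from(f: F) -> Self {
        Delim(f, Rule::DontCare)
    }
}

// (parser, rule) tuple: the rule is explicit.
impl<F: Fn(&str) -> bool> From<(F, Rule)> for Delim<F> {
    fn from((f, rule): (F, Rule)) -> Self {
        Delim(f, rule)
    }
}

// Stand-in for tag_delimited: accepts either form and recovers the rule.
fn delimited<F: Fn(&str) -> bool>(opening: impl Into<Delim<F>>) -> Rule {
    let Delim(_parser, rule) = opening.into();
    rule
}

fn main() {
    let starts_star = |s: &str| s.starts_with('*');
    assert_eq!(delimited(starts_star), Rule::DontCare); // old-style call site
    assert_eq!(delimited((starts_star, Rule::Strict)), Rule::Strict); // new-style
}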