Fixed URL parsing and initial flanking rules implementation
This commit is contained in:
parent
26bd6fe4b2
commit
d0d977e6eb
|
@ -1,19 +1,20 @@
|
|||
use either::Either;
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::bytes::complete::{tag, tag_no_case};
|
||||
use nom::character::complete::{
|
||||
alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of, space1,
|
||||
tab,
|
||||
alpha1, alphanumeric1, anychar, char as one_char, char, line_ending, not_line_ending, one_of,
|
||||
satisfy, space1, tab,
|
||||
};
|
||||
use nom::combinator::{eof, fail, map, not, opt, recognize};
|
||||
use nom::error::ErrorKind;
|
||||
use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
|
||||
use nom::sequence::tuple;
|
||||
use nom::{IResult, Offset, Slice};
|
||||
use nom::{Compare, IResult, Offset, Slice};
|
||||
use nom_locate::LocatedSpan;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::{identity, Infallible};
|
||||
use std::marker::PhantomData;
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
|
@ -73,6 +74,80 @@ pub enum Token<'a> {
|
|||
}
|
||||
|
||||
impl Token<'_> {
|
||||
fn str_content_left(&self) -> Option<&str> {
|
||||
match self {
|
||||
Token::PlainText(text) => Some(text.as_ref()),
|
||||
Token::Sequence(tokens) => tokens.first().and_then(Token::str_content_left),
|
||||
Token::Quote(inner) => inner.str_content_left(),
|
||||
Token::Small(inner) => inner.str_content_left(),
|
||||
Token::BoldItalic(inner) => inner.str_content_left(),
|
||||
Token::Bold(inner) => inner.str_content_left(),
|
||||
Token::Italic(inner) => inner.str_content_left(),
|
||||
Token::Center(inner) => inner.str_content_left(),
|
||||
Token::Strikethrough(inner) => inner.str_content_left(),
|
||||
Token::PlainTag(tag) => Some(tag.as_ref()),
|
||||
Token::UrlRaw(url) => Some(url.as_ref()),
|
||||
Token::UrlNoEmbed(url) => Some(url.as_ref()),
|
||||
Token::Link { label, .. } => label.str_content_left(),
|
||||
Token::Function { inner, .. } => inner.str_content_left(),
|
||||
Token::Mention { name, .. } => Some(name.as_ref()),
|
||||
Token::UnicodeEmoji(code) => Some(code.as_ref()),
|
||||
Token::ShortcodeEmoji(_) => None,
|
||||
Token::Hashtag(tag) => Some(tag.as_ref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn str_content_right(&self) -> Option<&str> {
|
||||
match self {
|
||||
Token::PlainText(text) => Some(text.as_ref()),
|
||||
Token::Sequence(tokens) => tokens.last().and_then(Token::str_content_right),
|
||||
Token::Quote(inner) => inner.str_content_right(),
|
||||
Token::Small(inner) => inner.str_content_right(),
|
||||
Token::BoldItalic(inner) => inner.str_content_right(),
|
||||
Token::Bold(inner) => inner.str_content_right(),
|
||||
Token::Italic(inner) => inner.str_content_right(),
|
||||
Token::Center(inner) => inner.str_content_right(),
|
||||
Token::Strikethrough(inner) => inner.str_content_right(),
|
||||
Token::PlainTag(tag) => Some(tag.as_ref()),
|
||||
Token::UrlRaw(url) => Some(url.as_ref()),
|
||||
Token::UrlNoEmbed(url) => Some(url.as_ref()),
|
||||
Token::Link { label, .. } => label.str_content_right(),
|
||||
Token::Function { inner, .. } => inner.str_content_right(),
|
||||
Token::Mention { name, .. } => Some(name.as_ref()),
|
||||
Token::UnicodeEmoji(code) => Some(code.as_ref()),
|
||||
Token::Hashtag(tag) => Some(tag.as_ref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn inner(&self) -> Token {
|
||||
match self {
|
||||
plain @ Token::PlainText(_) => plain.clone(),
|
||||
sequence @ Token::Sequence(_) => sequence.clone(),
|
||||
Token::Quote(inner) => inner.inner(),
|
||||
Token::Small(inner) => inner.inner(),
|
||||
Token::BoldItalic(inner) => inner.inner(),
|
||||
Token::Bold(inner) => inner.inner(),
|
||||
Token::Italic(inner) => inner.inner(),
|
||||
Token::Center(inner) => inner.inner(),
|
||||
Token::Strikethrough(inner) => inner.inner(),
|
||||
Token::PlainTag(text) => Token::PlainText(text.clone()),
|
||||
Token::InlineCode(code) => Token::PlainText(code.clone()),
|
||||
Token::InlineMath(math) => Token::PlainText(math.clone()),
|
||||
Token::UrlRaw(url) => Token::PlainText(url.clone()),
|
||||
Token::UrlNoEmbed(url) => Token::PlainText(url.clone()),
|
||||
Token::Link { label, .. } => label.inner(),
|
||||
Token::BlockCode { inner, .. } => Token::PlainText(inner.clone()),
|
||||
Token::BlockMath(math) => Token::PlainText(math.clone()),
|
||||
Token::Function { inner, .. } => inner.inner(),
|
||||
Token::Mention { name, .. } => Token::PlainText(name.clone()),
|
||||
Token::UnicodeEmoji(code) => Token::PlainText(code.clone()),
|
||||
Token::ShortcodeEmoji(shortcode) => Token::PlainText(shortcode.clone()),
|
||||
Token::Hashtag(tag) => Token::PlainText(tag.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
fn owned(&self) -> Token<'static> {
|
||||
match self {
|
||||
Token::PlainText(text) => Token::PlainText(Cow::Owned(text.clone().into_owned())),
|
||||
|
@ -129,7 +204,7 @@ impl Token<'_> {
|
|||
Token::ShortcodeEmoji(shortcode) => {
|
||||
Token::ShortcodeEmoji(Cow::Owned(shortcode.clone().into_owned()))
|
||||
}
|
||||
Token::Hashtag(url) => Token::Hashtag(Cow::Owned(url.clone().into_owned())),
|
||||
Token::Hashtag(tag) => Token::Hashtag(Cow::Owned(tag.clone().into_owned())),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -245,6 +320,16 @@ fn collect_char_sequence<'a>(
|
|||
move |chars| func(Cow::Owned(chars.collect()))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn alpha1_unicode(input: Span) -> IResult<Span, Span> {
|
||||
recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn alphanumeric1_unicode(input: Span) -> IResult<Span, Span> {
|
||||
recognize(many1_count(satisfy(char::is_alphanumeric)))(input)
|
||||
}
|
||||
|
||||
fn spliced<'a>(
|
||||
segments: &[Span<'a>],
|
||||
func: impl Fn(Span) -> IResult<Span, Token>,
|
||||
|
@ -316,15 +401,16 @@ fn space(input: Span) -> IResult<Span, Token> {
|
|||
Ok((input, Token::PlainText(frag.into_fragment().into())))
|
||||
}
|
||||
|
||||
struct Matcher<'a, 'b, T> {
|
||||
#[derive(Copy, Clone)]
|
||||
struct Matcher<'a, 'b, T: Clone> {
|
||||
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
|
||||
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
|
||||
_phantom_closure: std::marker::PhantomData<&'a ()>,
|
||||
_phantom_data: std::marker::PhantomData<&'b ()>,
|
||||
_phantom_output: std::marker::PhantomData<fn() -> T>,
|
||||
_phantom_closure: PhantomData<&'a ()>,
|
||||
_phantom_data: PhantomData<&'b ()>,
|
||||
_phantom_output: PhantomData<fn() -> T>,
|
||||
}
|
||||
|
||||
impl<'a, 'b, T> Matcher<'a, 'b, T> {
|
||||
impl<'a, 'b, T: Clone> Matcher<'a, 'b, T> {
|
||||
fn new(
|
||||
matcher_inner: &'a (dyn Fn(Span<'b>) -> IResult<Span<'b>, T> + 'a),
|
||||
collector: &'a (dyn Fn(&mut dyn Iterator<Item = T>) -> Token<'b> + 'a),
|
||||
|
@ -332,9 +418,9 @@ impl<'a, 'b, T> Matcher<'a, 'b, T> {
|
|||
Self {
|
||||
matcher_inner,
|
||||
collector,
|
||||
_phantom_closure: std::marker::PhantomData,
|
||||
_phantom_data: std::marker::PhantomData,
|
||||
_phantom_output: std::marker::PhantomData,
|
||||
_phantom_closure: PhantomData,
|
||||
_phantom_data: PhantomData,
|
||||
_phantom_output: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -345,33 +431,60 @@ impl<'a, 'b> Matcher<'a, 'b, Infallible> {
|
|||
Self {
|
||||
matcher_inner: &fail::<_, Infallible, _>,
|
||||
collector: &|_| unreachable!(),
|
||||
_phantom_closure: std::marker::PhantomData,
|
||||
_phantom_data: std::marker::PhantomData,
|
||||
_phantom_output: std::marker::PhantomData,
|
||||
_phantom_closure: PhantomData,
|
||||
_phantom_data: PhantomData,
|
||||
_phantom_output: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Context;
|
||||
/// Selects whether a delimiter passed to `tag_delimited` is subject to the
/// whitespace flanking check on the text adjacent to it.
#[derive(Copy, Clone, Debug)]
|
||||
enum FlankingRule {
|
||||
/// Flanking is checked. Used in this file for the `*`-based and `~~` delimiters.
Lenient,
|
||||
/// Flanking is checked. Used in this file for the `_`-based delimiters.
/// NOTE(review): `tag_delimited` currently treats this exactly like
/// `Lenient` (its flanking rules are marked as unfinished) — confirm the
/// intended difference.
Strict,
|
||||
/// No flanking check is performed. This is the rule assigned when a bare
/// parser is converted through `From<T>`.
DontCare,
|
||||
}
|
||||
|
||||
/// A delimiter parser bundled with the `FlankingRule` that applies to it.
///
/// Built through the `From` conversions so callers can pass either a bare
/// parser or a `(parser, rule)` tuple.
struct FlankingDelim<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>>(
|
||||
// The parser recognizing the delimiter itself.
T,
|
||||
// The flanking rule attached to this delimiter.
FlankingRule,
|
||||
// Zero-sized marker that makes the struct use the `'a` lifetime parameter.
PhantomData<&'a ()>,
|
||||
);
|
||||
|
||||
impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<(T, FlankingRule)>
|
||||
for FlankingDelim<'a, T>
|
||||
{
|
||||
fn from((func, rule): (T, FlankingRule)) -> Self {
|
||||
FlankingDelim(func, rule, PhantomData)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Fn(Span<'a>) -> IResult<Span<'a>, Span<'a>>> From<T> for FlankingDelim<'a, T> {
|
||||
fn from(func: T) -> Self {
|
||||
FlankingDelim(func, FlankingRule::DontCare, PhantomData)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Context;
|
||||
|
||||
impl Context {
|
||||
#[inline]
|
||||
const fn partial(
|
||||
fn partial(
|
||||
&self,
|
||||
func: impl for<'a> Fn(&Self, Span<'a>) -> IResult<Span<'a>, Token<'a>> + 'static,
|
||||
) -> impl for<'a> Fn(Span<'a>) -> IResult<Span<'a>, Token<'a>> + '_ {
|
||||
move |input| func(self, input)
|
||||
}
|
||||
|
||||
fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
pub fn full<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
map(many1(self.partial(Self::full_single)), Token::Sequence)(input)
|
||||
}
|
||||
|
||||
fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
pub fn inline<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
map(many1(self.partial(Self::inline_single)), Token::Sequence)(input)
|
||||
}
|
||||
|
||||
fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
pub fn inline_label_safe<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
map(
|
||||
many1(self.partial(Self::inline_label_safe_single)),
|
||||
Token::Sequence,
|
||||
|
@ -606,14 +719,21 @@ impl Context {
|
|||
}
|
||||
|
||||
#[inline]
|
||||
fn tag_delimited<'a, 'b: 'a, T, S>(
|
||||
fn tag_delimited<'a, 'b: 'a, T: Clone, S: Clone, FOpen, FClose>(
|
||||
&'a self,
|
||||
opening_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
||||
closing_tag: impl Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
||||
opening_tag: impl Into<FlankingDelim<'b, FOpen>> + 'a,
|
||||
closing_tag: impl Into<FlankingDelim<'b, FClose>> + 'a,
|
||||
escape: bool,
|
||||
matcher: Matcher<'a, 'b, T>,
|
||||
fallback: Matcher<'a, 'b, S>,
|
||||
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_ {
|
||||
) -> impl Fn(Span<'b>) -> IResult<Span<'b>, Token<'b>> + '_
|
||||
where
|
||||
FOpen: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
||||
FClose: Fn(Span<'b>) -> IResult<Span<'b>, Span<'b>> + 'a,
|
||||
{
|
||||
let FlankingDelim(opening_tag, opening_rule, ..) = opening_tag.into();
|
||||
let FlankingDelim(closing_tag, closing_rule, ..) = closing_tag.into();
|
||||
|
||||
move |input| {
|
||||
if escape {
|
||||
if let Ok((input_escaped, (_, mark))) = tuple((tag("\\"), &opening_tag))(input) {
|
||||
|
@ -662,10 +782,44 @@ impl Context {
|
|||
));
|
||||
}
|
||||
|
||||
let (input, (inner, _)) = res?;
|
||||
let (input, (inner, closing)) = res?;
|
||||
let mut inner = inner.into_iter().map(|(_, t)| t);
|
||||
|
||||
Ok((input, (matcher.collector)(&mut inner)))
|
||||
let inner_tok = (matcher.collector)(&mut inner);
|
||||
|
||||
let correct_left_flanking =
|
||||
if let FlankingRule::Lenient | FlankingRule::Strict = opening_rule {
|
||||
let text_left = inner_tok.str_content_left();
|
||||
|
||||
!(text_left.is_some_and(|s| s.starts_with(char::is_whitespace))
|
||||
|| text_left.is_none())
|
||||
} else {
|
||||
true
|
||||
};
|
||||
|
||||
let correct_right_flanking =
|
||||
if let FlankingRule::Lenient | FlankingRule::Strict = closing_rule {
|
||||
let text_right = inner_tok.str_content_right();
|
||||
!(text_right.is_some_and(|s| s.ends_with(char::is_whitespace))
|
||||
|| text_right.is_none())
|
||||
} else {
|
||||
true
|
||||
};
|
||||
|
||||
// TODO: Unfinished flanking rules
|
||||
let correct_flanking = correct_left_flanking && correct_right_flanking;
|
||||
|
||||
if !correct_flanking {
|
||||
return Ok((
|
||||
input,
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText(begin.fragment_between(&post_open).into()),
|
||||
inner_tok.inner().owned(),
|
||||
Token::PlainText(closing.into_fragment().into()),
|
||||
]),
|
||||
));
|
||||
}
|
||||
Ok((input, Token::Sequence(vec![inner_tok])))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -720,12 +874,12 @@ impl Context {
|
|||
}
|
||||
|
||||
fn tag_plain<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
let opening_tag = &tag("<small>");
|
||||
let closing_tag = &tag("</small>");
|
||||
let opening_tag = &tag("<plain>");
|
||||
let closing_tag = &tag("</plain>");
|
||||
|
||||
let (input, _) = opening_tag(input)?;
|
||||
let (input, text) = map(
|
||||
recognize(many1(tuple((not_line_ending, not(closing_tag))))),
|
||||
recognize(many1(tuple((not(line_ending), not(closing_tag), anychar)))),
|
||||
Span::into_fragment,
|
||||
)(input)?;
|
||||
let (input, _) = closing_tag(input)?;
|
||||
|
@ -735,8 +889,8 @@ impl Context {
|
|||
|
||||
fn tag_small<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("<small>"),
|
||||
tag("</small>"),
|
||||
tag_no_case("<small>"),
|
||||
tag_no_case("</small>"),
|
||||
false,
|
||||
Matcher::new(
|
||||
&self.partial(Self::inline_single),
|
||||
|
@ -749,11 +903,10 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
// TODO: CommonMark flanking rules
|
||||
fn tag_bold_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("***"),
|
||||
tag("***"),
|
||||
(tag("***"), FlankingRule::Lenient),
|
||||
(tag("***"), FlankingRule::Lenient),
|
||||
true,
|
||||
Matcher::new(
|
||||
&self.partial(Self::inline_single),
|
||||
|
@ -766,11 +919,10 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
// TODO: CommonMark flanking rules
|
||||
fn tag_bold_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("___"),
|
||||
tag("___"),
|
||||
(tag("___"), FlankingRule::Strict),
|
||||
(tag("___"), FlankingRule::Strict),
|
||||
true,
|
||||
Matcher::new(
|
||||
&self.partial(Self::inline_single),
|
||||
|
@ -785,8 +937,8 @@ impl Context {
|
|||
|
||||
fn tag_bold<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("<b>"),
|
||||
tag("</b>"),
|
||||
tag_no_case("<b>"),
|
||||
tag_no_case("</b>"),
|
||||
false,
|
||||
Matcher::new(
|
||||
&self.partial(Self::inline_single),
|
||||
|
@ -799,11 +951,10 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
// TODO: CommonMark flanking rules
|
||||
fn tag_bold_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("**"),
|
||||
tag("**"),
|
||||
(tag("**"), FlankingRule::Lenient),
|
||||
(tag("**"), FlankingRule::Lenient),
|
||||
true,
|
||||
Matcher::new(
|
||||
&self.partial(Self::inline_single),
|
||||
|
@ -816,11 +967,10 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
// TODO: CommonMark flanking rules
|
||||
fn tag_bold_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("__"),
|
||||
tag("__"),
|
||||
(tag("__"), FlankingRule::Strict),
|
||||
(tag("__"), FlankingRule::Strict),
|
||||
true,
|
||||
Matcher::new(
|
||||
&self.partial(Self::inline_single),
|
||||
|
@ -835,8 +985,8 @@ impl Context {
|
|||
|
||||
fn tag_italic<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("<i>"),
|
||||
tag("</i>"),
|
||||
tag_no_case("<i>"),
|
||||
tag_no_case("</i>"),
|
||||
false,
|
||||
Matcher::new(
|
||||
&self.partial(Self::inline_single),
|
||||
|
@ -849,11 +999,10 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
// TODO: CommonMark flanking rules
|
||||
fn tag_italic_asterisk<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("*"),
|
||||
tag("*"),
|
||||
(tag("*"), FlankingRule::Lenient),
|
||||
(tag("*"), FlankingRule::Lenient),
|
||||
true,
|
||||
Matcher::new(
|
||||
&self.partial(Self::inline_single),
|
||||
|
@ -866,11 +1015,10 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
// TODO: CommonMark flanking rules
|
||||
fn tag_italic_underscore<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("_"),
|
||||
tag("_"),
|
||||
(tag("_"), FlankingRule::Strict),
|
||||
(tag("_"), FlankingRule::Strict),
|
||||
true,
|
||||
Matcher::new(
|
||||
&self.partial(Self::inline_single),
|
||||
|
@ -885,8 +1033,8 @@ impl Context {
|
|||
|
||||
fn tag_strikethrough<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("<s>"),
|
||||
tag("</s>"),
|
||||
tag_no_case("<s>"),
|
||||
tag_no_case("</s>"),
|
||||
false,
|
||||
Matcher::new(
|
||||
&self.partial(Self::inline_single),
|
||||
|
@ -899,11 +1047,10 @@ impl Context {
|
|||
)(input)
|
||||
}
|
||||
|
||||
// TODO: CommonMark flanking rules
|
||||
fn tag_strikethrough_tilde<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
self.tag_delimited(
|
||||
tag("~~"),
|
||||
tag("~~"),
|
||||
(tag("~~"), FlankingRule::Lenient),
|
||||
(tag("~~"), FlankingRule::Lenient),
|
||||
true,
|
||||
Matcher::new(
|
||||
&move |input| {
|
||||
|
@ -1037,20 +1184,42 @@ impl Context {
|
|||
}
|
||||
|
||||
fn shortcode_emoji<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
// TODO: Fail when preceded by alphanumerics
|
||||
if let (plain_out, Some(plain)) = map(
|
||||
opt(recognize(tuple((
|
||||
alphanumeric1_unicode,
|
||||
self.partial(Self::shortcode_emoji),
|
||||
)))),
|
||||
|o| o.map(Span::into_fragment),
|
||||
)(input)?
|
||||
{
|
||||
return Ok((plain_out, Token::PlainText(plain.into())));
|
||||
}
|
||||
|
||||
let (input, _) = tag(":")(input)?;
|
||||
let (input, shortcode) = map(
|
||||
recognize(many1(alt((alphanumeric1, recognize(one_of("_+-")))))),
|
||||
recognize(many1(alt((
|
||||
alphanumeric1_unicode,
|
||||
recognize(one_of("_+-")),
|
||||
)))),
|
||||
Span::into_fragment,
|
||||
)(input)?;
|
||||
let (input, _) = tag(":")(input)?;
|
||||
let (input, _) = not(alphanumeric1)(input)?;
|
||||
let (input, _) = not(alphanumeric1_unicode)(input)?;
|
||||
|
||||
Ok((input, Token::ShortcodeEmoji(shortcode.into())))
|
||||
}
|
||||
|
||||
fn tag_mention<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||
// TODO: Escaping and skip when preceded by alphanumerics
|
||||
if let (plain_out, Some(plain)) = map(
|
||||
opt(recognize(tuple((
|
||||
alt((tag("\\"), alphanumeric1_unicode)),
|
||||
self.partial(Self::tag_mention),
|
||||
)))),
|
||||
|o| o.map(Span::into_fragment),
|
||||
)(input)?
|
||||
{
|
||||
return Ok((plain_out, Token::PlainText(plain.into())));
|
||||
}
|
||||
|
||||
let tags = one_of("@!");
|
||||
let (input, mention_type) = map(tags, |c| match c {
|
||||
|
@ -1123,12 +1292,12 @@ fn protocol(input: Span) -> IResult<Span, Span> {
|
|||
|
||||
#[inline]
|
||||
fn url_chars_base(input: Span) -> IResult<Span, Span> {
|
||||
recognize(alt((
|
||||
alpha1,
|
||||
alt((
|
||||
alphanumeric1_unicode,
|
||||
recognize(tuple((tag("["), many_till(url_chars_base, tag("]"))))),
|
||||
recognize(tuple((tag("("), many_till(url_chars_base, tag(")"))))),
|
||||
recognize(one_of(".,_/:%#$&?!~=+-@")),
|
||||
)))(input)
|
||||
))(input)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -1221,6 +1390,21 @@ mod test {
|
|||
Token::Italic(Box::new(Token::PlainText("italic".into()))),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"* italic *"#),
|
||||
Token::PlainText("* italic *".into())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"_ italic *"#),
|
||||
Token::PlainText("_ italic *".into())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"*"italic"*"#),
|
||||
Token::Italic(Box::new(Token::PlainText("\"italic\"".into())))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full(r#"not code `code` also not code"#),
|
||||
Token::Sequence(vec![
|
||||
|
@ -1356,6 +1540,47 @@ text</center>"#
|
|||
|
||||
#[test]
|
||||
fn parse_link() {
|
||||
assert_eq!(
|
||||
parse_full("IPv4 test: <https://0>"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("IPv4 test: ".into()),
|
||||
Token::UrlNoEmbed("https://0".into())
|
||||
])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full("IPv4 test: <https://127.0.0.1>"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("IPv4 test: ".into()),
|
||||
Token::UrlNoEmbed("https://127.0.0.1".into())
|
||||
])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full("IPv6 test: <https://[::2f:1]/nya>"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("IPv6 test: ".into()),
|
||||
Token::UrlNoEmbed("https://[::2f:1]/nya".into())
|
||||
])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full("IPv6 test: https://[::2f:1]/nya"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("IPv6 test: ".into()),
|
||||
Token::UrlRaw("https://[::2f:1]/nya".into())
|
||||
])
|
||||
);
|
||||
|
||||
// IDNs
|
||||
assert_eq!(
|
||||
parse_full("IDN test: https://www.háčkyčárky.cz/"),
|
||||
Token::Sequence(vec![
|
||||
Token::PlainText("IDN test: ".into()),
|
||||
Token::UrlRaw("https://www.háčkyčárky.cz/".into())
|
||||
])
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full("Link test: [label](https://example.com)"),
|
||||
Token::Sequence(vec![
|
||||
|
@ -1440,6 +1665,11 @@ text</center>"#
|
|||
}
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full("email@notactuallyamenmtion.org"),
|
||||
Token::PlainText("email@notactuallyamenmtion.org".into())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_full("hgsjlkdsa @tag fgahjsdkd"),
|
||||
Token::Sequence(vec![
|
||||
|
@ -1532,6 +1762,32 @@ text</center>"#
|
|||
);
|
||||
}
|
||||
|
||||
/// Shortcode emoji parse on their own and back to back, but stay plain text
/// when alphanumerics are glued to either side.
#[test]
fn parse_shortcodes() {
    let cases = [
        (":bottom:", Token::ShortcodeEmoji("bottom".into())),
        (
            ":bottom::blobfox:",
            Token::Sequence(vec![
                Token::ShortcodeEmoji("bottom".into()),
                Token::ShortcodeEmoji("blobfox".into()),
            ]),
        ),
        // Trailing alphanumerics disqualify the shortcode.
        (":bottom:blobfox", Token::PlainText(":bottom:blobfox".into())),
        // Leading alphanumerics disqualify it as well.
        ("bottom:blobfox:", Token::PlainText("bottom:blobfox:".into())),
    ];

    for (input, expected) in cases {
        assert_eq!(parse_full(input), expected);
    }
}
|
||||
|
||||
#[test]
|
||||
fn parse_emoji() {
|
||||
assert_eq!(
|
||||
|
|
Loading…
Reference in New Issue