Compare commits

..

2 Commits

Author SHA1 Message Date
Natty c4a8ebebf3
MMM: Janky outer flanking rules implementation 2023-10-24 00:27:54 +02:00
Natty 42fa83c6e2
MMM: Fixed hashtag parsing 2023-10-23 23:52:02 +02:00
1 changed file with 57 additions and 9 deletions

View File

@@ -6,8 +6,8 @@ use nom::character::complete::{
alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of, alpha1, alphanumeric1, anychar, char as one_char, line_ending, not_line_ending, one_of,
satisfy, space1, tab, satisfy, space1, tab,
}; };
use nom::combinator::{eof, fail, map, not, opt, recognize}; use nom::combinator::{eof, fail, map, not, opt, peek, recognize};
use nom::error::{ErrorKind, ParseError}; use nom::error::ErrorKind;
use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1}; use nom::multi::{many0, many0_count, many1, many1_count, many_till, separated_list1};
use nom::sequence::tuple; use nom::sequence::tuple;
use nom::{IResult, Offset, Parser, Slice}; use nom::{IResult, Offset, Parser, Slice};
@@ -273,8 +273,11 @@ fn collect_char_sequence(
} }
#[inline] #[inline]
fn alpha1_unicode(input: Span) -> IResult<Span, Span> { fn space1_unicode(input: Span) -> IResult<Span, Span> {
recognize(many1_count(satisfy(char::is_alphanumeric)))(input) recognize(many1_count(tuple((
not(line_ending),
satisfy(char::is_whitespace),
))))(input)
} }
#[inline] #[inline]
@@ -770,6 +773,14 @@ impl Context {
} }
} }
if let FlankingRule::Strict = opening_rule {
let (input, pre) =
opt(recognize(tuple((alphanumeric1_unicode, &opening_tag))))(input)?;
if let Some(pre_text) = pre {
return Ok((input, Token::PlainText(pre_text.into_fragment().into())));
}
}
let begin = input; let begin = input;
let (post_open, _) = opening_tag(input)?; let (post_open, _) = opening_tag(input)?;
@@ -832,8 +843,12 @@ impl Context {
true true
}; };
// TODO: Unfinished flanking rules let (input, alphanum) = opt(peek(alphanumeric1_unicode))(input)?;
let correct_flanking = correct_left_flanking && correct_right_flanking; let correct_right_outer =
alphanum.is_none() || !matches!(closing_rule, FlankingRule::Strict);
let correct_flanking =
correct_left_flanking && correct_right_flanking && correct_right_outer;
if !correct_flanking { if !correct_flanking {
return Ok(( return Ok((
@@ -1293,7 +1308,12 @@ impl Context {
} }
fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> { fn tag_hashtag<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token> {
// TODO: Skip when preceded by alphanumerics let (input, maybe_preceded) =
opt(recognize(tuple((alphanumeric1_unicode, tag("#")))))(input)?;
if let Some(preceded) = maybe_preceded {
return Ok((input, Token::PlainText(preceded.into_fragment().into())));
}
let (input, _) = tag("#")(input)?; let (input, _) = tag("#")(input)?;
@@ -1347,8 +1367,8 @@ impl Context {
tag(""), tag(""),
))), ))),
recognize(tuple(( recognize(tuple((
not(space1), not(space1_unicode),
not_line_ending, not(line_ending),
not(one_of(".,:;!?#?/[]【】()「」()<>")), not(one_of(".,:;!?#?/[]【】()「」()<>")),
anychar, anychar,
))), ))),
@@ -1501,6 +1521,20 @@ mod test {
Token::PlainText("* italic *".into()) Token::PlainText("* italic *".into())
); );
assert_eq!(
parse_full("snake_case_variable"),
Token::PlainText("snake_case_variable".into())
);
assert_eq!(
parse_full("intra*word*italic"),
Token::Sequence(vec![
Token::PlainText("intra".into()),
Token::Italic(Box::new(Token::PlainText("word".into()))),
Token::PlainText("italic".into())
])
);
assert_eq!( assert_eq!(
parse_full(r#"_ italic *"#), parse_full(r#"_ italic *"#),
Token::PlainText("_ italic *".into()) Token::PlainText("_ italic *".into())
@@ -1699,6 +1733,20 @@ text</center>"#
]) ])
); );
assert_eq!(
parse_full("test #hashtag tail"),
Token::Sequence(vec![
Token::PlainText("test ".into()),
Token::Hashtag("hashtag".into()),
Token::PlainText(" tail".into())
])
);
assert_eq!(
parse_full("not#hashtag tail"),
Token::PlainText("not#hashtag tail".into())
);
assert_eq!( assert_eq!(
parse_full("<https://example.com>"), parse_full("<https://example.com>"),
Token::UrlNoEmbed("https://example.com".into()) Token::UrlNoEmbed("https://example.com".into())