Stricter URL parsing
This commit is contained in:
parent
d2bc679740
commit
c4fd99fa45
|
@ -991,7 +991,10 @@ impl Context {
|
||||||
let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
|
let (input, url_span) = recognize(tuple((protocol, url_chars(tag(">"), true))))(input)?;
|
||||||
let (input, _) = tag(">")(input)?;
|
let (input, _) = tag(">")(input)?;
|
||||||
|
|
||||||
Ok((input, Token::UrlRaw(Cow::from(url_span.into_fragment()))))
|
Ok((
|
||||||
|
input,
|
||||||
|
Token::UrlNoEmbed(Cow::from(url_span.into_fragment())),
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
fn link<'a>(&self, input: Span<'a>) -> IResult<Span<'a>, Token<'a>> {
|
||||||
|
@ -1120,7 +1123,12 @@ fn protocol(input: Span) -> IResult<Span, Span> {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn url_chars_base(input: Span) -> IResult<Span, Span> {
|
fn url_chars_base(input: Span) -> IResult<Span, Span> {
|
||||||
recognize(alt((alpha1, recognize(one_of(".,_/:%#$&?!~=+-()[]@")))))(input)
|
recognize(alt((
|
||||||
|
alpha1,
|
||||||
|
recognize(tuple((tag("["), many_till(url_chars_base, tag("]"))))),
|
||||||
|
recognize(tuple((tag("("), many_till(url_chars_base, tag(")"))))),
|
||||||
|
recognize(one_of(".,_/:%#$&?!~=+-@")),
|
||||||
|
)))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -1128,26 +1136,10 @@ fn url_chars<'a, T: 'a>(
|
||||||
terminator: impl Fn(Span<'a>) -> IResult<Span<'a>, T> + 'a,
|
terminator: impl Fn(Span<'a>) -> IResult<Span<'a>, T> + 'a,
|
||||||
spaces: bool,
|
spaces: bool,
|
||||||
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'a {
|
) -> impl FnMut(Span<'a>) -> IResult<Span<'a>, Span<'a>> + 'a {
|
||||||
let terminating = move |input| {
|
|
||||||
tuple((
|
|
||||||
&terminator,
|
|
||||||
alt((
|
|
||||||
space1,
|
|
||||||
line_ending,
|
|
||||||
eof,
|
|
||||||
recognize(one_of("([<'\"")),
|
|
||||||
recognize(tuple((
|
|
||||||
alt((alpha1, recognize(one_of("*")))),
|
|
||||||
alt((space1, line_ending, eof)),
|
|
||||||
))),
|
|
||||||
)),
|
|
||||||
))(input)
|
|
||||||
};
|
|
||||||
|
|
||||||
let chars = tuple((
|
let chars = tuple((
|
||||||
not(tuple((space1, eof))),
|
not(tuple((space1, eof))),
|
||||||
not(tuple((space1, tag("\"")))),
|
not(tuple((space1, tag("\"")))),
|
||||||
not(tuple((opt(space1), terminating))),
|
not(tuple((opt(space1), terminator))),
|
||||||
alt((url_chars_base, if spaces { space1 } else { fail })),
|
alt((url_chars_base, if spaces { space1 } else { fail })),
|
||||||
));
|
));
|
||||||
|
|
||||||
|
@ -1167,49 +1159,48 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_url_chars() {
|
fn parse_url_chars() {
|
||||||
let test1 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security))";
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
url_chars(tag(")"), true)(Span::new(
|
||||||
|
"https://en.wikipedia.org/wiki/Sandbox_(computer_security))"
|
||||||
|
))
|
||||||
|
.unwrap()
|
||||||
|
.1
|
||||||
|
.into_fragment(),
|
||||||
|
"https://en.wikipedia.org/wiki/Sandbox_(computer_security)"
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
url_chars(tag(")"), true)(Span::new(
|
||||||
|
"https://en.wikipedia.org/wiki/Sandbox_(computer_security)))"
|
||||||
|
))
|
||||||
|
.unwrap()
|
||||||
|
.1
|
||||||
|
.into_fragment(),
|
||||||
"https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
|
"https://en.wikipedia.org/wiki/Sandbox_(computer_security)",
|
||||||
url_chars(tag(")"), true)(Span::new(test1))
|
|
||||||
.unwrap()
|
|
||||||
.1
|
|
||||||
.into_fragment()
|
|
||||||
);
|
);
|
||||||
|
|
||||||
let test2 = "https://en.wikipedia.org/wiki/Sandbox_(computer_security)))";
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
"https://en.wikipedia.org/wiki/Sandbox_(computer_security))",
|
url_chars(tag(")"), true)(Span::new("https://cs.wikipedia.org/wiki/Among_Us "))
|
||||||
url_chars(tag(")"), true)(Span::new(test2))
|
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.1
|
.1
|
||||||
.into_fragment()
|
.into_fragment(),
|
||||||
);
|
|
||||||
|
|
||||||
let test3 = "https://en.wikipedia.org/wiki/(";
|
|
||||||
assert_eq!(
|
|
||||||
test3,
|
|
||||||
url_chars(tag(")"), true)(Span::new(test3))
|
|
||||||
.unwrap()
|
|
||||||
.1
|
|
||||||
.into_fragment()
|
|
||||||
);
|
|
||||||
|
|
||||||
let test4 = "https://cs.wikipedia.org/wiki/Among_Us ";
|
|
||||||
assert_eq!(
|
|
||||||
"https://cs.wikipedia.org/wiki/Among_Us",
|
"https://cs.wikipedia.org/wiki/Among_Us",
|
||||||
url_chars(tag(")"), true)(Span::new(test4))
|
|
||||||
.unwrap()
|
|
||||||
.1
|
|
||||||
.into_fragment()
|
|
||||||
);
|
);
|
||||||
|
|
||||||
let test5 = "https://cs.wikipedia.org/wiki/Among Us )";
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
"https://cs.wikipedia.org/wiki/Among Us",
|
url_chars(tag(")"), true)(Span::new("https://cs.wikipedia.org/wiki/Among Us )"))
|
||||||
url_chars(tag(")"), true)(Span::new(test5))
|
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.1
|
.1
|
||||||
.into_fragment()
|
.into_fragment(),
|
||||||
|
"https://cs.wikipedia.org/wiki/Among Us"
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
url_chars(tag(")"), false)(Span::new("https://en.wikipedia.org/wiki/Among Us )"))
|
||||||
|
.unwrap()
|
||||||
|
.1
|
||||||
|
.into_fragment(),
|
||||||
|
"https://en.wikipedia.org/wiki/Among"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1381,6 +1372,20 @@ text</center>"#
|
||||||
])
|
])
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
parse_full("<https://example.com>"),
|
||||||
|
Token::UrlNoEmbed("https://example.com".into())
|
||||||
|
);
|
||||||
|
|
||||||
|
// Adjacent links okay
|
||||||
|
assert_eq!(
|
||||||
|
parse_full("<https://example.com/><https://awawa.gay/>"),
|
||||||
|
Token::Sequence(vec![
|
||||||
|
Token::UrlNoEmbed("https://example.com/".into()),
|
||||||
|
Token::UrlNoEmbed("https://awawa.gay/".into())
|
||||||
|
])
|
||||||
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse_full("Link test: ?[label](https://awawa.gay)"),
|
parse_full("Link test: ?[label](https://awawa.gay)"),
|
||||||
Token::Sequence(vec![
|
Token::Sequence(vec![
|
||||||
|
@ -1393,6 +1398,32 @@ text</center>"#
|
||||||
])
|
])
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
parse_full("Link test: ?[label](https://awawa.gay)test"),
|
||||||
|
Token::Sequence(vec![
|
||||||
|
Token::PlainText("Link test: ".into()),
|
||||||
|
Token::Link {
|
||||||
|
label: Box::new(Token::PlainText("label".into())),
|
||||||
|
href: "https://awawa.gay".into(),
|
||||||
|
embed: false
|
||||||
|
},
|
||||||
|
Token::PlainText("test".into())
|
||||||
|
])
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
parse_full("Link test: (?[label](https://awawa.gay))"),
|
||||||
|
Token::Sequence(vec![
|
||||||
|
Token::PlainText("Link test: (".into()),
|
||||||
|
Token::Link {
|
||||||
|
label: Box::new(Token::PlainText("label".into())),
|
||||||
|
href: "https://awawa.gay".into(),
|
||||||
|
embed: false
|
||||||
|
},
|
||||||
|
Token::PlainText(")".into())
|
||||||
|
])
|
||||||
|
);
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
parse_full("Link test: ?[label](https://awawa.gay"), // Missing closing bracket
|
parse_full("Link test: ?[label](https://awawa.gay"), // Missing closing bracket
|
||||||
Token::Sequence(vec![
|
Token::Sequence(vec![
|
||||||
|
|
Loading…
Reference in New Issue