FIX: bbcode URLs not handling paths correctly (#21215)

Due to the order in which we were parsing markdown, bbcode [url] elements were
not handled properly.

`[url]https://example.com/path[/url]` was not parsing correctly because
linkify was detecting the url as `https://example.com/path[/url]`, which is a
legitimate URL as far as linkify is concerned.
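
For illustration, a minimal sketch of the failure mode, assuming a standalone linkify-it instance (the same matcher markdown-it exposes as `md.linkify`); the expected match is taken from the description above:

```js
import LinkifyIt from "linkify-it";

const linkify = new LinkifyIt();

// linkify swallows the closing bbcode tag as part of the URL path,
// so the [url] rule never gets a clean href to work with
const matches = linkify.match("[url]https://example.com/path[/url]");
console.log(matches[0].url); // per the description above: "https://example.com/path[/url]"
```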

To resolve this, I swapped the url rule over to a replace rule that re-parses
the internal payload and injects the resulting tokens back into the stream.

This fix is complex because we support things like

`[url][b]test.com[/b][/url]`

so we need to parse the content inside the url tag (`[b]test.com[/b]`) as well.
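
As a rough, self-contained sketch of that approach, using plain markdown-it rather than Discourse's bbcode ruler (the rule name `toy_bbcode_url` and the simplified href handling are illustrative only, not the actual implementation):

```js
import MarkdownIt from "markdown-it";

const md = new MarkdownIt({ linkify: true });

// Toy inline rule: find a literal [url]...[/url] span, re-parse whatever sits
// between the tags, and push those tokens inside an explicit link pair.
md.inline.ruler.before("text", "toy_bbcode_url", (state, silent) => {
  const match = /^\[url\]([\s\S]+?)\[\/url\]/.exec(state.src.slice(state.pos));
  if (!match) {
    return false;
  }

  if (!silent) {
    const content = match[1];

    // re-tokenize the payload so nested markup still produces tokens
    const inner = [];
    state.md.inline.parse(content, state.md, state.env, inner);

    const open = state.push("link_open", "a", 1);
    // simplified: the real commit extracts the href from the parsed tokens
    // and normalizes it through linkify before using it here
    open.attrs = [
      ["href", content],
      ["data-bbcode", "true"],
    ];

    for (const token of inner) {
      // skip links linkify already produced; the wrapper above replaces them
      if (token.type !== "link_open" && token.type !== "link_close") {
        state.tokens.push(token);
      }
    }

    state.push("link_close", "a", -1);
  }

  state.pos += match[0].length;
  return true;
});

console.log(md.renderInline("[url]https://example.com/path[/url]"));
// -> <a href="https://example.com/path" data-bbcode="true">https://example.com/path</a>
```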
Sam authored 2023-04-25 11:28:32 +10:00 · committed by GitHub
parent 366ff0e76b
commit fd4aea7bc5
2 changed files with 88 additions and 38 deletions

View File

@@ -989,6 +989,24 @@ eviltrout</p>
     '<p><a href="http://discourse.org" data-bbcode="true">discourse</a></p>',
     "named links are properly parsed"
   );
+
+  assert.cooked(
+    "[url]https://discourse.org/path[/url]",
+    '<p><a href="https://discourse.org/path" data-bbcode="true">https://discourse.org/path</a></p>',
+    "paths are correctly handled"
+  );
+
+  assert.cooked(
+    "[url]discourse.org/path[/url]",
+    '<p><a href="https://discourse.org/path" data-bbcode="true">discourse.org/path</a></p>',
+    "paths are correctly handled"
+  );
+
+  assert.cooked(
+    "[url][b]discourse.org/path[/b][/url]",
+    '<p><a href="https://discourse.org/path" data-bbcode="true"><span class="bbcode-b">discourse.org/path</span></a></p>',
+    "paths are correctly handled"
+  );
 });
 
 test("images", function (assert) {
@@ -1216,7 +1234,7 @@ eviltrout</p>
   );
   assert.cookedPara(
     "[url]abc.com[/url]",
-    '<a href="http://abc.com">abc.com</a>',
+    '<a href="https://abc.com" data-bbcode="true">abc.com</a>',
     "it magically links using linkify"
   );
   assert.cookedPara(

View File

@@ -182,56 +182,88 @@ export function setup(helper) {
   const simpleUrlRegex = /^https?:\/\//;
 
   ruler.push("url", {
     tag: "url",
-    wrap(startToken, endToken, tagInfo, content, state) {
-      const url = (tagInfo.attrs["_default"] || content).trim();
-      let linkifyFound = false;
-
-      if (state.md.options.linkify) {
-        const tokens = state.tokens;
-        const startIndex = tokens.indexOf(startToken);
-        const endIndex = tokens.indexOf(endToken);
-
-        // reuse existing tokens from linkify if they exist
-        for (let index = startIndex + 1; index < endIndex; index++) {
-          const token = tokens[index];
-
-          if (
-            token.markup === "linkify" &&
-            token.info === "auto" &&
-            token.type === "link_open"
-          ) {
-            linkifyFound = true;
-            token.attrs.push(["data-bbcode", "true"]);
-            break;
-          }
-        }
-      }
-
-      if (!linkifyFound && simpleUrlRegex.test(url)) {
-        startToken.type = "link_open";
-        startToken.tag = "a";
-        startToken.attrs = [
-          ["href", url],
-          ["data-bbcode", "true"],
-        ];
-        startToken.content = "";
-        startToken.nesting = 1;
-
-        endToken.type = "link_close";
-        endToken.tag = "a";
-        endToken.content = "";
-        endToken.nesting = -1;
-      } else {
-        // just strip the bbcode tag
-        endToken.content = "";
-        startToken.content = "";
-
-        // edge case, we don't want this detected as a onebox if auto linked
-        // this ensures it is not stripped
-        startToken.type = "html_inline";
-      }
-
-      return false;
+    replace(state, tagInfo, content) {
+      let token;
+
+      // we need to tokenize the content and reinsert tokens in the stream
+      // this is because we need to support nested bbcode
+      let tokens = [];
+      md.inline.parse(content, state.md, state.env, tokens);
+
+      let url = tagInfo.attrs["_default"];
+
+      if (!url) {
+        // try to find the actual url in the tokens
+        for (let i = 0; i < tokens.length; i++) {
+          token = tokens[i];
+
+          // nested linkify or link, just pick it
+          if (token.type === "link_open") {
+            for (let j = 0; j < token.attrs.length; j++) {
+              if (token.attrs[j][0] === "href") {
+                url = token.attrs[j][1];
+                break;
+              }
+            }
+
+            if (url) {
+              break;
+            }
+          }
+
+          if (token.type === "text") {
+            url = token.content;
+            break;
+          }
+        }
+      }
+
+      if (md.linkify) {
+        let match = null;
+
+        // linkify has trouble with strings containing spaces, so just ban
+        // them outright
+        if (url && !url.includes(" ")) {
+          match = md.linkify.matchAtStart(url);
+          if (!match) {
+            match = md.linkify.matchAtStart("https://" + url);
+          }
+        }
+
+        if (match) {
+          url = match.url;
+        } else {
+          url = null;
+        }
+      } else if (!url.match(simpleUrlRegex)) {
+        url = "https://" + url;
+      }
+
+      if (url) {
+        token = state.push("link_open", "a", 0);
+        token.attrs = [
+          ["href", url],
+          ["data-bbcode", "true"],
+        ];
+        token.content = "";
+        token.nesting = 1;
+      }
+
+      for (let i = 0; i < tokens.length; i++) {
+        token = tokens[i];
+
+        if (token.type === "link_open" || token.type === "link_close") {
+          // linkify nested tokens, do nothing
+        } else {
+          state.tokens.push(token);
+        }
+      }
+
+      if (url) {
+        token = state.push("link_close", "a", 0);
+        token.nesting = -1;
+        token.content = "";
+      }
+
+      return true;
     },
   });
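
For reference, `md.linkify` is a linkify-it instance, and `matchAtStart` (available in linkify-it v4+) only matches links that begin with a scheme, which is why the new rule retries with an `https://` prefix. A minimal sketch of that normalization step, assuming a standalone linkify-it instance:

```js
import LinkifyIt from "linkify-it";

const linkify = new LinkifyIt();

// scheme-less input: matchAtStart returns null, so retry with an https:// prefix
let match = linkify.matchAtStart("discourse.org/path");
if (!match) {
  match = linkify.matchAtStart("https://" + "discourse.org/path");
}

console.log(match && match.url); // "https://discourse.org/path"
```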