FIX: bbcode URLs not handling paths correctly (#21215)

Due to the order in which we were parsing markdown, bbcode [url] elements were
not handled properly.

`[url]https://example.com/path[/url]` was not parsing correctly because
linkify was detecting the url as `https://example.com/path[/url]`, which is a
legitimate URL as far as linkify is concerned.
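
For illustration, a minimal sketch of the failure mode, assuming a standalone linkify-it instance (the same matcher markdown-it exposes as `md.linkify`); the expected match is taken from the description above:

```js
import LinkifyIt from "linkify-it";

const linkify = new LinkifyIt();

// linkify swallows the closing bbcode tag as part of the URL path,
// so the [url] rule never gets a clean href to work with
const matches = linkify.match("[url]https://example.com/path[/url]");
console.log(matches[0].url); // per the description above: "https://example.com/path[/url]"
```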

To resolve this, I swapped the url rule over to a replace rule that re-parses
the internal payload and injects the resulting tokens back into the stream.

This fix is complex because we support things like

`[url][b]test.com[/b][/url]`

so we need to parse the content inside the url tag (`[b]test.com[/b]`) as well.
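
As a rough, self-contained sketch of that approach, using plain markdown-it rather than Discourse's bbcode ruler (the rule name `toy_bbcode_url` and the simplified href handling are illustrative only, not the actual implementation):

```js
import MarkdownIt from "markdown-it";

const md = new MarkdownIt({ linkify: true });

// Toy inline rule: find a literal [url]...[/url] span, re-parse whatever sits
// between the tags, and push those tokens inside an explicit link pair.
md.inline.ruler.before("text", "toy_bbcode_url", (state, silent) => {
  const match = /^\[url\]([\s\S]+?)\[\/url\]/.exec(state.src.slice(state.pos));
  if (!match) {
    return false;
  }

  if (!silent) {
    const content = match[1];

    // re-tokenize the payload so nested markup still produces tokens
    const inner = [];
    state.md.inline.parse(content, state.md, state.env, inner);

    const open = state.push("link_open", "a", 1);
    // simplified: the real commit extracts the href from the parsed tokens
    // and normalizes it through linkify before using it here
    open.attrs = [
      ["href", content],
      ["data-bbcode", "true"],
    ];

    for (const token of inner) {
      // skip links linkify already produced; the wrapper above replaces them
      if (token.type !== "link_open" && token.type !== "link_close") {
        state.tokens.push(token);
      }
    }

    state.push("link_close", "a", -1);
  }

  state.pos += match[0].length;
  return true;
});

console.log(md.renderInline("[url]https://example.com/path[/url]"));
// -> <a href="https://example.com/path" data-bbcode="true">https://example.com/path</a>
```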
Sam authored 2023-04-25 11:28:32 +10:00 · committed by GitHub
parent 366ff0e76b
commit fd4aea7bc5
2 changed files with 88 additions and 38 deletions

View File

@@ -989,6 +989,24 @@ eviltrout</p>
     '<p><a href="http://discourse.org" data-bbcode="true">discourse</a></p>',
     "named links are properly parsed"
   );
+
+  assert.cooked(
+    "[url]https://discourse.org/path[/url]",
+    '<p><a href="https://discourse.org/path" data-bbcode="true">https://discourse.org/path</a></p>',
+    "paths are correctly handled"
+  );
+
+  assert.cooked(
+    "[url]discourse.org/path[/url]",
+    '<p><a href="https://discourse.org/path" data-bbcode="true">discourse.org/path</a></p>',
+    "paths are correctly handled"
+  );
+
+  assert.cooked(
+    "[url][b]discourse.org/path[/b][/url]",
+    '<p><a href="https://discourse.org/path" data-bbcode="true"><span class="bbcode-b">discourse.org/path</span></a></p>',
+    "paths are correctly handled"
+  );
 });
 
 test("images", function (assert) {
@@ -1216,7 +1234,7 @@ eviltrout</p>
   );
   assert.cookedPara(
     "[url]abc.com[/url]",
-    '<a href="http://abc.com">abc.com</a>',
+    '<a href="https://abc.com" data-bbcode="true">abc.com</a>',
     "it magically links using linkify"
   );
   assert.cookedPara(

View File

@@ -182,56 +182,88 @@ export function setup(helper) {
   const simpleUrlRegex = /^https?:\/\//;
 
   ruler.push("url", {
     tag: "url",
-    wrap(startToken, endToken, tagInfo, content, state) {
-      const url = (tagInfo.attrs["_default"] || content).trim();
-      let linkifyFound = false;
-
-      if (state.md.options.linkify) {
-        const tokens = state.tokens;
-        const startIndex = tokens.indexOf(startToken);
-        const endIndex = tokens.indexOf(endToken);
-
-        // reuse existing tokens from linkify if they exist
-        for (let index = startIndex + 1; index < endIndex; index++) {
-          const token = tokens[index];
-
-          if (
-            token.markup === "linkify" &&
-            token.info === "auto" &&
-            token.type === "link_open"
-          ) {
-            linkifyFound = true;
-            token.attrs.push(["data-bbcode", "true"]);
-            break;
-          }
-        }
-      }
-
-      if (!linkifyFound && simpleUrlRegex.test(url)) {
-        startToken.type = "link_open";
-        startToken.tag = "a";
-        startToken.attrs = [
-          ["href", url],
-          ["data-bbcode", "true"],
-        ];
-        startToken.content = "";
-        startToken.nesting = 1;
-
-        endToken.type = "link_close";
-        endToken.tag = "a";
-        endToken.content = "";
-        endToken.nesting = -1;
-      } else {
-        // just strip the bbcode tag
-        endToken.content = "";
-        startToken.content = "";
-
-        // edge case, we don't want this detected as a onebox if auto linked
-        // this ensures it is not stripped
-        startToken.type = "html_inline";
-      }
-
-      return false;
+    replace(state, tagInfo, content) {
+      let token;
+
+      // we need to tokenize the content and reinsert tokens in the stream
+      // this is because we need to support nested bbcode
+      let tokens = [];
+      md.inline.parse(content, state.md, state.env, tokens);
+
+      let url = tagInfo.attrs["_default"];
+
+      if (!url) {
+        // try to find the actual url in the tokens
+        for (let i = 0; i < tokens.length; i++) {
+          token = tokens[i];
+
+          // nested linkify or link, just pick it
+          if (token.type === "link_open") {
+            for (let j = 0; j < token.attrs.length; j++) {
+              if (token.attrs[j][0] === "href") {
+                url = token.attrs[j][1];
+                break;
+              }
+            }
+
+            if (url) {
+              break;
+            }
+          }
+
+          if (token.type === "text") {
+            url = token.content;
+            break;
+          }
+        }
+      }
+
+      if (md.linkify) {
+        let match = null;
+
+        // linkify has trouble with strings containing spaces, so just ban
+        // them outright
+        if (url && !url.includes(" ")) {
+          match = md.linkify.matchAtStart(url);
+          if (!match) {
+            match = md.linkify.matchAtStart("https://" + url);
+          }
+        }
+
+        if (match) {
+          url = match.url;
+        } else {
+          url = null;
+        }
+      } else if (!url.match(simpleUrlRegex)) {
+        url = "https://" + url;
+      }
+
+      if (url) {
+        token = state.push("link_open", "a", 0);
+        token.attrs = [
+          ["href", url],
+          ["data-bbcode", "true"],
+        ];
+        token.content = "";
+        token.nesting = 1;
+      }
+
+      for (let i = 0; i < tokens.length; i++) {
+        token = tokens[i];
+
+        if (token.type === "link_open" || token.type === "link_close") {
+          // linkify nested tokens, do nothing
+        } else {
+          state.tokens.push(token);
+        }
+      }
+
+      if (url) {
+        token = state.push("link_close", "a", 0);
+        token.nesting = -1;
+        token.content = "";
+      }
+
+      return true;
     },
   });
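
For reference, `md.linkify` is a linkify-it instance, and `matchAtStart` (available in linkify-it v4+) only matches links that begin with a scheme, which is why the new rule retries with an `https://` prefix. A minimal sketch of that normalization step, assuming a standalone linkify-it instance:

```js
import LinkifyIt from "linkify-it";

const linkify = new LinkifyIt();

// scheme-less input: matchAtStart returns null, so retry with an https:// prefix
let match = linkify.matchAtStart("discourse.org/path");
if (!match) {
  match = linkify.matchAtStart("https://" + "discourse.org/path");
}

console.log(match && match.url); // "https://discourse.org/path"
```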