DEV: adds initial support for custom blocks using code fencing (#15743)

Allows writing custom code blocks:

```
   ```mermaid height=200,foo=bar
   test
   ```
```

Which will then get converted to:

```
<pre data-code-wrap="mermaid" data-code-height="200" data-code-foo="bar">
  <code class="lang-nohighlight">
    test
  </code>
</pre>
```
This commit is contained in:
Joffrey JAFFEUX 2022-02-09 11:23:44 +01:00 committed by GitHub
parent c38114f0c6
commit b3ecf00c98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 173 additions and 32 deletions

View File

@ -21,6 +21,7 @@ module("Unit | Utility | allowLister", function () {
"custom.foo",
"custom.baz",
"custom[data-*]",
"custom[data-custom-*=foo]",
"custom[rel=nofollow]",
]);
@ -38,11 +39,12 @@ module("Unit | Utility | allowLister", function () {
custom: {
class: ["foo", "baz"],
"data-*": ["*"],
"data-custom-*": ["foo"],
rel: ["nofollow", "test"],
},
},
},
"Expecting a correct white list"
"Expecting a correct allow list"
);
allowLister.disable("test");
@ -53,7 +55,7 @@ module("Unit | Utility | allowLister", function () {
tagList: {},
attrList: {},
},
"Expecting an empty white list"
"Expecting an empty allow list"
);
});
});

View File

@ -893,8 +893,8 @@ eviltrout</p>
assert.cooked(
"```eviltrout\nhello\n```",
'<pre><code class="lang-auto">hello\n</code></pre>',
"it doesn't not allowlist all classes"
'<pre data-code-wrap="eviltrout"><code class="lang-nohighlight">hello\n</code></pre>',
"it converts to custom block unknown code names"
);
assert.cooked(

View File

@ -1,6 +1,7 @@
import PrettyText, { buildOptions } from "pretty-text/pretty-text";
import { module, test } from "qunit";
import { hrefAllowed } from "pretty-text/sanitizer";
import { hrefAllowed, sanitize } from "pretty-text/sanitizer";
import AllowLister from "pretty-text/allow-lister";
module("Unit | Utility | sanitizer", function () {
test("sanitize", function (assert) {
@ -250,4 +251,67 @@ module("Unit | Utility | sanitizer", function () {
"escape single quotes"
);
});
// Exercises sanitize() against an allow list that mixes three kinds of data
// attribute rules: a catch-all (`pre[data-*]`), a prefixed wildcard restricted
// to one value (`code[data-custom-*=foo]`) and a prefixed wildcard accepting
// any value (`div[data-cat-*]`).
test("correctly sanitizes complex data attributes rules", function (assert) {
const allowLister = new AllowLister();
allowLister.allowListFeature("test", [
"pre[data-*]",
"code[data-custom-*=foo]",
"div[data-cat-*]",
]);
allowLister.enable("test");
// <b> has no attribute rule in the "test" feature: every data-ish attribute
// is expected to be stripped.
assert.strictEqual(sanitize("<b data-foo=*></b>", allowLister), "<b></b>");
assert.strictEqual(sanitize("<b data-foo=1></b>", allowLister), "<b></b>");
assert.strictEqual(sanitize("<b data-=1></b>", allowLister), "<b></b>");
assert.strictEqual(sanitize("<b data=1></b>", allowLister), "<b></b>");
assert.strictEqual(sanitize("<b data></b>", allowLister), "<b></b>");
assert.strictEqual(sanitize("<b data=*></b>", allowLister), "<b></b>");
// pre[data-*]: any data attribute is kept, including multi-segment names.
assert.strictEqual(
sanitize("<pre data-foo=1></pre>", allowLister),
'<pre data-foo="1"></pre>'
);
assert.strictEqual(
sanitize("<pre data-foo-bar=1></pre>", allowLister),
'<pre data-foo-bar="1"></pre>'
);
// code[data-custom-*=foo]: the name needs the `data-custom-` prefix followed
// by a real suffix (an empty suffix or a literal `*` is rejected) and the
// value must be exactly "foo".
assert.strictEqual(
sanitize("<code data-foo=foo></code>", allowLister),
"<code></code>"
);
assert.strictEqual(
sanitize("<code data-custom-=foo></code>", allowLister),
"<code></code>"
);
assert.strictEqual(
sanitize("<code data-custom-*=foo></code>", allowLister),
"<code></code>"
);
assert.strictEqual(
sanitize("<code data-custom-bar=foo></code>", allowLister),
'<code data-custom-bar="foo"></code>'
);
assert.strictEqual(
sanitize("<code data-custom-bar=1></code>", allowLister),
"<code></code>"
);
// NOTE(review): `data-cat` (no suffix) is expected to survive although the
// rule declared above is `div[data-cat-*]` — presumably a default allow-list
// rule for div data attributes outside this view permits it; confirm.
assert.strictEqual(
sanitize("<div data-cat=1></div>", allowLister),
'<div data-cat="1"></div>'
);
assert.strictEqual(
sanitize("<div data-cat-dog=1></div>", allowLister),
'<div data-cat-dog="1"></div>'
);
});
});

View File

@ -41,6 +41,18 @@ export function hrefAllowed(href, extraHrefMatchers) {
}
}
/**
 * Finds the attribute rule of a tag that authorizes a given attribute.
 *
 * Rule names are compared literally (no character of a rule name is treated
 * as regular-expression syntax):
 * - a rule name ending in `*` (e.g. `data-custom-*`) matches any attribute
 *   name made of the text before the `*` followed by at least one word
 *   character (`data-custom-bar` matches, `data-custom-` does not);
 * - any other rule name must be equal to the attribute name, so `data-foo`
 *   no longer authorizes `data-foobar`.
 *
 * A matching rule authorizes the attribute when its list of values contains
 * `"*"` or the attribute's value.
 *
 * @param {Object} forTag - attribute rules of one tag: rule name => array of
 *   accepted values
 * @param {string} name - attribute name found in the markup
 * @param {string} value - attribute value found in the markup
 * @returns {string|undefined} the name of the first authorizing rule, or
 *   `undefined` when no rule authorizes the attribute
 */
function testDataAttribute(forTag, name, value) {
  return Object.keys(forTag).find((ruleName) => {
    let nameMatches;

    if (ruleName.endsWith("*")) {
      const prefix = ruleName.slice(0, -1);
      // the wildcard must stand for at least one word character
      nameMatches =
        name.startsWith(prefix) && /^\w/.test(name.slice(prefix.length));
    } else {
      nameMatches = ruleName === name;
    }

    if (!nameMatches) {
      return false;
    }

    const validValues = forTag[ruleName];
    return validValues.includes("*") || validValues.includes(value);
  });
}
export function sanitize(text, allowLister) {
if (!text) {
return "";
@ -72,12 +84,13 @@ export function sanitize(text, allowLister) {
const forTag = allowList.attrList[tag];
if (forTag) {
const forAttr = forTag[name];
if (
(forAttr &&
(forAttr.indexOf("*") !== -1 || forAttr.indexOf(value) !== -1)) ||
(name.indexOf("data-html-") === -1 &&
name.indexOf("data-") === 0 &&
forTag["data-*"]) ||
(forTag["data-*"] || testDataAttribute(forTag, name, value))) ||
(tag === "a" &&
name === "href" &&
hrefAllowed(value, extraHrefMatchers)) ||

View File

@ -1,35 +1,78 @@
// we need a custom renderer for code blocks because our format is slightly
// non-compliant, with special handling for text and so on
const TEXT_CODE_CLASSES = ["text", "pre", "plain"];
/**
 * Parses the info string of a fenced code block (the text following the
 * opening fence) into a tag and a set of attributes.
 *
 * The first whitespace-delimited word is the tag; the rest is read as a
 * comma separated list of `key=value` pairs, e.g. `mermaid height=200,foo=bar`
 * gives `{ tag: "mermaid", attributes: { height: "200", foo: "bar" } }`.
 * Within each pair only the text before the first whitespace is considered,
 * every non-ASCII character is removed, and pairs missing a key or a value
 * are ignored.
 *
 * @param {string} info - raw info string of the fence token
 * @param {Object} md - object exposing `utils.unescapeAll`
 * @returns {{tag: string, attributes: Object}|undefined} the extracted data,
 *   or `undefined` when `info` is empty or the tag contains invalid characters
 */
function extractTokenInfo(info, md) {
  if (!info) {
    return;
  }

  const matches = info.trim().match(/(^\s*\S*)\s*(.*)/i);
  if (!matches) {
    return;
  }

  // ensure the token has only valid chars
  // c++, structured-text and p21 are all valid
  if (!/^[\w+-]*$/i.test(matches[1])) {
    return;
  }

  // global flag: every non-ASCII character has to go, not only the first one
  const NON_ASCII_REGEX = /[^\x00-\x7F]/g;

  const tag = md.utils.unescapeAll(matches[1].replace(NON_ASCII_REGEX, ""));
  const extractedData = { tag, attributes: {} };

  if (matches[2]?.length) {
    md.utils
      .unescapeAll(matches[2].replace(NON_ASCII_REGEX, ""))
      .split(",")
      .forEach((potentialPair) => {
        const [key, value] = potentialPair.trim().split(/\s+/g)[0].split("=");

        // invalid pairs would get caught here and not used, eg `foo=`
        if (key && value) {
          extractedData.attributes[key] = value;
        }
      });
  }

  return extractedData;
}
// Renders the fence token at tokens[idx] as a `<pre><code>` HTML string with
// the token content HTML-escaped.
//
// NOTE(review): this span comes from a diff shown without +/- markers, so
// statements of the previous and of the new implementation appear interleaved
// below. Judging from the statements that use `tokenInfo`/`tag`/`attributes`,
// the new implementation:
// - takes the tag from extractTokenInfo(), falling back to
//   md.options.discourse.defaultCodeLang;
// - uses class "lang-nohighlight" for tags listed in TEXT_CODE_CLASSES,
//   `lang-<tag>` for tags listed in acceptableCodeClasses, and for any other
//   tag "lang-nohighlight" plus the tag stored as the "wrap" attribute;
// - emits every attribute on <pre> as `data-code-<key>="<value>"`, with key
//   and value HTML-escaped.
function render(tokens, idx, options, env, slf, md) {
let token = tokens[idx],
info = token.info ? md.utils.unescapeAll(token.info) : "",
langName = md.options.discourse.defaultCodeLang,
className,
escapedContent = md.utils.escapeHtml(token.content);
const token = tokens[idx];
const escapedContent = md.utils.escapeHtml(token.content);
const tokenInfo = extractTokenInfo(token.info, md);
const tag = tokenInfo?.tag || md.options.discourse.defaultCodeLang;
const attributes = tokenInfo?.attributes || {};
if (info) {
// strip off any additional languages
info = info.trim().split(/\s+/g)[0];
let className;
const acceptableCodeClasses =
md.options.discourse.acceptableCodeClasses || [];
if (TEXT_CODE_CLASSES.indexOf(tag) > -1) {
className = "lang-nohighlight";
} else if (acceptableCodeClasses.indexOf(tag) > -1) {
className = `lang-${tag}`;
} else {
className = "lang-nohighlight";
attributes["wrap"] = tag;
}
const acceptableCodeClasses = md.options.discourse.acceptableCodeClasses;
if (
acceptableCodeClasses &&
info &&
acceptableCodeClasses.indexOf(info) !== -1
) {
langName = info;
}
const dataAttributes = Object.keys(attributes)
.map((key) => {
const value = md.utils.escapeHtml(attributes[key]);
key = md.utils.escapeHtml(key);
return `data-code-${key}="${value}"`;
})
.join(" ");
className =
TEXT_CODE_CLASSES.indexOf(info) !== -1
? "lang-nohighlight"
: "lang-" + langName;
return `<pre><code class="${className}">${escapedContent}</code></pre>\n`;
return `<pre${dataAttributes ? ` ${dataAttributes}` : ""}><code${
className ? ` class="${className}"` : ""
}>${escapedContent}</code></pre>\n`;
}
export function setup(helper) {
@ -41,6 +84,8 @@ export function setup(helper) {
.concat(["auto", "nohighlight"]);
});
helper.allowList(["pre[data-code-*]"]);
helper.allowList({
custom(tag, name, value) {
if (tag === "code" && name === "class") {

View File

@ -2188,7 +2188,7 @@ en:
display_name_on_posts: "Show a user's full name on their posts in addition to their @username."
show_time_gap_days: "If two posts are made this many days apart, display the time gap in the topic."
short_progress_text_threshold: "After the number of posts in a topic goes above this number, the progress bar will only show the current post number. If you change the progress bar's width, you may need to change this value."
default_code_lang: "Default programming language syntax highlighting applied to GitHub code blocks (auto, nohighlight, ruby, python etc.)"
default_code_lang: "Default programming language syntax highlighting applied to code blocks (auto, nohighlight, ruby, python etc.)"
warn_reviving_old_topic_age: "When someone starts replying to a topic where the last reply is older than this many days, a warning will be displayed. Disable by setting to 0."
autohighlight_all_code: "Force apply code highlighting to all preformatted code blocks even when they didn't explicitly specify the language."
highlighted_languages: "Included syntax highlighting rules. (Warning: including too many languages may impact performance) see: <a href='https://highlightjs.org/static/demo/' target='_blank'>https://highlightjs.org/static/demo</a> for a demo"

View File

@ -527,12 +527,27 @@ describe PrettyText do
end
it 'can include code class correctly' do
SiteSetting.highlighted_languages += '|c++|structured-text|p21'
# keep in mind spaces should be trimmed per spec
expect(PrettyText.cook("``` ruby the mooby\n`````")).to eq('<pre><code class="lang-ruby"></code></pre>')
expect(PrettyText.cook("```cpp\ncpp\n```")).to match_html("<pre><code class='lang-cpp'>cpp\n</code></pre>")
expect(PrettyText.cook("```\ncpp\n```")).to match_html("<pre><code class='lang-auto'>cpp\n</code></pre>")
expect(PrettyText.cook("```text\ncpp\n```")).to match_html("<pre><code class='lang-nohighlight'>cpp\n</code></pre>")
expect(PrettyText.cook("```custom\ncustom content\n```")).to match_html("<pre data-code-wrap='custom'><code class='lang-nohighlight'>custom content\n</code></pre>")
expect(PrettyText.cook("```custom foo=bar\ncustom content\n```")).to match_html("<pre data-code-foo='bar' data-code-wrap='custom'><code class='lang-nohighlight'>custom content</code></pre>")
expect(PrettyText.cook("```INVALID a=1\n```")).to match_html("<pre data-code-a='1' data-code-wrap='INVALID'><code class='lang-nohighlight'>\n</code></pre>")
expect(PrettyText.cook("```INVALID a=1, foo=bar , baz=2\n```")).to match_html("<pre data-code-a='1' data-code-foo='bar' data-code-baz='2' data-code-wrap='INVALID'><code class='lang-nohighlight'>\n</code></pre>")
expect(PrettyText.cook("```text\n```")).to match_html("<pre><code class='lang-nohighlight'>\n</code></pre>")
expect(PrettyText.cook("```auto\n```")).to match_html("<pre><code class='lang-auto'>\n</code></pre>")
expect(PrettyText.cook("```ruby startline=3 $%@#\n```")).to match_html("<pre data-code-startline='3'><code class='lang-ruby'>\n</code></pre>")
expect(PrettyText.cook("```mermaid a_-你=17\n```")).to match_html("<pre data-code-a_-='17' data-code-wrap='mermaid'><code class='lang-nohighlight'>\n</code></pre>")
expect(PrettyText.cook("```mermaid foo=<script>alert(document.cookie)</script>\n```")).to match_html("<pre data-code-foo='&lt;script&gt;alert(document.cookie)&lt;/script&gt;' data-code-wrap='mermaid'><code class='lang-nohighlight'>\n</code></pre>")
expect(PrettyText.cook("```mermaid foo= begin admin o\n```")).to match_html("<pre data-code-wrap='mermaid'><code class='lang-nohighlight'>\n</code></pre>")
expect(PrettyText.cook("```c++\nc++\n```")).to match_html("<pre><code class='lang-c++'>c++\n</code></pre>")
expect(PrettyText.cook("```structured-text\nstructured-text\n```")).to match_html("<pre><code class='lang-structured-text'>structured-text\n</code></pre>")
expect(PrettyText.cook("```p21\np21\n```")).to match_html("<pre><code class='lang-p21'>p21\n</code></pre>")
expect(PrettyText.cook("<pre data-code='3' data-code-foo='1' data-malicous-code='2'></pre>")).to match_html("<pre data-code-foo='1'></pre>")
end
it 'indents code correctly' do
@ -553,7 +568,7 @@ describe PrettyText do
it "strips out unicode bidirectional (bidi) override characters and replaces with a highlighted span" do
code = <<~MD
X
```js
```auto
var isAdmin = false;
/* begin admin only */ if (isAdmin) {
console.log("You are an admin.");

View File

@ -34,6 +34,8 @@ describe "CommonMark" do
cooked.gsub!(" class=\"lang-auto\"", '')
cooked.gsub!(/<span class="hashtag">(.*)<\/span>/, "\\1")
cooked.gsub!(/<a name="(.*)" class="anchor" href="#\1*"><\/a>/, "")
# we support data attributes, which are not in the spec
cooked.gsub!("<pre data-code-startline=\"3\">", '<pre>')
# we don't care about this
cooked.gsub!("<blockquote>\n</blockquote>", "<blockquote></blockquote>")
html.gsub!("<blockquote>\n</blockquote>", "<blockquote></blockquote>")