Merge pull request #2848 from riking/whitelist-api

Improve Markdown.whiteListTag, code dialect
2014-11-07 11:33:37 -05:00 · 2014-11-07 11:33:37 -05:00 · 564e7a988c
parent d6ff1630a8 ef36a619c3
commit 564e7a988c
4 changed files with 96 additions and 46 deletions
--- a/app/assets/javascripts/discourse/dialects/code_dialect.js
+++ b/app/assets/javascripts/discourse/dialects/code_dialect.js
@ -3,12 +3,12 @@
 **/

 var acceptableCodeClasses =
-  ["lang-auto", "1c", "actionscript", "apache", "applescript", "avrasm", "axapta", "bash", "brainfuck",
+  ["auto", "1c", "actionscript", "apache", "applescript", "avrasm", "axapta", "bash", "brainfuck",
   "clojure", "cmake", "coffeescript", "cpp", "cs", "css", "d", "delphi", "diff", "xml", "django", "dos",
   "erlang-repl", "erlang", "glsl", "go", "handlebars", "haskell", "http", "ini", "java", "javascript",
-   "json", "lisp", "lua", "markdown", "matlab", "mel", "nginx", "objectivec", "parser3", "perl", "php",
-   "profile", "python", "r", "rib", "rsl", "ruby", "rust", "scala", "smalltalk", "sql", "tex", "text",
-   "vala", "vbscript", "vhdl"];
+   "json", "lisp", "lua", "markdown", "matlab", "mel", "nginx", "nohighlight", "objectivec", "parser3",
+   "perl", "php", "profile", "python", "r", "rib", "rsl", "ruby", "rust", "scala", "smalltalk", "sql",
+   "tex", "text", "vala", "vbscript", "vhdl"];

 var textCodeClasses = ["text", "pre"];

@ -32,9 +32,9 @@ Discourse.Dialect.replaceBlock({
    }

    if (textCodeClasses.indexOf(matches[1]) !== -1) {
-      return ['p', ['pre', ['code', flattenBlocks(blockContents) ]]];
+      return ['p', ['pre', ['code', {'class': 'lang-nohighlight'}, flattenBlocks(blockContents) ]]];
    } else  {
-      return ['p', ['pre', ['code', {'class': klass}, flattenBlocks(blockContents) ]]];
+      return ['p', ['pre', ['code', {'class': 'lang-' + klass}, flattenBlocks(blockContents) ]]];
    }
  }
 });
@ -69,3 +69,7 @@ Discourse.Dialect.replaceBlock({
    return ['p', ['pre', flattenBlocks(blockContents)]];
  }
 });
+
+// Whitelist the language classes
+var regexpSource = "^lang-(" + acceptableCodeClasses.join('|') + ")$";
+Discourse.Markdown.whiteListTag('code', 'class', new RegExp(regexpSource, "i"));
--- a/app/assets/javascripts/discourse/lib/markdown.js
+++ b/app/assets/javascripts/discourse/lib/markdown.js
@ -1,4 +1,4 @@
-/*global Markdown:true, hljs:true */
+/*global Markdown:true */

 /**
  Contains methods to help us with markdown formatting.
@ -7,10 +7,44 @@
  @namespace Discourse
  @module Discourse
 **/
-var _validClasses = {},
-    _validIframes = [],
-    _validTags = {},
-    _decoratedCaja = false;
+
+/**
+ * An object mapping from HTML tag names to an object mapping the valid
+ * attributes on that tag to an array of permitted values.
+ *
+ * The permitted values can be strings or regexes.
+ *
+ * The pseudo-attribute 'data-*' can be used to validate any data-foo
+ * attributes without any specified validations.
+ *
+ * Code can insert into this map by calling Discourse.Markdown.whiteListTag().
+ *
+ * Example:
+ *
+ * <pre><code>
+ * {
+ *   a: {
+ *     href: ['*'],
+ *     'data-mention-id': [/^\d+$/],
+ *     ...
+ *   },
+ *   code: {
+ *     class: ['ada', 'haskell', 'c', 'cpp', ... ]
+ *   },
+ *   ...
+ * }
+ * </code></pre>
+ *
+ * @private
+ */
+var _validTags = {};
+/**
+ * Classes valid on all elements. Map from class name to 'true'.
+ * @private
+ */
+var _validClasses = {};
+var _validIframes = [];
+var _decoratedCaja = false;

 function validateAttribute(tagName, attribName, value) {
  var tag = _validTags[tagName];
@ -18,18 +52,17 @@ function validateAttribute(tagName, attribName, value) {
  // Handle classes
  if (attribName === "class") {
    if (_validClasses[value]) { return value; }
+  }

-    if (tag) {
-      var classes = tag['class'];
-      if (classes && (classes.indexOf(value) !== -1 || classes.indexOf('*') !== -1)) {
-        return value;
-      }
-    }
-  } else if (attribName.indexOf('data-') === 0) {
-    // data-* attributes
-    if (tag) {
-      var allowed = tag[attribName] || tag['data-*'];
-      if (allowed && (allowed === value || allowed.indexOf('*') !== -1)) { return value; }
+  if (attribName.indexOf('data-') === 0) {
+    // data-* catch-all validators
+    if (tag && tag['data-*'] && !tag[attribName]) {
+      var permitted = tag['data-*'];
+      if (permitted && (
+            permitted.indexOf(value) !== -1 ||
+            permitted.indexOf('*') !== -1 ||
+            ((permitted instanceof RegExp) && permitted.test(value)))
+        ) { return value; }
    }
  }

@ -37,21 +70,40 @@ function validateAttribute(tagName, attribName, value) {
    var attrs = tag[attribName];
    if (attrs && (attrs.indexOf(value) !== -1 ||
                  attrs.indexOf('*') !== -1) ||
-                  _.any(attrs,function(r){return (r instanceof RegExp) && value.search(r) >= 0;})
+                  _.any(attrs, function(r) { return (r instanceof RegExp) && r.test(value); })
        ) { return value; }
  }
+
+  // return undefined;
+}
+
+function anchorRegexp(regex) {
+  if (/^\^.*\$$/.test(regex.source)) {
+    return regex; // already anchored
+  }
+
+  var flags = "";
+  if (regex.global) { throw "Invalid attribute validation regex - cannot be global"; }
+  if (regex.ignoreCase) { flags += "i"; }
+  if (regex.multiline) { flags += "m"; }
+  if (regex.sticky) { throw "Invalid attribute validation regex - cannot be sticky"; }
+
+  return new RegExp("^" + regex.source + "$", flags);
 }

 Discourse.Markdown = {

  /**
-    Whitelist class for only a certain tag
+    Add to the attribute whitelist for a certain HTML tag.

-    @param {String} tagName to whitelist
-    @param {String} attribName to whitelist
-    @param {String} value to whitelist
+    @param {String} tagName tag to whitelist the attr for
+    @param {String} attribName attr to whitelist for the tag
+    @param {String | RegExp} [value] whitelisted value for the attribute
  **/
  whiteListTag: function(tagName, attribName, value) {
+    if (value instanceof RegExp) {
+      value = anchorRegexp(value);
+    }
    _validTags[tagName] = _validTags[tagName] || {};
    _validTags[tagName][attribName] = _validTags[tagName][attribName] || [];
    _validTags[tagName][attribName].push(value || '*');
@ -238,26 +290,19 @@ Discourse.Markdown = {
 RSVP.EventTarget.mixin(Discourse.Markdown);

 Discourse.Markdown.whiteListTag('a', 'class', 'attachment');
-Discourse.Markdown.whiteListTag('a', 'target', '_blank');
 Discourse.Markdown.whiteListTag('a', 'class', 'onebox');
 Discourse.Markdown.whiteListTag('a', 'class', 'mention');

+Discourse.Markdown.whiteListTag('a', 'target', '_blank');
+Discourse.Markdown.whiteListTag('a', 'rel', 'nofollow');
 Discourse.Markdown.whiteListTag('a', 'data-bbcode');
 Discourse.Markdown.whiteListTag('a', 'name');

-Discourse.Markdown.whiteListTag('img', 'src', /^data:image.*/i);
+Discourse.Markdown.whiteListTag('img', 'src', /^data:image.*$/i);

 Discourse.Markdown.whiteListTag('div', 'class', 'title');
 Discourse.Markdown.whiteListTag('div', 'class', 'quote-controls');

-// explicitly whitelist classes we need allowed through for
-// syntax highlighting, grabbed from highlight.js
-hljs.listLanguages().forEach(function (language) {
-  Discourse.Markdown.whiteListTag('code', 'class', language);
-});
-Discourse.Markdown.whiteListTag('code', 'class', 'text');
-Discourse.Markdown.whiteListTag('code', 'class', 'lang-auto');
-
 Discourse.Markdown.whiteListTag('span', 'class', 'mention');
 Discourse.Markdown.whiteListTag('span', 'class', 'spoiler');
 Discourse.Markdown.whiteListTag('div', 'class', 'spoiler');
--- a/config/site_settings.yml
+++ b/config/site_settings.yml
@ -377,7 +377,7 @@ posting:
    default: 10000
  default_code_lang:
    client: true
-    default: "lang-auto"
+    default: "auto"
  warn_reviving_old_topic_age: 180
  autohighlight_all_code:
    client: true
--- a/test/javascripts/lib/markdown-test.js.es6
+++ b/test/javascripts/lib/markdown-test.js.es6
@ -1,6 +1,7 @@
 module("Discourse.Markdown", {
  setup: function() {
    Discourse.SiteSettings.traditional_markdown_linebreaks = false;
+    Discourse.SiteSettings.default_code_lang = "auto";
  }
 });

@ -337,25 +338,25 @@ test("Code Blocks", function() {
         "it supports basic code blocks");

  cooked("```json\n{hello: 'world'}\n```\ntrailing",
-         "<p><pre><code class=\"json\">{hello: &#x27;world&#x27;}</code></pre></p>\n\n<p>trailing</p>",
+         "<p><pre><code class=\"lang-json\">{hello: &#x27;world&#x27;}</code></pre></p>\n\n<p>trailing</p>",
         "It does not truncate text after a code block.");

  cooked("```json\nline 1\n\nline 2\n\n\nline3\n```",
-         "<p><pre><code class=\"json\">line 1\n\nline 2\n\n\nline3</code></pre></p>",
+         "<p><pre><code class=\"lang-json\">line 1\n\nline 2\n\n\nline3</code></pre></p>",
         "it maintains new lines inside a code block.");

  cooked("hello\nworld\n```json\nline 1\n\nline 2\n\n\nline3\n```",
-         "<p>hello<br/>world<br/></p>\n\n<p><pre><code class=\"json\">line 1\n\nline 2\n\n\nline3</code></pre></p>",
+         "<p>hello<br/>world<br/></p>\n\n<p><pre><code class=\"lang-json\">line 1\n\nline 2\n\n\nline3</code></pre></p>",
         "it maintains new lines inside a code block with leading content.");

  cooked("```ruby\n<header>hello</header>\n```",
-         "<p><pre><code class=\"ruby\">&lt;header&gt;hello&lt;/header&gt;</code></pre></p>",
+         "<p><pre><code class=\"lang-ruby\">&lt;header&gt;hello&lt;/header&gt;</code></pre></p>",
         "it escapes code in the code block");

-  cooked("```text\ntext\n```", "<p><pre><code>text</code></pre></p>", "handles text without adding class");
+  cooked("```text\ntext\n```", "<p><pre><code class=\"lang-nohighlight\">text</code></pre></p>", "handles text by adding nohighlight");

  cooked("```ruby\n# cool\n```",
-         "<p><pre><code class=\"ruby\"># cool</code></pre></p>",
+         "<p><pre><code class=\"lang-ruby\"># cool</code></pre></p>",
         "it supports changing the language");

  cooked("    ```\n    hello\n    ```",
@ -363,11 +364,11 @@ test("Code Blocks", function() {
         "only detect ``` at the beginning of lines");

  cooked("```ruby\ndef self.parse(text)\n\n  text\nend\n```",
-         "<p><pre><code class=\"ruby\">def self.parse(text)\n\n  text\nend</code></pre></p>",
+         "<p><pre><code class=\"lang-ruby\">def self.parse(text)\n\n  text\nend</code></pre></p>",
         "it allows leading spaces on lines in a code block.");

  cooked("```ruby\nhello `eviltrout`\n```",
-         "<p><pre><code class=\"ruby\">hello &#x60;eviltrout&#x60;</code></pre></p>",
+         "<p><pre><code class=\"lang-ruby\">hello &#x60;eviltrout&#x60;</code></pre></p>",
         "it allows code with backticks in it");

  cooked("```eviltrout\nhello\n```",