FIX: pre-hoist code blocks & spans

2015-03-09 12:32:37 +01:00 · 2015-03-09 12:32:37 +01:00 · 70f00e31df
parent 5b1ee3fedf
commit 70f00e31df
7 changed files with 120 additions and 31 deletions
--- a/app/assets/javascripts/discourse/dialects/code_dialect.js
+++ b/app/assets/javascripts/discourse/dialects/code_dialect.js
@ -10,7 +10,7 @@ var acceptableCodeClasses =
   "perl", "php", "profile", "python", "r", "rib", "rsl", "ruby", "rust", "scala", "smalltalk", "sql",
   "tex", "text", "vala", "vbscript", "vhdl"];

-var textCodeClasses = ["text", "pre"];
+var textCodeClasses = ["text", "pre", "plain"];

 function flattenBlocks(blocks) {
  var result = "";
@ -39,6 +39,17 @@ Discourse.Dialect.replaceBlock({
  }
 });

+// Treat a raw <pre>...</pre> HTML element as a block of its own.
+// `start` captures the opening tag (attributes allowed) plus whatever follows it,
+// `stop` is the closing tag; both patterns are case-insensitive.
+// NOTE(review): the option key below is spelled "Tradtional"; presumably it has to
+// match the spelling replaceBlock reads -- confirm before renaming it.
+Discourse.Dialect.replaceBlock({
+  start: /(<pre[^\>]*\>)([\s\S]*)/igm,
+  stop: /<\/pre>/igm,
+  rawContents: true,
+  skipIfTradtionalLinebreaks: true,
+
+  // Emits the flattened block contents as a <pre> nested inside a <p>.
+  emitter: function(blockContents) {
+    return ['p', ['pre', flattenBlocks(blockContents)]];
+  }
+});
+
 // Ensure that content in a code block is fully escaped. This way it's not white listed
 // and we can use HTML and Javascript examples.
 Discourse.Dialect.on('parseNode', function (event) {
@ -51,7 +62,6 @@ Discourse.Dialect.on('parseNode', function (event) {

    if (path && path[path.length-1] && path[path.length-1][0] && path[path.length-1][0] === "pre") {
      regexp = / +$/g;
-
    } else {
      regexp = /^ +| +$/g;
    }
@ -59,17 +69,6 @@ Discourse.Dialect.on('parseNode', function (event) {
  }
 });

-Discourse.Dialect.replaceBlock({
-  start: /(<pre[^\>]*\>)([\s\S]*)/igm,
-  stop: /<\/pre>/igm,
-  rawContents: true,
-  skipIfTradtionalLinebreaks: true,
-
-  emitter: function(blockContents) {
-    return ['p', ['pre', flattenBlocks(blockContents)]];
-  }
-});
-
 // Whitelist the language classes
 var regexpSource = "^lang-(" + acceptableCodeClasses.join('|') + ")$";
 Discourse.Markdown.whiteListTag('code', 'class', new RegExp(regexpSource, "i"));
--- a/app/assets/javascripts/discourse/dialects/dialect.js
+++ b/app/assets/javascripts/discourse/dialects/dialect.js
@ -12,7 +12,8 @@ var parser = window.BetterMarkdown,
    initialized = false,
    emitters = [],
    hoisted,
-    preProcessors = [];
+    preProcessors = [],
+    escape = Handlebars.Utils.escapeExpression;

 /**
  Initialize our dialects for processing.
@ -162,6 +163,69 @@ function hoister(t, target, replacement) {
  return t;
 }

+// Removes one level of indentation -- four spaces or a single tab -- from the
+// beginning of every line of `t` and returns the result.
+function outdent(t) {
+  var oneIndentLevel = /^([ ]{4}|\t)/gm;
+  return t.replace(oneIndentLevel, "");
+}
+
+// Temporarily swaps every backslash-escaped backslash ("\\") and backslash-escaped
+// backtick ("\`") in `t` for a placeholder, so that later regexps never see an
+// escaped backtick as a code delimiter. Returns the text with placeholders.
+//
+// Each placeholder is ONE private-use code point. The previous literals,
+// "\u1E800" and "\u1E8001", are parsed by JavaScript as U+1E80 followed by "0" /
+// "01", so the first was a prefix of the second and an escaped backslash followed
+// by "1" was restored as an escaped backtick.
+function hideBackslashEscapedCharacters(t) {
+  return t.replace(/\\\\/g, "\uE000")
+          .replace(/\\`/g, "\uE001");
+}
+
+// Inverse of hideBackslashEscapedCharacters: turns the placeholders back into the
+// original "\`" and "\\" sequences and returns the restored text.
+function showBackslashEscapedCharacters(t) {
+  return t.replace(/\uE001/g, "\\`")
+          .replace(/\uE000/g, "\\\\");
+}
+
+// Pre-hoists every code block and code span out of `text`. cook() calls this
+// before the pre-processors and the markdown parser run. Each piece of code is
+// swapped for the md5 hash of its raw content, and its HTML-escaped text is stored
+// in `hoisted[hash]`; cook() substitutes the stored text back into the rendered
+// result. Returns the text with hashes in place of the code.
+// (`md5` is defined outside the lines visible here.)
+function hoistCodeBlocksAndSpans(text) {
+  // swap "\\" and "\`" for placeholders so the regexps below never mistake an
+  // escaped backtick for a code delimiter
+  text = hideBackslashEscapedCharacters(text);
+
+  // <pre>...</pre> code blocks
+  // (only a bare <pre> without attributes, at the start of the text or right
+  // after a blank line, is matched)
+  text = text.replace(/(^\n*|\n\n)<pre>([\s\S]*?)<\/pre>/ig, function(_, before, content) {
+    var hash = md5(content);
+    hoisted[hash] = escape(showBackslashEscapedCharacters(content.trim()));
+    return before + "<pre>" + hash + "</pre>";
+  });
+
+  // markdown code blocks
+  // (runs of lines indented by four spaces or a tab)
+  text = text.replace(/(^\n*|\n\n)((?:(?:[ ]{4}|\t).*\n*)+)/g, function(match, before, content, index) {
+    // make sure we aren't in a list
+    // (`text` and `index` both refer to the string as it was when this replace()
+    // call started; we look at the last non-blank line before the match)
+    var previousLine = text.slice(0, index).trim().match(/.*$/);
+    if (previousLine && previousLine[0].length) {
+      previousLine = previousLine[0].trim();
+      // a line starting with "*", "+", "-" or "<digits>." is a list item:
+      // leave the indented text untouched
+      if (/^(?:\*|\+|-|\d+\.)\s+/.test(previousLine)) {
+        return match;
+      }
+    }
+    // we can safely hoist the code block
+    // (the hash is taken from the content before its trailing whitespace is removed)
+    var hash = md5(content);
+    // only remove trailing whitespace
+    content = content.replace(/\s+$/, "");
+    hoisted[hash] = escape(outdent(showBackslashEscapedCharacters(content)));
+    return before + "    " + hash + "\n";
+  });
+
+  // fenced code blocks (AKA GitHub code blocks)
+  // (the language tag after the opening fence is left in place next to the hash)
+  text = text.replace(/(^\n*|\n\n)```([a-z0-9\-]*)\n([\s\S]*?)\n```/g, function(_, before, language, content) {
+    var hash = md5(content);
+    hoisted[hash] = escape(showBackslashEscapedCharacters(content.trim()));
+    return before + "```" + language + "\n" + hash + "\n```";
+  });
+
+  // code spans (double & single `)
+  // (double-backtick spans are processed first, then single-backtick spans;
+  // a span never crosses a newline)
+  ["``", "`"].forEach(function(delimiter) {
+    var regexp = new RegExp("(^|[^`])" + delimiter + "([^`\\n]+?)" + delimiter + "([^`]|$)", "g");
+    text = text.replace(regexp, function(_, before, content, after) {
+      var hash = md5(content);
+      hoisted[hash] = escape(showBackslashEscapedCharacters(content.trim()));
+      return before + delimiter + hash + delimiter + after;
+    });
+  });
+
+  // restore the placeholders that remain in the text that was not hoisted
+  return showBackslashEscapedCharacters(text);
+}

 /**
  An object used for rendering our dialects.
@ -183,14 +247,19 @@ Discourse.Dialect = {
  cook: function(text, opts) {
    if (!initialized) { initializeDialects(); }

+    dialect.options = opts;
+
    // Helps us hoist out HTML
    hoisted = {};

+    // pre-hoist all code-blocks/spans
+    text = hoistCodeBlocksAndSpans(text);
+
+    // pre-processors
    preProcessors.forEach(function(p) {
      text = p(text, hoister);
    });

-    dialect.options = opts;
    var tree = parser.toHTMLTree(text, 'Discourse'),
        result = parser.renderJsonML(parseTree(tree));

@ -203,12 +272,11 @@ Discourse.Dialect = {
    // If we hoisted out anything, put it back
    var keys = Object.keys(hoisted);
    if (keys.length) {
-      keys.forEach(function(k) {
-        result = result.replace(new RegExp(k,"g"), hoisted[k]);
+      keys.forEach(function(key) {
+        // Put the hoisted text back with a replacer function: a replacement
+        // *string* would have "$&", "$1", "$$"... inside the hoisted code
+        // interpreted as substitution patterns and silently mangled.
+        result = result.replace(new RegExp(key, "g"), function() { return hoisted[key]; });
       });
    }

-    hoisted = {};
    return result.trim();
  },

--- a/test/javascripts/lib/markdown-test.js.es6
+++ b/test/javascripts/lib/markdown-test.js.es6
@ -216,15 +216,33 @@ test("Mentions", function() {
         "<p><a href=\"https://twitter.com/codinghorror\">@codinghorror</a></p>",
         "it doesn't do link mentions within links");

-  cooked("Hello @EvilTrout", "<p>Hello <span class=\"mention\">@EvilTrout</span></p>", "adds a mention class");
-  cooked("robin@email.host", "<p>robin@email.host</p>", "won't add mention class to an email address");
-  cooked("hanzo55@yahoo.com", "<p>hanzo55@yahoo.com</p>", "won't be affected by email addresses that have a number before the @ symbol");
-  cooked("@EvilTrout yo", "<p><span class=\"mention\">@EvilTrout</span> yo</p>", "it handles mentions at the beginning of a string");
-  cooked("yo\n@EvilTrout", "<p>yo<br/><span class=\"mention\">@EvilTrout</span></p>", "it handles mentions at the beginning of a new line");
+  cooked("Hello @EvilTrout",
+         "<p>Hello <span class=\"mention\">@EvilTrout</span></p>",
+         "adds a mention class");
+
+  cooked("robin@email.host",
+         "<p>robin@email.host</p>",
+         "won't add mention class to an email address");
+
+  cooked("hanzo55@yahoo.com",
+         "<p>hanzo55@yahoo.com</p>",
+         "won't be affected by email addresses that have a number before the @ symbol");
+
+  cooked("@EvilTrout yo",
+         "<p><span class=\"mention\">@EvilTrout</span> yo</p>",
+         "it handles mentions at the beginning of a string");
+
+  cooked("yo\n@EvilTrout",
+         "<p>yo<br/><span class=\"mention\">@EvilTrout</span></p>",
+         "it handles mentions at the beginning of a new line");
+
  cooked("`evil` @EvilTrout `trout`",
         "<p><code>evil</code> <span class=\"mention\">@EvilTrout</span> <code>trout</code></p>",
         "deals correctly with multiple <code> blocks");
-  cooked("```\na @test\n```", "<p><pre><code class=\"lang-auto\">a @test</code></pre></p>", "should not do mentions within a code block.");
+
+  cooked("```\na @test\n```",
+         "<p><pre><code class=\"lang-auto\">a @test</code></pre></p>",
+         "should not do mentions within a code block.");

  cooked("> foo bar baz @eviltrout",
         "<blockquote><p>foo bar baz <span class=\"mention\">@eviltrout</span></p></blockquote>",
@ -357,7 +375,9 @@ test("Code Blocks", function() {
         "<p><pre><code class=\"lang-ruby\">&lt;header&gt;hello&lt;/header&gt;</code></pre></p>",
         "it escapes code in the code block");

-  cooked("```text\ntext\n```", "<p><pre><code class=\"lang-nohighlight\">text</code></pre></p>", "handles text by adding nohighlight");
+  cooked("```text\ntext\n```",
+         "<p><pre><code class=\"lang-nohighlight\">text</code></pre></p>",
+         "handles text by adding nohighlight");

  cooked("```ruby\n# cool\n```",
         "<p><pre><code class=\"lang-ruby\"># cool</code></pre></p>",
@ -403,7 +423,9 @@ test("Code Blocks", function() {
         "<pre><code>[quote]test[/quote]</code></pre>",
         "it does not parse other block types in markdown code blocks");

-  cooked("## a\nb\n```\nc\n```", "<h2>a</h2>\n\n<p><pre><code class=\"lang-auto\">c</code></pre></p>", "it handles headings with code blocks after them.");
+  cooked("## a\nb\n```\nc\n```",
+         "<h2>a</h2>\n\n<p><pre><code class=\"lang-auto\">c</code></pre></p>",
+         "it handles headings with code blocks after them.");
 });

 test("sanitize", function() {
--- a/test/javascripts/mdtest/fixtures/Backslash
+++ b/test/javascripts/mdtest/fixtures/Backslash
@ -73,7 +73,7 @@ Nor should these, which occur in code spans:

 Backslash: `\\`

-Backtick: `` \` ``
+Backtick: `\``

 Asterisk: `\*`

@ -113,7 +113,7 @@ other Markdown constructs:

 \`backticks\`

-This is a code span with a literal backslash-backtick sequence: `` \` ``
+This is a code span with a literal backslash-backtick sequence: `\``

 This is a tag with unescaped backticks <span attr='`ticks`'>bar</span>.

--- a/test/javascripts/mdtest/fixtures/Code
+++ b/test/javascripts/mdtest/fixtures/Code
@ -2,4 +2,4 @@

 Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>

-Here's how you put `` `backticks` `` in a code span.
+Here's how you put `` `backticks` `` in a code span.
--- a/test/javascripts/mdtest/fixtures/Ordered
+++ b/test/javascripts/mdtest/fixtures/Ordered
@ -86,7 +86,7 @@ Multiple paragraphs:

 1.	Item 1, graf one.

-	Item 2. graf two. The quick brown fox jumped over the lazy dog's
+	Item 1. graf two. The quick brown fox jumped over the lazy dog's
 	back.

 2.	Item 2.
--- a/test/javascripts/mdtest/fixtures/Ordered
+++ b/test/javascripts/mdtest/fixtures/Ordered
@ -91,7 +91,7 @@
 <ol>
 <li><p>Item 1, graf one.</p>

-<p>Item 2. graf two. The quick brown fox jumped over the lazy dog's
+<p>Item 1. graf two. The quick brown fox jumped over the lazy dog's
 back.</p></li>
 <li><p>Item 2.</p></li>
 <li><p>Item 3.</p></li>