Merge pull request #2592 from Elberet/fix-parser

Fixes for quirky markdown parser behaviours
This commit is contained in:
Robin Ward 2014-07-28 13:12:22 -04:00
commit 8866141ba2
6 changed files with 128 additions and 97 deletions

View File

@ -55,6 +55,20 @@ function replaceBBCodeParamsRaw(tag, emitter) {
});
}
/**
  Returns a copy of a JSON-ML node list in which every node that is exactly
  the newline string "\n" has been left out. All other nodes are kept in
  their original order.

  @method removeEmptyLines
  @param {Array} contents Array of JSON-ML nodes
  @return {Array} a new array; `contents` itself is not modified
**/
function removeEmptyLines(contents) {
  return contents.filter(function(node) {
    // only a bare "\n" string counts as an empty line; nested nodes and
    // strings that merely contain a newline are preserved
    return node !== "\n";
  });
}
/**
Creates a BBCode handler that accepts parameters. Passes them to the emitter.
Processes the inside recursively so it can be nested.
@ -75,9 +89,9 @@ replaceBBCode('u', function(contents) { return ['span', {'class': 'bbcode-u'}].c
replaceBBCode('s', function(contents) { return ['span', {'class': 'bbcode-s'}].concat(contents); });
Discourse.Markdown.whiteListTag('span', 'class', /^bbcode-[bius]$/);
replaceBBCode('ul', function(contents) { return ['ul'].concat(contents); });
replaceBBCode('ol', function(contents) { return ['ol'].concat(contents); });
replaceBBCode('li', function(contents) { return ['li'].concat(contents); });
replaceBBCode('ul', function(contents) { return ['ul'].concat(removeEmptyLines(contents)); });
replaceBBCode('ol', function(contents) { return ['ol'].concat(removeEmptyLines(contents)); });
replaceBBCode('li', function(contents) { return ['li'].concat(removeEmptyLines(contents)); });
rawBBCode('img', function(contents) { return ['img', {href: contents}]; });
rawBBCode('email', function(contents) { return ['a', {href: "mailto:" + contents, 'data-bbcode': true}, contents]; });

View File

@ -10,6 +10,15 @@ var acceptableCodeClasses =
"profile", "python", "r", "rib", "rsl", "ruby", "rust", "scala", "smalltalk", "sql", "tex", "text",
"vala", "vbscript", "vhdl"];
/**
  Joins a list of blocks into one string. Each block is appended in order,
  followed by its `trailing` property whenever that property is truthy.

  @method flattenBlocks
  @param {Array} blocks the blocks to concatenate
  @return {String} the concatenated text of all blocks
**/
function flattenBlocks(blocks) {
  return blocks.reduce(function(text, block) {
    var tail = block.trailing ? block.trailing : "";
    return text + block + tail;
  }, "");
}
Discourse.Dialect.replaceBlock({
start: /^`{3}([^\n\[\]]+)?\n?([\s\S]*)?/gm,
stop: '```',
@ -19,7 +28,7 @@ Discourse.Dialect.replaceBlock({
if (matches[1] && acceptableCodeClasses.indexOf(matches[1]) !== -1) {
klass = matches[1];
}
return ['p', ['pre', ['code', {'class': klass}, blockContents.join("\n") ]]];
return ['p', ['pre', ['code', {'class': klass}, flattenBlocks(blockContents) ]]];
}
});
@ -50,6 +59,6 @@ Discourse.Dialect.replaceBlock({
skipIfTradtionalLinebreaks: true,
emitter: function(blockContents) {
return ['p', ['pre', blockContents.join("\n")]];
return ['p', ['pre', flattenBlocks(blockContents)]];
}
});

View File

@ -133,6 +133,19 @@ function invalidBoundary(args, prev) {
if (args.spaceOrTagBoundary && (!last.match(/(\s|\>)$/))) { return true; }
}
/**
  Counts the terminated lines in a string, i.e. the number of "\n"
  characters it contains. Text after the last newline is not counted.

  @method countLines
  @param {string} str the string to inspect
  @returns {Integer} number of newline characters found in str
**/
function countLines(str) {
  // splitting on "\n" always yields one more piece than there are newlines
  var pieces = str.split("\n");
  return pieces.length - 1;
}
/**
An object used for rendering our dialects.
@ -288,7 +301,7 @@ Discourse.Dialect = {
this.registerInline(start, function(text, match, prev) {
if (invalidBoundary(args, prev)) { return; }
var endPos = self.findEndPos(text, stop, args, startLength);
var endPos = self.findEndPos(text, start, stop, args, startLength);
if (endPos === -1) { return; }
var between = text.slice(startLength, endPos);
@ -304,13 +317,14 @@ Discourse.Dialect = {
});
},
findEndPos: function(text, stop, args, start) {
var endPos = text.indexOf(stop, start);
findEndPos: function(text, start, stop, args, offset) {
var endPos, nextStart;
do {
endPos = text.indexOf(stop, offset);
if (endPos === -1) { return -1; }
var after = text.charAt(endPos + stop.length);
if (after && after.indexOf(stop) === 0) {
return this.findEndPos(text, stop, args, endPos + stop.length + 1);
}
nextStart = text.indexOf(start, offset);
offset = endPos + stop.length;
} while (nextStart !== -1 && nextStart < endPos);
return endPos;
},
@ -358,102 +372,83 @@ Discourse.Dialect = {
var linebreaks = dialect.options.traditional_markdown_linebreaks ||
Discourse.SiteSettings.traditional_markdown_linebreaks;
// Some replacers should not be run with traditional linebreaks
if (linebreaks && args.skipIfTradtionalLinebreaks) { return; }
args.start.lastIndex = 0;
var m = (args.start).exec(block);
var result = [], match = (args.start).exec(block);
if (!match) { return; }
if (!m) { return; }
var lastChance = function() {
return !next.some(function(e) { return e.indexOf(args.stop) !== -1; });
};
var startPos = args.start.lastIndex - m[0].length,
leading,
blockContents = [],
result = [],
lineNumber = block.lineNumber;
if (startPos > 0) {
leading = block.slice(0, startPos);
lineNumber += (leading.split("\n").length - 1);
var para = ['p'];
this.processInline(leading).forEach(function (l) {
para.push(l);
});
result.push(para);
}
if (m[2]) {
next.unshift(MD.mk_block(m[2], null, lineNumber + 1));
}
lineNumber++;
var blockClosed = false;
for (var i=0; i<next.length; i++) {
if (next[i].indexOf(args.stop) >= 0) {
blockClosed = true;
break;
}
}
if (!blockClosed) {
if (m[2]) { next.shift(); }
return;
}
var numOpen = 1;
while (next.length > 0) {
var b = next.shift(),
blockLine = b.lineNumber,
diff = ((typeof blockLine === "undefined") ? lineNumber : blockLine) - lineNumber,
endFound = b.indexOf(args.stop),
leadingContents = b.slice(0, endFound),
trailingContents = b.slice(endFound+args.stop.length),
m2;
if (endFound === -1) {
leadingContents = b;
// shave off start tag and leading text, if any.
var pos = args.start.lastIndex - match[0].length,
leading = block.slice(0, pos),
trailing = match[2] ? match[2].replace(/^\n*/, "") : "";
// just give up if there's no stop tag in this or any next block
if (block.indexOf(args.stop, pos + args.stop.length) === -1 && lastChance()) { return; }
if (leading.length > 0) { result.push(['p'].concat(this.processInline(leading))); }
if (trailing.length > 0) {
next.unshift(MD.mk_block(trailing, block.trailing,
block.lineNumber + countLines(leading) + (match[2] ? match[2].length : 0) - trailing.length));
}
// go through the available blocks to find the matching stop tag.
var contentBlocks = [], nesting = 0, actualEndPos = -1, currentBlock;
blockloop:
while (currentBlock = next.shift()) {
// collect all the start and stop tags in the current block
args.start.lastIndex = 0;
if (m2 = (args.start).exec(leadingContents)) {
numOpen++;
args.start.lastIndex -= m2[0].length - 1;
while (m2 = (args.start).exec(leadingContents)) {
numOpen++;
args.start.lastIndex -= m2[0].length - 1;
var startPos = [], m;
while (m = (args.start).exec(currentBlock)) {
startPos.push(args.start.lastIndex - m[0].length);
args.start.lastIndex = args.start.lastIndex - (m[2] ? m[2].length : 0);
}
// Collect the position of every stop tag in the current block.
// indexOf() reports absolute positions, so the next search has to resume
// directly behind the tag that was just found. Accumulating with `+=`
// would push the search start past later stop tags and drop them.
var endPos = [], offset = 0;
while ((pos = currentBlock.indexOf(args.stop, offset)) !== -1) {
  endPos.push(pos);
  offset = pos + args.stop.length;
}
// go through the available end tags:
var ep = 0, sp = 0; // array indices
while (ep < endPos.length) {
if (sp < startPos.length && startPos[sp] < endPos[ep]) {
// there's an end tag, but there's also another start tag first. we need to go deeper.
sp++; nesting++;
} else if (nesting > 0) {
// found an end tag, but we must go up a level first.
ep++; nesting--;
} else {
// found an end tag and we're at the top: done!
actualEndPos = endPos[ep];
break blockloop;
}
}
if (endFound >= 0) { numOpen--; }
for (var j=1; j<diff; j++) {
blockContents.push("");
}
lineNumber = blockLine + b.split("\n").length - 1;
if (endFound >= 0) {
if (trailingContents) {
next.unshift(MD.mk_block(trailingContents.replace(/^\s+/, "")));
}
blockContents.push(leadingContents.replace(/\s+$/, ""));
if (numOpen === 0) {
if (lastChance()) {
// when lastChance() becomes true the first time, currentBlock contains the last
// end tag available in the input blocks but it's not on the right nesting level
// or we would have terminated the loop already. the only thing we can do is to
// treat the last available end tag as tho it were matched with our start tag
// and let the emitter figure out how to render the garbage inside.
actualEndPos = endPos[endPos.length - 1];
break;
}
blockContents.push(args.stop);
} else {
blockContents.push(b);
}
// any left-over start tags still increase the nesting level
nesting += startPos.length - sp;
contentBlocks.push(currentBlock);
}
var emitterResult = args.emitter.call(this, blockContents, m, dialect.options);
if (emitterResult) {
result.push(emitterResult);
}
var before = currentBlock.slice(0, actualEndPos).replace(/\n*$/, ""),
after = currentBlock.slice(actualEndPos + args.stop.length).replace(/^\n*/, "");
if (before.length > 0) contentBlocks.push(MD.mk_block(before, "", currentBlock.lineNumber));
if (after.length > 0) next.unshift(MD.mk_block(after, "", currentBlock.lineNumber + countLines(before)));
var emitterResult = args.emitter.call(this, contentBlocks, match, dialect.options);
if (emitterResult) { result.push(emitterResult); }
return result;
});
},

View File

@ -22,6 +22,7 @@ test('basic bbcode', function() {
"<span class=\"bbcode-b\">evil <span class=\"bbcode-i\">trout</span></span>",
"allows embedding of tags");
format("[EMAIL]eviltrout@mailinator.com[/EMAIL]", "<a href=\"mailto:eviltrout@mailinator.com\">eviltrout@mailinator.com</a>", "supports upper case bbcode");
format("[b]strong [b]stronger[/b][/b]", "<span class=\"bbcode-b\">strong <span class=\"bbcode-b\">stronger</span></span>", "accepts nested bbcode tags");
});
test('invalid bbcode', function() {
@ -44,6 +45,7 @@ test('spoiler', function() {
test('lists', function() {
format("[ul][li]option one[/li][/ul]", "<ul><li>option one</li></ul>", "creates an ul");
format("[ol][li]option one[/li][/ol]", "<ol><li>option one</li></ol>", "creates an ol");
format("[ul]\n[li]option one[/li]\n[li]option two[/li]\n[/ul]", "<ul><li>option one</li><li>option two</li></ul>", "suppresses empty lines in lists");
});
test('tags with arguments', function() {
@ -127,6 +129,16 @@ test("quote formatting", function() {
"</div><blockquote><p>abc</p></blockquote></aside>\n\n<p>hello</p>",
"handles new lines properly");
formatQ("[quote=\"Alice, post:1, topic:1\"]\n[quote=\"Bob, post:2, topic:1\"]\n[/quote]\n[/quote]",
"<aside class=\"quote\" data-post=\"1\" data-topic=\"1\"><div class=\"title\"><div class=\"quote-controls\"></div>Alice said:" +
"</div><blockquote><aside class=\"quote\" data-post=\"2\" data-topic=\"1\"><div class=\"title\"><div class=\"quote-controls\"></div>Bob said:" +
"</div><blockquote></blockquote></aside></blockquote></aside>",
"quotes can be nested");
formatQ("[quote=\"Alice, post:1, topic:1\"]\n[quote=\"Bob, post:2, topic:1\"]\n[/quote]",
"<aside class=\"quote\" data-post=\"1\" data-topic=\"1\"><div class=\"title\"><div class=\"quote-controls\"></div>Alice said:" +
"</div><blockquote><p>[quote=\"Bob, post:2, topic:1\"]</p></blockquote></aside>",
"handles mismatched nested quote tags");
});
test("quotes with trailing formatting", function() {

View File

@ -36,6 +36,7 @@ test("Auto quoting", function() {
"it converts single line quotes to blockquotes");
cooked('"hello\nworld"', "<p>\"hello<br/>world\"</p>", "It doesn't convert multi line quotes");
cooked('"hello "evil" trout"', '<p>"hello "evil" trout"</p>', "it doesn't format quotes in the middle of a line");
cooked('["text"', '<p>["text"</p>', "it recognizes leading tag-like text");
});
test("Traditional Line Breaks", function() {
@ -315,7 +316,7 @@ test("links with full urls", function() {
test("Code Blocks", function() {
cooked("<pre>\nhello\n</pre>\n",
"<p><pre>\nhello</pre></p>",
"<p><pre>hello</pre></p>",
"pre blocks don't include extra lines");
cooked("```\na\nb\nc\n\nd\n```",

View File

@ -662,7 +662,7 @@
return [consumed, null, nodes];
}
var res = this.dialect.inline.__oneElement__.call(this, text.substr( consumed ), patterns );
var res = this.dialect.inline.__oneElement__.call(this, text.substr( consumed ), patterns, [text.substr(0, consumed)]);
consumed += res[ 0 ];
// Add any returned nodes.
nodes.push.apply( nodes, res.slice( 1 ) );