FIX: Change the approach to sanitization. Includes a more detailed API
for allowing classes and attributes for only certain tag names.
This commit is contained in:
parent
cfeae239a8
commit
fc1ce96dbb
|
@ -2060,6 +2060,10 @@ var html = (function(html4) {
|
|||
html4.ATTRIBS.hasOwnProperty(attribKey))) {
|
||||
atype = html4.ATTRIBS[attribKey];
|
||||
}
|
||||
|
||||
// Discourse modification: give us more flexibility with whitelists
|
||||
if (opt_nmTokenPolicy && opt_nmTokenPolicy(tagName, attribName, value)) { continue; }
|
||||
|
||||
if (atype !== null) {
|
||||
switch (atype) {
|
||||
case html4.atype['NONE']: break;
|
||||
|
@ -2108,17 +2112,6 @@ var html = (function(html4) {
|
|||
log(opt_logger, tagName, attribName, oldValue, value);
|
||||
}
|
||||
break;
|
||||
case html4.atype['ID']:
|
||||
case html4.atype['IDREF']:
|
||||
case html4.atype['IDREFS']:
|
||||
case html4.atype['GLOBAL_NAME']:
|
||||
case html4.atype['LOCAL_NAME']:
|
||||
case html4.atype['CLASSES']:
|
||||
value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
|
||||
if (opt_logger) {
|
||||
log(opt_logger, tagName, attribName, oldValue, value);
|
||||
}
|
||||
break;
|
||||
case html4.atype['URI']:
|
||||
value = safeUri(value,
|
||||
getUriEffect(tagName, attribName),
|
||||
|
@ -2135,7 +2128,6 @@ var html = (function(html4) {
|
|||
case html4.atype['URI_FRAGMENT']:
|
||||
if (value && '#' === value.charAt(0)) {
|
||||
value = value.substring(1); // remove the leading '#'
|
||||
value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
|
||||
if (value !== null && value !== void 0) {
|
||||
value = '#' + value; // restore the leading '#'
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
**/
|
||||
var urlReplacerArgs = {
|
||||
matcher: /^((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/gm,
|
||||
spaceBoundary: true,
|
||||
spaceOrTagBoundary: true,
|
||||
|
||||
emitter: function(matches) {
|
||||
var url = matches[1],
|
||||
|
|
|
@ -42,10 +42,6 @@ function processTextNodes(node, event, emitter) {
|
|||
for (var j=1; j<node.length; j++) {
|
||||
var textContent = node[j];
|
||||
if (typeof textContent === "string") {
|
||||
if (dialect.options.sanitize && !skipSanitize[textContent]) {
|
||||
textContent = Discourse.Markdown.sanitize(textContent);
|
||||
}
|
||||
|
||||
var result = emitter(textContent, event);
|
||||
if (result) {
|
||||
if (result instanceof Array) {
|
||||
|
@ -118,13 +114,14 @@ function parseTree(tree, path, insideCounts) {
|
|||
@returns {Boolean} whether there is an invalid word boundary
|
||||
**/
|
||||
function invalidBoundary(args, prev) {
|
||||
if (!args.wordBoundary && !args.spaceBoundary) { return false; }
|
||||
if (!(args.wordBoundary || args.spaceBoundary || args.spaceOrTagBoundary)) { return false; }
|
||||
|
||||
var last = prev[prev.length - 1];
|
||||
if (typeof last !== "string") { return false; }
|
||||
|
||||
if (args.wordBoundary && (last.match(/(\w|\/)$/))) { return true; }
|
||||
if (args.spaceBoundary && (!last.match(/\s$/))) { return true; }
|
||||
if (args.spaceOrTagBoundary && (!last.match(/(\s|\>)$/))) { return true; }
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -147,9 +144,19 @@ Discourse.Dialect = {
|
|||
cook: function(text, opts) {
|
||||
if (!initialized) { initializeDialects(); }
|
||||
dialect.options = opts;
|
||||
var tree = parser.toHTMLTree(text, 'Discourse');
|
||||
var tree = parser.toHTMLTree(text, 'Discourse'),
|
||||
result = parser.renderJsonML(parseTree(tree));
|
||||
|
||||
return parser.renderJsonML(parseTree(tree));
|
||||
// This feature is largely for MDTest. We prefer to strip comments
|
||||
// in Discourse
|
||||
|
||||
if (opts.sanitize) {
|
||||
result = Discourse.Markdown.sanitize(result);
|
||||
} else if (opts.sanitizerFunction) {
|
||||
result = opts.sanitizerFunction(result);
|
||||
}
|
||||
|
||||
return result.trim();
|
||||
},
|
||||
|
||||
/**
|
||||
|
|
|
@ -7,10 +7,51 @@
|
|||
@namespace Discourse
|
||||
@module Discourse
|
||||
**/
|
||||
var _validClasses = {},
|
||||
_validIframes = [],
|
||||
_validTags = {};
|
||||
|
||||
function validateAttribute(tagName, attribName, value) {
|
||||
var tag = _validTags[tagName];
|
||||
|
||||
// Handle classes
|
||||
if (attribName === "class") {
|
||||
if (_validClasses[value]) { return value; }
|
||||
|
||||
if (tag) {
|
||||
var classes = tag['class'];
|
||||
if (classes && (classes.indexOf(value) !== -1 || classes.indexOf('*') !== -1)) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
} else if (attribName.indexOf('data-') === 0) {
|
||||
// data-* attributes
|
||||
if (tag) {
|
||||
var allowed = tag[attribName] || tag['data-*'];
|
||||
if (allowed === value || allowed.indexOf('*') !== -1) { return value; }
|
||||
}
|
||||
}
|
||||
|
||||
if (tag) {
|
||||
var attrs = tag[attribName];
|
||||
if (attrs && (attrs.indexOf(value) !== -1 || attrs.indexOf('*') !== -1)) { return value; }
|
||||
}
|
||||
}
|
||||
|
||||
Discourse.Markdown = {
|
||||
|
||||
validClasses: {},
|
||||
validIframes: [],
|
||||
/**
|
||||
Whitelist class for only a certain tag
|
||||
|
||||
@param {String} tagName to whitelist
|
||||
@param {String} attribName to whitelist
|
||||
@param {String} value to whitelist
|
||||
**/
|
||||
whiteListTag: function(tagName, attribName, value) {
|
||||
_validTags[tagName] = _validTags[tagName] || {};
|
||||
_validTags[tagName][attribName] = _validTags[tagName][attribName] || [];
|
||||
_validTags[tagName][attribName].push(value || '*');
|
||||
},
|
||||
|
||||
/**
|
||||
Whitelists more classes for sanitization.
|
||||
|
@ -19,10 +60,8 @@ Discourse.Markdown = {
|
|||
@method whiteListClass
|
||||
**/
|
||||
whiteListClass: function() {
|
||||
var args = Array.prototype.slice.call(arguments),
|
||||
validClasses = Discourse.Markdown.validClasses;
|
||||
|
||||
args.forEach(function (a) { validClasses[a] = true; });
|
||||
var args = Array.prototype.slice.call(arguments);
|
||||
args.forEach(function (a) { _validClasses[a] = true; });
|
||||
},
|
||||
|
||||
/**
|
||||
|
@ -32,7 +71,7 @@ Discourse.Markdown = {
|
|||
@param {Regexp} regexp The regexp to whitelist.
|
||||
**/
|
||||
whiteListIframe: function(regexp) {
|
||||
Discourse.Markdown.validIframes.push(regexp);
|
||||
_validIframes.push(regexp);
|
||||
},
|
||||
|
||||
/**
|
||||
|
@ -124,8 +163,8 @@ Discourse.Markdown = {
|
|||
|
||||
// whitelist some iframe only
|
||||
if (hints && hints.XML_TAG === "iframe" && hints.XML_ATTR === "src") {
|
||||
for (var i = 0, length = Discourse.Markdown.validIframes.length; i < length; i++) {
|
||||
if(Discourse.Markdown.validIframes[i].test(url)) { return url; }
|
||||
for (var i = 0, length = _validIframes.length; i < length; i++) {
|
||||
if(_validIframes[i].test(url)) { return url; }
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -144,12 +183,10 @@ Discourse.Markdown = {
|
|||
Checks to see if a name, class or id is allowed in the cooked content
|
||||
|
||||
@method nameIdClassAllowed
|
||||
@param {String} val The name, class or id to check
|
||||
@return {String} val the transformed name class or id
|
||||
@param {String} tagName to check
|
||||
@param {String} attribName to check
|
||||
@param {String} value to check
|
||||
**/
|
||||
nameIdClassAllowed: function(val) {
|
||||
if (Discourse.Markdown.validClasses[val]) { return val; }
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
|
@ -161,8 +198,11 @@ Discourse.Markdown = {
|
|||
**/
|
||||
sanitize: function(text) {
|
||||
if (!window.html_sanitize || !text) return "";
|
||||
text = text.replace(/<([^A-Za-z\/]|$)/g, "<$1");
|
||||
return window.html_sanitize(text, Discourse.Markdown.urlAllowed, Discourse.Markdown.nameIdClassAllowed);
|
||||
|
||||
// Allow things like <3 and <_<
|
||||
text = text.replace(/<([^A-Za-z\/\!]|$)/g, "<$1");
|
||||
|
||||
return window.html_sanitize(text, Discourse.Markdown.urlAllowed, validateAttribute);
|
||||
},
|
||||
|
||||
/**
|
||||
|
@ -185,5 +225,14 @@ Discourse.Markdown = {
|
|||
};
|
||||
RSVP.EventTarget.mixin(Discourse.Markdown);
|
||||
|
||||
Discourse.Markdown.whiteListClass("attachment");
|
||||
Discourse.Markdown.whiteListTag('a', 'class', 'attachment');
|
||||
Discourse.Markdown.whiteListTag('a', 'target', '_blank');
|
||||
Discourse.Markdown.whiteListTag('a', 'class', 'onebox');
|
||||
Discourse.Markdown.whiteListTag('a', 'class', 'mention');
|
||||
Discourse.Markdown.whiteListTag('div', 'class', 'title');
|
||||
Discourse.Markdown.whiteListTag('div', 'class', 'quote-controls');
|
||||
Discourse.Markdown.whiteListTag('code', 'class', '*');
|
||||
Discourse.Markdown.whiteListTag('span', 'class', 'mention');
|
||||
Discourse.Markdown.whiteListTag('aside', 'class', 'quote');
|
||||
Discourse.Markdown.whiteListTag('aside', 'data-*');
|
||||
Discourse.Markdown.whiteListIframe(/^(https?:)?\/\/www\.google\.com\/maps\/embed\?.+/i);
|
||||
|
|
|
@ -156,5 +156,5 @@
|
|||
});
|
||||
}
|
||||
|
||||
Discourse.Markdown.whiteListClass("emoji");
|
||||
Discourse.Markdown.whiteListTag('img', 'class', 'emoji');
|
||||
}).call(this);
|
||||
|
|
|
@ -258,20 +258,11 @@ describe PrettyText do
|
|||
PrettyText.cook("**a*_b**").should match_html "<p><strong>a*_b</strong></p>"
|
||||
end
|
||||
|
||||
pending "small links" do
|
||||
PrettyText.cook("<small>\nhttp://a.com\n<small>").should match_html "<p><small><br><a href=\"http://a.com\" rel=\"nofollow\">http://a.com</a><br></small></p>"
|
||||
end
|
||||
|
||||
|
||||
it "does not apply italics when there is a space inside" do
|
||||
PrettyText.cook("** hello**").should match_html "<p>** hello**</p>"
|
||||
PrettyText.cook("**hello **").should match_html "<p>**hello **</p>"
|
||||
end
|
||||
|
||||
pending "allows comments through" do
|
||||
PrettyText.cook("boom <!--comment-->").should match_html "<p>boom <!--comment--></p>"
|
||||
end
|
||||
|
||||
it "allows does not bold chinese intra word" do
|
||||
PrettyText.cook("你**hello**").should match_html "<p>你**hello**</p>"
|
||||
end
|
||||
|
@ -279,11 +270,6 @@ describe PrettyText do
|
|||
it "allows bold chinese" do
|
||||
PrettyText.cook("**你hello**").should match_html "<p><strong>你hello</strong></p>"
|
||||
end
|
||||
|
||||
pending "does not break a streak for mentions" do
|
||||
Fabricate(:user, username: 'sam')
|
||||
PrettyText.cook("<small>a @sam c</small>").should match_html "<p><small>a <a class='mention' href='/users/sam'>@sam</a> c</small></p>"
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
|
|
@ -6,6 +6,8 @@ module("Discourse.Markdown", {
|
|||
|
||||
var cooked = function(input, expected, text) {
|
||||
var result = Discourse.Markdown.cook(input, {sanitize: true});
|
||||
expected = expected.replace(/\/>/g, ">");
|
||||
// result = result.replace("/>", ">");
|
||||
equal(result, expected, text);
|
||||
};
|
||||
|
||||
|
@ -138,6 +140,8 @@ test("Links", function() {
|
|||
cooked("User [MOD]: Hello!",
|
||||
"<p>User [MOD]: Hello!</p>",
|
||||
"It does not consider references that are obviously not URLs");
|
||||
|
||||
cooked("<small>http://eviltrout.com</small>", "<p><small><a href=\"http://eviltrout.com\">http://eviltrout.com</a></small></p>", "Links within HTML tags");
|
||||
});
|
||||
|
||||
test("simple quotes", function() {
|
||||
|
@ -240,6 +244,9 @@ test("Mentions", function() {
|
|||
"<p><a class=\"mention\" href=\"/users/eviltrout\">@eviltrout</a></p>",
|
||||
"it doesn't onebox mentions");
|
||||
|
||||
cookedOptions("<small>a @sam c</small>", alwaysTrue,
|
||||
"<p><small>a <a class=\"mention\" href=\"/users/sam\">@sam</a> c</small></p>",
|
||||
"it allows mentions within HTML tags");
|
||||
});
|
||||
|
||||
|
||||
|
@ -370,7 +377,7 @@ test("sanitize", function() {
|
|||
|
||||
cooked("[the answer](javascript:alert(42))", "<p><a>the answer</a></p>", "it prevents XSS");
|
||||
|
||||
cooked("<i class=\"fa fa-bug fa-spin\" style=\"font-size:600%\"></i>\n<!-- -->", "<p><i></i><br/><!-- --></p>", "it doesn't circumvent XSS with comments");
|
||||
cooked("<i class=\"fa fa-bug fa-spin\" style=\"font-size:600%\"></i>\n<!-- -->", "<p><i></i><br/></p>", "it doesn't circumvent XSS with comments");
|
||||
});
|
||||
|
||||
test("URLs in BBCode tags", function() {
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
<p><img src="/url/" alt="alt text" title="with a title" />.</p>
|
||||
|
||||
<p><img src="" alt="Empty" /></p>
|
||||
<p><img alt="Empty" /></p>
|
||||
|
||||
<p><img src="http://example.com/(parens).jpg" alt="this is a stupid URL" /></p>
|
||||
|
||||
|
|
|
@ -10,9 +10,9 @@
|
|||
|
||||
<p><a href="/url/">URL wrapped in angle brackets</a>.</p>
|
||||
|
||||
<p><a href="/url/" title="Here's the title">URL w/ angle brackets + title</a>.</p>
|
||||
<p><a href="/url/" title="Here's the title">URL w/ angle brackets + title</a>.</p>
|
||||
|
||||
<p><a href="">Empty</a>.</p>
|
||||
<p><a>Empty</a>.</p>
|
||||
|
||||
<p><a href="http://en.wikipedia.org/wiki/WIMP_(computing)">With parens in the URL</a></p>
|
||||
|
||||
|
|
|
@ -8,13 +8,42 @@ module("MDTest", {
|
|||
// do not affect formatting.
|
||||
function normalize(str) {
|
||||
return str.replace(/\n\s*/g, '').
|
||||
replace(/ \/\>/g, '/>').
|
||||
replace(/ \/\>/g, '>').
|
||||
replace(/ ?/g, "\t").
|
||||
replace(/"/g, '"');
|
||||
}
|
||||
|
||||
// We use a custom sanitizer for MD test that hoists out comments. In Discourse
|
||||
// they are stripped, but to be compliant with the spec they should not be.
|
||||
function hoistingSanitizer(result) {
|
||||
var hoisted,
|
||||
m = result.match(/<!--[\s\S]*?-->/g);
|
||||
if (m && m.length) {
|
||||
hoisted = [];
|
||||
for (var i=0; i<m.length; i++) {
|
||||
var c = m[i],
|
||||
id = "discourse:hoisted-comment:" + i;
|
||||
result = result.replace(c, id);
|
||||
hoisted.push([c, id]);
|
||||
}
|
||||
}
|
||||
|
||||
result = Discourse.Markdown.sanitize(result);
|
||||
|
||||
if (hoisted) {
|
||||
hoisted.forEach(function(tuple) {
|
||||
result = result.replace(tuple[1], tuple[0]);
|
||||
});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
var md = function(input, expected, text) {
|
||||
var result = Discourse.Markdown.cook(input, {sanitize: true, traditional_markdown_linebreaks: true}),
|
||||
var result = Discourse.Markdown.cook(input, {
|
||||
sanitizerFunction: hoistingSanitizer,
|
||||
traditional_markdown_linebreaks: true
|
||||
}),
|
||||
resultNorm = normalize(result),
|
||||
expectedNorm = normalize(expected),
|
||||
same = (result === expected) || (resultNorm === expectedNorm);
|
||||
|
|
Loading…
Reference in New Issue