FIX: Change the approach to sanitization. Includes a more detailed API

for allowing classes and attributes for only certain tag names.
This commit is contained in:
Robin Ward 2014-07-03 16:54:56 -04:00
parent cfeae239a8
commit fc1ce96dbb
10 changed files with 128 additions and 58 deletions

View File

@ -2060,6 +2060,10 @@ var html = (function(html4) {
html4.ATTRIBS.hasOwnProperty(attribKey))) {
atype = html4.ATTRIBS[attribKey];
}
// Discourse modification: give us more flexibility with whitelists
if (opt_nmTokenPolicy && opt_nmTokenPolicy(tagName, attribName, value)) { continue; }
if (atype !== null) {
switch (atype) {
case html4.atype['NONE']: break;
@ -2108,17 +2112,6 @@ var html = (function(html4) {
log(opt_logger, tagName, attribName, oldValue, value);
}
break;
case html4.atype['ID']:
case html4.atype['IDREF']:
case html4.atype['IDREFS']:
case html4.atype['GLOBAL_NAME']:
case html4.atype['LOCAL_NAME']:
case html4.atype['CLASSES']:
value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
if (opt_logger) {
log(opt_logger, tagName, attribName, oldValue, value);
}
break;
case html4.atype['URI']:
value = safeUri(value,
getUriEffect(tagName, attribName),
@ -2135,7 +2128,6 @@ var html = (function(html4) {
case html4.atype['URI_FRAGMENT']:
if (value && '#' === value.charAt(0)) {
value = value.substring(1); // remove the leading '#'
value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
if (value !== null && value !== void 0) {
value = '#' + value; // restore the leading '#'
}

View File

@ -4,7 +4,7 @@
**/
var urlReplacerArgs = {
matcher: /^((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/gm,
spaceBoundary: true,
spaceOrTagBoundary: true,
emitter: function(matches) {
var url = matches[1],

View File

@ -42,10 +42,6 @@ function processTextNodes(node, event, emitter) {
for (var j=1; j<node.length; j++) {
var textContent = node[j];
if (typeof textContent === "string") {
if (dialect.options.sanitize && !skipSanitize[textContent]) {
textContent = Discourse.Markdown.sanitize(textContent);
}
var result = emitter(textContent, event);
if (result) {
if (result instanceof Array) {
@ -118,13 +114,14 @@ function parseTree(tree, path, insideCounts) {
@returns {Boolean} whether there is an invalid word boundary
**/
function invalidBoundary(args, prev) {
if (!args.wordBoundary && !args.spaceBoundary) { return false; }
if (!(args.wordBoundary || args.spaceBoundary || args.spaceOrTagBoundary)) { return false; }
var last = prev[prev.length - 1];
if (typeof last !== "string") { return false; }
if (args.wordBoundary && (last.match(/(\w|\/)$/))) { return true; }
if (args.spaceBoundary && (!last.match(/\s$/))) { return true; }
if (args.spaceOrTagBoundary && (!last.match(/(\s|\>)$/))) { return true; }
}
/**
@ -147,9 +144,19 @@ Discourse.Dialect = {
cook: function(text, opts) {
if (!initialized) { initializeDialects(); }
dialect.options = opts;
var tree = parser.toHTMLTree(text, 'Discourse');
var tree = parser.toHTMLTree(text, 'Discourse'),
result = parser.renderJsonML(parseTree(tree));
return parser.renderJsonML(parseTree(tree));
// This feature is largely for MDTest. We prefer to strip comments
// in Discourse
if (opts.sanitize) {
result = Discourse.Markdown.sanitize(result);
} else if (opts.sanitizerFunction) {
result = opts.sanitizerFunction(result);
}
return result.trim();
},
/**

View File

@ -7,10 +7,51 @@
@namespace Discourse
@module Discourse
**/
var _validClasses = {},
_validIframes = [],
_validTags = {};
function validateAttribute(tagName, attribName, value) {
var tag = _validTags[tagName];
// Handle classes
if (attribName === "class") {
if (_validClasses[value]) { return value; }
if (tag) {
var classes = tag['class'];
if (classes && (classes.indexOf(value) !== -1 || classes.indexOf('*') !== -1)) {
return value;
}
}
} else if (attribName.indexOf('data-') === 0) {
// data-* attributes
if (tag) {
var allowed = tag[attribName] || tag['data-*'];
if (allowed === value || allowed.indexOf('*') !== -1) { return value; }
}
}
if (tag) {
var attrs = tag[attribName];
if (attrs && (attrs.indexOf(value) !== -1 || attrs.indexOf('*') !== -1)) { return value; }
}
}
Discourse.Markdown = {
validClasses: {},
validIframes: [],
/**
Whitelist class for only a certain tag
@param {String} tagName to whitelist
@param {String} attribName to whitelist
@param {String} value to whitelist
**/
whiteListTag: function(tagName, attribName, value) {
_validTags[tagName] = _validTags[tagName] || {};
_validTags[tagName][attribName] = _validTags[tagName][attribName] || [];
_validTags[tagName][attribName].push(value || '*');
},
/**
Whitelists more classes for sanitization.
@ -19,10 +60,8 @@ Discourse.Markdown = {
@method whiteListClass
**/
whiteListClass: function() {
var args = Array.prototype.slice.call(arguments),
validClasses = Discourse.Markdown.validClasses;
args.forEach(function (a) { validClasses[a] = true; });
var args = Array.prototype.slice.call(arguments);
args.forEach(function (a) { _validClasses[a] = true; });
},
/**
@ -32,7 +71,7 @@ Discourse.Markdown = {
@param {Regexp} regexp The regexp to whitelist.
**/
whiteListIframe: function(regexp) {
Discourse.Markdown.validIframes.push(regexp);
_validIframes.push(regexp);
},
/**
@ -124,8 +163,8 @@ Discourse.Markdown = {
// whitelist some iframe only
if (hints && hints.XML_TAG === "iframe" && hints.XML_ATTR === "src") {
for (var i = 0, length = Discourse.Markdown.validIframes.length; i < length; i++) {
if(Discourse.Markdown.validIframes[i].test(url)) { return url; }
for (var i = 0, length = _validIframes.length; i < length; i++) {
if(_validIframes[i].test(url)) { return url; }
}
return;
}
@ -144,12 +183,10 @@ Discourse.Markdown = {
Checks to see if a name, class or id is allowed in the cooked content
@method nameIdClassAllowed
@param {String} val The name, class or id to check
@return {String} val the transformed name class or id
@param {String} tagName to check
@param {String} attribName to check
@param {String} value to check
**/
nameIdClassAllowed: function(val) {
if (Discourse.Markdown.validClasses[val]) { return val; }
},
/**
@ -161,8 +198,11 @@ Discourse.Markdown = {
**/
sanitize: function(text) {
if (!window.html_sanitize || !text) return "";
text = text.replace(/<([^A-Za-z\/]|$)/g, "&lt;$1");
return window.html_sanitize(text, Discourse.Markdown.urlAllowed, Discourse.Markdown.nameIdClassAllowed);
// Allow things like <3 and <_<
text = text.replace(/<([^A-Za-z\/\!]|$)/g, "&lt;$1");
return window.html_sanitize(text, Discourse.Markdown.urlAllowed, validateAttribute);
},
/**
@ -185,5 +225,14 @@ Discourse.Markdown = {
};
RSVP.EventTarget.mixin(Discourse.Markdown);
Discourse.Markdown.whiteListClass("attachment");
Discourse.Markdown.whiteListTag('a', 'class', 'attachment');
Discourse.Markdown.whiteListTag('a', 'target', '_blank');
Discourse.Markdown.whiteListTag('a', 'class', 'onebox');
Discourse.Markdown.whiteListTag('a', 'class', 'mention');
Discourse.Markdown.whiteListTag('div', 'class', 'title');
Discourse.Markdown.whiteListTag('div', 'class', 'quote-controls');
Discourse.Markdown.whiteListTag('code', 'class', '*');
Discourse.Markdown.whiteListTag('span', 'class', 'mention');
Discourse.Markdown.whiteListTag('aside', 'class', 'quote');
Discourse.Markdown.whiteListTag('aside', 'data-*');
Discourse.Markdown.whiteListIframe(/^(https?:)?\/\/www\.google\.com\/maps\/embed\?.+/i);

View File

@ -156,5 +156,5 @@
});
}
Discourse.Markdown.whiteListClass("emoji");
Discourse.Markdown.whiteListTag('img', 'class', 'emoji');
}).call(this);

View File

@ -258,20 +258,11 @@ describe PrettyText do
PrettyText.cook("**a*_b**").should match_html "<p><strong>a*_b</strong></p>"
end
pending "small links" do
PrettyText.cook("<small>\nhttp://a.com\n<small>").should match_html "<p><small><br><a href=\"http://a.com\" rel=\"nofollow\">http://a.com</a><br></small></p>"
end
it "does not apply italics when there is a space inside" do
PrettyText.cook("** hello**").should match_html "<p>** hello**</p>"
PrettyText.cook("**hello **").should match_html "<p>**hello **</p>"
end
pending "allows comments through" do
PrettyText.cook("boom <!--comment-->").should match_html "<p>boom <!--comment--></p>"
end
it "allows does not bold chinese intra word" do
PrettyText.cook("你**hello**").should match_html "<p>你**hello**</p>"
end
@ -279,11 +270,6 @@ describe PrettyText do
it "allows bold chinese" do
PrettyText.cook("**你hello**").should match_html "<p><strong>你hello</strong></p>"
end
pending "does not break a streak for mentions" do
Fabricate(:user, username: 'sam')
PrettyText.cook("<small>a @sam c</small>").should match_html "<p><small>a <a class='mention' href='/users/sam'>@sam</a> c</small></p>"
end
end
end

View File

@ -6,6 +6,8 @@ module("Discourse.Markdown", {
var cooked = function(input, expected, text) {
var result = Discourse.Markdown.cook(input, {sanitize: true});
expected = expected.replace(/\/>/g, ">");
// result = result.replace("/>", ">");
equal(result, expected, text);
};
@ -138,6 +140,8 @@ test("Links", function() {
cooked("User [MOD]: Hello!",
"<p>User [MOD]: Hello!</p>",
"It does not consider references that are obviously not URLs");
cooked("<small>http://eviltrout.com</small>", "<p><small><a href=\"http://eviltrout.com\">http://eviltrout.com</a></small></p>", "Links within HTML tags");
});
test("simple quotes", function() {
@ -240,6 +244,9 @@ test("Mentions", function() {
"<p><a class=\"mention\" href=\"/users/eviltrout\">@eviltrout</a></p>",
"it doesn't onebox mentions");
cookedOptions("<small>a @sam c</small>", alwaysTrue,
"<p><small>a <a class=\"mention\" href=\"/users/sam\">@sam</a> c</small></p>",
"it allows mentions within HTML tags");
});
@ -370,7 +377,7 @@ test("sanitize", function() {
cooked("[the answer](javascript:alert(42))", "<p><a>the answer</a></p>", "it prevents XSS");
cooked("<i class=\"fa fa-bug fa-spin\" style=\"font-size:600%\"></i>\n<!-- -->", "<p><i></i><br/>&lt;!-- --&gt;</p>", "it doesn't circumvent XSS with comments");
cooked("<i class=\"fa fa-bug fa-spin\" style=\"font-size:600%\"></i>\n<!-- -->", "<p><i></i><br/></p>", "it doesn't circumvent XSS with comments");
});
test("URLs in BBCode tags", function() {

View File

@ -12,7 +12,7 @@
<p><img src="/url/" alt="alt text" title="with a title" />.</p>
<p><img src="" alt="Empty" /></p>
<p><img alt="Empty" /></p>
<p><img src="http://example.com/(parens).jpg" alt="this is a stupid URL" /></p>

View File

@ -10,9 +10,9 @@
<p><a href="/url/">URL wrapped in angle brackets</a>.</p>
<p><a href="/url/" title="Here&#39;s the title">URL w/ angle brackets + title</a>.</p>
<p><a href="/url/" title="Here's the title">URL w/ angle brackets + title</a>.</p>
<p><a href="">Empty</a>.</p>
<p><a>Empty</a>.</p>
<p><a href="http://en.wikipedia.org/wiki/WIMP_(computing)">With parens in the URL</a></p>

View File

@ -8,13 +8,42 @@ module("MDTest", {
// do not affect formatting.
function normalize(str) {
return str.replace(/\n\s*/g, '').
replace(/ \/\>/g, '/>').
replace(/ \/\>/g, '>').
replace(/ ?/g, "\t").
replace(/&#34;/g, '&quot;');
}
// We use a custom sanitizer for MD test that hoists out comments. In Discourse
// they are stripped, but to be compliant with the spec they should not be.
function hoistingSanitizer(result) {
var hoisted,
m = result.match(/<!--[\s\S]*?-->/g);
if (m && m.length) {
hoisted = [];
for (var i=0; i<m.length; i++) {
var c = m[i],
id = "discourse:hoisted-comment:" + i;
result = result.replace(c, id);
hoisted.push([c, id]);
}
}
result = Discourse.Markdown.sanitize(result);
if (hoisted) {
hoisted.forEach(function(tuple) {
result = result.replace(tuple[1], tuple[0]);
});
}
return result;
}
var md = function(input, expected, text) {
var result = Discourse.Markdown.cook(input, {sanitize: true, traditional_markdown_linebreaks: true}),
var result = Discourse.Markdown.cook(input, {
sanitizerFunction: hoistingSanitizer,
traditional_markdown_linebreaks: true
}),
resultNorm = normalize(result),
expectedNorm = normalize(expected),
same = (result === expected) || (resultNorm === expectedNorm);