Upgraded and refactored Sanitizing. Much less crap should get through now!

Conflicts:
	app/assets/javascripts/discourse/components/syntax_highlighting.js
This commit is contained in:
Robin Ward 2013-10-11 16:24:27 -04:00
parent e0e79cae73
commit 5281b7f80c
16 changed files with 175 additions and 174 deletions

View File

@ -11,11 +11,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Sam: made some modifications to pass jshint and protect against global namespace pollution
window.sanitizeHtml = (function() {
/**
* @fileoverview
@ -88,7 +83,7 @@ function encodeIfExists(unescapedPart) {
return encodeURIComponent(unescapedPart);
}
return null;
}
};
/**
* if unescapedPart is non null, then escapes any characters in it that aren't
* valid characters in a url and also escapes any special characters that
@ -159,7 +154,7 @@ var EXTRA_PARENT_PATHS_RE = /^(?:\.\.\/)*(?:\.\.$)?/;
* }
*/
function collapse_dots(path) {
if (path == null) { return null; }
if (path === null) { return null; }
var p = normPath(path);
// Only /../ left to flatten
var r = PARENT_DIRECTORY_HANDLER_RE;
@ -743,9 +738,14 @@ URI.utils = {
return URI;
})();
// Exports for closure compiler.
if (typeof window !== 'undefined') {
window['URI'] = URI;
}
;
// Copyright Google Inc.
// Licensed under the Apache Licence Version 2.0
// Autogenerated at Wed Feb 20 13:32:22 EST 2013
// Autogenerated at Fri Oct 11 16:16:32 EDT 2013
// @overrides window
// @provides html4
var html4 = {};
@ -765,6 +765,7 @@ html4.atype = {
'FRAME_TARGET': 10,
'MEDIA_QUERY': 13
};
html4[ 'atype' ] = html4.atype;
html4.ATTRIBS = {
'*::class': 9,
'*::dir': 0,
@ -780,6 +781,7 @@ html4.ATTRIBS = {
'*::onchange': 2,
'*::onclick': 2,
'*::ondblclick': 2,
'*::onerror': 2,
'*::onfocus': 2,
'*::onkeydown': 2,
'*::onkeypress': 2,
@ -825,6 +827,7 @@ html4.ATTRIBS = {
'audio::mediagroup': 5,
'audio::muted': 0,
'audio::preload': 0,
'audio::src': 1,
'bdo::dir': 0,
'blockquote::cite': 1,
'br::clear': 0,
@ -1066,8 +1069,10 @@ html4.ATTRIBS = {
'video::muted': 0,
'video::poster': 1,
'video::preload': 0,
'video::src': 1,
'video::width': 0
};
html4[ 'ATTRIBS' ] = html4.ATTRIBS;
html4.eflags = {
'OPTIONAL_ENDTAG': 1,
'EMPTY': 2,
@ -1079,6 +1084,7 @@ html4.eflags = {
'STYLE': 128,
'VIRTUALIZED': 256
};
html4[ 'eflags' ] = html4.eflags;
html4.ELEMENTS = {
'a': 0,
'abbr': 0,
@ -1202,6 +1208,7 @@ html4.ELEMENTS = {
'video': 0,
'wbr': 2
};
html4[ 'ELEMENTS' ] = html4.ELEMENTS;
html4.ELEMENT_DOM_INTERFACES = {
'a': 'HTMLAnchorElement',
'abbr': 'HTMLElement',
@ -1325,14 +1332,17 @@ html4.ELEMENT_DOM_INTERFACES = {
'video': 'HTMLVideoElement',
'wbr': 'HTMLElement'
};
html4[ 'ELEMENT_DOM_INTERFACES' ] = html4.ELEMENT_DOM_INTERFACES;
html4.ueffects = {
'NOT_LOADED': 0,
'SAME_DOCUMENT': 1,
'NEW_DOCUMENT': 2
};
html4[ 'ueffects' ] = html4.ueffects;
html4.URIEFFECTS = {
'a::href': 2,
'area::href': 2,
'audio::src': 1,
'blockquote::cite': 0,
'command::icon': 1,
'del::cite': 0,
@ -1341,16 +1351,20 @@ html4.URIEFFECTS = {
'input::src': 1,
'ins::cite': 0,
'q::cite': 0,
'video::poster': 1
'video::poster': 1,
'video::src': 1
};
html4[ 'URIEFFECTS' ] = html4.URIEFFECTS;
html4.ltypes = {
'UNSANDBOXED': 2,
'SANDBOXED': 1,
'DATA': 0
};
html4[ 'ltypes' ] = html4.ltypes;
html4.LOADERTYPES = {
'a::href': 2,
'area::href': 2,
'audio::src': 2,
'blockquote::cite': 2,
'command::icon': 1,
'del::cite': 2,
@ -1359,8 +1373,15 @@ html4.LOADERTYPES = {
'input::src': 1,
'ins::cite': 2,
'q::cite': 2,
'video::poster': 1
'video::poster': 1,
'video::src': 2
};
html4[ 'LOADERTYPES' ] = html4.LOADERTYPES;
// export for Closure Compiler
if (typeof window !== 'undefined') {
window['html4'] = html4;
}
;
// Copyright (C) 2006 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
@ -1397,7 +1418,7 @@ html4.LOADERTYPES = {
*/
// The Turkish i seems to be a non-issue, but abort in case it is.
// if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; } # Sam ... screwing up in turkish browsers seems a silly idea
if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; }
/**
* \@namespace
@ -1407,9 +1428,9 @@ var html = (function(html4) {
// For closure compiler
var parseCssDeclarations, sanitizeCssProperty, cssSchema;
if ('undefined' !== typeof window) {
parseCssDeclarations = window.parseCssDeclarations;
sanitizeCssProperty = window.sanitizeCssProperty;
cssSchema = window.cssSchema;
parseCssDeclarations = window['parseCssDeclarations'];
sanitizeCssProperty = window['sanitizeCssProperty'];
cssSchema = window['cssSchema'];
}
// The keys of this object must be 'quoted' or JSCompiler will mangle them!
@ -1439,7 +1460,8 @@ var html = (function(html4) {
// TODO(kpreid): This retrieval is a kludge and leads to silent loss of
// functionality if the document isn't available.
var entityLookupElement =
('undefined' !== typeof window && window.document) ? window.document.createElement('textarea') : null;
('undefined' !== typeof window && window['document'])
? window['document'].createElement('textarea') : null;
/**
* Decodes an HTML entity.
*
@ -1608,7 +1630,7 @@ var html = (function(html4) {
var splitWillCapture = ('a,b'.split(/(,)/).length === 3);
// bitmask for tags with special parsing, like <script> and <textarea>
var EFLAGS_TEXT = html4.eflags.CDATA | html4.eflags.RCDATA;
var EFLAGS_TEXT = html4.eflags['CDATA'] | html4.eflags['RCDATA'];
/**
* Given a SAX-like event handler, produce a function that feeds those
@ -1714,7 +1736,7 @@ var html = (function(html4) {
}
break;
case '<\/':
if (m = /^([-\w:]+)[^\'\"]*/.exec(next)) {
if ((m = /^([-\w:]+)[^\'\"]*/.exec(next))) {
if (m[0].length === next.length && parts[pos + 1] === '>') {
// fast case, no attribute parsing needed
pos += 2;
@ -1875,7 +1897,7 @@ var html = (function(html4) {
var parts = [];
var lastPos = 0;
var m;
while ((m = re.exec(str)) != null) {
while ((m = re.exec(str)) !== null) {
parts.push(str.substring(lastPos, m.index));
parts.push(m[0]);
lastPos = m.index + m[0].length;
@ -2085,7 +2107,7 @@ var html = (function(html4) {
for (var i = 0, n = attribs.length; i < n; i += 2) {
var attribName = attribs[i],
value = attribs[i + 1];
if (value != null && value !== void 0) {
if (value !== null && value !== void 0) {
out.push(' ', attribName, '="', escapeAttrib(value), '"');
}
}
@ -2241,7 +2263,7 @@ var html = (function(html4) {
html4.ATTRIBS.hasOwnProperty(attribKey))) {
atype = html4.ATTRIBS[attribKey];
}
if (atype != null) {
if (atype !== null) {
switch (atype) {
case html4.atype['NONE']: break;
case html4.atype['SCRIPT']:
@ -2262,14 +2284,10 @@ var html = (function(html4) {
parseCssDeclarations(
value,
{
declaration: function (property, tokens) {
'declaration': function (property, tokens) {
var normProp = property.toLowerCase();
var schema = cssSchema[normProp];
if (!schema) {
return;
}
sanitizeCssProperty(
normProp, schema, tokens,
normProp, tokens,
opt_naiveUriRewriter
? function (url) {
return safeUri(
@ -2281,7 +2299,10 @@ var html = (function(html4) {
}, opt_naiveUriRewriter);
}
: null);
sanitizedDeclarations.push(property + ': ' + tokens.join(' '));
if (tokens.length) {
sanitizedDeclarations.push(
normProp + ': ' + tokens.join(' '));
}
}
});
value = sanitizedDeclarations.length > 0 ?
@ -2318,7 +2339,7 @@ var html = (function(html4) {
if (value && '#' === value.charAt(0)) {
value = value.substring(1); // remove the leading '#'
value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value;
if (value != null && value !== void 0) {
if (value !== null && value !== void 0) {
value = '#' + value; // restore the leading '#'
}
} else {
@ -2336,9 +2357,11 @@ var html = (function(html4) {
break;
}
} else {
value = null;
if (opt_logger) {
log(opt_logger, tagName, attribName, oldValue, value);
if (!/^data\-/.test(attribName)) {
value = null;
if (opt_logger) {
log(opt_logger, tagName, attribName, oldValue, value);
}
}
}
attribs[i + 1] = value;
@ -2418,7 +2441,11 @@ var html = (function(html4) {
return html;
})(html4);
var html_sanitize = html['sanitize'];
return function(s){return html.sanitize(s, function(uri){return uri;})};
})();
// Exports for Closure compiler. Note this file is also cajoled
// for domado and run in an environment without 'window'
if (typeof window !== 'undefined') {
window['html'] = html;
window['html_sanitize'] = html_sanitize;
}

View File

@ -9,6 +9,23 @@
**/
Discourse.Markdown = {
validClasses: {},
/**
Whitelists classes for sanitization
@method whiteListClass
@param {String} val The value to whitelist. Can supply more than one argument
**/
whiteListClass: function() {
var args = Array.prototype.slice.call(arguments),
validClasses = Discourse.Markdown.validClasses;
args.forEach(function (a) {
validClasses[a] = true;
});
},
/**
Convert a raw string to a cooked markdown string.
@ -85,6 +102,41 @@ Discourse.Markdown = {
return new Markdown.Editor(markdownConverter, undefined, editorOptions);
},
/**
Checks to see if a URL is allowed in the cooked content
@method urlAllowed
@param {String} url Url to check
@return {String} url to insert in the cooked content
**/
urlAllowed: function (url) {
if(/^https?:\/\//.test(url)) { return url; }
if(/^\/\/?[\w\.\-]+/.test(url)) { return url; }
},
/**
Checks to see if a name, class or id is allowed in the cooked content
@method nameIdClassAllowed
@param {String} val The name, class or id to check
@return {String} val the transformed name class or id
**/
nameIdClassAllowed: function(val) {
if (Discourse.Markdown.validClasses[val]) { return val; }
},
/**
Sanitize text using the sanitizer
@method sanitize
@param {String} text The text to sanitize
@return {String} text The sanitized text
**/
sanitize: function(text) {
if (!window.html_sanitize) return "";
return window.html_sanitize(text, Discourse.Markdown.urlAllowed, Discourse.Markdown.nameIdClassAllowed);
},
/**
Creates a Markdown.Converter that we can use for formatting
@ -101,8 +153,7 @@ Discourse.Markdown = {
if (!text) return "";
if (opts.sanitize) {
if (!window.sanitizeHtml) return "";
text = window.sanitizeHtml(text);
text = Discourse.Markdown.sanitize(text);
}
return text;
@ -112,3 +163,5 @@ Discourse.Markdown = {
};
RSVP.EventTarget.mixin(Discourse.Markdown);
Discourse.Markdown.whiteListClass("attachment");

View File

@ -16,12 +16,10 @@ Discourse.SyntaxHighlighting = {
@param {jQuery.selector} $elem The element we want to apply our highlighting to
**/
apply: function($elem) {
return $('pre code[class]', $elem).each(function(i, e) {
$('pre code[class]', $elem).each(function(i, e) {
return $LAB.script("/javascripts/highlight-handlebars.pack.js").wait(function() {
return hljs.highlightBlock(e);
});
});
}
};

View File

@ -93,15 +93,6 @@ replaceBBCodeParams("size", function(param, contents) {
return ['span', {'class': "bbcode-size-" + param}].concat(contents);
});
replaceBBCodeParams("color", function(param, contents) {
// Only allow valid HTML colors.
if (/^(\#[0-9a-fA-F]{3}([0-9a-fA-F]{3})?)|(aqua|black|blue|fuchsia|gray|green|lime|maroon|navy|olive|purple|red|silver|teal|white|yellow)$/.test(param)) {
return ['span', {style: "color: " + param}].concat(contents);
} else {
return ['span'].concat(contents);
}
});
// Handles `[code] ... [/code]` blocks
Discourse.Dialect.replaceBlock({
start: /(\[code\])([\s\S]*)/igm,
@ -112,3 +103,8 @@ Discourse.Dialect.replaceBlock({
}
});
// Register the BBCode CSS classes with the sanitizer's class whitelist.
Discourse.Markdown.whiteListClass("bbcode-b", "bbcode-i", "bbcode-u", "bbcode-s", "spoiler");
// [size=N] is rendered with the class "bbcode-size-N" (see the "size" handler
// above); only sizes 1 through 40 are registered here.
for(var i=1; i<=40; i++) {
Discourse.Markdown.whiteListClass("bbcode-size-" + i);
}

View File

@ -18,3 +18,12 @@ Discourse.Dialect.replaceBlock({
Discourse.Dialect.postProcessTag('code', function (contents) {
return Handlebars.Utils.escapeExpression(contents);
});
Discourse.Markdown.whiteListClass(
"lang-auto", "1c", "actionscript", "apache", "applescript", "avrasm", "axapta", "bash", "brainfuck",
"clojure", "cmake", "coffeescript", "cpp", "cs", "css", "d", "delphi", "diff", "xml", "django", "dos",
"erlang-repl", "erlang", "glsl", "go", "handlebars", "haskell", "http", "ini", "java", "javascript",
"json", "lisp", "lua", "markdown", "matlab", "mel", "nginx", "objectivec", "parser3", "perl", "php",
"profile", "python", "r", "rib", "rsl", "ruby", "rust", "scala", "smalltalk", "sql", "tex", "text",
"vala", "vbscript", "vhdl"
);

View File

@ -19,3 +19,5 @@ Discourse.Dialect.inlineRegexp({
}
}
});
Discourse.Markdown.whiteListClass('mention');

View File

@ -76,3 +76,5 @@ Discourse.Dialect.on("parseNode", function(event) {
}
}
});
Discourse.Markdown.whiteListClass("onebox", "onebox-result", "onebox-result-body", "source", "clearfix", "thumbnail", "info");

View File

@ -68,3 +68,5 @@ Discourse.Dialect.on("parseNode", function(event) {
}
});
Discourse.Markdown.whiteListClass("quote", "title", "quote-controls", "avatar");

View File

@ -4,7 +4,6 @@
//= require LAB.js
//= require Markdown.Converter.js
//= require Markdown.Editor.js
//= require Markdown.Sanitizer.js
//= require better_markdown.js
//= require bootbox.js
//= require bootstrap-alert.js
@ -43,6 +42,7 @@
// Stuff we need to load first
//= require ./discourse/mixins/scrolling
//= require_tree ./discourse/mixins
//= require ./discourse/components/markdown
//= require ./discourse/components/computed
//= require ./discourse/views/view
//= require ./discourse/views/container_view

View File

@ -1,6 +1,7 @@
require 'v8'
require 'nokogiri'
require_dependency 'excerpt_parser'
require 'post'
module PrettyText
@ -108,6 +109,7 @@ module PrettyText
ctx_load(ctx,
"vendor/assets/javascripts/better_markdown.js",
"app/assets/javascripts/defer/html-sanitizer-bundle.js",
"app/assets/javascripts/discourse/dialects/dialect.js",
"app/assets/javascripts/discourse/components/utilities.js",
"app/assets/javascripts/discourse/components/markdown.js")
@ -164,8 +166,19 @@ module PrettyText
context = v8
# we need to do this to work in a multi site environment, many sites, many settings
decorate_context(context)
context['opts'] = opts || {}
context_opts = opts || {}
context_opts[:sanitize] ||= true
context['opts'] = context_opts
context['raw'] = text
# Register any extra white listed image classes with the sanitizer that runs
# inside the JS context.
if Post.white_listed_image_classes.present?
  Post.white_listed_image_classes.each do |klass|
    # Emit the class name as a JSON string literal instead of splicing it
    # between quotes: a quote or backslash in the value would otherwise end
    # the JS string early and break (or inject into) the evaluated script.
    context.eval("Discourse.Markdown.whiteListClass(#{klass.to_json})")
  end
end
context.eval('opts["mentionLookup"] = function(u){return helpers.is_username_valid(u);}')
context.eval('opts["lookupAvatar"] = function(p){return Discourse.Utilities.avatarImg({size: "tiny", avatarTemplate: helpers.avatar_template(p)});}')
baked = context.eval('Discourse.Markdown.markdownConverter(opts).makeHtml(raw)')

View File

@ -32,7 +32,7 @@ describe PrettyText do
end
it "should sanitize the html" do
PrettyText.cook("<script>alert(42)</script>").should match_html "alert(42)"
PrettyText.cook("<script>alert(42)</script>").should match_html "<p></p>"
end
it 'should allow for @mentions to have punctuation' do

View File

@ -27,13 +27,6 @@ test('lists', function() {
format("[ol][li]option one[/li][/ol]", "<ol><li>option one</li></ol>", "creates an ol");
});
test('color', function() {
format("[color=#00f]blue[/color]", "<span style=\"color: #00f\">blue</span>", "supports [color=] with a short hex value");
format("[color=#ffff00]yellow[/color]", "<span style=\"color: #ffff00\">yellow</span>", "supports [color=] with a long hex value");
format("[color=red]red[/color]", "<span style=\"color: red\">red</span>", "supports [color=] with an html color");
format("[color=javascript:alert('wat')]noop[/color]", "<span>noop</span>", "it performs a noop on invalid input");
});
test('tags with arguments', function() {
format("[size=35]BIG [b]whoop[/b][/size]", "<span class=\"bbcode-size-35\">BIG <span class=\"bbcode-b\">whoop</span></span>", "supports [size=]");
format("[url=http://bettercallsaul.com]better call![/url]", "<a href=\"http://bettercallsaul.com\">better call!</a>", "supports [url] with a title");
@ -42,7 +35,6 @@ test('tags with arguments', function() {
format("[b]first[/b] [b]second[/b]", "<span class=\"bbcode-b\">first</span> <span class=\"bbcode-b\">second</span>", "can bold two things on the same line");
});
test("quotes", function() {
var post = Discourse.Post.create({

View File

@ -278,10 +278,12 @@ test("Code Blocks", function() {
});
test("SanitizeHTML", function() {
test("sanitize", function() {
var sanitize = Discourse.Markdown.sanitize;
equal(sanitizeHtml("<div><script>alert('hi');</script></div>"), "<div></div>");
equal(sanitizeHtml("<div><p class=\"funky\" wrong='1'>hello</p></div>"), "<div><p class=\"funky\">hello</p></div>");
equal(sanitize("<i class=\"icon-bug icon-spin\">bug</i>"), "<i>bug</i>");
equal(sanitize("<div><script>alert('hi');</script></div>"), "<div></div>");
equal(sanitize("<div><p class=\"funky\" wrong='1'>hello</p></div>"), "<div><p>hello</p></div>");
cooked("hello<script>alert(42)</script>", "<p>hello</p>", "it sanitizes while cooking");
cooked("<a href='http://disneyland.disney.go.com/'>disney</a> <a href='http://reddit.com'>reddit</a>",
@ -305,3 +307,15 @@ test("URLs in BBCode tags", function() {
"named links are properly parsed");
});
// Covers both sides of Discourse.Markdown.urlAllowed: accepted URLs are
// returned unchanged, rejected URLs produce undefined.
test("urlAllowed", function() {
  var allowed = function(url, msg) {
    equal(Discourse.Markdown.urlAllowed(url), url, msg);
  };
  var rejected = function(url, msg) {
    equal(Discourse.Markdown.urlAllowed(url), undefined, msg);
  };

  allowed("/foo/bar.html", "allows relative urls");
  allowed("http://eviltrout.com/evil/trout", "allows full urls");
  allowed("https://eviltrout.com/evil/trout", "allows https urls");
  allowed("//eviltrout.com/evil/trout", "allows protocol relative urls");

  // A URL filter is only useful if it also turns things away.
  rejected("javascript:alert('evil trout')", "rejects javascript: urls");
  rejected("data:text/html,evil", "rejects data: urls");
});

View File

@ -25,7 +25,6 @@
//= require LAB.js
//= require Markdown.Converter.js
//= require Markdown.Editor.js
//= require Markdown.Sanitizer.js
//= require better_markdown.js
//= require bootbox.js
//= require bootstrap-alert.js

View File

@ -1,108 +0,0 @@
(function () {
var output, Converter;
if (typeof exports === "object" && typeof require === "function") { // we're in a CommonJS (e.g. Node.js) module
output = exports;
Converter = require("./Markdown.Converter").Converter;
} else {
output = window.Markdown;
Converter = output.Converter;
}
// Builds a new Converter and chains sanitizeHtml, then balanceTags, onto its
// "postConversion" hook.
output.getSanitizingConverter = function () {
    var sanitizing = new Converter();
    var postConversionSteps = [sanitizeHtml, balanceTags];

    for (var step = 0; step < postConversionSteps.length; step++) {
        sanitizing.hooks.chain("postConversion", postConversionSteps[step]);
    }
    return sanitizing;
};
// Whitelist patterns. Each one must match a complete tag, from "<" to ">".

// (tags that can be opened/closed) | (tags that stand alone)
var basic_tag_whitelist = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;

// <a href="url..." optional title>|</a>
var a_white = /^(<a\shref="((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\stitle="[^"<>]+")?\s?>|<\/a>)$/i;

// <img src="url..." optional width optional height optional alt optional title
var img_white = /^(<img\ssrc="(https?:\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i;

// Returns the tag untouched when it matches one of the whitelists above,
// otherwise an empty string so the tag disappears from the output.
function sanitizeTag(tag) {
    var whitelisted = tag.match(basic_tag_whitelist) || tag.match(a_white) || tag.match(img_white);
    return whitelisted ? tag : "";
}

// Passes every "<...>" run in the html (including an unterminated trailing
// "<...") through sanitizeTag; text outside of tags is left alone.
function sanitizeHtml(html) {
    var anyTag = /<[^>]*>?/gi;
    return html.replace(anyTag, function (tag) {
        return sanitizeTag(tag);
    });
}
/// <summary>
/// attempt to balance HTML tags in the html string
/// by removing any unmatched opening or closing tags
/// IMPORTANT: we *assume* HTML has *already* been
/// sanitized and is safe/sane before balancing!
///
/// Tags in the ignore list (p, img, br, li, hr) are never paired
/// and never removed.
///
/// adapted from CODESNIPPET: A8591DBA-D1D3-11DE-947C-BA5556D89593
/// </summary>
/// <param name="html">the markup to balance</param>
/// <returns>
/// the markup with orphaned tags deleted; the input itself when it
/// contains no tags or nothing needs removing
/// </returns>
function balanceTags(html) {
    if (html == "")
        return "";

    var re = /<\/?\w+[^>]*(\s|$|>)/g;
    // convert everything to lower case; this makes
    // our case insensitive comparisons easier
    var tags = html.toLowerCase().match(re);

    // no HTML tags present? nothing to do; exit now
    var tagcount = (tags || []).length;
    if (tagcount === 0)
        return html;

    var tagname, tag;
    var ignoredtags = "<p><img><br><li><hr>";
    var match;
    var tagpaired = [];
    var tagremove = [];
    var needsRemoval = false;

    // loop through matched tags in forward order
    for (var ctag = 0; ctag < tagcount; ctag++) {
        tag = tags[ctag];
        // Every entry produced by `re` starts with "<" or "</" followed by the
        // name, so capture exactly that. Stripping the remainder with ".*"
        // stopped at the first newline and left attribute text in the name
        // whenever a tag spanned several lines.
        tagname = /^<\/?(\w+)/.exec(tag)[1];

        // skip any already paired tags
        // and skip tags in our ignore list; assume they're self-closed
        // (plain substring test: String#search would compile the needle as a
        // regular expression)
        if (tagpaired[ctag] || ignoredtags.indexOf("<" + tagname + ">") > -1)
            continue;

        match = -1;

        if (!/^<\//.test(tag)) {
            // this is an opening tag
            // search forwards (next tags), look for closing tags
            for (var ntag = ctag + 1; ntag < tagcount; ntag++) {
                if (!tagpaired[ntag] && tags[ntag] == "</" + tagname + ">") {
                    match = ntag;
                    break;
                }
            }
        }

        if (match === -1)
            needsRemoval = tagremove[ctag] = true; // mark for removal
        else
            tagpaired[match] = true; // mark paired
    }

    if (!needsRemoval)
        return html;

    // delete all orphaned tags from the string; `re` visits the tags in the
    // same order as the match() above, so a running index lines them up
    var seen = 0;
    return html.replace(re, function (found) {
        var res = tagremove[seen] ? "" : found;
        seen++;
        return res;
    });
}
})();

View File

@ -108,4 +108,6 @@
});
});
}
Discourse.Markdown.whiteListClass("emoji");
}).call(this);