From 0c085059c9d16766d3680992a6ec5cfef0797bea Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Wed, 20 Feb 2013 15:37:42 +1100 Subject: [PATCH] added sane sanitizer (Google Cajole) that is much more robust than old one ... yay for smilies added sane way to do $LAB includes - pattern to be expanded people keep on messing structure.sql --- .../defer/html-sanitizer-bundle.js | 2424 +++++++++++++++++ .../discourse/components/sanitize.js | 99 +- .../discourse/components/utilities.coffee | 11 +- .../discourse/views/composer_view.js.coffee | 8 +- .../common/_discourse_javascript.html.erb | 66 +- config/application.rb | 4 +- db/structure.sql | 2040 +------------- spec/javascripts/sanitize_spec.js | 19 + spec/javascripts/spec.js | 1 + 9 files changed, 2592 insertions(+), 2080 deletions(-) create mode 100644 app/assets/javascripts/defer/html-sanitizer-bundle.js create mode 100644 spec/javascripts/sanitize_spec.js diff --git a/app/assets/javascripts/defer/html-sanitizer-bundle.js b/app/assets/javascripts/defer/html-sanitizer-bundle.js new file mode 100644 index 00000000000..6904ec96bbe --- /dev/null +++ b/app/assets/javascripts/defer/html-sanitizer-bundle.js @@ -0,0 +1,2424 @@ +// Copyright (C) 2010 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Sam: made some modifications to pass jshint and protect against global namespace pollution + + +window.sanitizeHtml = (function() { + +/** + * @fileoverview + * Implements RFC 3986 for parsing/formatting URIs. + * + * @author mikesamuel@gmail.com + * \@provides URI + * \@overrides window + */ + +var URI = (function () { + +/** + * creates a uri from the string form. The parser is relaxed, so special + * characters that aren't escaped but don't cause ambiguities will not cause + * parse failures. + * + * @return {URI|null} + */ +function parse(uriStr) { + var m = ('' + uriStr).match(URI_RE_); + if (!m) { return null; } + return new URI( + nullIfAbsent(m[1]), + nullIfAbsent(m[2]), + nullIfAbsent(m[3]), + nullIfAbsent(m[4]), + nullIfAbsent(m[5]), + nullIfAbsent(m[6]), + nullIfAbsent(m[7])); +} + + +/** + * creates a uri from the given parts. + * + * @param scheme {string} an unencoded scheme such as "http" or null + * @param credentials {string} unencoded user credentials or null + * @param domain {string} an unencoded domain name or null + * @param port {number} a port number in [1, 32768]. + * -1 indicates no port, as does null. + * @param path {string} an unencoded path + * @param query {Array.|string|null} a list of unencoded cgi + * parameters where even values are keys and odds the corresponding values + * or an unencoded query. + * @param fragment {string} an unencoded fragment without the "#" or null. + * @return {URI} + */ +function create(scheme, credentials, domain, port, path, query, fragment) { + var uri = new URI( + encodeIfExists2(scheme, URI_DISALLOWED_IN_SCHEME_OR_CREDENTIALS_), + encodeIfExists2( + credentials, URI_DISALLOWED_IN_SCHEME_OR_CREDENTIALS_), + encodeIfExists(domain), + port > 0 ? 
port.toString() : null, + encodeIfExists2(path, URI_DISALLOWED_IN_PATH_), + null, + encodeIfExists(fragment)); + if (query) { + if ('string' === typeof query) { + uri.setRawQuery(query.replace(/[^?&=0-9A-Za-z_\-~.%]/g, encodeOne)); + } else { + uri.setAllParameters(query); + } + } + return uri; +} +function encodeIfExists(unescapedPart) { + if ('string' == typeof unescapedPart) { + return encodeURIComponent(unescapedPart); + } + return null; +} +/** + * if unescapedPart is non null, then escapes any characters in it that aren't + * valid characters in a url and also escapes any special characters that + * appear in extra. + * + * @param unescapedPart {string} + * @param extra {RegExp} a character set of characters in [\01-\177]. + * @return {string|null} null iff unescapedPart == null. + */ +function encodeIfExists2(unescapedPart, extra) { + if ('string' == typeof unescapedPart) { + return encodeURI(unescapedPart).replace(extra, encodeOne); + } + return null; +}; +/** converts a character in [\01-\177] to its url encoded equivalent. */ +function encodeOne(ch) { + var n = ch.charCodeAt(0); + return '%' + '0123456789ABCDEF'.charAt((n >> 4) & 0xf) + + '0123456789ABCDEF'.charAt(n & 0xf); +} + +/** + * {@updoc + * $ normPath('foo/./bar') + * # 'foo/bar' + * $ normPath('./foo') + * # 'foo' + * $ normPath('foo/.') + * # 'foo' + * $ normPath('foo//bar') + * # 'foo/bar' + * } + */ +function normPath(path) { + return path.replace(/(^|\/)\.(?:\/|$)/g, '$1').replace(/\/{2,}/g, '/'); +} + +var PARENT_DIRECTORY_HANDLER = new RegExp( + '' + // A path break + + '(/|^)' + // followed by a non .. path element + // (cannot be . because normPath is used prior to this RegExp) + + '(?:[^./][^/]*|\\.{2,}(?:[^./][^/]*)|\\.{3,}[^/]*)' + // followed by .. followed by a path break. + + '/\\.\\.(?:/|$)'); + +var PARENT_DIRECTORY_HANDLER_RE = new RegExp(PARENT_DIRECTORY_HANDLER); + +var EXTRA_PARENT_PATHS_RE = /^(?:\.\.\/)*(?:\.\.$)?/; + +/** + * Normalizes its input path and collapses all . 
and .. sequences except for + * .. sequences that would take it above the root of the current parent + * directory. + * {@updoc + * $ collapse_dots('foo/../bar') + * # 'bar' + * $ collapse_dots('foo/./bar') + * # 'foo/bar' + * $ collapse_dots('foo/../bar/./../../baz') + * # 'baz' + * $ collapse_dots('../foo') + * # '../foo' + * $ collapse_dots('../foo').replace(EXTRA_PARENT_PATHS_RE, '') + * # 'foo' + * } + */ +function collapse_dots(path) { + if (path === null) { return null; } + var p = normPath(path); + // Only /../ left to flatten + var r = PARENT_DIRECTORY_HANDLER_RE; + // We replace with $1 which matches a / before the .. because this + // guarantees that: + // (1) we have at most 1 / between the adjacent place, + // (2) always have a slash if there is a preceding path section, and + // (3) we never turn a relative path into an absolute path. + for (var q; (q = p.replace(r, '$1')) != p; p = q) {}; + return p; +} + +/** + * resolves a relative url string to a base uri. + * @return {URI} + */ +function resolve(baseUri, relativeUri) { + // there are several kinds of relative urls: + // 1. //foo - replaces everything from the domain on. foo is a domain name + // 2. foo - replaces the last part of the path, the whole query and fragment + // 3. /foo - replaces the the path, the query and fragment + // 4. ?foo - replace the query and fragment + // 5. 
#foo - replace the fragment only + + var absoluteUri = baseUri.clone(); + // we satisfy these conditions by looking for the first part of relativeUri + // that is not blank and applying defaults to the rest + + var overridden = relativeUri.hasScheme(); + + if (overridden) { + absoluteUri.setRawScheme(relativeUri.getRawScheme()); + } else { + overridden = relativeUri.hasCredentials(); + } + + if (overridden) { + absoluteUri.setRawCredentials(relativeUri.getRawCredentials()); + } else { + overridden = relativeUri.hasDomain(); + } + + if (overridden) { + absoluteUri.setRawDomain(relativeUri.getRawDomain()); + } else { + overridden = relativeUri.hasPort(); + } + + var rawPath = relativeUri.getRawPath(); + var simplifiedPath = collapse_dots(rawPath); + if (overridden) { + absoluteUri.setPort(relativeUri.getPort()); + simplifiedPath = simplifiedPath + && simplifiedPath.replace(EXTRA_PARENT_PATHS_RE, ''); + } else { + overridden = !!rawPath; + if (overridden) { + // resolve path properly + if (simplifiedPath.charCodeAt(0) !== 0x2f /* / */) { // path is relative + var absRawPath = collapse_dots(absoluteUri.getRawPath() || '') + .replace(EXTRA_PARENT_PATHS_RE, ''); + var slash = absRawPath.lastIndexOf('/') + 1; + simplifiedPath = collapse_dots( + (slash ? absRawPath.substring(0, slash) : '') + + collapse_dots(rawPath)) + .replace(EXTRA_PARENT_PATHS_RE, ''); + } + } else { + simplifiedPath = simplifiedPath + && simplifiedPath.replace(EXTRA_PARENT_PATHS_RE, ''); + if (simplifiedPath !== rawPath) { + absoluteUri.setRawPath(simplifiedPath); + } + } + } + + if (overridden) { + absoluteUri.setRawPath(simplifiedPath); + } else { + overridden = relativeUri.hasQuery(); + } + + if (overridden) { + absoluteUri.setRawQuery(relativeUri.getRawQuery()); + } else { + overridden = relativeUri.hasFragment(); + } + + if (overridden) { + absoluteUri.setRawFragment(relativeUri.getRawFragment()); + } + + return absoluteUri; +} + +/** + * a mutable URI. 
+ * + * This class contains setters and getters for the parts of the URI. + * The getXYZ/setXYZ methods return the decoded part -- so + * uri.parse('/foo%20bar').getPath() will return the decoded path, + * /foo bar. + * + *

The raw versions of fields are available too. + * uri.parse('/foo%20bar').getRawPath() will return the raw path, + * /foo%20bar. Use the raw setters with care, since + * URI::toString is not guaranteed to return a valid url if a + * raw setter was used. + * + *

All setters return this and so may be chained, a la + * uri.parse('/foo').setFragment('part').toString(). + * + *

You should not use this constructor directly -- please prefer the factory + * functions {@link uri.parse}, {@link uri.create}, {@link uri.resolve} + * instead.

+ * + *

The parameters are all raw (assumed to be properly escaped) parts, and + * any (but not all) may be null. Undefined is not allowed.

+ * + * @constructor + */ +function URI( + rawScheme, + rawCredentials, rawDomain, port, + rawPath, rawQuery, rawFragment) { + this.scheme_ = rawScheme; + this.credentials_ = rawCredentials; + this.domain_ = rawDomain; + this.port_ = port; + this.path_ = rawPath; + this.query_ = rawQuery; + this.fragment_ = rawFragment; + /** + * @type {Array|null} + */ + this.paramCache_ = null; +} + +/** returns the string form of the url. */ +URI.prototype.toString = function () { + var out = []; + if (null !== this.scheme_) { out.push(this.scheme_, ':'); } + if (null !== this.domain_) { + out.push('//'); + if (null !== this.credentials_) { out.push(this.credentials_, '@'); } + out.push(this.domain_); + if (null !== this.port_) { out.push(':', this.port_.toString()); } + } + if (null !== this.path_) { out.push(this.path_); } + if (null !== this.query_) { out.push('?', this.query_); } + if (null !== this.fragment_) { out.push('#', this.fragment_); } + return out.join(''); +}; + +URI.prototype.clone = function () { + return new URI(this.scheme_, this.credentials_, this.domain_, this.port_, + this.path_, this.query_, this.fragment_); +}; + +URI.prototype.getScheme = function () { + // HTML5 spec does not require the scheme to be lowercased but + // all common browsers except Safari lowercase the scheme. + return this.scheme_ && decodeURIComponent(this.scheme_).toLowerCase(); +}; +URI.prototype.getRawScheme = function () { + return this.scheme_; +}; +URI.prototype.setScheme = function (newScheme) { + this.scheme_ = encodeIfExists2( + newScheme, URI_DISALLOWED_IN_SCHEME_OR_CREDENTIALS_); + return this; +}; +URI.prototype.setRawScheme = function (newScheme) { + this.scheme_ = newScheme ? 
newScheme : null; + return this; +}; +URI.prototype.hasScheme = function () { + return null !== this.scheme_; +}; + + +URI.prototype.getCredentials = function () { + return this.credentials_ && decodeURIComponent(this.credentials_); +}; +URI.prototype.getRawCredentials = function () { + return this.credentials_; +}; +URI.prototype.setCredentials = function (newCredentials) { + this.credentials_ = encodeIfExists2( + newCredentials, URI_DISALLOWED_IN_SCHEME_OR_CREDENTIALS_); + + return this; +}; +URI.prototype.setRawCredentials = function (newCredentials) { + this.credentials_ = newCredentials ? newCredentials : null; + return this; +}; +URI.prototype.hasCredentials = function () { + return null !== this.credentials_; +}; + + +URI.prototype.getDomain = function () { + return this.domain_ && decodeURIComponent(this.domain_); +}; +URI.prototype.getRawDomain = function () { + return this.domain_; +}; +URI.prototype.setDomain = function (newDomain) { + return this.setRawDomain(newDomain && encodeURIComponent(newDomain)); +}; +URI.prototype.setRawDomain = function (newDomain) { + this.domain_ = newDomain ? newDomain : null; + // Maintain the invariant that paths must start with a slash when the URI + // is not path-relative. 
+ return this.setRawPath(this.path_); +}; +URI.prototype.hasDomain = function () { + return null !== this.domain_; +}; + + +URI.prototype.getPort = function () { + return this.port_ && decodeURIComponent(this.port_); +}; +URI.prototype.setPort = function (newPort) { + if (newPort) { + newPort = Number(newPort); + if (newPort !== (newPort & 0xffff)) { + throw new Error('Bad port number ' + newPort); + } + this.port_ = '' + newPort; + } else { + this.port_ = null; + } + return this; +}; +URI.prototype.hasPort = function () { + return null !== this.port_; +}; + + +URI.prototype.getPath = function () { + return this.path_ && decodeURIComponent(this.path_); +}; +URI.prototype.getRawPath = function () { + return this.path_; +}; +URI.prototype.setPath = function (newPath) { + return this.setRawPath(encodeIfExists2(newPath, URI_DISALLOWED_IN_PATH_)); +}; +URI.prototype.setRawPath = function (newPath) { + if (newPath) { + newPath = String(newPath); + this.path_ = + // Paths must start with '/' unless this is a path-relative URL. + (!this.domain_ || /^\//.test(newPath)) ? newPath : '/' + newPath; + } else { + this.path_ = null; + } + return this; +}; +URI.prototype.hasPath = function () { + return null !== this.path_; +}; + + +URI.prototype.getQuery = function () { + // From http://www.w3.org/Addressing/URL/4_URI_Recommentations.html + // Within the query string, the plus sign is reserved as shorthand notation + // for a space. + return this.query_ && decodeURIComponent(this.query_).replace(/\+/g, ' '); +}; +URI.prototype.getRawQuery = function () { + return this.query_; +}; +URI.prototype.setQuery = function (newQuery) { + this.paramCache_ = null; + this.query_ = encodeIfExists(newQuery); + return this; +}; +URI.prototype.setRawQuery = function (newQuery) { + this.paramCache_ = null; + this.query_ = newQuery ? 
newQuery : null; + return this; +}; +URI.prototype.hasQuery = function () { + return null !== this.query_; +}; + +/** + * sets the query given a list of strings of the form + * [ key0, value0, key1, value1, ... ]. + * + *

uri.setAllParameters(['a', 'b', 'c', 'd']).getQuery() + * will yield 'a=b&c=d'. + */ +URI.prototype.setAllParameters = function (params) { + if (typeof params === 'object') { + if (!(params instanceof Array) + && (params instanceof Object + || Object.prototype.toString.call(params) !== '[object Array]')) { + var newParams = []; + var i = -1; + for (var k in params) { + var v = params[k]; + if ('string' === typeof v) { + newParams[++i] = k; + newParams[++i] = v; + } + } + params = newParams; + } + } + this.paramCache_ = null; + var queryBuf = []; + var separator = ''; + for (var j = 0; j < params.length;) { + var k = params[j++]; + var v = params[j++]; + queryBuf.push(separator, encodeURIComponent(k.toString())); + separator = '&'; + if (v) { + queryBuf.push('=', encodeURIComponent(v.toString())); + } + } + this.query_ = queryBuf.join(''); + return this; +}; +URI.prototype.checkParameterCache_ = function () { + if (!this.paramCache_) { + var q = this.query_; + if (!q) { + this.paramCache_ = []; + } else { + var cgiParams = q.split(/[&\?]/); + var out = []; + var k = -1; + for (var i = 0; i < cgiParams.length; ++i) { + var m = cgiParams[i].match(/^([^=]*)(?:=(.*))?$/); + // From http://www.w3.org/Addressing/URL/4_URI_Recommentations.html + // Within the query string, the plus sign is reserved as shorthand + // notation for a space. + out[++k] = decodeURIComponent(m[1]).replace(/\+/g, ' '); + out[++k] = decodeURIComponent(m[2] || '').replace(/\+/g, ' '); + } + this.paramCache_ = out; + } + } +}; +/** + * sets the values of the named cgi parameters. + * + *

So, uri.parse('foo?a=b&c=d&e=f').setParameterValues('c', ['new']) + * yields foo?a=b&c=new&e=f.

+ * + * @param key {string} + * @param values {Array.} the new values. If values is a single string + * then it will be treated as the sole value. + */ +URI.prototype.setParameterValues = function (key, values) { + // be nice and avoid subtle bugs where [] operator on string performs charAt + // on some browsers and crashes on IE + if (typeof values === 'string') { + values = [ values ]; + } + + this.checkParameterCache_(); + var newValueIndex = 0; + var pc = this.paramCache_; + var params = []; + for (var i = 0, k = 0; i < pc.length; i += 2) { + if (key === pc[i]) { + if (newValueIndex < values.length) { + params.push(key, values[newValueIndex++]); + } + } else { + params.push(pc[i], pc[i + 1]); + } + } + while (newValueIndex < values.length) { + params.push(key, values[newValueIndex++]); + } + this.setAllParameters(params); + return this; +}; +URI.prototype.removeParameter = function (key) { + return this.setParameterValues(key, []); +}; +/** + * returns the parameters specified in the query part of the uri as a list of + * keys and values like [ key0, value0, key1, value1, ... ]. + * + * @return {Array.} + */ +URI.prototype.getAllParameters = function () { + this.checkParameterCache_(); + return this.paramCache_.slice(0, this.paramCache_.length); +}; +/** + * returns the values for a given cgi parameter as a list of decoded + * query parameter values. + * @return {Array.} + */ +URI.prototype.getParameterValues = function (paramNameUnescaped) { + this.checkParameterCache_(); + var values = []; + for (var i = 0; i < this.paramCache_.length; i += 2) { + if (paramNameUnescaped === this.paramCache_[i]) { + values.push(this.paramCache_[i + 1]); + } + } + return values; +}; +/** + * returns a map of cgi parameter names to (non-empty) lists of values. 
+ * @return {Object.<string, Array.<string>>} a fresh object keyed by decoded
+ *     parameter name; each value lists that name's decoded values in
+ *     query-string order.
+ */
+URI.prototype.getParameterMap = function (paramNameUnescaped) {
+  // paramNameUnescaped is unused; it is kept so the signature stays
+  // compatible with existing callers.
+  this.checkParameterCache_();
+  var paramMap = {};
+  var hasOwn = Object.prototype.hasOwnProperty;
+  // paramCache_ is a flat [ key0, value0, key1, value1, ... ] list, so the
+  // loop header alone advances one pair at a time.  The body must not bump
+  // the index again, otherwise every second pair is silently skipped.
+  for (var i = 0; i < this.paramCache_.length; i += 2) {
+    var key = this.paramCache_[i],
+        value = this.paramCache_[i + 1];
+    // Own-property test: a parameter named like an Object.prototype member
+    // (e.g. "toString") must start its own list instead of reaching the
+    // inherited function, which has no push().
+    if (!hasOwn.call(paramMap, key)) {
+      paramMap[key] = [value];
+    } else {
+      paramMap[key].push(value);
+    }
+  }
+  return paramMap;
+};
+/**
+ * returns the first value for a given cgi parameter or null if the given
+ * parameter name does not appear in the query string.
+ * If the given parameter name does appear, but has no '=' following
+ * it, then the empty string will be returned.
+ * @param paramNameUnescaped {string} the decoded parameter name to look up.
+ * @return {string|null}
+ */
+URI.prototype.getParameterValue = function (paramNameUnescaped) {
+  this.checkParameterCache_();
+  for (var i = 0; i < this.paramCache_.length; i += 2) {
+    if (paramNameUnescaped === this.paramCache_[i]) {
+      return this.paramCache_[i + 1];
+    }
+  }
+  return null;
+};
+
+/** returns the decoded fragment, or null when the uri has none. */
+URI.prototype.getFragment = function () {
+  return this.fragment_ && decodeURIComponent(this.fragment_);
+};
+/** returns the fragment exactly as stored (still escaped), or null. */
+URI.prototype.getRawFragment = function () {
+  return this.fragment_;
+};
+/** escapes and stores the fragment; a falsy value clears it. Chainable. */
+URI.prototype.setFragment = function (newFragment) {
+  this.fragment_ = newFragment ? encodeURIComponent(newFragment) : null;
+  return this;
+};
+/** stores the fragment without escaping; a falsy value clears it. Chainable. */
+URI.prototype.setRawFragment = function (newFragment) {
+  this.fragment_ = newFragment ? newFragment : null;
+  return this;
+};
+URI.prototype.hasFragment = function () {
+  return null !== this.fragment_;
+};
+
+/**
+ * maps an absent regular expression group to null.
+ * @return {string|null} matchPart if it is a non-empty string, else null.
+ */
+function nullIfAbsent(matchPart) {
+  return ('string' == typeof matchPart) && (matchPart.length > 0)
+      ? matchPart
+      : null;
+}
+
+
+
+
+/**
+ * a regular expression for breaking a URI into its component parts.
+ *
+ *

http://www.gbiv.com/protocols/uri/rfc/rfc3986.html#RFC2234 says + * As the "first-match-wins" algorithm is identical to the "greedy" + * disambiguation method used by POSIX regular expressions, it is natural and + * commonplace to use a regular expression for parsing the potential five + * components of a URI reference. + * + *

The following line is the regular expression for breaking-down a + * well-formed URI reference into its components. + * + *

+ * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+ *  12            3  4          5       6  7        8 9
+ * 
+ * + *

The numbers in the second line above are only to assist readability; they + * indicate the reference points for each subexpression (i.e., each paired + * parenthesis). We refer to the value matched for subexpression <n> as $<n>. + * For example, matching the above expression to + *

+ *     http://www.ics.uci.edu/pub/ietf/uri/#Related
+ * 
+ * results in the following subexpression matches: + *
+ *    $1 = http:
+ *    $2 = http
+ *    $3 = //www.ics.uci.edu
+ *    $4 = www.ics.uci.edu
+ *    $5 = /pub/ietf/uri/
+ *    $6 = <undefined>
+ *    $7 = <undefined>
+ *    $8 = #Related
+ *    $9 = Related
+ * 
+ * where <undefined> indicates that the component is not present, as is the + * case for the query component in the above example. Therefore, we can + * determine the value of the five components as + *
+ *    scheme    = $2
+ *    authority = $4
+ *    path      = $5
+ *    query     = $7
+ *    fragment  = $9
+ * 
+ * + *

msamuel: I have modified the regular expression slightly to expose the + * credentials, domain, and port separately from the authority. + * The modified version yields + *

+ *    $1 = http              scheme
+ *    $2 = <undefined>       credentials -\
+ *    $3 = www.ics.uci.edu   domain       | authority
+ *    $4 = <undefined>       port        -/
+ *    $5 = /pub/ietf/uri/    path
+ *    $6 = <undefined>       query without ?
+ *    $7 = Related           fragment without #
+ * 
+ */ +var URI_RE_ = new RegExp( + "^" + + "(?:" + + "([^:/?#]+)" + // scheme + ":)?" + + "(?://" + + "(?:([^/?#]*)@)?" + // credentials + "([^/?#:@]*)" + // domain + "(?::([0-9]+))?" + // port + ")?" + + "([^?#]+)?" + // path + "(?:\\?([^#]*))?" + // query + "(?:#(.*))?" + // fragment + "$" + ); + +var URI_DISALLOWED_IN_SCHEME_OR_CREDENTIALS_ = /[#\/\?@]/g; +var URI_DISALLOWED_IN_PATH_ = /[\#\?]/g; + +URI.parse = parse; +URI.create = create; +URI.resolve = resolve; +URI.collapse_dots = collapse_dots; // Visible for testing. + +// lightweight string-based api for loadModuleMaker +URI.utils = { + mimeTypeOf: function (uri) { + var uriObj = parse(uri); + if (/\.html$/.test(uriObj.getPath())) { + return 'text/html'; + } else { + return 'application/javascript'; + } + }, + resolve: function (base, uri) { + if (base) { + return resolve(parse(base), parse(uri)).toString(); + } else { + return '' + uri; + } + } +}; + + +return URI; +})(); + +// Copyright Google Inc. +// Licensed under the Apache Licence Version 2.0 +// Autogenerated at Wed Feb 20 13:32:22 EST 2013 +// @overrides window +// @provides html4 +var html4 = {}; +html4.atype = { + 'NONE': 0, + 'URI': 1, + 'URI_FRAGMENT': 11, + 'SCRIPT': 2, + 'STYLE': 3, + 'HTML': 12, + 'ID': 4, + 'IDREF': 5, + 'IDREFS': 6, + 'GLOBAL_NAME': 7, + 'LOCAL_NAME': 8, + 'CLASSES': 9, + 'FRAME_TARGET': 10, + 'MEDIA_QUERY': 13 +}; +html4.ATTRIBS = { + '*::class': 9, + '*::dir': 0, + '*::draggable': 0, + '*::hidden': 0, + '*::id': 4, + '*::inert': 0, + '*::itemprop': 0, + '*::itemref': 6, + '*::itemscope': 0, + '*::lang': 0, + '*::onblur': 2, + '*::onchange': 2, + '*::onclick': 2, + '*::ondblclick': 2, + '*::onfocus': 2, + '*::onkeydown': 2, + '*::onkeypress': 2, + '*::onkeyup': 2, + '*::onload': 2, + '*::onmousedown': 2, + '*::onmousemove': 2, + '*::onmouseout': 2, + '*::onmouseover': 2, + '*::onmouseup': 2, + '*::onreset': 2, + '*::onscroll': 2, + '*::onselect': 2, + '*::onsubmit': 2, + '*::onunload': 2, + '*::spellcheck': 0, + 
'*::style': 3, + '*::title': 0, + '*::translate': 0, + 'a::accesskey': 0, + 'a::coords': 0, + 'a::href': 1, + 'a::hreflang': 0, + 'a::name': 7, + 'a::onblur': 2, + 'a::onfocus': 2, + 'a::shape': 0, + 'a::tabindex': 0, + 'a::target': 10, + 'a::type': 0, + 'area::accesskey': 0, + 'area::alt': 0, + 'area::coords': 0, + 'area::href': 1, + 'area::nohref': 0, + 'area::onblur': 2, + 'area::onfocus': 2, + 'area::shape': 0, + 'area::tabindex': 0, + 'area::target': 10, + 'audio::controls': 0, + 'audio::loop': 0, + 'audio::mediagroup': 5, + 'audio::muted': 0, + 'audio::preload': 0, + 'bdo::dir': 0, + 'blockquote::cite': 1, + 'br::clear': 0, + 'button::accesskey': 0, + 'button::disabled': 0, + 'button::name': 8, + 'button::onblur': 2, + 'button::onfocus': 2, + 'button::tabindex': 0, + 'button::type': 0, + 'button::value': 0, + 'canvas::height': 0, + 'canvas::width': 0, + 'caption::align': 0, + 'col::align': 0, + 'col::char': 0, + 'col::charoff': 0, + 'col::span': 0, + 'col::valign': 0, + 'col::width': 0, + 'colgroup::align': 0, + 'colgroup::char': 0, + 'colgroup::charoff': 0, + 'colgroup::span': 0, + 'colgroup::valign': 0, + 'colgroup::width': 0, + 'command::checked': 0, + 'command::command': 5, + 'command::disabled': 0, + 'command::icon': 1, + 'command::label': 0, + 'command::radiogroup': 0, + 'command::type': 0, + 'data::value': 0, + 'del::cite': 1, + 'del::datetime': 0, + 'details::open': 0, + 'dir::compact': 0, + 'div::align': 0, + 'dl::compact': 0, + 'fieldset::disabled': 0, + 'font::color': 0, + 'font::face': 0, + 'font::size': 0, + 'form::accept': 0, + 'form::action': 1, + 'form::autocomplete': 0, + 'form::enctype': 0, + 'form::method': 0, + 'form::name': 7, + 'form::novalidate': 0, + 'form::onreset': 2, + 'form::onsubmit': 2, + 'form::target': 10, + 'h1::align': 0, + 'h2::align': 0, + 'h3::align': 0, + 'h4::align': 0, + 'h5::align': 0, + 'h6::align': 0, + 'hr::align': 0, + 'hr::noshade': 0, + 'hr::size': 0, + 'hr::width': 0, + 'iframe::align': 0, + 
'iframe::frameborder': 0, + 'iframe::height': 0, + 'iframe::marginheight': 0, + 'iframe::marginwidth': 0, + 'iframe::width': 0, + 'img::align': 0, + 'img::alt': 0, + 'img::border': 0, + 'img::height': 0, + 'img::hspace': 0, + 'img::ismap': 0, + 'img::name': 7, + 'img::src': 1, + 'img::usemap': 11, + 'img::vspace': 0, + 'img::width': 0, + 'input::accept': 0, + 'input::accesskey': 0, + 'input::align': 0, + 'input::alt': 0, + 'input::autocomplete': 0, + 'input::checked': 0, + 'input::disabled': 0, + 'input::inputmode': 0, + 'input::ismap': 0, + 'input::list': 5, + 'input::max': 0, + 'input::maxlength': 0, + 'input::min': 0, + 'input::multiple': 0, + 'input::name': 8, + 'input::onblur': 2, + 'input::onchange': 2, + 'input::onfocus': 2, + 'input::onselect': 2, + 'input::placeholder': 0, + 'input::readonly': 0, + 'input::required': 0, + 'input::size': 0, + 'input::src': 1, + 'input::step': 0, + 'input::tabindex': 0, + 'input::type': 0, + 'input::usemap': 11, + 'input::value': 0, + 'ins::cite': 1, + 'ins::datetime': 0, + 'label::accesskey': 0, + 'label::for': 5, + 'label::onblur': 2, + 'label::onfocus': 2, + 'legend::accesskey': 0, + 'legend::align': 0, + 'li::type': 0, + 'li::value': 0, + 'map::name': 7, + 'menu::compact': 0, + 'menu::label': 0, + 'menu::type': 0, + 'meter::high': 0, + 'meter::low': 0, + 'meter::max': 0, + 'meter::min': 0, + 'meter::value': 0, + 'ol::compact': 0, + 'ol::reversed': 0, + 'ol::start': 0, + 'ol::type': 0, + 'optgroup::disabled': 0, + 'optgroup::label': 0, + 'option::disabled': 0, + 'option::label': 0, + 'option::selected': 0, + 'option::value': 0, + 'output::for': 6, + 'output::name': 8, + 'p::align': 0, + 'pre::width': 0, + 'progress::max': 0, + 'progress::min': 0, + 'progress::value': 0, + 'q::cite': 1, + 'select::autocomplete': 0, + 'select::disabled': 0, + 'select::multiple': 0, + 'select::name': 8, + 'select::onblur': 2, + 'select::onchange': 2, + 'select::onfocus': 2, + 'select::required': 0, + 'select::size': 0, + 'select::tabindex': 
0, + 'source::type': 0, + 'table::align': 0, + 'table::bgcolor': 0, + 'table::border': 0, + 'table::cellpadding': 0, + 'table::cellspacing': 0, + 'table::frame': 0, + 'table::rules': 0, + 'table::summary': 0, + 'table::width': 0, + 'tbody::align': 0, + 'tbody::char': 0, + 'tbody::charoff': 0, + 'tbody::valign': 0, + 'td::abbr': 0, + 'td::align': 0, + 'td::axis': 0, + 'td::bgcolor': 0, + 'td::char': 0, + 'td::charoff': 0, + 'td::colspan': 0, + 'td::headers': 6, + 'td::height': 0, + 'td::nowrap': 0, + 'td::rowspan': 0, + 'td::scope': 0, + 'td::valign': 0, + 'td::width': 0, + 'textarea::accesskey': 0, + 'textarea::autocomplete': 0, + 'textarea::cols': 0, + 'textarea::disabled': 0, + 'textarea::inputmode': 0, + 'textarea::name': 8, + 'textarea::onblur': 2, + 'textarea::onchange': 2, + 'textarea::onfocus': 2, + 'textarea::onselect': 2, + 'textarea::placeholder': 0, + 'textarea::readonly': 0, + 'textarea::required': 0, + 'textarea::rows': 0, + 'textarea::tabindex': 0, + 'textarea::wrap': 0, + 'tfoot::align': 0, + 'tfoot::char': 0, + 'tfoot::charoff': 0, + 'tfoot::valign': 0, + 'th::abbr': 0, + 'th::align': 0, + 'th::axis': 0, + 'th::bgcolor': 0, + 'th::char': 0, + 'th::charoff': 0, + 'th::colspan': 0, + 'th::headers': 6, + 'th::height': 0, + 'th::nowrap': 0, + 'th::rowspan': 0, + 'th::scope': 0, + 'th::valign': 0, + 'th::width': 0, + 'thead::align': 0, + 'thead::char': 0, + 'thead::charoff': 0, + 'thead::valign': 0, + 'tr::align': 0, + 'tr::bgcolor': 0, + 'tr::char': 0, + 'tr::charoff': 0, + 'tr::valign': 0, + 'track::default': 0, + 'track::kind': 0, + 'track::label': 0, + 'track::srclang': 0, + 'ul::compact': 0, + 'ul::type': 0, + 'video::controls': 0, + 'video::height': 0, + 'video::loop': 0, + 'video::mediagroup': 5, + 'video::muted': 0, + 'video::poster': 1, + 'video::preload': 0, + 'video::width': 0 +}; +html4.eflags = { + 'OPTIONAL_ENDTAG': 1, + 'EMPTY': 2, + 'CDATA': 4, + 'RCDATA': 8, + 'UNSAFE': 16, + 'FOLDABLE': 32, + 'SCRIPT': 64, + 'STYLE': 128, + 
'VIRTUALIZED': 256 +}; +/** + * Maps each element (tag) name to an integer value. + * NOTE(review): the values look like bitwise combinations of the flag constants + * in the table that ends just before this one (its last entry is + * 'VIRTUALIZED': 256) -- confirm against that table. + */ +html4.ELEMENTS = { + 'a': 0, + 'abbr': 0, + 'acronym': 0, + 'address': 0, + 'applet': 272, + 'area': 2, + 'article': 0, + 'aside': 0, + 'audio': 0, + 'b': 0, + 'base': 274, + 'basefont': 274, + 'bdi': 0, + 'bdo': 0, + 'big': 0, + 'blockquote': 0, + 'body': 305, + 'br': 2, + 'button': 0, + 'canvas': 0, + 'caption': 0, + 'center': 0, + 'cite': 0, + 'code': 0, + 'col': 2, + 'colgroup': 1, + 'command': 2, + 'data': 0, + 'datalist': 0, + 'dd': 1, + 'del': 0, + 'details': 0, + 'dfn': 0, + 'dialog': 272, + 'dir': 0, + 'div': 0, + 'dl': 0, + 'dt': 1, + 'em': 0, + 'fieldset': 0, + 'figcaption': 0, + 'figure': 0, + 'font': 0, + 'footer': 0, + 'form': 0, + 'frame': 274, + 'frameset': 272, + 'h1': 0, + 'h2': 0, + 'h3': 0, + 'h4': 0, + 'h5': 0, + 'h6': 0, + 'head': 305, + 'header': 0, + 'hgroup': 0, + 'hr': 2, + 'html': 305, + 'i': 0, + 'iframe': 4, + 'img': 2, + 'input': 2, + 'ins': 0, + 'isindex': 274, + 'kbd': 0, + 'keygen': 274, + 'label': 0, + 'legend': 0, + 'li': 1, + 'link': 274, + 'map': 0, + 'mark': 0, + 'menu': 0, + 'meta': 274, + 'meter': 0, + 'nav': 0, + 'nobr': 0, + 'noembed': 276, + 'noframes': 276, + 'noscript': 276, + 'object': 272, + 'ol': 0, + 'optgroup': 0, + 'option': 1, + 'output': 0, + 'p': 1, + 'param': 274, + 'pre': 0, + 'progress': 0, + 'q': 0, + 's': 0, + 'samp': 0, + 'script': 84, + 'section': 0, + 'select': 0, + 'small': 0, + 'source': 2, + 'span': 0, + 'strike': 0, + 'strong': 0, + 'style': 148, + 'sub': 0, + 'summary': 0, + 'sup': 0, + 'table': 0, + 'tbody': 1, + 'td': 1, + 'textarea': 8, + 'tfoot': 1, + 'th': 1, + 'thead': 1, + 'time': 0, + 'title': 280, + 'tr': 1, + 'track': 2, + 'tt': 0, + 'u': 0, + 'ul': 0, + 'var': 0, + 'video': 0, + 'wbr': 2 +}; +/** + * Maps each element (tag) name to the name, as a string, of a DOM interface + * (e.g. 'a' -> 'HTMLAnchorElement'); many elements map to the generic + * 'HTMLElement'. The keys are the same tag names used by html4.ELEMENTS. + */ +html4.ELEMENT_DOM_INTERFACES = { + 'a': 'HTMLAnchorElement', + 'abbr': 'HTMLElement', + 'acronym': 'HTMLElement', + 'address': 'HTMLElement', + 'applet': 'HTMLAppletElement', + 'area': 'HTMLAreaElement', + 'article': 'HTMLElement', + 'aside': 'HTMLElement', + 'audio': 
'HTMLAudioElement', + 'b': 'HTMLElement', + 'base': 'HTMLBaseElement', + 'basefont': 'HTMLBaseFontElement', + 'bdi': 'HTMLElement', + 'bdo': 'HTMLElement', + 'big': 'HTMLElement', + 'blockquote': 'HTMLQuoteElement', + 'body': 'HTMLBodyElement', + 'br': 'HTMLBRElement', + 'button': 'HTMLButtonElement', + 'canvas': 'HTMLCanvasElement', + 'caption': 'HTMLTableCaptionElement', + 'center': 'HTMLElement', + 'cite': 'HTMLElement', + 'code': 'HTMLElement', + 'col': 'HTMLTableColElement', + 'colgroup': 'HTMLTableColElement', + 'command': 'HTMLCommandElement', + 'data': 'HTMLElement', + 'datalist': 'HTMLDataListElement', + 'dd': 'HTMLElement', + 'del': 'HTMLModElement', + 'details': 'HTMLDetailsElement', + 'dfn': 'HTMLElement', + 'dialog': 'HTMLDialogElement', + 'dir': 'HTMLDirectoryElement', + 'div': 'HTMLDivElement', + 'dl': 'HTMLDListElement', + 'dt': 'HTMLElement', + 'em': 'HTMLElement', + 'fieldset': 'HTMLFieldSetElement', + 'figcaption': 'HTMLElement', + 'figure': 'HTMLElement', + 'font': 'HTMLFontElement', + 'footer': 'HTMLElement', + 'form': 'HTMLFormElement', + 'frame': 'HTMLFrameElement', + 'frameset': 'HTMLFrameSetElement', + 'h1': 'HTMLHeadingElement', + 'h2': 'HTMLHeadingElement', + 'h3': 'HTMLHeadingElement', + 'h4': 'HTMLHeadingElement', + 'h5': 'HTMLHeadingElement', + 'h6': 'HTMLHeadingElement', + 'head': 'HTMLHeadElement', + 'header': 'HTMLElement', + 'hgroup': 'HTMLElement', + 'hr': 'HTMLHRElement', + 'html': 'HTMLHtmlElement', + 'i': 'HTMLElement', + 'iframe': 'HTMLIFrameElement', + 'img': 'HTMLImageElement', + 'input': 'HTMLInputElement', + 'ins': 'HTMLModElement', + 'isindex': 'HTMLUnknownElement', + 'kbd': 'HTMLElement', + 'keygen': 'HTMLKeygenElement', + 'label': 'HTMLLabelElement', + 'legend': 'HTMLLegendElement', + 'li': 'HTMLLIElement', + 'link': 'HTMLLinkElement', + 'map': 'HTMLMapElement', + 'mark': 'HTMLElement', + 'menu': 'HTMLMenuElement', + 'meta': 'HTMLMetaElement', + 'meter': 'HTMLMeterElement', + 'nav': 'HTMLElement', + 'nobr': 
'HTMLElement', + 'noembed': 'HTMLElement', + 'noframes': 'HTMLElement', + 'noscript': 'HTMLElement', + 'object': 'HTMLObjectElement', + 'ol': 'HTMLOListElement', + 'optgroup': 'HTMLOptGroupElement', + 'option': 'HTMLOptionElement', + 'output': 'HTMLOutputElement', + 'p': 'HTMLParagraphElement', + 'param': 'HTMLParamElement', + 'pre': 'HTMLPreElement', + 'progress': 'HTMLProgressElement', + 'q': 'HTMLQuoteElement', + 's': 'HTMLElement', + 'samp': 'HTMLElement', + 'script': 'HTMLScriptElement', + 'section': 'HTMLElement', + 'select': 'HTMLSelectElement', + 'small': 'HTMLElement', + 'source': 'HTMLSourceElement', + 'span': 'HTMLSpanElement', + 'strike': 'HTMLElement', + 'strong': 'HTMLElement', + 'style': 'HTMLStyleElement', + 'sub': 'HTMLElement', + 'summary': 'HTMLElement', + 'sup': 'HTMLElement', + 'table': 'HTMLTableElement', + 'tbody': 'HTMLTableSectionElement', + 'td': 'HTMLTableDataCellElement', + 'textarea': 'HTMLTextAreaElement', + 'tfoot': 'HTMLTableSectionElement', + 'th': 'HTMLTableHeaderCellElement', + 'thead': 'HTMLTableSectionElement', + 'time': 'HTMLTimeElement', + 'title': 'HTMLTitleElement', + 'tr': 'HTMLTableRowElement', + 'track': 'HTMLTrackElement', + 'tt': 'HTMLElement', + 'u': 'HTMLElement', + 'ul': 'HTMLUListElement', + 'var': 'HTMLElement', + 'video': 'HTMLVideoElement', + 'wbr': 'HTMLElement' +}; +/** + * Named integer codes 0..2. + * NOTE(review): presumably these classify the effect of following a URI + * attribute, and are the codes stored in html4.URIEFFECTS -- confirm. + */ +html4.ueffects = { + 'NOT_LOADED': 0, + 'SAME_DOCUMENT': 1, + 'NEW_DOCUMENT': 2 +}; +/** + * Keyed by 'element::attribute'; each value is 0, 1 or 2, which matches the + * range of codes declared in html4.ueffects. + */ +html4.URIEFFECTS = { + 'a::href': 2, + 'area::href': 2, + 'blockquote::cite': 0, + 'command::icon': 1, + 'del::cite': 0, + 'form::action': 2, + 'img::src': 1, + 'input::src': 1, + 'ins::cite': 0, + 'q::cite': 0, + 'video::poster': 1 +}; +/** + * Named integer codes 0..2. + * NOTE(review): presumably these are the loader-type codes stored in + * html4.LOADERTYPES -- confirm. + */ +html4.ltypes = { + 'UNSANDBOXED': 2, + 'SANDBOXED': 1, + 'DATA': 0 +}; +/** + * Same 'element::attribute' keys as html4.URIEFFECTS; each value is 1 or 2, + * which falls within the codes declared in html4.ltypes. + */ +html4.LOADERTYPES = { + 'a::href': 2, + 'area::href': 2, + 'blockquote::cite': 2, + 'command::icon': 1, + 'del::cite': 2, + 'form::action': 2, + 'img::src': 1, + 'input::src': 1, + 'ins::cite': 2, + 'q::cite': 2, + 'video::poster': 1 +}; +// Copyright (C) 2006 
Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * @fileoverview + * An HTML sanitizer that can satisfy a variety of security policies. + * + *

+ * The HTML sanitizer is built around a SAX parser and HTML element and + * attributes schemas. + * + * If the cssparser is loaded, inline styles are sanitized using the + * css property and value schemas. Else they are removed during + * sanitization. + * + * If it exists, uses parseCssDeclarations, sanitizeCssProperty, cssSchema + * + * @author mikesamuel@gmail.com + * @author jasvir@gmail.com + * \@requires html4, URI + * \@overrides window + * \@provides html, html_sanitize + */ + +// The Turkish i seems to be a non-issue, but abort in case it is. +// if ('I'.toLowerCase() !== 'i') { throw 'I/i problem'; } # Sam ... screwing up in turkish browsers seems a silly idea + +/** + * \@namespace + */ +var html = (function(html4) { + + // For closure compiler + var parseCssDeclarations, sanitizeCssProperty, cssSchema; + if ('undefined' !== typeof window) { + parseCssDeclarations = window.parseCssDeclarations; + sanitizeCssProperty = window.sanitizeCssProperty; + cssSchema = window.cssSchema; + } + + // The keys of this object must be 'quoted' or JSCompiler will mangle them! + // This is a partial list -- lookupEntity() uses the host browser's parser + // (when available) to implement full entity lookup. + // Note that entities are in general case-sensitive; the uppercase ones are + // explicitly defined by HTML5 (presumably as compatibility). + var ENTITIES = { + 'lt': '<', + 'LT': '<', + 'gt': '>', + 'GT': '>', + 'amp': '&', + 'AMP': '&', + 'quot': '"', + 'apos': '\'', + 'nbsp': '\240' + }; + + // Patterns for types of entity/character reference names. + var decimalEscapeRe = /^#(\d+)$/; + var hexEscapeRe = /^#x([0-9A-Fa-f]+)$/; + // contains every entity per http://www.w3.org/TR/2011/WD-html5-20110113/named-character-references.html + // NOTE(review): 'A-z' in the second character class also matches the ASCII punctuation between 'Z' and 'a'; 'A-Z' was presumably intended. + var safeEntityNameRe = /^[A-Za-z][A-za-z0-9]+$/; + // Used as a hook to invoke the browser's entity parsing.