fix(core): traverse and sanitize content of unsafe elements (#28804)

In the past, the sanitizer would remove unsafe elements, but still
traverse and sanitize (and potentially preserve) their content. This was
problematic in the case of `<style></style>` tags, whose content would
be converted to HTML text nodes.

In order to fix this, the sanitizer's behavior was changed in #25879 to
ignore the content of _all_ unsafe elements. While this fixed the
problem with `<style></style>` tags, it unnecessarily removed the
contents for _any_ unsafe element. This was an unneeded breaking change.

This commit partially restores the old sanitizer behavior (namely
traversing content of unsafe elements), but introduces a list of
elements whose content should not be traversed if the elements
themselves are considered unsafe. Currently, this list contains `style`,
`script` and `template`.

Related to #25879 and #26007.

Fixes #28427

PR Close #28804
This commit is contained in:
George Kalpakas 2019-02-18 18:14:34 +02:00 committed by Ben Lesh
parent dc9f0af080
commit 262ba67525
2 changed files with 48 additions and 26 deletions

View File

@ -83,6 +83,13 @@ const HTML_ATTRS = tagSet(
export const VALID_ATTRS = merge(URI_ATTRS, SRCSET_ATTRS, HTML_ATTRS); export const VALID_ATTRS = merge(URI_ATTRS, SRCSET_ATTRS, HTML_ATTRS);
// Elements whose content should not be traversed/preserved, if the elements themselves are invalid.
//
// Typically, `<invalid>Some content</invalid>` would traverse (and in this case preserve)
// `Some content`, but strip `invalid-element` opening/closing tags. For some elements, though, we
// don't want to preserve the content, if the elements themselves are going to be removed.
const SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS = tagSet('script,style,template');
/** /**
* SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe * SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe
* attributes. * attributes.
@ -98,17 +105,17 @@ class SanitizingHtmlSerializer {
// However this code never accesses properties off of `document` before deleting its contents // However this code never accesses properties off of `document` before deleting its contents
// again, so it shouldn't be vulnerable to DOM clobbering. // again, so it shouldn't be vulnerable to DOM clobbering.
let current: Node = el.firstChild !; let current: Node = el.firstChild !;
let elementValid = true; let traverseContent = true;
while (current) { while (current) {
if (current.nodeType === Node.ELEMENT_NODE) { if (current.nodeType === Node.ELEMENT_NODE) {
elementValid = this.startElement(current as Element); traverseContent = this.startElement(current as Element);
} else if (current.nodeType === Node.TEXT_NODE) { } else if (current.nodeType === Node.TEXT_NODE) {
this.chars(current.nodeValue !); this.chars(current.nodeValue !);
} else { } else {
// Strip non-element, non-text nodes. // Strip non-element, non-text nodes.
this.sanitizedSomething = true; this.sanitizedSomething = true;
} }
if (elementValid && current.firstChild) { if (traverseContent && current.firstChild) {
current = current.firstChild !; current = current.firstChild !;
continue; continue;
} }
@ -132,18 +139,18 @@ class SanitizingHtmlSerializer {
} }
/** /**
* Outputs only valid Elements. * Sanitizes an opening element tag (if valid) and returns whether the element's contents should
* be traversed. Element content must always be traversed (even if the element itself is not
* valid/safe), unless the element is one of `SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS`.
* *
* Invalid elements are skipped. * @param element The element to sanitize.
* * @return True if the element's contents should be traversed.
* @param element element to sanitize
* Returns true if the element is valid.
*/ */
private startElement(element: Element): boolean { private startElement(element: Element): boolean {
const tagName = element.nodeName.toLowerCase(); const tagName = element.nodeName.toLowerCase();
if (!VALID_ELEMENTS.hasOwnProperty(tagName)) { if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
this.sanitizedSomething = true; this.sanitizedSomething = true;
return false; return !SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS.hasOwnProperty(tagName);
} }
this.buf.push('<'); this.buf.push('<');
this.buf.push(tagName); this.buf.push(tagName);

View File

@ -36,8 +36,8 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
.toEqual('<p>Hello <br> World</p>'); .toEqual('<p>Hello <br> World</p>');
}); });
it('supports removal of namespaced elements', it('supports namespaced elements',
() => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('a'); }); () => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('abc'); });
it('supports namespaced attributes', () => { it('supports namespaced attributes', () => {
expect(_sanitizeHtml(defaultDoc, '<a xlink:href="something">t</a>')) expect(_sanitizeHtml(defaultDoc, '<a xlink:href="something">t</a>'))
@ -85,7 +85,7 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
.toEqual('<p alt="% &amp; &#34; !">Hello</p>'); // NB: quote encoded as ASCII &#34;. .toEqual('<p alt="% &amp; &#34; !">Hello</p>'); // NB: quote encoded as ASCII &#34;.
}); });
describe('should strip dangerous elements and its content', () => { describe('should strip dangerous elements (but potentially traverse their content)', () => {
const dangerousTags = [ const dangerousTags = [
'form', 'form',
'object', 'object',
@ -93,32 +93,40 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
'button', 'button',
'option', 'option',
'select', 'select',
'script',
'style',
]; ];
for (const tag of dangerousTags) { for (const tag of dangerousTags) {
it(`${tag}`, it(tag,
() => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual(''); }); () => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual('evil!'); });
} }
const dangerousSelfClosingTags = [ const dangerousSelfClosingTags = [
'frameset',
'embed',
'input',
'param',
'base', 'base',
'basefont', 'basefont',
'param', 'embed',
'frameset',
'input',
'link', 'link',
'param',
]; ];
for (const tag of dangerousSelfClosingTags) { for (const tag of dangerousSelfClosingTags) {
it(`${tag}`, () => { it(tag, () => {
expect(_sanitizeHtml(defaultDoc, `before<${tag}>After`)).toEqual('beforeAfter'); expect(_sanitizeHtml(defaultDoc, `before<${tag}>After`)).toEqual('beforeAfter');
}); });
} }
it(`swallows frame entirely`, () => { const dangerousSkipContentTags = [
'script',
'style',
'template',
];
for (const tag of dangerousSkipContentTags) {
it(tag, () => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual(''); });
}
it(`frame`, () => {
// `<frame>` is special, because different browsers treat it differently (e.g. remove it
// altogether). // We just verify that (one way or another), there is no `<frame>` element
// after sanitization.
expect(_sanitizeHtml(defaultDoc, `<frame>evil!</frame>`)).not.toContain('<frame>'); expect(_sanitizeHtml(defaultDoc, `<frame>evil!</frame>`)).not.toContain('<frame>');
}); });
}); });
@ -133,6 +141,13 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
} }
}); });
it('ignores content of script elements', () => {
expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script>')).toEqual('');
expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script><div>hi</div>'))
.toEqual('<div>hi</div>');
expect(_sanitizeHtml(defaultDoc, '<style>\<\!-- something--\>hi</style>')).toEqual('');
});
it('ignores content of style elements', () => { it('ignores content of style elements', () => {
expect(_sanitizeHtml(defaultDoc, '<style><!-- foobar --></style><div>hi</div>')) expect(_sanitizeHtml(defaultDoc, '<style><!-- foobar --></style><div>hi</div>'))
.toEqual('<div>hi</div>'); .toEqual('<div>hi</div>');
@ -186,7 +201,7 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
// PlatformBrowser output // PlatformBrowser output
'<p><img src="x"></p>' : '<p><img src="x"></p>' :
// PlatformServer output // PlatformServer output
''); '<p></p>');
}); });
if (browserDetection.isWebkit) { if (browserDetection.isWebkit) {