fix(core): traverse and sanitize content of unsafe elements (#28804)

In the past, the sanitizer would remove unsafe elements, but still
traverse and sanitize (and potentially preserve) their content. This was
problematic in the case of `<style></style>` tags, whose content would
be converted to HTML text nodes.

In order to fix this, the sanitizer's behavior was changed in #25879 to
ignore the content of _all_ unsafe elements. While this fixed the
problem with `<style></style>` tags, it unnecessarily removed the
contents for _any_ unsafe element. This was an unneeded breaking change.

This commit partially restores the old sanitizer behavior (namely
traversing content of unsafe elements), but introduces a list of
elements whose content should not be traversed if the elements
themselves are considered unsafe. Currently, this list contains `style`,
`script` and `template`.

Related to #25879 and #26007.

Fixes #28427

PR Close #28804
This commit is contained in:
George Kalpakas 2019-02-18 18:14:34 +02:00 committed by Ben Lesh
parent dc9f0af080
commit 262ba67525
2 changed files with 48 additions and 26 deletions

View File

@ -83,6 +83,13 @@ const HTML_ATTRS = tagSet(
export const VALID_ATTRS = merge(URI_ATTRS, SRCSET_ATTRS, HTML_ATTRS);
// Elements whose content should not be traversed/preserved, if the elements themselves are invalid.
//
// Typically, `<invalid>Some content</invalid>` would traverse (and in this case preserve)
// `Some content`, but strip `invalid-element` opening/closing tags. For some elements, though, we
// don't want to preserve the content, if the elements themselves are going to be removed.
const SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS = tagSet('script,style,template');
/**
* SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe
* attributes.
@ -98,17 +105,17 @@ class SanitizingHtmlSerializer {
// However this code never accesses properties off of `document` before deleting its contents
// again, so it shouldn't be vulnerable to DOM clobbering.
let current: Node = el.firstChild !;
let elementValid = true;
let traverseContent = true;
while (current) {
if (current.nodeType === Node.ELEMENT_NODE) {
elementValid = this.startElement(current as Element);
traverseContent = this.startElement(current as Element);
} else if (current.nodeType === Node.TEXT_NODE) {
this.chars(current.nodeValue !);
} else {
// Strip non-element, non-text nodes.
this.sanitizedSomething = true;
}
if (elementValid && current.firstChild) {
if (traverseContent && current.firstChild) {
current = current.firstChild !;
continue;
}
@ -132,18 +139,18 @@ class SanitizingHtmlSerializer {
}
/**
* Outputs only valid Elements.
* Sanitizes an opening element tag (if valid) and returns whether the element's contents should
* be traversed. Element content must always be traversed (even if the element itself is not
* valid/safe), unless the element is one of `SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS`.
*
* Invalid elements are skipped.
*
* @param element element to sanitize
* Returns true if the element is valid.
* @param element The element to sanitize.
* @return True if the element's contents should be traversed.
*/
private startElement(element: Element): boolean {
const tagName = element.nodeName.toLowerCase();
if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
this.sanitizedSomething = true;
return false;
return !SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS.hasOwnProperty(tagName);
}
this.buf.push('<');
this.buf.push(tagName);

View File

@ -36,8 +36,8 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
.toEqual('<p>Hello <br> World</p>');
});
it('supports removal of namespaced elements',
() => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('a'); });
it('supports namespaced elements',
() => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('abc'); });
it('supports namespaced attributes', () => {
expect(_sanitizeHtml(defaultDoc, '<a xlink:href="something">t</a>'))
@ -85,7 +85,7 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
.toEqual('<p alt="% &amp; &#34; !">Hello</p>'); // NB: quote encoded as ASCII &#34;.
});
describe('should strip dangerous elements and its content', () => {
describe('should strip dangerous elements (but potentially traverse their content)', () => {
const dangerousTags = [
'form',
'object',
@ -93,32 +93,40 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
'button',
'option',
'select',
'script',
'style',
];
for (const tag of dangerousTags) {
it(`${tag}`,
() => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual(''); });
it(tag,
() => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual('evil!'); });
}
const dangerousSelfClosingTags = [
'frameset',
'embed',
'input',
'param',
'base',
'basefont',
'param',
'embed',
'frameset',
'input',
'link',
'param',
];
for (const tag of dangerousSelfClosingTags) {
it(`${tag}`, () => {
it(tag, () => {
expect(_sanitizeHtml(defaultDoc, `before<${tag}>After`)).toEqual('beforeAfter');
});
}
it(`swallows frame entirely`, () => {
const dangerousSkipContentTags = [
'script',
'style',
'template',
];
for (const tag of dangerousSkipContentTags) {
it(tag, () => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual(''); });
}
it(`frame`, () => {
// `<frame>` is special, because different browsers treat it differently (e.g. remove it
// altogether). // We just verify that (one way or another), there is no `<frame>` element
// after sanitization.
expect(_sanitizeHtml(defaultDoc, `<frame>evil!</frame>`)).not.toContain('<frame>');
});
});
@ -133,6 +141,13 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
}
});
it('ignores content of script elements', () => {
expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script>')).toEqual('');
expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script><div>hi</div>'))
.toEqual('<div>hi</div>');
expect(_sanitizeHtml(defaultDoc, '<style>\<\!-- something--\>hi</style>')).toEqual('');
});
it('ignores content of style elements', () => {
expect(_sanitizeHtml(defaultDoc, '<style><!-- foobar --></style><div>hi</div>'))
.toEqual('<div>hi</div>');
@ -186,7 +201,7 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
// PlatformBrowser output
'<p><img src="x"></p>' :
// PlatformServer output
'');
'<p></p>');
});
if (browserDetection.isWebkit) {