fix(core): traverse and sanitize content of unsafe elements (#28804)
In the past, the sanitizer would remove unsafe elements, but still traverse and sanitize (and potentially preserve) their content. This was problematic in the case of `<style></style>` tags, whose content would be converted to HTML text nodes. In order to fix this, the sanitizer's behavior was changed in #25879 to ignore the content of _all_ unsafe elements. While this fixed the problem with `<style></style>` tags, it unnecessarily removed the contents for _any_ unsafe element. This was an unneeded breaking change. This commit partially restores the old sanitizer behavior (namely traversing content of unsafe elements), but introduces a list of elements whose content should not be traversed if the elements themselves are considered unsafe. Currently, this list contains `style`, `script` and `template`. Related to #25879 and #26007. Fixes #28427 PR Close #28804
This commit is contained in:
parent
dc9f0af080
commit
262ba67525
|
@ -83,6 +83,13 @@ const HTML_ATTRS = tagSet(
|
|||
|
||||
export const VALID_ATTRS = merge(URI_ATTRS, SRCSET_ATTRS, HTML_ATTRS);
|
||||
|
||||
// Elements whose content should not be traversed/preserved, if the elements themselves are invalid.
|
||||
//
|
||||
// Typically, `<invalid>Some content</invalid>` would traverse (and in this case preserve)
|
||||
// `Some content`, but strip `invalid-element` opening/closing tags. For some elements, though, we
|
||||
// don't want to preserve the content, if the elements themselves are going to be removed.
|
||||
const SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS = tagSet('script,style,template');
|
||||
|
||||
/**
|
||||
* SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe
|
||||
* attributes.
|
||||
|
@ -98,17 +105,17 @@ class SanitizingHtmlSerializer {
|
|||
// However this code never accesses properties off of `document` before deleting its contents
|
||||
// again, so it shouldn't be vulnerable to DOM clobbering.
|
||||
let current: Node = el.firstChild !;
|
||||
let elementValid = true;
|
||||
let traverseContent = true;
|
||||
while (current) {
|
||||
if (current.nodeType === Node.ELEMENT_NODE) {
|
||||
elementValid = this.startElement(current as Element);
|
||||
traverseContent = this.startElement(current as Element);
|
||||
} else if (current.nodeType === Node.TEXT_NODE) {
|
||||
this.chars(current.nodeValue !);
|
||||
} else {
|
||||
// Strip non-element, non-text nodes.
|
||||
this.sanitizedSomething = true;
|
||||
}
|
||||
if (elementValid && current.firstChild) {
|
||||
if (traverseContent && current.firstChild) {
|
||||
current = current.firstChild !;
|
||||
continue;
|
||||
}
|
||||
|
@ -132,18 +139,18 @@ class SanitizingHtmlSerializer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Outputs only valid Elements.
|
||||
* Sanitizes an opening element tag (if valid) and returns whether the element's contents should
|
||||
* be traversed. Element content must always be traversed (even if the element itself is not
|
||||
* valid/safe), unless the element is one of `SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS`.
|
||||
*
|
||||
* Invalid elements are skipped.
|
||||
*
|
||||
* @param element element to sanitize
|
||||
* Returns true if the element is valid.
|
||||
* @param element The element to sanitize.
|
||||
* @return True if the element's contents should be traversed.
|
||||
*/
|
||||
private startElement(element: Element): boolean {
|
||||
const tagName = element.nodeName.toLowerCase();
|
||||
if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
|
||||
this.sanitizedSomething = true;
|
||||
return false;
|
||||
return !SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS.hasOwnProperty(tagName);
|
||||
}
|
||||
this.buf.push('<');
|
||||
this.buf.push(tagName);
|
||||
|
|
|
@ -36,8 +36,8 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
|
|||
.toEqual('<p>Hello <br> World</p>');
|
||||
});
|
||||
|
||||
it('supports removal of namespaced elements',
|
||||
() => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('a'); });
|
||||
it('supports namespaced elements',
|
||||
() => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('abc'); });
|
||||
|
||||
it('supports namespaced attributes', () => {
|
||||
expect(_sanitizeHtml(defaultDoc, '<a xlink:href="something">t</a>'))
|
||||
|
@ -85,7 +85,7 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
|
|||
.toEqual('<p alt="% & " !">Hello</p>'); // NB: quote encoded as ASCII ".
|
||||
});
|
||||
|
||||
describe('should strip dangerous elements and its content', () => {
|
||||
describe('should strip dangerous elements (but potentially traverse their content)', () => {
|
||||
const dangerousTags = [
|
||||
'form',
|
||||
'object',
|
||||
|
@ -93,32 +93,40 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
|
|||
'button',
|
||||
'option',
|
||||
'select',
|
||||
'script',
|
||||
'style',
|
||||
];
|
||||
|
||||
for (const tag of dangerousTags) {
|
||||
it(`${tag}`,
|
||||
() => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual(''); });
|
||||
it(tag,
|
||||
() => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual('evil!'); });
|
||||
}
|
||||
|
||||
const dangerousSelfClosingTags = [
|
||||
'frameset',
|
||||
'embed',
|
||||
'input',
|
||||
'param',
|
||||
'base',
|
||||
'basefont',
|
||||
'param',
|
||||
'embed',
|
||||
'frameset',
|
||||
'input',
|
||||
'link',
|
||||
'param',
|
||||
];
|
||||
|
||||
for (const tag of dangerousSelfClosingTags) {
|
||||
it(`${tag}`, () => {
|
||||
it(tag, () => {
|
||||
expect(_sanitizeHtml(defaultDoc, `before<${tag}>After`)).toEqual('beforeAfter');
|
||||
});
|
||||
}
|
||||
|
||||
it(`swallows frame entirely`, () => {
|
||||
const dangerousSkipContentTags = [
|
||||
'script',
|
||||
'style',
|
||||
'template',
|
||||
];
|
||||
for (const tag of dangerousSkipContentTags) {
|
||||
it(tag, () => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual(''); });
|
||||
}
|
||||
|
||||
it(`frame`, () => {
|
||||
// `<frame>` is special, because different browsers treat it differently (e.g. remove it
|
||||
// altogether). // We just verify that (one way or another), there is no `<frame>` element
|
||||
// after sanitization.
|
||||
expect(_sanitizeHtml(defaultDoc, `<frame>evil!</frame>`)).not.toContain('<frame>');
|
||||
});
|
||||
});
|
||||
|
@ -133,6 +141,13 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
|
|||
}
|
||||
});
|
||||
|
||||
it('ignores content of script elements', () => {
|
||||
expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script>')).toEqual('');
|
||||
expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script><div>hi</div>'))
|
||||
.toEqual('<div>hi</div>');
|
||||
expect(_sanitizeHtml(defaultDoc, '<style>\<\!-- something--\>hi</style>')).toEqual('');
|
||||
});
|
||||
|
||||
it('ignores content of style elements', () => {
|
||||
expect(_sanitizeHtml(defaultDoc, '<style><!-- foobar --></style><div>hi</div>'))
|
||||
.toEqual('<div>hi</div>');
|
||||
|
@ -186,7 +201,7 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
|
|||
// PlatformBrowser output
|
||||
'<p><img src="x"></p>' :
|
||||
// PlatformServer output
|
||||
'');
|
||||
'<p></p>');
|
||||
});
|
||||
|
||||
if (browserDetection.isWebkit) {
|
||||
|
|
Loading…
Reference in New Issue