fix(core): remove closing body tag from inert DOM builder (#38454)

Fix a bug in the HTML sanitizer where an unclosed iframe tag would
result in an escaped closing body tag as the output:

_sanitizeHtml(document, '<iframe>') => '&lt;/body&gt;'

This closing body tag comes from the DOMParserHelper where the HTML to be
sanitized is wrapped with surrounding body tags. When an opening iframe
tag is parsed by DOMParser, which DOMParserHelper uses, everything up
until its matching closing tag is consumed as a text node. In the above
example this includes the appended closing body tag.

By removing the explicit closing body tag from the DOMParserHelper and
relying on the body tag being closed implicitly at the end, the above
example is sanitized as expected:

_sanitizeHtml(document, '<iframe>') => ''

PR Close #38454
This commit is contained in:
Bjarki 2020-08-13 21:33:40 +00:00 committed by Misko Hevery
parent 68a9a01a64
commit f245c6bb15
2 changed files with 24 additions and 2 deletions

View File

@ -32,8 +32,9 @@ class DOMParserHelper implements InertBodyHelper {
getInertBodyElement(html: string): HTMLElement|null {
// We add these extra elements to ensure that the rest of the content is parsed as expected
// e.g. leading whitespace is maintained and tags like `<meta>` do not get hoisted to the
// `<head>` tag.
html = '<body><remove></remove>' + html + '</body>';
// `<head>` tag. Note that the `<body>` tag is closed implicitly to prevent unclosed tags
// in `html` from consuming the otherwise explicit `</body>` tag.
html = '<body><remove></remove>' + html;
try {
const body = new (window as any).DOMParser().parseFromString(html, 'text/html').body as
HTMLBodyElement;

View File

@ -173,6 +173,27 @@ import {isDOMParserAvailable} from '../../src/sanitization/inert_body';
expect(logMsgs.join('\n')).toMatch(/sanitizing HTML stripped some content/);
});
it('should strip unclosed iframe tag', () => {
expect(_sanitizeHtml(defaultDoc, '<iframe>')).toEqual('');
expect([
'&lt;iframe&gt;',
// Double-escaped on IE
'&amp;lt;iframe&amp;gt;'
]).toContain(_sanitizeHtml(defaultDoc, '<iframe><iframe>'));
expect([
'&lt;script&gt;evil();&lt;/script&gt;',
// Double-escaped on IE
'&amp;lt;script&amp;gt;evil();&amp;lt;/script&amp;gt;'
]).toContain(_sanitizeHtml(defaultDoc, '<iframe><script>evil();</script>'));
});
it('should ignore extraneous body tags', () => {
expect(_sanitizeHtml(defaultDoc, '</body>')).toEqual('');
expect(_sanitizeHtml(defaultDoc, 'foo</body>bar')).toEqual('foobar');
expect(_sanitizeHtml(defaultDoc, 'foo<body>bar')).toEqual('foobar');
expect(_sanitizeHtml(defaultDoc, 'fo<body>ob</body>ar')).toEqual('foobar');
});
it('should not enter an infinite loop on clobbered elements', () => {
// Some browsers are vulnerable to clobbered elements and will throw an expected exception
// IE and EDGE does not seems to be affected by those cases