fix(core): remove closing body tag from inert DOM builder (#38454)

Fix a bug in the HTML sanitizer where an unclosed iframe tag would result in an escaped closing body tag as the output: _sanitizeHtml(document, '<iframe>') => '</body>' This closing body tag comes from the DOMParserHelper where the HTML to be sanitized is wrapped with surrounding body tags. When an opening iframe tag is parsed by DOMParser, which DOMParserHelper uses, everything up until its matching closing tag is consumed as a text node. In the above example this includes the appended closing body tag. By removing the explicit closing body tag from the DOMParserHelper and relying on the body tag being closed implicitly at the end, the above example is sanitized as expected: _sanitizeHtml(document, '<iframe>') => '' PR Close #38454
2020-08-13 21:33:40 +00:00 · 2020-08-13 21:33:40 +00:00 · f245c6bb15
commit f245c6bb15
parent 68a9a01a64
2 changed files with 24 additions and 2 deletions
--- a/packages/core/src/sanitization/inert_body.ts
+++ b/packages/core/src/sanitization/inert_body.ts
@ -32,8 +32,9 @@ class DOMParserHelper implements InertBodyHelper {
  getInertBodyElement(html: string): HTMLElement|null {
    // We add these extra elements to ensure that the rest of the content is parsed as expected
    // e.g. leading whitespace is maintained and tags like `<meta>` do not get hoisted to the
-    // `<head>` tag.
-    html = '<body><remove></remove>' + html + '</body>';
+    // `<head>` tag. Note that the `<body>` tag is closed implicitly to prevent unclosed tags
+    // in `html` from consuming the otherwise explicit `</body>` tag.
+    html = '<body><remove></remove>' + html;
    try {
      const body = new (window as any).DOMParser().parseFromString(html, 'text/html').body as
          HTMLBodyElement;
--- a/packages/core/test/sanitization/html_sanitizer_spec.ts
+++ b/packages/core/test/sanitization/html_sanitizer_spec.ts
@ -173,6 +173,27 @@ import {isDOMParserAvailable} from '../../src/sanitization/inert_body';
      expect(logMsgs.join('\n')).toMatch(/sanitizing HTML stripped some content/);
    });

+    it('should strip unclosed iframe tag', () => {
+      expect(_sanitizeHtml(defaultDoc, '<iframe>')).toEqual('');
+      expect([
+        '&lt;iframe&gt;',
+        // Double-escaped on IE
+        '&amp;lt;iframe&amp;gt;'
+      ]).toContain(_sanitizeHtml(defaultDoc, '<iframe><iframe>'));
+      expect([
+        '&lt;script&gt;evil();&lt;/script&gt;',
+        // Double-escaped on IE
+        '&amp;lt;script&amp;gt;evil();&amp;lt;/script&amp;gt;'
+      ]).toContain(_sanitizeHtml(defaultDoc, '<iframe><script>evil();</script>'));
+    });
+
+    it('should ignore extraneous body tags', () => {
+      expect(_sanitizeHtml(defaultDoc, '</body>')).toEqual('');
+      expect(_sanitizeHtml(defaultDoc, 'foo</body>bar')).toEqual('foobar');
+      expect(_sanitizeHtml(defaultDoc, 'foo<body>bar')).toEqual('foobar');
+      expect(_sanitizeHtml(defaultDoc, 'fo<body>ob</body>ar')).toEqual('foobar');
+    });
+
    it('should not enter an infinite loop on clobbered elements', () => {
      // Some browsers are vulnerable to clobbered elements and will throw an expected exception
      // IE and EDGE does not seems to be affected by those cases