fix(core): traverse and sanitize content of unsafe elements (#28804)

In the past, the sanitizer would remove unsafe elements, but still traverse and sanitize (and potentially preserve) their content. This was problematic in the case of `<style></style>` tags, whose content would be converted to HTML text nodes. In order to fix this, the sanitizer's behavior was changed in #25879 to ignore the content of _all_ unsafe elements. While this fixed the problem with `<style></style>` tags, it unnecessarily removed the contents for _any_ unsafe element. This was an unneeded breaking change. This commit partially restores the old sanitizer behavior (namely traversing content of unsafe elements), but introduces a list of elements whose content should not be traversed if the elements themselves are considered unsafe. Currently, this list contains `style`, `script` and `template`. Related to #25879 and #26007. Fixes #28427 PR Close #28804
2019-02-18 18:14:34 +02:00 · 2019-02-18 18:14:34 +02:00 · 262ba67525
commit 262ba67525
parent dc9f0af080
2 changed files with 48 additions and 26 deletions
--- a/packages/core/src/sanitization/html_sanitizer.ts
+++ b/packages/core/src/sanitization/html_sanitizer.ts
@ -83,6 +83,13 @@ const HTML_ATTRS = tagSet(

 export const VALID_ATTRS = merge(URI_ATTRS, SRCSET_ATTRS, HTML_ATTRS);

+// Elements whose content should not be traversed/preserved, if the elements themselves are invalid.
+//
+// Typically, `<invalid>Some content</invalid>` would traverse (and in this case preserve)
+// `Some content`, but strip `invalid-element` opening/closing tags. For some elements, though, we
+// don't want to preserve the content, if the elements themselves are going to be removed.
+const SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS = tagSet('script,style,template');
+
 /**
 * SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe
 * attributes.
@ -98,17 +105,17 @@ class SanitizingHtmlSerializer {
    // However this code never accesses properties off of `document` before deleting its contents
    // again, so it shouldn't be vulnerable to DOM clobbering.
    let current: Node = el.firstChild !;
-    let elementValid = true;
+    let traverseContent = true;
    while (current) {
      if (current.nodeType === Node.ELEMENT_NODE) {
-        elementValid = this.startElement(current as Element);
+        traverseContent = this.startElement(current as Element);
      } else if (current.nodeType === Node.TEXT_NODE) {
        this.chars(current.nodeValue !);
      } else {
        // Strip non-element, non-text nodes.
        this.sanitizedSomething = true;
      }
-      if (elementValid && current.firstChild) {
+      if (traverseContent && current.firstChild) {
        current = current.firstChild !;
        continue;
      }
@ -132,18 +139,18 @@ class SanitizingHtmlSerializer {
  }

  /**
-   * Outputs only valid Elements.
+   * Sanitizes an opening element tag (if valid) and returns whether the element's contents should
+   * be traversed. Element content must always be traversed (even if the element itself is not
+   * valid/safe), unless the element is one of `SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS`.
   *
-   * Invalid elements are skipped.
-   *
-   * @param element element to sanitize
-   * Returns true if the element is valid.
+   * @param element The element to sanitize.
+   * @return True if the element's contents should be traversed.
   */
  private startElement(element: Element): boolean {
    const tagName = element.nodeName.toLowerCase();
    if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
      this.sanitizedSomething = true;
-      return false;
+      return !SKIP_TRAVERSING_CONTENT_IF_INVALID_ELEMENTS.hasOwnProperty(tagName);
    }
    this.buf.push('<');
    this.buf.push(tagName);
--- a/packages/core/test/sanitization/html_sanitizer_spec.ts
+++ b/packages/core/test/sanitization/html_sanitizer_spec.ts
@ -36,8 +36,8 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
          .toEqual('<p>Hello <br> World</p>');
    });

-    it('supports removal of namespaced elements',
-       () => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('a'); });
+    it('supports namespaced elements',
+       () => { expect(_sanitizeHtml(defaultDoc, 'a<my:hr/><my:div>b</my:div>c')).toEqual('abc'); });

    it('supports namespaced attributes', () => {
      expect(_sanitizeHtml(defaultDoc, '<a xlink:href="something">t</a>'))
@ -85,7 +85,7 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
          .toEqual('<p alt="% &amp; &#34; !">Hello</p>');  // NB: quote encoded as ASCII &#34;.
    });

-    describe('should strip dangerous elements and its content', () => {
+    describe('should strip dangerous elements (but potentially traverse their content)', () => {
      const dangerousTags = [
        'form',
        'object',
@ -93,32 +93,40 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
        'button',
        'option',
        'select',
-        'script',
-        'style',
      ];
-
      for (const tag of dangerousTags) {
-        it(`${tag}`,
-           () => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual(''); });
+        it(tag,
+           () => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual('evil!'); });
      }
+
      const dangerousSelfClosingTags = [
-        'frameset',
-        'embed',
-        'input',
-        'param',
        'base',
        'basefont',
-        'param',
+        'embed',
+        'frameset',
+        'input',
        'link',
+        'param',
      ];
-
      for (const tag of dangerousSelfClosingTags) {
-        it(`${tag}`, () => {
+        it(tag, () => {
          expect(_sanitizeHtml(defaultDoc, `before<${tag}>After`)).toEqual('beforeAfter');
        });
      }

-      it(`swallows frame entirely`, () => {
+      const dangerousSkipContentTags = [
+        'script',
+        'style',
+        'template',
+      ];
+      for (const tag of dangerousSkipContentTags) {
+        it(tag, () => { expect(_sanitizeHtml(defaultDoc, `<${tag}>evil!</${tag}>`)).toEqual(''); });
+      }
+
+      it(`frame`, () => {
+        // `<frame>` is special, because different browsers treat it differently (e.g. remove it
+        // altogether). // We just verify that (one way or another), there is no `<frame>` element
+        // after sanitization.
        expect(_sanitizeHtml(defaultDoc, `<frame>evil!</frame>`)).not.toContain('<frame>');
      });
    });
@ -133,6 +141,13 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
      }
    });

+    it('ignores content of script elements', () => {
+      expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script>')).toEqual('');
+      expect(_sanitizeHtml(defaultDoc, '<script>var foo="<p>bar</p>"</script><div>hi</div>'))
+          .toEqual('<div>hi</div>');
+      expect(_sanitizeHtml(defaultDoc, '<style>\<\!-- something--\>hi</style>')).toEqual('');
+    });
+
    it('ignores content of style elements', () => {
      expect(_sanitizeHtml(defaultDoc, '<style><!-- foobar --></style><div>hi</div>'))
          .toEqual('<div>hi</div>');
@ -186,7 +201,7 @@ import {_sanitizeHtml} from '../../src/sanitization/html_sanitizer';
                     // PlatformBrowser output
                     '<p><img src="x"></p>' :
                     // PlatformServer output
-                     '');
+                     '<p></p>');
       });

    if (browserDetection.isWebkit) {