fix(core): use appropriate inert document strategy for Firefox & Safari (#17019)

Both Firefox and Safari are vulnerable to XSS if we use an inert document created via `document.implementation.createHTMLDocument()`. We now check for those vulnerabilities and use a DOMParser or XHR strategy if needed. Furthermore, platform-server has its own library for parsing HTML, so we sniff for that (by checking whether DOMParser exists) and fall back to the standard strategy. Thanks to @cure53 for the heads-up on this issue. PR Close #17019
2017-08-31 22:05:18 +01:00 · 2017-08-31 22:05:18 +01:00 · a751649c8d
commit a751649c8d
parent 3f5a3d6ea1
4 changed files with 250 additions and 82 deletions
--- a/integration/_payload-limits.json
+++ b/integration/_payload-limits.json
@ -3,7 +3,7 @@
    "master": {
      "uncompressed": {
        "inline": 1447,
-        "main": 151639,
+        "main": 154185,
        "polyfills": 59179
      }
    }
@ -11,7 +11,7 @@
  "hello_world__closure": {
    "master": {
      "uncompressed": {
-        "bundle": 100661
+        "bundle": 101744
      }
    }
  },
--- a/packages/platform-browser/src/security/html_sanitizer.ts
+++ b/packages/platform-browser/src/security/html_sanitizer.ts
@ -10,35 +10,9 @@ import {isDevMode} from '@angular/core';

 import {DomAdapter, getDOM} from '../dom/dom_adapter';

+import {InertBodyHelper} from './inert_body';
 import {sanitizeSrcset, sanitizeUrl} from './url_sanitizer';

-/** A <body> element that can be safely used to parse untrusted HTML. Lazily initialized below. */
-let inertElement: HTMLElement|null = null;
-/** Lazily initialized to make sure the DOM adapter gets set before use. */
-let DOM: DomAdapter = null !;
-
-/** Returns an HTML element that is guaranteed to not execute code when creating elements in it. */
-function getInertElement() {
-  if (inertElement) return inertElement;
-  DOM = getDOM();
-
-  // Prefer using <template> element if supported.
-  const templateEl = DOM.createElement('template');
-  if ('content' in templateEl) return templateEl;
-
-  const doc = DOM.createHtmlDocument();
-  inertElement = DOM.querySelector(doc, 'body');
-  if (inertElement == null) {
-    // usually there should be only one body element in the document, but IE doesn't have any, so we
-    // need to create one.
-    const html = DOM.createElement('html', doc);
-    inertElement = DOM.createElement('body', doc);
-    DOM.appendChild(html, inertElement);
-    DOM.appendChild(doc, html);
-  }
-  return inertElement;
-}
-
 function tagSet(tags: string): {[k: string]: boolean} {
  const res: {[k: string]: boolean} = {};
  for (const t of tags.split(',')) res[t] = true;
@ -121,53 +95,54 @@ class SanitizingHtmlSerializer {
  // because characters were re-encoded.
  public sanitizedSomething = false;
  private buf: string[] = [];
+  private DOM = getDOM();

  sanitizeChildren(el: Element): string {
    // This cannot use a TreeWalker, as it has to run on Angular's various DOM adapters.
    // However this code never accesses properties off of `document` before deleting its contents
    // again, so it shouldn't be vulnerable to DOM clobbering.
-    let current: Node = el.firstChild !;
+    let current: Node = this.DOM.firstChild(el) !;
    while (current) {
-      if (DOM.isElementNode(current)) {
+      if (this.DOM.isElementNode(current)) {
        this.startElement(current as Element);
-      } else if (DOM.isTextNode(current)) {
-        this.chars(DOM.nodeValue(current) !);
+      } else if (this.DOM.isTextNode(current)) {
+        this.chars(this.DOM.nodeValue(current) !);
      } else {
        // Strip non-element, non-text nodes.
        this.sanitizedSomething = true;
      }
-      if (DOM.firstChild(current)) {
-        current = DOM.firstChild(current) !;
+      if (this.DOM.firstChild(current)) {
+        current = this.DOM.firstChild(current) !;
        continue;
      }
      while (current) {
        // Leaving the element. Walk up and to the right, closing tags as we go.
-        if (DOM.isElementNode(current)) {
+        if (this.DOM.isElementNode(current)) {
          this.endElement(current as Element);
        }

-        let next = checkClobberedElement(current, DOM.nextSibling(current) !);
+        let next = this.checkClobberedElement(current, this.DOM.nextSibling(current) !);

        if (next) {
          current = next;
          break;
        }

-        current = checkClobberedElement(current, DOM.parentElement(current) !);
+        current = this.checkClobberedElement(current, this.DOM.parentElement(current) !);
      }
    }
    return this.buf.join('');
  }

  private startElement(element: Element) {
-    const tagName = DOM.nodeName(element).toLowerCase();
+    const tagName = this.DOM.nodeName(element).toLowerCase();
    if (!VALID_ELEMENTS.hasOwnProperty(tagName)) {
      this.sanitizedSomething = true;
      return;
    }
    this.buf.push('<');
    this.buf.push(tagName);
-    DOM.attributeMap(element).forEach((value: string, attrName: string) => {
+    this.DOM.attributeMap(element).forEach((value: string, attrName: string) => {
      const lower = attrName.toLowerCase();
      if (!VALID_ATTRS.hasOwnProperty(lower)) {
        this.sanitizedSomething = true;
@ -186,7 +161,7 @@ class SanitizingHtmlSerializer {
  }

  private endElement(current: Element) {
-    const tagName = DOM.nodeName(current).toLowerCase();
+    const tagName = this.DOM.nodeName(current).toLowerCase();
    if (VALID_ELEMENTS.hasOwnProperty(tagName) && !VOID_ELEMENTS.hasOwnProperty(tagName)) {
      this.buf.push('</');
      this.buf.push(tagName);
@ -195,14 +170,14 @@ class SanitizingHtmlSerializer {
  }

  private chars(chars: string) { this.buf.push(encodeEntities(chars)); }
-}

-function checkClobberedElement(node: Node, nextNode: Node): Node {
-  if (nextNode && DOM.contains(node, nextNode)) {
-    throw new Error(
-        `Failed to sanitize html because the element is clobbered: ${DOM.getOuterHTML(node)}`);
+  checkClobberedElement(node: Node, nextNode: Node): Node {
+    if (nextNode && this.DOM.contains(node, nextNode)) {
+      throw new Error(
+          `Failed to sanitize html because the element is clobbered: ${this.DOM.getOuterHTML(node)}`);
+    }
+    return nextNode;
  }
-  return nextNode;
 }

 // Regular Expressions for parsing tags and attributes
@ -232,33 +207,20 @@ function encodeEntities(value: string) {
      .replace(/>/g, '&gt;');
 }

-/**
- * When IE9-11 comes across an unknown namespaced attribute e.g. 'xlink:foo' it adds 'xmlns:ns1'
- * attribute to declare ns1 namespace and prefixes the attribute with 'ns1' (e.g. 'ns1:xlink:foo').
- *
- * This is undesirable since we don't want to allow any of these custom attributes. This method
- * strips them all.
- */
-function stripCustomNsAttrs(el: Element) {
-  DOM.attributeMap(el).forEach((_, attrName) => {
-    if (attrName === 'xmlns:ns1' || attrName.indexOf('ns1:') === 0) {
-      DOM.removeAttribute(el, attrName);
-    }
-  });
-  for (const n of DOM.childNodesAsList(el)) {
-    if (DOM.isElementNode(n)) stripCustomNsAttrs(n as Element);
-  }
-}
+let inertBodyHelper: InertBodyHelper;

 /**
 * Sanitizes the given unsafe, untrusted HTML fragment, and returns HTML text that is safe to add to
 * the DOM in a browser environment.
 */
 export function sanitizeHtml(defaultDoc: any, unsafeHtmlInput: string): string {
+  const DOM = getDOM();
+  let inertBodyElement: HTMLElement|null = null;
  try {
-    const containerEl = getInertElement();
+    inertBodyHelper = inertBodyHelper || new InertBodyHelper(defaultDoc, DOM);
    // Make sure unsafeHtml is actually a string (TypeScript types are not enforced at runtime).
    let unsafeHtml = unsafeHtmlInput ? String(unsafeHtmlInput) : '';
+    inertBodyElement = inertBodyHelper.getInertBodyElement(unsafeHtml);

    // mXSS protection. Repeatedly parse the document to make sure it stabilizes, so that a browser
    // trying to auto-correct incorrect HTML cannot cause formerly inert HTML to become dangerous.
@ -272,31 +234,25 @@ export function sanitizeHtml(defaultDoc: any, unsafeHtmlInput: string): string {
      mXSSAttempts--;

      unsafeHtml = parsedHtml;
-      DOM.setInnerHTML(containerEl, unsafeHtml);
-      if (defaultDoc.documentMode) {
-        // strip custom-namespaced attributes on IE<=11
-        stripCustomNsAttrs(containerEl);
-      }
-      parsedHtml = DOM.getInnerHTML(containerEl);
+      parsedHtml = DOM.getInnerHTML(inertBodyElement);
+      inertBodyElement = inertBodyHelper.getInertBodyElement(unsafeHtml);
    } while (unsafeHtml !== parsedHtml);

    const sanitizer = new SanitizingHtmlSerializer();
-    const safeHtml = sanitizer.sanitizeChildren(DOM.getTemplateContent(containerEl) || containerEl);
-
-    // Clear out the body element.
-    const parent = DOM.getTemplateContent(containerEl) || containerEl;
-    for (const child of DOM.childNodesAsList(parent)) {
-      DOM.removeChild(parent, child);
-    }
-
+    const safeHtml =
+        sanitizer.sanitizeChildren(DOM.getTemplateContent(inertBodyElement) || inertBodyElement);
    if (isDevMode() && sanitizer.sanitizedSomething) {
      DOM.log('WARNING: sanitizing HTML stripped some content (see http://g.co/ng/security#xss).');
    }

    return safeHtml;
-  } catch (e) {
+  } finally {
    // In case anything goes wrong, clear out inertElement to reset the entire DOM structure.
-    inertElement = null;
-    throw e;
+    if (inertBodyElement) {
+      const parent = DOM.getTemplateContent(inertBodyElement) || inertBodyElement;
+      for (const child of DOM.childNodesAsList(parent)) {
+        DOM.removeChild(parent, child);
+      }
+    }
  }
 }
--- a/packages/platform-browser/src/security/inert_body.ts
+++ b/packages/platform-browser/src/security/inert_body.ts
@ -0,0 +1,171 @@
+/**
+ * @license
+ * Copyright Google Inc. All Rights Reserved.
+ *
+ * Use of this source code is governed by an MIT-style license that can be
+ * found in the LICENSE file at https://angular.io/license
+ */
+
+import {DomAdapter, getDOM} from '../dom/dom_adapter';
+
+/**
+ * This helper class is used to get hold of an inert tree of DOM elements containing dirty HTML
+ * that needs sanitizing.
+ * Depending upon browser support we must use one of three strategies for doing this.
+ * Support: Safari 10.x -> XHR strategy
+ * Support: Firefox -> DOMParser strategy
+ * Default: InertDocument strategy
+ */
+export class InertBodyHelper {
+  private inertBodyElement: HTMLElement;
+
+  constructor(private defaultDoc: any, private DOM: DomAdapter) {
+    const inertDocument = this.DOM.createHtmlDocument();
+    this.inertBodyElement = inertDocument.body;
+
+    if (this.inertBodyElement == null) {
+      // usually there should be only one body element in the document, but IE doesn't have any, so
+      // we need to create one.
+      const inertHtml = this.DOM.createElement('html', inertDocument);
+      this.inertBodyElement = this.DOM.createElement('body', inertDocument);
+      this.DOM.appendChild(inertHtml, this.inertBodyElement);
+      this.DOM.appendChild(inertDocument, inertHtml);
+    }
+
+    this.DOM.setInnerHTML(
+        this.inertBodyElement, '<svg><g onload="this.parentNode.remove()"></g></svg>');
+    if (this.inertBodyElement.querySelector && !this.inertBodyElement.querySelector('svg')) {
+      // We just hit the Safari 10.1 bug - which allows JS to run inside the SVG G element
+      // so use the XHR strategy.
+      this.getInertBodyElement = this.getInertBodyElement_XHR;
+      return;
+    }
+
+    this.DOM.setInnerHTML(
+        this.inertBodyElement, '<svg><p><style><img src="</style><img src=x onerror=alert(1)//">');
+    if (this.inertBodyElement.querySelector && this.inertBodyElement.querySelector('svg img')) {
+      // We just hit the Firefox bug - which prevents the inner img JS from being sanitized
+      // so use the DOMParser strategy, if it is available.
+      // If the DOMParser is not available then we are not in Firefox (Server/WebWorker?) so we
+      // fall through to the default strategy below.
+      if (isDOMParserAvailable()) {
+        this.getInertBodyElement = this.getInertBodyElement_DOMParser;
+        return;
+      }
+    }
+
+    // None of the bugs were hit so it is safe for us to use the default InertDocument strategy
+    this.getInertBodyElement = this.getInertBodyElement_InertDocument;
+  }
+
+  /**
+   * Get an inert DOM element containing DOM created from the dirty HTML string provided.
+   * The implementation of this is determined in the constructor, when the class is instantiated.
+   */
+  getInertBodyElement: (html: string) => HTMLElement | null;
+
+  /**
+   * Use XHR to create and fill an inert body element (on Safari 10.1)
+   * See
+   * https://github.com/cure53/DOMPurify/blob/a992d3a75031cb8bb032e5ea8399ba972bdf9a65/src/purify.js#L439-L449
+   */
+  private getInertBodyElement_XHR(html: string) {
+    // We add these extra elements to ensure that the rest of the content is parsed as expected
+    // e.g. leading whitespace is maintained and tags like `<meta>` do not get hoisted to the
+    // `<head>` tag.
+    html = '<body><remove></remove>' + html + '</body>';
+    try {
+      html = encodeURI(html);
+    } catch (e) {
+      return null;
+    }
+    const xhr = new XMLHttpRequest();
+    xhr.responseType = 'document';
+    xhr.open('GET', 'data:text/html;charset=utf-8,' + html, false);
+    xhr.send(null);
+    const body: HTMLBodyElement = xhr.response.body;
+    body.removeChild(body.firstChild !);
+    return body;
+  }
+
+  /**
+   * Use DOMParser to create and fill an inert body element (on Firefox)
+   * See https://github.com/cure53/DOMPurify/releases/tag/0.6.7
+   *
+   */
+  private getInertBodyElement_DOMParser(html: string) {
+    // We add these extra elements to ensure that the rest of the content is parsed as expected
+    // e.g. leading whitespace is maintained and tags like `<meta>` do not get hoisted to the
+    // `<head>` tag.
+    html = '<body><remove></remove>' + html + '</body>';
+    try {
+      const body = new (window as any)
+                       .DOMParser()
+                       .parseFromString(html, 'text/html')
+                       .body as HTMLBodyElement;
+      body.removeChild(body.firstChild !);
+      return body;
+    } catch (e) {
+      return null;
+    }
+  }
+
+  /**
+   * Use an HTML5 `template` element, if supported, or an inert body element created via
+   * `createHtmlDocument` to create and fill an inert DOM element.
+   * This is the default sane strategy to use if the browser does not require one of the specialised
+   * strategies above.
+   */
+  private getInertBodyElement_InertDocument(html: string) {
+    // Prefer using <template> element if supported.
+    const templateEl = this.DOM.createElement('template');
+    if ('content' in templateEl) {
+      this.DOM.setInnerHTML(templateEl, html);
+      return templateEl;
+    }
+
+    this.DOM.setInnerHTML(this.inertBodyElement, html);
+
+    // Support: IE 9-11 only
+    // strip custom-namespaced attributes on IE<=11
+    if (this.defaultDoc.documentMode) {
+      this.stripCustomNsAttrs(this.inertBodyElement);
+    }
+
+    return this.inertBodyElement;
+  }
+
+  /**
+   * When IE9-11 comes across an unknown namespaced attribute e.g. 'xlink:foo' it adds 'xmlns:ns1'
+   * attribute to declare ns1 namespace and prefixes the attribute with 'ns1' (e.g.
+   * 'ns1:xlink:foo').
+   *
+   * This is undesirable since we don't want to allow any of these custom attributes. This method
+   * strips them all.
+   */
+  private stripCustomNsAttrs(el: Element) {
+    this.DOM.attributeMap(el).forEach((_, attrName) => {
+      if (attrName === 'xmlns:ns1' || attrName.indexOf('ns1:') === 0) {
+        this.DOM.removeAttribute(el, attrName);
+      }
+    });
+    for (const n of this.DOM.childNodesAsList(el)) {
+      if (this.DOM.isElementNode(n)) this.stripCustomNsAttrs(n as Element);
+    }
+  }
+}
+
+/**
+ * We need to determine whether the DOMParser exists in the global context.
+ * The try-catch is because, on some browsers, trying to access this property
+ * on window can actually throw an error.
+ *
+ * @suppress {uselessCode}
+ */
+function isDOMParserAvailable() {
+  try {
+    return !!(window as any).DOMParser;
+  } catch (e) {
+    return false;
+  }
+}
--- a/packages/platform-browser/test/security/html_sanitizer_spec.ts
+++ b/packages/platform-browser/test/security/html_sanitizer_spec.ts
@ -134,6 +134,32 @@ import {sanitizeHtml} from '../../src/security/html_sanitizer';
      }
    });

+    // See
+    // https://github.com/cure53/DOMPurify/blob/a992d3a75031cb8bb032e5ea8399ba972bdf9a65/src/purify.js#L439-L449
+    it('should not allow JavaScript execution when creating inert document', () => {
+      const output = sanitizeHtml(defaultDoc, '<svg><g onload="window.xxx = 100"></g></svg>');
+      const window = defaultDoc.defaultView;
+      if (window) {
+        expect(window.xxx).toBe(undefined);
+        window.xxx = undefined;
+      }
+      expect(output).toEqual('');
+    });
+
+    // See https://github.com/cure53/DOMPurify/releases/tag/0.6.7
+    it('should not allow JavaScript hidden in badly formed HTML to get through sanitization (Firefox bug)',
+       () => {
+         debugger;
+         expect(sanitizeHtml(
+                    defaultDoc, '<svg><p><style><img src="</style><img src=x onerror=alert(1)//">'))
+             .toEqual(
+                 isDOMParserAvailable() ?
+                     // PlatformBrowser output
+                     '<p>&lt;img src=&#34;<img src="x"></p>' :
+                     // PlatformServer output
+                     '<p><img src="&lt;/style&gt;&lt;img src=x onerror=alert(1)//"></p>');
+       });
+
    if (browserDetection.isWebkit) {
      it('should prevent mXSS attacks', function() {
        // In Chrome Canary 62, the ideographic space character is kept as a stringified HTML entity
@ -143,3 +169,18 @@ import {sanitizeHtml} from '../../src/security/html_sanitizer';
    }
  });
 }
+
+/**
+ * We need to determine whether the DOMParser exists in the global context.
+ * The try-catch is because, on some browsers, trying to access this property
+ * on window can actually throw an error.
+ *
+ * @suppress {uselessCode}
+ */
+function isDOMParserAvailable() {
+  try {
+    return !!(window as any).DOMParser;
+  } catch (e) {
+    return false;
+  }
+}