angular-docs-cn/modules/@angular/platform-browser/src/security/html_sanitizer.ts

import {getDOM, DomAdapter} from '../dom/dom_adapter';
import {assertionsEnabled} from '../../src/facade/lang';

import {sanitizeUrl} from './url_sanitizer';

/**
 * Cached inert <body> element used to parse untrusted HTML.
 *
 * Assigned by getInertElement() only on its fallback path (a <body> of a separately created HTML
 * document). When getInertElement() returns a <template> element instead, this variable is left
 * untouched. sanitizeHtml() resets it to null whenever sanitization throws.
 */
let inertElement: HTMLElement = null;
/**
 * DOM adapter, assigned from getDOM() inside getInertElement() rather than at module load time so
 * that the adapter can be set before it is first read.
 */
let DOM: DomAdapter = null;

/**
 * Returns a container element that is guaranteed to not execute code when HTML is parsed into it.
 *
 * A previously cached inert <body> is returned if there is one. Otherwise a <template> element is
 * preferred when the platform supports it (detected via its `content` property); note that this
 * template element is not cached, so a fresh one is created on each such call. As a fallback, the
 * <body> of a newly created HTML document is used and cached in `inertElement`.
 */
function getInertElement() {
  if (!inertElement) {
    DOM = getDOM();

    // <template> contents are inert, so use it whenever it is available.
    const candidate = DOM.createElement('template');
    if ('content' in candidate) {
      return candidate;
    }

    const inertDoc = DOM.createHtmlDocument();
    inertElement = DOM.querySelector(inertDoc, 'body');
    if (inertElement == null) {
      // A document normally has exactly one body element, but IE creates none, so build the
      // <html><body> skeleton by hand.
      const htmlEl = DOM.createElement('html', inertDoc);
      inertElement = DOM.createElement('body', inertDoc);
      DOM.appendChild(htmlEl, inertElement);
      DOM.appendChild(inertDoc, htmlEl);
    }
  }
  return inertElement;
}

/**
 * Builds a lookup set from a comma-separated list of names.
 *
 * @param tags comma-separated names; each name is lower-cased before being stored.
 * @returns an object mapping every (lower-cased) name to `true`.
 */
function tagSet(tags: string): {[k: string]: boolean} {
  const lookup: {[k: string]: boolean} = {};
  tags.split(',').forEach((name) => { lookup[name.toLowerCase()] = true; });
  return lookup;
}

/**
 * Computes the union of several lookup sets.
 *
 * @param sets any number of name-to-boolean maps.
 * @returns a new object in which every own key of every input maps to `true` (regardless of the
 *     value it had in the input); inherited keys are ignored.
 */
function merge(...sets: { [k: string]: boolean }[]): {[k: string]: boolean} {
  const union: {[k: string]: boolean} = {};
  sets.forEach((set) => {
    for (const key in set) {
      if (!set.hasOwnProperty(key)) continue;
      union[key] = true;
    }
  });
  return union;
}

// Good source of info about elements and attributes
// http://dev.w3.org/html5/spec/Overview.html#semantics
// http://simon.html5.org/html-elements

// Safe Void Elements - HTML5
// http://dev.w3.org/html5/spec/Overview.html#void-elements
// Elements in this set are serialized without a closing tag (see
// SanitizingHtmlSerializer.endElement).
const VOID_ELEMENTS = tagSet('area,br,col,hr,img,wbr');

// Elements that you can, intentionally, leave open (and which close themselves)
// http://dev.w3.org/html5/spec/Overview.html#optional-tags
const OPTIONAL_END_TAG_BLOCK_ELEMENTS = tagSet('colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr');
const OPTIONAL_END_TAG_INLINE_ELEMENTS = tagSet('rp,rt');
// Union of the two optional-end-tag sets above.
const OPTIONAL_END_TAG_ELEMENTS =
    merge(OPTIONAL_END_TAG_INLINE_ELEMENTS, OPTIONAL_END_TAG_BLOCK_ELEMENTS);

// Safe Block Elements - HTML5
// Some names (e.g. 'hr', 'del', 'ins', 'map') are also listed in other sets; that is harmless
// because merge() collapses duplicates into a single key.
const BLOCK_ELEMENTS = merge(
    OPTIONAL_END_TAG_BLOCK_ELEMENTS,
    tagSet(
        'address,article,' +
        'aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,' +
        'h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,section,table,ul'));

// Inline Elements - HTML5
const INLINE_ELEMENTS = merge(
    OPTIONAL_END_TAG_INLINE_ELEMENTS,
    tagSet('a,abbr,acronym,b,' +
           'bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,' +
           'samp,small,span,strike,strong,sub,sup,time,tt,u,var'));

// Whitelist of element names the serializer emits tags for; tags of any other element are
// dropped (see SanitizingHtmlSerializer.startElement / endElement).
const VALID_ELEMENTS =
    merge(VOID_ELEMENTS, BLOCK_ELEMENTS, INLINE_ELEMENTS, OPTIONAL_END_TAG_ELEMENTS);

// Attributes whose values are URIs; the serializer passes their values through sanitizeUrl()
// before emitting them.
const URI_ATTRS = tagSet('background,cite,href,longdesc,src,xlink:href');

// Non-URI attributes; their values are emitted with entity-encoding only.
const HTML_ATTRS =
    tagSet('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,' +
           'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,' +
           'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,' +
           'scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,' +
           'valign,value,vspace,width');

// Whitelist of attribute names the serializer emits; every other attribute is dropped.
const VALID_ATTRS = merge(URI_ATTRS, HTML_ATTRS);

/**
 * SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe
 * attributes.
 *
 * Only elements listed in VALID_ELEMENTS and attributes listed in VALID_ATTRS are written out.
 * Tags of other elements are dropped, but their descendants are still visited, so text inside a
 * rejected element is emitted (entity-encoded). Output accumulates in an internal buffer, so an
 * instance is meant to serialize a single fragment.
 */
class SanitizingHtmlSerializer {
  private buf: string[] = [];

  /**
   * Serializes all descendants of `el` (but not `el` itself) into sanitized HTML.
   *
   * The tree is walked iteratively in document order: an opening tag / text is emitted when a
   * node is entered and a closing tag when it is left.
   *
   * @param el root whose children are serialized.
   * @returns the sanitized HTML accumulated so far in this serializer.
   */
  sanitizeChildren(el: Element): string {
    let current: Node = el.firstChild;
    while (current) {
      if (DOM.isElementNode(current)) {
        this.startElement(current);
      } else if (DOM.isTextNode(current)) {
        this.chars(DOM.nodeValue(current));
      }
      if (DOM.firstChild(current)) {
        current = DOM.firstChild(current);
        continue;
      }
      while (current) {
        // Leaving the element. Walk up and to the right, closing tags as we go.
        if (DOM.isElementNode(current)) {
          this.endElement(DOM.nodeName(current));
        }
        if (DOM.nextSibling(current)) {
          current = DOM.nextSibling(current);
          break;
        }
        current = DOM.parentElement(current);
        // Stop once we are back at the root: the root and its ancestors are not part of the
        // serialized fragment, so no closing tags must ever be emitted for them.
        if (current === el) current = null;
      }
    }
    return this.buf.join('');
  }

  /**
   * Emits the opening tag of `element` if its name is whitelisted, keeping only whitelisted
   * attributes. URI-valued attributes are passed through sanitizeUrl(); every attribute value is
   * entity-encoded.
   */
  private startElement(element: any) {
    const tagName = DOM.nodeName(element).toLowerCase();
    if (!VALID_ELEMENTS.hasOwnProperty(tagName)) return;

    this.buf.push('<');
    this.buf.push(tagName);
    DOM.attributeMap(element).forEach((value: string, attrName: string) => {
      let lower = attrName.toLowerCase();
      if (!VALID_ATTRS.hasOwnProperty(lower)) return;
      // TODO(martinprobst): Special case image URIs for data:image/...
      if (URI_ATTRS.hasOwnProperty(lower)) value = sanitizeUrl(value);
      this.buf.push(' ');
      this.buf.push(attrName);
      this.buf.push('="');
      this.buf.push(encodeEntities(value));
      this.buf.push('"');
    });
    this.buf.push('>');
  }

  /**
   * Emits the closing tag for `tagName` if it is whitelisted and not a void element.
   *
   * @param tagName element name in any letter case.
   */
  private endElement(tagName: string) {
    tagName = tagName.toLowerCase();
    if (VALID_ELEMENTS.hasOwnProperty(tagName) && !VOID_ELEMENTS.hasOwnProperty(tagName)) {
      this.buf.push('</');
      this.buf.push(tagName);
      this.buf.push('>');
    }
  }

  /** Emits text content, entity-encoded. */
  private chars(text: string) { this.buf.push(encodeEntities(text)); }
}

// Regular expressions used by encodeEntities() to escape text.
// Matches a UTF-16 surrogate pair (a high surrogate followed by a low surrogate).
const SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
// Matches every character that is NOT one of: '#' through '~', space, '|' or '!'. The range
// deliberately starts at '#' rather than '!', so the double quote (between them in ASCII) is
// matched and therefore escaped.
const NON_ALPHANUMERIC_REGEXP = /([^\#-~ |!])/g;

/**
 * Escapes all potentially dangerous characters, so that the resulting string can be safely
 * inserted into attribute or element text.
 *
 * Steps, in order: '&' becomes '&amp;'; surrogate pairs become a decimal numeric reference of
 * their code point; remaining characters matched by NON_ALPHANUMERIC_REGEXP become decimal
 * numeric references; finally '<' and '>' become '&lt;' and '&gt;'.
 *
 * @param value text to escape
 * @returns escaped text
 */
function encodeEntities(value) {
  const numericEntity = (code: number): string => '&#' + code + ';';

  const ampersandsEscaped = value.replace(/&/g, '&amp;');

  const pairsEscaped = ampersandsEscaped.replace(SURROGATE_PAIR_REGEXP, (pair: string) => {
    const high = pair.charCodeAt(0) - 0xD800;
    const low = pair.charCodeAt(1) - 0xDC00;
    return numericEntity((high * 0x400) + low + 0x10000);
  });

  const unsafeCharsEscaped = pairsEscaped.replace(
      NON_ALPHANUMERIC_REGEXP, (ch: string) => numericEntity(ch.charCodeAt(0)));

  return unsafeCharsEscaped.replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

/**
 * Removes IE's auto-generated namespace attributes from `el` and all of its descendant elements.
 *
 * When IE9-11 comes across an unknown namespaced attribute e.g. 'xlink:foo' it adds 'xmlns:ns1'
 * attribute to declare ns1 namespace and prefixes the attribute with 'ns1' (e.g. 'ns1:xlink:foo').
 * None of these custom attributes should be allowed, so both the 'xmlns:ns1' declaration and
 * every attribute whose name starts with 'ns1:' are stripped.
 */
function stripCustomNsAttrs(el: any) {
  DOM.attributeMap(el).forEach((_value, attrName) => {
    const isCustomNsAttr = attrName === 'xmlns:ns1' || attrName.indexOf('ns1:') === 0;
    if (isCustomNsAttr) DOM.removeAttribute(el, attrName);
  });

  // Recurse into element children only; other node types carry no attributes to strip.
  for (const child of DOM.childNodesAsList(el)) {
    if (!DOM.isElementNode(child)) continue;
    stripCustomNsAttrs(child);
  }
}

/**
 * Sanitizes the given unsafe, untrusted HTML fragment, and returns HTML text that is safe to add to
 * the DOM in a browser environment.
 *
 * The input is parsed into an inert container, re-parsed until its serialization stops changing,
 * and then written back out through SanitizingHtmlSerializer.
 *
 * @param unsafeHtml untrusted HTML; falsy values are treated as the empty string and other values
 *     are coerced with String().
 * @returns the sanitized HTML.
 * @throws Error if the parsed HTML has not stabilized after 5 parse rounds. Any error is re-thrown
 *     after the cached inert element has been discarded.
 */
export function sanitizeHtml(unsafeHtml: string): string {
  try {
    const containerEl = getInertElement();
    // TypeScript types are not enforced at runtime, so coerce the input to a real string.
    unsafeHtml = unsafeHtml ? String(unsafeHtml) : '';

    // mXSS protection. Repeatedly parse the document to make sure it stabilizes, so that a browser
    // trying to auto-correct incorrect HTML cannot cause formerly inert HTML to become dangerous.
    let parsedHtml = unsafeHtml;
    for (let attemptsLeft = 5;; attemptsLeft--) {
      if (attemptsLeft === 0) {
        throw new Error('Failed to sanitize html because the input is unstable');
      }

      unsafeHtml = parsedHtml;
      DOM.setInnerHTML(containerEl, unsafeHtml);
      // `documentMode` only exists on IE<=11, which needs its custom-namespaced attributes
      // stripped.
      if ((DOM.defaultDoc() as any).documentMode) {
        stripCustomNsAttrs(containerEl);
      }
      parsedHtml = DOM.getInnerHTML(containerEl);

      if (unsafeHtml === parsedHtml) break;
    }

    const serializer = new SanitizingHtmlSerializer();
    const safeHtml =
        serializer.sanitizeChildren(DOM.getTemplateContent(containerEl) || containerEl);

    // Empty the container again so no parsed nodes linger in it.
    const parent = DOM.getTemplateContent(containerEl) || containerEl;
    for (const child of DOM.childNodesAsList(parent)) {
      DOM.removeChild(parent, child);
    }

    const contentChanged = safeHtml !== unsafeHtml;
    if (assertionsEnabled() && contentChanged) {
      DOM.log('WARNING: some HTML contents were removed during sanitization.');
    }

    return safeHtml;
  } catch (e) {
    // In case anything goes wrong, clear out inertElement to reset the entire DOM structure.
    inertElement = null;
    throw e;
  }
}
feat(security): add an HTML sanitizer. This is based on Angular 1's implementation, parsing an HTML document into an inert DOM Document implementation, and then serializing only specifically whitelisted elements. It currently does not support SVG sanitization; all SVG elements are rejected. If available, the sanitizer uses the `<template>` HTML element as an inert container. Sanitization works client- and server-side. Reviewers: rjamet, tbosch, molnarg, koto Differential Revision: https://reviews.angular.io/D108 2016-04-30 19:02:05 -07:00			`import {getDOM, DomAdapter} from '../dom/dom_adapter';`
			`import {assertionsEnabled} from '../../src/facade/lang';`

			`import {sanitizeUrl} from './url_sanitizer';`

			`/** A <body> element that can be safely used to parse untrusted HTML. Lazily initialized below. */`
			`let inertElement: HTMLElement = null;`
			`/** Lazily initialized to make sure the DOM adapter gets set before use. */`
			`let DOM: DomAdapter = null;`

			`/** Returns an HTML element that is guaranteed to not execute code when creating elements in it. */`
			`function getInertElement() {`
			`if (inertElement) return inertElement;`
			`DOM = getDOM();`

			`// Prefer using <template> element if supported.`
			`let templateEl = DOM.createElement('template');`
			`if ('content' in templateEl) return templateEl;`

			`let doc = DOM.createHtmlDocument();`
			`inertElement = DOM.querySelector(doc, 'body');`
			`if (inertElement == null) {`
			`// usually there should be only one body element in the document, but IE doesn't have any, so we`
			`// need to create one.`
			`let html = DOM.createElement('html', doc);`
			`inertElement = DOM.createElement('body', doc);`
			`DOM.appendChild(html, inertElement);`
			`DOM.appendChild(doc, html);`
			`}`
			`return inertElement;`
			`}`

			`function tagSet(tags: string): {[k: string]: boolean} {`
			`let res: {[k: string]: boolean} = {};`
			`for (let t of tags.split(',')) res[t.toLowerCase()] = true;`
			`return res;`
			`}`

			`function merge(...sets: { [k: string]: boolean }[]): {[k: string]: boolean} {`
			`let res: {[k: string]: boolean} = {};`
			`for (let s of sets) {`
			`for (let v in s) {`
			`if (s.hasOwnProperty(v)) res[v] = true;`
			`}`
			`}`
			`return res;`
			`}`

			`// Good source of info about elements and attributes`
			`// http://dev.w3.org/html5/spec/Overview.html#semantics`
			`// http://simon.html5.org/html-elements`

			`// Safe Void Elements - HTML5`
			`// http://dev.w3.org/html5/spec/Overview.html#void-elements`
			`const VOID_ELEMENTS = tagSet('area,br,col,hr,img,wbr');`

			`// Elements that you can, intentionally, leave open (and which close themselves)`
			`// http://dev.w3.org/html5/spec/Overview.html#optional-tags`
			`const OPTIONAL_END_TAG_BLOCK_ELEMENTS = tagSet('colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr');`
			`const OPTIONAL_END_TAG_INLINE_ELEMENTS = tagSet('rp,rt');`
			`const OPTIONAL_END_TAG_ELEMENTS =`
			`merge(OPTIONAL_END_TAG_INLINE_ELEMENTS, OPTIONAL_END_TAG_BLOCK_ELEMENTS);`

			`// Safe Block Elements - HTML5`
			`const BLOCK_ELEMENTS = merge(`
			`OPTIONAL_END_TAG_BLOCK_ELEMENTS,`
			`tagSet(`
			`'address,article,' +`
			`'aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,' +`
			`'h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,section,table,ul'));`

			`// Inline Elements - HTML5`
			`const INLINE_ELEMENTS = merge(`
			`OPTIONAL_END_TAG_INLINE_ELEMENTS,`
			`tagSet('a,abbr,acronym,b,' +`
			`'bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,' +`
			`'samp,small,span,strike,strong,sub,sup,time,tt,u,var'));`

			`const VALID_ELEMENTS =`
			`merge(VOID_ELEMENTS, BLOCK_ELEMENTS, INLINE_ELEMENTS, OPTIONAL_END_TAG_ELEMENTS);`

			`// Attributes that have href and hence need to be sanitized`
			`const URI_ATTRS = tagSet('background,cite,href,longdesc,src,xlink:href');`

			`const HTML_ATTRS =`
			`tagSet('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,' +`
			`'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,' +`
			`'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,' +`
			`'scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,' +`
			`'valign,value,vspace,width');`

			`const VALID_ATTRS = merge(URI_ATTRS, HTML_ATTRS);`

			`/**`
			`* SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe`
			`* attributes.`
			`*/`
			`class SanitizingHtmlSerializer {`
			`private buf: string[] = [];`

			`sanitizeChildren(el: Element): string {`
			`let current: Node = el.firstChild;`
			`while (current) {`
			`if (DOM.isElementNode(current)) {`
			`this.startElement(current);`
			`} else if (DOM.isTextNode(current)) {`
			`this.chars(DOM.nodeValue(current));`
			`}`
			`if (DOM.firstChild(current)) {`
			`current = DOM.firstChild(current);`
			`continue;`
			`}`
			`while (current) {`
			`// Leaving the element. Walk up and to the right, closing tags as we go.`
			`if (DOM.isElementNode(current)) {`
			`this.endElement(DOM.nodeName(current).toLowerCase());`
			`}`
			`if (DOM.nextSibling(current)) {`
			`current = DOM.nextSibling(current);`
			`break;`
			`}`
			`current = DOM.parentElement(current);`
			`}`
			`}`
			`return this.buf.join('');`
			`}`

			`private startElement(element: any) {`
			`let tagName = DOM.nodeName(element).toLowerCase();`
			`tagName = tagName.toLowerCase();`
			`if (VALID_ELEMENTS.hasOwnProperty(tagName)) {`
			`this.buf.push('<');`
			`this.buf.push(tagName);`
			`DOM.attributeMap(element).forEach((value: string, attrName: string) => {`
			`let lower = attrName.toLowerCase();`
			`if (!VALID_ATTRS.hasOwnProperty(lower)) return;`
			`// TODO(martinprobst): Special case image URIs for data:image/...`
			`if (URI_ATTRS[lower]) value = sanitizeUrl(value);`
			`this.buf.push(' ');`
			`this.buf.push(attrName);`
			`this.buf.push('="');`
			`this.buf.push(encodeEntities(value));`
			`this.buf.push('"');`
			`});`
			`this.buf.push('>');`
			`}`
			`}`

			`private endElement(tagName: string) {`
			`tagName = tagName.toLowerCase();`
			`if (VALID_ELEMENTS.hasOwnProperty(tagName) && !VOID_ELEMENTS.hasOwnProperty(tagName)) {`
			`this.buf.push('</');`
			`this.buf.push(tagName);`
			`this.buf.push('>');`
			`}`
			`}`

			`private chars(chars) { this.buf.push(encodeEntities(chars)); }`
			`}`

			`// Regular Expressions for parsing tags and attributes`
			`const SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;`
			`// ! to ~ is the ASCII range.`
			`const NON_ALPHANUMERIC_REGEXP = /([^\#-~ \|!])/g;`

			`/**`
			`* Escapes all potentially dangerous characters, so that the`
			`* resulting string can be safely inserted into attribute or`
			`* element text.`
			`* @param value`
			`* @returns {string} escaped text`
			`*/`
			`function encodeEntities(value) {`
			`return value.replace(/&/g, '&amp;')`
			`.replace(SURROGATE_PAIR_REGEXP,`
			`function(match) {`
			`let hi = match.charCodeAt(0);`
			`let low = match.charCodeAt(1);`
			`return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';`
			`})`
			`.replace(NON_ALPHANUMERIC_REGEXP,`
			`function(match) { return '&#' + match.charCodeAt(0) + ';'; })`
			`.replace(/</g, '&lt;')`
			`.replace(/>/g, '&gt;');`
			`}`

			`/**`
			`* When IE9-11 comes across an unknown namespaced attribute e.g. 'xlink:foo' it adds 'xmlns:ns1'`
			`* attribute to declare ns1 namespace and prefixes the attribute with 'ns1' (e.g. 'ns1:xlink:foo').`
			`*`
			`* This is undesirable since we don't want to allow any of these custom attributes. This method`
			`* strips them all.`
			`*/`
			`function stripCustomNsAttrs(el: any) {`
			`DOM.attributeMap(el).forEach((_, attrName) => {`
			`if (attrName === 'xmlns:ns1' \|\| attrName.indexOf('ns1:') === 0) {`
			`DOM.removeAttribute(el, attrName);`
			`}`
			`});`
			`for (let n of DOM.childNodesAsList(el)) {`
			`if (DOM.isElementNode(n)) stripCustomNsAttrs(n);`
			`}`
			`}`

			`/**`
			`* Sanitizes the given unsafe, untrusted HTML fragment, and returns HTML text that is safe to add to`
			`* the DOM in a browser environment.`
			`*/`
			`export function sanitizeHtml(unsafeHtml: string): string {`
			`try {`
			`let containerEl = getInertElement();`
			`// Make sure unsafeHtml is actually a string (TypeScript types are not enforced at runtime).`
			`unsafeHtml = unsafeHtml ? String(unsafeHtml) : '';`

			`// mXSS protection. Repeatedly parse the document to make sure it stabilizes, so that a browser`
			`// trying to auto-correct incorrect HTML cannot cause formerly inert HTML to become dangerous.`
			`let mXSSAttempts = 5;`
			`let parsedHtml = unsafeHtml;`

			`do {`
			`if (mXSSAttempts === 0) {`
			`throw new Error('Failed to sanitize html because the input is unstable');`
			`}`
			`mXSSAttempts--;`

			`unsafeHtml = parsedHtml;`
			`DOM.setInnerHTML(containerEl, unsafeHtml);`
			`if ((DOM.defaultDoc() as any).documentMode) {`
			`// strip custom-namespaced attributes on IE<=11`
			`stripCustomNsAttrs(containerEl);`
			`}`
			`parsedHtml = DOM.getInnerHTML(containerEl);`
			`} while (unsafeHtml !== parsedHtml);`

			`let sanitizer = new SanitizingHtmlSerializer();`
			`let safeHtml = sanitizer.sanitizeChildren(DOM.getTemplateContent(containerEl) \|\| containerEl);`

			`// Clear out the body element.`
			`let parent = DOM.getTemplateContent(containerEl) \|\| containerEl;`
			`for (let child of DOM.childNodesAsList(parent)) {`
			`DOM.removeChild(parent, child);`
			`}`

			`if (assertionsEnabled() && safeHtml !== unsafeHtml) {`
			`DOM.log('WARNING: some HTML contents were removed during sanitization.');`
			`}`

			`return safeHtml;`
			`} catch (e) {`
			`// In case anything goes wrong, clear out inertElement to reset the entire DOM structure.`
			`inertElement = null;`
			`throw e;`
			`}`
			`}`