feat(security): add an HTML sanitizer.

This is based on Angular 1's implementation, parsing an HTML document
into an inert DOM Document implementation, and then serializing only
specifically whitelisted elements.

It currently does not support SVG sanitization; all SVG elements are
rejected.

If available, the sanitizer uses the `<template>` HTML element as an
inert container.

Sanitization works on both the client and the server side.

Reviewers: rjamet, tbosch, molnarg, koto

Differential Revision: https://reviews.angular.io/D108
This commit is contained in:
Martin Probst 2016-04-30 19:02:05 -07:00
parent df1b1f6957
commit f86edae9f3
7 changed files with 442 additions and 44 deletions

View File

@ -4,6 +4,8 @@ import {
expect,
inject,
beforeEachProviders,
beforeEach,
afterEach,
it,
} from '@angular/core/testing/testing_internal';
import {TestComponentBuilder} from '@angular/compiler/testing';
@ -63,7 +65,14 @@ function declareTests(isJit: boolean) {
beforeEachProviders(() => [provide(ANCHOR_ELEMENT, {useValue: el('<div></div>')})]);
describe('safe HTML values', function() {
let originalLog: (msg: any) => any;
beforeEach(() => {
originalLog = getDOM().log;
getDOM().log = (msg) => { /* disable logging */ };
});
afterEach(() => { getDOM().log = originalLog; });
itAsync('should disallow binding on*', (tcb: TestComponentBuilder, async) => {
let tpl = `<div [attr.onclick]="ctxProp"></div>`;
tcb = tcb.overrideView(SecuredComponent, new ViewMetadata({template: tpl}));
@ -76,26 +85,7 @@ function declareTests(isJit: boolean) {
});
});
itAsync('should escape unsafe attributes', (tcb: TestComponentBuilder, async) => {
let tpl = `<a [href]="ctxProp">Link Title</a>`;
tcb.overrideView(SecuredComponent, new ViewMetadata({template: tpl, directives: []}))
.createAsync(SecuredComponent)
.then((fixture) => {
let e = fixture.debugElement.children[0].nativeElement;
fixture.debugElement.componentInstance.ctxProp = 'hello';
fixture.detectChanges();
// In the browser, reading href returns an absolute URL. On the server side,
// it just echoes back the property.
expect(getDOM().getProperty(e, 'href')).toMatch(/.*\/?hello$/);
fixture.debugElement.componentInstance.ctxProp = 'javascript:alert(1)';
fixture.detectChanges();
expect(getDOM().getProperty(e, 'href')).toEqual('unsafe:javascript:alert(1)');
async.done();
});
});
describe('safe HTML values', function() {
itAsync('should not escape values marked as trusted',
[TestComponentBuilder, AsyncTestCompleter, DomSanitizationService],
(tcb: TestComponentBuilder, async, sanitizer: DomSanitizationService) => {
@ -105,8 +95,9 @@ function declareTests(isJit: boolean) {
.createAsync(SecuredComponent)
.then((fixture) => {
let e = fixture.debugElement.children[0].nativeElement;
let ci = fixture.debugElement.componentInstance;
let trusted = sanitizer.bypassSecurityTrustUrl('javascript:alert(1)');
fixture.debugElement.componentInstance.ctxProp = trusted;
ci.ctxProp = trusted;
fixture.detectChanges();
expect(getDOM().getProperty(e, 'href')).toEqual('javascript:alert(1)');
@ -123,13 +114,37 @@ function declareTests(isJit: boolean) {
.createAsync(SecuredComponent)
.then((fixture) => {
let trusted = sanitizer.bypassSecurityTrustScript('javascript:alert(1)');
fixture.debugElement.componentInstance.ctxProp = trusted;
let ci = fixture.debugElement.componentInstance;
ci.ctxProp = trusted;
expect(() => fixture.detectChanges())
.toThrowErrorWith('Required a safe URL, got a Script');
async.done();
});
});
});
describe('sanitizing', () => {
itAsync('should escape unsafe attributes', (tcb: TestComponentBuilder, async) => {
let tpl = `<a [href]="ctxProp">Link Title</a>`;
tcb.overrideView(SecuredComponent, new ViewMetadata({template: tpl, directives: []}))
.createAsync(SecuredComponent)
.then((fixture) => {
let e = fixture.debugElement.children[0].nativeElement;
let ci = fixture.debugElement.componentInstance;
ci.ctxProp = 'hello';
fixture.detectChanges();
// In the browser, reading href returns an absolute URL. On the server side,
// it just echoes back the property.
expect(getDOM().getProperty(e, 'href')).toMatch(/.*\/?hello$/);
ci.ctxProp = 'javascript:alert(1)';
fixture.detectChanges();
expect(getDOM().getProperty(e, 'href')).toEqual('unsafe:javascript:alert(1)');
async.done();
});
});
itAsync('should escape unsafe style values', (tcb: TestComponentBuilder, async) => {
let tpl = `<div [style.background]="ctxProp">Text</div>`;
@ -137,14 +152,15 @@ function declareTests(isJit: boolean) {
.createAsync(SecuredComponent)
.then((fixture) => {
let e = fixture.debugElement.children[0].nativeElement;
let ci = fixture.debugElement.componentInstance;
// Make sure binding harmless values works.
fixture.debugElement.componentInstance.ctxProp = 'red';
ci.ctxProp = 'red';
fixture.detectChanges();
// In some browsers, this will contain the full background specification, not just
// the color.
expect(getDOM().getStyle(e, 'background')).toMatch(/red.*/);
fixture.debugElement.componentInstance.ctxProp = 'url(javascript:evil())';
ci.ctxProp = 'url(javascript:evil())';
fixture.detectChanges();
// Updated value gets rejected, no value change.
expect(getDOM().getStyle(e, 'background')).not.toContain('javascript');
@ -152,6 +168,35 @@ function declareTests(isJit: boolean) {
async.done();
});
});
itAsync('should escape unsafe HTML values', (tcb: TestComponentBuilder, async) => {
let tpl = `<div [innerHTML]="ctxProp">Text</div>`;
tcb.overrideView(SecuredComponent, new ViewMetadata({template: tpl, directives: []}))
.createAsync(SecuredComponent)
.then((fixture) => {
let e = fixture.debugElement.children[0].nativeElement;
let ci = fixture.debugElement.componentInstance;
// Make sure binding harmless values works.
ci.ctxProp = 'some <p>text</p>';
fixture.detectChanges();
expect(getDOM().getInnerHTML(e)).toEqual('some <p>text</p>');
ci.ctxProp = 'ha <script>evil()</script>';
fixture.detectChanges();
expect(getDOM().getInnerHTML(e)).toEqual('ha evil()');
ci.ctxProp = 'also <img src="x" onerror="evil()"> evil';
fixture.detectChanges();
expect(getDOM().getInnerHTML(e)).toEqual('also <img src="x"> evil');
ci.ctxProp = 'also <iframe srcdoc="evil"> evil';
fixture.detectChanges();
expect(getDOM().getInnerHTML(e)).toEqual('also evil');
async.done();
});
});
});
});
}

View File

@ -125,6 +125,9 @@ export class BrowserDomAdapter extends GenericBrowserDomAdapter {
return evt.defaultPrevented || isPresent(evt.returnValue) && !evt.returnValue;
}
getInnerHTML(el): string { return el.innerHTML; }
/**
 * Returns the `content` of `el` when it is a `<template>` element, and null otherwise.
 *
 * The `'content' in el` probe runs first, so `HTMLTemplateElement` is only referenced for
 * elements that actually expose a `content` property.
 */
getTemplateContent(el): Node {
  if (!('content' in el)) return null;
  if (el instanceof HTMLTemplateElement) return el.content;
  return null;
}
getOuterHTML(el): string { return el.outerHTML; }
nodeName(node: Node): string { return node.nodeName; }
nodeValue(node: Node): string { return node.nodeValue; }

View File

@ -56,6 +56,8 @@ export abstract class DomAdapter {
abstract preventDefault(evt);
abstract isPrevented(evt): boolean;
abstract getInnerHTML(el): string;
/** Returns content if el is a <template> element, null otherwise. */
abstract getTemplateContent(el): any;
abstract getOuterHTML(el): string;
abstract nodeName(node): string;
abstract nodeValue(node): string;

View File

@ -1,7 +1,11 @@
import {Injectable} from '@angular/core';
import {SecurityContext, SanitizationService} from '../../core_private';
import {sanitizeHtml} from './html_sanitizer';
import {sanitizeUrl} from './url_sanitizer';
import {sanitizeStyle} from './style_sanitizer';
import {SecurityContext, SanitizationService} from '../../core_private';
import {Injectable} from '@angular/core';
export {SecurityContext};
/** Marker interface for a value that's safe to use in a particular context. */
@ -103,7 +107,7 @@ export class DomSanitizationServiceImpl extends DomSanitizationService {
case SecurityContext.HTML:
if (value instanceof SafeHtmlImpl) return value.changingThisBreaksApplicationSecurity;
this.checkNotSafeValue(value, 'HTML');
return this.sanitizeHtml(String(value));
return sanitizeHtml(String(value));
case SecurityContext.STYLE:
if (value instanceof SafeStyleImpl) return value.changingThisBreaksApplicationSecurity;
this.checkNotSafeValue(value, 'Style');
@ -133,11 +137,6 @@ export class DomSanitizationServiceImpl extends DomSanitizationService {
}
}
private sanitizeHtml(value: string): string {
// TODO(martinprobst): implement.
return value;
}
bypassSecurityTrustHtml(value: string): SafeHtml { return new SafeHtmlImpl(value); }
bypassSecurityTrustStyle(value: string): SafeStyle { return new SafeStyleImpl(value); }
bypassSecurityTrustScript(value: string): SafeScript { return new SafeScriptImpl(value); }

View File

@ -0,0 +1,254 @@
import {getDOM, DomAdapter} from '../dom/dom_adapter';
import {assertionsEnabled} from '../../src/facade/lang';
import {sanitizeUrl} from './url_sanitizer';
/**
 * Cached container that untrusted HTML is parsed into: either a detached <template> element or
 * the <body> of a separately created HTML document. Lazily initialized by getInertElement() and
 * reset to null by sanitizeHtml() when sanitization throws.
 */
let inertElement: HTMLElement = null;
/** Lazily initialized to make sure the DOM adapter gets set before use. */
let DOM: DomAdapter = null;
/**
 * Returns the element used as the parsing container for untrusted HTML. The container is meant to
 * be inert, i.e. creating elements inside it should not execute code.
 *
 * The container is created on first use and then cached in `inertElement`, for both the
 * <template> path and the separate-document fallback, so repeated calls reuse one element
 * instead of allocating a new one each time.
 */
function getInertElement(): HTMLElement {
  if (inertElement) return inertElement;
  DOM = getDOM();
  // Prefer using <template> element if supported.
  let templateEl = DOM.createElement('template');
  if ('content' in templateEl) {
    // Cache the template as well; previously only the <body> fallback was cached.
    inertElement = templateEl;
    return inertElement;
  }
  let doc = DOM.createHtmlDocument();
  inertElement = DOM.querySelector(doc, 'body');
  if (inertElement == null) {
    // usually there should be only one body element in the document, but IE doesn't have any, so we
    // need to create one.
    let html = DOM.createElement('html', doc);
    inertElement = DOM.createElement('body', doc);
    DOM.appendChild(html, inertElement);
    DOM.appendChild(doc, html);
  }
  return inertElement;
}
/**
 * Builds a lookup set from a comma-separated list of names. Every entry is lower-cased and mapped
 * to `true`; entries are not trimmed.
 */
function tagSet(tags: string): {[k: string]: boolean} {
  return tags.split(',').reduce<{[k: string]: boolean}>((lookup, name) => {
    lookup[name.toLowerCase()] = true;
    return lookup;
  }, {});
}
/**
 * Returns a new set containing every own key of the given sets. Each key maps to `true` in the
 * result, regardless of the value it had in its source set.
 */
function merge(...sets: { [k: string]: boolean }[]): {[k: string]: boolean} {
  const union: {[k: string]: boolean} = {};
  sets.forEach((set) => {
    for (const key in set) {
      if (!set.hasOwnProperty(key)) continue;
      union[key] = true;
    }
  });
  return union;
}
// Whitelists of element and attribute names consulted by the sanitizing serializer.
// Good source of info about elements and attributes
// http://dev.w3.org/html5/spec/Overview.html#semantics
// http://simon.html5.org/html-elements
// Safe Void Elements - HTML5
// http://dev.w3.org/html5/spec/Overview.html#void-elements
// The serializer never emits a closing tag for these.
const VOID_ELEMENTS = tagSet('area,br,col,hr,img,wbr');
// Elements that you can, intentionally, leave open (and which close themselves)
// http://dev.w3.org/html5/spec/Overview.html#optional-tags
const OPTIONAL_END_TAG_BLOCK_ELEMENTS = tagSet('colgroup,dd,dt,li,p,tbody,td,tfoot,th,thead,tr');
const OPTIONAL_END_TAG_INLINE_ELEMENTS = tagSet('rp,rt');
const OPTIONAL_END_TAG_ELEMENTS =
merge(OPTIONAL_END_TAG_INLINE_ELEMENTS, OPTIONAL_END_TAG_BLOCK_ELEMENTS);
// Safe Block Elements - HTML5
const BLOCK_ELEMENTS = merge(
OPTIONAL_END_TAG_BLOCK_ELEMENTS,
tagSet(
'address,article,' +
'aside,blockquote,caption,center,del,dir,div,dl,figure,figcaption,footer,h1,h2,h3,h4,h5,' +
'h6,header,hgroup,hr,ins,map,menu,nav,ol,pre,section,table,ul'));
// Inline Elements - HTML5
const INLINE_ELEMENTS = merge(
OPTIONAL_END_TAG_INLINE_ELEMENTS,
tagSet('a,abbr,acronym,b,' +
'bdi,bdo,big,br,cite,code,del,dfn,em,font,i,img,ins,kbd,label,map,mark,q,ruby,rp,rt,s,' +
'samp,small,span,strike,strong,sub,sup,time,tt,u,var'));
// Union of all element sets above: the complete element whitelist. Some names appear in more than
// one source set (e.g. hr, br, img, del, ins, map); merge() collapses the duplicates.
const VALID_ELEMENTS =
merge(VOID_ELEMENTS, BLOCK_ELEMENTS, INLINE_ELEMENTS, OPTIONAL_END_TAG_ELEMENTS);
// Attributes whose value is a URL; the serializer passes these values through sanitizeUrl()
// before writing them out.
const URI_ATTRS = tagSet('background,cite,href,longdesc,src,xlink:href');
// Plain (non-URL) attributes that are copied through with only entity encoding applied.
const HTML_ATTRS =
tagSet('abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,' +
'color,cols,colspan,compact,coords,dir,face,headers,height,hreflang,hspace,' +
'ismap,lang,language,nohref,nowrap,rel,rev,rows,rowspan,rules,' +
'scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,' +
'valign,value,vspace,width');
// Complete attribute whitelist; any attribute not listed here is dropped.
const VALID_ATTRS = merge(URI_ATTRS, HTML_ATTRS);
/**
 * SanitizingHtmlSerializer serializes a DOM fragment, stripping out any unsafe elements and unsafe
 * attributes.
 *
 * Only the tags of non-whitelisted elements are dropped: their children are still visited, so
 * whitelisted descendants and text content remain in the output. Nodes that are neither elements
 * nor text nodes produce no output.
 */
class SanitizingHtmlSerializer {
  /** Output fragments, joined once traversal has finished. */
  private buf: string[] = [];

  /**
   * Serializes all descendants of `el` (but not `el` itself) and returns the sanitized HTML.
   *
   * The traversal is an iterative depth-first walk using only DOM adapter calls. It never climbs
   * above `el`, so nothing outside the given root is visited.
   */
  sanitizeChildren(el: Element): string {
    let current: Node = DOM.firstChild(el);
    while (current) {
      if (DOM.isElementNode(current)) {
        this.startElement(current);
      } else if (DOM.isTextNode(current)) {
        this.chars(DOM.nodeValue(current));
      }
      if (DOM.firstChild(current)) {
        current = DOM.firstChild(current);
        continue;
      }
      while (current) {
        // Leaving the element. Walk up and to the right, closing tags as we go.
        if (DOM.isElementNode(current)) {
          this.endElement(DOM.nodeName(current).toLowerCase());
        }
        if (DOM.nextSibling(current)) {
          current = DOM.nextSibling(current);
          break;
        }
        let parent: Node = DOM.parentElement(current);
        // Stop once we are back at the root: the container and its ancestors must not be
        // serialized or searched for further siblings.
        current = parent === el ? null : parent;
      }
    }
    return this.buf.join('');
  }

  /**
   * Writes the opening tag of `element` if its tag name is whitelisted, keeping only whitelisted
   * attributes. URL-valued attributes are passed through sanitizeUrl(); every attribute value is
   * entity-encoded.
   */
  private startElement(element: any) {
    let tagName = DOM.nodeName(element).toLowerCase();
    if (!VALID_ELEMENTS.hasOwnProperty(tagName)) return;
    this.buf.push('<');
    this.buf.push(tagName);
    DOM.attributeMap(element).forEach((value: string, attrName: string) => {
      let lower = attrName.toLowerCase();
      if (!VALID_ATTRS.hasOwnProperty(lower)) return;
      // TODO(martinprobst): Special case image URIs for data:image/...
      if (URI_ATTRS.hasOwnProperty(lower)) value = sanitizeUrl(value);
      this.buf.push(' ');
      this.buf.push(attrName);
      this.buf.push('="');
      this.buf.push(encodeEntities(value));
      this.buf.push('"');
    });
    this.buf.push('>');
  }

  /** Writes the closing tag for `tagName` if it is whitelisted and not a void element. */
  private endElement(tagName: string) {
    tagName = tagName.toLowerCase();
    if (VALID_ELEMENTS.hasOwnProperty(tagName) && !VOID_ELEMENTS.hasOwnProperty(tagName)) {
      this.buf.push('</');
      this.buf.push(tagName);
      this.buf.push('>');
    }
  }

  /** Writes text content, entity-encoded. */
  private chars(chars: string) { this.buf.push(encodeEntities(chars)); }
}
// Regular expressions used when entity-encoding text and attribute values.
// Matches a UTF-16 surrogate pair (one high surrogate followed by one low surrogate).
const SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
// Matches every character that is NOT '#' through '~', a space, '|' or '!'. In other words it
// matches the double quote, control characters and everything outside ASCII; all of those get
// replaced by numeric character references.
const NON_ALPHANUMERIC_REGEXP = /([^\#-~ |!])/g;
/**
 * Escapes all potentially dangerous characters, so that the
 * resulting string can be safely inserted into attribute or
 * element text.
 *
 * The order of the steps matters: `&` is escaped first so that the ampersands introduced by the
 * later replacements are not escaped again, and surrogate pairs are handled before the generic
 * non-ASCII replacement so that each pair becomes a single code point reference.
 *
 * @param value the raw text or attribute value
 * @returns the escaped text
 */
function encodeEntities(value: string): string {
  return value.replace(/&/g, '&amp;')
      .replace(SURROGATE_PAIR_REGEXP,
               (match: string): string => {
                 const hi = match.charCodeAt(0);
                 const low = match.charCodeAt(1);
                 // Combine both UTF-16 halves into the code point they encode.
                 return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';
               })
      .replace(NON_ALPHANUMERIC_REGEXP,
               (match: string): string => '&#' + match.charCodeAt(0) + ';')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
}
/**
 * Removes the namespace attributes that IE9-11 generates for unknown namespaced attributes.
 *
 * When those browsers meet an attribute such as 'xlink:foo' they declare a namespace through an
 * 'xmlns:ns1' attribute and rename the attribute to 'ns1:xlink:foo'. None of these custom
 * attributes should be allowed, so both the declaration and every 'ns1:'-prefixed attribute are
 * removed from `el` and, recursively, from all of its element descendants.
 */
function stripCustomNsAttrs(el: any) {
  DOM.attributeMap(el).forEach((_value, name) => {
    const generatedByIe = name === 'xmlns:ns1' || name.indexOf('ns1:') === 0;
    if (!generatedByIe) return;
    DOM.removeAttribute(el, name);
  });
  const children = DOM.childNodesAsList(el);
  for (let i = 0; i < children.length; i++) {
    const child = children[i];
    if (DOM.isElementNode(child)) {
      stripCustomNsAttrs(child);
    }
  }
}
/**
 * Sanitizes an untrusted HTML fragment and returns HTML text that is safe to add to the DOM in a
 * browser environment.
 *
 * The input is parsed into the inert container and re-serialized until the markup stops changing
 * (at most five parses), after which only whitelisted elements and attributes are written out.
 * Throws if the markup never stabilizes. Logs a warning through the DOM adapter when assertions
 * are enabled and the sanitized output differs from the stabilized input.
 */
export function sanitizeHtml(unsafeHtml: string): string {
  try {
    const container = getInertElement();
    // Make sure unsafeHtml is actually a string (TypeScript types are not enforced at runtime).
    unsafeHtml = unsafeHtml ? String(unsafeHtml) : '';

    // mXSS protection. Repeatedly parse the document to make sure it stabilizes, so that a browser
    // trying to auto-correct incorrect HTML cannot cause formerly inert HTML to become dangerous.
    let reparsed = unsafeHtml;
    for (let attemptsLeft = 5;; attemptsLeft--) {
      if (attemptsLeft === 0) {
        throw new Error('Failed to sanitize html because the input is unstable');
      }
      unsafeHtml = reparsed;
      DOM.setInnerHTML(container, unsafeHtml);
      if ((DOM.defaultDoc() as any).documentMode) {
        // strip custom-namespaced attributes on IE<=11
        stripCustomNsAttrs(container);
      }
      reparsed = DOM.getInnerHTML(container);
      if (reparsed === unsafeHtml) break;
    }

    // For a <template> container the parsed nodes live in its content fragment.
    const root = DOM.getTemplateContent(container) || container;
    const safeHtml = new SanitizingHtmlSerializer().sanitizeChildren(root);

    // Empty the container again so no parsed nodes are kept around.
    const leftovers = DOM.childNodesAsList(root);
    for (let i = 0; i < leftovers.length; i++) {
      DOM.removeChild(root, leftovers[i]);
    }

    if (assertionsEnabled() && safeHtml !== unsafeHtml) {
      DOM.log('WARNING: some HTML contents were removed during sanitization.');
    }
    return safeHtml;
  } catch (e) {
    // In case anything goes wrong, clear out inertElement to reset the entire DOM structure.
    inertElement = null;
    throw e;
  }
}

View File

@ -0,0 +1,92 @@
import * as t from '@angular/core/testing/testing_internal';
import {browserDetection} from '@angular/platform-browser/testing';
import {getDOM} from '../../src/dom/dom_adapter';
import {sanitizeHtml} from '../../src/security/html_sanitizer';
/** Registers the unit tests for `sanitizeHtml`. */
export function main() {
  t.describe('HTML sanitizer', () => {
    // DOM.log is replaced for the duration of each test so warnings can be inspected.
    let savedLog: (msg: any) => any = null;
    let captured: string[];

    t.beforeEach(() => {
      captured = [];
      const dom = getDOM();
      savedLog = dom.log;
      getDOM().log = (msg) => captured.push(msg);
    });
    t.afterEach(() => { getDOM().log = savedLog; });

    t.it('serializes nested structures', () => {
      const html = '<div alt="x"><p>a</p>b<b>c<a alt="more">d</a></b>e</div>';
      t.expect(sanitizeHtml(html)).toEqual(html);
      t.expect(captured).toEqual([]);
    });

    t.it('serializes self closing elements', () => {
      const html = '<p>Hello <br> World</p>';
      t.expect(sanitizeHtml(html)).toEqual(html);
    });

    t.it('supports namespaced elements', () => {
      t.expect(sanitizeHtml('a<my:hr/><my:div>b</my:div>c')).toEqual('abc');
    });

    t.it('supports namespaced attributes', () => {
      t.expect(sanitizeHtml('<a xlink:href="something">t</a>'))
          .toEqual('<a xlink:href="something">t</a>');
      t.expect(sanitizeHtml('<a xlink:evil="something">t</a>')).toEqual('<a>t</a>');
      t.expect(sanitizeHtml('<a xlink:href="javascript:foo()">t</a>'))
          .toEqual('<a xlink:href="unsafe:javascript:foo()">t</a>');
    });

    t.it('supports sanitizing plain text', () => {
      t.expect(sanitizeHtml('Hello, World')).toEqual('Hello, World');
    });

    t.it('ignores non-element, non-attribute nodes', () => {
      t.expect(sanitizeHtml('<!-- comments? -->no.')).toEqual('no.');
      t.expect(sanitizeHtml('<?pi nodes?>no.')).toEqual('no.');
      const allMessages = captured.join('\n');
      t.expect(allMessages).toMatch(/HTML contents were removed during sanitization/);
    });

    t.it('escapes entities', () => {
      t.expect(sanitizeHtml('<p>Hello &lt; World</p>')).toEqual('<p>Hello &lt; World</p>');
      t.expect(sanitizeHtml('<p>Hello < World</p>')).toEqual('<p>Hello &lt; World</p>');
      // NB: quote encoded as ASCII &#34;.
      t.expect(sanitizeHtml('<p alt="% &amp; &quot; !">Hello</p>'))
          .toEqual('<p alt="% &amp; &#34; !">Hello</p>');
    });

    t.describe('should strip dangerous elements', () => {
      const forbiddenTags = [
        'frameset', 'form', 'param', 'object', 'embed', 'textarea', 'input', 'button',
        'option', 'select', 'script', 'style', 'link', 'base', 'basefont'
      ];
      forbiddenTags.forEach((tag) => {
        t.it(tag, () => {
          t.expect(sanitizeHtml(`<${tag}>evil!</${tag}>`)).toEqual('evil!');
        });
      });

      t.it('swallows frame entirely', () => {
        t.expect(sanitizeHtml('<frame>evil!</frame>')).not.toContain('<frame>');
      });
    });

    t.describe('should strip dangerous attributes', () => {
      const forbiddenAttrs = ['id', 'name', 'style'];
      forbiddenAttrs.forEach((attr) => {
        t.it(attr, () => {
          t.expect(sanitizeHtml(`<a ${attr}="x">evil!</a>`)).toEqual('<a>evil!</a>');
        });
      });
    });

    // This case is only registered when running on WebKit.
    if (browserDetection.isWebkit) {
      t.it('should prevent mXSS attacks', () => {
        t.expect(sanitizeHtml('<a href="&#x3000;javascript:alert(1)">CLICKME</a>'))
            .toEqual('<a href="unsafe:javascript:alert(1)">CLICKME</a>');
      });
    }
  });
}

View File

@ -165,6 +165,9 @@ export class Parse5DomAdapter extends DomAdapter {
preventDefault(evt) { evt.returnValue = false; }
isPrevented(evt): boolean { return isPresent(evt.returnValue) && !evt.returnValue; }
getInnerHTML(el): string { return serializer.serialize(this.templateAwareRoot(el)); }
/**
 * Always returns null for this adapter, whatever element is passed in. Callers that fall back to
 * the element itself on a null result (as the HTML sanitizer does) therefore operate on `el`
 * directly.
 */
getTemplateContent(el): Node {
return null; // no <template> support in parse5.
}
getOuterHTML(el): string {
serializer.html = '';
serializer._serializeElement(el);