44 lines
2.0 KiB
TypeScript
Raw Normal View History

import {getDOM} from '../dom/dom_adapter';
2016-05-31 15:22:59 -07:00
import {assertionsEnabled} from '../facade/lang';
/**
* A pattern that recognizes a commonly useful subset of URLs that are safe.
*
* This regular expression matches a subset of URLs that will not cause script
* execution if used in URL context within a HTML document. Specifically, this
* regular expression matches if (comment from here on and regex copied from
* Soy's EscapingConventions):
* (1) Either a protocol in a whitelist (http, https, mailto or ftp).
* (2) or no protocol. A protocol must be followed by a colon. The below
* allows that by allowing colons only after one of the characters [/?#].
* A colon after a hash (#) must be in the fragment.
* Otherwise, a colon after a (?) must be in a query.
* Otherwise, a colon after a single solidus (/) must be in a path.
* Otherwise, a colon after a double solidus (//) must be in the authority
* (before port).
*
* The pattern disallows &, used in HTML entity declarations before
* one of the characters in [/?#]. This disallows HTML entities used in the
* protocol name, which should never happen, e.g. "http" for "http".
* It also disallows HTML entities in the first path part of a relative path,
* e.g. "foo<bar/baz". Our existing escaping functions should not produce
* that. More importantly, it disallows masking of a colon,
* e.g. "javascript:...".
*
* This regular expression was taken from the Closure sanitization library.
*/
const SAFE_URL_PATTERN = /^(?:(?:https?|mailto|ftp|tel|file):|[^&:/?#]*(?:[/?#]|$))/gi;
/** A pattern that matches safe data URLs. Only matches image and video types. */
const DATA_URL_PATTERN =
/^data:(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm));base64,[a-z0-9+\/]+=*$/i;
export function sanitizeUrl(url: string): string {
url = String(url);
if (url.match(SAFE_URL_PATTERN) || url.match(DATA_URL_PATTERN)) return url;
if (assertionsEnabled()) getDOM().log('WARNING: sanitizing unsafe URL value ' + url);
return 'unsafe:' + url;
}