import {getDOM} from '../dom/dom_adapter'; import {assertionsEnabled} from '../../src/facade/lang'; /** * A pattern that recognizes a commonly useful subset of URLs that are safe. * * This regular expression matches a subset of URLs that will not cause script * execution if used in URL context within a HTML document. Specifically, this * regular expression matches if (comment from here on and regex copied from * Soy's EscapingConventions): * (1) Either a protocol in a whitelist (http, https, mailto or ftp). * (2) or no protocol. A protocol must be followed by a colon. The below * allows that by allowing colons only after one of the characters [/?#]. * A colon after a hash (#) must be in the fragment. * Otherwise, a colon after a (?) must be in a query. * Otherwise, a colon after a single solidus (/) must be in a path. * Otherwise, a colon after a double solidus (//) must be in the authority * (before port). * * The pattern disallows &, used in HTML entity declarations before * one of the characters in [/?#]. This disallows HTML entities used in the * protocol name, which should never happen, e.g. "http" for "http". * It also disallows HTML entities in the first path part of a relative path, * e.g. "foo<bar/baz". Our existing escaping functions should not produce * that. More importantly, it disallows masking of a colon, * e.g. "javascript:...". * * This regular expression was taken from the Closure sanitization library. */ const SAFE_URL_PATTERN = /^(?:(?:https?|mailto|ftp|tel|file):|[^&:/?#]*(?:[/?#]|$))/gi; /** A pattern that matches safe data URLs. Only matches image and video types. */ const DATA_URL_PATTERN = /^data:(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm));base64,[a-z0-9+\/]+=*$/i; export function sanitizeUrl(url: string): string { url = String(url); if (url.match(SAFE_URL_PATTERN) || url.match(DATA_URL_PATTERN)) return url; if (assertionsEnabled()) getDOM().log('WARNING: sanitizing unsafe URL value ' + url); return 'unsafe:' + url; }