41 lines
1.8 KiB
TypeScript
Raw Normal View History

import {getDOM} from '../dom/dom_adapter';
import {assertionsEnabled} from '../../src/facade/lang';
/**
* A pattern that recognizes a commonly useful subset of URLs that are safe.
*
* This regular expression matches a subset of URLs that will not cause script
* execution if used in URL context within a HTML document. Specifically, this
* regular expression matches if (comment from here on and regex copied from
* Soy's EscapingConventions):
* (1) Either a protocol in a whitelist (http, https, mailto or ftp).
* (2) or no protocol. A protocol must be followed by a colon. The below
* allows that by allowing colons only after one of the characters [/?#].
* A colon after a hash (#) must be in the fragment.
* Otherwise, a colon after a (?) must be in a query.
* Otherwise, a colon after a single solidus (/) must be in a path.
* Otherwise, a colon after a double solidus (//) must be in the authority
* (before port).
*
* The pattern disallows &, used in HTML entity declarations before
* one of the characters in [/?#]. This disallows HTML entities used in the
* protocol name, which should never happen, e.g. "http" for "http".
* It also disallows HTML entities in the first path part of a relative path,
* e.g. "foo<bar/baz". Our existing escaping functions should not produce
* that. More importantly, it disallows masking of a colon,
* e.g. "javascript:...".
*
* This regular expression was taken from the Closure sanitization library.
*/
const SAFE_URL_PATTERN = /^(?:(?:https?|mailto|ftp|tel|file):|[^&:/?#]*(?:[/?#]|$))/gi;
export function sanitizeUrl(url: string): string {
url = String(url);
if (url.match(SAFE_URL_PATTERN)) return url;
if (assertionsEnabled()) {
getDOM().log('WARNING: sanitizing unsafe URL value ' + url);
}
return 'unsafe:' + url;
}