33 lines
1.6 KiB
TypeScript
33 lines
1.6 KiB
TypeScript
|
|
/**
 * A pattern that recognizes a commonly useful subset of URLs that are safe.
 *
 * This regular expression matches a subset of URLs that will not cause script
 * execution if used in URL context within a HTML document. Specifically, this
 * regular expression matches if (comment from here on and regex copied from
 * Soy's EscapingConventions):
 * (1) Either a protocol in an allowlist (http, https, mailto, ftp, tel or
 *     file), matched case-insensitively,
 * (2) or no protocol. A protocol must be followed by a colon. The below
 *     allows that by allowing colons only after one of the characters [/?#].
 *     A colon after a hash (#) must be in the fragment.
 *     Otherwise, a colon after a (?) must be in a query.
 *     Otherwise, a colon after a single solidus (/) must be in a path.
 *     Otherwise, a colon after a double solidus (//) must be in the authority
 *     (before port).
 *
 * The pattern disallows &, used in HTML entity declarations before
 * one of the characters in [/?#]. This disallows HTML entities used in the
 * protocol name, which should never happen, e.g. "h&#116;tp" for "http".
 * It also disallows HTML entities in the first path part of a relative path,
 * e.g. "foo&lt;bar/baz". Our existing escaping functions should not produce
 * that. More importantly, it disallows masking of a colon,
 * e.g. "javascript&#58;...".
 *
 * This regular expression was taken from the Closure sanitization library.
 *
 * Note: the pattern is anchored at the start of the input, so the global (`g`)
 * flag is deliberately NOT set. A global regex keeps state in `lastIndex`,
 * which would make repeated `test()`/`exec()` calls on this shared instance
 * return alternating results.
 */
const SAFE_URL_PATTERN = /^(?:(?:https?|mailto|ftp|tel|file):|[^&:/?#]*(?:[/?#]|$))/i;
|
||
|
|
|
||
|
|
/**
 * Returns `url` unchanged when it matches `SAFE_URL_PATTERN`; otherwise
 * returns the same value prefixed with `'unsafe:'`.
 *
 * @param url The URL to check. It is coerced with `String()` before matching.
 * @returns The original `url` if it matched, else `'unsafe:' + url`.
 */
export function sanitizeUrl(url: string): string {
  // `match` yields an array on success and `null` on failure.
  const looksSafe = String(url).match(SAFE_URL_PATTERN) !== null;
  return looksSafe ? url : 'unsafe:' + url;
}
|