2020-09-15 10:42:51 -04:00
|
|
|
import xss from "xss";
|
2021-03-17 09:11:40 -04:00
|
|
|
import escape from "discourse-common/lib/escape";
|
2016-06-14 14:31:51 -04:00
|
|
|
|
2024-06-28 08:21:31 -04:00
|
|
|
// Matches an <iframe ...> opening tag that has no `src` attribute, together
// with everything up to and including its closing </iframe> tag (or up to the
// end of the input when the tag is never closed). Global and case-insensitive.
// prettier-ignore
const IFRAME_REGEXP = /<iframe(?![^>]*\s+src\s*=)[^>]*>[\s\S]*?(<\/iframe\s*>|$)/gi;
|
|
|
|
|
2016-06-14 14:31:51 -04:00
|
|
|
/**
 * Renders one HTML attribute as text.
 *
 * @param {string} name - Attribute name, emitted as-is.
 * @param {string} [value] - Attribute value; escaped with `xss.escapeAttrValue`.
 * @returns {string} `name="escaped value"` when `value` is truthy, otherwise
 *   just the bare attribute name.
 */
function attr(name, value) {
  return value ? `${name}="${xss.escapeAttrValue(value)}"` : name;
}
|
|
|
|
|
2021-03-17 09:11:40 -04:00
|
|
|
// Re-export the `escape` helper imported from "discourse-common/lib/escape".
export { escape };
|
2016-06-14 14:31:51 -04:00
|
|
|
|
2016-10-21 11:39:48 -04:00
|
|
|
/**
 * Decides whether a URL may be kept in sanitized output.
 *
 * Single quotes in the URL are percent-encoded first; the encoded URL is what
 * gets tested and returned.
 *
 * @param {string} href - The URL to check.
 * @param {RegExp[]} [extraHrefMatchers] - Additional patterns that also make a
 *   URL acceptable.
 * @returns {string|undefined} The (quote-encoded) URL when it matches one of
 *   the built-in or extra patterns, otherwise `undefined`.
 */
export function hrefAllowed(href, extraHrefMatchers) {
  const encodedHref = href.replace(/'/g, "%27");

  const builtInMatchers = [
    /^(https?:)?\/\/[\w\.\-]+/i, // absolute (or protocol-relative) urls
    /^\/[\w\.\-]+/i, // relative urls
    /^#[\w\.\-]+/i, // anchors
    /^mailto:[\w\.\-@]+/i, // mailtos
  ];

  const matchesBuiltIn = builtInMatchers.some((pattern) =>
    pattern.test(encodedHref)
  );
  const isAllowed =
    matchesBuiltIn ||
    (extraHrefMatchers || []).some((pattern) => pattern.test(encodedHref));

  return isAllowed ? encodedHref : undefined;
}
|
|
|
|
|
2022-05-19 06:18:30 -04:00
|
|
|
/**
 * Sanitizes the URL-bearing attributes of media tags (`img`, `source`,
 * `track`).
 *
 * @param {string} tag - Lower-case tag name.
 * @param {string} attrName - Attribute being inspected.
 * @param {string} value - Raw attribute value.
 * @param {RegExp[]} [extraHrefMatchers] - Extra patterns forwarded to
 *   `hrefAllowed`.
 * @returns {string|undefined} `undefined` when the tag/attribute pair is not a
 *   media source attribute. Otherwise the attribute text produced by `attr`:
 *   `name="value"` for an accepted value, or the bare attribute name when
 *   nothing in the value was accepted.
 */
function sanitizeMediaSrc(tag, attrName, value, extraHrefMatchers) {
  // A Map avoids picking up inherited members (e.g. `constructor`) that a
  // plain-object lookup would return for unexpected tag names.
  const srcAttrs = new Map([
    ["img", ["src"]],
    ["source", ["src", "srcset"]],
    ["track", ["src"]],
  ]);

  if (!srcAttrs.get(tag)?.includes(attrName)) {
    return;
  }

  // NOTE(review): a value starting with `data:image` is passed through whole,
  // including any further srcset candidates after it — confirm this is intended.
  if (value.startsWith("data:image")) {
    return attr(attrName, value);
  }

  if (attrName !== "srcset") {
    return attr(attrName, hrefAllowed(value, extraHrefMatchers));
  }

  // srcset is a comma-separated list of "<url> [descriptor]" candidates, and
  // the separator is commonly written as ", ". Trim each candidate and split
  // on whitespace runs so that leading/repeated spaces do not produce an
  // empty URL, and drop rejected candidates instead of leaving empty segments.
  const sanitizedCandidates = value
    .split(",")
    .map((candidate) => candidate.trim())
    .filter(Boolean)
    .map((candidate) => {
      const [url, descriptor] = candidate.split(/\s+/, 2);
      const allowedUrl = hrefAllowed(url, extraHrefMatchers);
      if (!allowedUrl) {
        return null;
      }
      return descriptor ? `${allowedUrl} ${descriptor}` : allowedUrl;
    })
    .filter(Boolean);

  return attr(attrName, sanitizedCandidates.join(","));
}
|
|
|
|
|
2022-02-09 05:23:44 -05:00
|
|
|
/**
 * Looks for an allow-list entry of a tag that permits the given attribute.
 *
 * Each key of `forTag` is turned into a regular expression anchored at the
 * start of the attribute name; a trailing `*` in the key becomes `\w+?`. An
 * entry matches when the name matches that expression and the entry's value
 * list contains either `"*"` or the attribute value.
 *
 * @param {Object<string, string[]>} forTag - Allow-list entries for one tag.
 * @param {string} name - Attribute name.
 * @param {string} value - Attribute value.
 * @returns {string|undefined} The first matching key, or `undefined`.
 */
function testDataAttribute(forTag, name, value) {
  for (const key of Object.keys(forTag)) {
    const keyPattern = key.replace(/\*$/, "\\w+?");
    if (!new RegExp(`^${keyPattern}`).test(name)) {
      continue;
    }

    const allowedValues = forTag[key];
    if (allowedValues.includes("*") || allowedValues.includes(value)) {
      return key;
    }
  }

  return undefined;
}
|
|
|
|
|
2020-10-27 22:22:06 -04:00
|
|
|
/**
 * Sanitizes an HTML string with the `xss` library, keeping only what the
 * supplied allow lister permits.
 *
 * @param {string} text - Raw HTML. Falsy input yields an empty string.
 * @param {Object} allowLister - Object providing `getAllowList()` (with
 *   `tagList` and `attrList`), `getAllowedHrefSchemes()`,
 *   `getAllowedIframes()` and `getCustom()`.
 * @returns {string} The sanitized HTML.
 */
export function sanitize(text, allowLister) {
  if (!text) {
    return "";
  }

  // Allow things like <3 and <_< by escaping a `<` that is not followed by a
  // letter, `/` or `!`
  const escapedText = text.replace(/<([^A-Za-z\/\!]|$)/g, "&lt;$1");

  const allowList = allowLister.getAllowList();
  const allowedHrefSchemes = allowLister.getAllowedHrefSchemes();
  const allowedIframes = allowLister.getAllowedIframes();

  let extraHrefMatchers = null;

  if (allowedHrefSchemes && allowedHrefSchemes.length > 0) {
    extraHrefMatchers = [
      new RegExp(`^(${allowedHrefSchemes.join("|")})://[\\w\\.\\-]+`, "i"),
    ];
    if (allowedHrefSchemes.includes("tel")) {
      extraHrefMatchers.push(new RegExp("^tel://\\+?[\\w\\.\\-]+", "i"));
    }
  }

  const result = xss(escapedText, {
    allowList: allowList.tagList,
    stripIgnoreTag: true,
    stripIgnoreTagBody: ["script", "table"],

    // Called with each attribute handed to this hook; returns the attribute
    // text to keep, or nothing to leave the decision to the library.
    onIgnoreTagAttr(tag, name, value) {
      const forTag = allowList.attrList[tag];
      if (!forTag) {
        return;
      }

      const forAttr = forTag[name];

      const listedForTag =
        forAttr && (forAttr.includes("*") || forAttr.includes(value));

      const allowedDataAttr =
        !name.includes("data-html-") &&
        name.startsWith("data-") &&
        (forTag["data-*"] || testDataAttribute(forTag, name, value));

      const allowedLink =
        tag === "a" && name === "href" && hrefAllowed(value, extraHrefMatchers);

      // An iframe src must not contain a `/../`-style segment and must start
      // with one of the allowed prefixes (case-insensitive). Empty or missing
      // prefixes are skipped: an empty string is a prefix of every URL, so
      // honouring it would allow any iframe source.
      const allowedIframe =
        tag === "iframe" &&
        name === "src" &&
        !value.match(/\/\.+\//) &&
        allowedIframes.some(
          (prefix) =>
            Boolean(prefix) &&
            value.toLowerCase().startsWith(prefix.toLowerCase())
        );

      if (listedForTag || allowedDataAttr || allowedLink || allowedIframe) {
        return attr(name, value);
      }

      const sanitizedMediaSrc = sanitizeMediaSrc(
        tag,
        name,
        value,
        extraHrefMatchers
      );
      if (sanitizedMediaSrc) {
        return sanitizedMediaSrc;
      }

      if (tag === "iframe" && name === "src") {
        // This iframe is not allowed
        return "";
      }

      if (tag === "video" && name === "autoplay") {
        // This might give us duplicate 'muted' attributes
        // but they will be deduped by later processing
        return "autoplay muted";
      }

      // Heading ids must begin with `heading--`
      // NOTE(review): this check does not look at `name`, so any attribute on
      // a heading whose value matches the pattern is kept — confirm intended.
      if (
        ["h1", "h2", "h3", "h4", "h5", "h6"].includes(tag) &&
        value.match(/^heading\-\-[a-zA-Z0-9\-\_]+$/)
      ) {
        return attr(name, value);
      }

      // Finally give the custom callbacks a chance to accept the attribute.
      for (const fn of allowLister.getCustom()) {
        if (fn(tag, name, value)) {
          return attr(name, value);
        }
      }
    },
  });

  return (
    result
      .replace(/\[removed\]/g, "")
      // Drop iframes that were left without a src attribute
      .replace(IFRAME_REGEXP, "")
      // Escape ampersands that do not already start an entity
      .replace(/&(?![#\w]+;)/g, "&amp;")
      .replace(/'/g, "&#39;")
      .replace(/ \/>/g, ">")
  );
}
|