FEATURE: Support upload:// urls in img tags (#16277)

Previously, our `upload://` protocol URLs were only supported in markdown image tags. This meant that our PullHotlinkedImages job was forced to convert `<img>` tags to markdown. Depending on the exact syntax, this can actually cause the image to break.

This commit adds support for `upload://` URLs inside regular HTML `<img>` tags. In a future commit, we'll be able to use this to make our PullHotlinkedImages job much more robust.

Context at https://meta.discourse.org/t/152801
This commit is contained in:
David Taylor 2022-03-28 16:46:47 +01:00 committed by GitHub
parent fc40a572bb
commit 720e1ca9e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 128 additions and 54 deletions

View File

@ -1,88 +1,152 @@
import xss from "xss";
const HTML_TYPES = ["html_block", "html_inline"];
/**
 * Records the first attribute of `token` whose value is an `upload://` URL.
 *
 * At most one entry is appended per call: scanning stops at the first match.
 *
 * @param {Array<Object>} uploads - Accumulator that receives a
 *   `{ token, srcIndex, origSrc }` entry, where `srcIndex` is the position of
 *   the matching attribute inside `token.attrs` and `origSrc` is its value.
 * @param {Object} token - Token with an optional `attrs` array of
 *   `[name, value]` pairs. Tokens without `attrs` are ignored.
 */
function addImage(uploads, token) {
  if (!token.attrs) {
    return;
  }

  for (let i = 0; i < token.attrs.length; i++) {
    const value = token.attrs[i][1];

    if (value.startsWith("upload://")) {
      // Push a single entry in object form only. Consumers read `origSrc`
      // and destructure `{ token, srcIndex, origSrc }`, so also pushing the
      // legacy `[token, i]` pair would record the upload twice and feed
      // `undefined` into the URL lookup.
      uploads.push({ token, srcIndex: i, origSrc: value });
      break;
    }
  }
}
/**
 * Serialises a single HTML attribute.
 *
 * @param {string} name - Attribute name.
 * @param {string} [value] - Attribute value. When falsy, the bare name is
 *   returned (boolean-style attribute).
 * @returns {string} `name="value"` with the value passed through
 *   `xss.escapeAttrValue`, or just `name` when there is no value.
 */
function attr(name, value) {
  if (!value) {
    return name;
  }

  const escapedValue = xss.escapeAttrValue(value);
  return `${name}="${escapedValue}"`;
}
/**
 * Builds the placeholder text that stands in for an `upload://` src
 * attribute inside raw HTML content until the final attributes are known.
 *
 * @param {string} url - The original `upload://` URL.
 * @returns {string} The URL wrapped in the `___REPLACE_UPLOAD_SRC_…___` marker.
 */
function uploadLocatorString(url) {
  const prefix = "___REPLACE_UPLOAD_SRC_";
  const suffix = "___";
  return `${prefix}${url}${suffix}`;
}
/**
 * Scans the raw HTML in `blockToken.content` for `<img>` tags whose `src`
 * starts with `upload://`.
 *
 * Each match is appended to `uploads` as
 * `{ token: blockToken, srcIndex: null, origSrc }`, and the matching `src`
 * attribute is swapped for the placeholder from `uploadLocatorString`.
 * `blockToken.content` is only overwritten when at least one match was found.
 *
 * @param {Array<Object>} uploads - Accumulator for discovered uploads.
 * @param {Object} blockToken - Token whose `content` holds raw HTML.
 */
function findUploadsInHtml(uploads, blockToken) {
  // Slightly misusing our HTML sanitizer to look for upload://
  // image src attributes, and replace them with a placeholder.
  // Note that we can't use browser DOM APIs because this needs
  // to run in mini-racer.
  const permittedTags = {};
  let sawUpload = false;

  const rewrittenContent = xss(blockToken.content, {
    whiteList: permittedTags,
    allowCommentTag: true,

    onTag(tag, html, options) {
      // We're not using this for sanitizing, so allow all tags through
      options.isWhite = true;
      permittedTags[tag] = [];
    },

    onTagAttr(tag, name, value) {
      const isUploadSrc =
        tag === "img" && name === "src" && value.startsWith("upload://");

      if (!isUploadSrc) {
        // Everything else is re-emitted unchanged (value re-escaped).
        return attr(name, value);
      }

      uploads.push({ token: blockToken, srcIndex: null, origSrc: value });
      sawUpload = true;
      return uploadLocatorString(value);
    },
  });

  if (sawUpload) {
    blockToken.content = rewrittenContent;
  }
}
/**
 * Collects upload references from `token` and, recursively, from all of its
 * descendants.
 *
 * `img` and `a` tokens are inspected via `addImage`; tokens whose `type` is
 * listed in `HTML_TYPES` are inspected via `findUploadsInHtml`.
 *
 * @param {Array<Object>} uploads - Accumulator for discovered uploads.
 * @param {Object} token - Token to inspect; may carry a `children` array.
 */
function processToken(uploads, token) {
  const isLinkOrImage = token.tag === "img" || token.tag === "a";

  if (isLinkOrImage) {
    addImage(uploads, token);
  } else if (HTML_TYPES.includes(token.type)) {
    findUploadsInHtml(uploads, token);
  }

  if (!token.children) {
    return;
  }

  for (const child of token.children) {
    processToken(uploads, child);
  }
}
function rule(state) {
let uploads = [];
for (let i = 0; i < state.tokens.length; i++) {
let blockToken = state.tokens[i];
if (blockToken.tag === "img" || blockToken.tag === "a") {
addImage(uploads, blockToken);
}
if (!blockToken.children) {
continue;
}
for (let j = 0; j < blockToken.children.length; j++) {
let token = blockToken.children[j];
if (token.tag === "img" || token.tag === "a") {
addImage(uploads, token);
}
}
processToken(uploads, blockToken);
}
if (uploads.length > 0) {
let srcList = uploads.map(([token, srcIndex]) => token.attrs[srcIndex][1]);
let srcList = uploads.map((u) => u.origSrc);
// In client-side cooking, this lookup returns nothing
// This means we set data-orig-src, and let decorateCooked
// lookup the image URLs asynchronously
let lookup = state.md.options.discourse.lookupUploadUrls;
let longUrls = (lookup && lookup(srcList)) || {};
uploads.forEach(([token, srcIndex]) => {
let origSrc = token.attrs[srcIndex][1];
uploads.forEach(({ token, srcIndex, origSrc }) => {
let mapped = longUrls[origSrc];
switch (token.tag) {
case "img":
if (mapped) {
token.attrs[srcIndex][1] = mapped.url;
token.attrs.push(["data-base62-sha1", mapped.base62_sha1]);
} else {
// no point putting a transparent .png for audio/video
if (token.content.match(/\|video|\|audio/)) {
token.attrs[srcIndex][1] = state.md.options.discourse.getURL(
"/404"
);
} else {
token.attrs[srcIndex][1] = state.md.options.discourse.getURL(
"/images/transparent.png"
);
}
if (HTML_TYPES.includes(token.type)) {
const locator = uploadLocatorString(origSrc);
let attrs = [];
token.attrs.push(["data-orig-src", origSrc]);
}
break;
case "a":
if (mapped) {
// when secure media is enabled we want the full /secure-media-uploads/
// url to take advantage of access control security
if (
state.md.options.discourse.limitedSiteSettings.secureMedia &&
mapped.url.indexOf("secure-media-uploads") > -1
) {
token.attrs[srcIndex][1] = mapped.url;
} else {
token.attrs[srcIndex][1] = mapped.short_path;
}
} else {
if (mapped) {
attrs.push(
attr("src", mapped.url),
attr("data-base62-sha1", mapped.base62_sha1)
);
} else {
attrs.push(
attr(
"src",
state.md.options.discourse.getURL("/images/transparent.png")
),
attr("data-orig-src", origSrc)
);
}
token.content = token.content.replace(locator, attrs.join(" "));
} else if (token.tag === "img") {
if (mapped) {
token.attrs[srcIndex][1] = mapped.url;
token.attrs.push(["data-base62-sha1", mapped.base62_sha1]);
} else {
// no point putting a transparent .png for audio/video
if (token.content.match(/\|video|\|audio/)) {
token.attrs[srcIndex][1] = state.md.options.discourse.getURL(
"/404"
);
token.attrs.push(["data-orig-href", origSrc]);
} else {
token.attrs[srcIndex][1] = state.md.options.discourse.getURL(
"/images/transparent.png"
);
}
break;
token.attrs.push(["data-orig-src", origSrc]);
}
} else if (token.tag === "a") {
if (mapped) {
// when secure media is enabled we want the full /secure-media-uploads/
// url to take advantage of access control security
if (
state.md.options.discourse.limitedSiteSettings.secureMedia &&
mapped.url.indexOf("secure-media-uploads") > -1
) {
token.attrs[srcIndex][1] = mapped.url;
} else {
token.attrs[srcIndex][1] = mapped.short_path;
}
} else {
token.attrs[srcIndex][1] = state.md.options.discourse.getURL("/404");
token.attrs.push(["data-orig-href", origSrc]);
}
}
});
}

View File

@ -1877,6 +1877,12 @@ HTML
![upload](#{upload.short_url.gsub(".png", "")})
Inline img <img src="#{upload.short_url}">
<div>
Block img <img src="#{upload.short_url}">
</div>
[some attachment](#{upload.short_url})
[some attachment|attachment](#{upload.short_url})
@ -1901,6 +1907,10 @@ HTML
</li>
</ul>
<p><img src="#{cdn_url}" alt="upload" data-base62-sha1="#{upload.base62_sha1}"></p>
<p>Inline img <img src="#{cdn_url}" data-base62-sha1="#{upload.base62_sha1}"></p>
<div>
Block img <img src="#{cdn_url}" data-base62-sha1="#{upload.base62_sha1}">
</div>
<p><a href="#{upload.short_path}">some attachment</a></p>
<p><a class="attachment" href="#{upload.short_path}">some attachment</a></p>
<p><a href="#{upload.short_path}">some attachment|random</a></p>