FEATURE: Allow hotlinked media to be blocked (#16940)

This commit introduces a new site setting: `block_hotlinked_media`. When enabled, all attempts to hotlink media (images, videos, and audio) will fail, and be replaced with a linked placeholder. Exceptions to the rule can be added via `block_hotlinked_media_exceptions`.

`download_remote_image_to_local` can be used alongside this feature. In that case, hotlinked images will be blocked immediately when the post is created, but will then be replaced with the downloaded version a few seconds later.

This implementation is purely server-side, and does not impact the composer preview.

Technically, there are two stages to this feature:

1. `PrettyText.sanitize_hotlinked_media` is called during `PrettyText.cook`, and whenever new images are introduced by Onebox. It will iterate over all src/srcset attributes in the post HTML and check if they're allowed. If not, the attributes will be removed and replaced with a `data-blocked-hotlinked-src(set)` attribute

2. In the `CookedPostProcessor`, we iterate over all `data-blocked-hotlinked-src(set)` attributes and check whether we have a downloaded version of the media. If yes, we update the src to use the downloaded version. If not, the entire media element is replaced with a placeholder. The placeholder is labelled 'external media', and is a link to the offsite media.
This commit is contained in:
David Taylor 2022-06-07 15:23:04 +01:00 committed by GitHub
parent 1a5dbbf430
commit 5238f6788c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 291 additions and 9 deletions

View File

@ -1309,14 +1309,25 @@ a.mention-group {
} }
} }
.broken-image { .broken-image,
display: inline-flex; .blocked-hotlinked-placeholder {
&:not(a) {
color: var(--primary-low-mid-or-secondary-high); color: var(--primary-low-mid-or-secondary-high);
}
display: inline-flex;
flex-direction: column;
border: 1px solid var(--primary-low); border: 1px solid var(--primary-low);
font-size: $font-up-5; align-items: center;
justify-content: center;
.d-icon { .d-icon {
margin: 16px; margin: 16px;
font-size: var(--font-up-5);
}
.notice {
margin: 0 0.5em 0.5em 0.5em;
} }
} }

View File

@ -23,7 +23,7 @@ module Jobs
changed_hotlink_records = false changed_hotlink_records = false
extract_images_from(post.cooked).each do |node| extract_images_from(post.cooked).each do |node|
download_src = original_src = node['src'] || node['href'] download_src = original_src = node['src'] || node[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR] || node['href']
download_src = "#{SiteSetting.force_https ? "https" : "http"}:#{original_src}" if original_src.start_with?("//") download_src = "#{SiteSetting.force_https ? "https" : "http"}:#{original_src}" if original_src.start_with?("//")
normalized_src = normalize_src(download_src) normalized_src = normalize_src(download_src)
@ -130,7 +130,7 @@ module Jobs
def extract_images_from(html) def extract_images_from(html)
doc = Nokogiri::HTML5::fragment(html) doc = Nokogiri::HTML5::fragment(html)
doc.css("img[src], a.lightbox[href]") - doc.css("img[src], [#{PrettyText::BLOCKED_HOTLINKED_SRC_ATTR}], a.lightbox[href]") -
doc.css("img.avatar") - doc.css("img.avatar") -
doc.css(".lightbox img[src]") doc.css(".lightbox img[src]")
end end

View File

@ -46,7 +46,11 @@ class PostAnalyzer
onebox onebox
end end
cooked = result.to_html if result.changed? if result.changed?
PrettyText.sanitize_hotlinked_media(result.doc)
cooked = result.to_html
end
cooked cooked
end end

View File

@ -727,6 +727,11 @@ en:
post: post:
image_placeholder: image_placeholder:
broken: "This image is broken" broken: "This image is broken"
blocked_hotlinked_title: "Image hosted on another site. Click to open in a new tab."
blocked_hotlinked: "External Image"
media_placeholder:
blocked_hotlinked_title: "Media hosted on another site. Click to open in a new tab."
blocked_hotlinked: "External Media"
hidden_bidi_character: "Bidirectional characters can change the order that text is rendered. This could be used to obscure malicious code." hidden_bidi_character: "Bidirectional characters can change the order that text is rendered. This could be used to obscure malicious code."
has_likes: has_likes:
one: "%{count} Like" one: "%{count} Like"
@ -1514,9 +1519,11 @@ en:
contact_email: "Email address of key contact responsible for this site. Used for critical notifications, and also displayed on <a href='%{base_path}/about' target='_blank'>/about</a> for urgent matters." contact_email: "Email address of key contact responsible for this site. Used for critical notifications, and also displayed on <a href='%{base_path}/about' target='_blank'>/about</a> for urgent matters."
contact_url: "Contact URL for this site. Displayed on the <a href='%{base_path}/about' target='_blank'>/about</a> page for urgent matters." contact_url: "Contact URL for this site. Displayed on the <a href='%{base_path}/about' target='_blank'>/about</a> page for urgent matters."
crawl_images: "Retrieve images from remote URLs to insert the correct width and height dimensions." crawl_images: "Retrieve images from remote URLs to insert the correct width and height dimensions."
download_remote_images_to_local: "Convert remote images to local images by downloading them; this prevents broken images." download_remote_images_to_local: "Convert remote (hotlinked) images to local images by downloading them; This preserves content even if the images are removed from the remote site in future."
download_remote_images_threshold: "Minimum disk space necessary to download remote images locally (in percent)" download_remote_images_threshold: "Minimum disk space necessary to download remote images locally (in percent)"
disabled_image_download_domains: "Remote images will never be downloaded from these domains. Pipe-delimited list." disabled_image_download_domains: "Remote images will never be downloaded from these domains. Pipe-delimited list."
block_hotlinked_media: "Prevent users from introducing remote (hotlinked) media in their posts. Remote media which is not downloaded via 'download_remote_images_to_local' will be replaced with a placeholder link."
block_hotlinked_media_exceptions: "A list of base URLs which are exempt from the block_hotlinked_media setting. Include the protocol (e.g. https://example.com)."
editing_grace_period: "For (n) seconds after posting, editing will not create a new version in the post history." editing_grace_period: "For (n) seconds after posting, editing will not create a new version in the post history."
editing_grace_period_max_diff: "Maximum number of character changes allowed in editing grace period, if more changed store another post revision (trust level 0 and 1)" editing_grace_period_max_diff: "Maximum number of character changes allowed in editing grace period, if more changed store another post revision (trust level 0 and 1)"
editing_grace_period_max_diff_high_trust: "Maximum number of character changes allowed in editing grace period, if more changed store another post revision (trust level 2 and up)" editing_grace_period_max_diff_high_trust: "Maximum number of character changes allowed in editing grace period, if more changed store another post revision (trust level 2 and up)"

View File

@ -1326,6 +1326,12 @@ files:
type: list type: list
list_type: simple list_type: simple
default: "" default: ""
block_hotlinked_media:
default: false
block_hotlinked_media_exceptions:
default: ""
type: list
regex: '\A((https?:\/\/.+)(\|https?:\/\/.+[|$])*)?\z'
create_thumbnails: true create_thumbnails: true
clean_up_uploads: true clean_up_uploads: true
clean_orphan_uploads_grace_period_hours: 48 clean_orphan_uploads_grace_period_hours: 48

View File

@ -42,6 +42,7 @@ class CookedPostProcessor
remove_full_quote_on_direct_reply if new_post remove_full_quote_on_direct_reply if new_post
post_process_oneboxes post_process_oneboxes
post_process_images post_process_images
add_blocked_hotlinked_media_placeholders
post_process_quotes post_process_quotes
optimize_urls optimize_urls
remove_user_ids remove_user_ids
@ -120,7 +121,7 @@ class CookedPostProcessor
def extract_images def extract_images
# all images with a src attribute # all images with a src attribute
@doc.css("img[src]") - @doc.css("img[src], img[#{PrettyText::BLOCKED_HOTLINKED_SRC_ATTR}]") -
# minus data images # minus data images
@doc.css("img[src^='data']") - @doc.css("img[src^='data']") -
# minus emojis # minus emojis
@ -371,7 +372,7 @@ class CookedPostProcessor
def process_hotlinked_image(img) def process_hotlinked_image(img)
@hotlinked_map ||= @post.post_hotlinked_media.preload(:upload).map { |r| [r.url, r] }.to_h @hotlinked_map ||= @post.post_hotlinked_media.preload(:upload).map { |r| [r.url, r] }.to_h
normalized_src = PostHotlinkedMedia.normalize_src(img["src"]) normalized_src = PostHotlinkedMedia.normalize_src(img["src"] || img[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR])
info = @hotlinked_map[normalized_src] info = @hotlinked_map[normalized_src]
still_an_image = true still_an_image = true
@ -384,11 +385,37 @@ class CookedPostProcessor
still_an_image = false still_an_image = false
elsif info&.downloaded? && upload = info&.upload elsif info&.downloaded? && upload = info&.upload
img["src"] = UrlHelper.cook_url(upload.url, secure: @with_secure_media) img["src"] = UrlHelper.cook_url(upload.url, secure: @with_secure_media)
img.delete(PrettyText::BLOCKED_HOTLINKED_SRC_ATTR)
end end
still_an_image still_an_image
end end
def add_blocked_hotlinked_media_placeholders
@doc.css([
"[#{PrettyText::BLOCKED_HOTLINKED_SRC_ATTR}]",
"[#{PrettyText::BLOCKED_HOTLINKED_SRCSET_ATTR}]",
].join(',')).each do |el|
src = el[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR] ||
el[PrettyText::BLOCKED_HOTLINKED_SRCSET_ATTR]&.split(',')&.first&.split(' ')&.first
if el.name == "img"
add_blocked_hotlinked_image_placeholder!(el)
next
end
if ["video", "audio"].include?(el.parent.name)
el = el.parent
end
if el.parent.classes.include?("video-container")
el = el.parent
end
add_blocked_hotlinked_media_placeholder!(el, src)
end
end
def is_svg?(img) def is_svg?(img)
path = path =
begin begin

View File

@ -40,6 +40,8 @@ module CookedProcessorMixin
end end
end end
PrettyText.sanitize_hotlinked_media(@doc)
oneboxed_images.each do |img| oneboxed_images.each do |img|
next if img["src"].blank? next if img["src"].blank?
@ -251,6 +253,31 @@ module CookedProcessorMixin
true true
end end
def add_blocked_hotlinked_image_placeholder!(el)
el.name = "a"
el.set_attribute("href", el[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR])
el.set_attribute("class", "blocked-hotlinked-placeholder")
el.set_attribute("title", I18n.t("post.image_placeholder.blocked_hotlinked_title"))
el << "<svg class=\"fa d-icon d-icon-link svg-icon\" aria-hidden=\"true\"><use href=\"#link\"></use></svg>"
el << "<span class=\"notice\">#{CGI.escapeHTML(I18n.t("post.image_placeholder.blocked_hotlinked"))}</span>"
true
end
def add_blocked_hotlinked_media_placeholder!(el, src)
placeholder = Nokogiri::XML::Node.new("a", el.document)
placeholder.name = "a"
placeholder.set_attribute("href", src)
placeholder.set_attribute("class", "blocked-hotlinked-placeholder")
placeholder.set_attribute("title", I18n.t("post.media_placeholder.blocked_hotlinked_title"))
placeholder << "<svg class=\"fa d-icon d-icon-link svg-icon\" aria-hidden=\"true\"><use href=\"#link\"></use></svg>"
placeholder << "<span class=\"notice\">#{CGI.escapeHTML(I18n.t("post.media_placeholder.blocked_hotlinked"))}</span>"
el.replace(placeholder)
true
end
def oneboxed_images def oneboxed_images
@doc.css(".onebox-body img, .onebox img, img.onebox") @doc.css(".onebox-body img, .onebox img, img.onebox")
end end

View File

@ -18,6 +18,9 @@ module PrettyText
].freeze ].freeze
DANGEROUS_BIDI_REGEXP = Regexp.new(DANGEROUS_BIDI_CHARACTERS.join("|")).freeze DANGEROUS_BIDI_REGEXP = Regexp.new(DANGEROUS_BIDI_CHARACTERS.join("|")).freeze
BLOCKED_HOTLINKED_SRC_ATTR = "data-blocked-hotlinked-src"
BLOCKED_HOTLINKED_SRCSET_ATTR = "data-blocked-hotlinked-srcset"
@mutex = Mutex.new @mutex = Mutex.new
@ctx_init = Mutex.new @ctx_init = Mutex.new
@ -318,6 +321,7 @@ module PrettyText
add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
add_rel_attributes_to_user_content(doc, add_nofollow) add_rel_attributes_to_user_content(doc, add_nofollow)
strip_hidden_unicode_bidirectional_characters(doc) strip_hidden_unicode_bidirectional_characters(doc)
sanitize_hotlinked_media(doc)
if SiteSetting.enable_mentions if SiteSetting.enable_mentions
add_mentions(doc, user_id: opts[:user_id]) add_mentions(doc, user_id: opts[:user_id])
@ -348,6 +352,25 @@ module PrettyText
end end
end end
def self.sanitize_hotlinked_media(doc)
return if !SiteSetting.block_hotlinked_media
allowed_pattern = allowed_src_pattern
doc.css("img[src], source[src], source[srcset], track[src]").each do |el|
if el["src"] && !el["src"].match?(allowed_pattern)
el[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR] = el.delete("src")
end
if el["srcset"]
srcs = el["srcset"].split(',').map { |e| e.split(' ', 2)[0].presence }
if srcs.any? { |src| !src.match?(allowed_pattern) }
el[PrettyText::BLOCKED_HOTLINKED_SRCSET_ATTR] = el.delete("srcset")
end
end
end
end
def self.add_rel_attributes_to_user_content(doc, add_nofollow) def self.add_rel_attributes_to_user_content(doc, add_nofollow)
allowlist = [] allowlist = []
@ -673,4 +696,25 @@ module PrettyText
mentions mentions
end end
def self.allowed_src_pattern
allowed_src_prefixes = [
Discourse.base_path,
Discourse.base_url,
GlobalSetting.s3_cdn_url,
GlobalSetting.cdn_url,
SiteSetting.external_emoji_url.presence,
*SiteSetting.block_hotlinked_media_exceptions.split("|")
]
patterns = allowed_src_prefixes.compact.map do |url|
pattern = Regexp.escape(url)
# If 'https://example.com' is allowed, ensure 'https://example.com.blah.com' is not
pattern += '(?:/|\z)' if !pattern.ends_with?("\/")
pattern
end
/\A(data:|#{patterns.join("|")})/
end
end end

View File

@ -0,0 +1,156 @@
# frozen_string_literal: true
describe "hotlinked media blocking" do
let(:hotlinked_url) { "http://example.com/images/2/2e/Longcat1.png" }
let(:onebox_url) { "http://example.com/onebox" }
let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") }
before do
SiteSetting.download_remote_images_to_local = false
stub_request(:get, hotlinked_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
stub_image_size
end
it "normally allows hotlinked images" do
post = Fabricate(:post, raw: "<img src='#{hotlinked_url}'>")
expect(post.cooked).to have_tag("img", with: { "src" => hotlinked_url })
end
context "with hotlinked media blocked, before post-processing" do
before do
SiteSetting.block_hotlinked_media = true
Oneboxer.stubs(:cached_onebox).returns("<aside class='onebox'><img src='#{hotlinked_url}'></aside>")
end
it "blocks hotlinked images" do
post = Fabricate(:post, raw: "<img src='#{hotlinked_url}'>")
expect(post.cooked).not_to have_tag("img[src]")
expect(post.cooked).to have_tag("img", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => hotlinked_url })
end
it "blocks hotlinked videos with src" do
post = Fabricate(:post, raw: "![alt text|video](#{hotlinked_url})")
expect(post.cooked).not_to have_tag("video source[src]")
expect(post.cooked).to have_tag("video source", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => hotlinked_url })
end
it "blocks hotlinked videos with srcset" do
srcset = "#{hotlinked_url} 1x,https://example.com 2x"
post = Fabricate(:post, raw: "<video><source srcset='#{srcset}'></video>")
expect(post.cooked).not_to have_tag("video source[srcset]")
expect(post.cooked).to have_tag("video source", with: { PrettyText::BLOCKED_HOTLINKED_SRCSET_ATTR => srcset })
end
it "blocks hotlinked audio" do
post = Fabricate(:post, raw: "![alt text|audio](#{hotlinked_url})")
expect(post.cooked).not_to have_tag("audio source[src]")
expect(post.cooked).to have_tag("audio source", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => hotlinked_url })
end
it "blocks hotlinked onebox content when cached (post_analyzer)" do
post = Fabricate(:post, raw: "#{onebox_url}")
expect(post.cooked).not_to have_tag("img[src]")
expect(post.cooked).to have_tag("img", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => hotlinked_url })
end
it "allows relative URLs" do
src = "/assets/images/blah.png"
post = Fabricate(:post, raw: "![](#{src})")
expect(post.cooked).to have_tag("img", with: { src: src })
end
it "allows data URIs" do
src = "data:image/png;base64,abcde"
post = Fabricate(:post, raw: "![](#{src})")
expect(post.cooked).to have_tag("img", with: { src: src })
end
it "allows an exception" do
post = Fabricate :post, raw: <<~RAW
![](https://example.com)
![](https://example.com/myimage.png)
![](https://example.com.malicious.com/myimage.png)
![](https://malicious.invalid/https://example.com)
RAW
expect(post.cooked).not_to have_tag("img[src]")
SiteSetting.block_hotlinked_media_exceptions = "https://example.com"
post.rebake!
post.reload
expect(post.cooked).to have_tag("img", with: { "src" => "https://example.com" })
expect(post.cooked).to have_tag("img", with: { "src" => "https://example.com/myimage.png" })
expect(post.cooked).to have_tag("img", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => "https://example.com.malicious.com/myimage.png" })
expect(post.cooked).to have_tag("img", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => "https://malicious.invalid/https://example.com" })
end
it "allows multiple exceptions" do
post = Fabricate :post, raw: <<~RAW
![](https://example.com)
![](https://exampleb.com/myimage.png)
RAW
expect(post.cooked).not_to have_tag("img[src]")
SiteSetting.block_hotlinked_media_exceptions = "https://example.com|https://exampleb.com"
post.rebake!
post.reload
expect(post.cooked).to have_tag("img", with: { "src" => "https://example.com" })
expect(post.cooked).to have_tag("img", with: { "src" => "https://exampleb.com/myimage.png" })
end
end
context "with hotlinked media blocked, with post-processing" do
before do
SiteSetting.block_hotlinked_media = true
Jobs.run_immediately!
Oneboxer.stubs(:onebox).returns("<aside class='onebox'><img src='#{hotlinked_url}'></aside>")
end
it "renders placeholders for all media types (CookedPostProcessor)" do
post = Fabricate :post, raw: <<~RAW
<img src='#{hotlinked_url}'>
![alt text|video](#{hotlinked_url})
![alt text|audio](#{hotlinked_url})
#{onebox_url}
RAW
post.rebake!
post.reload
expect(post.cooked).not_to have_tag("img")
expect(post.cooked).not_to have_tag("video")
expect(post.cooked).not_to have_tag("audio")
expect(post.cooked).to have_tag(
"a.blocked-hotlinked-placeholder[href^='http://example.com'][rel='noopener nofollow ugc']",
count: 4
)
end
end
context "with hotlinked media blocked, and download_remote_images_to_local enabled" do
before do
SiteSetting.block_hotlinked_media = true
SiteSetting.download_remote_images_to_local = true
Oneboxer.stubs(:onebox).returns("<aside class='onebox'><img src='#{hotlinked_url}'></aside>")
Jobs.run_immediately!
end
it "can still download remote images after they're blocked" do
post = Fabricate :post, raw: <<~RAW
<img src='#{hotlinked_url}'>
#{onebox_url}
RAW
post.rebake!
post.reload
expect(post.uploads.count).to eq(1)
upload = post.uploads.first
expect(post.cooked).to have_tag("img", count: 2)
expect(post.cooked).to have_tag("img[src$=\"#{upload.url}\"]", count: 2)
end
end
end