Merge pull request #2260 from riking/download_blacklist

Add site setting for domains to never download images from
This commit is contained in:
Régis Hanol 2014-04-24 11:13:03 +02:00
commit 3036490d05
4 changed files with 27 additions and 5 deletions

View File

@ -21,6 +21,7 @@ module Jobs
return unless post.present? return unless post.present?
raw = post.raw.dup raw = post.raw.dup
start_raw = raw.dup
downloaded_urls = {} downloaded_urls = {}
extract_images_from(post.cooked).each do |image| extract_images_from(post.cooked).each do |image|
@ -28,9 +29,10 @@ module Jobs
src = "http:" + src if src.start_with?("//") src = "http:" + src if src.start_with?("//")
if is_valid_image_url(src) if is_valid_image_url(src)
hotlinked = nil
begin begin
# have we already downloaded that file? # have we already downloaded that file?
if !downloaded_urls.include?(src) unless downloaded_urls.include?(src)
begin begin
hotlinked = FileHelper.download(src, @max_size, "discourse-hotlinked") hotlinked = FileHelper.download(src, @max_size, "discourse-hotlinked")
rescue Discourse::InvalidParameters rescue Discourse::InvalidParameters
@ -71,10 +73,15 @@ module Jobs
end end
# TODO: make sure the post hasn´t changed while we were downloading remote images post.reload
if raw != post.raw if start_raw != post.raw
# post was edited - start over (after 10 minutes)
backoff = args.fetch(:backoff, 1) + 1
delay = SiteSetting.ninja_edit_window * args[:backoff]
Jobs.enqueue_in(delay.seconds.to_i, :pull_hotlinked_images, args.merge!(backoff: backoff))
elsif raw != post.raw
options = { edit_reason: I18n.t("upload.edit_reason") } options = { edit_reason: I18n.t("upload.edit_reason") }
options[:bypass_bump] = true if args[:bypass_bump] == true options[:bypass_bump] = !!args[:bypass_bump]
post.revise(Discourse.system_user, raw, options) post.revise(Discourse.system_user, raw, options)
end end
end end
@ -87,7 +94,8 @@ module Jobs
def is_valid_image_url(src) def is_valid_image_url(src)
src.present? && src.present? &&
!Discourse.store.has_been_uploaded?(src) && !Discourse.store.has_been_uploaded?(src) &&
!src.start_with?(Discourse.asset_host || Discourse.base_url_no_prefix) !src.start_with?(Discourse.asset_host || Discourse.base_url_no_prefix) &&
SiteSetting.should_download_images?(src)
end end
end end

View File

@ -72,6 +72,16 @@ class SiteSetting < ActiveRecord::Base
.first .first
end end
# Tells whether a remote image at +src+ may be downloaded, based on the
# `disabled_image_download_domains` site setting (a pipe-delimited list of
# domains from which images must never be downloaded).
#
# @param src [String] absolute URL of the image
# @return [Boolean] false when the URL's host is listed in the setting;
#   true otherwise, including when the setting is blank, the URL has no
#   host, or the URL cannot be parsed
def self.should_download_images?(src)
  setting = disabled_image_download_domains
  return true if setting.to_s.strip.empty?

  host = URI.parse(src).host
  # Relative or host-less URLs cannot match any listed domain.
  return true if host.nil?

  # Host names are case-insensitive and admins may pad entries with spaces,
  # so normalise both sides before comparing.
  blocked_hosts = setting.split('|').map { |domain| domain.strip.downcase }
  !blocked_hosts.include?(host.downcase)
rescue URI::InvalidURIError
  # An unparsable URL is not matched against the list; keep the permissive
  # default and let the caller's other checks decide.
  true
end
def self.scheme def self.scheme
use_https? ? "https" : "http" use_https? ? "https" : "http"
end end

View File

@ -605,6 +605,7 @@ en:
crawl_images: "Enable retrieving images from third party sources to insert width and height dimensions" crawl_images: "Enable retrieving images from third party sources to insert width and height dimensions"
download_remote_images_to_local: "Download a copy of remote images hotlinked in posts" download_remote_images_to_local: "Download a copy of remote images hotlinked in posts"
download_remote_images_threshold: "Amount of minimum available disk space required to download remote images locally (in percent)" download_remote_images_threshold: "Amount of minimum available disk space required to download remote images locally (in percent)"
disabled_image_download_domains: "A pipe-delimited list of domains from which linked images will never be downloaded."
ninja_edit_window: "Number of seconds after posting where edits do not create a new version" ninja_edit_window: "Number of seconds after posting where edits do not create a new version"
post_edit_time_limit: "Amount of time in minutes in which posts can be edited and deleted by the author. Set to 0 to allow editing and deleting posts at any time." post_edit_time_limit: "Amount of time in minutes in which posts can be edited and deleted by the author. Set to 0 to allow editing and deleting posts at any time."
edit_history_visible_to_public: "Allow everyone to see previous versions of an edited post. When disabled, only staff members can view edit history." edit_history_visible_to_public: "Allow everyone to see previous versions of an edited post. When disabled, only staff members can view edit history."

View File

@ -302,6 +302,9 @@ files:
test: false test: false
default: true default: true
download_remote_images_threshold: 20 download_remote_images_threshold: 20
disabled_image_download_domains:
list: true
default: ''
create_thumbnails: true create_thumbnails: true
clean_up_uploads: false clean_up_uploads: false
clean_orphan_uploads_grace_period_hours: 1 clean_orphan_uploads_grace_period_hours: 1