FIX: `Jobs::PullHotlinkedImages#is_valid_image_src` returns true for a generic string.

This commit is contained in:
Guo Xiang Tan 2017-07-06 17:55:28 +09:00
parent 405672f8e6
commit e92acb4c40
2 changed files with 95 additions and 72 deletions

View File

@ -28,7 +28,7 @@ module Jobs
extract_images_from(post.cooked).each do |image| extract_images_from(post.cooked).each do |image|
src = original_src = image['src'] src = original_src = image['src']
src = "http:" + src if src.start_with?("//") src = "http:#{src}" if src.start_with?("//")
if is_valid_image_url(src) if is_valid_image_url(src)
hotlinked = nil hotlinked = nil
@ -113,17 +113,22 @@ module Jobs
return false if Discourse.store.has_been_uploaded?(src) return false if Discourse.store.has_been_uploaded?(src)
# we don't want to pull relative images # we don't want to pull relative images
return false if src =~ /\A\/[^\/]/i return false if src =~ /\A\/[^\/]/i
# parse the src # parse the src
begin begin
uri = URI.parse(src) uri = URI.parse(src)
rescue URI::InvalidURIError rescue URI::InvalidURIError
return false return false
end end
hostname = uri.hostname
return false unless hostname
# we don't want to pull images hosted on the CDN (if we use one) # we don't want to pull images hosted on the CDN (if we use one)
return false if Discourse.asset_host.present? && URI.parse(Discourse.asset_host).hostname == uri.hostname return false if Discourse.asset_host.present? && URI.parse(Discourse.asset_host).hostname == hostname
return false if SiteSetting.s3_cdn_url.present? && URI.parse(SiteSetting.s3_cdn_url).hostname == uri.hostname return false if SiteSetting.s3_cdn_url.present? && URI.parse(SiteSetting.s3_cdn_url).hostname == hostname
# we don't want to pull images hosted on the main domain # we don't want to pull images hosted on the main domain
return false if URI.parse(Discourse.base_url_no_prefix).hostname == uri.hostname return false if URI.parse(Discourse.base_url_no_prefix).hostname == hostname
# check the domains blacklist # check the domains blacklist
SiteSetting.should_download_images?(src) SiteSetting.should_download_images?(src)
end end

View File

@ -3,6 +3,7 @@ require 'jobs/regular/pull_hotlinked_images'
describe Jobs::PullHotlinkedImages do describe Jobs::PullHotlinkedImages do
describe '#execute' do
let(:image_url) { "http://wiki.mozilla.org/images/2/2e/Longcat1.png" } let(:image_url) { "http://wiki.mozilla.org/images/2/2e/Longcat1.png" }
let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") } let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") }
@ -44,7 +45,6 @@ describe Jobs::PullHotlinkedImages do
end end
describe 'onebox' do describe 'onebox' do
let(:media) { "File:Brisbane_May_2013201.jpg" } let(:media) { "File:Brisbane_May_2013201.jpg" }
let(:url) { "https://commons.wikimedia.org/wiki/#{media}" } let(:url) { "https://commons.wikimedia.org/wiki/#{media}" }
let(:api_url) { "https://en.wikipedia.org/w/api.php?action=query&titles=#{media}&prop=imageinfo&iilimit=50&iiprop=timestamp|user|url&iiurlwidth=500&format=json" } let(:api_url) { "https://en.wikipedia.org/w/api.php?action=query&titles=#{media}&prop=imageinfo&iilimit=50&iiprop=timestamp|user|url&iiurlwidth=500&format=json" }
@ -80,7 +80,25 @@ describe Jobs::PullHotlinkedImages do
expect(post.cooked).to match(/<img src=.*\/uploads/) expect(post.cooked).to match(/<img src=.*\/uploads/)
end end
end
end
describe '#is_valid_image_url' do
subject { described_class.new }
describe 'when url is invalid' do
it 'should return false' do
expect(subject.is_valid_image_url("null")).to eq(false)
expect(subject.is_valid_image_url("meta.discourse.org")).to eq(false)
end
end
describe 'when url is valid' do
it 'should return true' do
expect(subject.is_valid_image_url("http://meta.discourse.org")).to eq(true)
expect(subject.is_valid_image_url("//meta.discourse.org")).to eq(true)
end
end
end end
end end