FIX: `Jobs::PullHotlinkedImages#is_valid_image_src` returns true for a generic string.

This commit is contained in:
Guo Xiang Tan 2017-07-06 17:55:28 +09:00
parent 405672f8e6
commit e92acb4c40
2 changed files with 95 additions and 72 deletions

View File

@ -28,7 +28,7 @@ module Jobs
extract_images_from(post.cooked).each do |image| extract_images_from(post.cooked).each do |image|
src = original_src = image['src'] src = original_src = image['src']
src = "http:" + src if src.start_with?("//") src = "http:#{src}" if src.start_with?("//")
if is_valid_image_url(src) if is_valid_image_url(src)
hotlinked = nil hotlinked = nil
@ -113,17 +113,22 @@ module Jobs
return false if Discourse.store.has_been_uploaded?(src) return false if Discourse.store.has_been_uploaded?(src)
# we don't want to pull relative images # we don't want to pull relative images
return false if src =~ /\A\/[^\/]/i return false if src =~ /\A\/[^\/]/i
# parse the src # parse the src
begin begin
uri = URI.parse(src) uri = URI.parse(src)
rescue URI::InvalidURIError rescue URI::InvalidURIError
return false return false
end end
hostname = uri.hostname
return false unless hostname
# we don't want to pull images hosted on the CDN (if we use one) # we don't want to pull images hosted on the CDN (if we use one)
return false if Discourse.asset_host.present? && URI.parse(Discourse.asset_host).hostname == uri.hostname return false if Discourse.asset_host.present? && URI.parse(Discourse.asset_host).hostname == hostname
return false if SiteSetting.s3_cdn_url.present? && URI.parse(SiteSetting.s3_cdn_url).hostname == uri.hostname return false if SiteSetting.s3_cdn_url.present? && URI.parse(SiteSetting.s3_cdn_url).hostname == hostname
# we don't want to pull images hosted on the main domain # we don't want to pull images hosted on the main domain
return false if URI.parse(Discourse.base_url_no_prefix).hostname == uri.hostname return false if URI.parse(Discourse.base_url_no_prefix).hostname == hostname
# check the domains blacklist # check the domains blacklist
SiteSetting.should_download_images?(src) SiteSetting.should_download_images?(src)
end end

View File

@ -3,84 +3,102 @@ require 'jobs/regular/pull_hotlinked_images'
describe Jobs::PullHotlinkedImages do describe Jobs::PullHotlinkedImages do
let(:image_url) { "http://wiki.mozilla.org/images/2/2e/Longcat1.png" } describe '#execute' do
let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") } let(:image_url) { "http://wiki.mozilla.org/images/2/2e/Longcat1.png" }
let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") }
before do
stub_request(:get, image_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
stub_request(:head, image_url)
SiteSetting.download_remote_images_to_local = true
FastImage.expects(:size).returns([100, 100]).at_least_once
end
it 'replaces images' do
post = Fabricate(:post, raw: "<img src='http://wiki.mozilla.org/images/2/2e/Longcat1.png'>")
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
post.reload
expect(post.raw).to match(/^<img src='\/uploads/)
end
it 'replaces images without protocol' do
post = Fabricate(:post, raw: "<img src='//wiki.mozilla.org/images/2/2e/Longcat1.png'>")
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
post.reload
expect(post.raw).to match(/^<img src='\/uploads/)
end
it 'replaces images without extension' do
extensionless_url = "http://wiki.mozilla.org/images/2/2e/Longcat1"
stub_request(:get, extensionless_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
stub_request(:head, extensionless_url)
post = Fabricate(:post, raw: "<img src='#{extensionless_url}'>")
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
post.reload
expect(post.raw).to match(/^<img src='\/uploads/)
end
describe 'onebox' do
let(:media) { "File:Brisbane_May_2013201.jpg" }
let(:url) { "https://commons.wikimedia.org/wiki/#{media}" }
let(:api_url) { "https://en.wikipedia.org/w/api.php?action=query&titles=#{media}&prop=imageinfo&iilimit=50&iiprop=timestamp|user|url&iiurlwidth=500&format=json" }
before do before do
SiteSetting.queue_jobs = true stub_request(:get, image_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
stub_request(:get, url).to_return(body: '') stub_request(:head, image_url)
stub_request(:head, url) SiteSetting.download_remote_images_to_local = true
stub_request(:get, api_url).to_return(body: "{ FastImage.expects(:size).returns([100, 100]).at_least_once
\"query\": {
\"pages\": {
\"-1\": {
\"title\": \"#{media}\",
\"imageinfo\": [{
\"thumburl\": \"#{image_url}\",
\"url\": \"#{image_url}\",
\"descriptionurl\": \"#{url}\"
}]
}
}
}
}")
stub_request(:head, api_url)
end end
it 'replaces image src' do it 'replaces images' do
post = Fabricate(:post, raw: "#{url}") post = Fabricate(:post, raw: "<img src='http://wiki.mozilla.org/images/2/2e/Longcat1.png'>")
Jobs::ProcessPost.new.execute(post_id: post.id)
Jobs::PullHotlinkedImages.new.execute(post_id: post.id) Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
Jobs::ProcessPost.new.execute(post_id: post.id)
post.reload post.reload
expect(post.cooked).to match(/<img src=.*\/uploads/) expect(post.raw).to match(/^<img src='\/uploads/)
end end
it 'replaces images without protocol' do
post = Fabricate(:post, raw: "<img src='//wiki.mozilla.org/images/2/2e/Longcat1.png'>")
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
post.reload
expect(post.raw).to match(/^<img src='\/uploads/)
end
it 'replaces images without extension' do
extensionless_url = "http://wiki.mozilla.org/images/2/2e/Longcat1"
stub_request(:get, extensionless_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
stub_request(:head, extensionless_url)
post = Fabricate(:post, raw: "<img src='#{extensionless_url}'>")
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
post.reload
expect(post.raw).to match(/^<img src='\/uploads/)
end
describe 'onebox' do
let(:media) { "File:Brisbane_May_2013201.jpg" }
let(:url) { "https://commons.wikimedia.org/wiki/#{media}" }
let(:api_url) { "https://en.wikipedia.org/w/api.php?action=query&titles=#{media}&prop=imageinfo&iilimit=50&iiprop=timestamp|user|url&iiurlwidth=500&format=json" }
before do
SiteSetting.queue_jobs = true
stub_request(:get, url).to_return(body: '')
stub_request(:head, url)
stub_request(:get, api_url).to_return(body: "{
\"query\": {
\"pages\": {
\"-1\": {
\"title\": \"#{media}\",
\"imageinfo\": [{
\"thumburl\": \"#{image_url}\",
\"url\": \"#{image_url}\",
\"descriptionurl\": \"#{url}\"
}]
}
}
}
}")
stub_request(:head, api_url)
end
it 'replaces image src' do
post = Fabricate(:post, raw: "#{url}")
Jobs::ProcessPost.new.execute(post_id: post.id)
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
Jobs::ProcessPost.new.execute(post_id: post.id)
post.reload
expect(post.cooked).to match(/<img src=.*\/uploads/)
end
end
end
describe '#is_valid_image_url' do
subject { described_class.new }
describe 'when url is invalid' do
it 'should return false' do
expect(subject.is_valid_image_url("null")).to eq(false)
expect(subject.is_valid_image_url("meta.discourse.org")).to eq(false)
end
end
describe 'when url is valid' do
it 'should return true' do
expect(subject.is_valid_image_url("http://meta.discourse.org")).to eq(true)
expect(subject.is_valid_image_url("//meta.discourse.org")).to eq(true)
end
end
end end
end end