Merge pull request #4845 from vinothkannans/download-onebox-images

FEATURE: pull onebox images
This commit is contained in:
Guo Xiang Tan 2017-06-07 09:55:04 +09:00 committed by GitHub
commit da736f70a4
3 changed files with 64 additions and 7 deletions

View File

@ -94,12 +94,14 @@ module Jobs
# we never want that job to bump the topic
options = { bypass_bump: true }
post.revise(Discourse.system_user, changes, options)
elsif downloaded_urls.present?
post.trigger_post_process(true)
end
end
# Parses +html+ as a Nokogiri HTML fragment and selects <img> elements
# that carry a src attribute, excluding images with the "avatar" class.
def extract_images_from(html)
doc = Nokogiri::HTML::fragment(html)
# NOTE(review): this line and the one after it look like the before/after
# pair of a single diff hunk shown without +/- markers. Read literally as
# Ruby, the result of this first expression (which additionally excludes
# ".onebox-result img") is discarded -- confirm against the real file.
doc.css("img[src]") - doc.css(".onebox-result img") - doc.css("img.avatar")
# Last expression, hence the method's return value: every img[src] node
# minus the avatar images; onebox images are no longer subtracted here.
doc.css("img[src]") - doc.css("img.avatar")
end
def is_valid_image_url(src)

View File

@ -51,7 +51,7 @@ class CookedPostProcessor
end
def keep_reverse_index_up_to_date
upload_ids = Set.new
upload_ids = Array.new
@doc.css("a/@href", "img/@src").each do |media|
if upload = Upload.get_from_url(media.value)
@ -59,6 +59,8 @@ class CookedPostProcessor
end
end
upload_ids.concat(oneboxed_image_uploads.pluck(:id))
values = upload_ids.map{ |u| "(#{@post.id},#{u})" }.join(",")
PostUpload.transaction do
PostUpload.delete_all(post_id: @post.id)
@ -105,7 +107,12 @@ class CookedPostProcessor
end
# Returns the <img> nodes of @doc that sit inside onebox markup.
def oneboxed_images
# NOTE(review): this line and the next look like the before/after pair of a
# single diff hunk shown without +/- markers. Read literally as Ruby, the
# result of this ".onebox-result img" query is discarded -- confirm.
@doc.css(".onebox-result img, .onebox img")
# Last expression, hence the return value: images under ".onebox-body" or
# under ".onebox".
@doc.css(".onebox-body img, .onebox img")
end
# Looks up Upload records whose +origin+ equals the src attribute of one of
# the oneboxed images, and returns whatever Upload.where yields for that
# condition.
def oneboxed_image_uploads
  Upload.where(origin: oneboxed_images.map { |image| image["src"] })
end
def limit_size!(img)
@ -301,8 +308,14 @@ class CookedPostProcessor
Oneboxer.onebox(url, args)
end
# make sure we grab dimensions for oneboxed images
oneboxed_images.each { |img| limit_size!(img) }
uploads = oneboxed_image_uploads.select(:url, :origin)
oneboxed_images.each do |img|
upload = uploads.detect { |u| u.origin == img["src"] }
next unless upload.present?
img["src"] = upload.url
# make sure we grab dimensions for oneboxed images
limit_size!(img)
end
# respect nofollow admin settings
if !@cooking_options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content

View File

@ -3,10 +3,12 @@ require 'jobs/regular/pull_hotlinked_images'
describe Jobs::PullHotlinkedImages do
let(:image_url) { "http://wiki.mozilla.org/images/2/2e/Longcat1.png" }
before do
png = Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==")
stub_request(:get, "http://wiki.mozilla.org/images/2/2e/Longcat1.png").to_return(body: png)
stub_request(:head, "http://wiki.mozilla.org/images/2/2e/Longcat1.png")
stub_request(:get, image_url).to_return(body: png)
stub_request(:head, image_url)
SiteSetting.download_remote_images_to_local = true
FastImage.expects(:size).returns([100, 100]).at_least_once
end
@ -29,4 +31,44 @@ describe Jobs::PullHotlinkedImages do
expect(post.raw).to match(/^<img src='\/uploads/)
end
# Covers the case where a post's raw content is a bare Wikimedia Commons file
# URL and the image referenced by the stubbed API response should end up
# served from /uploads in the cooked HTML.
describe 'onebox' do
# File title used in both the page URL and the API query string.
let(:media) { "File:Brisbane_May_2013201.jpg" }
# The URL that is posted as the raw content of the post.
let(:url) { "https://commons.wikimedia.org/wiki/#{media}" }
# The imageinfo API endpoint stubbed below for +media+.
let(:api_url) { "https://en.wikipedia.org/w/api.php?action=query&titles=#{media}&prop=imageinfo&iilimit=50&iiprop=timestamp|user|url&iiurlwidth=500&format=json" }
before do
# NOTE(review): presumably makes jobs queue instead of running inline so the
# example can invoke them explicitly in a fixed order -- confirm.
SiteSetting.queue_jobs = true
# The page itself returns an empty body; only the API response carries data.
stub_request(:get, url).to_return(body: '')
stub_request(:head, url)
# The API response points both "thumburl" and "url" at image_url (defined in
# the enclosing describe), which the outer before block stubs with image data.
stub_request(:get, api_url).to_return(body: "{
\"query\": {
\"pages\": {
\"-1\": {
\"title\": \"#{media}\",
\"imageinfo\": [{
\"thumburl\": \"#{image_url}\",
\"url\": \"#{image_url}\",
\"descriptionurl\": \"#{url}\"
}]
}
}
}
}")
stub_request(:head, api_url)
end
# Runs post processing, then the hotlinked-image pull, then post processing
# again, and expects the cooked HTML to contain an <img> whose src includes
# "/uploads".
it 'replaces image src' do
post = Fabricate(:post, raw: "#{url}")
Jobs::ProcessPost.new.execute(post_id: post.id)
Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
Jobs::ProcessPost.new.execute(post_id: post.id)
# Reload to pick up the cooked content written by the jobs above.
post.reload
expect(post.cooked).to match(/<img src=.*\/uploads/)
end
end
end