From fb0a655e8a373764742376bcac4a55858658eb35 Mon Sep 17 00:00:00 2001 From: Guo Xiang Tan Date: Thu, 6 Jun 2019 15:50:35 +0800 Subject: [PATCH] FEATURE: Update pull hotlinked images to use `Upload#short_url`. --- app/jobs/regular/pull_hotlinked_images.rb | 44 +++++---- spec/jobs/pull_hotlinked_images_spec.rb | 107 ++++++++++++++++++---- 2 files changed, 115 insertions(+), 36 deletions(-) diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb index 23f69349e96..b68154c5ea5 100644 --- a/app/jobs/regular/pull_hotlinked_images.rb +++ b/app/jobs/regular/pull_hotlinked_images.rb @@ -92,36 +92,37 @@ module Jobs url = downloaded_urls[src] escaped_src = Regexp.escape(original_src) + replace_raw = ->(match, match_src, replacement, _index) { + if src.include?(match_src) + raw = raw.gsub( + match, + replacement.sub(InlineUploads::PLACEHOLDER, upload.short_url) + ) + end + } + # there are 6 ways to insert an image in a post # HTML tag - - raw.gsub!(/src=["']#{escaped_src}["']/i, "src='#{url}'") - - if (original_path = Upload.extract_url(original_src)&.to_s) && - Upload.extract_url(url)&.to_s - - raw.gsub!( - /src=["']\S*#{Regexp.escape(original_path)}["']/i, - "src='#{url}'" - ) - end + InlineUploads.match_img(raw, external_src: true, &replace_raw) # BBCode tag - [img]http://...[/img] - raw.gsub!(/\[img\]#{escaped_src}\[\/img\]/i, "[img]#{url}[/img]") + InlineUploads.match_bbcode_img(raw, &replace_raw) + # Markdown linked image - [![alt](http://...)](http://...) - raw.gsub!(/\[!\[([^\]]*)\]\(#{escaped_src}\)\]/) { "[#{$1}]" } # Markdown inline - ![alt](http://...) - raw.gsub!(/!\[([^\]]*)\]\(#{escaped_src}\)/) { "![#{$1}](#{url})" } # Markdown inline - ![](http://... "image title") - raw.gsub!(/!\[\]\(#{escaped_src} "([^\]]*)"\)/) { "![](#{url})" } # Markdown inline - ![alt](http://... "image title") - raw.gsub!(/!\[([^\]]*)\]\(#{escaped_src} "([^\]]*)"\)/) { "![](#{url})" } - # Markdown reference - [x]: http:// - raw.gsub!(/\[([^\]]+)\]:\s?#{escaped_src}/) { "[#{$1}]: #{url}" } + InlineUploads.match_md_inline_img(raw, external_src: true, &replace_raw) + # Direct link - raw.gsub!(/^#{escaped_src}(\s?)$/) { "#{$1}" } + raw.gsub!(/^#{escaped_src}(\s?)$/) { "![](#{upload.short_url})#{$1}" } end rescue => e - log(:error, "Failed to pull hotlinked image (#{src}) post: #{post_id}\n" + e.message + "\n" + e.backtrace.join("\n")) + if Rails.env.test? + raise e + else + log(:error, "Failed to pull hotlinked image (#{src}) post: #{post_id}\n" + e.message + "\n" + e.backtrace.join("\n")) + end end end end @@ -147,7 +148,10 @@ module Jobs def extract_images_from(html) doc = Nokogiri::HTML::fragment(html) - doc.css("img[src], a.lightbox[href]") - doc.css("img.avatar") - doc.css(".lightbox img[src]") + + doc.css("img[src], a.lightbox[href], a.onebox[href]") - + doc.css("img.avatar") - + doc.css(".lightbox img[src]") end def should_download_image?(src) diff --git a/spec/jobs/pull_hotlinked_images_spec.rb b/spec/jobs/pull_hotlinked_images_spec.rb index 446adea9737..25b086f4196 100644 --- a/spec/jobs/pull_hotlinked_images_spec.rb +++ b/spec/jobs/pull_hotlinked_images_spec.rb @@ -49,20 +49,22 @@ describe Jobs::PullHotlinkedImages do it 'replaces images' do post = Fabricate(:post, raw: "") - Jobs::PullHotlinkedImages.new.execute(post_id: post.id) - post.reload + expect do + Jobs::PullHotlinkedImages.new.execute(post_id: post.id) + end.to change { Upload.count }.by(1) - expect(post.raw).to match(/^") + post = Fabricate(:post, raw: "test") - Jobs::PullHotlinkedImages.new.execute(post_id: post.id) - post.reload + expect do + Jobs::PullHotlinkedImages.new.execute(post_id: post.id) + end.to change { Upload.count }.by(1) - expect(post.raw).to match(/^ "image/png" }) post = Fabricate(:post, raw: "") - Jobs::PullHotlinkedImages.new.execute(post_id: post.id) - post.reload + expect do + Jobs::PullHotlinkedImages.new.execute(post_id: post.id) + end.to change { Upload.count }.by(1) - expect(post.raw).to match(/^") + expect(post.raw).to eq("![](#{upload.short_url})") expect(post.uploads).to contain_exactly(upload) end + it 'replaces direct links' do + post = Fabricate(:post, raw: <<~MD) + #{image_url} + #{image_url} + MD + + expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } + .to change { Upload.count }.by(1) + + post.reload + + expect(post.raw).to eq(<<~MD.chomp) + ![](#{Upload.last.short_url}) + ![](#{Upload.last.short_url}) + MD + end + + it 'replaces markdown image' do + post = Fabricate(:post, raw: <<~MD) + [![some test](#{image_url})](https://somelink.com) + ![some test](#{image_url}) + ![](#{image_url}) + ![abcde](#{image_url} 'some test') + ![](#{image_url} 'some test') + MD + + expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } + .to change { Upload.count }.by(1) + + post.reload + + expect(post.raw).to eq(<<~MD.chomp) + [![some test](#{Upload.last.short_url})](https://somelink.com) + ![some test](#{Upload.last.short_url}) + ![](#{Upload.last.short_url}) + ![abcde](#{Upload.last.short_url} 'some test') + ![](#{Upload.last.short_url} 'some test') + MD + end + + it 'replaces bbcode images' do + post = Fabricate(:post, raw: <<~MD) + [img] + #{image_url} + [/img] + + [img] + #{image_url} + [/img] + MD + + expect { Jobs::PullHotlinkedImages.new.execute(post_id: post.id) } + .to change { Upload.count }.by(1) + + post.reload + + expect(post.raw).to eq(<<~MD.chomp) + ![](#{Upload.last.short_url}) + + ![](#{Upload.last.short_url}) + MD + end + describe 'onebox' do let(:media) { "File:Brisbane_May_2013201.jpg" } let(:url) { "https://commons.wikimedia.org/wiki/#{media}" } @@ -104,6 +170,7 @@ describe Jobs::PullHotlinkedImages do Jobs.run_later! stub_request(:head, url) stub_request(:get, url).to_return(body: '') + stub_request(:get, api_url).to_return(body: "{ \"query\": { \"pages\": { @@ -139,12 +206,20 @@ describe Jobs::PullHotlinkedImages do BODY - Jobs::ProcessPost.new.execute(post_id: post.id) - Jobs::PullHotlinkedImages.new.execute(post_id: post.id) - Jobs::ProcessPost.new.execute(post_id: post.id) - Jobs::PullHotlinkedImages.new.execute(post_id: post.id) + 2.times do + Jobs::ProcessPost.new.execute(post_id: post.id) + Jobs::PullHotlinkedImages.new.execute(post_id: post.id) + end + post.reload + expect(post.raw).to eq(<<~MD.chomp) + ![](upload://z2QSs1KJWoj51uYhDjb6ifCzxH6.gif) + https://commons.wikimedia.org/wiki/File:Brisbane_May_2013201.jpg + + + MD + expect(post.cooked).to match(/

") + expect(post.raw).to eq("![](#{Upload.last.short_url})") expect(post.uploads.count).to eq(1) end