FIX: `Jobs::PullHotlinkedImages#is_valid_image_url` returns true for a generic string.

2017-07-06 17:55:28 +09:00 · 2017-07-06 17:55:28 +09:00 · e92acb4c40
parent 405672f8e6
commit e92acb4c40
2 changed files with 95 additions and 72 deletions
--- a/app/jobs/regular/pull_hotlinked_images.rb
+++ b/app/jobs/regular/pull_hotlinked_images.rb
@ -28,7 +28,7 @@ module Jobs

      extract_images_from(post.cooked).each do |image|
        src = original_src = image['src']
-        src = "http:" + src if src.start_with?("//")
+        src = "http:#{src}" if src.start_with?("//")

        if is_valid_image_url(src)
          hotlinked = nil
@ -113,17 +113,22 @@ module Jobs
      return false if Discourse.store.has_been_uploaded?(src)
      # we don't want to pull relative images
      return false if src =~ /\A\/[^\/]/i
+
      # parse the src
      begin
        uri = URI.parse(src)
      rescue URI::InvalidURIError
        return false
      end
+
+      hostname = uri.hostname
+      return false unless hostname
+
      # we don't want to pull images hosted on the CDN (if we use one)
-      return false if Discourse.asset_host.present? && URI.parse(Discourse.asset_host).hostname == uri.hostname
-      return false if SiteSetting.s3_cdn_url.present? && URI.parse(SiteSetting.s3_cdn_url).hostname == uri.hostname
+      return false if Discourse.asset_host.present? && URI.parse(Discourse.asset_host).hostname == hostname
+      return false if SiteSetting.s3_cdn_url.present? && URI.parse(SiteSetting.s3_cdn_url).hostname == hostname
      # we don't want to pull images hosted on the main domain
-      return false if URI.parse(Discourse.base_url_no_prefix).hostname == uri.hostname
+      return false if URI.parse(Discourse.base_url_no_prefix).hostname == hostname
      # check the domains blacklist
      SiteSetting.should_download_images?(src)
    end
--- a/spec/jobs/pull_hotlinked_images_spec.rb
+++ b/spec/jobs/pull_hotlinked_images_spec.rb
@ -3,84 +3,102 @@ require 'jobs/regular/pull_hotlinked_images'

 describe Jobs::PullHotlinkedImages do

-  let(:image_url) { "http://wiki.mozilla.org/images/2/2e/Longcat1.png" }
-  let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") }
-
-  before do
-    stub_request(:get, image_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
-    stub_request(:head, image_url)
-    SiteSetting.download_remote_images_to_local = true
-    FastImage.expects(:size).returns([100, 100]).at_least_once
-  end
-
-  it 'replaces images' do
-    post = Fabricate(:post, raw: "<img src='http://wiki.mozilla.org/images/2/2e/Longcat1.png'>")
-
-    Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
-    post.reload
-
-    expect(post.raw).to match(/^<img src='\/uploads/)
-  end
-
-  it 'replaces images without protocol' do
-    post = Fabricate(:post, raw: "<img src='//wiki.mozilla.org/images/2/2e/Longcat1.png'>")
-
-    Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
-    post.reload
-
-    expect(post.raw).to match(/^<img src='\/uploads/)
-  end
-
-  it 'replaces images without extension' do
-    extensionless_url = "http://wiki.mozilla.org/images/2/2e/Longcat1"
-    stub_request(:get, extensionless_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
-    stub_request(:head, extensionless_url)
-    post = Fabricate(:post, raw: "<img src='#{extensionless_url}'>")
-
-    Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
-    post.reload
-
-    expect(post.raw).to match(/^<img src='\/uploads/)
-  end
-
-  describe 'onebox' do
-
-    let(:media) { "File:Brisbane_May_2013201.jpg" }
-    let(:url) { "https://commons.wikimedia.org/wiki/#{media}" }
-    let(:api_url) { "https://en.wikipedia.org/w/api.php?action=query&titles=#{media}&prop=imageinfo&iilimit=50&iiprop=timestamp|user|url&iiurlwidth=500&format=json" }
+  describe '#execute' do
+    let(:image_url) { "http://wiki.mozilla.org/images/2/2e/Longcat1.png" }
+    let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") }

    before do
-      SiteSetting.queue_jobs = true
-      stub_request(:get, url).to_return(body: '')
-      stub_request(:head, url)
-      stub_request(:get, api_url).to_return(body: "{
-        \"query\": {
-          \"pages\": {
-            \"-1\": {
-              \"title\": \"#{media}\",
-              \"imageinfo\": [{
-                \"thumburl\": \"#{image_url}\",
-                \"url\": \"#{image_url}\",
-                \"descriptionurl\": \"#{url}\"
-              }]
-            }
-          }
-        }
-      }")
-      stub_request(:head, api_url)
+      stub_request(:get, image_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
+      stub_request(:head, image_url)
+      SiteSetting.download_remote_images_to_local = true
+      FastImage.expects(:size).returns([100, 100]).at_least_once
    end

-    it 'replaces image src' do
-      post = Fabricate(:post, raw: "#{url}")
+    it 'replaces images' do
+      post = Fabricate(:post, raw: "<img src='http://wiki.mozilla.org/images/2/2e/Longcat1.png'>")

-      Jobs::ProcessPost.new.execute(post_id: post.id)
      Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
-      Jobs::ProcessPost.new.execute(post_id: post.id)
      post.reload

-      expect(post.cooked).to match(/<img src=.*\/uploads/)
+      expect(post.raw).to match(/^<img src='\/uploads/)
    end

+    it 'replaces images without protocol' do
+      post = Fabricate(:post, raw: "<img src='//wiki.mozilla.org/images/2/2e/Longcat1.png'>")
+
+      Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
+      post.reload
+
+      expect(post.raw).to match(/^<img src='\/uploads/)
+    end
+
+    it 'replaces images without extension' do
+      extensionless_url = "http://wiki.mozilla.org/images/2/2e/Longcat1"
+      stub_request(:get, extensionless_url).to_return(body: png, headers: { "Content-Type" => "image/png" })
+      stub_request(:head, extensionless_url)
+      post = Fabricate(:post, raw: "<img src='#{extensionless_url}'>")
+
+      Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
+      post.reload
+
+      expect(post.raw).to match(/^<img src='\/uploads/)
+    end
+
+    describe 'onebox' do
+      let(:media) { "File:Brisbane_May_2013201.jpg" }
+      let(:url) { "https://commons.wikimedia.org/wiki/#{media}" }
+      let(:api_url) { "https://en.wikipedia.org/w/api.php?action=query&titles=#{media}&prop=imageinfo&iilimit=50&iiprop=timestamp|user|url&iiurlwidth=500&format=json" }
+
+      before do
+        SiteSetting.queue_jobs = true
+        stub_request(:get, url).to_return(body: '')
+        stub_request(:head, url)
+        stub_request(:get, api_url).to_return(body: "{
+          \"query\": {
+            \"pages\": {
+              \"-1\": {
+                \"title\": \"#{media}\",
+                \"imageinfo\": [{
+                  \"thumburl\": \"#{image_url}\",
+                  \"url\": \"#{image_url}\",
+                  \"descriptionurl\": \"#{url}\"
+                }]
+              }
+            }
+          }
+        }")
+        stub_request(:head, api_url)
+      end
+
+      it 'replaces image src' do
+        post = Fabricate(:post, raw: "#{url}")
+
+        Jobs::ProcessPost.new.execute(post_id: post.id)
+        Jobs::PullHotlinkedImages.new.execute(post_id: post.id)
+        Jobs::ProcessPost.new.execute(post_id: post.id)
+        post.reload
+
+        expect(post.cooked).to match(/<img src=.*\/uploads/)
+      end
+    end
+  end
+
+  describe '#is_valid_image_url' do
+    subject { described_class.new }
+
+    describe 'when url is invalid' do
+      it 'should return false' do
+        expect(subject.is_valid_image_url("null")).to eq(false)
+        expect(subject.is_valid_image_url("meta.discourse.org")).to eq(false)
+      end
+    end
+
+    describe 'when url is valid' do
+      it 'should return true' do
+        expect(subject.is_valid_image_url("http://meta.discourse.org")).to eq(true)
+        expect(subject.is_valid_image_url("//meta.discourse.org")).to eq(true)
+      end
+    end
  end

 end