FEATURE: Onebox can match engines based on the content_type (#13876)

* FEATURE: Onebox can match engines based on the content_type

`FinalDestination` now returns the `content_type` of a resolved URL.

`Oneboxer` passes this value to `Onebox` itself. Onebox engines can now specify a `matches_content_type` regex of content_types that the engine can handle, regardless of the URL.

`ImageOnebox` will match URLs with a content type of `image/png`, `jpg`, `gif`, `bmp`, `tif`, etc.

This will allow images that exist at a URL without a file type extension to be correctly rendered, assuming a valid `content_type` is returned.
This commit is contained in:
jbrw 2021-07-30 13:36:30 -04:00 committed by GitHub
parent f0d048b42a
commit 2f28ba318c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 65 additions and 3 deletions

View File

@ -30,7 +30,7 @@ class FinalDestination
DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15"
attr_reader :status, :cookie, :status_code, :ignored
attr_reader :status, :cookie, :status_code, :content_type, :ignored
def initialize(url, opts = nil)
@url = url
@ -215,6 +215,7 @@ class FinalDestination
end
end
@content_type = response.headers['Content-Type'] if response.headers.has_key?('Content-Type')
@status = :resolved
return @uri
when 400, 405, 406, 409, 500, 501

View File

@ -96,6 +96,12 @@ module Onebox
end
module ClassMethods
def handles_content_type?(other)
if other && class_variable_defined?(:@@matcher_content_type)
!!(other.to_s =~ class_variable_get(:@@matcher_content_type))
end
end
def ===(other)
if other.kind_of?(URI)
!!(other.to_s =~ class_variable_get(:@@matcher))
@ -112,6 +118,10 @@ module Onebox
class_variable_set :@@matcher, r
end
def matches_content_type(ct)
class_variable_set :@@matcher_content_type, ct
end
def requires_iframe_origins(*origins)
class_variable_set :@@iframe_origins, origins
end

View File

@ -5,6 +5,7 @@ module Onebox
class ImageOnebox
include Engine
matches_content_type(/^image\/(png|jpg|jpeg|gif|bmp|tif|tiff)$/)
matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)
def always_https?

View File

@ -21,7 +21,13 @@ module Onebox
return if @uri.nil?
return if @uri.port && !Onebox.options.allowed_ports.include?(@uri.port)
return if @uri.scheme && !Onebox.options.allowed_schemes.include?(@uri.scheme)
ordered_engines.find { |engine| engine === @uri && has_allowed_iframe_origins?(engine) }
ordered_engines.find do |engine|
(
engine.respond_to?(:handles_content_type?) && engine.handles_content_type?(@options[:content_type]) ||
engine === @uri
) && has_allowed_iframe_origins?(engine)
end
end
def has_allowed_iframe_origins?(engine)

View File

@ -429,7 +429,8 @@ module Oneboxer
hostname: GlobalSetting.hostname,
facebook_app_access_token: SiteSetting.facebook_app_access_token,
disable_media_download_controls: SiteSetting.disable_onebox_media_download_controls,
body_cacher: self
body_cacher: self,
content_type: fd.content_type
}
onebox_options[:cookie] = fd.cookie if fd.cookie

View File

@ -42,6 +42,13 @@ describe FinalDestination do
}
end
let(:image_response) do
{
status: 200,
headers: { "Content-Type" => "image/jpeg" }
}
end
def redirect_response(from, dest)
stub_request(:head, from).to_return(
status: 302,
@ -317,6 +324,19 @@ describe FinalDestination do
expect(final.resolve.to_s).to eq("#{upstream_url}#L154-L205")
expect(final.status).to eq(:resolved)
end
context "content_type" do
before do
stub_request(:head, "https://eviltrout.com/this/is/an/image").to_return(image_response)
end
it "returns a content_type" do
final = FinalDestination.new("https://eviltrout.com/this/is/an/image", opts)
expect(final.resolve.to_s).to eq("https://eviltrout.com/this/is/an/image")
expect(final.content_type).to eq("image/jpeg")
expect(final.status).to eq(:resolved)
end
end
end
describe '.get' do

View File

@ -38,4 +38,8 @@ describe Onebox::Engine::ImageOnebox do
it "includes a direct link to the image" do
expect(Onebox.preview('http://www.discourse.org/images/logo.png').to_s).to match(/<a.*png/)
end
it "matches on content_type" do
expect(Onebox.preview('http://www.discourse.org/images/logo', { content_type: 'image/png' }).to_s).to match(/<img/)
end
end

View File

@ -50,6 +50,18 @@ describe Onebox::Engine do
end
end
describe "handles_content_type?" do
class OneboxEngineImages
include Onebox::Engine
@@matcher_content_type = /^image\/png$/
end
it "returns true if argument matches the matcher" do
result = OneboxEngineImages.handles_content_type?('image/png')
expect(result).to eq(true)
end
end
class AlwaysHttpsEngineExample < OneboxEngineExample
always_https
end

View File

@ -59,9 +59,16 @@ describe Onebox::Preview do
end
describe "#engine" do
let(:preview_image_url) { "http://www.example.com/image/without/file_extension" }
let(:preview_image) { described_class.new(preview_image_url, content_type: 'image/png') }
it "returns an engine" do
expect(preview.send(:engine)).to be_an(Onebox::Engine)
end
it "can match based on content_type" do
expect(preview_image.send(:engine)).to be_an(Onebox::Engine::ImageOnebox)
end
end
describe "xss" do