FIX: More errors with non-ascii URLs

This commit is contained in:
Robin Ward 2017-03-07 11:21:26 -05:00
parent d1e587c10a
commit dad57fa033
3 changed files with 21 additions and 4 deletions

View File

@ -69,7 +69,7 @@ class TopicEmbed < ActiveRecord::Base
def self.find_remote(url)
require 'ruby-readability'
original_uri = URI.parse(url)
original_uri = URI.parse(URI.encode(url))
opts = {
tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote],
attributes: %w[href src class],
@ -81,7 +81,7 @@ class TopicEmbed < ActiveRecord::Base
embed_classname_whitelist = SiteSetting.embed_classname_whitelist if SiteSetting.embed_classname_whitelist.present?
response = FetchResponse.new
html = open(url, allow_redirections: :safe).read
html = open(URI.encode(url), allow_redirections: :safe).read
raw_doc = Nokogiri::HTML(html)
auth_element = raw_doc.at('meta[@name="author"]')
@ -107,7 +107,7 @@ class TopicEmbed < ActiveRecord::Base
src = node[url_param]
unless (src.nil? || src.empty?)
begin
uri = URI.parse(src)
uri = URI.parse(URI.encode(src))
unless uri.host
uri.scheme = original_uri.scheme
uri.host = original_uri.host
@ -145,7 +145,7 @@ class TopicEmbed < ActiveRecord::Base
# Convert any relative URLs to absolute. RSS is annoying for this.
def self.absolutize_urls(url, contents)
url = normalize_url(url)
uri = URI(url)
uri = URI(URI.encode(url))
prefix = "#{uri.scheme}://#{uri.host}"
prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443

View File

@ -34,6 +34,7 @@ class TopicRetriever
# It's possible another process or job found the embed already. So if that happened bail out.
return if TopicEmbed.where(embed_url: @embed_url).exists?
# First check RSS if that is enabled
if SiteSetting.feed_polling_enabled?
Jobs::PollFeed.new.execute({})

View File

@ -176,7 +176,23 @@ describe TopicEmbed do
it 'img node doesn\'t have other class' do
expect(response.body).to have_tag('img', without: { class: 'other' })
end
end
# Regression spec: TopicEmbed.find_remote must cope with a URL that contains
# non-ASCII characters and still return the title of the fetched page.
context "non-ascii URL" do
# URL whose final path segment is written in non-ASCII characters.
let(:url) { 'http://eviltrout.com/test/ماهی' }
# Canned HTML document with a non-ASCII <title> and body text.
let(:contents) { "<title>سلام</title><body>این یک پاراگراف آزمون است.</body>" }
# NOTE(review): presumably an embeddable host record has to exist for the
# example to pass; the reason is not visible in this block — confirm.
let!(:embeddable_host) { Fabricate(:embeddable_host) }
# Stand-in for the IO-like object that the stubbed `open` hands back.
let!(:file) { StringIO.new }
before do
# Make the fake file yield the canned HTML, and make TopicEmbed.open return
# that fake file so find_remote reads `contents`.
file.stubs(:read).returns contents
TopicEmbed.stubs(:open).returns file
end
it "doesn't throw an error" do
response = TopicEmbed.find_remote(url)
# The title must match the <title> element of the canned HTML.
expect(response.title).to eq("سلام")
end
end
end