FIX: broken onebox images due to url normalization bugs
normalized_encode in addressable has a number of issues, including https://github.com/sporkmonger/addressable/issues/472 To temporaily work around those issues for the majority of cases, we try parsing with `::URI`. If that fails (e.g. due to non-ascii characters) then we will fall back to addressable. Hopefully we can simplify this back to `Addressable::URI.normalized_encode` in the future. This commit also adds support for unicode domain names and emoji domain names with escape_uri. This removes an unneeded hack checking for pre-signed urls, which are now handled by the general case due to starting off valid and only being minimally normalized. Previous test case continues to pass. UrlHelper.s3_presigned_url? which was somewhat wide was removed.
This commit is contained in:
parent
3755bad03c
commit
f0a0252526
|
@ -60,9 +60,35 @@ class UrlHelper
|
|||
self.absolute(Upload.secure_media_url_from_upload_url(url), nil)
|
||||
end
|
||||
|
||||
# This is a poorly named method. In reality, it **normalizes** the given URL,
|
||||
# and does not escape it. Therefore it's idempotent, and can be used on user
|
||||
# input which includes a mix of escaped and unescaped characters
|
||||
def self.escape_uri(uri)
|
||||
return uri if s3_presigned_url?(uri)
|
||||
Addressable::URI.normalized_encode(uri)
|
||||
validated = nil
|
||||
url = uri.to_s
|
||||
|
||||
# Ideally we will jump straight to `Addressable::URI.normalized_encode`. However,
|
||||
# that implementation has some edge-case issues like https://github.com/sporkmonger/addressable/issues/472.
|
||||
# To temporaily work around those issues for the majority of cases, we try parsing with `::URI`.
|
||||
# If that fails (e.g. due to non-ascii characters) then we will fall back to addressable.
|
||||
# Hopefully we can simplify this back to `Addressable::URI.normalized_encode` in the future.
|
||||
|
||||
# edge case where we expect mailto:test%40test.com to normalize to mailto:test@test.com
|
||||
if url.match(/\Amailto:/)
|
||||
return normalize_with_addressable(url)
|
||||
end
|
||||
|
||||
# If it doesn't pass the regexp, it's definitely not gonna parse with URI.parse. Skip
|
||||
# to addressable
|
||||
if !url.match?(/\A#{URI::regexp}\z/)
|
||||
return normalize_with_addressable(url)
|
||||
end
|
||||
|
||||
begin
|
||||
normalize_with_ruby_uri(url)
|
||||
rescue URI::Error
|
||||
normalize_with_addressable(url)
|
||||
end
|
||||
end
|
||||
|
||||
def self.rails_route_from_url(url)
|
||||
|
@ -72,10 +98,6 @@ class UrlHelper
|
|||
nil
|
||||
end
|
||||
|
||||
def self.s3_presigned_url?(url)
|
||||
url[/x-amz-(algorithm|credential)/i].present?
|
||||
end
|
||||
|
||||
def self.cook_url(url, secure: false, local: nil)
|
||||
is_secure = SiteSetting.secure_media && secure
|
||||
local = is_local(url) if local.nil?
|
||||
|
@ -120,4 +142,30 @@ class UrlHelper
|
|||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def self.normalize_with_addressable(url)
|
||||
u = Addressable::URI.normalized_encode(url, Addressable::URI)
|
||||
|
||||
if u.host && !u.host.ascii_only?
|
||||
u.host = ::Addressable::IDNA.to_ascii(u.host)
|
||||
end
|
||||
|
||||
u.to_s
|
||||
end
|
||||
|
||||
def self.normalize_with_ruby_uri(url)
|
||||
u = URI.parse(url)
|
||||
|
||||
if u.scheme && u.scheme != u.scheme.downcase
|
||||
u.scheme = u.scheme.downcase
|
||||
end
|
||||
|
||||
if u.host && u.host != u.host.downcase
|
||||
u.host = u.host.downcase
|
||||
end
|
||||
|
||||
u.to_s
|
||||
end
|
||||
|
||||
end
|
||||
|
|
|
@ -26,7 +26,7 @@ RSpec.describe FinalDestination do
|
|||
when 'any-subdomain.ihaveawildcard.com' then '104.25.152.11'
|
||||
when 'wikipedia.com' then '1.2.3.4'
|
||||
else
|
||||
as_ip = IPAddr.new(host)
|
||||
_as_ip = IPAddr.new(host)
|
||||
host
|
||||
end
|
||||
end
|
||||
|
|
|
@ -86,6 +86,16 @@ RSpec.describe UrlHelper do
|
|||
end
|
||||
|
||||
describe "#escape_uri" do
|
||||
it "does not double escape %3A (:)" do
|
||||
url = "http://discourse.org/%3A/test"
|
||||
expect(UrlHelper.escape_uri(url)).to eq(url)
|
||||
end
|
||||
|
||||
it "does not double escape %2F (/)" do
|
||||
url = "http://discourse.org/%2F/test"
|
||||
expect(UrlHelper.escape_uri(url)).to eq(url)
|
||||
end
|
||||
|
||||
it "doesn't escape simple URL" do
|
||||
url = UrlHelper.escape_uri('http://example.com/foo/bar')
|
||||
expect(url).to eq('http://example.com/foo/bar')
|
||||
|
@ -107,8 +117,37 @@ RSpec.describe UrlHelper do
|
|||
end
|
||||
|
||||
it "doesn't escape already escaped chars (hash)" do
|
||||
url = UrlHelper.escape_uri('https://calendar.google.com/calendar/embed?src=en.uk%23holiday%40group.v.calendar.google.com&ctz=Europe%2FLondon')
|
||||
expect(url).to eq('https://calendar.google.com/calendar/embed?src=en.uk%23holiday@group.v.calendar.google.com&ctz=Europe/London')
|
||||
url = 'https://calendar.google.com/calendar/embed?src=en.uk%23holiday@group.v.calendar.google.com&ctz=Europe%2FLondon'
|
||||
escaped = UrlHelper.escape_uri(url)
|
||||
expect(escaped).to eq(url)
|
||||
end
|
||||
|
||||
it "leaves reserved chars alone in edge cases" do
|
||||
skip "see: https://github.com/sporkmonger/addressable/issues/472"
|
||||
url = "https://example.com/ article/id%3A1.2%2F1/bar"
|
||||
expected = "https://example.com/%20article/id%3A1.2%2F1/bar"
|
||||
escaped = UrlHelper.escape_uri(url)
|
||||
expect(escaped).to eq(expected)
|
||||
end
|
||||
|
||||
it "handles emoji domain names" do
|
||||
url = "https://💻.example/💻?computer=💻"
|
||||
expected = "https://xn--3s8h.example/%F0%9F%92%BB?computer=%F0%9F%92%BB"
|
||||
escaped = UrlHelper.escape_uri(url)
|
||||
expect(escaped).to eq(expected)
|
||||
end
|
||||
|
||||
it "handles special-character domain names" do
|
||||
url = "https://éxample.com/test"
|
||||
expected = "https://xn--xample-9ua.com/test"
|
||||
escaped = UrlHelper.escape_uri(url)
|
||||
expect(escaped).to eq(expected)
|
||||
end
|
||||
|
||||
it "performs basic normalization" do
|
||||
url = "http://EXAMPLE.com/a"
|
||||
escaped = UrlHelper.escape_uri(url)
|
||||
expect(escaped).to eq("http://example.com/a")
|
||||
end
|
||||
|
||||
it "doesn't escape S3 presigned URLs" do
|
||||
|
|
|
@ -568,27 +568,6 @@ RSpec.describe Upload do
|
|||
end
|
||||
end
|
||||
|
||||
describe ".signed_url_from_secure_media_url" do
|
||||
before do
|
||||
# must be done so signed_url_for_path exists
|
||||
enable_secure_media
|
||||
end
|
||||
|
||||
it "correctly gives back a signed url from a path only" do
|
||||
secure_url = "/secure-media-uploads/original/1X/c5a2c4ba0fa390f5aac5c2c1a12416791ebdd9e9.png"
|
||||
signed_url = Upload.signed_url_from_secure_media_url(secure_url)
|
||||
expect(signed_url).not_to include("secure-media-uploads")
|
||||
expect(UrlHelper.s3_presigned_url?(signed_url)).to eq(true)
|
||||
end
|
||||
|
||||
it "correctly gives back a signed url from a full url" do
|
||||
secure_url = "http://localhost:3000/secure-media-uploads/original/1X/c5a2c4ba0fa390f5aac5c2c1a12416791ebdd9e9.png"
|
||||
signed_url = Upload.signed_url_from_secure_media_url(secure_url)
|
||||
expect(signed_url).not_to include(Discourse.base_url)
|
||||
expect(UrlHelper.s3_presigned_url?(signed_url)).to eq(true)
|
||||
end
|
||||
end
|
||||
|
||||
describe ".secure_media_url_from_upload_url" do
|
||||
before do
|
||||
# must be done so signed_url_for_path exists
|
||||
|
|
Loading…
Reference in New Issue