FIX: URLs containing two # would fail to work

Some URLs in browsers are non compliant and contain twos `#` this commit adds
special handling for this edge case by auto encoding any fragments containing `#`
This commit is contained in:
Sam 2018-12-11 18:03:13 +11:00
parent bb4ef644bf
commit 671469bcc7
6 changed files with 47 additions and 26 deletions

View File

@ -117,11 +117,7 @@ SQL
PrettyText
.extract_links(post.cooked)
.map do |u|
uri = begin
URI.parse(u.url)
rescue URI::Error
end
uri = UrlHelper.relaxed_parse(u.url)
[u, uri]
end
.reject { |_, p| p.nil? || "mailto".freeze == p.scheme }

View File

@ -15,11 +15,7 @@ class TopicLinkClick < ActiveRecord::Base
url = args[:url][0...TopicLink.max_url_length]
return nil if url.blank?
uri = begin
URI.parse(url)
rescue URI::Error
end
uri = UrlHelper.relaxed_parse(url)
urls = Set.new
urls << url
if url =~ /^http/

View File

@ -1,5 +1,20 @@
class UrlHelper
# At the moment this handles invalid URLs that browser address bar accepts
# where second # is not encoded
#
# Longer term we can add support of simpleidn and encode unicode domains
def self.relaxed_parse(url)
url, fragment = url.split("#", 2)
uri = URI.parse(url)
if uri
fragment = URI.escape(fragment) if fragment&.include?('#')
uri.fragment = fragment
uri
end
rescue URI::Error
end
def self.is_local(url)
url.present? && (
Discourse.store.has_been_uploaded?(url) ||

View File

@ -3,6 +3,15 @@ require_dependency 'url_helper'
describe UrlHelper do
describe "#relaxed parse" do
it "can handle double #" do
url = UrlHelper.relaxed_parse("https://test.com#test#test")
expect(url.to_s).to eq("https://test.com#test%23test")
end
end
describe "#is_local" do
it "is true when the file has been uploaded" do

View File

@ -95,27 +95,28 @@ Fabricator(:post_with_uploads, from: :post) do
end
Fabricator(:post_with_uploads_and_links, from: :post) do
raw '
<a href="/uploads/default/original/2X/2345678901234567.jpg">Link</a>
<img src="/uploads/default/original/1X/1234567890123456.jpg">
<a href="http://www.google.com">Google</a>
<img src="http://foo.bar/image.png">
<a class="attachment" href="/uploads/default/original/1X/af2c2618032c679333bebf745e75f9088748d737.txt">text.txt</a> (20 Bytes)
:smile:
'
raw <<~RAW
<a href="/uploads/default/original/2X/2345678901234567.jpg">Link</a>
<img src="/uploads/default/original/1X/1234567890123456.jpg">
<a href="http://www.google.com">Google</a>
<img src="http://foo.bar/image.png">
<a class="attachment" href="/uploads/default/original/1X/af2c2618032c679333bebf745e75f9088748d737.txt">text.txt</a> (20 Bytes)
:smile:
RAW
end
Fabricator(:post_with_external_links, from: :post) do
user
topic
raw "
Here's a link to twitter: http://twitter.com
And a link to google: http://google.com
And a secure link to google: https://google.com
And a markdown link: [forumwarz](http://forumwarz.com)
And a markdown link with a period after it [codinghorror](http://www.codinghorror.com/blog).
And one with a hash http://discourse.org#faq
"
raw <<~RAW
Here's a link to twitter: http://twitter.com
And a link to google: http://google.com
And a secure link to google: https://google.com
And a markdown link: [forumwarz](http://forumwarz.com)
And a markdown link with a period after it [codinghorror](http://www.codinghorror.com/blog).
And one with a hash http://discourse.org#faq
And one with a two hash http://discourse.org#a#b
RAW
end
Fabricator(:private_message_post, from: :post) do

View File

@ -54,6 +54,10 @@ describe TopicLinkClick do
TopicLinkClick.create_from(url: @topic_link.url, post_id: @post.id, ip: '127.0.0.0', user_id: @post.user_id)
}.not_to change(TopicLinkClick, :count)
# can handle double # in a url
# NOTE: this is not compliant but exists in the wild
click = TopicLinkClick.create_from(url: "http://discourse.org#a#b", post_id: @post.id, ip: '127.0.0.1')
expect(click).to eq("http://discourse.org#a#b")
end
context 'with a valid url and post_id' do