FIX: Canonical URLs may be relative (#14825)
FinalDestination's `follow_canonical` mode, which is used for embedded topics, should work when canonical URLs are relative, as specified in [RFC 6596](https://datatracker.ietf.org/doc/html/rfc6596).
This commit is contained in:
parent
f444e3e3f7
commit
53abcd825d
|
@ -225,7 +225,7 @@ class FinalDestination
|
||||||
end
|
end
|
||||||
|
|
||||||
if @follow_canonical
|
if @follow_canonical
|
||||||
next_url = uri(fetch_canonical_url(response.body))
|
next_url = fetch_canonical_url(response.body)
|
||||||
|
|
||||||
if next_url.to_s.present? && next_url != @uri
|
if next_url.to_s.present? && next_url != @uri
|
||||||
@follow_canonical = false
|
@follow_canonical = false
|
||||||
|
@ -481,10 +481,17 @@ class FinalDestination
|
||||||
|
|
||||||
def fetch_canonical_url(body)
|
def fetch_canonical_url(body)
|
||||||
return if body.blank?
|
return if body.blank?
|
||||||
canonical_link = Nokogiri::HTML5(body).at("link[rel='canonical']")
|
|
||||||
|
|
||||||
return if canonical_link.nil?
|
canonical_element = Nokogiri::HTML5(body).at("link[rel='canonical']")
|
||||||
|
return if canonical_element.nil?
|
||||||
|
canonical_uri = uri(canonical_element['href'])
|
||||||
|
return if canonical_uri.blank?
|
||||||
|
|
||||||
canonical_link['href']
|
return canonical_uri if canonical_uri.host.present?
|
||||||
|
parts = [@uri.host, canonical_uri.to_s]
|
||||||
|
complete_url = canonical_uri.to_s.starts_with?('/') ? parts.join('') : parts.join('/')
|
||||||
|
complete_url = "#{@uri.scheme}://#{complete_url}" if @uri.scheme
|
||||||
|
|
||||||
|
uri(complete_url)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -194,6 +194,31 @@ describe FinalDestination do
|
||||||
expect(final.status).to eq(:resolved)
|
expect(final.status).to eq(:resolved)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'resolves the canonical link when the URL is relative' do
|
||||||
|
host = "https://codinghorror.com"
|
||||||
|
|
||||||
|
canonical_follow("#{host}/blog", "/blog/canonical")
|
||||||
|
stub_request(:head, "#{host}/blog/canonical").to_return(doc_response)
|
||||||
|
|
||||||
|
final = FinalDestination.new("#{host}/blog", opts.merge(follow_canonical: true))
|
||||||
|
|
||||||
|
expect(final.resolve.to_s).to eq("#{host}/blog/canonical")
|
||||||
|
expect(final.redirected?).to eq(false)
|
||||||
|
expect(final.status).to eq(:resolved)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'resolves the canonical link when the URL is relative and does not start with the / symbol' do
|
||||||
|
host = "https://codinghorror.com"
|
||||||
|
canonical_follow("#{host}/blog", "blog/canonical")
|
||||||
|
stub_request(:head, "#{host}/blog/canonical").to_return(doc_response)
|
||||||
|
|
||||||
|
final = FinalDestination.new("#{host}/blog", opts.merge(follow_canonical: true))
|
||||||
|
|
||||||
|
expect(final.resolve.to_s).to eq("#{host}/blog/canonical")
|
||||||
|
expect(final.redirected?).to eq(false)
|
||||||
|
expect(final.status).to eq(:resolved)
|
||||||
|
end
|
||||||
|
|
||||||
it "does not follow the canonical link if it's the same as the current URL" do
|
it "does not follow the canonical link if it's the same as the current URL" do
|
||||||
canonical_follow("https://eviltrout.com", "https://eviltrout.com")
|
canonical_follow("https://eviltrout.com", "https://eviltrout.com")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue