FIX: ignore canonical link to localhost (#13577)

This commit is contained in:
Arpit Jalan 2021-06-30 13:55:17 +05:30 committed by GitHub
parent 6986b36985
commit b63c9febe8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 1 deletion

View File

@ -36,7 +36,7 @@ module Onebox
# prefer canonical link
canonical_link = doc.at('//link[@rel="canonical"]/@href')
canonical_uri = Addressable::URI.parse(canonical_link)
if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}" && canonical_uri.host != "localhost"
response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
doc = Nokogiri::HTML(response) if response
end

View File

@ -52,6 +52,23 @@ RSpec.describe Onebox::Helpers do
expect(described_class.fetch_html_doc(uri).to_s).to match("success")
end
# Examples covering how fetch_html_doc reacts to a <link rel="canonical"> in the fetched page.
context "canonical link" do
  it "follows canonical link" do
    # The fetched page advertises a canonical URL on a different host, and
    # only the canonical page contains the text the expectation looks for.
    page_url = 'https://www.example.com'
    page_html = "<!DOCTYPE html><link rel='canonical' href='http://foobar.com/'/><p>invalid</p>"
    canonical_html = "<!DOCTYPE html><p>success</p>"

    stub_request(:get, page_url).to_return(status: 200, body: page_html)
    stub_request(:get, 'http://foobar.com').to_return(status: 200, body: canonical_html)

    fetched_doc = described_class.fetch_html_doc(page_url)
    expect(fetched_doc.to_s).to match("success")
  end

  it "does not follow canonical link pointing at localhost" do
    # The canonical link targets localhost and only the original URL is
    # stubbed, so the expected text has to come from the original page body.
    page_url = 'https://www.example.com'
    page_html = "<!DOCTYPE html><link rel='canonical' href='http://localhost:3000/'/><p>success</p>"

    stub_request(:get, page_url).to_return(status: 200, body: page_html)

    fetched_doc = described_class.fetch_html_doc(page_url)
    expect(fetched_doc.to_s).to match("success")
  end
end
end
describe "redirects" do