FEATURE: default canonical URL (#9738)

For pages that do not specify a canonical URL, we now default to `https://SITENAME/PATH`.

This ensures that if a URL is crawled on the CDN, the search ranking will transfer to the main site.

Additionally, we whitelist the `?page` query parameter so that it is preserved in the default canonical URL.
This commit is contained in:
Krzysztof Kotlarek 2020-05-12 09:13:20 +10:00 committed by GitHub
parent 7cdf41d311
commit 4c8bece104
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 2 deletions

View File

@ -13,9 +13,18 @@ module CanonicalURL
end
require 'uri' # stdlib; provides URI.encode_www_form_component used in default_canonical

# View helpers for emitting the page's canonical `<link>` tag.
module Helpers
  # Query-string parameters that are carried over into the default canonical
  # URL; every other request parameter is dropped.
  ALLOWED_CANONICAL_PARAMS = %w(page)

  # Builds the canonical link tag for the current page.
  #
  # @param url [String, nil] explicit canonical URL; takes precedence over
  #   everything else when given
  # @return the result of the `tag` helper for a `link` element whose `href`
  #   is, in order of preference: `url`, `@canonical_url`, or
  #   `default_canonical`
  def canonical_link_tag(url = nil)
    tag('link', rel: 'canonical', href: url || @canonical_url || default_canonical)
  end

  # Canonical URL used when the page did not specify one: the site base URL
  # followed by the request path, plus any whitelisted query parameters.
  # Pointing at the main site means a page crawled through another host
  # (e.g. a CDN) is still attributed to the main site.
  #
  # @return [String] the default canonical URL for the current request
  def default_canonical
    # Unary plus yields a mutable string so the query can be appended in place.
    canonical = +"#{Discourse.base_url_no_prefix}#{request.path}"
    allowed_params = params.select { |key| ALLOWED_CANONICAL_PARAMS.include?(key) }

    unless allowed_params.empty?
      # Percent-encode both sides: a raw value such as "2&x=1" would otherwise
      # inject a parameter that is not on the whitelist into the canonical URL.
      query = allowed_params.keys.zip(allowed_params.values).map do |key, value|
        "#{URI.encode_www_form_component(key)}=#{URI.encode_www_form_component(value)}"
      end
      canonical << "?#{query.join('&')}"
    end

    canonical
  end
end
end

View File

@ -605,4 +605,18 @@ RSpec.describe ApplicationController do
expect(response.status).to eq(200)
expect(response.body).to include('Discourse')
end
it 'has canonical tag' do
  # Request path => canonical href the rendered page must advertise.
  # Only the whitelisted `page` parameter is expected to survive.
  anonymous_cases = [
    ['/', "http://test.localhost/"],
    ['/?query_param=true', "http://test.localhost/"],
    ['/latest?page=2&additional_param=true', "http://test.localhost/latest?page=2"],
    ['/404', "http://test.localhost/404"],
  ]

  anonymous_cases.each do |request_path, canonical_href|
    get request_path, headers: { HTTP_ACCEPT: '*/*' }
    expect(response.body).to have_tag("link", with: { rel: "canonical", href: canonical_href })
  end

  # Topic pages are requested without the explicit Accept header.
  topic = create_post.topic
  topic_path = "/t/#{topic.slug}/#{topic.id}"
  get topic_path
  expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost#{topic_path}" })
end
end