From 4c8bece104867ae21eab61646afa2504d75e9707 Mon Sep 17 00:00:00 2001 From: Krzysztof Kotlarek Date: Tue, 12 May 2020 09:13:20 +1000 Subject: [PATCH] FEATURE: default canonical URL (#9738) For pages that do not specify canonical URL we will default to `https://SITENAME/PATH`. This ensures that if a URL is crawled on the CDN the search ranking will transfer to the main site. Additionally we whitelist the `?page` param --- lib/canonical_url.rb | 13 +++++++++++-- spec/requests/application_controller_spec.rb | 14 ++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/lib/canonical_url.rb b/lib/canonical_url.rb index b46f006f1cf..d318bd56de4 100644 --- a/lib/canonical_url.rb +++ b/lib/canonical_url.rb @@ -13,9 +13,18 @@ module CanonicalURL end module Helpers + ALLOWED_CANONICAL_PARAMS = %w(page) def canonical_link_tag(url = nil) - return '' unless url || @canonical_url - tag('link', rel: 'canonical', href: url || @canonical_url || request.url) + tag('link', rel: 'canonical', href: url || @canonical_url || default_canonical) + end + + def default_canonical + canonical = +"#{Discourse.base_url_no_prefix}#{request.path}" + allowed_params = params.select { |key| ALLOWED_CANONICAL_PARAMS.include?(key) } + if allowed_params.present? + canonical << "?#{allowed_params.keys.zip(allowed_params.values).map { |key, value| "#{key}=#{value}" }.join("&")}" + end + canonical end end end diff --git a/spec/requests/application_controller_spec.rb b/spec/requests/application_controller_spec.rb index 4a961746109..8e4dc439d87 100644 --- a/spec/requests/application_controller_spec.rb +++ b/spec/requests/application_controller_spec.rb @@ -605,4 +605,18 @@ RSpec.describe ApplicationController do expect(response.status).to eq(200) expect(response.body).to include('Discourse') end + + it 'has canonical tag' do + get '/', headers: { HTTP_ACCEPT: '*/*' } + expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/" }) + get '/?query_param=true', headers: { HTTP_ACCEPT: '*/*' } + expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/" }) + get '/latest?page=2&additional_param=true', headers: { HTTP_ACCEPT: '*/*' } + expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/latest?page=2" }) + get '/404', headers: { HTTP_ACCEPT: '*/*' } + expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/404" }) + topic = create_post.topic + get "/t/#{topic.slug}/#{topic.id}" + expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/t/#{topic.slug}/#{topic.id}" }) + end end