From 5647819de40bd7900bc4e9556ef3a673075dab4d Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Silva Date: Thu, 25 Nov 2021 16:58:39 -0300 Subject: [PATCH] FEATURE: Send a 'noindex' header in non-canonical responses (#15026) * FEATURE: Optionally send a 'noindex' header in non-canonical responses This will be used in a SEO experiment. Co-authored-by: David Taylor --- app/controllers/application_controller.rb | 10 +++++++- config/site_settings.yml | 3 +++ lib/canonical_url.rb | 27 ++++++++++++-------- spec/requests/application_controller_spec.rb | 27 ++++++++++++++++++++ spec/requests/groups_controller_spec.rb | 6 ----- 5 files changed, 56 insertions(+), 17 deletions(-) diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index 0561be4c82e..8d1494136ea 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -41,13 +41,14 @@ class ApplicationController < ActionController::Base before_action :redirect_to_login_if_required before_action :block_if_requires_login before_action :preload_json - before_action :add_noindex_header, if: -> { is_feed_request? || !SiteSetting.allow_index_in_robots_txt } before_action :check_xhr after_action :add_readonly_header after_action :perform_refresh_session after_action :dont_cache_page after_action :conditionally_allow_site_embedding after_action :ensure_vary_header + after_action :add_noindex_header, if: -> { is_feed_request? || !SiteSetting.allow_index_in_robots_txt } + after_action :add_noindex_header_to_non_canonical, if: -> { request.get? && !(request.format && request.format.json?) && !request.xhr? } HONEYPOT_KEY ||= 'HONEYPOT_KEY' CHALLENGE_KEY ||= 'CHALLENGE_KEY' @@ -912,6 +913,13 @@ class ApplicationController < ActionController::Base end end + def add_noindex_header_to_non_canonical + canonical = (@canonical_url || @default_canonical) + if canonical.present? && canonical != request.url && !SiteSetting.allow_indexing_non_canonical_urls + response.headers['X-Robots-Tag'] ||= 'noindex' + end + end + protected def honeypot_value diff --git a/config/site_settings.yml b/config/site_settings.yml index 508981517d4..d82c83144a6 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -1602,6 +1602,9 @@ security: regex: "^(Lax|Strict|Disabled|None)$" enable_escaped_fragments: true allow_index_in_robots_txt: true + allow_indexing_non_canonical_urls: + default: true + hidden: true moderators_manage_categories_and_groups: false moderators_change_post_ownership: client: true diff --git a/lib/canonical_url.rb b/lib/canonical_url.rb index d318bd56de4..df66c37a6c7 100644 --- a/lib/canonical_url.rb +++ b/lib/canonical_url.rb @@ -2,6 +2,8 @@ module CanonicalURL module ControllerExtensions + ALLOWED_CANONICAL_PARAMS = %w(page) + def canonical_url(url_for_options = {}) case url_for_options when Hash @@ -10,22 +12,27 @@ module CanonicalURL @canonical_url = url_for_options end end + + def default_canonical + @default_canonical ||= begin + canonical = +"#{Discourse.base_url_no_prefix}#{request.path}" + allowed_params = params.select { |key| ALLOWED_CANONICAL_PARAMS.include?(key) } + if allowed_params.present? + canonical << "?#{allowed_params.keys.zip(allowed_params.values).map { |key, value| "#{key}=#{value}" }.join("&")}" + end + canonical + end + end + + def self.included(base) + base.helper_method :default_canonical + end end module Helpers - ALLOWED_CANONICAL_PARAMS = %w(page) def canonical_link_tag(url = nil) tag('link', rel: 'canonical', href: url || @canonical_url || default_canonical) end - - def default_canonical - canonical = +"#{Discourse.base_url_no_prefix}#{request.path}" - allowed_params = params.select { |key| ALLOWED_CANONICAL_PARAMS.include?(key) } - if allowed_params.present? - canonical << "?#{allowed_params.keys.zip(allowed_params.values).map { |key, value| "#{key}=#{value}" }.join("&")}" - end - canonical - end end end diff --git a/spec/requests/application_controller_spec.rb b/spec/requests/application_controller_spec.rb index 603d2f3fa03..34d9ecb07fc 100644 --- a/spec/requests/application_controller_spec.rb +++ b/spec/requests/application_controller_spec.rb @@ -668,6 +668,33 @@ RSpec.describe ApplicationController do expect(response.body).to have_tag("link", with: { rel: "canonical", href: "http://test.localhost/t/#{topic.slug}/#{topic.id}" }) end + it "adds a noindex header if non-canonical indexing is disabled" do + SiteSetting.allow_indexing_non_canonical_urls = false + get '/' + expect(response.headers['X-Robots-Tag']).to be_nil + + get '/latest' + expect(response.headers['X-Robots-Tag']).to be_nil + + get '/categories' + expect(response.headers['X-Robots-Tag']).to be_nil + + topic = create_post.topic + get "/t/#{topic.slug}/#{topic.id}" + expect(response.headers['X-Robots-Tag']).to be_nil + post = create_post(topic_id: topic.id) + get "/t/#{topic.slug}/#{topic.id}/2" + expect(response.headers['X-Robots-Tag']).to eq('noindex') + + 20.times do + create_post(topic_id: topic.id) + end + get "/t/#{topic.slug}/#{topic.id}/21" + expect(response.headers['X-Robots-Tag']).to eq('noindex') + get "/t/#{topic.slug}/#{topic.id}?page=2" + expect(response.headers['X-Robots-Tag']).to be_nil + end + context "default locale" do before do SiteSetting.default_locale = :fr diff --git a/spec/requests/groups_controller_spec.rb b/spec/requests/groups_controller_spec.rb index d918db850af..ff7b00a4a70 100644 --- a/spec/requests/groups_controller_spec.rb +++ b/spec/requests/groups_controller_spec.rb @@ -176,12 +176,6 @@ describe GroupsController do ) end - it 'should return correct X-Robots-Tag header when allow_index_in_robots_txt is set to false' do - SiteSetting.allow_index_in_robots_txt = false - get "/groups" - expect(response.headers['X-Robots-Tag']).to eq('noindex, nofollow') - end - context 'viewing groups of another user' do describe 'when an invalid username is given' do it 'should return the right response' do