FEATURE: Implement browser update in crawler view (#12448)

The browser-update script does not work correctly in some very old browsers
because the contents of <noscript> are not accessible from JavaScript.
For these browsers, the server can display the crawler page and add the
browser update notice.

Simply loading the browser-update script in the crawler view is not a
solution because that means all crawlers will also see it.
This commit is contained in:
Dan Ungureanu 2021-03-22 19:41:42 +02:00 committed by GitHub
parent 3e586ab25a
commit 4e46732346
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 41 additions and 2 deletions

View File

@ -56,12 +56,17 @@ class ApplicationController < ActionController::Base
SiteSetting.enable_escaped_fragments? && params.key?("_escaped_fragment_")
end
# Whether CrawlerDetection.show_browser_update? matches this request's
# user agent. The answer is computed once and then cached on the instance.
#
# Returns whatever CrawlerDetection.show_browser_update? returns.
def show_browser_update?
  # `||=` would re-run the check every time the cached answer is false, so
  # test whether the instance variable has been assigned instead.
  return @show_browser_update if defined?(@show_browser_update)

  @show_browser_update = CrawlerDetection.show_browser_update?(request.user_agent)
end
helper_method :show_browser_update?
# Returns a truthy value when all of the following hold for the request:
#   * a user agent is present,
#   * the content type is blank or includes 'html',
#   * params[:format] is neither 'json' nor 'rss',
#   * at least one of: has_escaped_fragment?, a "print" param,
#     show_browser_update?, or CrawlerDetection.crawler? for this user agent
#     and the HTTP_VIA header.
# The result is stored in @use_crawler_layout; because `||=` is used, only a
# truthy result is reused and a falsy one is recomputed on the next call.
# NOTE(review): the two consecutive `(has_escaped_fragment? ...` lines below
# look like the removed and added sides of a diff hunk; only the second one
# includes `show_browser_update?` -- confirm against the real file.
def use_crawler_layout?
@use_crawler_layout ||=
request.user_agent &&
(request.content_type.blank? || request.content_type.include?('html')) &&
!['json', 'rss'].include?(params[:format]) &&
(has_escaped_fragment? || params.key?("print") ||
(has_escaped_fragment? || params.key?("print") || show_browser_update? ||
CrawlerDetection.crawler?(request.user_agent, request.headers["HTTP_VIA"])
)
end

View File

@ -16,7 +16,9 @@
<%= theme_lookup("head_tag") %>
<%= render_google_universal_analytics_code %>
<%= yield :head %>
<% if show_browser_update? %>
<style>.buorg {position:absolute; z-index:111111; width:100%; top:0px; left:0px; background:#FDF2AB; text-align:left; font-family: sans-serif; color:#000; font-size: 14px;} .buorg div {padding: 8px;} .buorg a, .buorg a:visited {color:#E25600; text-decoration: underline;} @media print { .buorg { display: none !important; } }</style>
<% end %>
<%= build_plugin_html 'server:before-head-close-crawler' %>
</head>
<body class="crawler">
@ -67,6 +69,9 @@
</footer>
<%= theme_lookup("footer") %>
<%= theme_lookup("body_tag") %>
<% if show_browser_update? %>
<div class="buorg"><div><%= I18n.t("js.browser_update").html_safe %></div></div>
<% end %>
</body>
<%= yield :after_body %>
</html>

View File

@ -1539,6 +1539,11 @@ security:
default: "rss|bot|spider|crawler|facebook|archive|wayback|ping|monitor|lighthouse"
type: list
list_type: compact
# "|"-separated user agent fragments. CrawlerDetection.show_browser_update?
# reads this value and matches request user agents against it; a blank value
# makes that check always return false.
browser_update_user_agents:
hidden: true
default: "MSIE 6|MSIE 7|MSIE 8|MSIE 9"
type: list
list_type: compact
crawler_check_bypass_agents:
hidden: true
default: "cubot"

View File

@ -37,6 +37,14 @@ module CrawlerDetection
end
# Whether the given user agent matches the `browser_update_user_agents`
# site setting.
#
# user_agent - the User-Agent string of the request, or nil.
#
# Returns false when user_agent is nil or the setting is blank; otherwise
# returns whether user_agent matches the matcher that to_matcher builds from
# the setting value.
def self.show_browser_update?(user_agent)
  # nil does not respond to #match?, so bail out before reaching it.
  return false if user_agent.nil?

  patterns = SiteSetting.browser_update_user_agents
  return false if patterns.blank?

  # Matchers are cached per setting value, so a changed setting gets its own
  # matcher while an unchanged one is not rebuilt on every call.
  @browser_update_matchers ||= {}
  matcher = @browser_update_matchers[patterns] ||= to_matcher(patterns)
  user_agent.match?(matcher)
end
# Given a user_agent that returns true from crawler?, should its request be allowed?
def self.allow_crawler?(user_agent)
return true if SiteSetting.allowed_crawler_user_agents.blank? &&

View File

@ -69,6 +69,22 @@ describe CrawlerDetection do
end
describe 'show_browser_update?' do
  # Two fixed Internet Explorer user agents shared by the examples below:
  # one reporting "MSIE 10.0" and one reporting "MSIE 7.0".
  let(:msie_10_user_agent) { 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)' }
  let(:msie_7_user_agent) { 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/6.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)' }

  it 'always returns false if setting is empty' do
    SiteSetting.browser_update_user_agents = ""

    # With an empty setting, no user agent may trigger the notice.
    [msie_10_user_agent, msie_7_user_agent].each do |agent|
      expect(CrawlerDetection.show_browser_update?(agent)).to eq(false)
    end
  end

  it 'returns true if setting matches user agent' do
    SiteSetting.browser_update_user_agents = "MSIE 6|MSIE 7|MSIE 8|MSIE 9"

    # "MSIE 10.0" matches none of the configured fragments; "MSIE 7.0" does.
    expect(CrawlerDetection.show_browser_update?(msie_10_user_agent)).to eq(false)
    expect(CrawlerDetection.show_browser_update?(msie_7_user_agent)).to eq(true)
  end
end
describe 'allow_crawler?' do
it 'returns true if allowlist and blocklist are blank' do
expect(CrawlerDetection.allow_crawler?('Googlebot/2.1 (+http://www.google.com/bot.html)')).to eq(true)