FIX: blacklisted crawlers could get through by omitting the accept header

Neil Lalonde 2018-04-17 12:39:21 -04:00
parent 059f1d8df4
commit b87fa6d749
2 changed files with 2 additions and 3 deletions


@@ -289,7 +289,6 @@ class Middleware::RequestTracker
   def block_crawler(request)
     request.get? &&
     !request.xhr? &&
-    request.env['HTTP_ACCEPT'] =~ /text\/html/ &&
     !request.path.ends_with?('robots.txt') &&
     CrawlerDetection.is_blocked_crawler?(request.env['HTTP_USER_AGENT'])
   end
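
The deleted condition was the loophole: when a request omits the Accept header, request.env['HTTP_ACCEPT'] is nil and nil =~ /text\/html/ is also nil, so the whole && chain went falsy and block_crawler let the blacklisted crawler through. With the check removed, blocking depends only on the request method, the XHR flag, the robots.txt exemption, and the user agent. A minimal sketch of the old behaviour, using plain hashes as hypothetical stand-ins for a Rack env (not Discourse's spec helpers):

    # Sketch of the removed check's behaviour. The hashes below are hypothetical
    # stand-ins for a Rack env, not Discourse's env() helper.
    old_accept_check = ->(env) { env['HTTP_ACCEPT'] =~ /text\/html/ }

    browser_like   = { 'HTTP_USER_AGENT' => 'Googlebot', 'HTTP_ACCEPT' => 'text/html,application/xhtml+xml' }
    header_omitted = { 'HTTP_USER_AGENT' => 'Googlebot' }

    old_accept_check.call(browser_like)   # => 0 (truthy), so the rest of the && chain still runs
    old_accept_check.call(header_omitted) # => nil, so block_crawler returned a falsy value and the crawler slipped through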


@@ -330,9 +330,9 @@ describe Middleware::RequestTracker do
       }.to_not change { ApplicationRequest.count }
     end

-    it "allows json requests" do
+    it "blocks json requests" do
       SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
-      expect_success_response(*middleware.call(env(
+      expect_blocked_response(*middleware.call(env(
         'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
         'HTTP_ACCEPT' => 'application/json'
       )))
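
For reference, a hypothetical companion spec (not part of this commit) that would cover the reported bypass directly by omitting HTTP_ACCEPT altogether; it assumes the same env, middleware, and expect_blocked_response helpers visible in the hunk above:

    # Hypothetical spec, not in this commit: a blacklisted crawler that sends no
    # Accept header at all should now be blocked as well.
    it "blocks requests that omit the accept header" do
      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
      expect_blocked_response(*middleware.call(env(
        'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
      )))
    end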