FIX: blacklisted crawlers could get through by omitting the accept header
parent 059f1d8df4
commit b87fa6d749
@@ -289,7 +289,6 @@ class Middleware::RequestTracker
   def block_crawler(request)
     request.get? &&
     !request.xhr? &&
-    request.env['HTTP_ACCEPT'] =~ /text\/html/ &&
     !request.path.ends_with?('robots.txt') &&
     CrawlerDetection.is_blocked_crawler?(request.env['HTTP_USER_AGENT'])
   end
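Why dropping the Accept check fixes the bug: when a request carries no Accept header at all, request.env['HTTP_ACCEPT'] is nil, and nil =~ /text\/html/ evaluates to nil, so the old && chain went falsy and block_crawler never blocked the request. A minimal sketch (illustrative only, not part of the commit; the env hash is a stand-in for the real Rack env):

# Illustrative only: how the old guard reacted to a crawler that
# omitted the Accept header entirely.
env = { 'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)' }

old_accept_check = env['HTTP_ACCEPT'] =~ /text\/html/
puts old_accept_check.inspect
# => nil -- falsy, so the && chain short-circuited and the blacklisted
# crawler sailed through, regardless of its user agent.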
@@ -330,9 +330,9 @@ describe Middleware::RequestTracker do
     }.to_not change { ApplicationRequest.count }
   end

-  it "allows json requests" do
+  it "blocks json requests" do
     SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
-    expect_success_response(*middleware.call(env(
+    expect_blocked_response(*middleware.call(env(
       'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
       'HTTP_ACCEPT' => 'application/json'
     )))
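The spec now asserts that a blacklisted crawler sending Accept: application/json is blocked rather than allowed. The helpers expect_blocked_response and expect_success_response are defined elsewhere in the spec file; a hedged sketch of what they plausibly assert, where the 200/403 status codes are assumptions and not taken from this commit:

# Hypothetical sketch of the helpers named in the diff above; the real
# definitions live elsewhere in the spec file. Status codes are assumed.
def expect_success_response(status, _headers, _body)
  expect(status).to eq(200) # assumed: normal traffic passes through
end

def expect_blocked_response(status, _headers, _body)
  expect(status).to eq(403) # assumed: middleware rejects blocked crawlers
end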