remove crawler blocking until multisite support

Neil Lalonde 2018-07-03 17:54:45 -04:00
parent c3129444ea
commit e8a6323bea
2 changed files with 5 additions and 58 deletions

@@ -175,11 +175,11 @@ class Middleware::RequestTracker
       return result
     end
 
-    if block_crawler(request)
-      log_request = false
-      result = [403, { 'Content-Type' => 'text/plain' }, ['Crawler is not allowed']]
-      return result
-    end
+    # if block_crawler(request)
+    #   log_request = false
+    #   result = [403, { 'Content-Type' => 'text/plain' }, ["Crawler is not allowed."]]
+    #   return result
+    # end
 
     env["discourse.request_tracker"] = self
     MethodProfiler.start
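
The specs removed below pin down exactly what this commit switches off. As a rough sketch only (assuming block_crawler consults the whitelisted_crawler_user_agents and blacklisted_crawler_user_agents site settings; this is not Discourse's actual implementation), the decision those specs encode looks like this:

# Illustrative sketch, not the real block_crawler. With a whitelist set,
# any user agent that misses the list is blocked; with a blacklist set,
# only matching agents are blocked; requests without a user agent pass.
def crawler_blocked?(user_agent, whitelist: [], blacklist: [])
  return false if user_agent.nil?
  return whitelist.none? { |allowed| user_agent.include?(allowed) } if whitelist.any?
  blacklist.any? { |blocked| user_agent.include?(blocked) }
end

crawler_blocked?('Twitterbot', whitelist: ['Googlebot'])   # => true, i.e. 403
crawler_blocked?('Googlebot/2.1 (+http://www.google.com/bot.html)',
                 whitelist: ['Googlebot'])                 # => false, i.e. 200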

@@ -286,57 +286,4 @@ describe Middleware::RequestTracker do
     end
   end
-
-  context "crawler blocking" do
-    let :middleware do
-      app = lambda do |env|
-        [200, {}, ['OK']]
-      end
-
-      Middleware::RequestTracker.new(app)
-    end
-
-    def expect_success_response(status, _, response)
-      expect(status).to eq(200)
-      expect(response).to eq(['OK'])
-    end
-
-    def expect_blocked_response(status, _, response)
-      expect(status).to eq(403)
-      expect(response).to eq(['Crawler is not allowed'])
-    end
-
-    it "applies whitelisted_crawler_user_agents correctly" do
-      SiteSetting.whitelisted_crawler_user_agents = 'Googlebot'
-      expect_success_response(*middleware.call(env))
-      expect_blocked_response(*middleware.call(env('HTTP_USER_AGENT' => 'Twitterbot')))
-      expect_success_response(*middleware.call(env('HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)')))
-      expect_blocked_response(*middleware.call(env('HTTP_USER_AGENT' => 'DiscourseAPI Ruby Gem 0.19.0')))
-    end
-
-    it "applies blacklisted_crawler_user_agents correctly" do
-      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
-      expect_success_response(*middleware.call(env))
-      expect_blocked_response(*middleware.call(env('HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)')))
-      expect_success_response(*middleware.call(env('HTTP_USER_AGENT' => 'Twitterbot')))
-      expect_success_response(*middleware.call(env('HTTP_USER_AGENT' => 'DiscourseAPI Ruby Gem 0.19.0')))
-    end
-
-    it "blocked crawlers shouldn't log page views" do
-      ApplicationRequest.clear_cache!
-      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
-      expect {
-        middleware.call(env('HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'))
-        ApplicationRequest.write_cache!
-      }.to_not change { ApplicationRequest.count }
-    end
-
-    it "blocks json requests" do
-      SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
-      expect_blocked_response(*middleware.call(env(
-        'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
-        'HTTP_ACCEPT' => 'application/json'
-      )))
-    end
-  end
 end
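
For context, the env helper these specs call is defined earlier in the spec file and is not part of this diff. A plausible minimal stand-in (an assumption, not the file's actual helper) built on Rack::MockRequest:

require 'rack/mock'

# Hypothetical version of the spec's env helper: build a bare Rack
# environment for a GET request, then merge in any header overrides,
# e.g. env('HTTP_USER_AGENT' => 'Twitterbot').
def env(opts = {})
  Rack::MockRequest.env_for('/path?bla=1').merge(opts)
end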