FIX: never block /srv/status which is used for health checks

This route is also very cheap to serve, so blocking it is not required.

It is still rate limited, among other protections, elsewhere.
Sam 2018-07-18 12:33:06 +10:00
parent 3874d40910
commit 379384ae1e
2 changed files with 18 additions and 7 deletions
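
Context for the fix: an external health check typically probes /srv/status with its own (or no) user agent and treats anything other than a 200 as the site being down, so crawler blocking must never apply to that route. A minimal sketch of such a probe, using only the Ruby standard library and a hypothetical hostname (illustrative only, not part of this commit):

require 'net/http'

# Hypothetical health-check probe: hits /srv/status and reports the site as
# down on anything other than HTTP 200. If crawler blocking returned a 403
# here, monitoring would flag a perfectly healthy site as broken.
def site_healthy?(host)
  response = Net::HTTP.get_response(URI("http://#{host}/srv/status"))
  response.is_a?(Net::HTTPOK)
rescue StandardError
  false
end

puts site_healthy?('discourse.example.com') ? 'healthy' : 'down'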


@@ -25,6 +25,7 @@ module Middleware
     @request.get? &&
       !@request.xhr? &&
       !@request.path.ends_with?('robots.txt') &&
+      !@request.path.ends_with?('srv/status') &&
       CrawlerDetection.is_blocked_crawler?(@request.env['HTTP_USER_AGENT'])
   end
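
To make the effect of the new condition concrete, here is a standalone sketch of the predicate's shape, with a plain Struct standing in for the request object, Ruby's end_with? standing in for Rails' ends_with?, and a fixed regexp standing in for CrawlerDetection.is_blocked_crawler? (all of these are assumptions for illustration, not Discourse code):

# Hypothetical stand-ins for the real request object and crawler detection.
Request = Struct.new(:http_method, :xhr, :path, :user_agent) do
  def get?; http_method == 'GET'; end
  def xhr?; xhr; end
end

def blocked_crawler?(request, blocked_ua: /Googlebot/)
  request.get? &&
    !request.xhr? &&
    !request.path.end_with?('robots.txt') &&
    !request.path.end_with?('srv/status') &&   # the new guard: health checks are never blocked
    request.user_agent.to_s.match?(blocked_ua) # stand-in for CrawlerDetection.is_blocked_crawler?
end

crawler_ua = 'Googlebot/2.1 (+http://www.google.com/bot.html)'
blocked_crawler?(Request.new('GET', false, '/srv/status', crawler_ua)) # => false, never blocked
blocked_crawler?(Request.new('GET', false, '/latest', crawler_ua))     # => true, still blocked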


@@ -173,35 +173,35 @@ describe Middleware::AnonymousCache::Helper do
   it "applies whitelisted_crawler_user_agents correctly" do
     SiteSetting.whitelisted_crawler_user_agents = 'Googlebot'

-    get '/srv/status', headers: {
+    get '/', headers: {
       'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
     }

     expect(@status).to eq(200)

-    get '/srv/status', headers: {
+    get '/', headers: {
       'HTTP_USER_AGENT' => 'Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)'
     }

     expect(@status).to eq(403)

-    get '/srv/status', headers: non_crawler
+    get '/', headers: non_crawler
     expect(@status).to eq(200)
   end

   it "applies blacklisted_crawler_user_agents correctly" do
     SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

-    get '/srv/status', headers: non_crawler
+    get '/', headers: non_crawler
     expect(@status).to eq(200)

-    get '/srv/status', headers: {
+    get '/', headers: {
       'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
     }

     expect(@status).to eq(403)

-    get '/srv/status', headers: {
+    get '/', headers: {
       'HTTP_USER_AGENT' => 'Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)'
     }
@@ -218,13 +218,23 @@ describe Middleware::AnonymousCache::Helper do
     expect(@status).to eq(200)
   end

-  it "blocked crawlers shouldn't log page views" do
+  it "should never block srv/status" do
     SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

     get '/srv/status', headers: {
       'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
     }

+    expect(@status).to eq(200)
+  end
+
+  it "blocked crawlers shouldn't log page views" do
+    SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
+
+    get '/', headers: {
+      'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
+    }
+
     expect(@env["discourse.request_tracker.skip"]).to eq(true)
   end