FIX: never block /srv/status which is used for health checks

This route is also very cheap to serve, so blocking it is not required.

It is still rate limited, among other protections, elsewhere.
Sam 2018-07-18 12:33:06 +10:00
parent 3874d40910
commit 379384ae1e
2 changed files with 18 additions and 7 deletions
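
Context for the fix: an external health check typically probes /srv/status with its own (or no) user agent and treats anything other than a 200 as the site being down, so crawler blocking must never apply to that route. A minimal sketch of such a probe, using only the Ruby standard library and a hypothetical hostname (illustrative only, not part of this commit):

require 'net/http'

# Hypothetical health-check probe: hits /srv/status and reports the site as
# down on anything other than HTTP 200. If crawler blocking returned a 403
# here, monitoring would flag a perfectly healthy site as broken.
def site_healthy?(host)
  response = Net::HTTP.get_response(URI("http://#{host}/srv/status"))
  response.is_a?(Net::HTTPOK)
rescue StandardError
  false
end

puts site_healthy?('discourse.example.com') ? 'healthy' : 'down'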


@@ -25,6 +25,7 @@ module Middleware
     @request.get? &&
       !@request.xhr? &&
       !@request.path.ends_with?('robots.txt') &&
+      !@request.path.ends_with?('srv/status') &&
       CrawlerDetection.is_blocked_crawler?(@request.env['HTTP_USER_AGENT'])
   end
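
To make the effect of the new condition concrete, here is a standalone sketch of the predicate's shape, with a plain Struct standing in for the request object, Ruby's end_with? standing in for Rails' ends_with?, and a fixed regexp standing in for CrawlerDetection.is_blocked_crawler? (all of these are assumptions for illustration, not Discourse code):

# Hypothetical stand-ins for the real request object and crawler detection.
Request = Struct.new(:http_method, :xhr, :path, :user_agent) do
  def get?; http_method == 'GET'; end
  def xhr?; xhr; end
end

def blocked_crawler?(request, blocked_ua: /Googlebot/)
  request.get? &&
    !request.xhr? &&
    !request.path.end_with?('robots.txt') &&
    !request.path.end_with?('srv/status') &&   # the new guard: health checks are never blocked
    request.user_agent.to_s.match?(blocked_ua) # stand-in for CrawlerDetection.is_blocked_crawler?
end

crawler_ua = 'Googlebot/2.1 (+http://www.google.com/bot.html)'
blocked_crawler?(Request.new('GET', false, '/srv/status', crawler_ua)) # => false, never blocked
blocked_crawler?(Request.new('GET', false, '/latest', crawler_ua))     # => true, still blocked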


@@ -173,35 +173,35 @@ describe Middleware::AnonymousCache::Helper do
   it "applies whitelisted_crawler_user_agents correctly" do
     SiteSetting.whitelisted_crawler_user_agents = 'Googlebot'

-    get '/srv/status', headers: {
+    get '/', headers: {
       'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
     }

     expect(@status).to eq(200)

-    get '/srv/status', headers: {
+    get '/', headers: {
       'HTTP_USER_AGENT' => 'Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)'
     }

     expect(@status).to eq(403)

-    get '/srv/status', headers: non_crawler
+    get '/', headers: non_crawler
     expect(@status).to eq(200)
   end

   it "applies blacklisted_crawler_user_agents correctly" do
     SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

-    get '/srv/status', headers: non_crawler
+    get '/', headers: non_crawler
     expect(@status).to eq(200)

-    get '/srv/status', headers: {
+    get '/', headers: {
       'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
     }

     expect(@status).to eq(403)

-    get '/srv/status', headers: {
+    get '/', headers: {
       'HTTP_USER_AGENT' => 'Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)'
     }
@@ -218,13 +218,23 @@ describe Middleware::AnonymousCache::Helper do
     expect(@status).to eq(200)
   end

-  it "blocked crawlers shouldn't log page views" do
+  it "should never block srv/status" do
     SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'

     get '/srv/status', headers: {
       'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
     }

+    expect(@status).to eq(200)
+  end
+
+  it "blocked crawlers shouldn't log page views" do
+    SiteSetting.blacklisted_crawler_user_agents = 'Googlebot'
+
+    get '/', headers: {
+      'HTTP_USER_AGENT' => 'Googlebot/2.1 (+http://www.google.com/bot.html)'
+    }
+
     expect(@env["discourse.request_tracker.skip"]).to eq(true)
   end