FEATURE: improve performance of anonymous cache

This commit introduces 2 features:

1. DISCOURSE_COMPRESS_ANON_CACHE (true|false, default false): this allows
you to optionally compress the anon cache body entries in Redis, can be
useful for high load sites with Redis that lives on a separate server to
to webs

2. DISCOURSE_ANON_CACHE_STORE_THRESHOLD (default 2), only pop entries into
redis if we observe them more than N times. This avoids situations where
a crawler can walk a big pile of topics and store them all in Redis never
to be used. Our default anon cache time for topics is only 60 seconds. Anon
cache is in place to avoid the "slashdot" effect where a single topic is
hit by 100s of people in one minute.
This commit is contained in:
Sam Saffron 2019-09-04 17:18:32 +10:00
parent 16681cb648
commit ed00f35306
6 changed files with 78 additions and 3 deletions

View File

@ -210,6 +210,7 @@ gem 'rubyzip', require: false
gem 'sshkey', require: false
gem 'rchardet', require: false
gem 'lz4-ruby', require: false, platform: :mri
if ENV["IMPORT"] == "1"
gem 'mysql2'

View File

@ -174,6 +174,7 @@ GEM
crass (~> 1.0.2)
nokogiri (>= 1.5.9)
lru_redux (1.1.0)
lz4-ruby (0.3.3)
mail (2.7.1)
mini_mime (>= 0.1.1)
maxminddb (0.1.22)
@ -467,6 +468,7 @@ DEPENDENCIES
logstash-logger
logster
lru_redux
lz4-ruby
mail
maxminddb
memory_profiler

View File

@ -266,3 +266,15 @@ enable_js_error_reporting = true
# Having a high number here is very low risk. Regular jobs are limited in scope and scale.
mini_scheduler_workers = 5
# enable compression on anonymous cache redis entries
# this slightly increases the cost of storing cache entries but can make it much
# cheaper to retrieve cache entries when redis is stores on a different machine to the one
# running the web
compress_anon_cache = false
# Only store entries in redis for anonymous cache if they are observed more than N times
# for a specific key
#
# This ensures there are no pathological cases where we keep storing data in anonymous cache
# never to use it, set to 1 to store immediately, set to 0 to disable anon cache
anon_cache_store_threshold = 2

View File

@ -73,7 +73,7 @@ module Middleware
end
def cache_key
@cache_key ||= "ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}|m=#{is_mobile?}|c=#{is_crawler?}|b=#{has_brotli?}|t=#{theme_ids.join(",")}"
@cache_key ||= "ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}|m=#{is_mobile?}|c=#{is_crawler?}|b=#{has_brotli?}|t=#{theme_ids.join(",")}#{GlobalSetting.compress_anon_cache}"
end
def theme_ids
@ -86,6 +86,10 @@ module Middleware
end
end
def cache_key_count
@cache_key_count ||= "#{cache_key}_count"
end
def cache_key_body
@cache_key_body ||= "#{cache_key}_body"
end
@ -154,8 +158,26 @@ module Middleware
!!(!has_auth_cookie? && get? && no_cache_bypass)
end
def compress(val)
if val && GlobalSetting.compress_anon_cache
require "lz4-ruby" if !defined?(LZ4)
LZ4::compress(val)
else
val
end
end
def decompress(val)
if val && GlobalSetting.compress_anon_cache
require "lz4-ruby" if !defined?(LZ4)
LZ4::uncompress(val)
else
val
end
end
def cached(env = {})
if body = $redis.get(cache_key_body)
if body = decompress($redis.get(cache_key_body))
if other = $redis.get(cache_key_other)
other = JSON.parse(other)
if req_params = other[1].delete(ADP)
@ -174,9 +196,27 @@ module Middleware
# that fills it up, this avoids a herd killing you, we can probably do this using a job or redis tricks
# but coordinating this is tricky
def cache(result, env = {})
return result if GlobalSetting.anon_cache_store_threshold == 0
status, headers, response = result
if status == 200 && cache_duration
if GlobalSetting.anon_cache_store_threshold > 1
count = $redis.eval(<<~REDIS, [cache_key_count], [cache_duration])
local current = redis.call("incr", KEYS[1])
redis.call("expire",KEYS[1],ARGV[1])
return current
REDIS
# technically lua will cast for us, but might as well be
# prudent here, hence the to_i
if count.to_i < GlobalSetting.anon_cache_store_threshold
headers["X-Discourse-Cached"] = "skip"
return [status, headers, response]
end
end
headers_stripped = headers.dup.delete_if { |k, _| ["Set-Cookie", "X-MiniProfiler-Ids"].include? k }
headers_stripped["X-Discourse-Cached"] = "true"
parts = []
@ -191,7 +231,7 @@ module Middleware
}
end
$redis.setex(cache_key_body, cache_duration, parts.join)
$redis.setex(cache_key_body, cache_duration, compress(parts.join))
$redis.setex(cache_key_other, cache_duration, [status, headers_stripped].to_json)
headers["X-Discourse-Cached"] = "store"

View File

@ -129,6 +129,24 @@ describe Middleware::AnonymousCache::Helper do
crawler.clear_cache
end
before do
global_setting :anon_cache_store_threshold, 1
end
it "compresses body on demand" do
global_setting :compress_anon_cache, true
payload = "x" * 1000
helper.cache([200, { "HELLO" => "WORLD" }, [payload]])
helper = new_helper("ANON_CACHE_DURATION" => 10)
expect(helper.cached).to eq([200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, [payload]])
# depends on i7z implementation, but lets assume it is stable unless we discover
# otherwise
expect($redis.get(helper.cache_key_body).length).to eq(16)
end
it "handles brotli switching" do
helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])

View File

@ -285,7 +285,9 @@ describe Middleware::RequestTracker do
}
tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60, "action_dispatch.request.parameters" => request_params))
expect(@data[:cache]).to eq("skip")
tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60, "action_dispatch.request.parameters" => request_params))
expect(@data[:cache]).to eq("store")
tracker.call(env("REQUEST_URI" => uri, "ANON_CACHE_DURATION" => 60))