discourse/lib/middleware/anonymous_cache.rb

412 lines
12 KiB
Ruby

# frozen_string_literal: true
require "mobile_detection"
require "crawler_detection"
require "guardian"
require "http_language_parser"
require "http_user_agent_encoder"
module Middleware
class AnonymousCache
def self.cache_key_segments
@@cache_key_segments ||= {
m: "key_is_mobile?",
c: "key_is_crawler?",
o: "key_is_old_browser?",
d: "key_is_modern_mobile_device?",
b: "key_has_brotli?",
t: "key_cache_theme_ids",
ca: "key_compress_anon",
l: "key_locale",
}
end
# Compile a string builder method that will be called to create
# an anonymous cache key
def self.compile_key_builder
method = +"def self.__compiled_key_builder(h)\n \""
cache_key_segments.each do |k, v|
raise "Invalid key name" unless k =~ /\A[a-z]+\z/
raise "Invalid method name" unless v =~ /\Akey_[a-z_\?]+\z/
method << "|#{k}=#\{h.#{v}}"
end
method << "\"\nend"
eval(method) # rubocop:disable Security/Eval
@@compiled = true
end
def self.build_cache_key(helper)
compile_key_builder unless defined?(@@compiled)
__compiled_key_builder(helper)
end
def self.anon_cache(env, duration)
env["ANON_CACHE_DURATION"] = duration
end
def self.clear_all_cache!
if Rails.env.production?
raise "for perf reasons, clear_all_cache! cannot be used in production."
end
Discourse.redis.keys("ANON_CACHE_*").each { |k| Discourse.redis.del(k) }
end
def self.disable_anon_cache
@@disabled = true
end
def self.enable_anon_cache
@@disabled = false
end
# This gives us an API to insert anonymous cache segments
class Helper
RACK_SESSION = "rack.session"
USER_AGENT = "HTTP_USER_AGENT"
ACCEPT_ENCODING = "HTTP_ACCEPT_ENCODING"
DISCOURSE_RENDER = "HTTP_DISCOURSE_RENDER"
REDIS_STORE_SCRIPT = DiscourseRedis::EvalHelper.new <<~LUA
local current = redis.call("incr", KEYS[1])
redis.call("expire",KEYS[1],ARGV[1])
return current
LUA
def initialize(env, request = nil)
@env = env
@user_agent = HttpUserAgentEncoder.ensure_utf8(@env[USER_AGENT])
@request = request || Rack::Request.new(@env)
end
def crawler_identifier
@user_agent
end
def blocked_crawler?
@request.get? && !@request.xhr? && !@request.path.ends_with?("robots.txt") &&
!@request.path.ends_with?("srv/status") &&
@request[Auth::DefaultCurrentUserProvider::API_KEY].nil? &&
@env[Auth::DefaultCurrentUserProvider::USER_API_KEY].nil? &&
@env[Auth::DefaultCurrentUserProvider::HEADER_API_KEY].nil? &&
CrawlerDetection.is_blocked_crawler?(crawler_identifier)
end
# rubocop:disable Lint/BooleanSymbol
def is_mobile=(val)
@is_mobile = val ? :true : :false
end
def is_mobile?
@is_mobile ||=
begin
session = @env[RACK_SESSION]
# don't initialize params until later
# otherwise you get a broken params on the request
params = {}
MobileDetection.resolve_mobile_view!(@user_agent, params, session) ? :true : :false
end
@is_mobile == :true
end
alias_method :key_is_mobile?, :is_mobile?
def key_has_brotli?
@has_brotli ||=
begin
@env[ACCEPT_ENCODING].to_s =~ /br/ ? :true : :false
end
@has_brotli == :true
end
# rubocop:enable Lint/BooleanSymbol
def key_locale
if locale = Discourse.anonymous_locale(@request)
locale
else
"" # No need to key, it is the same for all anon users
end
end
# rubocop:disable Lint/BooleanSymbol
def is_crawler?
@is_crawler ||=
begin
if @env[DISCOURSE_RENDER] == "crawler" ||
CrawlerDetection.crawler?(@user_agent, @env["HTTP_VIA"])
:true
else
if @user_agent.downcase.include?("discourse") &&
!@user_agent.downcase.include?("mobile")
:true
else
:false
end
end
end
@is_crawler == :true
end
alias_method :key_is_crawler?, :is_crawler?
# rubocop:enable Lint/BooleanSymbol
def key_is_modern_mobile_device?
MobileDetection.modern_mobile_device?(@user_agent) if @user_agent
end
def key_is_old_browser?
CrawlerDetection.show_browser_update?(@user_agent) if @user_agent
end
def cache_key
return @cache_key if defined?(@cache_key)
# Rack `xhr?` performs a case sensitive comparison, but Rails `xhr?`
# performs a case insensitive comparison. We use the latter everywhere
# else in the application, so we should use it here as well.
is_xhr = @env["HTTP_X_REQUESTED_WITH"]&.casecmp("XMLHttpRequest") == 0 ? "t" : "f"
@cache_key =
+"ANON_CACHE_#{is_xhr}_#{@env["HTTP_ACCEPT"]}_#{@env[Rack::RACK_URL_SCHEME]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}"
@cache_key << AnonymousCache.build_cache_key(self)
@cache_key
end
def key_cache_theme_ids
theme_ids.join(",")
end
def key_compress_anon
GlobalSetting.compress_anon_cache
end
def theme_ids
ids, _ = @request.cookies["theme_ids"]&.split("|")
id = ids&.split(",")&.map(&:to_i)&.first
if id && Guardian.new.allow_themes?([id])
Theme.transform_ids(id)
else
[]
end
end
def cache_key_count
@cache_key_count ||= "#{cache_key}_count"
end
def cache_key_body
@cache_key_body ||= "#{cache_key}_body"
end
def cache_key_other
@cache_key_other || "#{cache_key}_other"
end
def get?
@env["REQUEST_METHOD"] == "GET"
end
def has_auth_cookie?
CurrentUser.has_auth_cookie?(@env)
end
def no_cache_bypass
request = Rack::Request.new(@env)
request.cookies["_bypass_cache"].nil? && (request.path != "/srv/status") &&
request[Auth::DefaultCurrentUserProvider::API_KEY].nil? &&
@env[Auth::DefaultCurrentUserProvider::HEADER_API_KEY].nil? &&
@env[Auth::DefaultCurrentUserProvider::USER_API_KEY].nil?
end
def force_anonymous!
@env[Auth::DefaultCurrentUserProvider::USER_API_KEY] = nil
@env[Auth::DefaultCurrentUserProvider::HEADER_API_KEY] = nil
@env["HTTP_COOKIE"] = nil
@env["HTTP_DISCOURSE_LOGGED_IN"] = nil
@env["rack.request.cookie.hash"] = {}
@env["rack.request.cookie.string"] = ""
@env["_bypass_cache"] = nil
request = Rack::Request.new(@env)
request.delete_param("api_username")
request.delete_param("api_key")
end
def logged_in_anon_limiter
@logged_in_anon_limiter ||=
RateLimiter.new(
nil,
"logged_in_anon_cache_#{@env["HTTP_HOST"]}/#{@env["REQUEST_URI"]}",
GlobalSetting.force_anonymous_min_per_10_seconds,
10,
)
end
def check_logged_in_rate_limit!
!logged_in_anon_limiter.performed!(raise_error: false)
end
MIN_TIME_TO_CHECK = 0.05
ADP = "action_dispatch.request.parameters"
def should_force_anonymous?
if (queue_time = @env["REQUEST_QUEUE_SECONDS"]) && get?
if queue_time > GlobalSetting.force_anonymous_min_queue_seconds
return check_logged_in_rate_limit!
elsif queue_time >= MIN_TIME_TO_CHECK
return check_logged_in_rate_limit! if !logged_in_anon_limiter.can_perform?
end
end
false
end
def cacheable?
!!(
GlobalSetting.anon_cache_store_threshold > 0 && !has_auth_cookie? && get? &&
no_cache_bypass
)
end
def compress(val)
if val && GlobalSetting.compress_anon_cache
require "lz4-ruby" if !defined?(LZ4)
LZ4.compress(val)
else
val
end
end
def decompress(val)
if val && GlobalSetting.compress_anon_cache
require "lz4-ruby" if !defined?(LZ4)
LZ4.uncompress(val)
else
val
end
end
def cached(env = {})
if body = decompress(Discourse.redis.get(cache_key_body))
if other = Discourse.redis.get(cache_key_other)
other = JSON.parse(other)
if req_params = other[1].delete(ADP)
env[ADP] = req_params
end
[other[0], other[1], [body]]
end
end
end
def cache_duration
@env["ANON_CACHE_DURATION"]
end
# NOTE in an ideal world cache still serves out cached content except for one magic worker
# that fills it up, this avoids a herd killing you, we can probably do this using a job or redis tricks
# but coordinating this is tricky
def cache(result, env = {})
return result if GlobalSetting.anon_cache_store_threshold == 0
status, headers, response = result
if status == 200 && cache_duration
if GlobalSetting.anon_cache_store_threshold > 1
count = REDIS_STORE_SCRIPT.eval(Discourse.redis, [cache_key_count], [cache_duration])
# technically lua will cast for us, but might as well be
# prudent here, hence the to_i
if count.to_i < GlobalSetting.anon_cache_store_threshold
headers["X-Discourse-Cached"] = "skip"
return status, headers, response
end
end
headers_stripped =
headers.dup.delete_if { |k, _| %w[Set-Cookie X-MiniProfiler-Ids].include? k }
headers_stripped["X-Discourse-Cached"] = "true"
parts = []
response.each { |part| parts << part }
if req_params = env[ADP]
headers_stripped[ADP] = {
"action" => req_params["action"],
"controller" => req_params["controller"],
}
end
Discourse.redis.setex(cache_key_body, cache_duration, compress(parts.join))
Discourse.redis.setex(cache_key_other, cache_duration, [status, headers_stripped].to_json)
headers["X-Discourse-Cached"] = "store"
else
parts = response
end
[status, headers, parts]
end
def clear_cache
Discourse.redis.del(cache_key_body)
Discourse.redis.del(cache_key_other)
end
end
def initialize(app, settings = {})
@app = app
end
PAYLOAD_INVALID_REQUEST_METHODS = %w[GET HEAD]
def call(env)
return @app.call(env) if defined?(@@disabled) && @@disabled
if PAYLOAD_INVALID_REQUEST_METHODS.include?(env[Rack::REQUEST_METHOD]) &&
env[Rack::RACK_INPUT].size > 0
return 413, { "Cache-Control" => "private, max-age=0, must-revalidate" }, []
end
helper = Helper.new(env)
force_anon = false
if helper.blocked_crawler?
env["discourse.request_tracker.skip"] = true
return 403, {}, ["Crawler is not allowed!"]
end
if helper.should_force_anonymous?
force_anon = env["DISCOURSE_FORCE_ANON"] = true
helper.force_anonymous!
end
if (env["HTTP_DISCOURSE_BACKGROUND"] == "true") && (queue_time = env["REQUEST_QUEUE_SECONDS"])
max_time = GlobalSetting.background_requests_max_queue_length.to_f
if max_time > 0 && queue_time.to_f > max_time
return [
429,
{ "content-type" => "application/json; charset=utf-8" },
[
{
errors: I18n.t("rate_limiter.slow_down"),
extras: {
wait_seconds: 5 + (5 * rand).round(2),
},
}.to_json,
]
]
end
end
result =
if helper.cacheable?
helper.cached(env) || helper.cache(@app.call(env), env)
else
@app.call(env)
end
result[1]["Set-Cookie"] = "dosp=1; Path=/" if force_anon
result
end
end
end