# frozen_string_literal: true

require_dependency "mobile_detection"
require_dependency "crawler_detection"
require_dependency "guardian"
require_dependency "http_language_parser"

module Middleware
  class AnonymousCache

    def self.cache_key_segments
      @@cache_key_segments ||= {
        m: 'key_is_mobile?',
        c: 'key_is_crawler?',
        b: 'key_has_brotli?',
        t: 'key_cache_theme_ids',
        ca: 'key_compress_anon',
        l: 'key_locale'
      }
    end

    # Compile a string builder method that will be called to create
    # an anonymous cache key
    def self.compile_key_builder
      method = +"def self.__compiled_key_builder(h)\n \""
      cache_key_segments.each do |k, v|
        raise "Invalid key name" unless k =~ /^[a-z]+$/
        raise "Invalid method name" unless v =~ /^key_[a-z_\?]+$/
        method << "|#{k}=#\{h.#{v}}"
      end
      method << "\"\nend"
      eval(method)
      @@compiled = true
    end

    def self.build_cache_key(helper)
      compile_key_builder unless defined?(@@compiled)
      __compiled_key_builder(helper)
    end
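
    # For the default segments above, the compiled builder behaves
    # roughly like:
    #
    #   def self.__compiled_key_builder(h)
    #     "|m=#{h.key_is_mobile?}|c=#{h.key_is_crawler?}|b=#{h.key_has_brotli?}|t=#{h.key_cache_theme_ids}|ca=#{h.key_compress_anon}|l=#{h.key_locale}"
    #   end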

    def self.anon_cache(env, duration)
      env["ANON_CACHE_DURATION"] = duration
    end
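
    # Callers opt a response into the anonymous cache by setting a
    # duration on the env, e.g. (hypothetical controller code):
    #
    #   Middleware::AnonymousCache.anon_cache(request.env, 1.hour)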

    # This gives us an API to insert anonymous cache segments
    class Helper
      RACK_SESSION     = "rack.session"
      USER_AGENT       = "HTTP_USER_AGENT"
      ACCEPT_ENCODING  = "HTTP_ACCEPT_ENCODING"
      DISCOURSE_RENDER = "HTTP_DISCOURSE_RENDER"

      def initialize(env)
        @env = env
        @request = Rack::Request.new(@env)
      end

      def blocked_crawler?
        @request.get? &&
        !@request.xhr? &&
        !@request.path.ends_with?('robots.txt') &&
        !@request.path.ends_with?('srv/status') &&
        @request[Auth::DefaultCurrentUserProvider::API_KEY].nil? &&
        @env[Auth::DefaultCurrentUserProvider::USER_API_KEY].nil? &&
        CrawlerDetection.is_blocked_crawler?(@env[USER_AGENT])
      end

      def is_mobile=(val)
        @is_mobile = val ? :true : :false
      end
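
      # Memoized as :true / :false rather than booleans so that `||=`
      # can also cache a negative result (a plain `false` would be
      # recomputed on every call).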
      def is_mobile?
        @is_mobile ||=
          begin
            session = @env[RACK_SESSION]
            # don't initialize params until later,
            # otherwise you get broken params on the request
            params = {}

            MobileDetection.resolve_mobile_view!(@env[USER_AGENT], params, session) ? :true : :false
          end

        @is_mobile == :true
      end
      alias_method :key_is_mobile?, :is_mobile?

      def key_has_brotli?
        @has_brotli ||=
          begin
            @env[ACCEPT_ENCODING].to_s =~ /br/ ? :true : :false
          end
        @has_brotli == :true
      end

      def key_locale
        if SiteSetting.set_locale_from_accept_language_header
          HttpLanguageParser.parse(@env["HTTP_ACCEPT_LANGUAGE"])
        else
          "" # No need to key, it is the same for all anon users
        end
      end
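
      # A client can force the crawler view with a `Discourse-Render: crawler`
      # header; otherwise we sniff the user agent. UAs containing
      # "discourse" but not "mobile" (presumably internal fetches) are
      # also treated as crawlers.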
      def is_crawler?
        @is_crawler ||=
          begin
            user_agent = @env[USER_AGENT]

            if @env[DISCOURSE_RENDER] == "crawler" || CrawlerDetection.crawler?(user_agent, @env["HTTP_VIA"])
              :true
            else
              user_agent.downcase.include?("discourse") && !user_agent.downcase.include?("mobile") ? :true : :false
            end
          end
        @is_crawler == :true
      end
      alias_method :key_is_crawler?, :is_crawler?
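
      # A (simplified) key for an anonymous desktop request might look like:
      #
      #   ANON_CACHE_text/html_https_example.com/latest|m=false|c=false|b=true|t=|ca=false|l=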
      def cache_key
        return @cache_key if defined?(@cache_key)

        @cache_key = +"ANON_CACHE_#{@env["HTTP_ACCEPT"]}_#{@env[Rack::RACK_URL_SCHEME]}_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}"
        @cache_key << AnonymousCache.build_cache_key(self)
        @cache_key
      end

      def key_cache_theme_ids
        theme_ids.join(',')
      end

      def key_compress_anon
        GlobalSetting.compress_anon_cache
      end
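
      # The theme_ids cookie holds a comma-separated id list before a pipe
      # separator (the trailing segment is ignored here); only the first id
      # is honored, and only when the guardian allows anon use of that theme.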
      def theme_ids
        ids, _ = @request.cookies['theme_ids']&.split('|')
        id = ids&.split(",")&.map(&:to_i)&.first
        if id && Guardian.new.allow_themes?([id])
          Theme.transform_ids(id)
        else
          []
        end
      end

      def cache_key_count
        @cache_key_count ||= "#{cache_key}_count"
      end

      def cache_key_body
        @cache_key_body ||= "#{cache_key}_body"
      end

      def cache_key_other
        @cache_key_other ||= "#{cache_key}_other"
      end

      def get?
        @env["REQUEST_METHOD"] == "GET"
      end

      def has_auth_cookie?
        CurrentUser.has_auth_cookie?(@env)
      end

      def no_cache_bypass
        request = Rack::Request.new(@env)
        request.cookies['_bypass_cache'].nil? &&
          (request.path != '/srv/status') &&
          request[Auth::DefaultCurrentUserProvider::API_KEY].nil? &&
          @env[Auth::DefaultCurrentUserProvider::USER_API_KEY].nil?
      end
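
      # Strip every credential from the env (cookies, user API key, and
      # api_key/api_username params) so the request is processed as a
      # plain anonymous request and can be served from the cache.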
      def force_anonymous!
        @env[Auth::DefaultCurrentUserProvider::USER_API_KEY] = nil
        @env['HTTP_COOKIE'] = nil
        @env['rack.request.cookie.hash'] = {}
        @env['rack.request.cookie.string'] = ''
        @env['_bypass_cache'] = nil
        request = Rack::Request.new(@env)
        request.delete_param('api_username')
        request.delete_param('api_key')
      end

      def logged_in_anon_limiter
        @logged_in_anon_limiter ||= RateLimiter.new(
          nil,
          "logged_in_anon_cache_#{@env["HTTP_HOST"]}/#{@env["REQUEST_URI"]}",
          GlobalSetting.force_anonymous_min_per_10_seconds,
          10
        )
      end

      def check_logged_in_rate_limit!
        !logged_in_anon_limiter.performed!(raise_error: false)
      end

      MIN_TIME_TO_CHECK = 0.05
      ADP = "action_dispatch.request.parameters"
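
      # Degrade logged-in requests to anonymous when they spent too long
      # queued for a worker: above the configured threshold every request
      # counts against the rate limiter, while mild queueing (at least
      # MIN_TIME_TO_CHECK seconds) only forces anonymity once the limiter
      # is already exhausted.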
      def should_force_anonymous?
        if (queue_time = @env['REQUEST_QUEUE_SECONDS']) && get?
          if queue_time > GlobalSetting.force_anonymous_min_queue_seconds
            return check_logged_in_rate_limit!
          elsif queue_time >= MIN_TIME_TO_CHECK
            if !logged_in_anon_limiter.can_perform?
              return check_logged_in_rate_limit!
            end
          end
        end

        false
      end

      def cacheable?
        !!(!has_auth_cookie? && get? && no_cache_bypass)
      end

      def compress(val)
        if val && GlobalSetting.compress_anon_cache
          require "lz4-ruby" if !defined?(LZ4)
          LZ4::compress(val)
        else
          val
        end
      end

      def decompress(val)
        if val && GlobalSetting.compress_anon_cache
          require "lz4-ruby" if !defined?(LZ4)
          LZ4::uncompress(val)
        else
          val
        end
      end
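
      # Rebuild the Rack response triple from the two cached entries: the
      # body blob and a JSON-encoded [status, headers] pair. Cached request
      # parameters (controller/action) are restored into the env so
      # downstream consumers still see them.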
      def cached(env = {})
        if body = decompress(Discourse.redis.get(cache_key_body))
          if other = Discourse.redis.get(cache_key_other)
            other = JSON.parse(other)
            if req_params = other[1].delete(ADP)
              env[ADP] = req_params
            end
            [other[0], other[1], [body]]
          end
        end
      end

      def cache_duration
        @env["ANON_CACHE_DURATION"]
      end
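
      # With anon_cache_store_threshold > 1, a response is only written to
      # the cache once its key has been seen that many times within
      # cache_duration; the Lua script below bumps the counter and its
      # expiry atomically.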
      # NOTE: ideally the cache would keep serving cached content while a
      # single designated worker refills it; that would stop a thundering
      # herd from killing you. We could probably coordinate that with a job
      # or some redis tricks, but it is tricky to get right.
      def cache(result, env = {})
        return result if GlobalSetting.anon_cache_store_threshold == 0

        status, headers, response = result

        if status == 200 && cache_duration

          if GlobalSetting.anon_cache_store_threshold > 1
            count = Discourse.redis.eval(<<~REDIS, [cache_key_count], [cache_duration])
              local current = redis.call("incr", KEYS[1])
              redis.call("expire", KEYS[1], ARGV[1])
              return current
            REDIS

            # technically lua will cast for us, but might as well be
            # prudent here, hence the to_i
            if count.to_i < GlobalSetting.anon_cache_store_threshold
              headers["X-Discourse-Cached"] = "skip"
              return [status, headers, response]
            end
          end

          headers_stripped = headers.dup.delete_if { |k, _| ["Set-Cookie", "X-MiniProfiler-Ids"].include?(k) }
          headers_stripped["X-Discourse-Cached"] = "true"

          parts = []
          response.each do |part|
            parts << part
          end

          if req_params = env[ADP]
            headers_stripped[ADP] = {
              "action" => req_params["action"],
              "controller" => req_params["controller"]
            }
          end

          Discourse.redis.setex(cache_key_body, cache_duration, compress(parts.join))
          Discourse.redis.setex(cache_key_other, cache_duration, [status, headers_stripped].to_json)

          headers["X-Discourse-Cached"] = "store"
        else
          parts = response
        end

        [status, headers, parts]
      end

      def clear_cache
        Discourse.redis.del(cache_key_body)
        Discourse.redis.del(cache_key_other)
      end
    end

    def initialize(app, settings = {})
      @app = app
    end
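
    # HTTP methods that must not carry a request body; #call rejects such
    # requests with a 413 before doing any other work.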
    PAYLOAD_INVALID_REQUEST_METHODS = ["GET", "HEAD"]

    def call(env)
      if PAYLOAD_INVALID_REQUEST_METHODS.include?(env[Rack::REQUEST_METHOD]) &&
         env[Rack::RACK_INPUT].size > 0
        return [413, {}, []]
      end

      helper = Helper.new(env)
      force_anon = false

      if helper.blocked_crawler?
        env["discourse.request_tracker.skip"] = true
        return [403, {}, ["Crawler is not allowed!"]]
      end

      if helper.should_force_anonymous?
        force_anon = env["DISCOURSE_FORCE_ANON"] = true
        helper.force_anonymous!
      end

      if (env["HTTP_DISCOURSE_BACKGROUND"] == "true") && (queue_time = env["REQUEST_QUEUE_SECONDS"])
        max_time = GlobalSetting.background_requests_max_queue_length.to_f

        if max_time > 0 && queue_time.to_f > max_time
          return [
            429,
            { "content-type" => "application/json; charset=utf-8" },
            [{
              errors: I18n.t("rate_limiter.slow_down"),
              extras: { wait_seconds: 5 + (5 * rand).round(2) }
            }.to_json]
          ]
        end
      end

      result =
        if helper.cacheable?
          helper.cached(env) || helper.cache(@app.call(env), env)
        else
          @app.call(env)
        end

      if force_anon
        result[1]["Set-Cookie"] = "dosp=1; Path=/"
      end

      result
    end
  end
end