425 lines
13 KiB
Ruby
425 lines
13 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
RSpec.describe Middleware::AnonymousCache do
|
|
# Middleware under test, wrapping a stub app that always answers with an empty 200.
let(:middleware) do
  stub_app = ->(_env) { [200, {}, []] }
  Middleware::AnonymousCache.new(stub_app)
end

# Switch the anonymous cache on before every example.
before do
  Middleware::AnonymousCache.enable_anon_cache
end
|
|
|
|
# Builds a request env hash for the examples.
#
# opts may carry a :path entry, which is handed to create_request_env as the
# request path (falling back to "http://test.com/path?bla=1"). Every other
# entry is merged over the env that create_request_env returns.
#
# The caller's hash is not modified; :path is read rather than deleted.
def env(opts = {})
  path = opts[:path] || "http://test.com/path?bla=1"
  overrides = opts.reject { |key, _value| key == :path }

  create_request_env(path: path).merge(overrides)
end
|
|
|
|
describe Middleware::AnonymousCache::Helper do
|
|
# Returns an AnonymousCache helper wrapping a request env built from opts.
def new_helper(opts = {})
  request_env = env(opts)
  Middleware::AnonymousCache::Helper.new(request_env)
end
|
|
|
|
# Exercises Helper#cacheable? against the request attributes that the examples
# below expect to allow or forbid anonymous caching.
describe "#cacheable?" do
  it "true by default" do
    default_helper = new_helper
    expect(default_helper.cacheable?).to eq(true)
  end

  it "is false for non GET" do
    post_helper = new_helper("ANON_CACHE_DURATION" => 10, "REQUEST_METHOD" => "POST")
    expect(post_helper.cacheable?).to eq(false)
  end

  it "is false if it has a valid auth cookie" do
    auth_cookie = create_auth_cookie(token: SecureRandom.hex)
    cookie_header = "jack=1; _t=#{auth_cookie}; jill=2"

    expect(new_helper("HTTP_COOKIE" => cookie_header).cacheable?).to eq(false)
  end

  it "is true if it has an invalid auth cookie" do
    tampered = create_auth_cookie(token: SecureRandom.hex, issued_at: 5.minutes.ago)
    tampered = swap_2_different_characters(tampered)
    tampered.prepend("%a0%a1") # an invalid byte sequence
    cookie_header = "jack=1; _t=#{tampered}; jill=2"

    expect(new_helper("HTTP_COOKIE" => cookie_header).cacheable?).to eq(true)
  end

  it "is false for srv/status routes" do
    status_helper = new_helper("PATH_INFO" => "/srv/status")
    expect(status_helper.cacheable?).to eq(false)
  end

  it "is false for API requests using header" do
    api_helper = new_helper("HTTP_API_KEY" => "abcde")
    expect(api_helper.cacheable?).to eq(false)
  end

  it "is false for API requests using parameter" do
    api_helper = new_helper(path: "/path?api_key=abc")
    expect(api_helper.cacheable?).to eq(false)
  end

  it "is false for User API requests using header" do
    user_api_helper = new_helper("HTTP_USER_API_KEY" => "abcde")
    expect(user_api_helper.cacheable?).to eq(false)
  end
end
|
|
|
|
# Checks how the theme_ids cookie feeds into the helper's cache key.
describe "per theme cache" do
  it "handles theme keys" do
    theme = Fabricate(:theme, user_selectable: true)
    key_for_cookie = ->(cookie) { new_helper("HTTP_COOKIE" => cookie).cache_key }

    # A theme id that is not usable must yield the same key as no cookie at all.
    bad_theme_key = key_for_cookie.call("theme_ids=abc")
    baseline_key = new_helper.cache_key
    expect(bad_theme_key).to eq(baseline_key)

    # A selectable theme must get a cache key of its own.
    good_theme_key = key_for_cookie.call("theme_ids=#{theme.id}")
    expect(good_theme_key).not_to eq(baseline_key)
  end
end
|
|
|
|
# Checks when the Accept-Language header and the locale cookie influence the cache key.
context "with header or cookie based custom locale" do
  it "handles different languages" do
    key_for_language =
      lambda { |language| new_helper("HTTP_ACCEPT_LANGUAGE" => language).cache_key }

    # Normally does not check the language header
    french_lower = key_for_language.call("fr")
    french_upper = key_for_language.call("FR")
    default_language = key_for_language.call(SiteSetting.default_locale)
    no_header = new_helper.cache_key

    expect(no_header).to eq(french_lower)
    expect(no_header).to eq(french_upper)
    expect(no_header).to eq(default_language)

    # With both settings on, the header is expected to change the key.
    SiteSetting.allow_user_locale = true
    SiteSetting.set_locale_from_accept_language_header = true

    french_lower = key_for_language.call("fr")
    french_upper = key_for_language.call("FR")
    default_language = key_for_language.call(SiteSetting.default_locale)
    no_header = new_helper.cache_key

    expect(no_header).to eq(default_language)
    expect(french_lower).to eq(french_upper)
    expect(french_lower).not_to eq(no_header)

    # With the cookie setting on, the locale cookie shows up in the key.
    SiteSetting.set_locale_from_cookie = true
    cookie_key = new_helper("HTTP_COOKIE" => "locale=es;").cache_key
    expect(cookie_key).to include("l=es")
  end
end
|
|
|
|
# A user agent listed in browser_update_user_agents must not share a cache key
# with one that is not listed.
it "handles old browsers" do
  SiteSetting.browser_update_user_agents = "my_old_browser"

  old_browser_key, new_browser_key =
    %w[my_old_browser my_new_browser].map do |agent|
      new_helper("HTTP_USER_AGENT" => agent).cache_key
    end

  expect(old_browser_key).not_to eq(new_browser_key)
end
|
|
|
|
# The two iPhone OS versions below are expected to produce different cache keys.
it "handles modern mobile browsers" do
  agents = ["Safari (iPhone OS 7)", "Safari (iPhone OS 15)"]

  ios7_key, ios15_key = agents.map { |agent| new_helper("HTTP_USER_AGENT" => agent).cache_key }

  expect(ios7_key).not_to eq(ios15_key)
end
|
|
|
|
# Building a cache key from a user agent that contains an invalid byte sequence
# must not raise, and must give the same key as the same agent after its
# invalid bytes have been replaced during UTF-8 encoding.
it "handles user agents with invalid bytes" do
  agent = (+"Evil Googlebot String \xc3\x28").force_encoding("ASCII")
  raw_key = sanitized_key = nil

  expect {
    raw_key = new_helper("HTTP_USER_AGENT" => agent).cache_key
    sanitized_key =
      new_helper(
        "HTTP_USER_AGENT" => agent.encode("utf-8", invalid: :replace, undef: :replace),
      ).cache_key
  }.not_to raise_error

  # Compared outside the block so a mismatch is reported as a plain equality
  # failure instead of as an unexpected exception.
  expect(raw_key).to eq(sanitized_key)
end
|
|
|
|
# Round-trips responses through the cache: one helper stores a response and a
# freshly built helper for the same kind of request reads it back.
context "when cached" do
  let!(:helper) { new_helper("ANON_CACHE_DURATION" => 10) }

  let!(:crawler) do
    new_helper(
      "ANON_CACHE_DURATION" => 10,
      "HTTP_USER_AGENT" => "AdsBot-Google (+http://www.google.com/adsbot.html)",
    )
  end

  before { global_setting :anon_cache_store_threshold, 1 }

  after do
    helper.clear_cache
    crawler.clear_cache
  end

  it "compresses body on demand" do
    global_setting :compress_anon_cache, true

    payload = "x" * 1000
    helper.cache([200, { "HELLO" => "WORLD" }, [payload]])

    # A separate local name keeps the `helper` let above from being shadowed.
    reader = new_helper("ANON_CACHE_DURATION" => 10)
    expect(reader.cached).to eq(
      [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, [payload]],
    )

    # depends on i7z implementation, but let's assume it is stable unless we
    # discover otherwise
    expect(Discourse.redis.get(reader.cache_key_body).length).to eq(16)
  end

  it "handles brotli switching" do
    helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])

    plain_reader = new_helper("ANON_CACHE_DURATION" => 10)
    expect(plain_reader.cached).to eq(
      [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello my world"]],
    )

    # A request advertising "br" must not be served the entry stored above.
    brotli_reader = new_helper("ANON_CACHE_DURATION" => 10, "HTTP_ACCEPT_ENCODING" => "gz, br")
    expect(brotli_reader.cached).to eq(nil)
  end

  it "returns cached data for cached requests" do
    helper.is_mobile = true
    expect(helper.cached).to eq(nil)
    helper.cache([200, { "HELLO" => "WORLD" }, ["hello ", "my world"]])

    mobile_reader = new_helper("ANON_CACHE_DURATION" => 10)
    mobile_reader.is_mobile = true
    expect(mobile_reader.cached).to eq(
      [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello my world"]],
    )

    # The crawler helper starts with nothing cached and stores its own entry.
    expect(crawler.cached).to eq(nil)
    crawler.cache([200, { "HELLO" => "WORLD" }, ["hello ", "world"]])
    expect(crawler.cached).to eq(
      [200, { "X-Discourse-Cached" => "true", "HELLO" => "WORLD" }, ["hello world"]],
    )
  end
end
|
|
end
|
|
|
|
# Background requests (those carrying the Discourse-Background header) are
# expected to get a 429 once the request's queue time is above the configured
# background_requests_max_queue_length.
describe "background request rate limit" do
  it "will rate limit background requests" do
    app = Middleware::AnonymousCache.new(lambda { |_| [200, {}, ["ok"]] })

    global_setting :background_requests_max_queue_length, "0.5"

    cookie = create_auth_cookie(token: SecureRandom.hex)
    # Named request_env so the spec's `env` helper method is not shadowed.
    request_env =
      create_request_env.merge(
        "HTTP_COOKIE" => "_t=#{cookie}",
        "HOST" => "site.com",
        "REQUEST_METHOD" => "GET",
        "REQUEST_URI" => "/somewhere/rainbow",
        # non background ... long request
        "REQUEST_QUEUE_SECONDS" => 2,
        "rack.input" => StringIO.new,
      )

    status, _ = app.call(request_env.dup)
    expect(status).to eq(200)

    # The same slow request, now flagged as a background request.
    request_env["HTTP_DISCOURSE_BACKGROUND"] = "true"

    status, headers, body = app.call(request_env.dup)
    expect(status).to eq(429)
    expect(headers["content-type"]).to eq("application/json; charset=utf-8")
    json = JSON.parse(body.join)
    expect(json["extras"]["wait_seconds"]).to be > 4.9

    # A background request whose queue time is under the limit passes through.
    request_env["REQUEST_QUEUE_SECONDS"] = 0.4

    status, _ = app.call(request_env.dup)
    expect(status).to eq(200)
  end
end
|
|
|
|
# Sends the same logged-in request repeatedly and watches, through the inner
# app, whether the middleware passed it on with or without its login markers.
describe "#force_anonymous!" do
  before { RateLimiter.enable }

  use_redis_snapshotting

  it "will revert to anonymous once we reach the limit" do
    # Written by the inner app on every call; declared here so the lambda closes over it.
    is_anon = false

    inner_app =
      lambda do |rack_env|
        is_anon = rack_env["HTTP_COOKIE"].nil? && rack_env["HTTP_DISCOURSE_LOGGED_IN"].nil?
        [200, {}, ["ok"]]
      end
    app = Middleware::AnonymousCache.new(inner_app)

    global_setting :force_anonymous_min_per_10_seconds, 2
    global_setting :force_anonymous_min_queue_seconds, 1

    cookie = create_auth_cookie(token: SecureRandom.hex)
    request_env =
      create_request_env.merge(
        "HTTP_COOKIE" => "_t=#{cookie}",
        "HTTP_DISCOURSE_LOGGED_IN" => "true",
        "HOST" => "site.com",
        "REQUEST_METHOD" => "GET",
        "REQUEST_URI" => "/somewhere/rainbow",
        "REQUEST_QUEUE_SECONDS" => 2.1,
        "rack.input" => StringIO.new,
      )

    # Resets the flag, then sends a copy of the current request env through the middleware.
    perform_request =
      lambda do
        is_anon = false
        app.call(request_env.dup)
      end

    # The first two slow requests are still passed through as logged in ...
    2.times do
      perform_request.call
      expect(is_anon).to eq(false)
    end

    # ... and the third one is handed to the app as anonymous.
    perform_request.call
    expect(is_anon).to eq(true)

    _status, headers, _body = perform_request.call
    expect(is_anon).to eq(true)
    expect(headers["Set-Cookie"]).to eq("dosp=1; Path=/")

    # tricky change, a 50ms delay still will trigger protection
    # once it is tripped
    request_env["REQUEST_QUEUE_SECONDS"] = 0.05
    perform_request.call
    expect(is_anon).to eq(true)

    request_env["REQUEST_QUEUE_SECONDS"] = 0.01
    perform_request.call
    expect(is_anon).to eq(false)
  end
end
|
|
|
|
# A request built by the env helper with data written to its rack input is
# expected to be rejected with 413 and a non-cacheable Cache-Control header.
describe "invalid request payload" do
  it "returns 413 for GET request with payload" do
    request_env = env
    request_env[Rack::RACK_INPUT].write("test")

    status, headers, _ = middleware.call(request_env)

    expect(status).to eq(413)
    expect(headers["Cache-Control"]).to eq("private, max-age=0, must-revalidate")
  end
end
|
|
|
|
# Checks the allowed_crawler_user_agents / blocked_crawler_user_agents site
# settings by sending requests with different user agents through the middleware.
describe "crawler blocking" do
  # Headers of an ordinary desktop browser.
  let :non_crawler do
    {
      "HTTP_USER_AGENT" =>
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    }
  end

  # Headers of the Googlebot crawler used throughout these examples.
  let(:googlebot_headers) do
    { "HTTP_USER_AGENT" => "Googlebot/2.1 (+http://www.google.com/bot.html)" }
  end

  # Sends a request for path through the middleware.
  #
  # options[:headers] is merged over the path-related env entries. The env used
  # and the response triple are kept in @env, @status, @response_header and
  # @response for the examples to inspect.
  def get(path, options)
    path_entries = { "REQUEST_URI" => path, "PATH_INFO" => path, "REQUEST_PATH" => path }
    @env = env(path_entries.merge(options[:headers]))
    @status, @response_header, @response = middleware.call(@env)
  end

  it "applies allowed_crawler_user_agents correctly" do
    SiteSetting.allowed_crawler_user_agents = "Googlebot"

    get "/", headers: googlebot_headers
    expect(@status).to eq(200)

    get "/",
        headers: {
          "HTTP_USER_AGENT" => "Anotherbot/2.1 (+http://www.notgoogle.com/bot.html)",
        }
    expect(@status).to eq(403)
    expect(@response).to be_an(Array)

    get "/", headers: non_crawler
    expect(@status).to eq(200)
  end

  it "doesn't block api requests" do
    SiteSetting.allowed_crawler_user_agents = "Googlebot"
    api_key = Fabricate(:api_key)
    query = "api_key=#{api_key.key}&api_username=system"

    get "/latest?#{query}", headers: { "QUERY_STRING" => query }
    expect(@status).to eq(200)

    get "/latest", headers: { "HTTP_API_KEY" => api_key.key, "HTTP_API_USERNAME" => "system" }
    expect(@status).to eq(200)
  end

  it "applies blocked_crawler_user_agents correctly" do
    SiteSetting.blocked_crawler_user_agents = "Googlebot"

    get "/", headers: non_crawler
    expect(@status).to eq(200)

    get "/", headers: googlebot_headers
    expect(@status).to eq(403)

    # A user agent with an invalid byte sequence must be handled without raising.
    evil_agent = (+"Evil Googlebot String \xc3\x28").force_encoding("ASCII")
    expect { get "/", headers: { "HTTP_USER_AGENT" => evil_agent } }.not_to raise_error
    # Checked outside the block so a wrong status is reported as a plain
    # equality failure instead of as an unexpected exception.
    expect(@status).to eq(403)

    get "/",
        headers: {
          "HTTP_USER_AGENT" => "Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)",
        }
    expect(@status).to eq(200)
  end

  it "should never block robots.txt" do
    SiteSetting.blocked_crawler_user_agents = "Googlebot"

    get "/robots.txt", headers: googlebot_headers

    expect(@status).to eq(200)
  end

  it "should never block srv/status" do
    SiteSetting.blocked_crawler_user_agents = "Googlebot"

    get "/srv/status", headers: googlebot_headers

    expect(@status).to eq(200)
  end

  it "blocked crawlers shouldn't log page views" do
    SiteSetting.blocked_crawler_user_agents = "Googlebot"

    get "/", headers: googlebot_headers

    expect(@env["discourse.request_tracker.skip"]).to eq(true)
  end

  it "blocks json requests" do
    SiteSetting.blocked_crawler_user_agents = "Googlebot"

    get "/srv/status.json", headers: googlebot_headers

    expect(@status).to eq(403)
  end
end
|
|
end
|