FEATURE: introduce additional request limit buckets keyed on cloud or crawler identifiers

If we have additional information about the semantic source IP of a request
(i.e. a public cloud or a crawler) then we can group those requests together
under the same bucket.
This commit is contained in:
Michael Brown 2024-11-28 16:27:52 -05:00
parent 19319ceadb
commit 0d0f2edae4
No known key found for this signature in database
GPG Key ID: 6C07FB3007CF9360
2 changed files with 66 additions and 5 deletions

View File

@ -439,12 +439,34 @@ class Middleware::RequestTracker
return :skip if @@ip_skipper&.call(request.ip) return :skip if @@ip_skipper&.call(request.ip)
return :skip if STATIC_IP_SKIPPER&.any? { |entry| entry.include?(request.ip) } return :skip if STATIC_IP_SKIPPER&.any? { |entry| entry.include?(request.ip) }
if cookie && cookie[:user_id] && cookie[:trust_level] && # authenticated user?
cookie[:trust_level] >= GlobalSetting.skip_per_ip_rate_limit_trust_level if cookie && cookie[:user_id] && cookie[:trust_level]
[cookie[:user_id], false, "id", "user"] if cookie[:trust_level] >= GlobalSetting.skip_per_ip_rate_limit_trust_level
else return "id/#{cookie[:user_id]}", false, "id", "user"
[request.ip, true, "ip", "IP address"] else
# if the authenticated user comes from another bucketed source
# (e.g. public cloud) but does not meet skip_per_ip_rate_limit_trust_level,
# we don't want to bucket them along with the rest of the cloud,
# so they still get bucketed by IP
return "ip/#{request.ip}", true, "ip", "IP address"
end
end end
# TODO: authenticated user via API?
src_tag_info = SrcTagInfo.new(request)
# known crawler?
if verified_crawler = src_tag_info.verified_crawler
return "crawler/#{verified_crawler}", false, "crawler", "crawler (#{verified_crawler})"
end
# public cloud?
if verified_cloud = src_tag_info.verified_cloud
return "cloud/#{verified_cloud}", false, "cloud", "public cloud (#{verified_cloud})"
end
# fallback choice: IP
["ip/#{request.ip}", true, "ip", "IP address"]
end end
def rate_limit(request, cookie) def rate_limit(request, cookie)

View File

@ -747,6 +747,45 @@ RSpec.describe Middleware::RequestTracker do
expect(status).to eq(200) expect(status).to eq(200)
end end
context "with src-tag headers" do
ENV["DISCOURSE_HTTP_SRC_TAG_HEADER"] = "src-tag"
ENV["DISCOURSE_HTTP_SRC_TAG_SUPPORTED_HEADER"] = "src-tag-lists"
it "does bucket a public cloud" do
global_setting :max_reqs_per_ip_per_10_seconds, 1
global_setting :max_reqs_per_ip_mode, "block"
env1 = env("REMOTE_ADDR" => "1.1.1.1", "HTTP_SRC_TAG" => "cloud-aws-ec2")
env2 = env("REMOTE_ADDR" => "1.1.1.2", "HTTP_SRC_TAG" => "cloud-aws-ec2")
env3 = env("REMOTE_ADDR" => "1.1.1.3")
status, _ = middleware.call(env1)
expect(status).to eq(200)
status, headers = middleware.call(env2)
expect(status).to eq(429)
expect(headers["Retry-After"]).to eq("10")
expect(headers["Discourse-Rate-Limit-Error-Code"]).to eq("cloud_10_secs_limit")
status, _ = middleware.call(env3)
expect(status).to eq(200)
end
it "does bucket public clouds separately" do
global_setting :max_reqs_per_ip_per_10_seconds, 1
global_setting :max_reqs_per_ip_mode, "block"
env1 = env("REMOTE_ADDR" => "1.1.1.1", "HTTP_SRC_TAG" => "cloud-aws-ec2")
env2 = env("REMOTE_ADDR" => "1.1.1.2", "HTTP_SRC_TAG" => "cloud-gcp")
status, _ = middleware.call(env1)
expect(status).to eq(200)
status, _ = middleware.call(env2)
expect(status).to eq(200)
end
end
describe "diagnostic information" do describe "diagnostic information" do
it "is included when the requests-per-10-seconds limit is reached" do it "is included when the requests-per-10-seconds limit is reached" do
global_setting :max_reqs_per_ip_per_10_seconds, 1 global_setting :max_reqs_per_ip_per_10_seconds, 1