FEATURE: disable rate limiting when skipping hyde (#793)

Embedding search is rate limited because of the potentially expensive
HyDE operation (which requires LLM access).

Generating an embedding is generally very cheap by comparison (usually 100x cheaper).

This raises the limit to 100 per minute for embedding searches,
while keeping the old 4 per minute for HyDE powered search.
This commit is contained in:
Sam 2024-09-04 15:51:01 +10:00 committed by GitHub
parent c4c9dc2034
commit cabecb801e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 30 additions and 2 deletions

View File

@ -7,6 +7,9 @@ module DiscourseAi
SEMANTIC_SEARCH_TYPE = "semantic_search"
MAX_HYDE_SEARCHES_PER_MINUTE = 4
MAX_SEARCHES_PER_MINUTE = 100
def search
query = params[:q].to_s
skip_hyde = params[:hyde].to_s.downcase == "false" || params[:hyde].to_s == "0"
@ -26,8 +29,20 @@ module DiscourseAi
semantic_search = DiscourseAi::Embeddings::SemanticSearch.new(guardian)
if !semantic_search.cached_query?(query)
RateLimiter.new(current_user, "semantic-search", 4, 1.minutes).performed!
if !skip_hyde && !semantic_search.cached_query?(query)
RateLimiter.new(
current_user,
"semantic-search",
MAX_HYDE_SEARCHES_PER_MINUTE,
1.minutes,
).performed!
else
RateLimiter.new(
current_user,
"semantic-search-non-hyde",
MAX_SEARCHES_PER_MINUTE,
1.minutes,
).performed!
end
hijack do

View File

@ -60,6 +60,19 @@ describe DiscourseAi::Embeddings::EmbeddingsController do
expect(response.parsed_body["topics"].map { |t| t["id"] }).to contain_exactly(topic.id)
end
# Exercises the semantic-search endpoint with the rate limiter switched on
# (the `before` hook calls RateLimiter.enable).
context "when rate limiting is enabled" do
  before { RateLimiter.enable }

  # Issues 10 searches with hyde=false and expects every one to return 200.
  # Each query gets a random hex suffix so no two requests share a query
  # string. 10 is more than the 4-per-minute HyDE limit
  # (MAX_HYDE_SEARCHES_PER_MINUTE), so the requests only all succeed if
  # that limit is not applied to the non-HyDE path.
  it "will not rate limit API for non-hyde search" do
    10.times do
      query = "test #{SecureRandom.hex}"
      stub_embedding(query)

      get "/discourse-ai/embeddings/semantic-search.json?q=#{query}&hyde=false"

      expect(response.status).to eq(200)
    end
  end
end
it "returns results correctly when performing a non Hyde search" do
index(topic)
index(topic_in_subcategory)