FEATURE: relax rate limiting when skipping HyDE (#793)
Embedding search is rate limited because of the potentially expensive HyDE operation (which requires LLM access). Computing an embedding is generally very cheap by comparison (usually about 100x cheaper). This commit raises the limit to 100 per minute for embedding-only searches, while keeping the old limit of 4 per minute for HyDE-powered searches.
This commit is contained in:
parent
c4c9dc2034
commit
cabecb801e
|
@ -7,6 +7,9 @@ module DiscourseAi
|
||||||
|
|
||||||
SEMANTIC_SEARCH_TYPE = "semantic_search"
|
SEMANTIC_SEARCH_TYPE = "semantic_search"
|
||||||
|
|
||||||
|
MAX_HYDE_SEARCHES_PER_MINUTE = 4
|
||||||
|
MAX_SEARCHES_PER_MINUTE = 100
|
||||||
|
|
||||||
def search
|
def search
|
||||||
query = params[:q].to_s
|
query = params[:q].to_s
|
||||||
skip_hyde = params[:hyde].to_s.downcase == "false" || params[:hyde].to_s == "0"
|
skip_hyde = params[:hyde].to_s.downcase == "false" || params[:hyde].to_s == "0"
|
||||||
|
@ -26,8 +29,20 @@ module DiscourseAi
|
||||||
|
|
||||||
semantic_search = DiscourseAi::Embeddings::SemanticSearch.new(guardian)
|
semantic_search = DiscourseAi::Embeddings::SemanticSearch.new(guardian)
|
||||||
|
|
||||||
if !semantic_search.cached_query?(query)
|
if !skip_hyde && !semantic_search.cached_query?(query)
|
||||||
RateLimiter.new(current_user, "semantic-search", 4, 1.minutes).performed!
|
RateLimiter.new(
|
||||||
|
current_user,
|
||||||
|
"semantic-search",
|
||||||
|
MAX_HYDE_SEARCHES_PER_MINUTE,
|
||||||
|
1.minutes,
|
||||||
|
).performed!
|
||||||
|
else
|
||||||
|
RateLimiter.new(
|
||||||
|
current_user,
|
||||||
|
"semantic-search-non-hyde",
|
||||||
|
MAX_SEARCHES_PER_MINUTE,
|
||||||
|
1.minutes,
|
||||||
|
).performed!
|
||||||
end
|
end
|
||||||
|
|
||||||
hijack do
|
hijack do
|
||||||
|
|
|
@ -60,6 +60,19 @@ describe DiscourseAi::Embeddings::EmbeddingsController do
|
||||||
expect(response.parsed_body["topics"].map { |t| t["id"] }).to contain_exactly(topic.id)
|
expect(response.parsed_body["topics"].map { |t| t["id"] }).to contain_exactly(topic.id)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "when rate limiting is enabled" do
|
||||||
|
before { RateLimiter.enable }
|
||||||
|
|
||||||
|
it "will not rate limit API for hyde search" do
|
||||||
|
10.times do |i|
|
||||||
|
query = "test #{SecureRandom.hex}"
|
||||||
|
stub_embedding(query)
|
||||||
|
get "/discourse-ai/embeddings/semantic-search.json?q=#{query}&hyde=false"
|
||||||
|
expect(response.status).to eq(200)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
it "returns results correctly when performing a non Hyde search" do
|
it "returns results correctly when performing a non Hyde search" do
|
||||||
index(topic)
|
index(topic)
|
||||||
index(topic_in_subcategory)
|
index(topic_in_subcategory)
|
||||||
|
|
Loading…
Reference in New Issue