From cabecb801e2720148f8579220b163546ee081007 Mon Sep 17 00:00:00 2001 From: Sam Date: Wed, 4 Sep 2024 15:51:01 +1000 Subject: [PATCH] FEATURE: disable rate limiting when skipping hyde (#793) Embedding search is rate limited due to the potentially expensive HyDE operation (which requires LLM access). Embedding is generally very cheap compared to it (usually 100x cheaper). This raises the limit to 100 per minute for embedding searches, while keeping the old 4 per minute for HyDE-powered search. --- .../embeddings/embeddings_controller.rb | 19 +++++++++++++++++-- .../embeddings/embeddings_controller_spec.rb | 13 +++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/app/controllers/discourse_ai/embeddings/embeddings_controller.rb b/app/controllers/discourse_ai/embeddings/embeddings_controller.rb index d2aafce8..1a49db3b 100644 --- a/app/controllers/discourse_ai/embeddings/embeddings_controller.rb +++ b/app/controllers/discourse_ai/embeddings/embeddings_controller.rb @@ -7,6 +7,9 @@ module DiscourseAi SEMANTIC_SEARCH_TYPE = "semantic_search" + MAX_HYDE_SEARCHES_PER_MINUTE = 4 + MAX_SEARCHES_PER_MINUTE = 100 + def search query = params[:q].to_s skip_hyde = params[:hyde].to_s.downcase == "false" || params[:hyde].to_s == "0" @@ -26,8 +29,20 @@ module DiscourseAi semantic_search = DiscourseAi::Embeddings::SemanticSearch.new(guardian) - if !semantic_search.cached_query?(query) - RateLimiter.new(current_user, "semantic-search", 4, 1.minutes).performed! + if !skip_hyde && !semantic_search.cached_query?(query) + RateLimiter.new( + current_user, + "semantic-search", + MAX_HYDE_SEARCHES_PER_MINUTE, + 1.minutes, + ).performed! + else + RateLimiter.new( + current_user, + "semantic-search-non-hyde", + MAX_SEARCHES_PER_MINUTE, + 1.minutes, + ).performed! 
end hijack do diff --git a/spec/requests/embeddings/embeddings_controller_spec.rb b/spec/requests/embeddings/embeddings_controller_spec.rb index d292996f..0542d2ea 100644 --- a/spec/requests/embeddings/embeddings_controller_spec.rb +++ b/spec/requests/embeddings/embeddings_controller_spec.rb @@ -60,6 +60,19 @@ describe DiscourseAi::Embeddings::EmbeddingsController do expect(response.parsed_body["topics"].map { |t| t["id"] }).to contain_exactly(topic.id) end + context "when rate limiting is enabled" do + before { RateLimiter.enable } + + it "will not rate limit API for hyde search" do + 10.times do |i| + query = "test #{SecureRandom.hex}" + stub_embedding(query) + get "/discourse-ai/embeddings/semantic-search.json?q=#{query}&hyde=false" + expect(response.status).to eq(200) + end + end + end + it "returns results correctly when performing a non Hyde search" do index(topic) index(topic_in_subcategory)