2023-03-31 14:29:56 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi
  module Embeddings
    # Endpoints for embeddings-backed ("semantic") search.
    #
    # Both actions read the search term from `params[:q]`, collect the matches
    # returned by DiscourseAi::Embeddings::SemanticSearch into a
    # Search::GroupedSearchResults and render it with
    # GroupedSearchResultSerializer.
    class EmbeddingsController < ::ApplicationController
      requires_plugin ::DiscourseAi::PLUGIN_NAME

      SEMANTIC_SEARCH_TYPE = "semantic_search"

      # Budgets handed to RateLimiter together with `current_user` and a
      # one-minute window.
      MAX_HYDE_SEARCHES_PER_MINUTE = 4
      MAX_SEARCHES_PER_MINUTE = 100
      MAX_QUICK_SEARCHES_PER_MINUTE = 60

      # Topic-level semantic search.
      #
      # Params:
      #   q    - search term; must be at least SiteSetting.min_search_term_length
      #          characters long.
      #   hyde - optional; "false" (any letter case) or "0" turns HyDE off.
      #
      # Raises Discourse::InvalidParameters when `q` is too short. The rate
      # limiter call (`performed!`) is made before any search work is started.
      def search
        query = validated_query
        skip_hyde = %w[false 0].include?(params[:hyde].to_s.downcase)

        grouped_results = build_grouped_results(query)
        semantic_search = DiscourseAi::Embeddings::SemanticSearch.new(guardian)

        # The strict HyDE budget only applies when HyDE was requested and the
        # query is not already cached; every other request is charged against
        # the larger non-HyDE budget.
        if !skip_hyde && !semantic_search.cached_query?(query)
          enforce_rate_limit("semantic-search", MAX_HYDE_SEARCHES_PER_MINUTE)
        else
          enforce_rate_limit("semantic-search-non-hyde", MAX_SEARCHES_PER_MINUTE)
        end

        hijack do
          semantic_search
            .search_for_topics(query, 1, hyde: !skip_hyde) # 1 => page
            .each { |topic_post| grouped_results.add(topic_post) }

          render_serialized(grouped_results, GroupedSearchResultSerializer, result: grouped_results)
        end
      end

      # Post-level semantic search.
      #
      # This search function searches posts (vs. topics). It requires post
      # embeddings and a reranker, and it will not perform a HyDE expansion.
      #
      # Params:
      #   q - search term; must be at least SiteSetting.min_search_term_length
      #       characters long.
      #
      # Raises Discourse::InvalidParameters when `q` is too short.
      def quick_search
        query = validated_query

        grouped_results = build_grouped_results(query)
        semantic_search = DiscourseAi::Embeddings::SemanticSearch.new(guardian)

        # Only uncached queries are charged. The limiter key is deliberately
        # different from the HyDE key used by #search so the 60/min and 4/min
        # budgets are tracked independently.
        # NOTE(review): assumes RateLimiter keeps one counter per user + key -
        # confirm against the RateLimiter implementation.
        unless semantic_search.cached_query?(query)
          enforce_rate_limit("semantic-quick-search", MAX_QUICK_SEARCHES_PER_MINUTE)
        end

        hijack do
          semantic_search.quick_search(query).each { |topic_post| grouped_results.add(topic_post) }

          render_serialized(grouped_results, GroupedSearchResultSerializer, result: grouped_results)
        end
      end

      private

      # Returns `params[:q]` as a String, raising Discourse::InvalidParameters
      # when it is shorter than SiteSetting.min_search_term_length.
      def validated_query
        query = params[:q].to_s

        if query.length < SiteSetting.min_search_term_length
          raise Discourse::InvalidParameters.new(:q)
        end

        query
      end

      # Builds the empty result container shared by both actions so they are
      # always configured identically.
      def build_grouped_results(query)
        Search::GroupedSearchResults.new(
          type_filter: SEMANTIC_SEARCH_TYPE,
          term: query,
          search_context: guardian,
          use_pg_headlines_for_excerpt: false,
          can_lazy_load_categories: guardian.can_lazy_load_categories?,
        )
      end

      # Records one request for `current_user` under `key`, allowing at most
      # `max_per_minute` within a one-minute window. Any error raised by
      # `performed!` propagates to the caller.
      def enforce_rate_limit(key, max_per_minute)
        RateLimiter.new(current_user, key, max_per_minute, 1.minutes).performed!
      end
    end
  end
end
|