2023-03-31 15:29:56 -03:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi
  module Embeddings
    # Topic search driven by embeddings of a generated "hypothetical post".
    #
    # For a given query the class asks the current HyDE generator for a
    # hypothetical post, turns that post into a vector with the current vector
    # representation, and uses the vector to look up similar topics. Both the
    # hypothetical post and its embedding are stored in `Discourse.cache` under
    # keys derived from the SHA1 digest of the query.
    class SemanticSearch
      # Upper bound applied to `Search.per_filter` when sizing a page.
      MAX_RESULTS_PER_PAGE = 50

      # Removes the cached hypothetical post and its embedding for +query+.
      #
      # @param query [String] the raw search term
      def self.clear_cache_for(query)
        Discourse.cache.delete(hyde_cache_key(query))
        Discourse.cache.delete(hyde_embedding_cache_key(query))
      end

      # Cache key under which the hypothetical post for +query+ is stored.
      #
      # @param query [String]
      # @return [String]
      def self.hyde_cache_key(query)
        "hyde-doc-#{query_digest(query)}"
      end

      # Cache key under which the hypothetical post's embedding is stored.
      #
      # @param query [String]
      # @return [String]
      def self.hyde_embedding_cache_key(query)
        "hyde-doc-embedding-#{query_digest(query)}"
      end

      # SHA1 hex digest of +query+; shared by every cache key so that the
      # read, fetch and delete paths always agree on the key format.
      #
      # @param query [String]
      # @return [String]
      def self.query_digest(query)
        OpenSSL::Digest::SHA1.hexdigest(query)
      end

      # @param guardian [Object] object responding to `#user`; that user is
      #   passed to `::Topic.visible_post_types` when filtering posts
      def initialize(guardian)
        @guardian = guardian
      end

      # Whether an embedding for +query+ is already present in the cache.
      #
      # @param query [String]
      # @return [Boolean]
      def cached_query?(query)
        Discourse.cache.read(self.class.hyde_embedding_cache_key(query)).present?
      end

      # Finds first posts of topics similar to +query+.
      #
      # @param query [String] the raw search term
      # @param page [Integer] 1-based page number; values below 1 are treated
      #   as page 1 so the computed offset is never negative
      # @return [ActiveRecord::Relation] first posts (post_number 1) of the
      #   candidate topics, ordered by the position of their topic id in the
      #   candidate list; an empty relation when there are no candidates
      def search_for_topics(query, page = 1)
        page = [page, 1].max

        # One extra row is requested per page — presumably so the caller can
        # tell whether more results exist; confirm against the caller.
        limit = [Search.per_filter, MAX_RESULTS_PER_PAGE].min + 1
        # NOTE(review): the offset is derived from the padded limit, so
        # consecutive pages start `limit` rows apart. If the caller discards
        # the extra row, one candidate per page is never shown — verify.
        offset = (page - 1) * limit

        strategy = DiscourseAi::Embeddings::Strategies::Truncation.new
        vector_rep =
          DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(strategy)

        hypothetical_post =
          Discourse
            .cache
            .fetch(self.class.hyde_cache_key(query), expires_in: 1.week) do
              hyde_generator = DiscourseAi::Embeddings::HydeGenerators::Base.current_hyde_model.new
              hyde_generator.hypothetical_post_from(query)
            end

        hypothetical_post_embedding =
          Discourse
            .cache
            .fetch(self.class.hyde_embedding_cache_key(query), expires_in: 1.week) do
              vector_rep.vector_from(hypothetical_post)
            end

        candidate_topic_ids =
          vector_rep.asymmetric_topics_similarity_search(
            hypothetical_post_embedding,
            limit: limit,
            offset: offset,
          )

        # With no candidates the ORDER BY below would interpolate to
        # `ARRAY[]`, which PostgreSQL rejects because an empty array
        # constructor has no determinable element type.
        return ::Post.none if candidate_topic_ids.blank?

        ::Post
          .where(post_type: ::Topic.visible_post_types(guardian.user))
          .public_posts
          .where("topics.visible")
          .where(topic_id: candidate_topic_ids, post_number: 1)
          # Array#to_s renders the ids as `[1, 2, 3]`, forming a SQL array
          # literal that preserves the similarity ranking.
          .order("array_position(ARRAY#{candidate_topic_ids}, topic_id)")
      end

      private

      attr_reader :guardian
    end
  end
end
|