discourse-ai/lib/modules/embeddings/semantic_related.rb

116 lines
3.6 KiB
Ruby

# frozen_string_literal: true
module DiscourseAi
module Embeddings
class SemanticRelated
MissingEmbeddingError = Class.new(StandardError)
class << self
def semantic_suggested_key(topic_id)
"semantic-suggested-topic-#{topic_id}"
end
def build_semantic_suggested_key(topic_id)
"build-semantic-suggested-topic-#{topic_id}"
end
def clear_cache_for(topic)
Discourse.cache.delete(semantic_suggested_key(topic.id))
Discourse.redis.del(build_semantic_suggested_key(topic.id))
end
def candidates_for(topic)
return ::Topic.none if SiteSetting.ai_embeddings_semantic_related_topics < 1
manager = DiscourseAi::Embeddings::Manager.new(topic)
cache_for =
case topic.created_at
when 6.hour.ago..Time.now
15.minutes
when 1.day.ago..6.hour.ago
1.hour
else
1.day
end
begin
candidate_ids =
Discourse
.cache
.fetch(semantic_suggested_key(topic.id), expires_in: cache_for) do
self.symmetric_semantic_search(manager)
end
rescue MissingEmbeddingError
# avoid a flood of jobs when visiting topic
if Discourse.redis.set(
build_semantic_suggested_key(topic.id),
"queued",
ex: 15.minutes.to_i,
nx: true,
)
Jobs.enqueue(:generate_embeddings, topic_id: topic.id)
end
return ::Topic.none
end
topic_list = ::Topic.visible.listable_topics.secured
unless SiteSetting.ai_embeddings_semantic_related_include_closed_topics
topic_list = topic_list.where(closed: false)
end
topic_list
.where("id <> ?", topic.id)
.where(id: candidate_ids)
# array_position forces the order of the topics to be preserved
.order("array_position(ARRAY#{candidate_ids}, id)")
.limit(SiteSetting.ai_embeddings_semantic_related_topics)
end
def symmetric_semantic_search(manager)
topic = manager.target
candidate_ids = self.query_symmetric_embeddings(manager)
# Happens when the topic doesn't have any embeddings
# I'd rather not use Exceptions to control the flow, so this should be refactored soon
if candidate_ids.empty? || !candidate_ids.include?(topic.id)
raise MissingEmbeddingError, "No embeddings found for topic #{topic.id}"
end
candidate_ids
end
def query_symmetric_embeddings(manager)
topic = manager.target
model = manager.model
table = manager.topic_embeddings_table
begin
DB.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
SELECT
topic_id
FROM
#{table}
ORDER BY
embeddings #{model.pg_function} (
SELECT
embeddings
FROM
#{table}
WHERE
topic_id = :topic_id
LIMIT 1
)
LIMIT 100
SQL
rescue PG::Error => e
Rails.logger.error(
"Error #{e} querying embeddings for topic #{topic.id} and model #{model.name}",
)
raise MissingEmbeddingError
end
end
end
end
end
end