FIX: Proper flow when a topic doesn't have embeddings (#20)
This commit is contained in:
parent
fea9041ee1
commit
6bdbc0e32d
|
@ -23,17 +23,11 @@ module DiscourseAi
|
|||
Discourse
|
||||
.cache
|
||||
.fetch("semantic-suggested-topic-#{topic.id}", expires_in: cache_for) do
|
||||
suggested = search_suggestions(topic)
|
||||
|
||||
# Happens when the topic doesn't have any embeddings
|
||||
if suggested.empty? || !suggested.include?(topic.id)
|
||||
return { result: [], params: {} }
|
||||
end
|
||||
|
||||
suggested
|
||||
search_suggestions(topic)
|
||||
end
|
||||
rescue StandardError => e
|
||||
Rails.logger.error("SemanticSuggested: #{e}")
|
||||
return { result: [], params: {} }
|
||||
end
|
||||
|
||||
# array_position forces the order of the topics to be preserved
|
||||
|
@ -49,7 +43,8 @@ module DiscourseAi
|
|||
function =
|
||||
DiscourseAi::Embeddings::Models::SEARCH_FUNCTION_TO_PG_FUNCTION[model.functions.first]
|
||||
|
||||
DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
|
||||
candidate_ids =
|
||||
DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
|
||||
SELECT
|
||||
topic_id
|
||||
FROM
|
||||
|
@ -66,6 +61,14 @@ module DiscourseAi
|
|||
)
|
||||
LIMIT 11
|
||||
SQL
|
||||
|
||||
# Happens when the topic doesn't have any embeddings
|
||||
# I'd rather not use Exceptions to control the flow, so this should be refactored soon
|
||||
if candidate_ids.empty? || !candidate_ids.include?(topic.id)
|
||||
raise StandardError, "No embeddings found for topic #{topic.id}"
|
||||
end
|
||||
|
||||
candidate_ids
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,6 +2,10 @@
|
|||
|
||||
desc "Creates tables to store embeddings"
|
||||
task "ai:embeddings:create_table" => [:environment] do
|
||||
DiscourseAi::Database::Connection.db.exec(<<~SQL)
|
||||
CREATE EXTENSION IF NOT EXISTS pg_vector;
|
||||
SQL
|
||||
|
||||
DiscourseAi::Embeddings::Models.enabled_models.each do |model|
|
||||
DiscourseAi::Database::Connection.db.exec(<<~SQL)
|
||||
CREATE TABLE IF NOT EXISTS topic_embeddings_#{model.name.underscore} (
|
||||
|
@ -25,12 +29,13 @@ task "ai:embeddings:backfill" => [:environment] do
|
|||
end
|
||||
|
||||
desc "Creates indexes for embeddings"
|
||||
task "ai:embeddings:index" => [:environment] do
|
||||
task "ai:embeddings:index", [:work_mem] => [:environment] do |_, args|
|
||||
# Using 4 * sqrt(number of topics) as a rule of thumb for now
|
||||
# Results are not as good as without indexes, but it's much faster
|
||||
# Disk usage is ~1x the size of the table, so this doubles the total table size
|
||||
lists = 4 * Math.sqrt(Topic.count).to_i
|
||||
|
||||
DiscourseAi::Database::Connection.db.exec("SET work_mem TO '#{args[:work_mem] || "1GB"}';")
|
||||
DiscourseAi::Embeddings::Models.enabled_models.each do |model|
|
||||
DiscourseAi::Database::Connection.db.exec(<<~SQL)
|
||||
CREATE INDEX IF NOT EXISTS
|
||||
|
@ -42,5 +47,6 @@ task "ai:embeddings:index" => [:environment] do
|
|||
WITH
|
||||
(lists = #{lists});
|
||||
SQL
|
||||
DiscourseAi::Database::Connection.db.exec("RESET work_mem;")
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue