diff --git a/lib/modules/embeddings/semantic_suggested.rb b/lib/modules/embeddings/semantic_suggested.rb index 27e3d4a5..410a56a9 100644 --- a/lib/modules/embeddings/semantic_suggested.rb +++ b/lib/modules/embeddings/semantic_suggested.rb @@ -23,17 +23,11 @@ module DiscourseAi Discourse .cache .fetch("semantic-suggested-topic-#{topic.id}", expires_in: cache_for) do - suggested = search_suggestions(topic) - - # Happens when the topic doesn't have any embeddings - if suggested.empty? || !suggested.include?(topic.id) - return { result: [], params: {} } - end - - suggested + search_suggestions(topic) end rescue StandardError => e Rails.logger.error("SemanticSuggested: #{e}") + return { result: [], params: {} } end # array_position forces the order of the topics to be preserved @@ -49,7 +43,8 @@ module DiscourseAi function = DiscourseAi::Embeddings::Models::SEARCH_FUNCTION_TO_PG_FUNCTION[model.functions.first] - DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id) + candidate_ids = + DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id) SELECT topic_id FROM @@ -66,6 +61,14 @@ module DiscourseAi ) LIMIT 11 SQL + + # Happens when the topic doesn't have any embeddings + # I'd rather not use Exceptions to control the flow, so this should be refactored soon + if candidate_ids.empty? || !candidate_ids.include?(topic.id) + raise StandardError, "No embeddings found for topic #{topic.id}" + end + + candidate_ids end end end diff --git a/lib/tasks/modules/embeddings/database.rake b/lib/tasks/modules/embeddings/database.rake index cee4d3f3..61b3e167 100644 --- a/lib/tasks/modules/embeddings/database.rake +++ b/lib/tasks/modules/embeddings/database.rake @@ -2,6 +2,10 @@ desc "Creates tables to store embeddings" task "ai:embeddings:create_table" => [:environment] do + DiscourseAi::Database::Connection.db.exec(<<~SQL) + CREATE EXTENSION IF NOT EXISTS pg_vector; + SQL + DiscourseAi::Embeddings::Models.enabled_models.each do |model| DiscourseAi::Database::Connection.db.exec(<<~SQL) CREATE TABLE IF NOT EXISTS topic_embeddings_#{model.name.underscore} ( @@ -25,12 +29,13 @@ task "ai:embeddings:backfill" => [:environment] do end desc "Creates indexes for embeddings" -task "ai:embeddings:index" => [:environment] do +task "ai:embeddings:index", [:work_mem] => [:environment] do |_, args| # Using 4 * sqrt(number of topics) as a rule of thumb for now # Results are not as good as without indexes, but it's much faster # Disk usage is ~1x the size of the table, so this double table total size lists = 4 * Math.sqrt(Topic.count).to_i + DiscourseAi::Database::Connection.db.exec("SET work_mem TO '#{args[:work_mem] || "1GB"}';") DiscourseAi::Embeddings::Models.enabled_models.each do |model| DiscourseAi::Database::Connection.db.exec(<<~SQL) CREATE INDEX IF NOT EXISTS @@ -42,5 +47,6 @@ task "ai:embeddings:index" => [:environment] do WITH (lists = #{lists}); SQL + DiscourseAi::Database::Connection.db.exec("RESET work_mem;") end end