Mirror of https://github.com/discourse/discourse-ai.git (synced 2025-02-16 16:34:45 +00:00)
FIX: Proper flow when a topic doesn't have embeddings (#20)
parent fea9041ee1
commit 6bdbc0e32d
@@ -23,17 +23,11 @@ module DiscourseAi
         Discourse
           .cache
           .fetch("semantic-suggested-topic-#{topic.id}", expires_in: cache_for) do
-            suggested = search_suggestions(topic)
-
-            # Happens when the topic doesn't have any embeddings
-            if suggested.empty? || !suggested.include?(topic.id)
-              return { result: [], params: {} }
-            end
-
-            suggested
+            search_suggestions(topic)
           end
       rescue StandardError => e
         Rails.logger.error("SemanticSuggested: #{e}")
+        return { result: [], params: {} }
       end
 
       # array_position forces the order of the topics to be preserved
@@ -49,7 +43,8 @@ module DiscourseAi
        function =
          DiscourseAi::Embeddings::Models::SEARCH_FUNCTION_TO_PG_FUNCTION[model.functions.first]
 
-        DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
+        candidate_ids =
+          DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
          SELECT
            topic_id
          FROM
@@ -66,6 +61,14 @@ module DiscourseAi
          )
          LIMIT 11
        SQL
+
+        # Happens when the topic doesn't have any embeddings
+        # I'd rather not use Exceptions to control the flow, so this should be refactored soon
+        if candidate_ids.empty? || !candidate_ids.include?(topic.id)
+          raise StandardError, "No embeddings found for topic #{topic.id}"
+        end
+
+        candidate_ids
      end
    end
  end
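
Taken together, the two hunks above move the "no embeddings" handling out of the cached block: search_suggestions now signals the condition by raising, and the existing rescue turns it into an empty but well-formed payload (and, since the block raises instead of returning a value, nothing should be written under the semantic-suggested cache key in that case). Below is a minimal sketch of the resulting flow, assuming the Discourse runtime (Discourse.cache, Rails.logger, ActiveSupport); the entry-point name and cache_for default are invented for illustration, and the query body is stubbed out.

module DiscourseAi
  module Embeddings
    class SemanticSuggested
      # Hypothetical entry point; only the method bodies mirror the diff above.
      def self.suggested_for(topic, cache_for: 30.minutes)
        Discourse
          .cache
          .fetch("semantic-suggested-topic-#{topic.id}", expires_in: cache_for) do
            search_suggestions(topic) # raises when the topic has no embeddings yet
          end
      rescue StandardError => e
        Rails.logger.error("SemanticSuggested: #{e}")
        return { result: [], params: {} } # callers always receive a well-formed payload
      end

      def self.search_suggestions(topic)
        candidate_ids = [] # stands in for the SQL nearest-neighbour query in the diff
        if candidate_ids.empty? || !candidate_ids.include?(topic.id)
          raise StandardError, "No embeddings found for topic #{topic.id}"
        end
        candidate_ids
      end
    end
  end
end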
|
@@ -2,6 +2,10 @@
 
 desc "Creates tables to store embeddings"
 task "ai:embeddings:create_table" => [:environment] do
+  DiscourseAi::Database::Connection.db.exec(<<~SQL)
+    CREATE EXTENSION IF NOT EXISTS pg_vector;
+  SQL
+
   DiscourseAi::Embeddings::Models.enabled_models.each do |model|
     DiscourseAi::Database::Connection.db.exec(<<~SQL)
       CREATE TABLE IF NOT EXISTS topic_embeddings_#{model.name.underscore} (
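
For reference (not part of the diff), the table-creation task above is an ordinary rake task, so from the root of a Discourse checkout it would typically be invoked as:

bin/rake ai:embeddings:create_table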
@@ -25,12 +29,13 @@ task "ai:embeddings:backfill" => [:environment] do
 end
 
 desc "Creates indexes for embeddings"
-task "ai:embeddings:index" => [:environment] do
+task "ai:embeddings:index", [:work_mem] => [:environment] do |_, args|
   # Using 4 * sqrt(number of topics) as a rule of thumb for now
   # Results are not as good as without indexes, but it's much faster
   # Disk usage is ~1x the size of the table, so this double table total size
   lists = 4 * Math.sqrt(Topic.count).to_i
 
+  DiscourseAi::Database::Connection.db.exec("SET work_mem TO '#{args[:work_mem] || "1GB"}';")
   DiscourseAi::Embeddings::Models.enabled_models.each do |model|
     DiscourseAi::Database::Connection.db.exec(<<~SQL)
       CREATE INDEX IF NOT EXISTS
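
To make the "4 * sqrt(number of topics)" rule of thumb above concrete, here is a small worked example in Ruby; the topic count is made up, and the resulting value is what ends up as the lists parameter in the CREATE INDEX statement below.

topic_count = 250_000
lists = 4 * Math.sqrt(topic_count).to_i # Math.sqrt(250_000) == 500.0, so lists == 2000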
@@ -42,5 +47,6 @@ task "ai:embeddings:index" => [:environment] do
       WITH
         (lists = #{lists});
     SQL
+    DiscourseAi::Database::Connection.db.exec("RESET work_mem;")
   end
 end
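
With the new optional [work_mem] task argument, the index build can be run with a larger work_mem, falling back to '1GB' when the argument is omitted; the SET/RESET pair keeps the override scoped to the task's database session. Assuming standard rake argument syntax and a Discourse checkout, invocations would look like:

bin/rake ai:embeddings:index
bin/rake "ai:embeddings:index[4GB]"

The second form passes "4GB" as args[:work_mem] for the duration of the index build.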
|