FIX: ensure we only attempt embedding once every 15 minutes (#76)
This also heavily reduced log noise and ensures our exception handling is more surgical.
This commit is contained in:
parent
074d00ca32
commit
b82fc1e692
|
@ -3,6 +3,19 @@
|
||||||
module DiscourseAi
|
module DiscourseAi
|
||||||
module Embeddings
|
module Embeddings
|
||||||
class SemanticRelated
|
class SemanticRelated
|
||||||
|
def self.semantic_suggested_key(topic_id)
|
||||||
|
"semantic-suggested-topic-#{topic_id}"
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.build_semantic_suggested_key(topic_id)
|
||||||
|
"build-semantic-suggested-topic-#{topic_id}"
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.clear_cache_for(topic)
|
||||||
|
Discourse.cache.delete(semantic_suggested_key(topic.id))
|
||||||
|
Discourse.redis.del(build_semantic_suggested_key(topic.id))
|
||||||
|
end
|
||||||
|
|
||||||
def self.candidates_for(topic)
|
def self.candidates_for(topic)
|
||||||
return ::Topic.none if SiteSetting.ai_embeddings_semantic_related_topics < 1
|
return ::Topic.none if SiteSetting.ai_embeddings_semantic_related_topics < 1
|
||||||
|
|
||||||
|
@ -25,12 +38,19 @@ module DiscourseAi
|
||||||
candidate_ids =
|
candidate_ids =
|
||||||
Discourse
|
Discourse
|
||||||
.cache
|
.cache
|
||||||
.fetch("semantic-suggested-topic-#{topic.id}", expires_in: cache_for) do
|
.fetch(semantic_suggested_key(topic.id), expires_in: cache_for) do
|
||||||
DiscourseAi::Embeddings::Topic.new.symmetric_semantic_search(model, topic)
|
DiscourseAi::Embeddings::Topic.new.symmetric_semantic_search(model, topic)
|
||||||
end
|
end
|
||||||
rescue StandardError => e
|
rescue MissingEmbeddingError
|
||||||
Rails.logger.error("SemanticRelated: #{e}")
|
# avoid a flood of jobs when visiting topic
|
||||||
Jobs.enqueue(:generate_embeddings, topic_id: topic.id)
|
if Discourse.redis.set(
|
||||||
|
build_semantic_suggested_key(topic.id),
|
||||||
|
"queued",
|
||||||
|
ex: 15.minutes.to_i,
|
||||||
|
nx: true,
|
||||||
|
)
|
||||||
|
Jobs.enqueue(:generate_embeddings, topic_id: topic.id)
|
||||||
|
end
|
||||||
return ::Topic.none
|
return ::Topic.none
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
module DiscourseAi
|
module DiscourseAi
|
||||||
module Embeddings
|
module Embeddings
|
||||||
|
MissingEmbeddingError = Class.new(StandardError)
|
||||||
|
|
||||||
class Topic
|
class Topic
|
||||||
def generate_and_store_embeddings_for(topic)
|
def generate_and_store_embeddings_for(topic)
|
||||||
return unless SiteSetting.ai_embeddings_enabled
|
return unless SiteSetting.ai_embeddings_enabled
|
||||||
|
@ -17,29 +19,12 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
def symmetric_semantic_search(model, topic)
|
def symmetric_semantic_search(model, topic)
|
||||||
candidate_ids =
|
candidate_ids = query_symmetric_embeddings(model, topic)
|
||||||
DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
|
|
||||||
SELECT
|
|
||||||
topic_id
|
|
||||||
FROM
|
|
||||||
topic_embeddings_#{model.name.underscore}
|
|
||||||
ORDER BY
|
|
||||||
embedding #{model.pg_function} (
|
|
||||||
SELECT
|
|
||||||
embedding
|
|
||||||
FROM
|
|
||||||
topic_embeddings_#{model.name.underscore}
|
|
||||||
WHERE
|
|
||||||
topic_id = :topic_id
|
|
||||||
LIMIT 1
|
|
||||||
)
|
|
||||||
LIMIT 100
|
|
||||||
SQL
|
|
||||||
|
|
||||||
# Happens when the topic doesn't have any embeddings
|
# Happens when the topic doesn't have any embeddings
|
||||||
# I'd rather not use Exceptions to control the flow, so this should be refactored soon
|
# I'd rather not use Exceptions to control the flow, so this should be refactored soon
|
||||||
if candidate_ids.empty? || !candidate_ids.include?(topic.id)
|
if candidate_ids.empty? || !candidate_ids.include?(topic.id)
|
||||||
raise StandardError, "No embeddings found for topic #{topic.id}"
|
raise MissingEmbeddingError, "No embeddings found for topic #{topic.id}"
|
||||||
end
|
end
|
||||||
|
|
||||||
candidate_ids
|
candidate_ids
|
||||||
|
@ -70,6 +55,26 @@ module DiscourseAi
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
def query_symmetric_embeddings(model, topic)
|
||||||
|
DiscourseAi::Database::Connection.db.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
|
||||||
|
SELECT
|
||||||
|
topic_id
|
||||||
|
FROM
|
||||||
|
topic_embeddings_#{model.name.underscore}
|
||||||
|
ORDER BY
|
||||||
|
embedding #{model.pg_function} (
|
||||||
|
SELECT
|
||||||
|
embedding
|
||||||
|
FROM
|
||||||
|
topic_embeddings_#{model.name.underscore}
|
||||||
|
WHERE
|
||||||
|
topic_id = :topic_id
|
||||||
|
LIMIT 1
|
||||||
|
)
|
||||||
|
LIMIT 100
|
||||||
|
SQL
|
||||||
|
end
|
||||||
|
|
||||||
def persist_embedding(topic, model, embedding)
|
def persist_embedding(topic, model, embedding)
|
||||||
DiscourseAi::Database::Connection.db.exec(<<~SQL, topic_id: topic.id, embedding: embedding)
|
DiscourseAi::Database::Connection.db.exec(<<~SQL, topic_id: topic.id, embedding: embedding)
|
||||||
INSERT INTO topic_embeddings_#{model.name.underscore} (topic_id, embedding)
|
INSERT INTO topic_embeddings_#{model.name.underscore} (topic_id, embedding)
|
||||||
|
|
|
@ -16,33 +16,65 @@ describe DiscourseAi::Embeddings::SemanticRelated do
|
||||||
before { SiteSetting.ai_embeddings_semantic_related_topics_enabled = true }
|
before { SiteSetting.ai_embeddings_semantic_related_topics_enabled = true }
|
||||||
|
|
||||||
describe "#candidates_for" do
|
describe "#candidates_for" do
|
||||||
before do
|
context "when embeddings do not exist" do
|
||||||
Discourse.cache.clear
|
let (:topic) do
|
||||||
DiscourseAi::Embeddings::Topic
|
topic = Fabricate(:topic)
|
||||||
.any_instance
|
described_class.clear_cache_for(target)
|
||||||
.expects(:symmetric_semantic_search)
|
topic
|
||||||
.returns(Topic.unscoped.order(id: :desc).limit(100).pluck(:id))
|
end
|
||||||
|
|
||||||
|
it "queues job only once per 15 minutes" do
|
||||||
|
# sadly we need to mock a DB connection to return nothing
|
||||||
|
DiscourseAi::Embeddings::Topic
|
||||||
|
.any_instance
|
||||||
|
.expects(:query_symmetric_embeddings)
|
||||||
|
.returns([])
|
||||||
|
.twice
|
||||||
|
|
||||||
|
results = nil
|
||||||
|
|
||||||
|
expect_enqueued_with(job: :generate_embeddings, args: { topic_id: topic.id }) do
|
||||||
|
results = described_class.candidates_for(topic).to_a
|
||||||
|
end
|
||||||
|
|
||||||
|
expect(results).to eq([])
|
||||||
|
|
||||||
|
expect_not_enqueued_with(job: :generate_embeddings, args: { topic_id: topic.id }) do
|
||||||
|
results = described_class.candidates_for(topic).to_a
|
||||||
|
end
|
||||||
|
|
||||||
|
expect(results).to eq([])
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
context "when embeddings exist" do
|
||||||
|
before do
|
||||||
|
Discourse.cache.clear
|
||||||
|
DiscourseAi::Embeddings::Topic
|
||||||
|
.any_instance
|
||||||
|
.expects(:symmetric_semantic_search)
|
||||||
|
.returns(Topic.unscoped.order(id: :desc).limit(100).pluck(:id))
|
||||||
|
end
|
||||||
|
|
||||||
after { Discourse.cache.clear }
|
after { Discourse.cache.clear }
|
||||||
|
|
||||||
it "returns the related topics without non public topics" do
|
it "returns the related topics without non public topics" do
|
||||||
results = described_class.candidates_for(target).to_a
|
|
||||||
expect(results).to include(normal_topic_1)
|
|
||||||
expect(results).to include(normal_topic_2)
|
|
||||||
expect(results).to include(normal_topic_3)
|
|
||||||
expect(results).to include(closed_topic)
|
|
||||||
expect(results).to_not include(target)
|
|
||||||
expect(results).to_not include(unlisted_topic)
|
|
||||||
expect(results).to_not include(private_topic)
|
|
||||||
expect(results).to_not include(secured_category_topic)
|
|
||||||
end
|
|
||||||
|
|
||||||
context "when ai_embeddings_semantic_related_include_closed_topics is false" do
|
|
||||||
before { SiteSetting.ai_embeddings_semantic_related_include_closed_topics = false }
|
|
||||||
it "do not return closed topics" do
|
|
||||||
results = described_class.candidates_for(target).to_a
|
results = described_class.candidates_for(target).to_a
|
||||||
expect(results).to_not include(closed_topic)
|
expect(results).to include(normal_topic_1)
|
||||||
|
expect(results).to include(normal_topic_2)
|
||||||
|
expect(results).to include(normal_topic_3)
|
||||||
|
expect(results).to include(closed_topic)
|
||||||
|
expect(results).to_not include(target)
|
||||||
|
expect(results).to_not include(unlisted_topic)
|
||||||
|
expect(results).to_not include(private_topic)
|
||||||
|
expect(results).to_not include(secured_category_topic)
|
||||||
|
end
|
||||||
|
|
||||||
|
context "when ai_embeddings_semantic_related_include_closed_topics is false" do
|
||||||
|
before { SiteSetting.ai_embeddings_semantic_related_include_closed_topics = false }
|
||||||
|
it "do not return closed topics" do
|
||||||
|
results = described_class.candidates_for(target).to_a
|
||||||
|
expect(results).to_not include(closed_topic)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue