REFACTOR: Build related topics using TopicQuery. (#124)

TopicQuery already provides a lot of safeguards and options for filtering topic, and enforcing permissions. It makes sense to rely on it as other plugins like discourse-assign do.

As a bonus, we now have access to the current_user while serializing these topics, so users will see things like unread posts count just like we do for the lists.
This commit is contained in:
Roman Rizzi 2023-08-02 16:58:09 -03:00 committed by GitHub
parent 602bb843ea
commit 58b96eda6c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 142 additions and 79 deletions

View File

@ -31,12 +31,28 @@ module DiscourseAi
return nil return nil
end end
@related_topics ||= @related_topics ||= TopicQuery.new(@user).list_semantic_related_topics(topic).topics
TopicList.new( end
:related,
nil, plugin.add_to_class(:topic_query, :list_semantic_related_topics) do |topic|
DiscourseAi::Embeddings::SemanticRelated.candidates_for(topic), query_opts = {
).topics skip_ordering: true,
per_page: SiteSetting.ai_embeddings_semantic_related_topics,
unordered: true,
}
if !SiteSetting.ai_embeddings_semantic_related_include_closed_topics
query_opts[:status] = "open"
end
create_list(:semantic_related, query_opts) do |topics|
candidate_ids = DiscourseAi::Embeddings::SemanticRelated.related_topic_ids_for(topic)
topics
.where.not(id: topic.id)
.where(id: candidate_ids)
.order("array_position(ARRAY#{candidate_ids}, topics.id)") # array_position forces the order of the topics to be preserved
end
end end
%i[topic_view TopicViewPosts].each do |serializer| %i[topic_view TopicViewPosts].each do |serializer|

View File

@ -19,28 +19,17 @@ module DiscourseAi
Discourse.redis.del(build_semantic_suggested_key(topic.id)) Discourse.redis.del(build_semantic_suggested_key(topic.id))
end end
def candidates_for(topic) def related_topic_ids_for(topic)
return ::Topic.none if SiteSetting.ai_embeddings_semantic_related_topics < 1 return [] if SiteSetting.ai_embeddings_semantic_related_topics < 1
manager = DiscourseAi::Embeddings::Manager.new(topic) manager = DiscourseAi::Embeddings::Manager.new(topic)
cache_for = cache_for = results_ttl(topic)
case topic.created_at
when 6.hour.ago..Time.now
15.minutes
when 3.day.ago..6.hour.ago
1.hour
when 15.days.ago..3.day.ago
12.hours
else
1.week
end
begin begin
candidate_ids =
Discourse Discourse
.cache .cache
.fetch(semantic_suggested_key(topic.id), expires_in: cache_for) do .fetch(semantic_suggested_key(topic.id), expires_in: cache_for) do
self.symmetric_semantic_search(manager) symmetric_semantic_search(manager)
end end
rescue MissingEmbeddingError rescue MissingEmbeddingError
# avoid a flood of jobs when visiting topic # avoid a flood of jobs when visiting topic
@ -52,21 +41,8 @@ module DiscourseAi
) )
Jobs.enqueue(:generate_embeddings, topic_id: topic.id) Jobs.enqueue(:generate_embeddings, topic_id: topic.id)
end end
return ::Topic.none []
end end
topic_list = ::Topic.visible.listable_topics.secured
unless SiteSetting.ai_embeddings_semantic_related_include_closed_topics
topic_list = topic_list.where(closed: false)
end
topic_list
.where("id <> ?", topic.id)
.where(id: candidate_ids)
# array_position forces the order of the topics to be preserved
.order("array_position(ARRAY#{candidate_ids}, id)")
.limit(SiteSetting.ai_embeddings_semantic_related_topics)
end end
def symmetric_semantic_search(manager) def symmetric_semantic_search(manager)
@ -111,6 +87,19 @@ module DiscourseAi
raise MissingEmbeddingError raise MissingEmbeddingError
end end
end end
def results_ttl(topic)
case topic.created_at
when 6.hour.ago..Time.now
15.minutes
when 3.day.ago..6.hour.ago
1.hour
when 15.days.ago..3.day.ago
12.hours
else
1.week
end
end
end end
end end
end end

View File

@ -28,4 +28,97 @@ describe DiscourseAi::Embeddings::EntryPoint do
end end
end end
end end
describe "TopicQuery extensions" do
describe "#list_semantic_related_topics" do
subject(:topic_query) { TopicQuery.new(user) }
fab!(:target) { Fabricate(:topic) }
def stub_semantic_search_with(results)
DiscourseAi::Embeddings::SemanticRelated.expects(:related_topic_ids_for).returns(results)
end
context "when the semantic search returns an unlisted topic" do
fab!(:unlisted_topic) { Fabricate(:topic, visible: false) }
before { stub_semantic_search_with([unlisted_topic.id]) }
it "filters it out" do
expect(topic_query.list_semantic_related_topics(target).topics).to be_empty
end
end
context "when the semantic search returns a private topic" do
fab!(:private_topic) { Fabricate(:private_message_topic) }
before { stub_semantic_search_with([private_topic.id]) }
it "filters it out" do
expect(topic_query.list_semantic_related_topics(target).topics).to be_empty
end
end
context "when the semantic search returns a topic from a restricted category" do
fab!(:group) { Fabricate(:group) }
fab!(:category) { Fabricate(:private_category, group: group) }
fab!(:secured_category_topic) { Fabricate(:topic, category: category) }
before { stub_semantic_search_with([secured_category_topic.id]) }
it "filters it out" do
expect(topic_query.list_semantic_related_topics(target).topics).to be_empty
end
it "doesn't filter it out if the user has access to the category" do
group.add(user)
expect(topic_query.list_semantic_related_topics(target).topics).to contain_exactly(
secured_category_topic,
)
end
end
context "when the semantic search returns a closed topic and we explicitly exclude them" do
fab!(:closed_topic) { Fabricate(:topic, closed: true) }
before do
SiteSetting.ai_embeddings_semantic_related_include_closed_topics = false
stub_semantic_search_with([closed_topic.id])
end
it "filters it out" do
expect(topic_query.list_semantic_related_topics(target).topics).to be_empty
end
end
context "when the semantic search returns public topics" do
fab!(:normal_topic_1) { Fabricate(:topic) }
fab!(:normal_topic_2) { Fabricate(:topic) }
fab!(:normal_topic_3) { Fabricate(:topic) }
fab!(:closed_topic) { Fabricate(:topic, closed: true) }
before do
stub_semantic_search_with(
[closed_topic.id, normal_topic_1.id, normal_topic_2.id, normal_topic_3.id],
)
end
it "filters it out" do
expect(topic_query.list_semantic_related_topics(target).topics).to eq(
[closed_topic, normal_topic_1, normal_topic_2, normal_topic_3],
)
end
it "returns the plugin limit for the number of results" do
SiteSetting.ai_embeddings_semantic_related_topics = 2
expect(topic_query.list_semantic_related_topics(target).topics).to contain_exactly(
closed_topic,
normal_topic_1,
)
end
end
end
end
end end

View File

@ -15,59 +15,27 @@ describe DiscourseAi::Embeddings::SemanticRelated do
before { SiteSetting.ai_embeddings_semantic_related_topics_enabled = true } before { SiteSetting.ai_embeddings_semantic_related_topics_enabled = true }
describe "#candidates_for" do describe "#related_topic_ids_for" do
context "when embeddings do not exist" do context "when embeddings do not exist" do
let (:topic) do let (:topic) do
topic = Fabricate(:topic) Fabricate(:topic).tap { described_class.clear_cache_for(target) }
described_class.clear_cache_for(target)
topic
end end
it "queues job only once per 15 minutes" do it "queues job only once per 15 minutes" do
results = nil results = nil
expect_enqueued_with(job: :generate_embeddings, args: { topic_id: topic.id }) do expect_enqueued_with(job: :generate_embeddings, args: { topic_id: topic.id }) do
results = described_class.candidates_for(topic).to_a results = described_class.related_topic_ids_for(topic)
end end
expect(results).to eq([]) expect(results).to eq([])
expect_not_enqueued_with(job: :generate_embeddings, args: { topic_id: topic.id }) do expect_not_enqueued_with(job: :generate_embeddings, args: { topic_id: topic.id }) do
results = described_class.candidates_for(topic).to_a results = described_class.related_topic_ids_for(topic)
end end
expect(results).to eq([]) expect(results).to eq([])
end end
end end
context "when embeddings exist" do
before do
Discourse.cache.clear
DiscourseAi::Embeddings::SemanticRelated.expects(:symmetric_semantic_search).returns(
Topic.unscoped.order(id: :desc).limit(100).pluck(:id),
)
end
after { Discourse.cache.clear }
it "returns the related topics without non public topics" do
results = described_class.candidates_for(target).to_a
expect(results).to include(normal_topic_1)
expect(results).to include(normal_topic_2)
expect(results).to include(normal_topic_3)
expect(results).to include(closed_topic)
expect(results).to_not include(target)
expect(results).to_not include(unlisted_topic)
expect(results).to_not include(private_topic)
expect(results).to_not include(secured_category_topic)
end
context "when ai_embeddings_semantic_related_include_closed_topics is false" do
before { SiteSetting.ai_embeddings_semantic_related_include_closed_topics = false }
it "do not return closed topics" do
results = described_class.candidates_for(target).to_a
expect(results).to_not include(closed_topic)
end
end
end
end end
end end

View File

@ -10,16 +10,13 @@ describe ::TopicsController do
fab!(:user) { Fabricate(:admin) } fab!(:user) { Fabricate(:admin) }
before do before do
Discourse.cache.clear
SiteSetting.ai_embeddings_semantic_related_topics_enabled = true SiteSetting.ai_embeddings_semantic_related_topics_enabled = true
SiteSetting.ai_embeddings_semantic_related_topics = 2 SiteSetting.ai_embeddings_semantic_related_topics = 2
end end
after { Discourse.cache.clear }
context "when a user is logged on" do context "when a user is logged on" do
it "includes related topics in payload when configured" do it "includes related topics in payload when configured" do
DiscourseAi::Embeddings::SemanticRelated.expects(:symmetric_semantic_search).returns( DiscourseAi::Embeddings::SemanticRelated.stubs(:related_topic_ids_for).returns(
[topic1.id, topic2.id, topic3.id], [topic1.id, topic2.id, topic3.id],
) )