From 58b96eda6c575626d2989e6ed44818d82f1c2782 Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Wed, 2 Aug 2023 16:58:09 -0300 Subject: [PATCH] REFACTOR: Build related topics using TopicQuery. (#124) TopicQuery already provides a lot of safeguards and options for filtering topic, and enforcing permissions. It makes sense to rely on it as other plugins like discourse-assign do. As a bonus, we now have access to the current_user while serializing these topics, so users will see things like unread posts count just like we do for the lists. --- lib/modules/embeddings/entry_point.rb | 28 ++++-- lib/modules/embeddings/semantic_related.rb | 55 +++++------ .../modules/embeddings/entry_point_spec.rb | 93 +++++++++++++++++++ .../embeddings/semantic_related_spec.rb | 40 +------- spec/requests/topic_spec.rb | 5 +- 5 files changed, 142 insertions(+), 79 deletions(-) diff --git a/lib/modules/embeddings/entry_point.rb b/lib/modules/embeddings/entry_point.rb index e6423bfc..bc97704a 100644 --- a/lib/modules/embeddings/entry_point.rb +++ b/lib/modules/embeddings/entry_point.rb @@ -31,12 +31,28 @@ module DiscourseAi return nil end - @related_topics ||= - TopicList.new( - :related, - nil, - DiscourseAi::Embeddings::SemanticRelated.candidates_for(topic), - ).topics + @related_topics ||= TopicQuery.new(@user).list_semantic_related_topics(topic).topics + end + + plugin.add_to_class(:topic_query, :list_semantic_related_topics) do |topic| + query_opts = { + skip_ordering: true, + per_page: SiteSetting.ai_embeddings_semantic_related_topics, + unordered: true, + } + + if !SiteSetting.ai_embeddings_semantic_related_include_closed_topics + query_opts[:status] = "open" + end + + create_list(:semantic_related, query_opts) do |topics| + candidate_ids = DiscourseAi::Embeddings::SemanticRelated.related_topic_ids_for(topic) + + topics + .where.not(id: topic.id) + .where(id: candidate_ids) + .order("array_position(ARRAY#{candidate_ids}, topics.id)") # array_position forces the order of the topics to be preserved + end end %i[topic_view TopicViewPosts].each do |serializer| diff --git a/lib/modules/embeddings/semantic_related.rb b/lib/modules/embeddings/semantic_related.rb index c7856118..667866ad 100644 --- a/lib/modules/embeddings/semantic_related.rb +++ b/lib/modules/embeddings/semantic_related.rb @@ -19,29 +19,18 @@ module DiscourseAi Discourse.redis.del(build_semantic_suggested_key(topic.id)) end - def candidates_for(topic) - return ::Topic.none if SiteSetting.ai_embeddings_semantic_related_topics < 1 + def related_topic_ids_for(topic) + return [] if SiteSetting.ai_embeddings_semantic_related_topics < 1 manager = DiscourseAi::Embeddings::Manager.new(topic) - cache_for = - case topic.created_at - when 6.hour.ago..Time.now - 15.minutes - when 3.day.ago..6.hour.ago - 1.hour - when 15.days.ago..3.day.ago - 12.hours - else - 1.week - end + cache_for = results_ttl(topic) begin - candidate_ids = - Discourse - .cache - .fetch(semantic_suggested_key(topic.id), expires_in: cache_for) do - self.symmetric_semantic_search(manager) - end + Discourse + .cache + .fetch(semantic_suggested_key(topic.id), expires_in: cache_for) do + symmetric_semantic_search(manager) + end rescue MissingEmbeddingError # avoid a flood of jobs when visiting topic if Discourse.redis.set( @@ -52,21 +41,8 @@ module DiscourseAi ) Jobs.enqueue(:generate_embeddings, topic_id: topic.id) end - return ::Topic.none + [] end - - topic_list = ::Topic.visible.listable_topics.secured - - unless SiteSetting.ai_embeddings_semantic_related_include_closed_topics - topic_list = topic_list.where(closed: false) - end - - topic_list - .where("id <> ?", topic.id) - .where(id: candidate_ids) - # array_position forces the order of the topics to be preserved - .order("array_position(ARRAY#{candidate_ids}, id)") - .limit(SiteSetting.ai_embeddings_semantic_related_topics) end def symmetric_semantic_search(manager) @@ -111,6 +87,19 @@ module DiscourseAi raise MissingEmbeddingError end end + + def results_ttl(topic) + case topic.created_at + when 6.hour.ago..Time.now + 15.minutes + when 3.day.ago..6.hour.ago + 1.hour + when 15.days.ago..3.day.ago + 12.hours + else + 1.week + end + end end end end diff --git a/spec/lib/modules/embeddings/entry_point_spec.rb b/spec/lib/modules/embeddings/entry_point_spec.rb index 3d4f14e8..ddc7c4bb 100644 --- a/spec/lib/modules/embeddings/entry_point_spec.rb +++ b/spec/lib/modules/embeddings/entry_point_spec.rb @@ -28,4 +28,97 @@ describe DiscourseAi::Embeddings::EntryPoint do end end end + + describe "TopicQuery extensions" do + describe "#list_semantic_related_topics" do + subject(:topic_query) { TopicQuery.new(user) } + + fab!(:target) { Fabricate(:topic) } + + def stub_semantic_search_with(results) + DiscourseAi::Embeddings::SemanticRelated.expects(:related_topic_ids_for).returns(results) + end + + context "when the semantic search returns an unlisted topic" do + fab!(:unlisted_topic) { Fabricate(:topic, visible: false) } + + before { stub_semantic_search_with([unlisted_topic.id]) } + + it "filters it out" do + expect(topic_query.list_semantic_related_topics(target).topics).to be_empty + end + end + + context "when the semantic search returns a private topic" do + fab!(:private_topic) { Fabricate(:private_message_topic) } + + before { stub_semantic_search_with([private_topic.id]) } + + it "filters it out" do + expect(topic_query.list_semantic_related_topics(target).topics).to be_empty + end + end + + context "when the semantic search returns a topic from a restricted category" do + fab!(:group) { Fabricate(:group) } + fab!(:category) { Fabricate(:private_category, group: group) } + fab!(:secured_category_topic) { Fabricate(:topic, category: category) } + + before { stub_semantic_search_with([secured_category_topic.id]) } + + it "filters it out" do + expect(topic_query.list_semantic_related_topics(target).topics).to be_empty + end + + it "doesn't filter it out if the user has access to the category" do + group.add(user) + + expect(topic_query.list_semantic_related_topics(target).topics).to contain_exactly( + secured_category_topic, + ) + end + end + + context "when the semantic search returns a closed topic and we explicitly exclude them" do + fab!(:closed_topic) { Fabricate(:topic, closed: true) } + + before do + SiteSetting.ai_embeddings_semantic_related_include_closed_topics = false + stub_semantic_search_with([closed_topic.id]) + end + + it "filters it out" do + expect(topic_query.list_semantic_related_topics(target).topics).to be_empty + end + end + + context "when the semantic search returns public topics" do + fab!(:normal_topic_1) { Fabricate(:topic) } + fab!(:normal_topic_2) { Fabricate(:topic) } + fab!(:normal_topic_3) { Fabricate(:topic) } + fab!(:closed_topic) { Fabricate(:topic, closed: true) } + + before do + stub_semantic_search_with( + [closed_topic.id, normal_topic_1.id, normal_topic_2.id, normal_topic_3.id], + ) + end + + it "filters it out" do + expect(topic_query.list_semantic_related_topics(target).topics).to eq( + [closed_topic, normal_topic_1, normal_topic_2, normal_topic_3], + ) + end + + it "returns the plugin limit for the number of results" do + SiteSetting.ai_embeddings_semantic_related_topics = 2 + + expect(topic_query.list_semantic_related_topics(target).topics).to contain_exactly( + closed_topic, + normal_topic_1, + ) + end + end + end + end end diff --git a/spec/lib/modules/embeddings/semantic_related_spec.rb b/spec/lib/modules/embeddings/semantic_related_spec.rb index 3000852d..22c85b51 100644 --- a/spec/lib/modules/embeddings/semantic_related_spec.rb +++ b/spec/lib/modules/embeddings/semantic_related_spec.rb @@ -15,59 +15,27 @@ describe DiscourseAi::Embeddings::SemanticRelated do before { SiteSetting.ai_embeddings_semantic_related_topics_enabled = true } - describe "#candidates_for" do + describe "#related_topic_ids_for" do context "when embeddings do not exist" do let (:topic) do - topic = Fabricate(:topic) - described_class.clear_cache_for(target) - topic + Fabricate(:topic).tap { described_class.clear_cache_for(target) } end it "queues job only once per 15 minutes" do results = nil expect_enqueued_with(job: :generate_embeddings, args: { topic_id: topic.id }) do - results = described_class.candidates_for(topic).to_a + results = described_class.related_topic_ids_for(topic) end expect(results).to eq([]) expect_not_enqueued_with(job: :generate_embeddings, args: { topic_id: topic.id }) do - results = described_class.candidates_for(topic).to_a + results = described_class.related_topic_ids_for(topic) end expect(results).to eq([]) end end - context "when embeddings exist" do - before do - Discourse.cache.clear - DiscourseAi::Embeddings::SemanticRelated.expects(:symmetric_semantic_search).returns( - Topic.unscoped.order(id: :desc).limit(100).pluck(:id), - ) - end - - after { Discourse.cache.clear } - - it "returns the related topics without non public topics" do - results = described_class.candidates_for(target).to_a - expect(results).to include(normal_topic_1) - expect(results).to include(normal_topic_2) - expect(results).to include(normal_topic_3) - expect(results).to include(closed_topic) - expect(results).to_not include(target) - expect(results).to_not include(unlisted_topic) - expect(results).to_not include(private_topic) - expect(results).to_not include(secured_category_topic) - end - - context "when ai_embeddings_semantic_related_include_closed_topics is false" do - before { SiteSetting.ai_embeddings_semantic_related_include_closed_topics = false } - it "do not return closed topics" do - results = described_class.candidates_for(target).to_a - expect(results).to_not include(closed_topic) - end - end - end end end diff --git a/spec/requests/topic_spec.rb b/spec/requests/topic_spec.rb index 44d457df..25a00895 100644 --- a/spec/requests/topic_spec.rb +++ b/spec/requests/topic_spec.rb @@ -10,16 +10,13 @@ describe ::TopicsController do fab!(:user) { Fabricate(:admin) } before do - Discourse.cache.clear SiteSetting.ai_embeddings_semantic_related_topics_enabled = true SiteSetting.ai_embeddings_semantic_related_topics = 2 end - after { Discourse.cache.clear } - context "when a user is logged on" do it "includes related topics in payload when configured" do - DiscourseAi::Embeddings::SemanticRelated.expects(:symmetric_semantic_search).returns( + DiscourseAi::Embeddings::SemanticRelated.stubs(:related_topic_ids_for).returns( [topic1.id, topic2.id, topic3.id], )