FEATURE: allow limiting results in related topics section (#30)

Also:

- Normalizes behavior between logged-in and anonymous users:
 related topics are now only shown in the related topics section

- Renames "suggested" to "related" given this only exists in related section
- Adds a spec section to ensure anon does not regress
- Adds `ai_embeddings_semantic_related_topics` to limit related topics

Renamed settings:

ai_embeddings_semantic_suggested_model -> ai_embeddings_semantic_related_model
ai_embeddings_semantic_suggested_topics_enabled -> ai_embeddings_semantic_related_topics_enabled

The plugin is still in an experimental phase and not many settings are overridden,
hence we avoid adding site setting migrations.


Co-authored-by: Krzysztof Kotlarek <kotlarek.krzysztof@gmail.com>
This commit is contained in:
Sam 2023-03-31 11:04:34 +11:00 committed by GitHub
parent 1d097b9d82
commit 0d80d9ec49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 68 additions and 67 deletions

View File

@ -42,9 +42,10 @@ en:
ai_embeddings_discourse_service_api_endpoint: "URL where the API is running for the embeddings module"
ai_embeddings_discourse_service_api_key: "API key for the embeddings API"
ai_embeddings_models: "Discourse will generate embeddings for each of the models enabled here"
ai_embeddings_semantic_suggested_model: "Model to use for suggested topics."
ai_embeddings_semantic_related_model: "Model to use for related topics."
ai_embeddings_generate_for_pms: "Generate embeddings for personal messages."
ai_embeddings_semantic_suggested_topics_enabled: "Use Semantic Search for related topics."
ai_embeddings_semantic_related_topics_enabled: "Use Semantic Search for related topics."
ai_embeddings_semantic_related_topics: "Maximum number of topics to show in related topic section."
ai_embeddings_pg_connection_string: "PostgreSQL connection string for the embeddings module. Needs pgvector extension enabled and a series of tables created. See docs for more info."
reviewables:

View File

@ -124,7 +124,7 @@ plugins:
- msmarco-distilbert-base-v4
- msmarco-distilbert-base-tas-b
- text-embedding-ada-002
ai_embeddings_semantic_suggested_model:
ai_embeddings_semantic_related_model:
type: enum
default: all-mpnet-base-v2
choices:
@ -134,5 +134,6 @@ plugins:
- multi-qa-mpnet-base-dot-v1
- paraphrase-multilingual-mpnet-base-v2
ai_embeddings_generate_for_pms: false
ai_embeddings_semantic_suggested_topics_enabled: false
ai_embeddings_semantic_related_topics_enabled: false
ai_embeddings_semantic_related_topics: 5
ai_embeddings_pg_connection_string: ""

View File

@ -7,13 +7,12 @@ module DiscourseAi
require_relative "models"
require_relative "topic"
require_relative "jobs/regular/generate_embeddings"
require_relative "semantic_suggested"
require_relative "semantic_related"
end
def inject_into(plugin)
plugin.add_to_class(:topic_view, :related_topics) do
if !@guardian&.user || topic.private_message? ||
!SiteSetting.ai_embeddings_semantic_suggested_topics_enabled
if topic.private_message? || !SiteSetting.ai_embeddings_semantic_related_topics_enabled
return nil
end
@ -21,7 +20,7 @@ module DiscourseAi
TopicList.new(
:suggested,
nil,
DiscourseAi::Embeddings::SemanticSuggested.candidates_for(topic),
DiscourseAi::Embeddings::SemanticRelated.candidates_for(topic),
).topics
end
@ -35,7 +34,7 @@ module DiscourseAi
%i[topic_view TopicViewPosts].each do |serializer|
plugin.add_to_serializer(serializer, :related_topics) do
if object.next_page.nil? && !object.topic.private_message? && scope.authenticated?
if object.next_page.nil? && !object.topic.private_message?
object.related_topics.map do |t|
SuggestedTopicSerializer.new(t, scope: scope, root: false)
end
@ -44,7 +43,7 @@ module DiscourseAi
# custom include method so we also check on semantic search
plugin.add_to_serializer(serializer, :include_related_topics?) do
plugin.enabled? && SiteSetting.ai_embeddings_semantic_suggested_topics_enabled
plugin.enabled? && SiteSetting.ai_embeddings_semantic_related_topics_enabled
end
end
@ -57,11 +56,6 @@ module DiscourseAi
plugin.on(:topic_created, &callback)
plugin.on(:topic_edited, &callback)
DiscoursePluginRegistry.register_list_suggested_for_provider(
SemanticSuggested.method(:build_suggested_topics),
plugin,
)
end
end
end

View File

@ -2,16 +2,10 @@
module DiscourseAi
module Embeddings
class SemanticSuggested
def self.build_suggested_topics(topic, pm_params, topic_query)
return unless SiteSetting.ai_embeddings_semantic_suggested_topics_enabled
return if topic_query.user
return if topic.private_message?
{ result: candidates_for(topic), params: {} }
end
class SemanticRelated
def self.candidates_for(topic)
return ::Topic.none if SiteSetting.ai_embeddings_semantic_related_topics < 1
cache_for =
case topic.created_at
when 6.hour.ago..Time.now
@ -30,7 +24,7 @@ module DiscourseAi
search_suggestions(topic)
end
rescue StandardError => e
Rails.logger.error("SemanticSuggested: #{e}")
Rails.logger.error("SemanticRelated: #{e}")
Jobs.enqueue(:generate_embeddings, topic_id: topic.id)
return ::Topic.none
end
@ -42,10 +36,11 @@ module DiscourseAi
.secured
.where(id: candidate_ids)
.order("array_position(ARRAY#{candidate_ids}, id)")
.limit(SiteSetting.ai_embeddings_semantic_related_topics)
end
def self.search_suggestions(topic)
model_name = SiteSetting.ai_embeddings_semantic_suggested_model
model_name = SiteSetting.ai_embeddings_semantic_related_model
model = DiscourseAi::Embeddings::Models.list.find { |m| m.name == model_name }
function =
DiscourseAi::Embeddings::Models::SEARCH_FUNCTION_TO_PG_FUNCTION[model.functions.first]

View File

@ -0,0 +1,37 @@
# frozen_string_literal: true
require "rails_helper"
# Specs for DiscourseAi::Embeddings::SemanticRelated.candidates_for.
# search_suggestions is stubbed below, so these examples only exercise what
# candidates_for does with the candidate ids it is handed.
describe DiscourseAi::Embeddings::SemanticRelated do
# Topic for which related topics are requested.
fab!(:target) { Fabricate(:topic) }
# Ordinary topics; the example expects all three to be returned.
fab!(:normal_topic_1) { Fabricate(:topic) }
fab!(:normal_topic_2) { Fabricate(:topic) }
fab!(:normal_topic_3) { Fabricate(:topic) }
# Topics the example expects to be excluded: an unlisted topic, a personal
# message, and a topic inside a read-restricted category.
fab!(:unlisted_topic) { Fabricate(:topic, visible: false) }
fab!(:private_topic) { Fabricate(:private_message_topic) }
fab!(:secured_category) { Fabricate(:category, read_restricted: true) }
fab!(:secured_category_topic) { Fabricate(:topic, category: secured_category) }
before { SiteSetting.ai_embeddings_semantic_related_topics_enabled = true }
describe "#candidates_for" do
before do
# Start from an empty cache so a previous example cannot leak results.
# NOTE(review): presumably candidates_for caches its lookups — confirm in the class.
Discourse.cache.clear
# Replace the similarity search with the ten highest topic ids, ignoring the
# default scope, so the non-public fixtures above are among the candidates.
described_class.stubs(:search_suggestions).returns(
Topic.unscoped.order(id: :desc).limit(10).pluck(:id),
)
end
after { Discourse.cache.clear }
it "returns the related topics without non public topics" do
# to_a materializes the result so the include matchers compare Topic records.
results = described_class.candidates_for(target).to_a
expect(results).to include(normal_topic_1)
expect(results).to include(normal_topic_2)
expect(results).to include(normal_topic_3)
expect(results).to_not include(unlisted_topic)
expect(results).to_not include(private_topic)
expect(results).to_not include(secured_category_topic)
end
end
end

View File

@ -1,38 +0,0 @@
# frozen_string_literal: true
require "rails_helper"
# Specs for DiscourseAi::Embeddings::SemanticSuggested.build_suggested_topics.
# search_suggestions is stubbed below, so only the handling of the candidate
# ids is exercised here.
describe DiscourseAi::Embeddings::SemanticSuggested do
# Topic for which suggestions are requested.
fab!(:target) { Fabricate(:topic) }
# Ordinary topics; the example expects all three to be returned.
fab!(:normal_topic_1) { Fabricate(:topic) }
fab!(:normal_topic_2) { Fabricate(:topic) }
fab!(:normal_topic_3) { Fabricate(:topic) }
# Topics the example expects to be excluded: an unlisted topic, a personal
# message, and a topic inside a read-restricted category.
fab!(:unlisted_topic) { Fabricate(:topic, visible: false) }
fab!(:private_topic) { Fabricate(:private_message_topic) }
fab!(:secured_category) { Fabricate(:category, read_restricted: true) }
fab!(:secured_category_topic) { Fabricate(:topic, category: secured_category) }
before { SiteSetting.ai_embeddings_semantic_suggested_topics_enabled = true }
describe "#build_suggested_topics" do
before do
# Start from an empty cache so a previous example cannot leak results.
Discourse.cache.clear
# Replace the similarity search with the ten highest topic ids, ignoring the
# default scope, so the non-public fixtures above are among the candidates.
described_class.stubs(:search_suggestions).returns(
Topic.unscoped.order(id: :desc).limit(10).pluck(:id),
)
end
after { Discourse.cache.clear }
it "returns the suggested topics without non public topics" do
# TopicQuery.new(nil) builds the query without a user.
suggested = described_class.build_suggested_topics(target, {}, TopicQuery.new(nil))
# The assertions below inspect the :result entry of the returned value.
suggested_results = suggested[:result]
expect(suggested_results).to include(normal_topic_1)
expect(suggested_results).to include(normal_topic_2)
expect(suggested_results).to include(normal_topic_3)
expect(suggested_results).to_not include(unlisted_topic)
expect(suggested_results).to_not include(private_topic)
expect(suggested_results).to_not include(secured_category_topic)
end
end
end

View File

@ -6,25 +6,36 @@ describe ::TopicsController do
fab!(:topic) { Fabricate(:topic) }
fab!(:topic1) { Fabricate(:topic) }
fab!(:topic2) { Fabricate(:topic) }
fab!(:topic3) { Fabricate(:topic) }
fab!(:user) { Fabricate(:admin) }
before do
Discourse.cache.clear
SiteSetting.ai_embeddings_semantic_suggested_topics_enabled = true
SiteSetting.ai_embeddings_semantic_related_topics_enabled = true
SiteSetting.ai_embeddings_semantic_related_topics = 2
end
after { Discourse.cache.clear }
context "when a user is logged on" do
it "includes related topics in payload when configured" do
DiscourseAi::Embeddings::SemanticSuggested.stubs(:search_suggestions).returns([topic2.id])
DiscourseAi::Embeddings::SemanticRelated.stubs(:search_suggestions).returns(
[topic1.id, topic2.id, topic3.id],
)
get("#{topic.relative_url}.json")
json = response.parsed_body
expect(json["suggested_topics"].length).to eq(0)
expect(json["related_topics"].length).to eq(2)
sign_in(user)
get("#{topic.relative_url}.json")
json = response.parsed_body
expect(json["suggested_topics"].length).to eq(0)
expect(json["related_topics"].length).to be > 0
expect(json["related_topics"].length).to eq(2)
end
end
end