discourse-ai/spec/lib/modules/ai_helper/semantic_categorizer_spec.rb
Roman Rizzi 534b0df391
REFACTOR: Separation of concerns for embedding generation. (#1027)
In a previous refactor, we moved the responsibility of querying and storing embeddings into the `Schema` class. Now, it's time for embedding generation.

The motivation behind these changes is to isolate vector characteristics in simple objects to later replace them with a DB-backed version, similar to what we did with LLM configs.
2024-12-16 09:55:39 -03:00

41 lines
1.4 KiB
Ruby

# frozen_string_literal: true
# Verifies that category suggestions produced by the semantic categorizer
# leave out categories the requesting user has muted.
RSpec.describe DiscourseAi::AiHelper::SemanticCategorizer do
  fab!(:user)

  # A category that `user` has muted, plus a topic filed under it.
  fab!(:muted_category) { Fabricate(:category) }
  fab!(:category_mute) do
    muted_level = CategoryUser.notification_levels[:muted]
    CategoryUser.create!(user: user, category: muted_category, notification_level: muted_level)
  end
  fab!(:muted_topic) { Fabricate(:topic, category: muted_category) }

  # A category without any mute record, plus a topic filed under it.
  fab!(:category)
  fab!(:topic) { Fabricate(:topic, category: category) }

  let(:vector) { DiscourseAi::Embeddings::Vector.instance }
  let(:categorizer) { described_class.new({ text: "hello" }, user) }

  # One constant value repeated once per dimension of the vector definition.
  let(:expected_embedding) { Array.new(vector.vdef.dimensions, 0.0038493) }

  before do
    SiteSetting.ai_embeddings_enabled = true
    SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
    SiteSetting.ai_embeddings_model = "bge-large-en"

    # Every POST to the classify endpoint answers with the same canned embedding.
    classify_url = "#{SiteSetting.ai_embeddings_discourse_service_api_endpoint}/api/v1/classify"
    canned_response = { status: 200, body: JSON.dump(expected_embedding) }
    WebMock.stub_request(:post, classify_url).to_return(canned_response)

    # Generate representations for both topics, in the same order as before
    # (presumably these are what the categorizer later searches — confirm).
    [topic, muted_topic].each { |target| vector.generate_representation_from(target) }
  end

  it "respects user muted categories when making suggestions" do
    suggested_ids = categorizer.categories.map { |suggestion| suggestion[:id] }

    expect(suggested_ids).not_to include(muted_category.id)
    expect(suggested_ids).to include(category.id)
  end
end