2023-03-31 14:29:56 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
# Specs for DiscourseAi::Embeddings::SemanticSearch#search_for_topics.
#
# Each example stores an embedding for a candidate topic, runs the search with
# a prepared LLM response, and asserts which posts come back for the searcher.
RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
  # Declared as a named subject rather than `let(:subject)`, which would
  # override RSpec's built-in `subject` helper through `let`.
  subject(:semantic_search) { described_class.new(Guardian.new(user)) }

  fab!(:post)
  fab!(:user)

  let(:query) { "test_query" }

  before { assign_fake_provider_to(:ai_embeddings_semantic_search_hyde_model) }

  describe "#search_for_topics" do
    # Text the prepared LLM response contains (wrapped in <ai> tags) and that
    # the embeddings stub is registered for.
    let(:hypothetical_post) { "This is an hypothetical post generated from the keyword test_query" }
    let(:vector_def) { DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation }
    # Constant-valued vector sized to the current representation's dimensions.
    let(:hyde_embedding) { [0.049382] * vector_def.dimensions }

    before do
      SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"

      # NOTE(review): presumably stubs the embeddings service so that
      # `hypothetical_post` resolves to `hyde_embedding` - confirm against
      # EmbeddingsGenerationStubs.
      EmbeddingsGenerationStubs.discourse_service(
        SiteSetting.ai_embeddings_model,
        hypothetical_post,
        hyde_embedding,
      )
    end

    # Clears whatever the class cached for this query so examples stay independent.
    after { described_class.clear_cache_for(query) }

    # Stores `hyde_embedding` (with the digest "digest") for `candidate` through
    # the Topic embeddings schema, i.e. the same vector handed to the stub above.
    def insert_candidate(candidate)
      DiscourseAi::Embeddings::Schema.for(Topic).store(candidate, hyde_embedding, "digest")
    end

    # Runs `search_for_topics(query)` on the subject while the LLM is prepared
    # to answer with `hypothetical_post`; returns the search result.
    def trigger_search(query)
      DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{hypothetical_post}</ai>"]) do
        semantic_search.search_for_topics(query)
      end
    end

    it "returns the first post of a topic included in the asymmetric search results" do
      insert_candidate(post.topic)

      posts = trigger_search(query)

      expect(posts).to contain_exactly(post)
    end

    describe "applies different scopes to the candidates" do
      context "when the topic is not visible" do
        it "returns an empty list" do
          post.topic.update!(visible: false)
          insert_candidate(post.topic)

          posts = trigger_search(query)

          expect(posts).to be_empty
        end
      end

      context "when the post is not public" do
        it "returns an empty list" do
          pm_post = Fabricate(:private_message_post)
          insert_candidate(pm_post.topic)

          posts = trigger_search(query)

          expect(posts).to be_empty
        end
      end

      context "when the post type is not visible" do
        it "returns an empty list" do
          post.update!(post_type: Post.types[:whisper])
          insert_candidate(post.topic)

          posts = trigger_search(query)

          expect(posts).to be_empty
        end
      end

      context "when the post is not the first post in the topic" do
        it "returns an empty list" do
          reply = Fabricate(:reply)
          # Trash the topic's first post so only the reply is left.
          reply.topic.first_post.trash!
          insert_candidate(reply.topic)

          posts = trigger_search(query)

          expect(posts).to be_empty
        end
      end

      context "when the post is not a candidate" do
        it "doesn't include it in the results" do
          # No embedding is stored for post_2's topic.
          post_2 = Fabricate(:post)
          insert_candidate(post.topic)

          posts = trigger_search(query)

          expect(posts).not_to include(post_2)
        end
      end

      context "when the post belongs to a secured category" do
        fab!(:group)
        fab!(:private_category) { Fabricate(:private_category, group: group) }

        before do
          post.topic.update!(category: private_category)
          insert_candidate(post.topic)
        end

        it "returns an empty list" do
          posts = trigger_search(query)

          expect(posts).to be_empty
        end

        it "returns the results if the user has access to the category" do
          group.add(user)

          posts = trigger_search(query)

          expect(posts).to contain_exactly(post)
        end

        context "while searching as anon" do
          it "returns an empty list" do
            # Uses a Guardian built with no user instead of the subject, so the
            # search runs without a logged-in user.
            posts =
              DiscourseAi::Completions::Llm.with_prepared_responses(
                ["<ai>#{hypothetical_post}</ai>"],
              ) { described_class.new(Guardian.new(nil)).search_for_topics(query) }

            expect(posts).to be_empty
          end
        end
      end
    end
  end
end
|