FEATURE: Move the default embeddings model to bge-large-en (#417)

This commit is contained in:
Rafael dos Santos Silva 2024-01-11 14:16:25 -03:00 committed by GitHub
parent 8df966e9c5
commit 3be76ebd7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 6 additions and 5 deletions

View File

@@ -231,7 +231,7 @@ discourse_ai:
ai_embeddings_model: ai_embeddings_model:
type: enum type: enum
list_type: compact list_type: compact
default: "all-mpnet-base-v2" default: "bge-large-en"
allow_any: false allow_any: false
choices: choices:
- all-mpnet-base-v2 - all-mpnet-base-v2

View File

@@ -80,7 +80,7 @@ RSpec.describe DiscourseAi::AiBot::Tools::Search do
post1 = Fabricate(:post, topic: topic_with_tags) post1 = Fabricate(:post, topic: topic_with_tags)
search = described_class.new({ search_query: "hello world, sam", status: "public" }) search = described_class.new({ search_query: "hello world, sam", status: "public" })
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2 DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
.any_instance .any_instance
.expects(:asymmetric_topics_similarity_search) .expects(:asymmetric_topics_similarity_search)
.returns([post1.topic_id]) .returns([post1.topic_id])

View File

@@ -7,7 +7,7 @@ RSpec.describe Jobs::GenerateEmbeddings do
before do before do
SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com" SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
SiteSetting.ai_embeddings_enabled = true SiteSetting.ai_embeddings_enabled = true
SiteSetting.ai_embeddings_model = "all-mpnet-base-v2" SiteSetting.ai_embeddings_model = "bge-large-en"
end end
fab!(:topic) { Fabricate(:topic) } fab!(:topic) { Fabricate(:topic) }

View File

@@ -24,7 +24,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
after { described_class.clear_cache_for(query) } after { described_class.clear_cache_for(query) }
def stub_candidate_ids(candidate_ids) def stub_candidate_ids(candidate_ids)
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2 DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
.any_instance .any_instance
.expects(:asymmetric_topics_similarity_search) .expects(:asymmetric_topics_similarity_search)
.returns(candidate_ids) .returns(candidate_ids)

View File

@@ -12,7 +12,7 @@ describe DiscourseAi::Embeddings::EntryPoint do
fab!(:target) { Fabricate(:topic) } fab!(:target) { Fabricate(:topic) }
def stub_semantic_search_with(results) def stub_semantic_search_with(results)
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2 DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
.any_instance .any_instance
.expects(:symmetric_topics_similarity_search) .expects(:symmetric_topics_similarity_search)
.returns(results.concat([target.id])) .returns(results.concat([target.id]))

View File

@@ -3,6 +3,7 @@
class EmbeddingsGenerationStubs class EmbeddingsGenerationStubs
class << self class << self
def discourse_service(model, string, embedding) def discourse_service(model, string, embedding)
model = "bge-large-en-v1.5" if model == "bge-large-en"
WebMock WebMock
.stub_request( .stub_request(
:post, :post,