FEATURE: Move the default embeddings model to bge-large-en (#417)

This commit is contained in:
Rafael dos Santos Silva 2024-01-11 14:16:25 -03:00 committed by GitHub
parent 8df966e9c5
commit 3be76ebd7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 6 additions and 5 deletions

View File

@ -231,7 +231,7 @@ discourse_ai:
ai_embeddings_model:
type: enum
list_type: compact
-default: "all-mpnet-base-v2"
+default: "bge-large-en"
allow_any: false
choices:
- all-mpnet-base-v2

View File

@ -80,7 +80,7 @@ RSpec.describe DiscourseAi::AiBot::Tools::Search do
post1 = Fabricate(:post, topic: topic_with_tags)
search = described_class.new({ search_query: "hello world, sam", status: "public" })
-DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2
+DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
.any_instance
.expects(:asymmetric_topics_similarity_search)
.returns([post1.topic_id])

View File

@ -7,7 +7,7 @@ RSpec.describe Jobs::GenerateEmbeddings do
before do
SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
SiteSetting.ai_embeddings_enabled = true
-SiteSetting.ai_embeddings_model = "all-mpnet-base-v2"
+SiteSetting.ai_embeddings_model = "bge-large-en"
end
fab!(:topic) { Fabricate(:topic) }

View File

@ -24,7 +24,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
after { described_class.clear_cache_for(query) }
def stub_candidate_ids(candidate_ids)
-DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2
+DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
.any_instance
.expects(:asymmetric_topics_similarity_search)
.returns(candidate_ids)

View File

@ -12,7 +12,7 @@ describe DiscourseAi::Embeddings::EntryPoint do
fab!(:target) { Fabricate(:topic) }
def stub_semantic_search_with(results)
-DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2
+DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
.any_instance
.expects(:symmetric_topics_similarity_search)
.returns(results.concat([target.id]))

View File

@ -3,6 +3,7 @@
class EmbeddingsGenerationStubs
class << self
def discourse_service(model, string, embedding)
+model = "bge-large-en-v1.5" if model == "bge-large-en"
WebMock
.stub_request(
:post,