FEATURE: Move the default embeddings model to bge-large-en (#417)
This commit is contained in:
parent
8df966e9c5
commit
3be76ebd7a
|
@@ -231,7 +231,7 @@ discourse_ai:
|
||||||
ai_embeddings_model:
|
ai_embeddings_model:
|
||||||
type: enum
|
type: enum
|
||||||
list_type: compact
|
list_type: compact
|
||||||
default: "all-mpnet-base-v2"
|
default: "bge-large-en"
|
||||||
allow_any: false
|
allow_any: false
|
||||||
choices:
|
choices:
|
||||||
- all-mpnet-base-v2
|
- all-mpnet-base-v2
|
||||||
|
|
|
@@ -80,7 +80,7 @@ RSpec.describe DiscourseAi::AiBot::Tools::Search do
|
||||||
post1 = Fabricate(:post, topic: topic_with_tags)
|
post1 = Fabricate(:post, topic: topic_with_tags)
|
||||||
search = described_class.new({ search_query: "hello world, sam", status: "public" })
|
search = described_class.new({ search_query: "hello world, sam", status: "public" })
|
||||||
|
|
||||||
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2
|
DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
|
||||||
.any_instance
|
.any_instance
|
||||||
.expects(:asymmetric_topics_similarity_search)
|
.expects(:asymmetric_topics_similarity_search)
|
||||||
.returns([post1.topic_id])
|
.returns([post1.topic_id])
|
||||||
|
|
|
@@ -7,7 +7,7 @@ RSpec.describe Jobs::GenerateEmbeddings do
|
||||||
before do
|
before do
|
||||||
SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
|
SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
|
||||||
SiteSetting.ai_embeddings_enabled = true
|
SiteSetting.ai_embeddings_enabled = true
|
||||||
SiteSetting.ai_embeddings_model = "all-mpnet-base-v2"
|
SiteSetting.ai_embeddings_model = "bge-large-en"
|
||||||
end
|
end
|
||||||
|
|
||||||
fab!(:topic) { Fabricate(:topic) }
|
fab!(:topic) { Fabricate(:topic) }
|
||||||
|
|
|
@@ -24,7 +24,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
|
||||||
after { described_class.clear_cache_for(query) }
|
after { described_class.clear_cache_for(query) }
|
||||||
|
|
||||||
def stub_candidate_ids(candidate_ids)
|
def stub_candidate_ids(candidate_ids)
|
||||||
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2
|
DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
|
||||||
.any_instance
|
.any_instance
|
||||||
.expects(:asymmetric_topics_similarity_search)
|
.expects(:asymmetric_topics_similarity_search)
|
||||||
.returns(candidate_ids)
|
.returns(candidate_ids)
|
||||||
|
|
|
@@ -12,7 +12,7 @@ describe DiscourseAi::Embeddings::EntryPoint do
|
||||||
fab!(:target) { Fabricate(:topic) }
|
fab!(:target) { Fabricate(:topic) }
|
||||||
|
|
||||||
def stub_semantic_search_with(results)
|
def stub_semantic_search_with(results)
|
||||||
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2
|
DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
|
||||||
.any_instance
|
.any_instance
|
||||||
.expects(:symmetric_topics_similarity_search)
|
.expects(:symmetric_topics_similarity_search)
|
||||||
.returns(results.concat([target.id]))
|
.returns(results.concat([target.id]))
|
||||||
|
|
|
@@ -3,6 +3,7 @@
|
||||||
class EmbeddingsGenerationStubs
|
class EmbeddingsGenerationStubs
|
||||||
class << self
|
class << self
|
||||||
def discourse_service(model, string, embedding)
|
def discourse_service(model, string, embedding)
|
||||||
|
model = "bge-large-en-v1.5" if model == "bge-large-en"
|
||||||
WebMock
|
WebMock
|
||||||
.stub_request(
|
.stub_request(
|
||||||
:post,
|
:post,
|
||||||
|
|
Loading…
Reference in New Issue