mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-07-31 10:23:27 +00:00
FIX: Fix embeddings to use the old OpenAI tokenizer (#1506)
This commit is contained in:
parent
67664029e5
commit
06743d1939
@ -84,7 +84,7 @@ class EmbeddingDefinition < ActiveRecord::Base
|
||||
dimensions: 2000,
|
||||
max_sequence_length: 8191,
|
||||
pg_function: "<=>",
|
||||
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
|
||||
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiCl100kTokenizer",
|
||||
url: "https://api.openai.com/v1/embeddings",
|
||||
provider: OPEN_AI,
|
||||
matryoshka_dimensions: true,
|
||||
@ -98,7 +98,7 @@ class EmbeddingDefinition < ActiveRecord::Base
|
||||
dimensions: 1536,
|
||||
max_sequence_length: 8191,
|
||||
pg_function: "<=>",
|
||||
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
|
||||
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiCl100kTokenizer",
|
||||
url: "https://api.openai.com/v1/embeddings",
|
||||
provider: OPEN_AI,
|
||||
matryoshka_dimensions: true,
|
||||
@ -112,7 +112,7 @@ class EmbeddingDefinition < ActiveRecord::Base
|
||||
dimensions: 1536,
|
||||
max_sequence_length: 8191,
|
||||
pg_function: "<=>",
|
||||
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiTokenizer",
|
||||
tokenizer_class: "DiscourseAi::Tokenizer::OpenAiCl100kTokenizer",
|
||||
url: "https://api.openai.com/v1/embeddings",
|
||||
provider: OPEN_AI,
|
||||
provider_params: {
|
||||
|
@ -0,0 +1,14 @@
|
||||
# frozen_string_literal: true
|
||||
# Data migration that switches existing embedding definitions pointing at the
# OpenAI API over to the OpenAiCl100kTokenizer tokenizer class.
class UpdateOpenAiEmbeddingsTokenizer < ActiveRecord::Migration[7.2]
  # Updates every embedding_definitions row whose url contains
  # "https://api.openai.com/" and whose tokenizer_class is not already
  # DiscourseAi::Tokenizer::OpenAiCl100kTokenizer.
  def up
    execute(<<~SQL)
      UPDATE embedding_definitions
      SET tokenizer_class = 'DiscourseAi::Tokenizer::OpenAiCl100kTokenizer'
      WHERE url LIKE '%https://api.openai.com/%' AND tokenizer_class <> 'DiscourseAi::Tokenizer::OpenAiCl100kTokenizer'
    SQL
  end

  # `up` overwrites tokenizer_class without recording the previous value,
  # so there is nothing to restore; rolling back is refused.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end
|
@ -50,7 +50,7 @@ RSpec.describe "Managing Embeddings configurations", type: :system, js: true do
|
||||
form.field("provider").select(EmbeddingDefinition::OPEN_AI)
|
||||
form.field("url").fill_in("https://api.openai.com/v1/embeddings")
|
||||
form.field("api_key").fill_in(api_key)
|
||||
form.field("tokenizer_class").select("DiscourseAi::Tokenizer::OpenAiTokenizer")
|
||||
form.field("tokenizer_class").select("DiscourseAi::Tokenizer::OpenAiCl100kTokenizer")
|
||||
|
||||
embed_prefix = "On creation:"
|
||||
search_prefix = "On search:"
|
||||
|
Loading…
x
Reference in New Issue
Block a user