FIX: Update migrations with latest vector rep changes (#199)

This commit is contained in:
Roman Rizzi 2023-09-05 14:31:04 -03:00 committed by GitHub
parent ee734a340a
commit 175def1267
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 62 deletions

View File

@@ -2,27 +2,24 @@
class CreateAiTopicEmbeddingsTable < ActiveRecord::Migration[7.0]
def change
models = [
DiscourseAi::Embeddings::Models::AllMpnetBaseV2,
DiscourseAi::Embeddings::Models::TextEmbeddingAda002,
]
strategies = [DiscourseAi::Embeddings::Strategies::Truncation]
truncation = DiscourseAi::Embeddings::Strategies::Truncation.new
vector_reps =
[
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2,
DiscourseAi::Embeddings::VectorRepresentations::TextEmbeddingAda002,
].map { |k| k.new(truncation) }
models.each do |model|
strategies.each do |strategy|
table_name = "ai_topic_embeddings_#{model.id}_#{strategy.id}".to_sym
create_table table_name, id: false do |t|
vector_reps.each do |vector_rep|
create_table vector_rep.table_name.to_sym, id: false do |t|
t.integer :topic_id, null: false
t.integer :model_version, null: false
t.integer :strategy_version, null: false
t.text :digest, null: false
t.column :embeddings, "vector(#{model.dimensions})", null: false
t.column :embeddings, "vector(#{vector_rep.dimensions})", null: false
t.timestamps
t.index :topic_id, unique: true
end
end
end
end
end

View File

@@ -5,16 +5,17 @@ class MigrateEmbeddingsFromDedicatedDatabase < ActiveRecord::Migration[7.0]
return unless SiteSetting.ai_embeddings_enabled
return unless SiteSetting.ai_embeddings_pg_connection_string.present?
models = [
DiscourseAi::Embeddings::Models::AllMpnetBaseV2,
DiscourseAi::Embeddings::Models::TextEmbeddingAda002,
]
strategies = [DiscourseAi::Embeddings::Strategies::Truncation]
truncation = DiscourseAi::Embeddings::Strategies::Truncation.new
models.each do |model|
strategies.each do |strategy|
new_table_name = "ai_topic_embeddings_#{model.id}_#{strategy.id}"
old_table_name = "topic_embeddings_#{model.name.underscore}"
vector_reps =
[
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2,
DiscourseAi::Embeddings::VectorRepresentations::TextEmbeddingAda002,
].map { |k| k.new(truncation) }
vector_reps.each do |vector_rep|
new_table_name = vector_rep.table_name
old_table_name = "topic_embeddings_#{vector_rep.name.underscore}"
begin
row_count =
@@ -55,7 +56,6 @@ class MigrateEmbeddingsFromDedicatedDatabase < ActiveRecord::Migration[7.0]
end
end
end
end
def down
# no-op

View File

@@ -2,24 +2,18 @@
class CreateMultilingualTopicEmbeddingsTable < ActiveRecord::Migration[7.0]
def change
models = [DiscourseAi::Embeddings::Models::MultilingualE5Large]
strategies = [DiscourseAi::Embeddings::Strategies::Truncation]
truncation = DiscourseAi::Embeddings::Strategies::Truncation.new
vector_rep = DiscourseAi::Embeddings::VectorRepresentations::MultilingualE5Large.new(truncation)
models.each do |model|
strategies.each do |strategy|
table_name = "ai_topic_embeddings_#{model.id}_#{strategy.id}".to_sym
create_table table_name, id: false do |t|
create_table vector_rep.table_name.to_sym, id: false do |t|
t.integer :topic_id, null: false
t.integer :model_version, null: false
t.integer :strategy_version, null: false
t.text :digest, null: false
t.column :embeddings, "vector(#{model.dimensions})", null: false
t.column :embeddings, "vector(#{vector_rep.dimensions})", null: false
t.timestamps
t.index :topic_id, unique: true
end
end
end
end
end