From b9d6179bfc132ca3035e6477234bf69b2ca286b3 Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Silva Date: Mon, 16 Oct 2023 18:50:37 -0300 Subject: [PATCH] DEV: Migrations shouldn't rely on the app (#253) --- ...171142_create_ai_topic_embeddings_table.rb | 34 ++++++++++--------- ...ate_multilingual_topic_embeddings_table.rb | 7 ++-- ...55701_create_bge_topic_embeddings_table.rb | 7 ++-- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/db/migrate/20230710171142_create_ai_topic_embeddings_table.rb b/db/migrate/20230710171142_create_ai_topic_embeddings_table.rb index 50626c87..22756c1f 100644 --- a/db/migrate/20230710171142_create_ai_topic_embeddings_table.rb +++ b/db/migrate/20230710171142_create_ai_topic_embeddings_table.rb @@ -2,24 +2,26 @@ class CreateAiTopicEmbeddingsTable < ActiveRecord::Migration[7.0] def change - truncation = DiscourseAi::Embeddings::Strategies::Truncation.new - vector_reps = - [ - DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2, - DiscourseAi::Embeddings::VectorRepresentations::TextEmbeddingAda002, - ].map { |k| k.new(truncation) } + create_table :ai_topic_embeddings_1_1, id: false do |t| + t.integer :topic_id, null: false + t.integer :model_version, null: false + t.integer :strategy_version, null: false + t.text :digest, null: false + t.column :embeddings, "vector(768)", null: false + t.timestamps - vector_reps.each do |vector_rep| - create_table vector_rep.table_name.to_sym, id: false do |t| - t.integer :topic_id, null: false - t.integer :model_version, null: false - t.integer :strategy_version, null: false - t.text :digest, null: false - t.column :embeddings, "vector(#{vector_rep.dimensions})", null: false - t.timestamps + t.index :topic_id, unique: true + end - t.index :topic_id, unique: true - end + create_table :ai_topic_embeddings_2_1, id: false do |t| + t.integer :topic_id, null: false + t.integer :model_version, null: false + t.integer :strategy_version, null: false + t.text :digest, null: false + t.column :embeddings, "vector(1536)", null: false + t.timestamps + + t.index :topic_id, unique: true end end end diff --git a/db/migrate/20230727170222_create_multilingual_topic_embeddings_table.rb b/db/migrate/20230727170222_create_multilingual_topic_embeddings_table.rb index 1ca179ee..4da6b5c4 100644 --- a/db/migrate/20230727170222_create_multilingual_topic_embeddings_table.rb +++ b/db/migrate/20230727170222_create_multilingual_topic_embeddings_table.rb @@ -2,15 +2,12 @@ class CreateMultilingualTopicEmbeddingsTable < ActiveRecord::Migration[7.0] def change - truncation = DiscourseAi::Embeddings::Strategies::Truncation.new - vector_rep = DiscourseAi::Embeddings::VectorRepresentations::MultilingualE5Large.new(truncation) - - create_table vector_rep.table_name.to_sym, id: false do |t| + create_table :ai_topic_embeddings_3_1, id: false do |t| t.integer :topic_id, null: false t.integer :model_version, null: false t.integer :strategy_version, null: false t.text :digest, null: false - t.column :embeddings, "vector(#{vector_rep.dimensions})", null: false + t.column :embeddings, "vector(1024)", null: false t.timestamps t.index :topic_id, unique: true diff --git a/db/migrate/20231003155701_create_bge_topic_embeddings_table.rb b/db/migrate/20231003155701_create_bge_topic_embeddings_table.rb index f9b13e5a..e83e47e2 100644 --- a/db/migrate/20231003155701_create_bge_topic_embeddings_table.rb +++ b/db/migrate/20231003155701_create_bge_topic_embeddings_table.rb @@ -2,15 +2,12 @@ class CreateBgeTopicEmbeddingsTable < ActiveRecord::Migration[7.0] def change - truncation = DiscourseAi::Embeddings::Strategies::Truncation.new - vector_rep = DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn.new(truncation) - - create_table vector_rep.table_name.to_sym, id: false do |t| + create_table :ai_topic_embeddings_4_1, id: false do |t| t.integer :topic_id, null: false t.integer :model_version, null: false t.integer :strategy_version, null: false t.text :digest, null: false - t.column :embeddings, "vector(#{vector_rep.dimensions})", null: false + t.column :embeddings, "vector(1024)", null: false t.timestamps t.index :topic_id, unique: true