From 356ea7720142e6bcd73935d0dac6fb394d749c7f Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Tue, 14 Jan 2025 13:30:52 -0300 Subject: [PATCH] FIX: Split backfill into separate migrations to use independent transactions (#1063) --- .../20241230153300_new_embeddings_tables.rb | 16 ------- ...0250114160417_backfill_topic_embeddings.rb | 18 ++++++++ ...20250114160446_backfill_post_embeddings.rb | 18 ++++++++ .../20250114160500_backfill_rag_embeddings.rb | 19 ++++++++ ...0250113171444_drop_old_embedding_tables.rb | 43 +------------------ 5 files changed, 56 insertions(+), 58 deletions(-) create mode 100644 db/migrate/20250114160417_backfill_topic_embeddings.rb create mode 100644 db/migrate/20250114160446_backfill_post_embeddings.rb create mode 100644 db/migrate/20250114160500_backfill_rag_embeddings.rb diff --git a/db/migrate/20241230153300_new_embeddings_tables.rb b/db/migrate/20241230153300_new_embeddings_tables.rb index 34e6ca91..dacc148f 100644 --- a/db/migrate/20241230153300_new_embeddings_tables.rb +++ b/db/migrate/20241230153300_new_embeddings_tables.rb @@ -65,22 +65,6 @@ class NewEmbeddingsTables < ActiveRecord::Migration[7.2] WHERE model_id = #{model_id} AND strategy_id = 1; SQL end - - # Copy data from old tables to new tables - execute <<~SQL - INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) - SELECT * FROM ai_topic_embeddings; - SQL - - execute <<~SQL - INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) - SELECT * FROM ai_post_embeddings; - SQL - - execute <<~SQL - INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) - SELECT * FROM ai_document_fragment_embeddings; - SQL end def down diff --git a/db/migrate/20250114160417_backfill_topic_embeddings.rb b/db/migrate/20250114160417_backfill_topic_embeddings.rb new file mode 100644 index 00000000..8d17636f --- /dev/null +++ b/db/migrate/20250114160417_backfill_topic_embeddings.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true +class BackfillTopicEmbeddings < ActiveRecord::Migration[7.2] + def up + not_backfilled = DB.query_single("SELECT COUNT(*) FROM ai_topics_embeddings").first.to_i == 0 + + if not_backfilled + # Copy data from old tables to new tables + execute <<~SQL + INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) + SELECT * FROM ai_topic_embeddings; + SQL + end + end + + def down + raise ActiveRecord::IrreversibleMigration + end +end diff --git a/db/migrate/20250114160446_backfill_post_embeddings.rb b/db/migrate/20250114160446_backfill_post_embeddings.rb new file mode 100644 index 00000000..a4f380b9 --- /dev/null +++ b/db/migrate/20250114160446_backfill_post_embeddings.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true +class BackfillPostEmbeddings < ActiveRecord::Migration[7.2] + def up + not_backfilled = DB.query_single("SELECT COUNT(*) FROM ai_posts_embeddings").first.to_i == 0 + + if not_backfilled + # Copy data from old tables to new tables + execute <<~SQL + INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) + SELECT * FROM ai_post_embeddings; + SQL + end + end + + def down + raise ActiveRecord::IrreversibleMigration + end +end diff --git a/db/migrate/20250114160500_backfill_rag_embeddings.rb b/db/migrate/20250114160500_backfill_rag_embeddings.rb new file mode 100644 index 00000000..16843219 --- /dev/null +++ b/db/migrate/20250114160500_backfill_rag_embeddings.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true +class BackfillRagEmbeddings < ActiveRecord::Migration[7.2] + def up + not_backfilled = + DB.query_single("SELECT COUNT(*) FROM ai_document_fragments_embeddings").first.to_i == 0 + + if not_backfilled + # Copy data from old tables to new tables + execute <<~SQL + INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) + SELECT * FROM ai_document_fragment_embeddings; + SQL + end + end + + def down + raise ActiveRecord::IrreversibleMigration + end +end diff --git a/db/post_migrate/20250113171444_drop_old_embedding_tables.rb b/db/post_migrate/20250113171444_drop_old_embedding_tables.rb index c0794142..544dfa4f 100644 --- a/db/post_migrate/20250113171444_drop_old_embedding_tables.rb +++ b/db/post_migrate/20250113171444_drop_old_embedding_tables.rb @@ -2,48 +2,7 @@ class DropOldEmbeddingTables < ActiveRecord::Migration[7.2] def up # Copy rag embeddings created during deploy. - execute <<~SQL - INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) - ( - SELECT ai_document_fragment_embeddings.* - FROM ai_document_fragment_embeddings - LEFT OUTER JOIN ai_document_fragments_embeddings ON ai_document_fragment_embeddings.rag_document_fragment_id = ai_document_fragments_embeddings.rag_document_fragment_id - WHERE ai_document_fragments_embeddings.rag_document_fragment_id IS NULL - ) - SQL - - execute <<~SQL - DROP INDEX IF EXISTS ai_topic_embeddings_1_1_search_bit; - DROP INDEX IF EXISTS ai_topic_embeddings_2_1_search_bit; - DROP INDEX IF EXISTS ai_topic_embeddings_3_1_search_bit; - DROP INDEX IF EXISTS ai_topic_embeddings_4_1_search_bit; - DROP INDEX IF EXISTS ai_topic_embeddings_5_1_search_bit; - DROP INDEX IF EXISTS ai_topic_embeddings_6_1_search_bit; - DROP INDEX IF EXISTS ai_topic_embeddings_7_1_search_bit; - DROP INDEX IF EXISTS ai_topic_embeddings_8_1_search_bit; - - DROP INDEX IF EXISTS ai_post_embeddings_1_1_search_bit; - DROP INDEX IF EXISTS ai_post_embeddings_2_1_search_bit; - DROP INDEX IF EXISTS ai_post_embeddings_3_1_search_bit; - DROP INDEX IF EXISTS ai_post_embeddings_4_1_search_bit; - DROP INDEX IF EXISTS ai_post_embeddings_5_1_search_bit; - DROP INDEX IF EXISTS ai_post_embeddings_6_1_search_bit; - DROP INDEX IF EXISTS ai_post_embeddings_7_1_search_bit; - DROP INDEX IF EXISTS ai_post_embeddings_8_1_search_bit; - - DROP INDEX IF EXISTS ai_document_fragment_embeddings_1_1_search_bit; - DROP INDEX IF EXISTS ai_document_fragment_embeddings_2_1_search_bit; - DROP INDEX IF EXISTS ai_document_fragment_embeddings_3_1_search_bit; - DROP INDEX IF EXISTS ai_document_fragment_embeddings_4_1_search_bit; - DROP INDEX IF EXISTS ai_document_fragment_embeddings_5_1_search_bit; - DROP INDEX IF EXISTS ai_document_fragment_embeddings_6_1_search_bit; - DROP INDEX IF EXISTS ai_document_fragment_embeddings_7_1_search_bit; - DROP INDEX IF EXISTS ai_document_fragment_embeddings_8_1_search_bit; - SQL - - drop_table :ai_topic_embeddings - drop_table :ai_post_embeddings - drop_table :ai_document_fragment_embeddings + # noop. TODO(roman): Will follow-up with a new migration to drop these tables. end def down