FIX: Split backfill into separate migrations to use independent transactions (#1063)

This commit is contained in:
Roman Rizzi 2025-01-14 13:30:52 -03:00 committed by GitHub
parent 09ca123757
commit 356ea77201
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 56 additions and 58 deletions

View File

@ -65,22 +65,6 @@ class NewEmbeddingsTables < ActiveRecord::Migration[7.2]
WHERE model_id = #{model_id} AND strategy_id = 1;
SQL
end
# Copy data from old tables to new tables
execute <<~SQL
INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT * FROM ai_topic_embeddings;
SQL
execute <<~SQL
INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT * FROM ai_post_embeddings;
SQL
execute <<~SQL
INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
SELECT * FROM ai_document_fragment_embeddings;
SQL
end
def down

View File

@ -0,0 +1,18 @@
# frozen_string_literal: true
class BackfillTopicEmbeddings < ActiveRecord::Migration[7.2]
  # Copies every row of the old ai_topic_embeddings table into
  # ai_topics_embeddings, but only when the destination table holds no rows
  # at all. If it already contains anything, the migration does nothing.
  def up
    existing_rows = DB.query_single("SELECT COUNT(*) FROM ai_topics_embeddings").first.to_i
    return unless existing_rows.zero?

    # NOTE: SELECT * maps source columns to the INSERT column list by
    # position, so both tables must declare their columns in the same order.
    execute <<~SQL
      INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      SELECT * FROM ai_topic_embeddings;
    SQL
  end

  # Rolling back is not supported; always raises
  # ActiveRecord::IrreversibleMigration.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end

View File

@ -0,0 +1,18 @@
# frozen_string_literal: true
class BackfillPostEmbeddings < ActiveRecord::Migration[7.2]
  # Copies every row of the old ai_post_embeddings table into
  # ai_posts_embeddings, but only when the destination table holds no rows
  # at all. If it already contains anything, the migration does nothing.
  def up
    existing_rows = DB.query_single("SELECT COUNT(*) FROM ai_posts_embeddings").first.to_i
    return unless existing_rows.zero?

    # NOTE: SELECT * maps source columns to the INSERT column list by
    # position, so both tables must declare their columns in the same order.
    execute <<~SQL
      INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      SELECT * FROM ai_post_embeddings;
    SQL
  end

  # Rolling back is not supported; always raises
  # ActiveRecord::IrreversibleMigration.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end

View File

@ -0,0 +1,19 @@
# frozen_string_literal: true
class BackfillRagEmbeddings < ActiveRecord::Migration[7.2]
  # Copies every row of the old ai_document_fragment_embeddings table into
  # ai_document_fragments_embeddings, but only when the destination table
  # holds no rows at all. If it already contains anything, the migration
  # does nothing.
  def up
    existing_rows =
      DB.query_single("SELECT COUNT(*) FROM ai_document_fragments_embeddings").first.to_i
    return unless existing_rows.zero?

    # NOTE: SELECT * maps source columns to the INSERT column list by
    # position, so both tables must declare their columns in the same order.
    execute <<~SQL
      INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      SELECT * FROM ai_document_fragment_embeddings;
    SQL
  end

  # Rolling back is not supported; always raises
  # ActiveRecord::IrreversibleMigration.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end

View File

@ -2,48 +2,7 @@
class DropOldEmbeddingTables < ActiveRecord::Migration[7.2]
def up
# Copy rag embeddings created during deploy.
execute <<~SQL
INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
(
SELECT ai_document_fragment_embeddings.*
FROM ai_document_fragment_embeddings
LEFT OUTER JOIN ai_document_fragments_embeddings ON ai_document_fragment_embeddings.rag_document_fragment_id = ai_document_fragments_embeddings.rag_document_fragment_id
WHERE ai_document_fragments_embeddings.rag_document_fragment_id IS NULL
)
SQL
execute <<~SQL
DROP INDEX IF EXISTS ai_topic_embeddings_1_1_search_bit;
DROP INDEX IF EXISTS ai_topic_embeddings_2_1_search_bit;
DROP INDEX IF EXISTS ai_topic_embeddings_3_1_search_bit;
DROP INDEX IF EXISTS ai_topic_embeddings_4_1_search_bit;
DROP INDEX IF EXISTS ai_topic_embeddings_5_1_search_bit;
DROP INDEX IF EXISTS ai_topic_embeddings_6_1_search_bit;
DROP INDEX IF EXISTS ai_topic_embeddings_7_1_search_bit;
DROP INDEX IF EXISTS ai_topic_embeddings_8_1_search_bit;
DROP INDEX IF EXISTS ai_post_embeddings_1_1_search_bit;
DROP INDEX IF EXISTS ai_post_embeddings_2_1_search_bit;
DROP INDEX IF EXISTS ai_post_embeddings_3_1_search_bit;
DROP INDEX IF EXISTS ai_post_embeddings_4_1_search_bit;
DROP INDEX IF EXISTS ai_post_embeddings_5_1_search_bit;
DROP INDEX IF EXISTS ai_post_embeddings_6_1_search_bit;
DROP INDEX IF EXISTS ai_post_embeddings_7_1_search_bit;
DROP INDEX IF EXISTS ai_post_embeddings_8_1_search_bit;
DROP INDEX IF EXISTS ai_document_fragment_embeddings_1_1_search_bit;
DROP INDEX IF EXISTS ai_document_fragment_embeddings_2_1_search_bit;
DROP INDEX IF EXISTS ai_document_fragment_embeddings_3_1_search_bit;
DROP INDEX IF EXISTS ai_document_fragment_embeddings_4_1_search_bit;
DROP INDEX IF EXISTS ai_document_fragment_embeddings_5_1_search_bit;
DROP INDEX IF EXISTS ai_document_fragment_embeddings_6_1_search_bit;
DROP INDEX IF EXISTS ai_document_fragment_embeddings_7_1_search_bit;
DROP INDEX IF EXISTS ai_document_fragment_embeddings_8_1_search_bit;
SQL
drop_table :ai_topic_embeddings
drop_table :ai_post_embeddings
drop_table :ai_document_fragment_embeddings
# noop. TODO(roman): Will follow-up with a new migration to drop these tables.
end
def down