diff --git a/db/migrate/20250114160417_backfill_topic_embeddings.rb b/db/migrate/20250114160417_backfill_topic_embeddings.rb index 8d17636f..d0a07f25 100644 --- a/db/migrate/20250114160417_backfill_topic_embeddings.rb +++ b/db/migrate/20250114160417_backfill_topic_embeddings.rb @@ -1,14 +1,24 @@ # frozen_string_literal: true class BackfillTopicEmbeddings < ActiveRecord::Migration[7.2] - def up - not_backfilled = DB.query_single("SELECT COUNT(*) FROM ai_topics_embeddings").first.to_i == 0 + disable_ddl_transaction! - if not_backfilled - # Copy data from old tables to new tables - execute <<~SQL - INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) - SELECT * FROM ai_topic_embeddings; + def up + loop do + count = execute(<<~SQL).cmd_tuples + INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) + SELECT source.* + FROM ai_topic_embeddings source + WHERE NOT EXISTS ( + SELECT 1 + FROM ai_topics_embeddings target + WHERE target.model_id = source.model_id + AND target.strategy_id = source.strategy_id + AND target.topic_id = source.topic_id + ) + LIMIT 10000 SQL + + break if count == 0 end end diff --git a/db/migrate/20250114160446_backfill_post_embeddings.rb b/db/migrate/20250114160446_backfill_post_embeddings.rb index a4f380b9..365d8516 100644 --- a/db/migrate/20250114160446_backfill_post_embeddings.rb +++ b/db/migrate/20250114160446_backfill_post_embeddings.rb @@ -1,14 +1,26 @@ # frozen_string_literal: true class BackfillPostEmbeddings < ActiveRecord::Migration[7.2] - def up - not_backfilled = DB.query_single("SELECT COUNT(*) FROM ai_posts_embeddings").first.to_i == 0 + disable_ddl_transaction! - if not_backfilled - # Copy data from old tables to new tables - execute <<~SQL + def up + # Copy data from old tables to new tables in batches. + + loop do + count = execute(<<~SQL).cmd_tuples INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at) - SELECT * FROM ai_post_embeddings; + SELECT source.* + FROM ai_post_embeddings source + WHERE NOT EXISTS ( + SELECT 1 + FROM ai_posts_embeddings target + WHERE target.model_id = source.model_id + AND target.strategy_id = source.strategy_id + AND target.post_id = source.post_id + ) + LIMIT 10000 SQL + + break if count == 0 end end