Mirror of https://github.com/discourse/discourse-ai.git
FIX: Do batches for backfilling huge embeddings tables (#1065)
commit 6721c6751d
parent bbae790c2b
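For context, the fix replaces a single all-at-once INSERT ... SELECT with a loop that copies at most 10,000 rows per pass, skips rows already present in the new table, and stops once an INSERT writes zero rows. A minimal sketch of that pattern, assuming PostgreSQL through ActiveRecord (where execute returns a PG::Result and cmd_tuples is the number of rows written); the table and column names below are placeholders, not the real schema:

# Illustrative sketch only: the batching pattern used by this commit, generalized.
require "active_record"

def copy_in_batches(connection, batch_size: 10_000)
  loop do
    inserted = connection.execute(<<~SQL).cmd_tuples
      INSERT INTO new_table (id, payload)
      SELECT source.id, source.payload
      FROM old_table source
      WHERE NOT EXISTS (
        SELECT 1 FROM new_table target WHERE target.id = source.id
      )
      LIMIT #{batch_size.to_i}
    SQL

    break if inserted == 0 # nothing left to copy; the backfill is complete
  end
end

# With a configured database connection:
# copy_in_batches(ActiveRecord::Base.connection)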
@@ -1,14 +1,24 @@
 # frozen_string_literal: true
 
 class BackfillTopicEmbeddings < ActiveRecord::Migration[7.2]
-  def up
-    not_backfilled = DB.query_single("SELECT COUNT(*) FROM ai_topics_embeddings").first.to_i == 0
+  disable_ddl_transaction!
 
-    if not_backfilled
-      # Copy data from old tables to new tables
-      execute <<~SQL
+  def up
+    loop do
+      count = execute(<<~SQL).cmd_tuples
         INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
-        SELECT * FROM ai_topic_embeddings;
+        SELECT source.*
+        FROM ai_topic_embeddings source
+        WHERE NOT EXISTS (
+          SELECT 1
+          FROM ai_topics_embeddings target
+          WHERE target.model_id = source.model_id
+          AND target.strategy_id = source.strategy_id
+          AND target.topic_id = source.topic_id
+        )
+        LIMIT 10000
       SQL
-    end
+
+      break if count == 0
+    end
   end
@@ -1,14 +1,26 @@
 # frozen_string_literal: true
 
 class BackfillPostEmbeddings < ActiveRecord::Migration[7.2]
-  def up
-    not_backfilled = DB.query_single("SELECT COUNT(*) FROM ai_posts_embeddings").first.to_i == 0
+  disable_ddl_transaction!
 
-    if not_backfilled
-      # Copy data from old tables to new tables
-      execute <<~SQL
+  def up
+    # Copy data from old tables to new tables in batches.
+
+    loop do
+      count = execute(<<~SQL).cmd_tuples
         INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
-        SELECT * FROM ai_post_embeddings;
+        SELECT source.*
+        FROM ai_post_embeddings source
+        WHERE NOT EXISTS (
+          SELECT 1
+          FROM ai_posts_embeddings target
+          WHERE target.model_id = source.model_id
+          AND target.strategy_id = source.strategy_id
+          AND target.post_id = source.post_id
+        )
+        LIMIT 10000
       SQL
-    end
+
+      break if count == 0
+    end
   end
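Two properties of the new migrations are worth noting: disable_ddl_transaction! stops Rails from wrapping the whole migration in one transaction, so each batch commits on its own, and the NOT EXISTS guard makes the copy idempotent, so an interrupted or re-run backfill simply skips rows that were already inserted instead of relying on the old all-or-nothing not_backfilled check.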