FIX: Make sure limits are updated and applied on each step (#1002)
This commit is contained in:
parent
50f61925eb
commit
4ba74511c2
|
@ -31,15 +31,14 @@ module Jobs
|
||||||
.where(archetype: Archetype.default)
|
.where(archetype: Archetype.default)
|
||||||
.where(deleted_at: nil)
|
.where(deleted_at: nil)
|
||||||
.order("topics.bumped_at DESC")
|
.order("topics.bumped_at DESC")
|
||||||
.limit(limit - rebaked)
|
|
||||||
|
|
||||||
rebaked += populate_topic_embeddings(vector_rep, topics)
|
rebaked += populate_topic_embeddings(vector_rep, topics.limit(limit - rebaked))
|
||||||
|
|
||||||
return if rebaked >= limit
|
return if rebaked >= limit
|
||||||
|
|
||||||
# Then, we'll try to backfill embeddings for topics that have outdated
|
# Then, we'll try to backfill embeddings for topics that have outdated
|
||||||
# embeddings, be it model or strategy version
|
# embeddings, be it model or strategy version
|
||||||
relation = topics.where(<<~SQL)
|
relation = topics.where(<<~SQL).limit(limit - rebaked)
|
||||||
#{table_name}.model_version < #{vector_rep.version}
|
#{table_name}.model_version < #{vector_rep.version}
|
||||||
OR
|
OR
|
||||||
#{table_name}.strategy_version < #{strategy.version}
|
#{table_name}.strategy_version < #{strategy.version}
|
||||||
|
@ -65,20 +64,22 @@ module Jobs
|
||||||
|
|
||||||
# Now for posts
|
# Now for posts
|
||||||
table_name = vector_rep.post_table_name
|
table_name = vector_rep.post_table_name
|
||||||
|
posts_batch_size = 1000
|
||||||
|
|
||||||
posts =
|
posts =
|
||||||
Post
|
Post
|
||||||
.joins("LEFT JOIN #{table_name} ON #{table_name}.post_id = posts.id")
|
.joins("LEFT JOIN #{table_name} ON #{table_name}.post_id = posts.id")
|
||||||
.where(deleted_at: nil)
|
.where(deleted_at: nil)
|
||||||
.where(post_type: Post.types[:regular])
|
.where(post_type: Post.types[:regular])
|
||||||
.limit(limit - rebaked)
|
|
||||||
|
|
||||||
# First, we'll try to backfill embeddings for posts that have none
|
# First, we'll try to backfill embeddings for posts that have none
|
||||||
posts
|
posts
|
||||||
.where("#{table_name}.post_id IS NULL")
|
.where("#{table_name}.post_id IS NULL")
|
||||||
.find_in_batches do |batch|
|
.limit(limit - rebaked)
|
||||||
vector_rep.gen_bulk_reprensentations(batch)
|
.pluck(:id)
|
||||||
rebaked += batch.size
|
.each_slice(posts_batch_size) do |batch|
|
||||||
|
vector_rep.gen_bulk_reprensentations(Post.where(id: batch))
|
||||||
|
rebaked += batch.length
|
||||||
end
|
end
|
||||||
|
|
||||||
return if rebaked >= limit
|
return if rebaked >= limit
|
||||||
|
@ -91,28 +92,26 @@ module Jobs
|
||||||
OR
|
OR
|
||||||
#{table_name}.strategy_version < #{strategy.version}
|
#{table_name}.strategy_version < #{strategy.version}
|
||||||
SQL
|
SQL
|
||||||
.find_in_batches do |batch|
|
.limit(limit - rebaked)
|
||||||
vector_rep.gen_bulk_reprensentations(batch)
|
.pluck(:id)
|
||||||
rebaked += batch.size
|
.each_slice(posts_batch_size) do |batch|
|
||||||
|
vector_rep.gen_bulk_reprensentations(Post.where(id: batch))
|
||||||
|
rebaked += batch.length
|
||||||
end
|
end
|
||||||
|
|
||||||
return if rebaked >= limit
|
return if rebaked >= limit
|
||||||
|
|
||||||
# Finally, we'll try to backfill embeddings for posts that have outdated
|
# Finally, we'll try to backfill embeddings for posts that have outdated
|
||||||
# embeddings due to edits. Here we only do 10% of the remaining limit
|
# embeddings due to edits. Here we only do 10% of the remaining limit
|
||||||
posts_batch_size = 1000
|
posts
|
||||||
|
.where("#{table_name}.updated_at < ?", 7.days.ago)
|
||||||
outdated_post_ids =
|
.order("random()")
|
||||||
posts
|
.limit((limit - rebaked) / 10)
|
||||||
.where("#{table_name}.updated_at < ?", 7.days.ago)
|
.pluck(:id)
|
||||||
.order("random()")
|
.each_slice(posts_batch_size) do |batch|
|
||||||
.limit((limit - rebaked) / 10)
|
vector_rep.gen_bulk_reprensentations(Post.where(id: batch))
|
||||||
.pluck(:id)
|
rebaked += batch.length
|
||||||
|
end
|
||||||
outdated_post_ids.each_slice(posts_batch_size) do |batch|
|
|
||||||
vector_rep.gen_bulk_reprensentations(Post.where(id: batch))
|
|
||||||
rebaked += batch.length
|
|
||||||
end
|
|
||||||
|
|
||||||
rebaked
|
rebaked
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue