FIX: Skip records without content to classify (#960)
This commit is contained in:
parent
ddf2bf7034
commit
ef07fcb308
|
@ -70,6 +70,7 @@ module Jobs
|
|||
Post
|
||||
.joins("LEFT JOIN #{table_name} ON #{table_name}.post_id = posts.id")
|
||||
.where(deleted_at: nil)
|
||||
.where(post_type: Post.types[:regular])
|
||||
.limit(limit - rebaked)
|
||||
|
||||
# First, we'll try to backfill embeddings for posts that have none
|
||||
|
|
|
@ -61,18 +61,21 @@ module DiscourseAi
|
|||
|
||||
embedding_gen = inference_client
|
||||
promised_embeddings =
|
||||
relation.map do |record|
|
||||
materials = { target: record, text: prepare_text(record) }
|
||||
relation
|
||||
.map do |record|
|
||||
prepared_text = prepare_text(record)
|
||||
next if prepared_text.blank?
|
||||
|
||||
Concurrent::Promises
|
||||
.fulfilled_future(materials, pool)
|
||||
.then_on(pool) do |w_prepared_text|
|
||||
w_prepared_text.merge(
|
||||
embedding: embedding_gen.perform!(w_prepared_text[:text]),
|
||||
digest: OpenSSL::Digest::SHA1.hexdigest(w_prepared_text[:text]),
|
||||
)
|
||||
end
|
||||
end
|
||||
Concurrent::Promises
|
||||
.fulfilled_future({ target: record, text: prepared_text }, pool)
|
||||
.then_on(pool) do |w_prepared_text|
|
||||
w_prepared_text.merge(
|
||||
embedding: embedding_gen.perform!(w_prepared_text[:text]),
|
||||
digest: OpenSSL::Digest::SHA1.hexdigest(w_prepared_text[:text]),
|
||||
)
|
||||
end
|
||||
end
|
||||
.compact
|
||||
|
||||
Concurrent::Promises
|
||||
.zip(*promised_embeddings)
|
||||
|
|
|
@ -81,11 +81,13 @@ module DiscourseAi
|
|||
end
|
||||
|
||||
def prepare_text(record)
|
||||
if inference_client.class.name.include?("DiscourseClassifier")
|
||||
return "query: #{super(record)}"
|
||||
prepared_text = super(record)
|
||||
|
||||
if prepared_text.present? && inference_client.class.name.include?("DiscourseClassifier")
|
||||
return "query: #{prepared_text}"
|
||||
end
|
||||
|
||||
super(record)
|
||||
prepared_text
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -79,6 +79,10 @@ RSpec.shared_examples "generates and store embedding using with vector represent
|
|||
expect(vector_rep.topic_id_from_representation(expected_embedding_1)).to eq(topic.id)
|
||||
expect(vector_rep.topic_id_from_representation(expected_embedding_1)).to eq(topic.id)
|
||||
end
|
||||
|
||||
it "does nothing if passed record has no content" do
|
||||
expect { vector_rep.gen_bulk_reprensentations([Topic.new]) }.not_to raise_error
|
||||
end
|
||||
end
|
||||
|
||||
describe "#asymmetric_topics_similarity_search" do
|
||||
|
|
Loading…
Reference in New Issue