mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-03-09 11:48:47 +00:00
* REFACTOR: A simpler way of interacting with embeddings' tables. This change adds a new abstraction called `Schema`, which acts as a repository that supports the same DB features `VectorRepresentation::Base` has, except that it removes the need to have duplicated methods per embeddings table. It is also a bit more flexible when performing a similarity search because you can pass it a block that gives you access to the builder, allowing you to add multiple joins/where conditions.
26 lines
966 B
Ruby
26 lines
966 B
Ruby
# frozen_string_literal: true
|
|
|
|
module ::Jobs
  # Job that generates embeddings for a batch of RAG document fragments and
  # then publishes the indexing status of the upload the last fragment of the
  # batch belongs to.
  class GenerateRagEmbeddings < ::Jobs::Base
    # We could also restrict concurrency, but this takes so long if it is not concurrent.
    sidekiq_options queue: "ultra_low"

    # @param args [Hash] job arguments; `args[:fragment_ids]` holds the ids of
    #   the RagDocumentFragment records to process.
    # @return [Object, nil] nil when no fragment matches the given ids,
    #   otherwise whatever RagDocumentFragment.publish_status returns.
    def execute(args)
      fragments = RagDocumentFragment.where(id: args[:fragment_ids].to_a)
      return if fragments.empty?

      vector_rep = DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation

      # generate_representation_from compares the digest value to make sure
      # the embedding is only generated once per fragment unless something changes.
      # Iterated with `each` because the call is made for its side effect only;
      # the per-fragment return values are not used.
      fragments.each { |fragment| vector_rep.generate_representation_from(fragment) }

      # NOTE(review): only the last fragment's target/upload is used to compute
      # and publish the status -- presumably every fragment in one job shares
      # the same upload; confirm against the code that enqueues this job.
      last_fragment = fragments.last
      target = last_fragment.target
      upload = last_fragment.upload

      indexing_status = RagDocumentFragment.indexing_status(target, [upload])[upload.id]
      RagDocumentFragment.publish_status(upload, indexing_status)
    end
  end
end
|