98 lines
3.1 KiB
Ruby
98 lines
3.1 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# A fragment of an uploaded document. Each row belongs to an upload and to a
# polymorphic +target+ record. The class-level helpers below link uploads to a
# target, report how many fragments have embeddings, and publish status
# messages to the uploader.
class RagDocumentFragment < ActiveRecord::Base
  # TODO: Jan 2025 - remove
  # Tells ActiveRecord to ignore the ai_persona_id column on this table.
  self.ignored_columns = %i[ai_persona_id]

  belongs_to :upload
  belongs_to :target, polymorphic: true

  class << self
    # Ensures upload references exist between +target+ and +upload_ids+, then
    # enqueues one :digest_rag_upload job per upload id.
    #
    # Does nothing when +target+ or +upload_ids+ is blank, or when the
    # ai_embeddings_enabled site setting is off.
    #
    # @param target [Object] record the uploads are attached to; must respond to +id+
    # @param upload_ids [Array<Integer>] ids of the uploads to link
    def link_target_and_uploads(target, upload_ids)
      return if target.blank?
      return if upload_ids.blank?
      return if !SiteSetting.ai_embeddings_enabled?

      UploadReference.ensure_exist!(upload_ids: upload_ids, target: target)

      upload_ids.each do |upload_id|
        Jobs.enqueue(
          :digest_rag_upload,
          target_id: target.id,
          target_type: target.class.to_s,
          upload_id: upload_id,
        )
      end
    end

    # Makes the fragments stored for +target+ match +upload_ids+.
    #
    # With a blank +upload_ids+, every fragment and every upload reference of
    # the target is destroyed. Otherwise, fragments whose upload is not in
    # +upload_ids+ are destroyed and the listed uploads are (re)linked through
    # link_target_and_uploads.
    #
    # Does nothing when +target+ is blank or the ai_embeddings_enabled site
    # setting is off.
    #
    # @param target [Object] record whose uploads are being updated
    # @param upload_ids [Array<Integer>, nil] ids of the uploads that should remain
    def update_target_uploads(target, upload_ids)
      return if target.blank?
      return if !SiteSetting.ai_embeddings_enabled?

      # Lazy relation; each branch narrows or consumes it with its own query.
      target_fragments = RagDocumentFragment.where(target: target)

      if upload_ids.blank?
        target_fragments.destroy_all
        UploadReference.where(target: target).destroy_all
      else
        target_fragments.where.not(upload_id: upload_ids).destroy_all
        link_target_and_uploads(target, upload_ids)
      end
    end

    # Reports, per upload, how many fragments exist for +persona+ and how many
    # of them have a matching row in the current embeddings table.
    #
    # @param persona [Object] target record; its +id+ and class name select the fragments
    # @param uploads [Enumerable] uploads to report on; each must respond to +id+
    # @return [Hash] upload id => { total:, indexed:, left: } where +total+
    #   counts joined fragment rows, +indexed+ counts those with an embedding
    #   row and +left+ counts those without one. Empty when +uploads+ is blank.
    def indexing_status(persona, uploads)
      # Nothing can match an empty id list, so skip resolving the vector
      # representation and the database round trip altogether.
      return {} if uploads.blank?

      truncation = DiscourseAi::Embeddings::Strategies::Truncation.new
      vector_rep =
        DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(truncation)

      embeddings_table = vector_rep.rag_fragments_table_name

      # The table name is interpolated because identifiers cannot be bound as
      # parameters; it comes from the vector representation, not from callers.
      results =
        DB.query(
          <<~SQL,
            SELECT
              uploads.id,
              SUM(CASE WHEN (rdf.upload_id IS NOT NULL) THEN 1 ELSE 0 END) AS total,
              SUM(CASE WHEN (eft.rag_document_fragment_id IS NOT NULL) THEN 1 ELSE 0 END) as indexed,
              SUM(CASE WHEN (rdf.upload_id IS NOT NULL AND eft.rag_document_fragment_id IS NULL) THEN 1 ELSE 0 END) as left
            FROM uploads
            LEFT OUTER JOIN rag_document_fragments rdf ON uploads.id = rdf.upload_id AND rdf.target_id = :target_id
              AND rdf.target_type = :target_type
            LEFT OUTER JOIN #{embeddings_table} eft ON rdf.id = eft.rag_document_fragment_id
            WHERE uploads.id IN (:upload_ids)
            GROUP BY uploads.id
          SQL
          target_id: persona.id,
          target_type: persona.class.to_s,
          upload_ids: uploads.map(&:id),
        )

      results.each_with_object({}) do |row, statuses|
        statuses[row.id] = { total: row.total, indexed: row.indexed, left: row.left }
      end
    end

    # Publishes +status+ on the upload's "/discourse-ai/rag/<upload id>"
    # MessageBus channel, addressed only to the user who owns the upload.
    #
    # @param upload [Object] upload the status refers to; must respond to +id+ and +user_id+
    # @param status [Object] payload handed to MessageBus as-is
    def publish_status(upload, status)
      MessageBus.publish("/discourse-ai/rag/#{upload.id}", status, user_ids: [upload.user_id])
    end
  end
end
|
|
|
|
# == Schema Information
|
|
#
|
|
# Table name: rag_document_fragments
|
|
#
|
|
# id :bigint not null, primary key
|
|
# fragment :text not null
|
|
# upload_id :integer not null
|
|
# fragment_number :integer not null
|
|
# created_at :datetime not null
|
|
# updated_at :datetime not null
|
|
# metadata :text
|
|
# target_id :bigint not null
|
|
# target_type :string(800) not null
|
|
#
|
|
# Indexes
|
|
#
|
|
# index_rag_document_fragments_on_target_type_and_target_id (target_type,target_id)
|
|
#
|