mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-03-09 02:40:50 +00:00
Polymorphic RAG means that we will be able to access RAG fragments both from AiPersona and AiCustomTool In turn this gives us support for richer RAG implementations.
103 lines
3.2 KiB
Ruby
103 lines
3.2 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
class RagDocumentFragment < ActiveRecord::Base
|
|
# TODO Jan 2025 - remove
|
|
self.ignored_columns = %i[ai_persona_id]
|
|
|
|
belongs_to :upload
|
|
belongs_to :target, polymorphic: true
|
|
|
|
class << self
|
|
def link_target_and_uploads(target, upload_ids)
|
|
return if target.blank?
|
|
return if upload_ids.blank?
|
|
return if !SiteSetting.ai_embeddings_enabled?
|
|
|
|
UploadReference.ensure_exist!(upload_ids: upload_ids, target: target)
|
|
|
|
upload_ids.each do |upload_id|
|
|
Jobs.enqueue(
|
|
:digest_rag_upload,
|
|
target_id: target.id,
|
|
target_type: target.class.to_s,
|
|
upload_id: upload_id,
|
|
)
|
|
end
|
|
end
|
|
|
|
def update_target_uploads(target, upload_ids)
|
|
return if target.blank?
|
|
return if !SiteSetting.ai_embeddings_enabled?
|
|
|
|
if upload_ids.blank?
|
|
RagDocumentFragment.where(target: target).destroy_all
|
|
UploadReference.where(target: target).destroy_all
|
|
else
|
|
RagDocumentFragment.where(target: target).where.not(upload_id: upload_ids).destroy_all
|
|
link_target_and_uploads(target, upload_ids)
|
|
end
|
|
end
|
|
|
|
def indexing_status(persona, uploads)
|
|
truncation = DiscourseAi::Embeddings::Strategies::Truncation.new
|
|
vector_rep =
|
|
DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(truncation)
|
|
|
|
embeddings_table = vector_rep.rag_fragments_table_name
|
|
|
|
results =
|
|
DB.query(
|
|
<<~SQL,
|
|
SELECT
|
|
uploads.id,
|
|
SUM(CASE WHEN (rdf.upload_id IS NOT NULL) THEN 1 ELSE 0 END) AS total,
|
|
SUM(CASE WHEN (eft.rag_document_fragment_id IS NOT NULL) THEN 1 ELSE 0 END) as indexed,
|
|
SUM(CASE WHEN (rdf.upload_id IS NOT NULL AND eft.rag_document_fragment_id IS NULL) THEN 1 ELSE 0 END) as left
|
|
FROM uploads
|
|
LEFT OUTER JOIN rag_document_fragments rdf ON uploads.id = rdf.upload_id AND rdf.target_id = :target_id
|
|
AND rdf.target_type = :target_type
|
|
LEFT OUTER JOIN #{embeddings_table} eft ON rdf.id = eft.rag_document_fragment_id
|
|
WHERE uploads.id IN (:upload_ids)
|
|
GROUP BY uploads.id
|
|
SQL
|
|
target_id: persona.id,
|
|
target_type: persona.class.to_s,
|
|
upload_ids: uploads.map(&:id),
|
|
)
|
|
|
|
results.reduce({}) do |acc, r|
|
|
acc[r.id] = { total: r.total, indexed: r.indexed, left: r.left }
|
|
acc
|
|
end
|
|
end
|
|
|
|
def publish_status(upload, status)
|
|
MessageBus.publish(
|
|
"/discourse-ai/ai-persona-rag/#{upload.id}",
|
|
status,
|
|
user_ids: [upload.user_id],
|
|
)
|
|
end
|
|
end
|
|
end
|
|
|
|
# == Schema Information
|
|
#
|
|
# Table name: rag_document_fragments
|
|
#
|
|
# id :bigint not null, primary key
|
|
# fragment :text not null
|
|
# upload_id :integer not null
|
|
# ai_persona_id :integer not null
|
|
# fragment_number :integer not null
|
|
# created_at :datetime not null
|
|
# updated_at :datetime not null
|
|
# metadata :text
|
|
# target_id :integer
|
|
# target_type :string(800)
|
|
#
|
|
# Indexes
|
|
#
|
|
# index_rag_document_fragments_on_target_type_and_target_id (target_type,target_id)
|
|
#
|