UX: Display the indexing progress for RAG uploads (#557)

This commit is contained in:
Roman Rizzi 2024-04-09 11:03:07 -03:00 committed by GitHub
parent 35fbf5c836
commit aa8918911d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 231 additions and 11 deletions

View File

@ -5,7 +5,8 @@ module DiscourseAi
class AiPersonasController < ::Admin::AdminController
requires_plugin ::DiscourseAi::PLUGIN_NAME
before_action :find_ai_persona, only: %i[show update destroy create_user]
before_action :find_ai_persona,
only: %i[show update destroy create_user indexing_status_check]
def index
ai_personas =
@ -90,6 +91,10 @@ module DiscourseAi
end
end
# Renders, as JSON, the RAG indexing status of every upload attached to the
# persona. Relies on @ai_persona, which is presumably assigned by the
# find_ai_persona before_action that lists this action.
def indexing_status_check
  persona_uploads = @ai_persona.uploads
  statuses = RagDocumentFragment.indexing_status(@ai_persona, persona_uploads)

  render json: statuses
end
private
def find_ai_persona

View File

@ -41,6 +41,11 @@ module ::Jobs
end
end
RagDocumentFragment.publish_status(
upload,
{ total: fragment_ids.size, indexed: 0, left: fragment_ids.size },
)
fragment_ids.each_slice(50) do |slice|
Jobs.enqueue(:generate_rag_embeddings, fragment_ids: slice)
end

View File

@ -14,6 +14,13 @@ module ::Jobs
# generate_representation_from compares the digest value to make sure
# the embedding is only generated once per fragment unless something changes.
fragments.map { |fragment| vector_rep.generate_representation_from(fragment) }
last_fragment = fragments.last
ai_persona = last_fragment.ai_persona
upload = last_fragment.upload
indexing_status = RagDocumentFragment.indexing_status(ai_persona, [upload])[upload.id]
RagDocumentFragment.publish_status(upload, indexing_status)
end
end
end

View File

@ -29,6 +29,40 @@ class RagDocumentFragment < ActiveRecord::Base
link_persona_and_uploads(persona, upload_ids)
end
end
# Reports, for each given upload, how many of the persona's RAG fragments
# exist and how many of them already have a row in the embeddings table of
# the current vector representation.
#
# @param persona [#id] persona whose fragments are counted
# @param uploads [Enumerable<#id>] uploads to report on
# @return [Hash] upload id => { total:, indexed:, left: }
def indexing_status(persona, uploads)
  strategy = DiscourseAi::Embeddings::Strategies::Truncation.new
  representation =
    DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(strategy)
  embeddings_table = representation.rag_fragments_table_name

  # Both joins are LEFT OUTER so an upload without fragments (or without
  # embeddings) still yields a row, with every counter at zero.
  sql = <<~SQL
    SELECT
    uploads.id,
    SUM(CASE WHEN (rdf.upload_id IS NOT NULL) THEN 1 ELSE 0 END) AS total,
    SUM(CASE WHEN (eft.rag_document_fragment_id IS NOT NULL) THEN 1 ELSE 0 END) as indexed,
    SUM(CASE WHEN (rdf.upload_id IS NOT NULL AND eft.rag_document_fragment_id IS NULL) THEN 1 ELSE 0 END) as left
    FROM uploads
    LEFT OUTER JOIN rag_document_fragments rdf ON uploads.id = rdf.upload_id AND rdf.ai_persona_id = :persona_id
    LEFT OUTER JOIN #{embeddings_table} eft ON rdf.id = eft.rag_document_fragment_id
    WHERE uploads.id IN (:upload_ids)
    GROUP BY uploads.id
  SQL

  rows = DB.query(sql, persona_id: persona.id, upload_ids: uploads.map(&:id))

  rows.each_with_object({}) do |row, statuses|
    statuses[row.id] = { total: row.total, indexed: row.indexed, left: row.left }
  end
end
# Publishes an indexing status payload for a single upload over MessageBus.
# The channel is keyed by the upload id and the message is restricted to the
# user who owns the upload.
#
# @param upload [#id, #user_id] upload the status refers to
# @param status [Object] payload forwarded to subscribers unchanged
def publish_status(upload, status)
  channel = "/discourse-ai/ai-persona-rag/#{upload.id}"
  recipients = [upload.user_id]

  MessageBus.publish(channel, status, user_ids: recipients)
end
end
end

View File

@ -3,6 +3,7 @@ import { ajax } from "discourse/lib/ajax";
import RestModel from "discourse/models/rest";
const ATTRIBUTES = [
"id",
"name",
"description",
"commands",
@ -24,6 +25,7 @@ const ATTRIBUTES = [
];
const SYSTEM_ATTRIBUTES = [
"id",
"allowed_group_ids",
"enabled",
"system",

View File

@ -37,6 +37,7 @@ export default class PersonaEditor extends Component {
@tracked editingModel = null;
@tracked showDelete = false;
@tracked maxPixelsValue = null;
@tracked ragIndexingStatuses = null;
@action
updateModel() {
@ -84,7 +85,7 @@ export default class PersonaEditor extends Component {
try {
await this.args.model.save();
this.#sortPersonas();
if (isNew) {
if (isNew && this.args.model.rag_uploads.length === 0) {
this.args.personas.addObject(this.args.model);
this.router.transitionTo(
"adminPlugins.show.discourse-ai.ai-personas.show",
@ -442,6 +443,7 @@ export default class PersonaEditor extends Component {
{{#if this.siteSettings.ai_embeddings_enabled}}
<div class="control-group">
<PersonaRagUploader
@persona={{this.editingModel}}
@ragUploads={{this.editingModel.rag_uploads}}
@onAdd={{this.addUpload}}
@onRemove={{this.removeUpload}}

View File

@ -5,10 +5,12 @@ import { on } from "@ember/modifier";
import { action } from "@ember/object";
import { inject as service } from "@ember/service";
import DButton from "discourse/components/d-button";
import { ajax } from "discourse/lib/ajax";
import UppyUploadMixin from "discourse/mixins/uppy-upload";
import icon from "discourse-common/helpers/d-icon";
import discourseDebounce from "discourse-common/lib/debounce";
import I18n from "discourse-i18n";
import RagUploadProgress from "./rag-upload-progress";
export default class PersonaRagUploader extends Component.extend(
UppyUploadMixin
@ -17,6 +19,7 @@ export default class PersonaRagUploader extends Component.extend(
@tracked term = null;
@tracked filteredUploads = null;
@tracked ragIndexingStatuses = null;
id = "discourse-ai-persona-rag-uploader";
maxFiles = 20;
uploadUrl = "/admin/plugins/discourse-ai/ai-personas/files/upload";
@ -30,6 +33,14 @@ export default class PersonaRagUploader extends Component.extend(
}
this.filteredUploads = this.ragUploads || [];
if (this.ragUploads?.length) {
ajax(
`/admin/plugins/discourse-ai/ai-personas/${this.persona.id}/files/status.json`
).then((statuses) => {
this.set("ragIndexingStatuses", statuses);
});
}
}
uploadDone(uploadedFile) {
@ -97,9 +108,12 @@ export default class PersonaRagUploader extends Component.extend(
<span class="persona-rag-uploader__rag-file-icon">{{icon
"file"
}}</span>
{{upload.original_filename}}</td>
<td class="persona-rag-uploader__upload-status">{{icon "check"}}
{{I18n.t "discourse_ai.ai_persona.uploads.complete"}}</td>
{{upload.original_filename}}
</td>
<RagUploadProgress
@upload={{upload}}
@ragIndexingStatuses={{this.ragIndexingStatuses}}
/>
<td class="persona-rag-uploader__remove-file">
<DButton
@icon="times"

View File

@ -0,0 +1,81 @@
import Component from "@glimmer/component";
import { tracked } from "@glimmer/tracking";
import { action } from "@ember/object";
import didInsert from "@ember/render-modifiers/modifiers/did-insert";
import { inject as service } from "@ember/service";
import icon from "discourse-common/helpers/d-icon";
import { bind } from "discourse-common/utils/decorators";
import I18n from "discourse-i18n";
export default class RagUploadProgress extends Component {
@service messageBus;
@tracked updatedProgress = null;
willDestroy() {
super.willDestroy(...arguments);
this.messageBus.unsubscribe(
`/discourse-ai/ai-persona-rag/${this.args.upload.id}`
);
}
@action
trackProgress() {
this.messageBus.subscribe(
`/discourse-ai/ai-persona-rag/${this.args.upload.id}`,
this.onIndexingUpdate
);
}
@bind
onIndexingUpdate(data) {
// Order not guaranteed. Discard old updates.
if (!this.updatedProgress || this.updatedProgress.left > data.left) {
this.updatedProgress = data;
}
}
get calculateProgress() {
return Math.ceil((this.progress.indexed * 100) / this.progress.total);
}
get fullyIndexed() {
return this.progress && this.progress.left === 0;
}
get progress() {
if (this.updatedProgress) {
return this.updatedProgress;
} else if (this.args.ragIndexingStatuses) {
return this.args.ragIndexingStatuses[this.args.upload.id];
} else {
return [];
}
}
<template>
<td
class="persona-rag-uploader__upload-status"
{{didInsert this.trackProgress}}
>
{{#if this.progress}}
{{#if this.fullyIndexed}}
<span class="indexed">
{{icon "check"}}
{{I18n.t "discourse_ai.ai_persona.uploads.indexed"}}
</span>
{{else}}
<span class="indexing">
{{icon "robot"}}
{{I18n.t "discourse_ai.ai_persona.uploads.indexing"}}
{{this.calculateProgress}}%
</span>
{{/if}}
{{else}}
<span class="uploaded">{{I18n.t
"discourse_ai.ai_persona.uploads.uploaded"
}}</span>
{{/if}}
</td>
</template>
}

View File

@ -129,9 +129,17 @@
&__upload-status {
text-align: right;
padding-right: 0;
.indexed {
color: var(--success);
}
.uploaded,
.indexing {
color: var(--primary-low-mid);
}
}
&__remove-file {
text-align: right;
padding-left: 0;

View File

@ -171,7 +171,9 @@ en:
hint: "To control where the file's content gets placed within the system prompt, include the {uploads} placeholder in the system prompt above."
button: "Add Files"
filter: "Filter uploads"
complete: "Complete"
indexed: "Indexed"
indexing: "Indexing"
uploaded: "Ready to be indexed"
related_topics:
title: "Related Topics"

View File

@ -43,5 +43,6 @@ Discourse::Application.routes.draw do
post "/ai-personas/:id/create-user", to: "discourse_ai/admin/ai_personas#create_user"
post "/ai-personas/files/upload", to: "discourse_ai/admin/ai_personas#upload_file"
put "/ai-personas/:id/files/remove", to: "discourse_ai/admin/ai_personas#remove_file"
get "/ai-personas/:id/files/status", to: "discourse_ai/admin/ai_personas#indexing_status_check"
end
end

View File

@ -34,5 +34,20 @@ RSpec.describe Jobs::GenerateRagEmbeddings do
expect(embeddings_count).to eq(expected_embeddings)
end
describe "Publishing progress updates" do
  it "sends an update through mb after a batch finishes" do
    # Progress is published on a channel keyed by the fragment's upload.
    channel = "/discourse-ai/ai-persona-rag/#{rag_document_fragment_1.upload_id}"

    published =
      MessageBus.track_publish(channel) do
        subject.execute(fragment_ids: [rag_document_fragment_1.id])
      end

    stats = published.last.data

    expect(stats[:total]).to eq(1)
    expect(stats[:indexed]).to eq(1)
    expect(stats[:left]).to eq(0)
  end
end
end
end

View File

@ -73,4 +73,47 @@ RSpec.describe RagDocumentFragment do
).by(1)
end
end
describe ".indexing_status" do
let(:truncation) { DiscourseAi::Embeddings::Strategies::Truncation.new }
let(:vector_rep) do
DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(truncation)
end
fab!(:rag_document_fragment_1) do
Fabricate(:rag_document_fragment, upload: upload_1, ai_persona: persona)
end
fab!(:rag_document_fragment_2) do
Fabricate(:rag_document_fragment, upload: upload_1, ai_persona: persona)
end
let(:expected_embedding) { [0.0038493] * vector_rep.dimensions }
before do
SiteSetting.ai_embeddings_enabled = true
SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
WebMock.stub_request(
:post,
"#{SiteSetting.ai_embeddings_discourse_service_api_endpoint}/api/v1/classify",
).to_return(status: 200, body: JSON.dump(expected_embedding))
vector_rep.generate_representation_from(rag_document_fragment_1)
end
it "returns total, indexed and unindexed fragments for each upload" do
  results = described_class.indexing_status(persona, [upload_1, upload_2])

  # upload_1 has two fragments; the before block generated an embedding for
  # the first one only.
  expect(results[upload_1.id]).to eq({ total: 2, indexed: 1, left: 1 })

  # upload_2 is expected to have no fragments for this persona, so it is
  # still reported, with every counter at zero.
  expect(results[upload_2.id]).to eq({ total: 0, indexed: 0, left: 0 })
end
end
end

View File

@ -65,6 +65,7 @@ module("Discourse AI | Unit | Model | ai-persona", function () {
test("create properties", function (assert) {
const properties = {
id: 1,
name: "Test",
commands: ["CommandName"],
allowed_group_ids: [12],