diff --git a/app/controllers/discourse_ai/admin/ai_personas_controller.rb b/app/controllers/discourse_ai/admin/ai_personas_controller.rb index 4b9994e9..64b33735 100644 --- a/app/controllers/discourse_ai/admin/ai_personas_controller.rb +++ b/app/controllers/discourse_ai/admin/ai_personas_controller.rb @@ -5,7 +5,8 @@ module DiscourseAi class AiPersonasController < ::Admin::AdminController requires_plugin ::DiscourseAi::PLUGIN_NAME - before_action :find_ai_persona, only: %i[show update destroy create_user] + before_action :find_ai_persona, + only: %i[show update destroy create_user indexing_status_check] def index ai_personas = @@ -90,6 +91,10 @@ module DiscourseAi end end + def indexing_status_check + render json: RagDocumentFragment.indexing_status(@ai_persona, @ai_persona.uploads) + end + private def find_ai_persona diff --git a/app/jobs/regular/digest_rag_upload.rb b/app/jobs/regular/digest_rag_upload.rb index 55486b3d..d7c348e8 100644 --- a/app/jobs/regular/digest_rag_upload.rb +++ b/app/jobs/regular/digest_rag_upload.rb @@ -41,6 +41,11 @@ module ::Jobs end end + RagDocumentFragment.publish_status( + upload, + { total: fragment_ids.size, indexed: 0, left: fragment_ids.size }, + ) + fragment_ids.each_slice(50) do |slice| Jobs.enqueue(:generate_rag_embeddings, fragment_ids: slice) end diff --git a/app/jobs/regular/generate_rag_embeddings.rb b/app/jobs/regular/generate_rag_embeddings.rb index de433318..96483a9a 100644 --- a/app/jobs/regular/generate_rag_embeddings.rb +++ b/app/jobs/regular/generate_rag_embeddings.rb @@ -14,6 +14,13 @@ module ::Jobs # generate_representation_from checks compares the digest value to make sure # the embedding is only generated once per fragment unless something changes. fragments.map { |fragment| vector_rep.generate_representation_from(fragment) } + + last_fragment = fragments.last + ai_persona = last_fragment.ai_persona + upload = last_fragment.upload + + indexing_status = RagDocumentFragment.indexing_status(ai_persona, [upload])[upload.id] + RagDocumentFragment.publish_status(upload, indexing_status) end end end diff --git a/app/models/rag_document_fragment.rb b/app/models/rag_document_fragment.rb index a161ca83..bffd65e0 100644 --- a/app/models/rag_document_fragment.rb +++ b/app/models/rag_document_fragment.rb @@ -29,6 +29,40 @@ class RagDocumentFragment < ActiveRecord::Base link_persona_and_uploads(persona, upload_ids) end end + + def indexing_status(persona, uploads) + truncation = DiscourseAi::Embeddings::Strategies::Truncation.new + vector_rep = + DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(truncation) + + embeddings_table = vector_rep.rag_fragments_table_name + + results = DB.query(<<~SQL, persona_id: persona.id, upload_ids: uploads.map(&:id)) + SELECT + uploads.id, + SUM(CASE WHEN (rdf.upload_id IS NOT NULL) THEN 1 ELSE 0 END) AS total, + SUM(CASE WHEN (eft.rag_document_fragment_id IS NOT NULL) THEN 1 ELSE 0 END) as indexed, + SUM(CASE WHEN (rdf.upload_id IS NOT NULL AND eft.rag_document_fragment_id IS NULL) THEN 1 ELSE 0 END) as left + FROM uploads + LEFT OUTER JOIN rag_document_fragments rdf ON uploads.id = rdf.upload_id AND rdf.ai_persona_id = :persona_id + LEFT OUTER JOIN #{embeddings_table} eft ON rdf.id = eft.rag_document_fragment_id + WHERE uploads.id IN (:upload_ids) + GROUP BY uploads.id + SQL + + results.reduce({}) do |acc, r| + acc[r.id] = { total: r.total, indexed: r.indexed, left: r.left } + acc + end + end + + def publish_status(upload, status) + MessageBus.publish( + "/discourse-ai/ai-persona-rag/#{upload.id}", + status, + user_ids: [upload.user_id], + ) + end end end diff --git a/assets/javascripts/discourse/admin/models/ai-persona.js b/assets/javascripts/discourse/admin/models/ai-persona.js index d53e6543..bdd88cca 100644 --- a/assets/javascripts/discourse/admin/models/ai-persona.js +++ b/assets/javascripts/discourse/admin/models/ai-persona.js @@ -3,6 +3,7 @@ import { ajax } from "discourse/lib/ajax"; import RestModel from "discourse/models/rest"; const ATTRIBUTES = [ + "id", "name", "description", "commands", @@ -24,6 +25,7 @@ const ATTRIBUTES = [ ]; const SYSTEM_ATTRIBUTES = [ + "id", "allowed_group_ids", "enabled", "system", diff --git a/assets/javascripts/discourse/components/ai-persona-editor.gjs b/assets/javascripts/discourse/components/ai-persona-editor.gjs index c0e8fc5f..200040a7 100644 --- a/assets/javascripts/discourse/components/ai-persona-editor.gjs +++ b/assets/javascripts/discourse/components/ai-persona-editor.gjs @@ -37,6 +37,7 @@ export default class PersonaEditor extends Component { @tracked editingModel = null; @tracked showDelete = false; @tracked maxPixelsValue = null; + @tracked ragIndexingStatuses = null; @action updateModel() { @@ -84,7 +85,7 @@ export default class PersonaEditor extends Component { try { await this.args.model.save(); this.#sortPersonas(); - if (isNew) { + if (isNew && this.args.model.rag_uploads.length === 0) { this.args.personas.addObject(this.args.model); this.router.transitionTo( "adminPlugins.show.discourse-ai.ai-personas.show", @@ -442,6 +443,7 @@ export default class PersonaEditor extends Component { {{#if this.siteSettings.ai_embeddings_enabled}}
{ + this.set("ragIndexingStatuses", statuses); + }); + } } uploadDone(uploadedFile) { @@ -97,9 +108,12 @@ export default class PersonaRagUploader extends Component.extend( {{icon "file" }} - {{upload.original_filename}} - {{icon "check"}} - {{I18n.t "discourse_ai.ai_persona.uploads.complete"}} + {{upload.original_filename}} + + data.left) { + this.updatedProgress = data; + } + } + + get calculateProgress() { + return Math.ceil((this.progress.indexed * 100) / this.progress.total); + } + + get fullyIndexed() { + return this.progress && this.progress.left === 0; + } + + get progress() { + if (this.updatedProgress) { + return this.updatedProgress; + } else if (this.args.ragIndexingStatuses) { + return this.args.ragIndexingStatuses[this.args.upload.id]; + } else { + return []; + } + } + + +} diff --git a/assets/stylesheets/modules/ai-bot/common/ai-persona.scss b/assets/stylesheets/modules/ai-bot/common/ai-persona.scss index c6ff1c01..5aa320ab 100644 --- a/assets/stylesheets/modules/ai-bot/common/ai-persona.scss +++ b/assets/stylesheets/modules/ai-bot/common/ai-persona.scss @@ -129,7 +129,15 @@ &__upload-status { text-align: right; padding-right: 0; - color: var(--success); + + .indexed { + color: var(--success); + } + + .uploaded, + .indexing { + color: var(--primary-low-mid); + } } &__remove-file { diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml index 2b61b6fb..57a2fd3b 100644 --- a/config/locales/client.en.yml +++ b/config/locales/client.en.yml @@ -171,7 +171,9 @@ en: hint: "To control where the file's content gets placed within the system prompt, include the {uploads} placeholder in the system prompt above." button: "Add Files" filter: "Filter uploads" - complete: "Complete" + indexed: "Indexed" + indexing: "Indexing" + uploaded: "Ready to be indexed" related_topics: title: "Related Topics" diff --git a/config/routes.rb b/config/routes.rb index 8d349265..c50f4265 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -43,5 +43,6 @@ Discourse::Application.routes.draw do post "/ai-personas/:id/create-user", to: "discourse_ai/admin/ai_personas#create_user" post "/ai-personas/files/upload", to: "discourse_ai/admin/ai_personas#upload_file" put "/ai-personas/:id/files/remove", to: "discourse_ai/admin/ai_personas#remove_file" + get "/ai-personas/:id/files/status", to: "discourse_ai/admin/ai_personas#indexing_status_check" end end diff --git a/spec/jobs/regular/generate_rag_embeddings_spec.rb b/spec/jobs/regular/generate_rag_embeddings_spec.rb index b8ead8e2..b200bb01 100644 --- a/spec/jobs/regular/generate_rag_embeddings_spec.rb +++ b/spec/jobs/regular/generate_rag_embeddings_spec.rb @@ -34,5 +34,20 @@ RSpec.describe Jobs::GenerateRagEmbeddings do expect(embeddings_count).to eq(expected_embeddings) end + + describe "Publishing progress updates" do + it "sends an update through mb after a batch finishes" do + updates = + MessageBus.track_publish( + "/discourse-ai/ai-persona-rag/#{rag_document_fragment_1.upload_id}", + ) { subject.execute(fragment_ids: [rag_document_fragment_1.id]) } + + upload_index_stats = updates.last.data + + expect(upload_index_stats[:total]).to eq(1) + expect(upload_index_stats[:indexed]).to eq(1) + expect(upload_index_stats[:left]).to eq(0) + end + end end end diff --git a/spec/models/rag_document_fragment_spec.rb b/spec/models/rag_document_fragment_spec.rb index 3d16218c..6cf6ec8d 100644 --- a/spec/models/rag_document_fragment_spec.rb +++ b/spec/models/rag_document_fragment_spec.rb @@ -73,4 +73,47 @@ RSpec.describe RagDocumentFragment do ).by(1) end end + + describe ".indexing_status" do + let(:truncation) { DiscourseAi::Embeddings::Strategies::Truncation.new } + let(:vector_rep) do + DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(truncation) + end + + fab!(:rag_document_fragment_1) do + Fabricate(:rag_document_fragment, upload: upload_1, ai_persona: persona) + end + + fab!(:rag_document_fragment_2) do + Fabricate(:rag_document_fragment, upload: upload_1, ai_persona: persona) + end + + let(:expected_embedding) { [0.0038493] * vector_rep.dimensions } + + before do + SiteSetting.ai_embeddings_enabled = true + SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com" + + WebMock.stub_request( + :post, + "#{SiteSetting.ai_embeddings_discourse_service_api_endpoint}/api/v1/classify", + ).to_return(status: 200, body: JSON.dump(expected_embedding)) + + vector_rep.generate_representation_from(rag_document_fragment_1) + end + + it "returns total, indexed and unindexed fragments for each upload" do + results = described_class.indexing_status(persona, [upload_1, upload_2]) + + upload_1_status = results[upload_1.id] + expect(upload_1_status[:total]).to eq(2) + expect(upload_1_status[:indexed]).to eq(1) + expect(upload_1_status[:left]).to eq(1) + + upload_1_status = results[upload_2.id] + expect(upload_1_status[:total]).to eq(0) + expect(upload_1_status[:indexed]).to eq(0) + expect(upload_1_status[:left]).to eq(0) + end + end end diff --git a/test/javascripts/unit/models/ai-persona-test.js b/test/javascripts/unit/models/ai-persona-test.js index cdae4395..a2084c8b 100644 --- a/test/javascripts/unit/models/ai-persona-test.js +++ b/test/javascripts/unit/models/ai-persona-test.js @@ -65,6 +65,7 @@ module("Discourse AI | Unit | Model | ai-persona", function () { test("create properties", function (assert) { const properties = { + id: 1, name: "Test", commands: ["CommandName"], allowed_group_ids: [12], diff --git a/translator.yml b/translator.yml index 5caa8f7a..bb099f87 100644 --- a/translator.yml +++ b/translator.yml @@ -1,7 +1,7 @@ # Configuration file for discourse-translator-bot files: -- source_path: config/locales/client.en.yml - destination_path: client.yml -- source_path: config/locales/server.en.yml - destination_path: server.yml + - source_path: config/locales/client.en.yml + destination_path: client.yml + - source_path: config/locales/server.en.yml + destination_path: server.yml