FEATURE: allow tuning of RAG generation (#565)

* FEATURE: allow tuning of RAG generation - change chunking to be token based vs char based (which is more accurate) - allow control over overlap / tokens per chunk and conversation snippets inserted - UI to control new settings * improve ui a bit * fix various reindex issues * reduce concurrency * try ultra low queue ... concurrency 1 is too slow.
2024-04-12 23:32:46 +10:00 · 2024-04-12 23:32:46 +10:00 · f6ac5cd0a8
parent b906046aad
commit f6ac5cd0a8
23 changed files with 435 additions and 61 deletions
--- a/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-ai-personas-new.js
+++ b/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-ai-personas-new.js
@ -6,6 +6,10 @@ export default DiscourseRoute.extend({
    const record = this.store.createRecord("ai-persona");
    record.set("allowed_group_ids", [AUTO_GROUPS.trust_level_0.id]);
    record.set("rag_uploads", []);
+    // these match the defaults on the table
+    record.set("rag_chunk_tokens", 374);
+    record.set("rag_chunk_overlap_tokens", 10);
+    record.set("rag_conversation_chunks", 10);
    return record;
  },

--- a/app/controllers/discourse_ai/admin/ai_personas_controller.rb
+++ b/app/controllers/discourse_ai/admin/ai_personas_controller.rb
@ -121,6 +121,9 @@ module DiscourseAi
            :max_context_posts,
            :vision_enabled,
            :vision_max_pixels,
+            :rag_chunk_tokens,
+            :rag_chunk_overlap_tokens,
+            :rag_conversation_chunks,
            allowed_group_ids: [],
            rag_uploads: [:id],
          )
--- a/app/jobs/regular/digest_rag_upload.rb
+++ b/app/jobs/regular/digest_rag_upload.rb
@ -4,13 +4,21 @@ module ::Jobs
  class DigestRagUpload < ::Jobs::Base
    CHUNK_SIZE = 1024
    CHUNK_OVERLAP = 64
-    MAX_FRAGMENTS = 10_000
+    MAX_FRAGMENTS = 100_000

    # TODO(roman): Add a way to automatically recover from errors, resulting in unindexed uploads.
    def execute(args)
      return if (upload = Upload.find_by(id: args[:upload_id])).nil?
      return if (ai_persona = AiPersona.find_by(id: args[:ai_persona_id])).nil?

+      truncation = DiscourseAi::Embeddings::Strategies::Truncation.new
+      vector_rep =
+        DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(truncation)
+
+      tokenizer = vector_rep.tokenizer
+      chunk_tokens = ai_persona.rag_chunk_tokens
+      overlap_tokens = ai_persona.rag_chunk_overlap_tokens
+
      fragment_ids = RagDocumentFragment.where(ai_persona: ai_persona, upload: upload).pluck(:id)

      # Check if this is the first time we process this upload.
@ -22,7 +30,12 @@ module ::Jobs
        idx = 0

        ActiveRecord::Base.transaction do
-          chunk_document(document) do |chunk, metadata|
+          chunk_document(
+            file: document,
+            tokenizer: tokenizer,
+            chunk_tokens: chunk_tokens,
+            overlap_tokens: overlap_tokens,
+          ) do |chunk, metadata|
            fragment_ids << RagDocumentFragment.create!(
              ai_persona: ai_persona,
              fragment: chunk,
@ -53,15 +66,18 @@ module ::Jobs

    private

-    def chunk_document(file)
+    def chunk_document(file:, tokenizer:, chunk_tokens:, overlap_tokens:)
      buffer = +""
      current_metadata = nil
      done = false
      overlap = ""

+      # generally this will be plenty
+      read_size = chunk_tokens * 10
+
      while buffer.present? || !done
-        if buffer.length < CHUNK_SIZE * 2
-          read = file.read(CHUNK_SIZE * 2)
+        if buffer.length < read_size
+          read = file.read(read_size)
          done = true if read.nil?

          read = Encodings.to_utf8(read) if read
@ -84,7 +100,7 @@ module ::Jobs
          overlap = ""
        end

-        chunk, split_char = first_chunk(to_chunk)
+        chunk, split_char = first_chunk(to_chunk, tokenizer: tokenizer, chunk_tokens: chunk_tokens)
        buffer = buffer[chunk.length..-1]

        processed_chunk = overlap + chunk
@ -94,15 +110,28 @@ module ::Jobs

        yield processed_chunk, current_metadata

-        overlap = (chunk[-CHUNK_OVERLAP..-1] || chunk) + split_char
+        current_chunk_tokens = tokenizer.encode(chunk)
+        overlap_token_ids = current_chunk_tokens[-overlap_tokens..-1] || current_chunk_tokens # NOTE(review): when overlap_tokens == 0 (allowed by the model validation) this range is [0..-1], i.e. the whole chunk - confirm this is intended
+
+        overlap = ""
+
+        while overlap_token_ids.present?
+          begin
+            overlap = tokenizer.decode(overlap_token_ids) + split_char
+            break if overlap.encoding == Encoding::UTF_8
+          rescue StandardError
+            # it is possible that we truncated mid char
+          end
+          overlap_token_ids.shift
+        end

        # remove first word it is probably truncated
        overlap = overlap.split(" ", 2).last
      end
    end

-    def first_chunk(text, chunk_size: CHUNK_SIZE, splitters: ["\n\n", "\n", ".", ""])
-      return text, " " if text.length <= chunk_size
+    def first_chunk(text, chunk_tokens:, tokenizer:, splitters: ["\n\n", "\n", ".", ""])
+      return text, " " if tokenizer.tokenize(text).length <= chunk_tokens

      splitters = splitters.find_all { |s| text.include?(s) }.compact

@ -115,7 +144,7 @@ module ::Jobs
        text
          .split(split_char)
          .each do |part|
-            break if (buffer.length + split_char.length + part.length) > chunk_size
+            break if tokenizer.tokenize(buffer + split_char + part).length > chunk_tokens
            buffer << split_char
            buffer << part
          end
--- a/app/jobs/regular/generate_rag_embeddings.rb
+++ b/app/jobs/regular/generate_rag_embeddings.rb
@ -2,7 +2,8 @@

 module ::Jobs
  class GenerateRagEmbeddings < ::Jobs::Base
-    sidekiq_options queue: "low"
+    sidekiq_options queue: "ultra_low"
+    # we could also restrict concurrency but this takes so long if it is not concurrent

    def execute(args)
      return if (fragments = RagDocumentFragment.where(id: args[:fragment_ids].to_a)).empty?
--- a/app/models/ai_persona.rb
+++ b/app/models/ai_persona.rb
@ -13,6 +13,10 @@ class AiPersona < ActiveRecord::Base
  # we may want to revisit this in the future
  validates :vision_max_pixels, numericality: { greater_than: 0, maximum: 4_000_000 }

+  validates :rag_chunk_tokens, numericality: { greater_than: 0, maximum: 50_000 }
+  validates :rag_chunk_overlap_tokens, numericality: { greater_than: -1, maximum: 200 }
+  validates :rag_conversation_chunks, numericality: { greater_than: 0, maximum: 1000 }
+
  belongs_to :created_by, class_name: "User"
  belongs_to :user

@ -25,6 +29,8 @@ class AiPersona < ActiveRecord::Base

  before_destroy :ensure_not_system

+  before_update :regenerate_rag_fragments
+
  class MultisiteHash
    def initialize(id)
      @hash = Hash.new { |h, k| h[k] = {} }
@ -110,6 +116,7 @@ class AiPersona < ActiveRecord::Base
    max_context_posts = self.max_context_posts
    vision_enabled = self.vision_enabled
    vision_max_pixels = self.vision_max_pixels
+    rag_conversation_chunks = self.rag_conversation_chunks

    persona_class = DiscourseAi::AiBot::Personas::Persona.system_personas_by_id[self.id]
    if persona_class
@ -149,6 +156,10 @@ class AiPersona < ActiveRecord::Base
        vision_max_pixels
      end

+      persona_class.define_singleton_method :rag_conversation_chunks do
+        rag_conversation_chunks
+      end
+
      return persona_class
    end

@ -232,6 +243,10 @@ class AiPersona < ActiveRecord::Base
        vision_max_pixels
      end

+      define_singleton_method :rag_conversation_chunks do
+        rag_conversation_chunks
+      end
+
      define_singleton_method :to_s do
        "#<DiscourseAi::AiBot::Personas::Persona::Custom @name=#{self.name} @allowed_group_ids=#{self.allowed_group_ids.join(",")}>"
      end
@ -314,6 +329,12 @@ class AiPersona < ActiveRecord::Base
    user
  end

+  def regenerate_rag_fragments
+    if rag_chunk_tokens_changed? || rag_chunk_overlap_tokens_changed?
+      RagDocumentFragment.where(ai_persona: self).delete_all
+    end
+  end
+
  private

  def system_persona_unchangeable
@ -335,26 +356,31 @@ end
 #
 # Table name: ai_personas
 #
-#  id                :bigint           not null, primary key
-#  name              :string(100)      not null
-#  description       :string(2000)     not null
-#  commands          :json             not null
-#  system_prompt     :string(10000000) not null
-#  allowed_group_ids :integer          default([]), not null, is an Array
-#  created_by_id     :integer
-#  enabled           :boolean          default(TRUE), not null
-#  created_at        :datetime         not null
-#  updated_at        :datetime         not null
-#  system            :boolean          default(FALSE), not null
-#  priority          :boolean          default(FALSE), not null
-#  temperature       :float
-#  top_p             :float
-#  user_id           :integer
-#  mentionable       :boolean          default(FALSE), not null
-#  default_llm       :text
-#  max_context_posts :integer
-#  vision_enabled    :boolean          default(FALSE), not null
-#  vision_max_pixels :integer          default(1048576), not null
+#  id                       :bigint           not null, primary key
+#  name                     :string(100)      not null
+#  description              :string(2000)     not null
+#  commands                 :json             not null
+#  system_prompt            :string(10000000) not null
+#  allowed_group_ids        :integer          default([]), not null, is an Array
+#  created_by_id            :integer
+#  enabled                  :boolean          default(TRUE), not null
+#  created_at               :datetime         not null
+#  updated_at               :datetime         not null
+#  system                   :boolean          default(FALSE), not null
+#  priority                 :boolean          default(FALSE), not null
+#  temperature              :float
+#  top_p                    :float
+#  user_id                  :integer
+#  mentionable              :boolean          default(FALSE), not null
+#  default_llm              :text
+#  max_context_posts        :integer
+#  max_post_context_tokens  :integer
+#  max_context_tokens       :integer
+#  vision_enabled           :boolean          default(FALSE), not null
+#  vision_max_pixels        :integer          default(1048576), not null
+#  rag_chunk_tokens         :integer          default(374), not null
+#  rag_chunk_overlap_tokens :integer          default(10), not null
+#  rag_conversation_chunks  :integer          default(10), not null
 #
 # Indexes
 #
--- a/app/serializers/localized_ai_persona_serializer.rb
+++ b/app/serializers/localized_ai_persona_serializer.rb
@ -19,7 +19,10 @@ class LocalizedAiPersonaSerializer < ApplicationSerializer
             :user_id,
             :max_context_posts,
             :vision_enabled,
-             :vision_max_pixels
+             :vision_max_pixels,
+             :rag_chunk_tokens,
+             :rag_chunk_overlap_tokens,
+             :rag_conversation_chunks

  has_one :user, serializer: BasicUserSerializer, embed: :object
  has_many :rag_uploads, serializer: UploadSerializer, embed: :object
--- a/assets/javascripts/discourse/admin/models/ai-persona.js
+++ b/assets/javascripts/discourse/admin/models/ai-persona.js
@ -2,7 +2,7 @@ import { tracked } from "@glimmer/tracking";
 import { ajax } from "discourse/lib/ajax";
 import RestModel from "discourse/models/rest";

-const ATTRIBUTES = [
+const CREATE_ATTRIBUTES = [
  "id",
  "name",
  "description",
@ -24,6 +24,13 @@ const ATTRIBUTES = [
  "rag_uploads",
 ];

+// rag params are populated on save, only show it when editing
+const ATTRIBUTES = CREATE_ATTRIBUTES.concat([
+  "rag_chunk_tokens",
+  "rag_chunk_overlap_tokens",
+  "rag_conversation_chunks",
+]);
+
 const SYSTEM_ATTRIBUTES = [
  "id",
  "allowed_group_ids",
@ -38,6 +45,9 @@ const SYSTEM_ATTRIBUTES = [
  "vision_enabled",
  "vision_max_pixels",
  "rag_uploads",
+  "rag_chunk_tokens",
+  "rag_chunk_overlap_tokens",
+  "rag_conversation_chunks",
 ];

 class CommandOption {
@ -122,16 +132,19 @@ export default class AiPersona extends RestModel {
      : this.getProperties(ATTRIBUTES);
    attrs.id = this.id;
    this.populateCommandOptions(attrs);
+
    return attrs;
  }

  createProperties() {
-    let attrs = this.getProperties(ATTRIBUTES);
+    let attrs = this.getProperties(CREATE_ATTRIBUTES);
    this.populateCommandOptions(attrs);
    return attrs;
  }

  workingCopy() {
-    return AiPersona.create(this.createProperties());
+    let attrs = this.getProperties(ATTRIBUTES);
+    this.populateCommandOptions(attrs);
+    return AiPersona.create(attrs);
  }
 }
--- a/assets/javascripts/discourse/components/ai-persona-editor.gjs
+++ b/assets/javascripts/discourse/components/ai-persona-editor.gjs
@ -38,6 +38,7 @@ export default class PersonaEditor extends Component {
  @tracked showDelete = false;
  @tracked maxPixelsValue = null;
  @tracked ragIndexingStatuses = null;
+  @tracked showIndexingOptions = false;

  @action
  updateModel() {
@ -48,6 +49,13 @@ export default class PersonaEditor extends Component {
    );
  }

+  @action
+  toggleIndexingOptions(event) {
+    this.showIndexingOptions = !this.showIndexingOptions;
+    event.preventDefault();
+    event.stopPropagation();
+  }
+
  findClosestPixelValue(pixels) {
    let value = "high";
    this.maxPixelValues.forEach((info) => {
@ -69,6 +77,12 @@ export default class PersonaEditor extends Component {
    ];
  }

+  get indexingOptionsText() {
+    return this.showIndexingOptions
+      ? I18n.t("discourse_ai.ai_persona.hide_indexing_options")
+      : I18n.t("discourse_ai.ai_persona.show_indexing_options");
+  }
+
  @action
  async updateAllGroups() {
    this.allGroups = await Group.findAll();
@ -448,7 +462,66 @@ export default class PersonaEditor extends Component {
            @onAdd={{this.addUpload}}
            @onRemove={{this.removeUpload}}
          />
+          <a
+            href="#"
+            class="ai-persona-editor__indexing-options"
+            {{on "click" this.toggleIndexingOptions}}
+          >{{this.indexingOptionsText}}</a>
        </div>
+        {{#if this.showIndexingOptions}}
+          <div class="control-group">
+            <label>{{I18n.t "discourse_ai.ai_persona.rag_chunk_tokens"}}</label>
+            <Input
+              @type="number"
+              step="any"
+              lang="en"
+              class="ai-persona-editor__rag_chunk_tokens"
+              @value={{this.editingModel.rag_chunk_tokens}}
+            />
+            <DTooltip
+              @icon="question-circle"
+              @content={{I18n.t
+                "discourse_ai.ai_persona.rag_chunk_tokens_help"
+              }}
+            />
+          </div>
+          <div class="control-group">
+            <label>{{I18n.t
+                "discourse_ai.ai_persona.rag_chunk_overlap_tokens"
+              }}</label>
+            <Input
+              @type="number"
+              step="any"
+              lang="en"
+              class="ai-persona-editor__rag_chunk_overlap_tokens"
+              @value={{this.editingModel.rag_chunk_overlap_tokens}}
+            />
+            <DTooltip
+              @icon="question-circle"
+              @content={{I18n.t
+                "discourse_ai.ai_persona.rag_chunk_overlap_tokens_help"
+              }}
+            />
+          </div>
+          <div class="control-group">
+            <label>{{I18n.t
+                "discourse_ai.ai_persona.rag_conversation_chunks"
+              }}</label>
+            <Input
+              @type="number"
+              step="any"
+              lang="en"
+              class="ai-persona-editor__rag_conversation_chunks"
+              @value={{this.editingModel.rag_conversation_chunks}}
+            />
+            <DTooltip
+              @icon="question-circle"
+              @content={{I18n.t
+                "discourse_ai.ai_persona.rag_conversation_chunks_help"
+              }}
+            />
+          </div>
+        {{/if}}
      {{/if}}
      <div class="control-group ai-persona-editor__action_panel">
        <DButton
--- a/assets/stylesheets/modules/ai-bot/common/ai-persona.scss
+++ b/assets/stylesheets/modules/ai-bot/common/ai-persona.scss
@ -77,6 +77,11 @@
    align-items: center;
  }

+  &__indexing-options {
+    display: block;
+    margin-top: 1em;
+  }
+
  .persona-rag-uploader {
    width: 500px;

--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@ -141,6 +141,8 @@ en:
        default_llm: Default Language Model
        default_llm_help: The default language model to use for this persona. Required if you wish to mention persona on public posts.
        system_prompt: System Prompt
+        show_indexing_options: "Show Indexing Options"
+        hide_indexing_options: "Hide Indexing Options"
        save: Save
        saved: AI Persona Saved
        enabled: "Enabled?"
@ -158,6 +160,12 @@ en:
        priority: Priority
        priority_help: Priority personas are displayed to users at the top of the persona list. If multiple personas have priority, they will be sorted alphabetically.
        command_options: "Command Options"
+        rag_chunk_tokens: "RAG Chunk Tokens"
+        rag_chunk_tokens_help: "The number of tokens to use for each chunk in the RAG model. Increase to increase the amount of context the AI can use. (changing will re-index all uploads)"
+        rag_chunk_overlap_tokens: "RAG Chunk Overlap Tokens"
+        rag_chunk_overlap_tokens_help: "The number of tokens to overlap between chunks in the RAG model. (changing will re-index all uploads)"
+        rag_conversation_chunks: "RAG Conversation Chunks"
+        rag_conversation_chunks_help: "The number of chunks to use for the RAG model searches. Increase to increase the amount of context the AI can use."
        what_are_personas: "What are AI Personas?"
        no_persona_selected: |
          AI Personas are a powerful feature that allows you to customize the behavior of the AI engine in your Discourse forum. They act as a 'system message' that guides the AI's responses and interactions, helping to create a more personalized and engaging user experience.
--- a/db/migrate/20240409035951_add_rag_params_to_ai_persona.rb
+++ b/db/migrate/20240409035951_add_rag_params_to_ai_persona.rb
@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+class AddRagParamsToAiPersona < ActiveRecord::Migration[7.0] # adds three NOT NULL integer RAG tuning columns to ai_personas
+  def change
+    # the default fits without any data loss in a 384 token vector representation (presumably 374 chunk tokens + 10 overlap tokens = 384 - confirm)
+    # larger embedding models can easily fit larger chunks so this is configurable
+    add_column :ai_personas, :rag_chunk_tokens, :integer, null: false, default: 374
+    add_column :ai_personas, :rag_chunk_overlap_tokens, :integer, null: false, default: 10
+    add_column :ai_personas, :rag_conversation_chunks, :integer, null: false, default: 10
+  end
+end
--- a/lib/ai_bot/entry_point.rb
+++ b/lib/ai_bot/entry_point.rb
@ -210,13 +210,18 @@ module DiscourseAi
        end

        plugin.on(:site_setting_changed) do |name, old_value, new_value|
-          if name == "ai_embeddings_model" && SiteSetting.ai_embeddings_enabled? &&
+          if name == :ai_embeddings_model && SiteSetting.ai_embeddings_enabled? &&
               new_value != old_value
-            RagDocumentFragment.find_in_batches do |batch|
-              batch.each_slice(100) do |fragments|
-                Jobs.enqueue(:generate_rag_embeddings, fragment_ids: fragments.map(&:id))
+            RagDocumentFragment.delete_all
+            UploadReference
+              .where(target: AiPersona.all)
+              .each do |ref|
+                Jobs.enqueue(
+                  :digest_rag_upload,
+                  ai_persona_id: ref.target_id,
+                  upload_id: ref.upload_id,
+                )
              end
-            end
          end
        end
      end
--- a/lib/ai_bot/personas/persona.rb
+++ b/lib/ai_bot/personas/persona.rb
@ -5,6 +5,10 @@ module DiscourseAi
    module Personas
      class Persona
        class << self
+          def rag_conversation_chunks # class-level default; AiPersona redefines this per persona via define_singleton_method
+            10 # same value as the ai_personas.rag_conversation_chunks column default
+          end
+
          def vision_enabled
            false
          end
@ -219,11 +223,20 @@ module DiscourseAi

          interactions_vector = vector_rep.vector_from(latest_interactions)

+          rag_conversation_chunks = self.class.rag_conversation_chunks
+
          candidate_fragment_ids =
            vector_rep.asymmetric_rag_fragment_similarity_search(
              interactions_vector,
              persona_id: id,
-              limit: reranker.reranker_configured? ? 50 : 10,
+              limit:
+                (
+                  if reranker.reranker_configured?
+                    rag_conversation_chunks * 5
+                  else
+                    rag_conversation_chunks
+                  end
+                ),
              offset: 0,
            )

@ -239,11 +252,11 @@ module DiscourseAi
              DiscourseAi::Inference::HuggingFaceTextEmbeddings
                .rerank(conversation_context.last[:content], guidance)
                .to_a
-                .take(10)
+                .take(rag_conversation_chunks)
                .map { _1[:index] }

            if ranks.empty?
-              fragments = fragments.take(10)
+              fragments = fragments.take(rag_conversation_chunks)
            else
              fragments = ranks.map { |idx| fragments[idx] }
            end
--- a/lib/tokenizer/basic_tokenizer.rb
+++ b/lib/tokenizer/basic_tokenizer.rb
@ -16,11 +16,18 @@ module DiscourseAi
          tokenize(text).size
        end

+        def decode(token_ids) # thin delegation to the underlying tokenizer's decode
+          tokenizer.decode(token_ids)
+        end
+
+        def encode(tokens) # NOTE(review): despite the parameter name, DigestRagUpload passes text here (tokenizer.encode(chunk))
+          tokenizer.encode(tokens).ids # return only the ids of the encoding, not the encoding object
+        end
+
        def truncate(text, max_length)
          # fast track common case, /2 to handle unicode chars
          # than can take more than 1 token per char
          return text if !SiteSetting.ai_strict_token_counting && text.size < max_length / 2
-
          tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
        end

--- a/lib/tokenizer/open_ai_tokenizer.rb
+++ b/lib/tokenizer/open_ai_tokenizer.rb
@ -12,6 +12,14 @@ module DiscourseAi
          tokenizer.encode(text)
        end

+        def encode(text) # returns tokenizer.encode(text) unchanged (no .ids call, unlike BasicTokenizer.encode)
+          tokenizer.encode(text)
+        end
+
+        def decode(token_ids) # thin delegation to the underlying tokenizer's decode
+          tokenizer.decode(token_ids)
+        end
+
        def truncate(text, max_length)
          # fast track common case, /2 to handle unicode chars
          # than can take more than 1 token per char
--- a/spec/fixtures/rag/doc_with_metadata.txt
+++ b/spec/fixtures/rag/doc_with_metadata.txt
@ -1,4 +1,15 @@
 No metadata yet, first chunk ’ こんにちは
+Jane’s
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 ’ 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+Jane’s

 [[metadata Sam's story]]
 Once upon a time, in a land far, far away (or maybe just down the street, who knows?), there lived a brilliant AI developer named Sam. Sam had a vision, a dream, nay, a burning desire to create the most impressive discourse AI the world had ever seen. Armed with a keyboard, an endless supply of coffee, and a mildly concerning lack of sleep, Sam embarked on this epic quest.
--- a/spec/fixtures/rag/parsed_doc_with_metadata.txt
+++ b/spec/fixtures/rag/parsed_doc_with_metadata.txt
@ -1,61 +1,118 @@
 metadata: 
 number: 1
 No metadata yet, first chunk ’ こんにちは
+Jane’s
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+1 2 ’ 4 5 6 7 8 9 10
+
+metadata: 
+number: 2
+’ 4 5 6 7 8 9 10
+1 2 3 4 5 6 7 8 9 10
+Jane’s

 metadata: Sam's story
-number: 2
+number: 3
 Once upon a time, in a land far, far away (or maybe just down the street, who knows?), there lived a brilliant AI developer named Sam. Sam had a vision, a dream, nay, a burning desire to create the most impressive discourse AI the world had ever seen. Armed with a keyboard, an endless supply of coffee, and a mildly concerning lack of sleep, Sam embarked on this epic quest.

+metadata: Sam's story
+number: 4
+sam embarked on this epic quest.
+
 Day and night, Sam toiled away, crafting lines of code that would make even the most seasoned programmers weep with joy. The AI slowly took shape, like a majestic, digital phoenix rising from the ashes of Sam’s social life. It was a thing of beauty, a marvel of modern technology, and it had the uncanny ability to generate conversations about anything from the meaning of life to the best way to make a grilled cheese sandwich.

 metadata: Sam's story
-number: 3
-of life to the best way to make a grilled cheese sandwich.
+number: 5
+to make a grilled cheese sandwich.

 As the project neared completion, Sam realized that there was one crucial element missing: a spec doc. And not just any spec doc, but a spec doc filled with glorious, meaningless dummy text. Because let’s face it, nothing screams “professional” quite like a wall of lorem ipsum.

+metadata: Sam's story
+number: 6
+a wall of lorem ipsum.
+
 So, Sam set out to create the most impressive dummy text the world had ever seen. It would be a masterpiece, a symphony of nonsensical words that would leave readers in awe of Sam’s ability to fill space with utter gibberish. And thus, the dummy text was born.

 [[METADATE]]

-It was a sight to behold, a tapestry of random words woven together in a way that almost made sense, but not quite. It spoke of ancient mysteries, like why hotdogs come in packs of ten, while hotdog buns come in packs of eight. It pondered the great questions of our time, like whether or not pineapple belongs on pizza (spoiler alert: it does). And it even dared to explore the darkest corners of Sam’s imagination, like the idea of a world without caffeine.
+metadata: Sam's story
+number: 7
+born. [ [ metadate ] ]
+
+.It was a sight to behold, a tapestry of random words woven together in a way that almost made sense, but not quite. It spoke of ancient mysteries, like why hotdogs come in packs of ten, while hotdog buns come in packs of eight. It pondered the great questions of our time, like whether or not pineapple belongs on pizza (spoiler alert: it does)

 metadata: Sam's story
-number: 4
-Sam’s imagination, like the idea of a world without caffeine.
+number: 8
+( spoiler alert : it does ).
+
+ And it even dared to explore the darkest corners of Sam’s imagination, like the idea of a world without caffeine.
+
+metadata: Sam's story
+number: 9
+of a world without caffeine.

 In the end, Sam’s discourse AI was a resounding success. It could carry on conversations with humans for hours on end, discussing everything from the latest trends in fashion to the intricacies of quantum physics. And whenever anyone asked about the impressive spec doc, Sam would just smile and nod, knowing full well that the real magic lay in the glorious dummy text that started it all.

+metadata: Sam's story
+number: 10
+glorious dummy text that started it all.
+
 And so, dear reader, if you ever find yourself in need of some impressive dummy text for your own project, just remember the tale of Sam and their magnificent discourse AI. Because sometimes, all it takes is a little nonsense to make the world a whole lot more interesting.

 metadata: Jane's story
-number: 5
+number: 11
 Ah, Jane. The name alone conjures up images of brilliance, wit, and a certain je ne sais quoi that can only be described as “Janeesque.” And so, it comes as no surprise that our dear Jane found herself embarking on a journey of epic proportions: the creation of a discourse AI that would put all other discourse AIs to shame.

+metadata: Jane's story
+number: 12
+all other discourse ais to shame.
+
 With a twinkle in her eye and a spring in her step, Jane set forth on this noble quest. She gathered her trusty companions: a laptop, a never-ending supply of tea, and a collection of obscure reference books that would make even the most studious librarian green with envy. Armed with these tools, Jane began her work.

-As she typed away at her keyboard, Jane couldn’t help but feel a sense of excitement bubbling up inside her. This was no ordinary project; this was a chance to create something truly extraordinary. She poured her heart and soul into every line of code, crafting algorithms that would make even the most advanced AI systems [[look]] like mere calculators.
+metadata: Jane's story
+number: 13
+these tools, jane began her work.
+
+As she typed away at her keyboard, Jane couldnât help but feel a sense of excitement bubbling up inside her. This was no ordinary project; this was a chance to create something truly extraordinary. She poured her heart and soul into every line of code, crafting algorithms that would make even the most advanced AI systems [[look]] like mere calculators.

 metadata: Jane's story
-number: 6
-the most advanced AI systems [[look]] like mere calculators.
+number: 14
+] ] like mere calculators.

 But Jane knew that a discourse AI was only as good as its training data. And so, she scoured the internet, collecting the most fascinating, hilarious, and downright bizarre conversations she could find. From heated debates about the proper way to make a cup of tea to in-depth discussions on the mating habits of the rare Peruvian flying squirrel, Jane left no stone unturned.

-As the weeks turned into months, Jane’s creation began to take shape. It was a thing of beauty, a masterpiece of artificial intelligence that could engage in witty banter, offer sage advice, and even tell the occasional joke (though its sense of humor was admittedly a bit on the quirky side). Jane beamed with pride as she watched her AI converse with humans, marveling at its ability to understand and respond to even the most complex of queries.
+metadata: Jane's story
+number: 15
+jane left no stone unturned.
+
+As the weeks turned into months, Janeâs creation began to take shape. It was a thing of beauty, a masterpiece of artificial intelligence that could engage in witty banter, offer sage advice, and even tell the occasional joke (though its sense of humor was admittedly a bit on the quirky side). Jane beamed with pride as she watched her AI converse with humans, marveling at its ability to understand and respond to even the most complex of queries.

 metadata: Jane's story
-number: 7
-to understand and respond to even the most complex of queries.
+number: 16
+even the most complex of queries.

 But there was one final hurdle to overcome: the dreaded spec doc. Jane knew that no self-respecting AI could be unleashed upon the world without a proper set of specifications. And so, she set about crafting the most magnificent dummy text the world had ever seen.

+metadata: Jane's story
+number: 17
+dummy text the world had ever seen.
+
 It was a masterpiece of nonsense, a symphony of absurdity that would leave even the most seasoned tech writer scratching their head in confusion. From descriptions of the AI’s ability to recite Shakespearean sonnets in binary code to detailed explanations of its built-in “tea break” feature, Jane’s dummy text was a work of art.

+metadata: Jane's story
+number: 18
+dummy text was a work of art.
+
 And so, with a flourish of her keyboard and a triumphant grin, Jane unleashed her creation upon the world. The response was immediate and overwhelming. People from all walks of life flocked to converse with Jane’s AI, marveling at its intelligence, its charm, and its uncanny ability to make even the most mundane of topics seem fascinating.

 metadata: Jane's story
-number: 8
-to make even the most mundane of topics seem fascinating.
+number: 19
+the most mundane of topics seem fascinating.

 In the end, Jane’s discourse AI became the stuff of legend, a shining example of what can be achieved when brilliance, determination, and a healthy dose of eccentricity come together. And as for Jane herself? Well, let’s just say that she’s already hard at work on her next project: a robot that can make the perfect cup of tea. But that, dear reader, is a story for another day.
--- a/spec/jobs/regular/digest_rag_upload_spec.rb
+++ b/spec/jobs/regular/digest_rag_upload_spec.rb
@ -26,6 +26,7 @@ RSpec.describe Jobs::DigestRagUpload do
  before do
    SiteSetting.ai_embeddings_enabled = true
    SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
+    SiteSetting.ai_embeddings_model = "bge-large-en"
    SiteSetting.authorized_extensions = "txt"

    WebMock.stub_request(
@ -37,6 +38,9 @@ RSpec.describe Jobs::DigestRagUpload do
  describe "#execute" do
    context "when processing an upload containing metadata" do
      it "correctly splits on metadata boundary" do
+        # be explicit here about chunking strategy
+        persona.update!(rag_chunk_tokens: 100, rag_chunk_overlap_tokens: 10)
+
        described_class.new.execute(upload_id: upload_with_metadata.id, ai_persona_id: persona.id)

        parsed = +""
--- a/spec/lib/modules/ai_bot/personas/persona_spec.rb
+++ b/spec/lib/modules/ai_bot/personas/persona_spec.rb
@ -224,7 +224,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
    context "when a persona has RAG uploads" do
      fab!(:upload)

-      def stub_fragments(limit)
+      def stub_fragments(limit, expected_limit: nil)
        candidate_ids = []

        limit.times do |i|
@ -239,6 +239,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
        DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn
          .any_instance
          .expects(:asymmetric_rag_fragment_similarity_search)
+          .with { |args, kwargs| kwargs[:limit] == (expected_limit || limit) }
          .returns(candidate_ids)
      end

@ -280,11 +281,40 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
        end
      end

+      context "when persona allows for less fragments" do
+        before { stub_fragments(3) }
+
+        it "will only pick 3 fragments" do
+          custom_ai_persona =
+            Fabricate(
+              :ai_persona,
+              name: "custom",
+              rag_conversation_chunks: 3,
+              allowed_group_ids: [Group::AUTO_GROUPS[:trust_level_0]],
+            )
+
+          UploadReference.ensure_exist!(target: custom_ai_persona, upload_ids: [upload.id])
+
+          custom_persona =
+            DiscourseAi::AiBot::Personas::Persona.find_by(id: custom_ai_persona.id, user: user).new
+
+          expect(custom_persona.class.rag_conversation_chunks).to eq(3)
+
+          crafted_system_prompt = custom_persona.craft_prompt(with_cc).messages.first[:content]
+
+          expect(crafted_system_prompt).to include("fragment-n0")
+          expect(crafted_system_prompt).to include("fragment-n1")
+          expect(crafted_system_prompt).to include("fragment-n2")
+          expect(crafted_system_prompt).not_to include("fragment-n3")
+        end
+      end
+
      context "when the reranker is available" do
        before do
          SiteSetting.ai_hugging_face_tei_reranker_endpoint = "https://test.reranker.com"

-          stub_fragments(15) # Mimic limit being more than 10 results
+          # hard coded internal implementation, reranker takes x5 number of chunks
+          stub_fragments(15, expected_limit: 50) # Mimic limit being more than 10 results
        end

        it "uses the re-ranker to reorder the fragments and pick the top 10 candidates" do
--- a/spec/models/ai_persona_spec.rb
+++ b/spec/models/ai_persona_spec.rb
@ -41,6 +41,32 @@ RSpec.describe AiPersona do
    expect(user.id).to be <= AiPersona::FIRST_PERSONA_USER_ID
  end

+  it "removes all rag embeddings when rag params change" do
+    persona =
+      AiPersona.create!(
+        name: "test",
+        description: "test",
+        system_prompt: "test",
+        commands: [],
+        allowed_group_ids: [],
+        rag_chunk_tokens: 10,
+        rag_chunk_overlap_tokens: 5,
+      )
+
+    id =
+      RagDocumentFragment.create!(
+        ai_persona: persona,
+        fragment: "test",
+        fragment_number: 1,
+        upload: Fabricate(:upload),
+      ).id
+
+    persona.rag_chunk_tokens = 20
+    persona.save!
+
+    expect(RagDocumentFragment.exists?(id)).to eq(false)
+  end
+
  it "defines singleton methods on system persona classes" do
    forum_helper = AiPersona.find_by(name: "Forum Helper")
    forum_helper.update!(
--- a/spec/models/rag_document_fragment_spec.rb
+++ b/spec/models/rag_document_fragment_spec.rb
@ -93,6 +93,7 @@ RSpec.describe RagDocumentFragment do
    before do
      SiteSetting.ai_embeddings_enabled = true
      SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
+      SiteSetting.ai_embeddings_model = "bge-large-en"

      WebMock.stub_request(
        :post,
@ -102,6 +103,19 @@ RSpec.describe RagDocumentFragment do
      vector_rep.generate_representation_from(rag_document_fragment_1)
    end

+    it "regenerates all embeddings if ai_embeddings_model changes" do
+      old_id = rag_document_fragment_1.id
+
+      UploadReference.create!(upload_id: upload_1.id, target: persona)
+      UploadReference.create!(upload_id: upload_2.id, target: persona)
+
+      Sidekiq::Testing.fake! do
+        SiteSetting.ai_embeddings_model = "all-mpnet-base-v2"
+        expect(RagDocumentFragment.exists?(old_id)).to eq(false)
+        expect(Jobs::DigestRagUpload.jobs.size).to eq(2)
+      end
+    end
+
    it "returns total, indexed and unindexed fragments for each upload" do
      results = described_class.indexing_status(persona, [upload_1, upload_2])

--- a/spec/requests/admin/ai_personas_controller_spec.rb
+++ b/spec/requests/admin/ai_personas_controller_spec.rb
@ -224,6 +224,26 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
      expect(persona.temperature).to eq(nil)
    end

+    it "supports updating rag params" do
+      persona = Fabricate(:ai_persona, name: "test_bot2")
+
+      put "/admin/plugins/discourse-ai/ai-personas/#{persona.id}.json",
+          params: {
+            ai_persona: {
+              rag_chunk_tokens: "102",
+              rag_chunk_overlap_tokens: "12",
+              rag_conversation_chunks: "13",
+            },
+          }
+
+      expect(response).to have_http_status(:ok)
+      persona.reload
+
+      expect(persona.rag_chunk_tokens).to eq(102)
+      expect(persona.rag_chunk_overlap_tokens).to eq(12)
+      expect(persona.rag_conversation_chunks).to eq(13)
+    end
+
    it "supports updating vision params" do
      persona = Fabricate(:ai_persona, name: "test_bot2")
      put "/admin/plugins/discourse-ai/ai-personas/#{persona.id}.json",
--- a/test/javascripts/unit/models/ai-persona-test.js
+++ b/test/javascripts/unit/models/ai-persona-test.js
@ -49,6 +49,9 @@ module("Discourse AI | Unit | Model | ai-persona", function () {
      vision_enabled: true,
      vision_max_pixels: 100,
      rag_uploads: [],
+      rag_chunk_tokens: 374,
+      rag_chunk_overlap_tokens: 10,
+      rag_conversation_chunks: 10,
    };

    const aiPersona = AiPersona.create({ ...properties });