FEATURE: PDF support for RAG pipeline (#1118)

This PR introduces several enhancements and refactorings to the AI Persona and RAG (Retrieval-Augmented Generation) functionalities within the discourse-ai plugin. Here's a breakdown of the changes:

**1. LLM Model Association for RAG and Personas:**

-   **New Database Columns:** Adds `rag_llm_model_id` to both the `ai_personas` and `ai_tools` tables, allowing a dedicated LLM to be specified for RAG indexing, separate from the persona's primary LLM. Also adds `default_llm_id` and `question_consolidator_llm_id` to `ai_personas`.
-   **Migration:** Includes a migration (`20250210032345_migrate_persona_to_llm_model_id.rb`) that populates the new `default_llm_id` and `question_consolidator_llm_id` columns in `ai_personas` from the existing `default_llm` and `question_consolidator_llm` string columns, plus a post-deploy migration that drops the old string columns.
-   **Model Changes:** The `AiPersona` and `AiTool` models now `belong_to` an `LlmModel` via `rag_llm_model_id`, and `AiPersona` gains `default_llm_id` and `question_consolidator_llm_id` associations. `DiscourseAi::Completions::Llm.proxy` now accepts an `LlmModel` instance instead of just a string identifier (see the sketch after this list).
-   **UI Updates:** The AI Persona and AI Tool editors in the admin panel now allow selecting an LLM for RAG indexing (when PDF/image support is enabled); the RAG options component displays an LLM selector.
-   **Serialization:** The serializers (`AiCustomToolSerializer`, `AiCustomToolListSerializer`, `LocalizedAiPersonaSerializer`) have been updated to include the new `rag_llm_model_id`, `default_llm_id` and `question_consolidator_llm_id` attributes.
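
For illustration, here is a minimal before/after sketch of the new calling convention. It is a sketch only; `llm_model` and `persona` stand in for an `LlmModel` and an `AiPersona` record and are not names from this PR:

```ruby
# Before: LLMs were referenced by string identifiers such as "custom:42",
# which callers had to parse apart to find the underlying record.
llm = DiscourseAi::Completions::Llm.proxy("custom:#{llm_model.id}")

# After: callers hand the LlmModel record to the proxy directly.
llm = DiscourseAi::Completions::Llm.proxy(llm_model)

# The new belongs_to associations also return records directly, e.g. the
# dedicated RAG indexing model on a persona or tool (nil when unset).
rag_llm = persona.rag_llm_model
```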

**2. PDF and Image Support for RAG:**

-   **Site Setting:** Introduces a new hidden site setting, `ai_rag_pdf_images_enabled`, to control whether PDF and image files can be indexed for RAG. This defaults to `false`.
-   **File Upload Validation:** The `RagDocumentFragmentsController` now checks the `ai_rag_pdf_images_enabled` setting and additionally allows PDF, PNG, JPG, and JPEG files when it is enabled. Attempting to index a PDF or image while the setting is disabled raises an error.
-   **PDF Processing:** Adds a new utility class, `DiscourseAi::Utils::PdfToImages`, which uses ImageMagick (`magick`) to convert PDF pages into individual PNG images. A maximum PDF size and conversion timeout are enforced.
-   **Image Processing:** A new utility class, `DiscourseAi::Utils::ImageToText`, handles OCR for images and PDF pages: it first runs Tesseract, then asks a vision-capable LLM to correct and structure the extracted text into `<chunk>`-delimited sections.
-   **RAG Digestion Job:** The `DigestRagUpload` job now handles PDF and image uploads, using `PdfToImages` and `ImageToText` to extract text and create document fragments (the flow is sketched after this list).
-   **UI Updates:**  The RAG uploader component now accepts PDF and image file types if `ai_rag_pdf_images_enabled` is true. The UI text is adjusted to indicate supported file types.
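
Taken together, the indexing path for a PDF upload looks roughly like the sketch below, condensed from the `DigestRagUpload` changes in this PR; `upload` is the PDF upload and `target` is the persona or tool that owns it:

```ruby
# 1. Rasterize each PDF page into a PNG upload via ImageMagick.
pages =
  DiscourseAi::Utils::PdfToImages.new(
    upload: upload,
    user: Discourse.system_user,
  ).uploaded_pages

# 2. Wrap the page images in a fake-file reader that OCRs them lazily,
#    using the target's dedicated RAG LLM, as the digest job reads it.
document =
  DiscourseAi::Utils::ImageToText.as_fake_file(
    uploads: pages,
    llm_model: target.rag_llm_model,
    user: Discourse.system_user,
  )

# `document` responds to read(length), so the existing chunking and
# fragment-creation code can consume it like any other uploaded file.
```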

**3. Refactoring and Improvements:**

-   **LLM Enumeration:** The `DiscourseAi::Configuration::LlmEnumerator` now provides a `values_for_serialization` method, which returns a simplified array of LLM data (id, name, vision_enabled) suitable for use in serializers, avoiding exposing unnecessary details to the frontend (usage sketched after this list).
-   **AI Helper:** The `AiHelper::Assistant` now takes optional `helper_llm` and `image_caption_llm` parameters in its constructor, allowing for greater flexibility.
-   **Bot and Persona Updates:** Call sites across the codebase were updated to replace string-based LLM references (e.g. `"custom:<id>"`) with the new id-based model associations.
-   **Audit Logs:** The `DiscourseAi::Completions::Endpoints::Base` now formats raw request payloads as pretty-printed JSON for easier auditing.
-   **Eval Script:** An evaluation script is included (see section 4).
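
As a usage sketch, the enumerator returns exactly the fields the editors need; the id and name below are illustrative, not real data:

```ruby
DiscourseAi::Configuration::LlmEnumerator.values_for_serialization
# => [{ id: 42, name: "GPT-4o", vision_enabled: true }, ...]

# Seeded models (ids <= 0) are excluded unless explicitly allowed:
DiscourseAi::Configuration::LlmEnumerator.values_for_serialization(
  allowed_seeded_llm_ids: SiteSetting.ai_bot_allowed_seeded_models_map,
)
```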

**4. Testing:**

-   The PR introduces a new eval system for LLMs, which allows us to test how functionality works across various LLM providers. It lives in `/evals`; an example invocation is sketched below.
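
Based on the CLI defined in `evals/lib/cli.rb`, a typical invocation looks like this (the eval id is illustrative; real ids come from the cloned discourse-ai-evals repository):

```
# list available evals and models
evals/run --list
evals/run --list-models

# run one eval against one model; omit -m to run against all configured models
evals/run -e pdf_to_text -m gpt-4o
```
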
Author: Sam, 2025-02-14 12:15:07 +11:00 (committed by GitHub)
parent e2afbc26d3
commit 5e80f93e4c
54 changed files with 1329 additions and 141 deletions

.gitignore

@@ -2,3 +2,5 @@ node_modules
 /gems
 /auto_generated
 .env
+evals/log
+evals/cases

@@ -14,5 +14,7 @@ export default class DiscourseAiToolsEditRoute extends DiscourseRoute {
     controller.set("allTools", toolsModel);
     controller.set("presets", toolsModel.resultSetMeta.presets);
+    controller.set("llms", toolsModel.resultSetMeta.llms);
+    controller.set("settings", toolsModel.resultSetMeta.settings);
   }
 }

@@ -11,5 +11,7 @@ export default class DiscourseAiToolsNewRoute extends DiscourseRoute {
     controller.set("allTools", toolsModel);
     controller.set("presets", toolsModel.resultSetMeta.presets);
+    controller.set("llms", toolsModel.resultSetMeta.llms);
+    controller.set("settings", toolsModel.resultSetMeta.settings);
   }
 }

@@ -3,5 +3,7 @@
   @tools={{this.allTools}}
   @model={{this.model}}
   @presets={{this.presets}}
+  @llms={{this.llms}}
+  @settings={{this.settings}}
 />
 </section>

@@ -3,5 +3,7 @@
   @tools={{this.allTools}}
   @model={{this.model}}
   @presets={{this.presets}}
+  @llms={{this.llms}}
+  @settings={{this.settings}}
 />
 </section>

@@ -32,10 +32,19 @@ module DiscourseAi
         }
       end

       llms =
-        DiscourseAi::Configuration::LlmEnumerator
-          .values(allowed_seeded_llms: SiteSetting.ai_bot_allowed_seeded_models)
-          .map { |hash| { id: hash[:value], name: hash[:name] } }
-      render json: { ai_personas: ai_personas, meta: { tools: tools, llms: llms } }
+        DiscourseAi::Configuration::LlmEnumerator.values_for_serialization(
+          allowed_seeded_llm_ids: SiteSetting.ai_bot_allowed_seeded_models_map,
+        )
+
+      render json: {
+        ai_personas: ai_personas,
+        meta: {
+          tools: tools,
+          llms: llms,
+          settings: {
+            rag_pdf_images_enabled: SiteSetting.ai_rag_pdf_images_enabled,
+          },
+        },
+      }
       end

       def new
@@ -187,7 +196,7 @@ module DiscourseAi
           :priority,
           :top_p,
           :temperature,
-          :default_llm,
+          :default_llm_id,
           :user_id,
           :max_context_posts,
           :vision_enabled,
@@ -195,7 +204,8 @@ module DiscourseAi
           :rag_chunk_tokens,
           :rag_chunk_overlap_tokens,
           :rag_conversation_chunks,
-          :question_consolidator_llm,
+          :rag_llm_model_id,
+          :question_consolidator_llm_id,
           :allow_chat_channel_mentions,
           :allow_chat_direct_messages,
           :allow_topic_mentions,

@@ -90,6 +90,7 @@ module DiscourseAi
           :summary,
           :rag_chunk_tokens,
           :rag_chunk_overlap_tokens,
+          :rag_llm_model_id,
           rag_uploads: [:id],
           parameters: [:name, :type, :description, :required, enum: []],
         )

@@ -49,6 +49,7 @@ module DiscourseAi
       def validate_extension!(filename)
         extension = File.extname(filename)[1..-1] || ""
         authorized_extensions = %w[txt md]
+        authorized_extensions.concat(%w[pdf png jpg jpeg]) if SiteSetting.ai_rag_pdf_images_enabled
         if !authorized_extensions.include?(extension)
           raise Discourse::InvalidParameters.new(
             I18n.t(

@@ -28,7 +28,7 @@ module ::Jobs
       # Check if this is the first time we process this upload.
       if fragment_ids.empty?
-        document = get_uploaded_file(upload)
+        document = get_uploaded_file(upload: upload, target: target)
         return if document.nil?

         RagDocumentFragment.publish_status(upload, { total: 0, indexed: 0, left: 0 })
@@ -163,7 +163,38 @@ module ::Jobs
       [buffer, split_char]
     end

-    def get_uploaded_file(upload)
+    def get_uploaded_file(upload:, target:)
+      if %w[pdf png jpg jpeg].include?(upload.extension) && !SiteSetting.ai_rag_pdf_images_enabled
+        raise Discourse::InvalidAccess.new(
+          "The setting ai_rag_pdf_images_enabled is false, can not index images and pdfs.",
+        )
+      end
+
+      if upload.extension == "pdf"
+        pages =
+          DiscourseAi::Utils::PdfToImages.new(
+            upload: upload,
+            user: Discourse.system_user,
+          ).uploaded_pages
+
+        return(
+          DiscourseAi::Utils::ImageToText.as_fake_file(
+            uploads: pages,
+            llm_model: target.rag_llm_model,
+            user: Discourse.system_user,
+          )
+        )
+      end
+
+      if %w[png jpg jpeg].include?(upload.extension)
+        return(
+          DiscourseAi::Utils::ImageToText.as_fake_file(
+            uploads: [upload],
+            llm_model: target.rag_llm_model,
+            user: Discourse.system_user,
+          )
+        )
+      end
+
       store = Discourse.store
       @file ||=
         if store.external?

@@ -1,8 +1,8 @@
 # frozen_string_literal: true

 class AiPersona < ActiveRecord::Base
-  # TODO remove this line 01-1-2025
-  self.ignored_columns = %i[commands allow_chat mentionable]
+  # TODO remove this line 01-10-2025
+  self.ignored_columns = %i[default_llm question_consolidator_llm]

   # places a hard limit, so per site we cache a maximum of 500 classes
   MAX_PERSONAS_PER_SITE = 500
@@ -12,7 +12,7 @@ class AiPersona < ActiveRecord::Base
   validates :system_prompt, presence: true, length: { maximum: 10_000_000 }
   validate :system_persona_unchangeable, on: :update, if: :system
   validate :chat_preconditions
-  validate :allowed_seeded_model, if: :default_llm
+  validate :allowed_seeded_model, if: :default_llm_id
   validates :max_context_posts, numericality: { greater_than: 0 }, allow_nil: true
   # leaves some room for growth but sets a maximum to avoid memory issues
   # we may want to revisit this in the future
@@ -30,6 +30,10 @@ class AiPersona < ActiveRecord::Base
   belongs_to :created_by, class_name: "User"
   belongs_to :user

+  belongs_to :default_llm, class_name: "LlmModel"
+  belongs_to :question_consolidator_llm, class_name: "LlmModel"
+  belongs_to :rag_llm_model, class_name: "LlmModel"
+
   has_many :upload_references, as: :target, dependent: :destroy
   has_many :uploads, through: :upload_references
@@ -62,7 +66,7 @@ class AiPersona < ActiveRecord::Base
         user_id: persona.user_id,
         username: persona.user.username_lower,
         allowed_group_ids: persona.allowed_group_ids,
-        default_llm: persona.default_llm,
+        default_llm_id: persona.default_llm_id,
         force_default_llm: persona.force_default_llm,
         allow_chat_channel_mentions: persona.allow_chat_channel_mentions,
         allow_chat_direct_messages: persona.allow_chat_direct_messages,
@@ -157,12 +161,12 @@ class AiPersona < ActiveRecord::Base
       user_id
       system
       mentionable
-      default_llm
+      default_llm_id
       max_context_posts
       vision_enabled
       vision_max_pixels
       rag_conversation_chunks
-      question_consolidator_llm
+      question_consolidator_llm_id
       allow_chat_channel_mentions
       allow_chat_direct_messages
       allow_topic_mentions
@@ -302,7 +306,7 @@ class AiPersona < ActiveRecord::Base
     if (
          allow_chat_channel_mentions || allow_chat_direct_messages || allow_topic_mentions ||
           force_default_llm
-       ) && !default_llm
+       ) && !default_llm_id
       errors.add(:default_llm, I18n.t("discourse_ai.ai_bot.personas.default_llm_required"))
     end
   end
@@ -332,13 +336,12 @@ class AiPersona < ActiveRecord::Base
   end

   def allowed_seeded_model
-    return if default_llm.blank?
+    return if default_llm_id.blank?

-    llm = LlmModel.find_by(id: default_llm.split(":").last.to_i)
-    return if llm.nil?
-    return if !llm.seeded?
+    return if default_llm.nil?
+    return if !default_llm.seeded?

-    return if SiteSetting.ai_bot_allowed_seeded_models.include?(llm.id.to_s)
+    return if SiteSetting.ai_bot_allowed_seeded_models_map.include?(default_llm.id.to_s)

     errors.add(:default_llm, I18n.t("discourse_ai.llm.configuration.invalid_seeded_model"))
   end
@@ -362,14 +365,12 @@ end
 #  temperature                  :float
 #  top_p                        :float
 #  user_id                      :integer
-#  default_llm                  :text
 #  max_context_posts            :integer
 #  vision_enabled               :boolean          default(FALSE), not null
 #  vision_max_pixels            :integer          default(1048576), not null
 #  rag_chunk_tokens             :integer          default(374), not null
 #  rag_chunk_overlap_tokens     :integer          default(10), not null
 #  rag_conversation_chunks      :integer          default(10), not null
-#  question_consolidator_llm    :text
 #  tool_details                 :boolean          default(TRUE), not null
 #  tools                        :json             not null
 #  forced_tool_count            :integer          default(-1), not null
@@ -378,6 +379,9 @@ end
 #  allow_topic_mentions         :boolean          default(FALSE), not null
 #  allow_personal_messages      :boolean          default(TRUE), not null
 #  force_default_llm            :boolean          default(FALSE), not null
+#  rag_llm_model_id             :bigint
+#  default_llm_id               :bigint
+#  question_consolidator_llm_id :bigint
 #
 # Indexes
 #

@@ -8,6 +8,7 @@ class AiTool < ActiveRecord::Base
   validates :script, presence: true, length: { maximum: 100_000 }
   validates :created_by_id, presence: true
   belongs_to :created_by, class_name: "User"
+  belongs_to :rag_llm_model, class_name: "LlmModel"
   has_many :rag_document_fragments, dependent: :destroy, as: :target
   has_many :upload_references, as: :target, dependent: :destroy
   has_many :uploads, through: :upload_references
@@ -371,4 +372,4 @@ end
 #  rag_chunk_tokens         :integer          default(374), not null
 #  rag_chunk_overlap_tokens :integer          default(10), not null
 #  tool_name                :string(100)      default(""), not null
-#
+#  rag_llm_model_id         :bigint

@@ -70,7 +70,7 @@ class LlmModel < ActiveRecord::Base
   end

   def to_llm
-    DiscourseAi::Completions::Llm.proxy(identifier)
+    DiscourseAi::Completions::Llm.proxy(self)
   end

   def identifier

@@ -6,7 +6,13 @@ class AiCustomToolListSerializer < ApplicationSerializer
   has_many :ai_tools, serializer: AiCustomToolSerializer, embed: :objects

   def meta
-    { presets: AiTool.presets }
+    {
+      presets: AiTool.presets,
+      llms: DiscourseAi::Configuration::LlmEnumerator.values_for_serialization,
+      settings: {
+        rag_pdf_images_enabled: SiteSetting.ai_rag_pdf_images_enabled,
+      },
+    }
   end

   def ai_tools

@@ -10,6 +10,7 @@ class AiCustomToolSerializer < ApplicationSerializer
              :script,
              :rag_chunk_tokens,
              :rag_chunk_overlap_tokens,
+             :rag_llm_model_id,
              :created_by_id,
              :created_at,
              :updated_at

@@ -14,7 +14,7 @@ class LocalizedAiPersonaSerializer < ApplicationSerializer
              :allowed_group_ids,
              :temperature,
              :top_p,
-             :default_llm,
+             :default_llm_id,
              :user_id,
              :max_context_posts,
              :vision_enabled,
@@ -22,7 +22,8 @@ class LocalizedAiPersonaSerializer < ApplicationSerializer
              :rag_chunk_tokens,
              :rag_chunk_overlap_tokens,
              :rag_conversation_chunks,
-             :question_consolidator_llm,
+             :rag_llm_model_id,
+             :question_consolidator_llm_id,
              :tool_details,
              :forced_tool_count,
              :allow_chat_channel_mentions,

@@ -15,7 +15,7 @@ const CREATE_ATTRIBUTES = [
  "top_p",
  "temperature",
  "user_id",
-  "default_llm",
+  "default_llm_id",
  "force_default_llm",
  "user",
  "max_context_posts",
@@ -25,7 +25,8 @@ const CREATE_ATTRIBUTES = [
  "rag_chunk_tokens",
  "rag_chunk_overlap_tokens",
  "rag_conversation_chunks",
-  "question_consolidator_llm",
+  "rag_llm_model_id",
+  "question_consolidator_llm_id",
  "allow_chat",
  "tool_details",
  "forced_tool_count",
@@ -43,7 +44,7 @@ const SYSTEM_ATTRIBUTES = [
  "priority",
  "tools",
  "user_id",
-  "default_llm",
+  "default_llm_id",
  "force_default_llm",
  "user",
  "max_context_posts",
@@ -53,7 +54,8 @@ const SYSTEM_ATTRIBUTES = [
  "rag_chunk_tokens",
  "rag_chunk_overlap_tokens",
  "rag_conversation_chunks",
-  "question_consolidator_llm",
+  "rag_llm_model_id",
+  "question_consolidator_llm_id",
  "tool_details",
  "allow_personal_messages",
  "allow_topic_mentions",

@@ -12,6 +12,7 @@ const CREATE_ATTRIBUTES = [
  "rag_uploads",
  "rag_chunk_tokens",
  "rag_chunk_overlap_tokens",
+  "rag_llm_model_id",
  "enabled",
 ];

@@ -167,27 +167,27 @@ export default class PersonaEditor extends Component {
   }

   get mappedQuestionConsolidatorLlm() {
-    return this.editingModel?.question_consolidator_llm || "blank";
+    return this.editingModel?.question_consolidator_llm_id ?? "blank";
   }

   set mappedQuestionConsolidatorLlm(value) {
     if (value === "blank") {
-      this.editingModel.question_consolidator_llm = null;
+      this.editingModel.question_consolidator_llm_id = null;
     } else {
-      this.editingModel.question_consolidator_llm = value;
+      this.editingModel.question_consolidator_llm_id = value;
     }
   }

   get mappedDefaultLlm() {
-    return this.editingModel?.default_llm || "blank";
+    return this.editingModel?.default_llm_id ?? "blank";
   }

   set mappedDefaultLlm(value) {
     if (value === "blank") {
-      this.editingModel.default_llm = null;
+      this.editingModel.default_llm_id = null;
       this.hasDefaultLlm = false;
     } else {
-      this.editingModel.default_llm = value;
+      this.editingModel.default_llm_id = value;
       this.hasDefaultLlm = true;
     }
   }
@@ -596,9 +596,14 @@ export default class PersonaEditor extends Component {
               @target={{this.editingModel}}
               @updateUploads={{this.updateUploads}}
               @onRemove={{this.removeUpload}}
+              @allowPdfsAndImages={{@personas.resultSetMeta.settings.rag_pdf_images_enabled}}
             />
           </div>
-          <RagOptions @model={{this.editingModel}}>
+          <RagOptions
+            @model={{this.editingModel}}
+            @llms={{@personas.resultSetMeta.llms}}
+            @allowPdfsAndImages={{@personas.resultSetMeta.settings.rag_pdf_images_enabled}}
+          >
             <div class="control-group">
               <label>{{i18n
                   "discourse_ai.ai_persona.rag_conversation_chunks"

@@ -90,7 +90,8 @@ export default class AiToolEditor extends Component {
       "summary",
       "rag_uploads",
       "rag_chunk_tokens",
-      "rag_chunk_overlap_tokens"
+      "rag_chunk_overlap_tokens",
+      "rag_llm_model_id"
     );

     await this.args.model.save(data);
@@ -244,9 +245,14 @@ export default class AiToolEditor extends Component {
             @target={{this.editingModel}}
             @updateUploads={{this.updateUploads}}
             @onRemove={{this.removeUpload}}
+            @allowPdfsAndImages={{@settings.rag_pdf_images_enabled}}
           />
         </div>
-        <RagOptions @model={{this.editingModel}} />
+        <RagOptions
+          @model={{this.editingModel}}
+          @llms={{@llms}}
+          @allowPdfsAndImages={{@settings.rag_pdf_images_enabled}}
+        />
       {{/if}}

       <div class="control-group ai-tool-editor__action_panel">

@@ -5,6 +5,7 @@ import { on } from "@ember/modifier";
 import { action } from "@ember/object";
 import DTooltip from "discourse/components/d-tooltip";
 import { i18n } from "discourse-i18n";
+import AiLlmSelector from "./ai-llm-selector";

 export default class RagOptions extends Component {
   @tracked showIndexingOptions = false;
@@ -22,6 +23,22 @@ export default class RagOptions extends Component {
       : i18n("discourse_ai.rag.options.show_indexing_options");
   }

+  get visionLlms() {
+    return this.args.llms.filter((llm) => llm.vision_enabled);
+  }
+
+  get visionLlmId() {
+    return this.args.model.rag_llm_model_id ?? "blank";
+  }
+
+  set visionLlmId(value) {
+    if (value === "blank") {
+      this.args.model.rag_llm_model_id = null;
+    } else {
+      this.args.model.rag_llm_model_id = value;
+    }
+  }
+
   <template>
     {{#if @model.rag_uploads}}
       <a
@@ -64,6 +81,20 @@ export default class RagOptions extends Component {
             }}
           />
         </div>
+        {{#if @allowPdfsAndImages}}
+          <div class="control-group">
+            <label>{{i18n "discourse_ai.rag.options.rag_llm_model"}}</label>
+            <AiLlmSelector
+              class="ai-persona-editor__llms"
+              @value={{this.visionLlmId}}
+              @llms={{this.visionLlms}}
+            />
+            <DTooltip
+              @icon="circle-question"
+              @content={{i18n "discourse_ai.rag.options.rag_llm_model_help"}}
+            />
+          </div>
+        {{/if}}
         {{yield}}
       {{/if}}
   </template>

@@ -77,6 +77,14 @@ export default class RagUploader extends Component {
     this.updateUploads(this.ragUploads);
   }

+  get acceptedFileTypes() {
+    if (this.args?.allowPdfsAndImages) {
+      return ".txt,.md,.pdf,.png,.jpg,.jpeg";
+    } else {
+      return ".txt,.md";
+    }
+  }
+
   @action
   submitFiles() {
     this.uppyUpload.openPicker();
@@ -119,7 +127,11 @@ export default class RagUploader extends Component {
   <template>
     <div class="rag-uploader">
       <h3>{{i18n "discourse_ai.rag.uploads.title"}}</h3>
+      {{#if @allowPdfsAndImages}}
+        <p>{{i18n "discourse_ai.rag.uploads.description_with_pdfs"}}</p>
+      {{else}}
       <p>{{i18n "discourse_ai.rag.uploads.description"}}</p>
+      {{/if}}

       {{#if this.ragUploads}}
         <div class="rag-uploader__search-input-container">
@@ -187,7 +199,7 @@ export default class RagUploader extends Component {
             disabled={{this.uploading}}
             type="file"
             multiple="multiple"
-            accept=".txt,.md"
+            accept={{this.acceptedFileTypes}}
           />
           <DButton
             @label="discourse_ai.rag.uploads.button"

@@ -274,11 +274,14 @@ en:
         rag_chunk_tokens_help: "The number of tokens to use for each chunk in the RAG model. Increase to increase the amount of context the AI can use. (changing will re-index all uploads)"
         rag_chunk_overlap_tokens: "Upload chunk overlap tokens"
         rag_chunk_overlap_tokens_help: "The number of tokens to overlap between chunks in the RAG model. (changing will re-index all uploads)"
+        rag_llm_model: "Indexing Language Model"
+        rag_llm_model_help: "The language model used for OCR during indexing of PDFs and images"
         show_indexing_options: "Show upload options"
         hide_indexing_options: "Hide upload options"
       uploads:
         title: "Uploads"
-        description: "Uploaded files should be formatted as plaintext (.txt) or markdown (.md)."
+        description: "Plaintext (.txt) or markdown (.md)"
+        description_with_pdfs: "Plaintext (.txt), markdown (.md), PDF (.pdf) or image (.png, .jpeg)"
         button: "Add files"
         filter: "Filter uploads"
         indexed: "Indexed"

@@ -347,12 +347,14 @@ discourse_ai:
   ai_spam_detection_enabled:
     default: false
     hidden: true
   ai_spam_detection_user_id:
     default: ""
     hidden: true
   ai_spam_detection_model_allowed_seeded_models:
     default: ""
     hidden: true
     type: list
+  ai_rag_pdf_images_enabled:
+    default: false
+    hidden: true

@@ -0,0 +1,7 @@
# frozen_string_literal: true
class AddRagLlmModel < ActiveRecord::Migration[7.2]
def change
add_column :ai_personas, :rag_llm_model_id, :bigint
add_column :ai_tools, :rag_llm_model_id, :bigint
end
end

@@ -0,0 +1,19 @@
# frozen_string_literal: true
class MigratePersonaToLlmModelId < ActiveRecord::Migration[7.2]
def up
add_column :ai_personas, :default_llm_id, :bigint
add_column :ai_personas, :question_consolidator_llm_id, :bigint
# personas are seeded, we do not mark stuff as readonly
execute <<~SQL
UPDATE ai_personas
set
default_llm_id = (select id from llm_models where ('custom:' || id) = default_llm),
question_consolidator_llm_id = (select id from llm_models where ('custom:' || id) = question_consolidator_llm)
SQL
end
def down
raise ActiveRecord::IrreversibleMigration
end
end

@@ -0,0 +1,11 @@
# frozen_string_literal: true
class PostMigratePersonaToLlmModelId < ActiveRecord::Migration[7.2]
def up
remove_column :ai_personas, :default_llm
remove_column :ai_personas, :question_consolidator_llm
end
def down
raise ActiveRecord::IrreversibleMigration
end
end

evals/lib/boot.rb

@@ -0,0 +1,36 @@
# frozen_string_literal: true
# got to ensure evals are here
# rubocop:disable Discourse/Plugins/NamespaceConstants
EVAL_PATH = File.join(__dir__, "../cases")
# rubocop:enable Discourse/Plugins/NamespaceConstants
#
if !Dir.exist?(EVAL_PATH)
puts "Evals are missing, cloning from discourse/discourse-ai-evals"
success =
system("git clone git@github.com:discourse/discourse-ai-evals.git '#{EVAL_PATH}' 2>/dev/null")
# Fall back to HTTPS if SSH fails
if !success
puts "SSH clone failed, falling back to HTTPS..."
success = system("git clone https://github.com/discourse/discourse-ai-evals.git '#{EVAL_PATH}'")
end
if success
puts "Successfully cloned evals repository"
else
abort "Failed to clone evals repository"
end
end
discourse_path = File.expand_path(File.join(__dir__, "../../../.."))
# rubocop:disable Discourse/NoChdir
Dir.chdir(discourse_path)
# rubocop:enable Discourse/NoChdir
require "/home/sam/Source/discourse/config/environment"
ENV["DISCOURSE_AI_NO_DEBUG"] = "1"
module DiscourseAi::Evals
end

evals/lib/cli.rb

@@ -0,0 +1,47 @@
# frozen_string_literal: true
require "optparse"
class DiscourseAi::Evals::Cli
class Options
attr_accessor :eval_name, :model, :list, :list_models
def initialize(eval_name: nil, model: nil, list: false, list_models: false)
@eval_name = eval_name
@model = model
@list = list
@list_models = list_models
end
end
def self.parse_options!
options = Options.new
parser =
OptionParser.new do |opts|
opts.banner = "Usage: evals/run [options]"
opts.on("-e", "--eval NAME", "Name of the evaluation to run") do |eval_name|
options.eval_name = eval_name
end
opts.on("--list-models", "List models") { |model| options.list_models = true }
opts.on(
"-m",
"--model NAME",
"Model to evaluate (will eval all models if not specified)",
) { |model| options.model = model }
opts.on("-l", "--list", "List evals") { |model| options.list = true }
end
show_help = ARGV.empty?
parser.parse!
if show_help
puts parser
exit 0
end
options
end
end

evals/lib/eval.rb

@@ -0,0 +1,136 @@
#frozen_string_literal: true
class DiscourseAi::Evals::Eval
attr_reader :type,
:path,
:name,
:description,
:id,
:args,
:vision,
:expected_output,
:expected_output_regex
def initialize(path:)
@yaml = YAML.load_file(path).symbolize_keys
@path = path
@name = @yaml[:name]
@id = @yaml[:id]
@description = @yaml[:description]
@vision = @yaml[:vision]
@args = @yaml[:args]&.symbolize_keys
@type = @yaml[:type]
@expected_output = @yaml[:expected_output]
@expected_output_regex = @yaml[:expected_output_regex]
@expected_output_regex =
Regexp.new(@expected_output_regex, Regexp::MULTILINE) if @expected_output_regex
@args[:path] = File.expand_path(File.join(File.dirname(path), @args[:path])) if @args&.key?(
:path,
)
end
def run(llm:)
result =
case type
when "helper"
helper(llm, **args)
when "pdf_to_text"
pdf_to_text(llm, **args)
when "image_to_text"
image_to_text(llm, **args)
end
if expected_output
if result == expected_output
{ result: :pass }
else
{ result: :fail, expected_output: expected_output, actual_output: result }
end
elsif expected_output_regex
if result.match?(expected_output_regex)
{ result: :pass }
else
{ result: :fail, expected_output: expected_output_regex, actual_output: result }
end
else
{ result: :unknown, actual_output: result }
end
end
def print
puts "#{id}: #{description}"
end
def to_json
{
type: @type,
path: @path,
name: @name,
description: @description,
id: @id,
args: @args,
vision: @vision,
expected_output: @expected_output,
expected_output_regex: @expected_output_regex,
}.compact
end
private
def helper(llm, input:, name:)
completion_prompt = CompletionPrompt.find_by(name: name)
helper = DiscourseAi::AiHelper::Assistant.new(helper_llm: llm.llm_proxy)
result =
helper.generate_and_send_prompt(
completion_prompt,
input,
current_user = Discourse.system_user,
_force_default_locale = false,
)
result[:suggestions].first
end
def image_to_text(llm, path:)
upload =
UploadCreator.new(File.open(path), File.basename(path)).create_for(Discourse.system_user.id)
text = +""
DiscourseAi::Utils::ImageToText
.new(upload: upload, llm_model: llm.llm_model, user: Discourse.system_user)
.extract_text do |chunk, error|
text << chunk if chunk
text << "\n\n" if chunk
end
text
ensure
upload.destroy if upload
end
def pdf_to_text(llm, path:)
upload =
UploadCreator.new(File.open(path), File.basename(path)).create_for(Discourse.system_user.id)
uploads =
DiscourseAi::Utils::PdfToImages.new(
upload: upload,
user: Discourse.system_user,
).uploaded_pages
text = +""
uploads.each do |page_upload|
DiscourseAi::Utils::ImageToText
.new(upload: page_upload, llm_model: llm.llm_model, user: Discourse.system_user)
.extract_text do |chunk, error|
text << chunk if chunk
text << "\n\n" if chunk
end
upload.destroy
end
text
ensure
upload.destroy if upload
end
end

evals/lib/llm.rb

@@ -0,0 +1,129 @@
# frozen_string_literal: true
class DiscourseAi::Evals::Llm
CONFIGS = {
"gpt-4o" => {
display_name: "GPT-4o",
name: "gpt-4o",
tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer",
api_key_env: "OPENAI_API_KEY",
provider: "open_ai",
url: "https://api.openai.com/v1/chat/completions",
max_prompt_tokens: 131_072,
vision_enabled: true,
},
"gpt-4o-mini" => {
display_name: "GPT-4o-mini",
name: "gpt-4o-mini",
tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer",
api_key_env: "OPENAI_API_KEY",
provider: "open_ai",
url: "https://api.openai.com/v1/chat/completions",
max_prompt_tokens: 131_072,
vision_enabled: true,
},
"claude-3.5-haiku" => {
display_name: "Claude 3.5 Haiku",
name: "claude-3-5-haiku-latest",
tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer",
api_key_env: "ANTHROPIC_API_KEY",
provider: "anthropic",
url: "https://api.anthropic.com/v1/messages",
max_prompt_tokens: 200_000,
vision_enabled: false,
},
"claude-3.5-sonnet" => {
display_name: "Claude 3.5 Sonnet",
name: "claude-3-5-sonnet-latest",
tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer",
api_key_env: "ANTHROPIC_API_KEY",
provider: "anthropic",
url: "https://api.anthropic.com/v1/messages",
max_prompt_tokens: 200_000,
vision_enabled: true,
},
"gemini-2.0-flash" => {
display_name: "Gemini 2.0 Flash",
name: "gemini-2-0-flash",
tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer",
api_key_env: "GEMINI_API_KEY",
provider: "google",
url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash",
max_prompt_tokens: 1_000_000,
vision_enabled: true,
},
"gemini-2.0-pro-exp" => {
display_name: "Gemini 2.0 pro",
name: "gemini-2-0-pro-exp",
tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer",
api_key_env: "GEMINI_API_KEY",
provider: "google",
url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp",
max_prompt_tokens: 1_000_000,
vision_enabled: true,
},
}
def self.print
CONFIGS
.keys
.map do |config_name|
begin
new(config_name)
rescue StandardError
nil
end
end
.compact
.each { |llm| puts "#{llm.config_name}: #{llm.name} (#{llm.provider})" }
end
def self.choose(config_name)
if CONFIGS[config_name].nil?
CONFIGS
.keys
.map do |config_name|
begin
new(config_name)
rescue => e
puts "Error initializing #{config_name}: #{e}"
nil
end
end
.compact
elsif !CONFIGS.include?(config_name)
raise "Invalid llm"
else
[new(config_name)]
end
end
attr_reader :llm_model
attr_reader :llm_proxy
attr_reader :config_name
def initialize(config_name)
config = CONFIGS[config_name].dup
api_key_env = config.delete(:api_key_env)
if !ENV[api_key_env]
raise "Missing API key for #{config_name}, should be set via #{api_key_env}"
end
config[:api_key] = ENV[api_key_env]
@llm_model = LlmModel.new(config)
@llm_proxy = DiscourseAi::Completions::Llm.proxy(@llm_model)
@config_name = config_name
end
def provider
@llm_model.provider
end
def name
@llm_model.display_name
end
def vision?
@llm_model.vision_enabled
end
end

evals/lib/runner.rb

@@ -0,0 +1,183 @@
#frozen_string_literal: true
class DiscourseAi::Evals::Runner
class StructuredLogger
def initialize
@log = []
@current_step = @log
end
def log(name, args: nil, start_time: nil, end_time: nil)
start_time ||= Time.now.utc
end_time ||= Time.now.utc
args ||= {}
object = { name: name, args: args, start_time: start_time, end_time: end_time }
@current_step << object
end
def step(name, args: nil)
start_time = Time.now.utc
start_step = @current_step
new_step = { type: :step, name: name, args: args || {}, log: [], start_time: start_time }
@current_step << new_step
@current_step = new_step[:log]
yield new_step
@current_step = start_step
new_step[:end_time] = Time.now.utc
end
def to_trace_event_json
trace_events = []
process_id = 1
thread_id = 1
to_trace_event(@log, process_id, thread_id, trace_events)
JSON.pretty_generate({ traceEvents: trace_events })
end
private
def to_trace_event(log_items, pid, tid, trace_events, parent_start_time = nil)
log_items.each do |item|
if item.is_a?(Hash) && item[:type] == :step
trace_events << {
name: item[:name],
cat: "default",
ph: "B", # Begin event
pid: pid,
tid: tid,
args: item[:args],
ts: timestamp_in_microseconds(item[:start_time]),
}
to_trace_event(item[:log], pid, tid, trace_events, item[:start_time])
trace_events << {
name: item[:name],
cat: "default",
ph: "E", # End event
pid: pid,
tid: tid,
ts: timestamp_in_microseconds(item[:end_time]),
}
else
trace_events << {
name: item[:name],
cat: "default",
ph: "B",
pid: pid,
tid: tid,
args: item[:args],
ts: timestamp_in_microseconds(item[:start_time] || parent_start_time || Time.now.utc),
s: "p", # Scope: process
}
trace_events << {
name: item[:name],
cat: "default",
ph: "E",
pid: pid,
tid: tid,
ts: timestamp_in_microseconds(item[:end_time] || Time.now.utc),
s: "p",
}
end
end
end
def timestamp_in_microseconds(time)
(time.to_f * 1_000_000).to_i
end
end
attr_reader :llms, :cases
def self.evals_paths
@eval_paths ||= Dir.glob(File.join(File.join(__dir__, "../cases"), "*/*.yml"))
end
def self.evals
@evals ||= evals_paths.map { |path| DiscourseAi::Evals::Eval.new(path: path) }
end
def self.print
evals.each(&:print)
end
def initialize(eval_name:, llms:)
@llms = llms
@eval = self.class.evals.find { |c| c.id == eval_name }
if !@eval
puts "Error: Unknown evaluation '#{eval_name}'"
exit 1
end
if @llms.empty?
puts "Error: Unknown model 'model'"
exit 1
end
end
def run!
puts "Running evaluation '#{@eval.id}'"
structured_log_filename = "#{@eval.id}-#{Time.now.strftime("%Y%m%d-%H%M%S")}.json"
log_filename = "#{@eval.id}-#{Time.now.strftime("%Y%m%d-%H%M%S")}.log"
logs_dir = File.join(__dir__, "../log")
FileUtils.mkdir_p(logs_dir)
log_path = File.expand_path(File.join(logs_dir, log_filename))
structured_log_path = File.expand_path(File.join(logs_dir, structured_log_filename))
logger = Logger.new(File.open(log_path, "a"))
logger.info("Starting evaluation '#{@eval.id}'")
Thread.current[:llm_audit_log] = logger
structured_logger = Thread.current[:llm_audit_structured_log] = StructuredLogger.new
structured_logger.step("Evaluating #{@eval.id}", args: @eval.to_json) do
llms.each do |llm|
if @eval.vision && !llm.vision?
logger.info("Skipping LLM: #{llm.name} as it does not support vision")
next
end
structured_logger.step("Evaluating with LLM: #{llm.name}") do |step|
logger.info("Evaluating with LLM: #{llm.name}")
print "#{llm.name}: "
result = @eval.run(llm: llm)
step[:args] = result
step[:cname] = result[:result] == :pass ? :good : :bad
if result[:result] == :fail
puts "Failed 🔴"
puts "---- Expected ----\n#{result[:expected_output]}"
puts "---- Actual ----\n#{result[:actual_output]}"
logger.error("Evaluation failed with LLM: #{llm.name}")
elsif result[:result] == :pass
puts "Passed 🟢"
logger.info("Evaluation passed with LLM: #{llm.name}")
else
STDERR.puts "Error: Unknown result #{eval.inspect}"
logger.error("Unknown result: #{eval.inspect}")
end
end
end
end
#structured_logger.save(structured_log_path)
File.write("#{structured_log_path}", structured_logger.to_trace_event_json)
puts
puts "Log file: #{log_path}"
puts "Structured log file (ui.perfetto.dev): #{structured_log_path}"
# temp code
# puts File.read(structured_log_path)
end
end

evals/run (executable)

@@ -0,0 +1,25 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
require_relative "lib/boot"
require_relative "lib/llm"
require_relative "lib/cli"
require_relative "lib/runner"
require_relative "lib/eval"
options = DiscourseAi::Evals::Cli.parse_options!
if options.list
DiscourseAi::Evals::Runner.print
exit 0
end
if options.list_models
DiscourseAi::Evals::Llm.print
exit 0
end
DiscourseAi::Evals::Runner.new(
eval_name: options.eval_name,
llms: DiscourseAi::Evals::Llm.choose(options.model),
).run!

@@ -16,7 +16,8 @@ module DiscourseAi
       def initialize(bot_user, persona, model = nil)
         @bot_user = bot_user
         @persona = persona
-        @model = model || self.class.guess_model(bot_user) || @persona.class.default_llm
+        @model =
+          model || self.class.guess_model(bot_user) || LlmModel.find(@persona.class.default_llm_id)
       end

       attr_reader :bot_user
@@ -245,7 +246,7 @@ module DiscourseAi
         return if associated_llm.nil? # Might be a persona user. Handled by constructor.

-        "custom:#{associated_llm.id}"
+        associated_llm
       end

       def build_placeholder(summary, details, custom_raw: nil)

@@ -17,7 +17,7 @@ module DiscourseAi
           1_048_576
         end

-        def question_consolidator_llm
+        def question_consolidator_llm_id
           nil
         end
@@ -173,9 +173,11 @@ module DiscourseAi
           TEXT

           question_consolidator_llm = llm
-          if self.class.question_consolidator_llm.present?
-            question_consolidator_llm =
-              DiscourseAi::Completions::Llm.proxy(self.class.question_consolidator_llm)
+          if self.class.question_consolidator_llm_id.present?
+            question_consolidator_llm ||=
+              DiscourseAi::Completions::Llm.proxy(
+                LlmModel.find_by(id: self.class.question_consolidator_llm_id),
+              )
           end

           if context[:custom_instructions].present?

@@ -13,6 +13,20 @@ module DiscourseAi
         prompt_cache.flush!
       end

+      def initialize(helper_llm: nil, image_caption_llm: nil)
+        @helper_llm = helper_llm
+        @image_caption_llm = image_caption_llm
+      end
+
+      def helper_llm
+        @helper_llm || DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model)
+      end
+
+      def image_caption_llm
+        @image_caption_llm ||
+          DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_image_caption_model)
+      end
+
       def available_prompts(user)
         key = "prompt_cache_#{I18n.locale}"
         self
@@ -115,7 +129,7 @@ module DiscourseAi
       end

       def generate_prompt(completion_prompt, input, user, force_default_locale = false, &block)
-        llm = DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model)
+        llm = helper_llm
         prompt = completion_prompt.messages_with_input(input)
         localize_prompt!(prompt, user, force_default_locale)
@@ -182,7 +196,7 @@ module DiscourseAi
         )

         raw_caption =
-          DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_image_caption_model).generate(
+          image_caption_llm.generate(
             prompt,
             user: user,
             max_tokens: 1024,

@@ -156,7 +156,7 @@ module DiscourseAi
         )

         if !@streaming_mode
-          return(
+          response_data =
             non_streaming_response(
               response: response,
               xml_tool_processor: xml_tool_processor,
@@ -164,7 +164,7 @@ module DiscourseAi
               partials_raw: partials_raw,
               response_raw: response_raw,
             )
-          )
+          return response_data
         end

         begin
@@ -223,10 +223,46 @@ module DiscourseAi
             log.duration_msecs = (Time.now - start_time) * 1000
             log.save!
             LlmQuota.log_usage(@llm_model, user, log.request_tokens, log.response_tokens)
-            if Rails.env.development?
+            if Rails.env.development? && !ENV["DISCOURSE_AI_NO_DEBUG"]
               puts "#{self.class.name}: request_tokens #{log.request_tokens} response_tokens #{log.response_tokens}"
             end
           end
+
+          if log && (logger = Thread.current[:llm_audit_log])
+            call_data = <<~LOG
+              #{self.class.name}: request_tokens #{log.request_tokens} response_tokens #{log.response_tokens}
+              request:
+              #{format_possible_json_payload(log.raw_request_payload)}
+              response:
+              #{response_data}
+            LOG
+            logger.info(call_data)
+          end
+
+          if log && (structured_logger = Thread.current[:llm_audit_structured_log])
+            llm_request =
+              begin
+                JSON.parse(log.raw_request_payload)
+              rescue StandardError
+                log.raw_request_payload
+              end
+            # gemini puts passwords in query params
+            # we don't want to log that
+            structured_logger.log(
+              "llm_call",
+              args: {
+                class: self.class.name,
+                completion_url: request.uri.to_s.split("?")[0],
+                request: llm_request,
+                result: response_data,
+                request_tokens: log.request_tokens,
+                response_tokens: log.response_tokens,
+                duration: log.duration_msecs,
+                stream: @streaming_mode,
+              },
+              start_time: start_time.utc,
+              end_time: Time.now.utc,
+            )
+          end
         end
       end
     end
@@ -298,6 +334,14 @@ module DiscourseAi
       private

+      def format_possible_json_payload(payload)
+        begin
+          JSON.pretty_generate(JSON.parse(payload))
+        rescue JSON::ParserError
+          payload
+        end
+      end
+
       def start_log(
         provider_id:,
         request_body:,

@@ -16,11 +16,8 @@ module DiscourseAi
         AiPersona
           .where("force_default_llm = ?", true)
-          .pluck(:default_llm, :name, :id)
-          .each do |llm_name, name, id|
-            llm_id = llm_name.split(":").last.to_i
-            rval[llm_id] << { type: :ai_persona, name: name, id: id }
-          end
+          .pluck(:default_llm_id, :name, :id)
+          .each { |llm_id, name, id| rval[llm_id] << { type: :ai_persona, name: name, id: id } }
       end

       if SiteSetting.ai_helper_enabled
@@ -50,6 +47,26 @@ module DiscourseAi
         true
       end

+      # returns an array of hashes (id: , name:, vision_enabled:)
+      def self.values_for_serialization(allowed_seeded_llm_ids: nil)
+        builder = DB.build(<<~SQL)
+          SELECT id, display_name AS name, vision_enabled
+          FROM llm_models
+          /*where*/
+        SQL
+
+        if allowed_seeded_llm_ids.is_a?(Array) && !allowed_seeded_llm_ids.empty?
+          builder.where(
+            "id > 0 OR id IN (:allowed_seeded_llm_ids)",
+            allowed_seeded_llm_ids: allowed_seeded_llm_ids,
+          )
+        else
+          builder.where("id > 0")
+        end
+
+        builder.query_hash.map(&:symbolize_keys)
+      end
+
       def self.values(allowed_seeded_llms: nil)
         values = DB.query_hash(<<~SQL).map(&:symbolize_keys)
           SELECT display_name AS name, id AS value

@@ -13,7 +13,7 @@ module DiscourseAi
           DiscourseAi::AiBot::Bot.as(
             Discourse.system_user,
             persona: @persona,
-            model: @persona.class.default_llm,
+            model: LlmModel.find(@persona.class.default_llm_id),
           )
         super(body)
       end

lib/utils/image_to_text.rb

@@ -0,0 +1,213 @@
# frozen_string_literal: true
class DiscourseAi::Utils::ImageToText
BACKOFF_SECONDS = [5, 30, 60]
MAX_IMAGE_SIZE = 10.megabytes
class Reader
def initialize(uploads:, llm_model:, user:)
@uploads = uploads
@llm_model = llm_model
@user = user
@buffer = +""
@to_process = uploads.dup
end
# return nil if no more data
def read(length)
# for implementation simplicity we will process one image at a time
if !@buffer.empty?
part = @buffer.slice!(0, length)
return part
end
return nil if @to_process.empty?
upload = @to_process.shift
extractor =
DiscourseAi::Utils::ImageToText.new(upload: upload, llm_model: @llm_model, user: @user)
extractor.extract_text do |chunk, error|
if error
Discourse.warn_exception(
error,
message: "Discourse AI: Failed to extract text from image",
)
else
# this introduces chunk markers so discourse rag ingestion requires no overlaps
@buffer << "\n[[metadata ]]\n"
@buffer << chunk
end
end
read(length)
end
end
def self.as_fake_file(uploads:, llm_model:, user:)
# given our implementation for extracting text expect a file, return a simple object that can simulate read(size)
# and stream content
Reader.new(uploads: uploads, llm_model: llm_model, user: user)
end
attr_reader :upload, :llm_model, :user
def initialize(upload:, llm_model:, user:)
@upload = upload
@llm_model = llm_model
@user = user
end
def extract_text(retries: 3)
uploads ||= @uploaded_pages
raise "must specify a block" if !block_given?
extracted = nil
error = nil
backoff = BACKOFF_SECONDS.dup
retries.times do
seconds = nil
begin
extracted = extract_text_from_page(upload)
break
rescue => e
error = e
seconds = backoff.shift || seconds
sleep(seconds)
end
end
if extracted
extracted.each { |chunk| yield(chunk) }
else
yield(nil, error)
end
extracted || []
end
private
def system_message
<<~MSG
OCR the following page into Markdown. Tables should be formatted as Github flavored markdown.
Do not surround your output with triple backticks.
Chunk the document into sections of roughly 250 - 1000 words. Our goal is to identify parts of the page with same semantic theme. These chunks will be embedded and used in a RAG pipeline.
Always prefer returning text in Markdown vs HTML.
Describe all the images and graphs you encounter.
Only return text that will assist in the querying of data. Omit text such as "I had trouble recognizing images" and so on.
Surround the chunks with <chunk> </chunk> html tags.
MSG
end
def extract_text_from_page(page)
raw_text = extract_text_with_tesseract(page)
llm = llm_model.to_llm
if raw_text.present?
messages = [
{
type: :user,
content:
"The following text was extracted from an image using OCR. Please enhance, correct, and structure this content while maintaining the original meaning:\n\n#{raw_text}",
upload_ids: [page.id],
},
]
else
messages = [
{ type: :user, content: "Please OCR the content in the image.", upload_ids: [page.id] },
]
end
prompt = DiscourseAi::Completions::Prompt.new(system_message, messages: messages)
result = llm.generate(prompt, user: Discourse.system_user)
extract_chunks(result)
end
def extract_text_with_tesseract(page)
upload_path =
if page.local?
Discourse.store.path_for(page)
else
Discourse.store.download_safe(page, max_file_size_kb: MAX_IMAGE_SIZE)&.path
end
return "" if !upload_path || !File.exist?(upload_path)
tmp_output_file = Tempfile.new(%w[tesseract_output .txt])
tmp_output = tmp_output_file.path
tmp_output_file.unlink
command = [
"tesseract",
upload_path,
tmp_output.sub(/\.txt$/, ""), # Tesseract adds .txt automatically
]
success =
Discourse::Utils.execute_command(
*command,
timeout: 20.seconds,
failure_message: "Failed to OCR image with Tesseract",
)
if success && File.exist?("#{tmp_output}")
text = File.read("#{tmp_output}")
begin
File.delete("#{tmp_output}")
rescue StandardError
nil
end
text.strip
else
Rails.logger.error("Tesseract OCR failed for #{upload_path}")
""
end
rescue => e
Rails.logger.error("Error during OCR processing: #{e.message}")
""
end
def extract_chunks(text)
return [] if text.nil? || text.empty?
if text.include?("<chunk>") && text.include?("</chunk>")
chunks = []
remaining_text = text.dup
while remaining_text.length > 0
if remaining_text.start_with?("<chunk>")
# Extract chunk content
chunk_end = remaining_text.index("</chunk>")
if chunk_end
chunk = remaining_text[7..chunk_end - 1].strip
chunks << chunk unless chunk.empty?
remaining_text = remaining_text[chunk_end + 8..-1] || ""
else
# Malformed chunk - add remaining text and break
chunks << remaining_text[7..-1].strip
break
end
else
# Handle text before next chunk if it exists
next_chunk = remaining_text.index("<chunk>")
if next_chunk
text_before = remaining_text[0...next_chunk].strip
chunks << text_before unless text_before.empty?
remaining_text = remaining_text[next_chunk..-1]
else
# No more chunks - add remaining text and break
chunks << remaining_text.strip
break
end
end
end
return chunks.reject(&:empty?)
end
[text]
end
end

@@ -0,0 +1,80 @@
# frozen_string_literal: true
class DiscourseAi::Utils::PdfToImages
MAX_PDF_SIZE = 100.megabytes
# this is long, mutool can be faster than magick, 10 minutes will be enough for quite large pdfs
MAX_CONVERT_SECONDS = 600
BACKOFF_SECONDS = [5, 30, 60]
attr_reader :upload, :user
def initialize(upload:, user:)
@upload = upload
@user = user
@uploaded_pages = UploadReference.where(target: upload).map(&:upload).presence
end
def uploaded_pages
@uploaded_pages ||= extract_pages
end
def extract_pages
Dir.mktmpdir("discourse-pdf-#{SecureRandom.hex(8)}")
begin
pdf_path =
if upload.local?
Discourse.store.path_for(upload)
else
Discourse.store.download_safe(upload, max_file_size_kb: MAX_PDF_SIZE)&.path
end
raise Discourse::InvalidParameters.new("Failed to download PDF") if pdf_path.nil?
temp_pdf = File.join(temp_dir, "source.pdf")
FileUtils.cp(pdf_path, temp_pdf)
# Convert PDF to individual page images
output_pattern = File.join(temp_dir, "page-%04d.png")
command = [
"magick",
"-density",
"300",
temp_pdf,
"-background",
"white",
"-auto-orient",
"-quality",
"85",
output_pattern,
]
Discourse::Utils.execute_command(
*command,
failure_message: "Failed to convert PDF to images",
timeout: MAX_CONVERT_SECONDS,
)
uploads = []
Dir
.glob(File.join(temp_dir, "page-*.png"))
.sort
.each do |page_path|
upload =
UploadCreator.new(File.open(page_path), "page-#{File.basename(page_path)}").create_for(
@user.id,
)
uploads << upload
end
# Create upload references
UploadReference.ensure_exist!(upload_ids: uploads.map(&:id), target: @upload)
@uploaded_pages = uploads
ensure
FileUtils.rm_rf(temp_dir)
end
end
end

@@ -2,6 +2,39 @@

 RSpec.describe DiscourseAi::Configuration::LlmEnumerator do
   fab!(:fake_model)
+  fab!(:llm_model)
+  fab!(:seeded_model)
+
+  describe "#values_for_serialization" do
+    it "returns an array for that can be used for serialization" do
+      fake_model.destroy!
+
+      expect(described_class.values_for_serialization).to eq(
+        [
+          {
+            id: llm_model.id,
+            name: llm_model.display_name,
+            vision_enabled: llm_model.vision_enabled,
+          },
+        ],
+      )
+
+      expect(
+        described_class.values_for_serialization(allowed_seeded_llm_ids: [seeded_model.id.to_s]),
+      ).to contain_exactly(
+        {
+          id: seeded_model.id,
+          name: seeded_model.display_name,
+          vision_enabled: seeded_model.vision_enabled,
+        },
+        {
+          id: llm_model.id,
+          name: llm_model.display_name,
+          vision_enabled: llm_model.vision_enabled,
+        },
+      )
+    end
+  end
+
   describe "#global_usage" do
     before do

@@ -2,8 +2,8 @@

 RSpec.describe Jobs::DigestRagUpload do
   fab!(:persona) { Fabricate(:ai_persona) }
-  fab!(:upload)
+  fab!(:upload) { Fabricate(:upload, extension: "txt") }
+  fab!(:pdf_upload) { Fabricate(:upload, extension: "pdf") }

   let(:document_file) { StringIO.new("some text" * 200) }
   fab!(:cloudflare_embedding_def)
@@ -31,6 +31,19 @@ RSpec.describe Jobs::DigestRagUpload do
   end

   describe "#execute" do
+    context "when processing a PDF upload" do
+      it "will reject the indexing if the site setting is not enabled" do
+        SiteSetting.ai_rag_pdf_images_enabled = false
+        expect {
+          described_class.new.execute(
+            upload_id: pdf_upload.id,
+            target_id: persona.id,
+            target_type: persona.class.to_s,
+          )
+        }.to raise_error(Discourse::InvalidAccess)
+      end
+    end
+
     context "when processing an upload containing metadata" do
       it "correctly splits on metadata boundary" do
         # be explicit here about chunking strategy

@@ -13,9 +13,12 @@ RSpec.describe Jobs::StreamDiscordReply, type: :job do
     }.to_json.to_s
   end

+  fab!(:llm_model)
+  fab!(:persona) { Fabricate(:ai_persona, default_llm_id: llm_model.id) }
+
   before do
     SiteSetting.ai_discord_search_mode = "persona"
-    SiteSetting.ai_discord_search_persona = -1
+    SiteSetting.ai_discord_search_persona = persona.id
   end

   it "calls PersonaReplier when search mode is persona" do

View File

@@ -8,8 +8,11 @@ RSpec.describe DiscourseAi::Discord::Bot::PersonaReplier do
   end

   let(:persona_replier) { described_class.new(interaction_body) }

+  fab!(:llm_model)
+  fab!(:persona) { Fabricate(:ai_persona, default_llm_id: llm_model.id) }
+
   before do
-    SiteSetting.ai_discord_search_persona = "-1"
+    SiteSetting.ai_discord_search_persona = persona.id.to_s
     allow_any_instance_of(DiscourseAi::AiBot::Bot).to receive(:reply).and_return(
       "This is a reply from bot!",
     )

View File

@@ -43,7 +43,7 @@ RSpec.describe DiscourseAi::AiBot::EntryPoint do
         :ai_persona,
         enabled: true,
         allowed_group_ids: [bot_allowed_group.id],
-        default_llm: "claude-2",
+        default_llm_id: claude_2.id,
         force_default_llm: true,
       )
     persona.create_user!

View File

@@ -8,7 +8,6 @@ class TestPersona < DiscourseAi::AiBot::Personas::Persona
       DiscourseAi::AiBot::Tools::Image,
     ]
   end
-
   def system_prompt
     <<~PROMPT
       {site_url}
@@ -337,7 +336,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
         name: "custom",
         rag_conversation_chunks: 3,
         allowed_group_ids: [Group::AUTO_GROUPS[:trust_level_0]],
-        question_consolidator_llm: "custom:#{llm_model.id}",
+        question_consolidator_llm_id: llm_model.id,
       )

     UploadReference.ensure_exist!(target: custom_ai_persona, upload_ids: [upload.id])

View File

@@ -241,7 +241,7 @@ RSpec.describe DiscourseAi::AiBot::Playground do
         system_prompt: "You are a helpful bot",
         vision_enabled: true,
         vision_max_pixels: 1_000,
-        default_llm: "custom:#{opus_model.id}",
+        default_llm_id: opus_model.id,
         allow_topic_mentions: true,
       )
   end
@@ -293,7 +293,7 @@ RSpec.describe DiscourseAi::AiBot::Playground do
     persona.create_user!
     persona.update!(
-      default_llm: "custom:#{claude_2.id}",
+      default_llm_id: claude_2.id,
       allow_chat_channel_mentions: true,
       allow_topic_mentions: true,
     )
@@ -313,7 +313,7 @@ RSpec.describe DiscourseAi::AiBot::Playground do
     SiteSetting.ai_bot_enabled = true
     SiteSetting.chat_allowed_groups = "#{Group::AUTO_GROUPS[:trust_level_0]}"
     Group.refresh_automatic_groups!
-    persona.update!(allow_chat_channel_mentions: true, default_llm: "custom:#{opus_model.id}")
+    persona.update!(allow_chat_channel_mentions: true, default_llm_id: opus_model.id)
   end

   it "should behave in a sane way when threading is enabled" do
@@ -428,7 +428,7 @@ RSpec.describe DiscourseAi::AiBot::Playground do
       allow_chat_direct_messages: true,
       allow_topic_mentions: false,
       allow_chat_channel_mentions: false,
-      default_llm: "custom:#{opus_model.id}",
+      default_llm_id: opus_model.id,
     )
     SiteSetting.ai_bot_enabled = true
   end
@@ -629,7 +629,7 @@ RSpec.describe DiscourseAi::AiBot::Playground do
     post = nil
     DiscourseAi::Completions::Llm.with_prepared_responses(
       ["Yes I can", "Magic Title"],
-      llm: "custom:#{claude_2.id}",
+      llm: claude_2,
     ) do
       post =
         create_post(
@@ -648,10 +648,7 @@ RSpec.describe DiscourseAi::AiBot::Playground do
     llm2.toggle_companion_user

-    DiscourseAi::Completions::Llm.with_prepared_responses(
-      ["Hi from bot two"],
-      llm: "custom:#{llm2.id}",
-    ) do
+    DiscourseAi::Completions::Llm.with_prepared_responses(["Hi from bot two"], llm: llm2) do
       create_post(
         user: admin,
         raw: "hi @#{llm2.user.username.capitalize} how are you",
@@ -664,12 +661,9 @@ RSpec.describe DiscourseAi::AiBot::Playground do
     expect(last_post.user_id).to eq(persona.user_id)

     # tether llm, so it can no longer be switched
-    persona.update!(force_default_llm: true, default_llm: "custom:#{claude_2.id}")
+    persona.update!(force_default_llm: true, default_llm_id: claude_2.id)

-    DiscourseAi::Completions::Llm.with_prepared_responses(
-      ["Hi from bot one"],
-      llm: "custom:#{claude_2.id}",
-    ) do
+    DiscourseAi::Completions::Llm.with_prepared_responses(["Hi from bot one"], llm: claude_2) do
       create_post(
         user: admin,
         raw: "hi @#{llm2.user.username.capitalize} how are you",
@@ -689,7 +683,7 @@ RSpec.describe DiscourseAi::AiBot::Playground do
     DiscourseAi::Completions::Llm.with_prepared_responses(
       ["Yes I can", "Magic Title"],
-      llm: "custom:#{claude_2.id}",
+      llm: claude_2,
     ) do
       post =
         create_post(
@@ -731,11 +725,11 @@ RSpec.describe DiscourseAi::AiBot::Playground do
     toggle_enabled_bots(bots: [gpt_35_turbo, claude_2])
     post = nil

-    persona.update!(force_default_llm: true, default_llm: "custom:#{gpt_35_turbo.id}")
+    persona.update!(force_default_llm: true, default_llm_id: gpt_35_turbo.id)

     DiscourseAi::Completions::Llm.with_prepared_responses(
       ["Yes I can", "Magic Title"],
-      llm: "custom:#{gpt_35_turbo.id}",
+      llm: gpt_35_turbo,
     ) do
       post =
         create_post(
@@ -768,7 +762,7 @@ RSpec.describe DiscourseAi::AiBot::Playground do
     DiscourseAi::Completions::Llm.with_prepared_responses(
       ["Yes I can", "Magic Title"],
-      llm: "custom:#{gpt_35_turbo.id}",
+      llm: gpt_35_turbo,
     ) do
       messages =
         MessageBus.track_publish do
@@ -805,10 +799,7 @@ RSpec.describe DiscourseAi::AiBot::Playground do
     )

     # replies as correct persona if replying direct to persona
-    DiscourseAi::Completions::Llm.with_prepared_responses(
-      ["Another reply"],
-      llm: "custom:#{gpt_35_turbo.id}",
-    ) do
+    DiscourseAi::Completions::Llm.with_prepared_responses(["Another reply"], llm: gpt_35_turbo) do
       create_post(
         raw: "Please ignore this bot, I am replying to a user",
         topic: post.topic,
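
A pattern worth noting in the hunks above: `with_prepared_responses` call sites switch from `"custom:<id>"` identifier strings to the `LlmModel` record itself, matching the `LlmModel.proxy` change described at the top of this PR. Condensed, the test-helper usage now reads:

```ruby
# After this PR the llm: keyword takes the record directly
# (previously: llm: "custom:#{claude_2.id}").
DiscourseAi::Completions::Llm.with_prepared_responses(
  ["canned reply"],
  llm: claude_2,
) do
  # exercise code that would otherwise hit the real endpoint
end
```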

View File

@@ -1,6 +1,9 @@
 # frozen_string_literal: true

 RSpec.describe AiPersona do
+  fab!(:llm_model)
+  fab!(:seeded_llm_model) { Fabricate(:llm_model, id: -1) }
+
   it "validates context settings" do
     persona =
       AiPersona.new(
@@ -118,7 +121,7 @@ RSpec.describe AiPersona do
     forum_helper = AiPersona.find_by(name: "Forum Helper")
     forum_helper.update!(
       user_id: 1,
-      default_llm: "anthropic:claude-2",
+      default_llm_id: llm_model.id,
       max_context_posts: 3,
       allow_topic_mentions: true,
       allow_personal_messages: true,
@@ -133,7 +136,7 @@ RSpec.describe AiPersona do
     # tl 0 by default
     expect(klass.allowed_group_ids).to eq([10])
     expect(klass.user_id).to eq(1)
-    expect(klass.default_llm).to eq("anthropic:claude-2")
+    expect(klass.default_llm_id).to eq(llm_model.id)
     expect(klass.max_context_posts).to eq(3)
     expect(klass.allow_topic_mentions).to eq(true)
     expect(klass.allow_personal_messages).to eq(true)
@@ -149,7 +152,7 @@ RSpec.describe AiPersona do
         system_prompt: "test",
         tools: [],
         allowed_group_ids: [],
-        default_llm: "anthropic:claude-2",
+        default_llm_id: llm_model.id,
         max_context_posts: 3,
         allow_topic_mentions: true,
         allow_personal_messages: true,
@@ -164,7 +167,7 @@ RSpec.describe AiPersona do
     expect(klass.system).to eq(false)
     expect(klass.allowed_group_ids).to eq([])
     expect(klass.user_id).to eq(1)
-    expect(klass.default_llm).to eq("anthropic:claude-2")
+    expect(klass.default_llm_id).to eq(llm_model.id)
     expect(klass.max_context_posts).to eq(3)
     expect(klass.allow_topic_mentions).to eq(true)
     expect(klass.allow_personal_messages).to eq(true)
@@ -227,10 +230,9 @@ RSpec.describe AiPersona do
         system_prompt: "test",
         tools: [],
         allowed_group_ids: [],
-        default_llm: "seeded_model:-1",
+        default_llm_id: seeded_llm_model.id,
       )

-    llm_model = Fabricate(:llm_model, id: -1)
     SiteSetting.ai_bot_allowed_seeded_models = ""

     expect(persona.valid?).to eq(false)
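
The last hunk pins down seeded-model behavior: a persona pointing at a seeded LLM (negative id) is invalid once `ai_bot_allowed_seeded_models` no longer lists it. A sketch of the kind of validation this implies on `AiPersona` (method name and error message are assumptions; the validation itself is not shown in this diff):

```ruby
# Sketch: hypothetical AiPersona validation for seeded default LLMs.
validate :seeded_default_llm_is_allowed

def seeded_default_llm_is_allowed
  # Seeded models use negative ids, by assumption.
  return if default_llm_id.blank? || default_llm_id > 0

  allowed_ids = SiteSetting.ai_bot_allowed_seeded_models.split("|")
  errors.add(:default_llm_id, "seeded model is not allowed") if allowed_ids.exclude?(default_llm_id.to_s)
end
```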

View File

@@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::Admin::AiLlmsController do
       :ai_persona,
       name: "Cool persona",
       force_default_llm: true,
-      default_llm: "custom:#{llm_model2.id}",
+      default_llm_id: llm_model2.id,
     )
   end

View File

@@ -4,6 +4,7 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
   fab!(:admin)
   fab!(:ai_persona)
   fab!(:embedding_definition)
+  fab!(:llm_model)

   before do
     sign_in(admin)
@@ -27,9 +28,13 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
       expect(response).to be_successful

       expect(response.parsed_body["meta"]["llms"]).to eq(
-        DiscourseAi::Configuration::LlmEnumerator.values.map do |hash|
-          { "id" => hash[:value], "name" => hash[:name] }
-        end,
+        [
+          {
+            id: llm_model.id,
+            name: llm_model.display_name,
+            vision_enabled: llm_model.vision_enabled,
+          }.stringify_keys,
+        ],
       )
     end
@@ -44,7 +49,8 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
         allow_personal_messages: true,
         allow_chat_channel_mentions: true,
         allow_chat_direct_messages: true,
-        default_llm: "anthropic:claude-2",
+        default_llm_id: llm_model.id,
+        question_consolidator_llm_id: llm_model.id,
         forced_tool_count: 2,
       )
       persona2.create_user!
@@ -60,7 +66,8 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
       expect(serializer_persona2["allow_chat_channel_mentions"]).to eq(true)
       expect(serializer_persona2["allow_chat_direct_messages"]).to eq(true)
-      expect(serializer_persona2["default_llm"]).to eq("anthropic:claude-2")
+      expect(serializer_persona2["default_llm_id"]).to eq(llm_model.id)
+      expect(serializer_persona2["question_consolidator_llm_id"]).to eq(llm_model.id)
       expect(serializer_persona2["user_id"]).to eq(persona2.user_id)
       expect(serializer_persona2["user"]["id"]).to eq(persona2.user_id)
       expect(serializer_persona2["forced_tool_count"]).to eq(2)
@@ -178,7 +185,8 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
         allow_personal_messages: true,
         allow_chat_channel_mentions: true,
         allow_chat_direct_messages: true,
-        default_llm: "anthropic:claude-2",
+        default_llm_id: llm_model.id,
+        question_consolidator_llm_id: llm_model.id,
         forced_tool_count: 2,
       }
     end
@@ -190,18 +198,20 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
           headers: {
             "CONTENT_TYPE" => "application/json",
           }

       expect(response).to be_successful
       persona_json = response.parsed_body["ai_persona"]

       expect(persona_json["name"]).to eq("superbot")
       expect(persona_json["top_p"]).to eq(0.1)
       expect(persona_json["temperature"]).to eq(0.5)
-      expect(persona_json["default_llm"]).to eq("anthropic:claude-2")
+      expect(persona_json["default_llm_id"]).to eq(llm_model.id)
       expect(persona_json["forced_tool_count"]).to eq(2)
       expect(persona_json["allow_topic_mentions"]).to eq(true)
       expect(persona_json["allow_personal_messages"]).to eq(true)
       expect(persona_json["allow_chat_channel_mentions"]).to eq(true)
       expect(persona_json["allow_chat_direct_messages"]).to eq(true)
+      expect(persona_json["question_consolidator_llm_id"]).to eq(llm_model.id)

       persona = AiPersona.find(persona_json["id"])
@@ -258,6 +268,8 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
             rag_chunk_tokens: "102",
             rag_chunk_overlap_tokens: "12",
             rag_conversation_chunks: "13",
+            rag_llm_model_id: llm_model.id,
+            question_consolidator_llm_id: llm_model.id,
           },
         }
@@ -267,6 +279,8 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
       expect(persona.rag_chunk_tokens).to eq(102)
       expect(persona.rag_chunk_overlap_tokens).to eq(12)
       expect(persona.rag_conversation_chunks).to eq(13)
+      expect(persona.rag_llm_model_id).to eq(llm_model.id)
+      expect(persona.question_consolidator_llm_id).to eq(llm_model.id)
     end

     it "supports updating vision params" do
@@ -424,7 +438,7 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
     end

     it "ensures question exists" do
-      ai_persona.update!(default_llm: "custom:#{llm.id}")
+      ai_persona.update!(default_llm_id: llm.id)

       post "/admin/plugins/discourse-ai/ai-personas/stream-reply.json",
         params: {
@@ -436,7 +450,7 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
     end

     it "ensure persona has a user specified" do
-      ai_persona.update!(default_llm: "custom:#{llm.id}")
+      ai_persona.update!(default_llm_id: llm.id)

       post "/admin/plugins/discourse-ai/ai-personas/stream-reply.json",
         params: {
@@ -498,7 +512,7 @@ RSpec.describe DiscourseAi::Admin::AiPersonasController do
       ai_persona.create_user!
       ai_persona.update!(
         allowed_group_ids: [Group::AUTO_GROUPS[:trust_level_0]],
-        default_llm: "custom:#{llm.id}",
+        default_llm_id: llm.id,
         allow_personal_messages: true,
         system_prompt: "you are a helpful bot",
       )

View File

@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 RSpec.describe DiscourseAi::Admin::AiToolsController do
+  fab!(:llm_model)
   fab!(:admin)
   fab!(:ai_tool) do
     AiTool.create!(
@@ -33,6 +34,7 @@ RSpec.describe DiscourseAi::Admin::AiToolsController do
       expect(response).to be_successful
       expect(response.parsed_body["ai_tools"].length).to eq(AiTool.count)
       expect(response.parsed_body["meta"]["presets"].length).to be > 0
+      expect(response.parsed_body["meta"]["llms"].length).to be > 0
     end
   end

View File

@@ -12,6 +12,8 @@ RSpec.describe DiscourseAi::Admin::RagDocumentFragmentsController do
     SiteSetting.ai_embeddings_enabled = true
   end

+  after { @cleanup_files&.each(&:unlink) }
+
   describe "GET #indexing_status_check" do
     it "works for AiPersona" do
       get "/admin/plugins/discourse-ai/rag-document-fragments/files/status.json?target_type=AiPersona&target_id=#{ai_persona.id}"
@@ -22,6 +24,15 @@ RSpec.describe DiscourseAi::Admin::RagDocumentFragmentsController do
   end

   describe "POST #upload_file" do
+    let :fake_pdf do
+      @cleanup_files ||= []
+      tempfile = Tempfile.new(%w[test .pdf])
+      tempfile.write("fake pdf")
+      tempfile.rewind
+      @cleanup_files << tempfile
+      tempfile
+    end
+
     it "works" do
       post "/admin/plugins/discourse-ai/rag-document-fragments/files/upload.json",
         params: {
@@ -33,5 +44,28 @@ RSpec.describe DiscourseAi::Admin::RagDocumentFragmentsController do
       upload = Upload.last
       expect(upload.original_filename).to end_with("spec.txt")
     end
+
+    it "rejects PDF files if site setting is not enabled" do
+      SiteSetting.ai_rag_pdf_images_enabled = false
+
+      post "/admin/plugins/discourse-ai/rag-document-fragments/files/upload.json",
+        params: {
+          file: Rack::Test::UploadedFile.new(fake_pdf),
+        }
+
+      expect(response.status).to eq(400)
+    end
+
+    it "allows PDF files if site setting is enabled" do
+      SiteSetting.ai_rag_pdf_images_enabled = true
+
+      post "/admin/plugins/discourse-ai/rag-document-fragments/files/upload.json",
+        params: {
+          file: Rack::Test::UploadedFile.new(fake_pdf),
+        }
+
+      upload = Upload.last
+      expect(upload.original_filename).to end_with(".pdf")
+    end
   end
 end
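
Taken together, these specs pin down the upload-side gate: text files are always accepted, while PDF (and image) extensions are accepted only when `ai_rag_pdf_images_enabled` is on. A sketch of the extension check this implies in the controller (helper name and error message are assumptions; `Discourse::InvalidParameters` renders as the 400 the spec expects):

```ruby
# Sketch: hypothetical extension validation in RagDocumentFragmentsController.
ALLOWED_TEXT_EXTENSIONS = %w[txt md]
ALLOWED_PDF_IMAGE_EXTENSIONS = %w[pdf png jpg jpeg]

def validate_extension!(filename)
  extension = File.extname(filename).downcase.delete_prefix(".")

  allowed = ALLOWED_TEXT_EXTENSIONS.dup
  allowed.concat(ALLOWED_PDF_IMAGE_EXTENSIONS) if SiteSetting.ai_rag_pdf_images_enabled

  if allowed.exclude?(extension)
    raise Discourse::InvalidParameters.new("Invalid file extension: #{extension}")
  end
end
```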

View File

@@ -39,7 +39,7 @@ RSpec.describe "AI chat channel summarization", type: :system, js: true do
     # lets disable bots but still allow 1 persona
     allowed_persona.create_user!
-    allowed_persona.update!(default_llm: "custom:#{gpt_4.id}")
+    allowed_persona.update!(default_llm_id: gpt_4.id)
     gpt_4.update!(enabled_chat_bot: false)
     gpt_3_5_turbo.update!(enabled_chat_bot: false)

View File

@@ -37,7 +37,7 @@ module("Discourse AI | Unit | Model | ai-persona", function () {
     description: "Description",
     top_p: 0.8,
     temperature: 0.7,
-    default_llm: "Default LLM",
+    default_llm_id: 1,
     force_default_llm: false,
     user: null,
     user_id: null,
@@ -48,7 +48,8 @@ module("Discourse AI | Unit | Model | ai-persona", function () {
     rag_chunk_tokens: 374,
     rag_chunk_overlap_tokens: 10,
     rag_conversation_chunks: 10,
-    question_consolidator_llm: "Question Consolidator LLM",
+    rag_llm_model_id: 1,
+    question_consolidator_llm_id: 2,
     allow_chat: false,
     tool_details: true,
     forced_tool_count: -1,
@@ -85,7 +86,7 @@ module("Discourse AI | Unit | Model | ai-persona", function () {
     temperature: 0.7,
     user: null,
     user_id: null,
-    default_llm: "Default LLM",
+    default_llm_id: 1,
     max_context_posts: 5,
     vision_enabled: true,
     vision_max_pixels: 100,
@@ -93,7 +94,7 @@ module("Discourse AI | Unit | Model | ai-persona", function () {
     rag_chunk_tokens: 374,
     rag_chunk_overlap_tokens: 10,
     rag_conversation_chunks: 10,
-    question_consolidator_llm: "Question Consolidator LLM",
+    question_consolidator_llm_id: 2,
     allow_chat: false,
     tool_details: true,
     forced_tool_count: -1,
@@ -102,6 +103,7 @@ module("Discourse AI | Unit | Model | ai-persona", function () {
     allow_chat_channel_mentions: true,
     allow_chat_direct_messages: true,
     force_default_llm: false,
+    rag_llm_model_id: 1,
   };

   const aiPersona = AiPersona.create({ ...properties });