diff --git a/.gitignore b/.gitignore
index 3b519490..e0744210 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@ node_modules
 /gems
 /auto_generated
 .env
+evals/log
+evals/cases
diff --git a/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-tools-edit.js b/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-tools-edit.js
index 7ba40586..65b3f7f3 100644
--- a/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-tools-edit.js
+++ b/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-tools-edit.js
@@ -14,5 +14,7 @@ export default class DiscourseAiToolsEditRoute extends DiscourseRoute {
 
     controller.set("allTools", toolsModel);
     controller.set("presets", toolsModel.resultSetMeta.presets);
+    controller.set("llms", toolsModel.resultSetMeta.llms);
+    controller.set("settings", toolsModel.resultSetMeta.settings);
   }
 }
diff --git a/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-tools-new.js b/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-tools-new.js
index 5c05907f..1e645097 100644
--- a/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-tools-new.js
+++ b/admin/assets/javascripts/discourse/routes/admin-plugins-show-discourse-ai-tools-new.js
@@ -11,5 +11,7 @@ export default class DiscourseAiToolsNewRoute extends DiscourseRoute {
 
     controller.set("allTools", toolsModel);
     controller.set("presets", toolsModel.resultSetMeta.presets);
+    controller.set("llms", toolsModel.resultSetMeta.llms);
+    controller.set("settings", toolsModel.resultSetMeta.settings);
   }
 }
diff --git a/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-tools/edit.hbs b/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-tools/edit.hbs
index 7fcc45c5..444f6966 100644
--- a/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-tools/edit.hbs
+++ b/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-tools/edit.hbs
@@ -3,5 +3,7 @@
   @tools={{this.allTools}}
   @model={{this.model}}
   @presets={{this.presets}}
+  @llms={{this.llms}}
+  @settings={{this.settings}}
 />
\ No newline at end of file
diff --git a/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-tools/new.hbs b/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-tools/new.hbs
index 7fcc45c5..444f6966 100644
--- a/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-tools/new.hbs
+++ b/admin/assets/javascripts/discourse/templates/admin-plugins/show/discourse-ai-tools/new.hbs
@@ -3,5 +3,7 @@
   @tools={{this.allTools}}
   @model={{this.model}}
   @presets={{this.presets}}
+  @llms={{this.llms}}
+  @settings={{this.settings}}
 />
\ No newline at end of file
diff --git a/app/controllers/discourse_ai/admin/ai_personas_controller.rb b/app/controllers/discourse_ai/admin/ai_personas_controller.rb
index b0317f02..540ac824 100644
--- a/app/controllers/discourse_ai/admin/ai_personas_controller.rb
+++ b/app/controllers/discourse_ai/admin/ai_personas_controller.rb
@@ -32,10 +32,19 @@ module DiscourseAi
             }
           end
         llms =
-          DiscourseAi::Configuration::LlmEnumerator
-            .values(allowed_seeded_llms: SiteSetting.ai_bot_allowed_seeded_models)
-            .map { |hash| { id: hash[:value], name: hash[:name] } }
-        render json: { ai_personas: ai_personas, meta: { tools: tools, llms: llms } }
+          DiscourseAi::Configuration::LlmEnumerator.values_for_serialization(
+            allowed_seeded_llm_ids: SiteSetting.ai_bot_allowed_seeded_models_map,
+          )
+        render json: {
+                 ai_personas: ai_personas,
+                 meta: {
+                   tools: tools,
+                   llms: llms,
+                   settings: {
+                     rag_pdf_images_enabled: SiteSetting.ai_rag_pdf_images_enabled,
+                   },
+                 },
+               }
       end
 
       def new
@@ -187,7 +196,7 @@ module DiscourseAi
             :priority,
             :top_p,
             :temperature,
-            :default_llm,
+            :default_llm_id,
             :user_id,
             :max_context_posts,
             :vision_enabled,
@@ -195,7 +204,8 @@ module DiscourseAi
             :rag_chunk_tokens,
             :rag_chunk_overlap_tokens,
             :rag_conversation_chunks,
-            :question_consolidator_llm,
+            :rag_llm_model_id,
+            :question_consolidator_llm_id,
             :allow_chat_channel_mentions,
             :allow_chat_direct_messages,
             :allow_topic_mentions,
diff --git a/app/controllers/discourse_ai/admin/ai_tools_controller.rb b/app/controllers/discourse_ai/admin/ai_tools_controller.rb
index fc7b2984..caf7fae6 100644
--- a/app/controllers/discourse_ai/admin/ai_tools_controller.rb
+++ b/app/controllers/discourse_ai/admin/ai_tools_controller.rb
@@ -90,6 +90,7 @@ module DiscourseAi
           :summary,
           :rag_chunk_tokens,
           :rag_chunk_overlap_tokens,
+          :rag_llm_model_id,
           rag_uploads: [:id],
           parameters: [:name, :type, :description, :required, enum: []],
         )
diff --git a/app/controllers/discourse_ai/admin/rag_document_fragments_controller.rb b/app/controllers/discourse_ai/admin/rag_document_fragments_controller.rb
index 732a28fb..d27f4a82 100644
--- a/app/controllers/discourse_ai/admin/rag_document_fragments_controller.rb
+++ b/app/controllers/discourse_ai/admin/rag_document_fragments_controller.rb
@@ -49,6 +49,7 @@ module DiscourseAi
       def validate_extension!(filename)
         extension = File.extname(filename)[1..-1] || ""
         authorized_extensions = %w[txt md]
+        authorized_extensions.concat(%w[pdf png jpg jpeg]) if SiteSetting.ai_rag_pdf_images_enabled
         if !authorized_extensions.include?(extension)
           raise Discourse::InvalidParameters.new(
             I18n.t(
diff --git a/app/jobs/regular/digest_rag_upload.rb b/app/jobs/regular/digest_rag_upload.rb
index bfc2ac4b..179660d1 100644
--- a/app/jobs/regular/digest_rag_upload.rb
+++ b/app/jobs/regular/digest_rag_upload.rb
@@ -28,7 +28,7 @@ module ::Jobs
 
         # Check if this is the first time we process this upload.
         if fragment_ids.empty?
-          document = get_uploaded_file(upload)
+          document = get_uploaded_file(upload: upload, target: target)
           return if document.nil?
 
           RagDocumentFragment.publish_status(upload, { total: 0, indexed: 0, left: 0 })
@@ -163,7 +163,38 @@ module ::Jobs
       [buffer, split_char]
     end
 
-    def get_uploaded_file(upload)
+    def get_uploaded_file(upload:, target:)
+      if %w[pdf png jpg jpeg].include?(upload.extension) && !SiteSetting.ai_rag_pdf_images_enabled
+        raise Discourse::InvalidAccess.new(
+                "The setting ai_rag_pdf_images_enabled is false, can not index images and pdfs.",
+              )
+      end
+      if upload.extension == "pdf"
+        pages =
+          DiscourseAi::Utils::PdfToImages.new(
+            upload: upload,
+            user: Discourse.system_user,
+          ).uploaded_pages
+
+        return(
+          DiscourseAi::Utils::ImageToText.as_fake_file(
+            uploads: pages,
+            llm_model: target.rag_llm_model,
+            user: Discourse.system_user,
+          )
+        )
+      end
+
+      if %w[png jpg jpeg].include?(upload.extension)
+        return(
+          DiscourseAi::Utils::ImageToText.as_fake_file(
+            uploads: [upload],
+            llm_model: target.rag_llm_model,
+            user: Discourse.system_user,
+          )
+        )
+      end
+
       store = Discourse.store
       @file ||=
         if store.external?
diff --git a/app/models/ai_persona.rb b/app/models/ai_persona.rb
index 2c573977..867d44ce 100644
--- a/app/models/ai_persona.rb
+++ b/app/models/ai_persona.rb
@@ -1,8 +1,8 @@
 # frozen_string_literal: true
 
 class AiPersona < ActiveRecord::Base
-  # TODO remove this line 01-1-2025
-  self.ignored_columns = %i[commands allow_chat mentionable]
+  # TODO remove this line 01-10-2025
+  self.ignored_columns = %i[default_llm question_consolidator_llm]
 
   # places a hard limit, so per site we cache a maximum of 500 classes
   MAX_PERSONAS_PER_SITE = 500
@@ -12,7 +12,7 @@ class AiPersona < ActiveRecord::Base
   validates :system_prompt, presence: true, length: { maximum: 10_000_000 }
   validate :system_persona_unchangeable, on: :update, if: :system
   validate :chat_preconditions
-  validate :allowed_seeded_model, if: :default_llm
+  validate :allowed_seeded_model, if: :default_llm_id
   validates :max_context_posts, numericality: { greater_than: 0 }, allow_nil: true
   # leaves some room for growth but sets a maximum to avoid memory issues
   # we may want to revisit this in the future
@@ -30,6 +30,10 @@ class AiPersona < ActiveRecord::Base
 
   belongs_to :created_by, class_name: "User"
   belongs_to :user
+  belongs_to :default_llm, class_name: "LlmModel"
+  belongs_to :question_consolidator_llm, class_name: "LlmModel"
+  belongs_to :rag_llm_model, class_name: "LlmModel"
+
   has_many :upload_references, as: :target, dependent: :destroy
   has_many :uploads, through: :upload_references
 
@@ -62,7 +66,7 @@ class AiPersona < ActiveRecord::Base
           user_id: persona.user_id,
           username: persona.user.username_lower,
           allowed_group_ids: persona.allowed_group_ids,
-          default_llm: persona.default_llm,
+          default_llm_id: persona.default_llm_id,
           force_default_llm: persona.force_default_llm,
           allow_chat_channel_mentions: persona.allow_chat_channel_mentions,
           allow_chat_direct_messages: persona.allow_chat_direct_messages,
@@ -157,12 +161,12 @@ class AiPersona < ActiveRecord::Base
       user_id
       system
       mentionable
-      default_llm
+      default_llm_id
      max_context_posts
       vision_enabled
       vision_max_pixels
       rag_conversation_chunks
-      question_consolidator_llm
+      question_consolidator_llm_id
       allow_chat_channel_mentions
       allow_chat_direct_messages
       allow_topic_mentions
@@ -302,7 +306,7 @@ class AiPersona < ActiveRecord::Base
     if (
          allow_chat_channel_mentions || allow_chat_direct_messages || allow_topic_mentions ||
            force_default_llm
-       ) && !default_llm
+       ) && !default_llm_id
       errors.add(:default_llm, I18n.t("discourse_ai.ai_bot.personas.default_llm_required"))
     end
   end
@@ -332,13 +336,12 @@ class AiPersona < ActiveRecord::Base
   end
 
   def allowed_seeded_model
-    return if default_llm.blank?
+    return if default_llm_id.blank?
 
-    llm = LlmModel.find_by(id: default_llm.split(":").last.to_i)
-    return if llm.nil?
-    return if !llm.seeded?
+    return if default_llm.nil?
+    return if !default_llm.seeded?
 
-    return if SiteSetting.ai_bot_allowed_seeded_models.include?(llm.id.to_s)
+    return if SiteSetting.ai_bot_allowed_seeded_models_map.include?(default_llm.id.to_s)
 
     errors.add(:default_llm, I18n.t("discourse_ai.llm.configuration.invalid_seeded_model"))
   end
@@ -348,36 +351,37 @@ end
 #
 # Table name: ai_personas
 #
-#  id                          :bigint           not null, primary key
-#  name                        :string(100)      not null
-#  description                 :string(2000)     not null
-#  system_prompt               :string(10000000) not null
-#  allowed_group_ids           :integer          default([]), not null, is an Array
-#  created_by_id               :integer
-#  enabled                     :boolean          default(TRUE), not null
-#  created_at                  :datetime         not null
-#  updated_at                  :datetime         not null
-#  system                      :boolean          default(FALSE), not null
-#  priority                    :boolean          default(FALSE), not null
-#  temperature                 :float
-#  top_p                       :float
-#  user_id                     :integer
-#  default_llm                 :text
-#  max_context_posts           :integer
-#  vision_enabled              :boolean          default(FALSE), not null
-#  vision_max_pixels           :integer          default(1048576), not null
-#  rag_chunk_tokens            :integer          default(374), not null
-#  rag_chunk_overlap_tokens    :integer          default(10), not null
-#  rag_conversation_chunks     :integer          default(10), not null
-#  question_consolidator_llm   :text
-#  tool_details                :boolean          default(TRUE), not null
-#  tools                       :json             not null
-#  forced_tool_count           :integer          default(-1), not null
-#  allow_chat_channel_mentions :boolean          default(FALSE), not null
-#  allow_chat_direct_messages  :boolean          default(FALSE), not null
-#  allow_topic_mentions        :boolean          default(FALSE), not null
-#  allow_personal_messages     :boolean          default(TRUE), not null
-#  force_default_llm           :boolean          default(FALSE), not null
+#  id                           :bigint           not null, primary key
+#  name                         :string(100)      not null
+#  description                  :string(2000)     not null
+#  system_prompt                :string(10000000) not null
+#  allowed_group_ids            :integer          default([]), not null, is an Array
+#  created_by_id                :integer
+#  enabled                      :boolean          default(TRUE), not null
+#  created_at                   :datetime         not null
+#  updated_at                   :datetime         not null
+#  system                       :boolean          default(FALSE), not null
+#  priority                     :boolean          default(FALSE), not null
+#  temperature                  :float
+#  top_p                        :float
+#  user_id                      :integer
+#  max_context_posts            :integer
+#  vision_enabled               :boolean          default(FALSE), not null
+#  vision_max_pixels            :integer          default(1048576), not null
+#  rag_chunk_tokens             :integer          default(374), not null
+#  rag_chunk_overlap_tokens     :integer          default(10), not null
+#  rag_conversation_chunks      :integer          default(10), not null
+#  tool_details                 :boolean          default(TRUE), not null
+#  tools                        :json             not null
+#  forced_tool_count            :integer          default(-1), not null
+#  allow_chat_channel_mentions  :boolean          default(FALSE), not null
+#  allow_chat_direct_messages   :boolean          default(FALSE), not null
+#  allow_topic_mentions         :boolean          default(FALSE), not null
+#  allow_personal_messages      :boolean          default(TRUE), not null
+#  force_default_llm            :boolean          default(FALSE), not null
+#  rag_llm_model_id             :bigint
+#  default_llm_id               :bigint
+#  question_consolidator_llm_id :bigint
 #
 # Indexes
 #
diff --git a/app/models/ai_tool.rb b/app/models/ai_tool.rb
index 97b2a983..ba3b4098 100644
--- a/app/models/ai_tool.rb
+++ b/app/models/ai_tool.rb
@@ -8,6 +8,7 @@ class AiTool < ActiveRecord::Base
   validates :script, presence: true, length: { maximum: 100_000 }
   validates :created_by_id, presence: true
   belongs_to :created_by, class_name: "User"
+  belongs_to :rag_llm_model, class_name: "LlmModel"
   has_many :rag_document_fragments, dependent: :destroy, as: :target
   has_many :upload_references, as: :target, dependent: :destroy
   has_many :uploads, through: :upload_references
@@ -371,4 +372,4 @@ end
 #  rag_chunk_tokens         :integer          default(374), not null
 #  rag_chunk_overlap_tokens :integer          default(10), not null
 #  tool_name                :string(100)      default(""), not null
-#
+#  rag_llm_model_id         :bigint
diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb
index 228c9335..a5f5e510 100644
--- a/app/models/llm_model.rb
+++ b/app/models/llm_model.rb
@@ -70,7 +70,7 @@ class LlmModel < ActiveRecord::Base
   end
 
   def to_llm
-    DiscourseAi::Completions::Llm.proxy(identifier)
+    DiscourseAi::Completions::Llm.proxy(self)
   end
 
   def identifier
diff --git a/app/serializers/ai_custom_tool_list_serializer.rb b/app/serializers/ai_custom_tool_list_serializer.rb
index f0e3f1cc..9f4ad25d 100644
--- a/app/serializers/ai_custom_tool_list_serializer.rb
+++ b/app/serializers/ai_custom_tool_list_serializer.rb
@@ -6,7 +6,13 @@ class AiCustomToolListSerializer < ApplicationSerializer
   has_many :ai_tools, serializer: AiCustomToolSerializer, embed: :objects
 
   def meta
-    { presets: AiTool.presets }
+    {
+      presets: AiTool.presets,
+      llms: DiscourseAi::Configuration::LlmEnumerator.values_for_serialization,
+      settings: {
+        rag_pdf_images_enabled: SiteSetting.ai_rag_pdf_images_enabled,
+      },
+    }
   end
 
   def ai_tools
diff --git a/app/serializers/ai_custom_tool_serializer.rb b/app/serializers/ai_custom_tool_serializer.rb
index 1ac80430..2af1a89f 100644
--- a/app/serializers/ai_custom_tool_serializer.rb
+++ b/app/serializers/ai_custom_tool_serializer.rb
@@ -10,6 +10,7 @@ class AiCustomToolSerializer < ApplicationSerializer
              :script,
              :rag_chunk_tokens,
              :rag_chunk_overlap_tokens,
+             :rag_llm_model_id,
              :created_by_id,
              :created_at,
              :updated_at
diff --git a/app/serializers/localized_ai_persona_serializer.rb b/app/serializers/localized_ai_persona_serializer.rb
index 81f6fa85..dde41dfb 100644
--- a/app/serializers/localized_ai_persona_serializer.rb
+++ b/app/serializers/localized_ai_persona_serializer.rb
@@ -14,7 +14,7 @@ class LocalizedAiPersonaSerializer < ApplicationSerializer
              :allowed_group_ids,
              :temperature,
              :top_p,
-             :default_llm,
+             :default_llm_id,
              :user_id,
              :max_context_posts,
              :vision_enabled,
@@ -22,7 +22,8 @@ class LocalizedAiPersonaSerializer < ApplicationSerializer
              :rag_chunk_tokens,
              :rag_chunk_overlap_tokens,
              :rag_conversation_chunks,
-             :question_consolidator_llm,
+             :rag_llm_model_id,
+             :question_consolidator_llm_id,
              :tool_details,
              :forced_tool_count,
              :allow_chat_channel_mentions,
diff --git a/assets/javascripts/discourse/admin/models/ai-persona.js b/assets/javascripts/discourse/admin/models/ai-persona.js
index 2c9c7383..18c97a7d 100644
--- a/assets/javascripts/discourse/admin/models/ai-persona.js
+++ b/assets/javascripts/discourse/admin/models/ai-persona.js
@@ -15,7 +15,7 @@ const CREATE_ATTRIBUTES = [
   "top_p",
   "temperature",
   "user_id",
-  "default_llm",
+  "default_llm_id",
   "force_default_llm",
   "user",
   "max_context_posts",
@@ -25,7 +25,8 @@ const CREATE_ATTRIBUTES = [
   "rag_chunk_tokens",
   "rag_chunk_overlap_tokens",
   "rag_conversation_chunks",
-  "question_consolidator_llm",
+  "rag_llm_model_id",
+  "question_consolidator_llm_id",
   "allow_chat",
   "tool_details",
   "forced_tool_count",
@@ -43,7 +44,7 @@ const SYSTEM_ATTRIBUTES = [
   "priority",
   "tools",
   "user_id",
-  "default_llm",
+  "default_llm_id",
   "force_default_llm",
   "user",
   "max_context_posts",
@@ -53,7 +54,8 @@ const SYSTEM_ATTRIBUTES = [
   "rag_chunk_tokens",
   "rag_chunk_overlap_tokens",
   "rag_conversation_chunks",
-  "question_consolidator_llm",
+  "rag_llm_model_id",
+  "question_consolidator_llm_id",
   "tool_details",
   "allow_personal_messages",
   "allow_topic_mentions",
diff --git a/assets/javascripts/discourse/admin/models/ai-tool.js b/assets/javascripts/discourse/admin/models/ai-tool.js
index 7188a68c..4b9cdf7c 100644
--- a/assets/javascripts/discourse/admin/models/ai-tool.js
+++ b/assets/javascripts/discourse/admin/models/ai-tool.js
@@ -12,6 +12,7 @@ const CREATE_ATTRIBUTES = [
   "rag_uploads",
   "rag_chunk_tokens",
   "rag_chunk_overlap_tokens",
+  "rag_llm_model_id",
   "enabled",
 ];
 
diff --git a/assets/javascripts/discourse/components/ai-persona-editor.gjs b/assets/javascripts/discourse/components/ai-persona-editor.gjs
index 121ad852..2f4c1bd8 100644
--- a/assets/javascripts/discourse/components/ai-persona-editor.gjs
+++ b/assets/javascripts/discourse/components/ai-persona-editor.gjs
@@ -167,27 +167,27 @@ export default class PersonaEditor extends Component {
   }
 
   get mappedQuestionConsolidatorLlm() {
-    return this.editingModel?.question_consolidator_llm || "blank";
+    return this.editingModel?.question_consolidator_llm_id ?? "blank";
   }
 
   set mappedQuestionConsolidatorLlm(value) {
     if (value === "blank") {
-      this.editingModel.question_consolidator_llm = null;
+      this.editingModel.question_consolidator_llm_id = null;
     } else {
-      this.editingModel.question_consolidator_llm = value;
+      this.editingModel.question_consolidator_llm_id = value;
     }
   }
 
   get mappedDefaultLlm() {
-    return this.editingModel?.default_llm || "blank";
+    return this.editingModel?.default_llm_id ?? "blank";
   }
 
   set mappedDefaultLlm(value) {
     if (value === "blank") {
-      this.editingModel.default_llm = null;
+      this.editingModel.default_llm_id = null;
       this.hasDefaultLlm = false;
     } else {
-      this.editingModel.default_llm = value;
+      this.editingModel.default_llm_id = value;
       this.hasDefaultLlm = true;
     }
   }
@@ -596,9 +596,14 @@ export default class PersonaEditor extends Component {
             @target={{this.editingModel}}
            @updateUploads={{this.updateUploads}}
            @onRemove={{this.removeUpload}}
+            @allowPdfsAndImages={{@personas.resultSetMeta.settings.rag_pdf_images_enabled}}
           />
-          <RagOptions @model={{this.editingModel}} />
+          <RagOptions
+            @model={{this.editingModel}}
+            @llms={{@personas.resultSetMeta.llms}}
+            @allowPdfsAndImages={{@personas.resultSetMeta.settings.rag_pdf_images_enabled}}
+          />
         {{/if}}
diff --git a/assets/javascripts/discourse/components/rag-options.gjs b/assets/javascripts/discourse/components/rag-options.gjs
index 894c1895..2f38346b 100644
--- a/assets/javascripts/discourse/components/rag-options.gjs
+++ b/assets/javascripts/discourse/components/rag-options.gjs
@@ -5,6 +5,7 @@ import { on } from "@ember/modifier";
 import { action } from "@ember/object";
 import DTooltip from "discourse/components/d-tooltip";
 import { i18n } from "discourse-i18n";
+import AiLlmSelector from "./ai-llm-selector";
 
 export default class RagOptions extends Component {
   @tracked showIndexingOptions = false;
@@ -22,6 +23,22 @@ export default class RagOptions extends Component {
       : i18n("discourse_ai.rag.options.show_indexing_options");
   }
 
+  get visionLlms() {
+    return this.args.llms.filter((llm) => llm.vision_enabled);
+  }
+
+  get visionLlmId() {
+    return this.args.model.rag_llm_model_id ?? "blank";
+  }
+
+  set visionLlmId(value) {
+    if (value === "blank") {
+      this.args.model.rag_llm_model_id = null;
+    } else {
+      this.args.model.rag_llm_model_id = value;
+    }
+  }
+
diff --git a/assets/javascripts/discourse/components/rag-uploader.gjs b/assets/javascripts/discourse/components/rag-uploader.gjs
index 115e9293..510f003b 100644
--- a/assets/javascripts/discourse/components/rag-uploader.gjs
+++ b/assets/javascripts/discourse/components/rag-uploader.gjs
@@ -77,6 +77,14 @@ export default class RagUploader extends Component {
     this.updateUploads(this.ragUploads);
   }
 
+  get acceptedFileTypes() {
+    if (this.args?.allowPdfsAndImages) {
+      return ".txt,.md,.pdf,.png,.jpg,.jpeg";
+    } else {
+      return ".txt,.md";
+    }
+  }
+
   @action
   submitFiles() {
     this.uppyUpload.openPicker();
   }
@@ -119,7 +127,11 @@ export default class RagUploader extends Component {