2024-06-27 17:27:40 +10:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi
|
|
|
|
module Admin
|
|
|
|
class AiToolsController < ::Admin::AdminController
|
|
|
|
requires_plugin ::DiscourseAi::PLUGIN_NAME
|
|
|
|
|
2024-12-12 10:48:58 +11:00
|
|
|
before_action :find_ai_tool, only: %i[test edit update destroy]
|
2024-06-27 17:27:40 +10:00
|
|
|
|
|
|
|
# Renders all AiTool records under the `ai_tools` key using
# AiCustomToolListSerializer, with the serializer root disabled.
def index
  render_serialized({ ai_tools: AiTool.all }, AiCustomToolListSerializer, root: false)
end
|
|
|
|
|
2024-12-12 10:48:58 +11:00
|
|
|
# Action with no body of its own.
# NOTE(review): presumably relies on the framework's default handling for an
# action that renders nothing explicitly — confirm against the routes/views.
def new
end
|
|
|
|
|
|
|
|
# Serializes the tool held in @ai_tool (set by the find_ai_tool
# before_action) with AiCustomToolSerializer.
def edit
  render_serialized(@ai_tool, AiCustomToolSerializer)
end
|
|
|
|
|
|
|
|
# Builds an AiTool from the permitted params, stamps it with the current
# user's id and saves it.
#
# On success: links the attached uploads to the tool, writes a creation log
# entry and renders the serialized tool with status :created.
# On failure: renders the tool as a JSON error.
def create
  tool = AiTool.new(ai_tool_params)
  tool.created_by_id = current_user.id

  # Bail out early when the record could not be saved.
  return render_json_error(tool) unless tool.save

  RagDocumentFragment.link_target_and_uploads(tool, attached_upload_ids)
  log_ai_tool_creation(tool)
  render_serialized(tool, AiCustomToolSerializer, status: :created)
end
|
|
|
|
|
|
|
|
# Applies the permitted params to @ai_tool.
#
# On success: syncs the tool's uploads, logs the update (passing along the
# attributes as they were before the change) and renders the serialized tool.
# On failure: renders the tool as a JSON error.
def update
  # Snapshot taken before `update` so the logger receives the prior values.
  attributes_before = @ai_tool.attributes.dup

  return render_json_error(@ai_tool) unless @ai_tool.update(ai_tool_params)

  RagDocumentFragment.update_target_uploads(@ai_tool, attached_upload_ids)
  log_ai_tool_update(@ai_tool, attributes_before)
  render_serialized(@ai_tool, AiCustomToolSerializer)
end
|
|
|
|
|
|
|
|
# Destroys @ai_tool.
#
# On success: logs the deletion and responds with 204 No Content.
# On failure: renders the tool as a JSON error.
def destroy
  # Collected before the destroy call; these are the details handed to the
  # deletion log afterwards.
  deleted_tool_details = {
    tool_id: @ai_tool.id,
    name: @ai_tool.name,
    tool_name: @ai_tool.tool_name,
    subject: @ai_tool.name,
  }

  return render_json_error(@ai_tool) unless @ai_tool.destroy

  log_ai_tool_deletion(deleted_tool_details)
  head :no_content
end
|
|
|
|
|
|
|
|
# Runs @ai_tool once with caller-supplied parameters and renders the outcome.
#
# Request params read here:
#   ai_tool    - optional; when present, the permitted attributes are assigned
#                to @ai_tool (not saved) before the run.
#   parameters - optional; the arguments handed to the tool runner. A request
#                that omits this key runs the tool with an empty hash instead
#                of failing on `nil.to_unsafe_h`.
#
# Renders `{ output: result }` on success. When the runner returns a Hash with
# an :error entry, that error is rendered as a JSON error. Any exception
# raised along the way is rendered as a JSON error with status 400.
def test
  @ai_tool.assign_attributes(ai_tool_params) if params[:ai_tool]

  # Missing `parameters` means "no arguments", not an internal error.
  parameters = params[:parameters]&.to_unsafe_h || {}

  # we need an llm so we have a tokenizer
  # but will do without if none is available
  llm = LlmModel.first&.to_llm

  runner = @ai_tool.runner(parameters, llm: llm, bot_user: current_user)
  result = runner.invoke

  if result.is_a?(Hash) && result[:error]
    render_json_error result[:error]
  else
    render json: { output: result }
  end
rescue ActiveRecord::RecordNotFound => e
  render_json_error e.message, status: 400
rescue => e
  render_json_error "Error executing the tool: #{e.message}", status: 400
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
2024-09-30 16:27:50 +09:00
|
|
|
# Returns the :id of every entry under params[:ai_tool][:rag_uploads],
# or an empty array when that key is absent.
def attached_upload_ids
  uploads = params[:ai_tool][:rag_uploads].to_a
  uploads.map { |upload| upload[:id] }
end
|
|
|
|
|
2024-06-27 17:27:40 +10:00
|
|
|
# Looks up the AiTool whose id is params[:id] (coerced to an Integer) and
# stores it in @ai_tool for the action that follows.
def find_ai_tool
  tool_id = params[:id].to_i
  @ai_tool = AiTool.find(tool_id)
end
|
|
|
|
|
|
|
|
# Strong-parameter filter for the :ai_tool payload.
#
# :rag_uploads is permitted and then removed from the returned params, so it
# never reaches the model attributes; attached_upload_ids reads it directly
# from `params` instead.
def ai_tool_params
  scalar_fields = %i[
    name
    tool_name
    description
    script
    summary
    rag_chunk_tokens
    rag_chunk_overlap_tokens
    rag_llm_model_id
  ]

  permitted =
    params.require(:ai_tool).permit(
      *scalar_fields,
      rag_uploads: [:id],
      parameters: [:name, :type, :description, :required, enum: []],
    )

  permitted.except(:rag_uploads)
end
|
2025-06-12 12:39:58 -07:00
|
|
|
|
|
|
|
# Field map handed to the staff action logger for tool creation and update
# entries. Keys are AiTool attribute names; values are per-field options.
# Only :script and :parameters carry an option (`type: :large_text`).
# NOTE(review): how the logger treats :large_text is not visible here.
def ai_tool_logger_fields
  plain_fields = %i[
    name
    tool_name
    description
    summary
    enabled
    rag_chunk_tokens
    rag_chunk_overlap_tokens
    rag_llm_model_id
  ]
  large_text_fields = %i[script parameters]

  # Plain fields come first, then the large-text ones.
  fields = plain_fields.to_h { |field| [field, {}] }
  large_text_fields.each { |field| fields[field] = { type: :large_text } }
  fields
end
|
|
|
|
|
|
|
|
# Writes a "tool" creation entry through AiStaffActionLogger on behalf of
# the current user.
#
# ai_tool - the newly created tool; its id and name go into the entry
#           details, plus a :parameter_count when it has any parameters.
def log_ai_tool_creation(ai_tool)
  details = { tool_id: ai_tool.id, subject: ai_tool.name }
  if ai_tool.parameters.present?
    details[:parameter_count] = ai_tool.parameters.size
  end

  staff_logger = DiscourseAi::Utils::AiStaffActionLogger.new(current_user)
  staff_logger.log_creation("tool", ai_tool, ai_tool_logger_fields, details)
end
|
|
|
|
|
|
|
|
# Writes a "tool" update entry through AiStaffActionLogger on behalf of the
# current user.
#
# ai_tool            - the tool after the update.
# initial_attributes - the attribute hash captured by the caller before the
#                      update was applied.
def log_ai_tool_update(ai_tool, initial_attributes)
  details = { tool_id: ai_tool.id, subject: ai_tool.name }
  staff_logger = DiscourseAi::Utils::AiStaffActionLogger.new(current_user)

  staff_logger.log_update("tool", ai_tool, initial_attributes, ai_tool_logger_fields, details)
end
|
|
|
|
|
|
|
|
# Writes a "tool" deletion entry through AiStaffActionLogger on behalf of
# the current user.
#
# tool_details - hash describing the removed tool, passed to the logger as-is.
def log_ai_tool_deletion(tool_details)
  DiscourseAi::Utils::AiStaffActionLogger.new(current_user).log_deletion("tool", tool_details)
end
|
2024-06-27 17:27:40 +10:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|