# frozen_string_literal: true

module DiscourseAi
  module AiHelper
    class Assistant
      IMAGE_CAPTION_MAX_WORDS = 50

      def self.prompt_cache
        @prompt_cache ||= ::DiscourseAi::MultisiteHash.new("prompt_cache")
      end

      def self.clear_prompt_cache!
        prompt_cache.flush!
      end
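
      # Illustrative note: prompt lists are cached per site (MultisiteHash) and
      # per locale, so after changing CompletionPrompt records you can drop the
      # stale entries explicitly, e.g.
      #
      #   DiscourseAi::AiHelper::Assistant.clear_prompt_cache!
      #
      # (a usage sketch, not a required step in any particular flow).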

      def initialize(helper_llm: nil, image_caption_llm: nil)
        @helper_llm = helper_llm
        @image_caption_llm = image_caption_llm
      end

      def helper_llm
        @helper_llm || DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model)
      end

      def image_caption_llm
        @image_caption_llm ||
          DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_image_caption_model)
      end

      def available_prompts(user)
        key = "prompt_cache_#{I18n.locale}"
        self
          .class
          .prompt_cache
          .fetch(key) do
            prompts = CompletionPrompt.where(enabled: true)

            # Hide illustrate_post if disabled
            prompts =
              prompts.where.not(
                name: "illustrate_post",
              ) if SiteSetting.ai_helper_illustrate_post_model == "disabled"

            prompts =
              prompts.map do |prompt|
                if prompt.name == "translate"
                  locale = user.effective_locale
                  locale_hash =
                    LocaleSiteSetting.language_names[locale] ||
                      LocaleSiteSetting.language_names[locale.split("_")[0]]

                  translation =
                    I18n.t(
                      "discourse_ai.ai_helper.prompts.translate",
                      language: locale_hash["nativeName"],
                    ) || prompt.translated_name || prompt.name
                else
                  translation =
                    I18n.t("discourse_ai.ai_helper.prompts.#{prompt.name}", default: nil) ||
                      prompt.translated_name || prompt.name
                end

                {
                  id: prompt.id,
                  name: prompt.name,
                  translated_name: translation,
                  prompt_type: prompt.prompt_type,
                  icon: icon_map(prompt.name),
                  location: location_map(prompt.name),
                }
              end

            prompts
          end
      end
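
      # Illustrative return shape (values depend on which CompletionPrompt rows
      # are enabled on the site; this example is hypothetical):
      #
      #   assistant.available_prompts(user)
      #   # => [
      #   #   { id: 1, name: "proofread", translated_name: "Proofread",
      #   #     prompt_type: "diff", icon: "spell-check", location: ["composer", "post"] },
      #   #   ...
      #   # ]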

      def custom_locale_instructions(user = nil, force_default_locale = false)
        locale = SiteSetting.default_locale
        locale = user.effective_locale if !force_default_locale && user
        locale_hash = LocaleSiteSetting.language_names[locale]

        if locale != "en" && locale_hash
          locale_description = "#{locale_hash["name"]} (#{locale_hash["nativeName"]})"
          "It is imperative that you write your answer in #{locale_description}, you are interacting with a #{locale_description} speaking user. Leave tag names in English."
        else
          nil
        end
      end
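
      # Sketch of the instruction this produces (assuming a site whose default
      # locale is French and no user override):
      #
      #   custom_locale_instructions
      #   # => "It is imperative that you write your answer in French (Français), ..."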

      def localize_prompt!(prompt, user = nil, force_default_locale: false)
        locale_instructions = custom_locale_instructions(user, force_default_locale)
        if locale_instructions
          prompt.messages[0][:content] = prompt.messages[0][:content] + locale_instructions
        end

        if prompt.messages[0][:content].include?("%LANGUAGE%")
          locale = SiteSetting.default_locale
          locale = user.effective_locale if user && !force_default_locale
          locale_hash = LocaleSiteSetting.language_names[locale]

          prompt.messages[0][:content] = prompt.messages[0][:content].gsub(
            "%LANGUAGE%",
            "#{locale_hash["name"]}",
          )
        end

        if user && prompt.messages[0][:content].include?("{{temporal_context}}")
          timezone = user.user_option.timezone || "UTC"
          current_time = Time.now.in_time_zone(timezone)

          temporal_context = {
            utc_date_time: current_time.iso8601,
            local_time: current_time.strftime("%H:%M"),
            user: {
              timezone: timezone,
              weekday: current_time.strftime("%A"),
            },
          }

          prompt.messages[0][:content] = prompt.messages[0][:content].gsub(
            "{{temporal_context}}",
            temporal_context.to_json,
          )

          prompt.messages.each do |message|
            message[:content] = DateFormatter.process_date_placeholders(message[:content], user)
          end
        end
      end
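
      # A substitution sketch (the prompt content here is hypothetical):
      #
      #   prompt.messages[0][:content] = "Reply in %LANGUAGE%. Context: {{temporal_context}}"
      #   localize_prompt!(prompt, user)
      #   # "%LANGUAGE%" becomes the locale's English name (e.g. "French"), and
      #   # "{{temporal_context}}" becomes JSON like
      #   # {"utc_date_time":"2025-01-06T09:00:00Z","local_time":"09:00",
      #   #  "user":{"timezone":"UTC","weekday":"Monday"}}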

      def generate_prompt(completion_prompt, input, user, force_default_locale: false, &block)
        llm = helper_llm
        prompt = completion_prompt.messages_with_input(input)
        localize_prompt!(prompt, user, force_default_locale: force_default_locale)

        llm.generate(
          prompt,
          user: user,
          temperature: completion_prompt.temperature,
          stop_sequences: completion_prompt.stop_sequences,
          feature_name: "ai_helper",
          &block
        )
      end

      def generate_and_send_prompt(completion_prompt, input, user, force_default_locale: false)
        completion_result =
          generate_prompt(
            completion_prompt,
            input,
            user,
            force_default_locale: force_default_locale,
          )
        result = { type: completion_prompt.prompt_type }

        result[:suggestions] = (
          if completion_prompt.list?
            parse_list(completion_result).map { |suggestion| sanitize_result(suggestion) }
          else
            sanitized = sanitize_result(completion_result)
            result[:diff] = parse_diff(input, sanitized) if completion_prompt.diff?
            [sanitized]
          end
        )

        result
      end
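
      # Usage sketch (assumes an enabled CompletionPrompt named "proofread"
      # exists; the input and output strings are hypothetical):
      #
      #   prompt = CompletionPrompt.find_by(name: "proofread")
      #   result = assistant.generate_and_send_prompt(prompt, "teh fix", user)
      #   result[:suggestions] # => ["the fix"]
      #   result[:diff]        # inline HTML diff, present for diff-type prompts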

      def stream_prompt(
        completion_prompt,
        input,
        user,
        channel,
        force_default_locale: false,
        client_id: nil
      )
        streamed_diff = +""
        streamed_result = +""
        start = Time.now

        generate_prompt(
          completion_prompt,
          input,
          user,
          force_default_locale: force_default_locale,
        ) do |partial_response, cancel_function|
          streamed_result << partial_response
          streamed_diff = parse_diff(input, partial_response) if completion_prompt.diff?

          # Throttle updates and check for safe stream points
          if (streamed_result.length > 10 && (Time.now - start > 0.3)) || Rails.env.test?
            sanitized = sanitize_result(streamed_result)

            payload = { result: sanitized, diff: streamed_diff, done: false }
            publish_update(channel, payload, user, client_id: client_id)
            start = Time.now
          end
        end

        final_diff = parse_diff(input, streamed_result) if completion_prompt.diff?

        sanitized_result = sanitize_result(streamed_result)
        if sanitized_result.present?
          publish_update(
            channel,
            { result: sanitized_result, diff: final_diff, done: true },
            user,
            client_id: client_id,
          )
        end
      end
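
      # While streaming, subscribers on `channel` receive throttled payloads of
      # the form { result:, diff:, done: false }, followed by a single final
      # { result:, diff:, done: true } message once generation completes.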

      def generate_image_caption(upload, user)
        prompt =
          DiscourseAi::Completions::Prompt.new(
            "You are a bot specializing in image captioning.",
            messages: [
              {
                type: :user,
                content: [
                  "Describe this image in a single sentence #{custom_locale_instructions(user)}",
                  { upload_id: upload.id },
                ],
              },
            ],
          )

        raw_caption =
          image_caption_llm.generate(
            prompt,
            user: user,
            max_tokens: 1024,
            feature_name: "image_caption",
          )

        raw_caption.delete("|").squish.truncate_words(IMAGE_CAPTION_MAX_WORDS)
      end
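
      # Usage sketch (assumes `upload` is an image Upload record):
      #
      #   caption = assistant.generate_image_caption(upload, user)
      #   # => a single-sentence description, stripped of "|" characters and
      #   #    truncated to IMAGE_CAPTION_MAX_WORDS words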

      private

      SANITIZE_REGEX_STR =
        %w[term context topic replyTo input output result]
          .map { |tag| "<#{tag}>\\n?|\\n?</#{tag}>" }
          .join("|")

      SANITIZE_REGEX = Regexp.new(SANITIZE_REGEX_STR, Regexp::IGNORECASE | Regexp::MULTILINE)
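
      # The regex strips the XML-ish wrapper tags the prompts ask the model to
      # emit, e.g. (illustrative):
      #
      #   sanitize_result("<output>Fixed text</output>") # => "Fixed text"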

      def sanitize_result(result)
        result.gsub(SANITIZE_REGEX, "")
      end

      def publish_update(channel, payload, user, client_id: nil)
        # When publishing, cap the channel backlog and expire streaming entries
        # after 60 seconds so we do not bloat Redis.
        if client_id
          MessageBus.publish(
            channel,
            payload,
            user_ids: [user.id],
            client_ids: [client_id],
            max_backlog_age: 60,
          )
        else
          MessageBus.publish(channel, payload, user_ids: [user.id], max_backlog_age: 60)
        end
      end

      def icon_map(name)
        case name
        when "translate"
          "language"
        when "generate_titles"
          "heading"
        when "proofread"
          "spell-check"
        when "markdown_table"
          "table"
        when "tone"
          "microphone"
        when "custom_prompt"
          "comment"
        when "rewrite"
          "pen"
        when "explain"
          "question"
        when "illustrate_post"
          "images"
        when "replace_dates"
          "calendar-days"
        else
          nil
        end
      end

      def location_map(name)
        case name
        when "translate"
          %w[composer post]
        when "generate_titles"
          %w[composer]
        when "proofread"
          %w[composer post]
        when "markdown_table"
          %w[composer]
        when "tone"
          %w[composer]
        when "custom_prompt"
          %w[composer post]
        when "rewrite"
          %w[composer]
        when "explain"
          %w[post]
        when "summarize"
          %w[post]
        when "illustrate_post"
          %w[composer]
        when "replace_dates"
          %w[composer]
        else
          %w[]
        end
      end

      def parse_diff(text, suggestion)
        cooked_text = PrettyText.cook(text)
        cooked_suggestion = PrettyText.cook(suggestion)

        DiscourseDiff.new(cooked_text, cooked_suggestion).inline_html
      end
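
      # e.g. (illustrative): parse_list("<item>One</item><item>Two</item>")
      # # => ["One", "Two"]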
      def parse_list(list)
        Nokogiri::HTML5.fragment(list).css("item").map(&:text)
      end
    end
  end
end