2023-08-22 03:46:34 +10:00
|
|
|
discourse_ai:
|
2023-02-23 11:08:34 -03:00
|
|
|
# Boolean setting; defaults to true and is flagged `client: true`.
discourse_ai_enabled:
  client: true
  default: true
# Enum restricted to the three values under `choices`; starts as "strict".
ai_artifact_security:
  type: enum
  choices:
    - "disabled"
    - "lax"
    - "strict"
  default: "strict"
  client: true
|
2023-02-22 20:46:53 -03:00
|
|
|
|
|
|
|
# Sentiment settings.
ai_sentiment_enabled:
  client: true
  default: false
# Empty by default; the value is tied to the JSON schema class named below.
ai_sentiment_model_configs:
  json_schema: DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema
  default: ""
# Hidden integer bounded to 0..10000.
ai_sentiment_backfill_maximum_posts_per_hour:
  default: 2500
  min: 0
  max: 10000
  hidden: true
# Hidden integer; no min/max is declared.
ai_sentiment_backfill_post_max_age_days:
  hidden: true
  default: 60
|
2024-12-12 09:17:25 +11:00
|
|
|
|
2025-04-29 17:38:54 +10:00
|
|
|
# Provider endpoints and credentials.
# Every API key / token in this group carries `secret: true`, matching
# ai_openai_api_key, ai_stability_api_key and the other credentials here.

# Shorthand form: the scalar is the default value.
ai_openai_image_generation_url: "https://api.openai.com/v1/images/generations"
ai_openai_image_edit_url: "https://api.openai.com/v1/images/edits"
ai_openai_embeddings_url:
  hidden: true
  default: "https://api.openai.com/v1/embeddings"
ai_openai_organization:
  default: ""
  hidden: true
ai_openai_api_key:
  default: ""
  secret: true
ai_stability_api_key:
  default: ""
  secret: true
ai_stability_api_url:
  default: "https://api.stability.ai"
# Enum; the default must stay one of the listed choices.
ai_stability_engine:
  default: "stable-diffusion-xl-1024-v1-0"
  type: enum
  choices:
    - "sd3"
    - "sd3-turbo"
    - "stable-diffusion-xl-1024-v1-0"
    - "stable-diffusion-768-v2-1"
    - "stable-diffusion-v1-5"
ai_hugging_face_tei_endpoint:
  hidden: true
  default: ""
ai_hugging_face_tei_endpoint_srv:
  default: ""
  hidden: true
# Credential: now marked secret (was only hidden).
ai_hugging_face_tei_api_key:
  default: ""
  hidden: true
  secret: true
ai_hugging_face_tei_reranker_endpoint:
  default: ""
ai_hugging_face_tei_reranker_endpoint_srv:
  default: ""
  hidden: true
# Credential: expanded from the bare `""` shorthand so it can be marked
# secret; the default value is unchanged.
ai_hugging_face_tei_reranker_api_key:
  default: ""
  secret: true
ai_google_custom_search_api_key:
  default: ""
  secret: true
ai_google_custom_search_cx:
  default: ""
ai_cloudflare_workers_account_id:
  default: ""
  secret: true
  hidden: true
ai_cloudflare_workers_api_token:
  default: ""
  secret: true
  hidden: true
# Credential: now marked secret (was only hidden).
ai_gemini_api_key:
  default: ""
  hidden: true
  secret: true
ai_strict_token_counting:
  default: false
  hidden: true
|
2023-03-15 17:02:20 -03:00
|
|
|
|
2024-08-12 15:40:23 -07:00
|
|
|
# AI helper settings.
ai_helper_enabled:
  validator: "DiscourseAi::Configuration::LlmDependencyValidator"
  client: true
  default: false
# Group ids -- 3: @staff, 14: @trust_level_4
composer_ai_helper_allowed_groups:
  type: group_list
  list_type: compact
  default: "3|14"
  allow_any: false
  refresh: true
ai_helper_allowed_in_pm:
  client: true
  default: false
# Enum whose values come from the enumerator class; empty by default.
ai_helper_model:
  type: enum
  enum: "DiscourseAi::Configuration::LlmEnumerator"
  default: ""
  allow_any: false
  validator: "DiscourseAi::Configuration::LlmValidator"
# Group ids -- 3: @staff
ai_helper_custom_prompts_allowed_groups:
  type: group_list
  list_type: compact
  default: "3"
  allow_any: false
  refresh: true
# Group ids -- 3: @staff, 14: @trust_level_4
post_ai_helper_allowed_groups:
  type: group_list
  list_type: compact
  default: "3|14"
  allow_any: false
  refresh: true
ai_helper_automatic_chat_thread_title:
  default: false
# Integer default; the unit is not stated in this file.
ai_helper_automatic_chat_thread_title_delay:
  default: 5
ai_helper_illustrate_post_model:
  type: enum
  choices:
    - stable_diffusion_xl
    - dall_e_3
    - disabled
  default: disabled
# Pipe-separated list; only the values under `choices` are accepted
# (`allow_any: false`). "image_caption" is offered but not on by default.
ai_helper_enabled_features:
  type: list
  list_type: compact
  choices:
    - "suggestions"
    - "context_menu"
    - "image_caption"
  default: "suggestions|context_menu"
  allow_any: false
  client: true
  refresh: true
ai_helper_image_caption_model:
  type: enum
  enum: "DiscourseAi::Configuration::LlmVisionEnumerator"
  default: ""
# Group ids -- 10: @trust_level_0
ai_auto_image_caption_allowed_groups:
  type: group_list
  list_type: compact
  default: "10"
  allow_any: false
  client: true
  refresh: true
ai_helper_model_allowed_seeded_models:
  type: list
  list_type: compact
  default: ""
  hidden: true
ai_helper_image_caption_model_allowed_seeded_models:
  type: list
  list_type: compact
  default: ""
  hidden: true
|
2024-03-06 06:04:37 +11:00
|
|
|
|
2023-05-12 12:54:25 +10:00
|
|
|
# Embeddings settings.
ai_embeddings_enabled:
  validator: "DiscourseAi::Configuration::EmbeddingsModuleValidator"
  client: true
  default: false
# Enum whose values come from the enumerator class; empty by default.
ai_embeddings_selected_model:
  type: enum
  enum: "DiscourseAi::Configuration::EmbeddingDefsEnumerator"
  default: ""
  allow_any: false
  validator: "DiscourseAi::Configuration::EmbeddingDefsValidator"
ai_embeddings_per_post_enabled:
  hidden: true
  default: false
# Shorthand form: the scalar is the default value.
ai_embeddings_generate_for_pms: false
ai_embeddings_semantic_related_topics_enabled:
  client: true
  default: false
# Shorthand form: the scalar is the default value.
ai_embeddings_semantic_related_topics: 5
ai_embeddings_semantic_related_include_closed_topics: true
ai_embeddings_backfill_batch_size:
  hidden: true
  default: 250
ai_embeddings_semantic_search_enabled:
  validator: "DiscourseAi::Configuration::LlmDependencyValidator"
  client: true
  default: false
ai_embeddings_semantic_search_hyde_model:
  type: enum
  enum: "DiscourseAi::Configuration::LlmEnumerator"
  default: ""
  allow_any: false
  validator: "DiscourseAi::Configuration::LlmValidator"
ai_embeddings_semantic_search_hyde_model_allowed_seeded_models:
  type: list
  list_type: compact
  default: ""
  hidden: true
ai_embeddings_semantic_quick_search_enabled:
  default: false
  hidden: true
  client: true
|
2025-01-21 12:23:19 -03:00
|
|
|
|
2025-01-29 17:13:19 -03:00
|
|
|
# Every setting in this group is declared `hidden: true`.
ai_embeddings_discourse_service_api_endpoint:
  hidden: true
  default: ""
ai_embeddings_discourse_service_api_endpoint_srv:
  hidden: true
  default: ""
ai_embeddings_discourse_service_api_key:
  default: ""
  secret: true
  hidden: true
# Enum; the default must stay one of the listed choices.
ai_embeddings_model:
  type: enum
  choices:
    - all-mpnet-base-v2
    - text-embedding-ada-002
    - text-embedding-3-small
    - text-embedding-3-large
    - multilingual-e5-large
    - bge-large-en
    - gemini
    - bge-m3
  default: "bge-large-en"
  allow_any: false
  hidden: true
ai_embeddings_pg_connection_string:
  hidden: true
  default: ""
|
|
|
|
|
2024-07-04 10:48:18 +10:00
|
|
|
# Summarization and gists settings.
ai_summarization_enabled:
  validator: "DiscourseAi::Configuration::LlmDependencyValidator"
  client: true
  default: false
  area: "ai-features/summarization"
ai_summarization_model:
  type: enum
  enum: "DiscourseAi::Configuration::LlmEnumerator"
  default: ""
  allow_any: false
  validator: "DiscourseAi::Configuration::LlmValidator"
  hidden: true
# Enum whose values come from the persona enumerator class.
ai_summarization_persona:
  type: enum
  enum: "DiscourseAi::Configuration::PersonaEnumerator"
  default: "-11"
  area: "ai-features/summarization"
ai_pm_summarization_allowed_groups:
  type: group_list
  list_type: compact
  default: ""
  area: "ai-features/summarization"
# Deprecated. TODO(roman): Remove 2025-09-01
# Group ids -- 3: @staff, 13: @trust_level_3
ai_custom_summarization_allowed_groups:
  type: group_list
  list_type: compact
  default: "3|13"
  hidden: true
ai_summary_gists_enabled:
  area: "ai-features/gists"
  default: false
ai_summary_gists_persona:
  type: enum
  enum: "DiscourseAi::Configuration::PersonaEnumerator"
  default: "-12"
  area: "ai-features/gists"
# Deprecated. TODO(roman): Remove 2025-09-01
# Group ids -- 0: everyone
ai_summary_gists_allowed_groups:
  type: group_list
  list_type: compact
  default: "0"
  hidden: true
ai_summarization_model_allowed_seeded_models:
  type: list
  list_type: compact
  default: ""
  hidden: true
# Integer bounded to 1..10000.
ai_summary_backfill_topic_max_age_days:
  area: "ai-features/summarization"
  default: 30
  min: 1
  max: 10000
# Integer bounded to 0..10000; the default is 0.
ai_summary_backfill_maximum_topics_per_hour:
  area: "ai-features/summarization"
  default: 0
  min: 0
  max: 10000
ai_summary_backfill_minimum_word_count:
  area: "ai-features/summarization"
  default: 200
|
2023-05-05 15:28:31 -03:00
|
|
|
|
|
|
|
# AI bot and automation settings.
ai_bot_enabled:
  area: "ai-features/discoveries"
  client: true
  default: false
ai_bot_enable_chat_warning:
  client: true
  default: false
ai_bot_debugging_allowed_groups:
  type: group_list
  list_type: compact
  default: ""
  allow_any: false
# Group ids -- 3: @staff, 14: @trust_level_4
ai_bot_allowed_groups:
  type: group_list
  list_type: compact
  default: "3|14"
# Group ids -- 1: admins, 2: moderators
ai_bot_public_sharing_allowed_groups:
  type: group_list
  list_type: compact
  default: "1|2"
  allow_any: false
  client: false
  refresh: true
ai_bot_add_to_header:
  client: true
  default: true
ai_bot_github_access_token:
  secret: true
  default: ""
ai_bot_allowed_seeded_models:
  type: list
  list_type: compact
  default: ""
  hidden: true
# Enum whose values come from the persona enumerator class; empty by default.
ai_bot_discover_persona:
  type: enum
  enum: "DiscourseAi::Configuration::PersonaEnumerator"
  default: ""
  client: true
  area: "ai-features/discoveries"
ai_automation_max_triage_per_minute:
  hidden: true
  default: 60
ai_automation_max_triage_per_post_per_minute:
  hidden: true
  default: 2
ai_automation_allowed_seeded_models:
  type: list
  list_type: compact
  default: ""
  hidden: true
|
2024-10-16 12:41:18 -03:00
|
|
|
|
2025-04-10 08:16:31 -07:00
|
|
|
# Discord search settings; all of them share the same `area`.
ai_discord_search_enabled:
  area: "ai-features/discord_search"
  client: true
  default: false
ai_discord_app_id:
  area: "ai-features/discord_search"
  client: false
  default: ""
ai_discord_app_public_key:
  area: "ai-features/discord_search"
  client: false
  default: ""
# Enum with two accepted values; starts as "search".
ai_discord_search_mode:
  type: enum
  choices:
    - search
    - persona
  default: "search"
  area: "ai-features/discord_search"
ai_discord_search_persona:
  type: enum
  enum: "DiscourseAi::Configuration::PersonaEnumerator"
  default: ""
  area: "ai-features/discord_search"
ai_discord_allowed_guilds:
  area: "ai-features/discord_search"
  type: list
  list_type: compact
  default: ""
|
2024-12-12 09:17:25 +11:00
|
|
|
|
|
|
|
# Spam detection settings; every entry is declared `hidden: true`.
ai_spam_detection_enabled:
  default: false
  hidden: true
ai_spam_detection_user_id:
  default: ""
  hidden: true
# `list_type: compact` added so this list is declared the same way as the
# other *_allowed_seeded_models settings in this file.
ai_spam_detection_model_allowed_seeded_models:
  default: ""
  hidden: true
  type: list
  list_type: compact
|
FEATURE: PDF support for rag pipeline (#1118)
This PR introduces several enhancements and refactorings to the AI Persona and RAG (Retrieval-Augmented Generation) functionalities within the discourse-ai plugin. Here's a breakdown of the changes:
**1. LLM Model Association for RAG and Personas:**
- **New Database Columns:** Adds `rag_llm_model_id` to both `ai_personas` and `ai_tools` tables. This allows specifying a dedicated LLM for RAG indexing, separate from the persona's primary LLM. Adds `default_llm_id` and `question_consolidator_llm_id` to `ai_personas`.
- **Migration:** Includes a migration (`20250210032345_migrate_persona_to_llm_model_id.rb`) to populate the new `default_llm_id` and `question_consolidator_llm_id` columns in `ai_personas` based on the existing `default_llm` and `question_consolidator_llm` string columns, and a post migration to remove the latter.
- **Model Changes:** The `AiPersona` and `AiTool` models now `belong_to` an `LlmModel` via `rag_llm_model_id`. The `LlmModel.proxy` method now accepts an `LlmModel` instance instead of just an identifier. `AiPersona` now has `default_llm_id` and `question_consolidator_llm_id` attributes.
- **UI Updates:** The AI Persona and AI Tool editors in the admin panel now allow selecting an LLM for RAG indexing (if PDF/image support is enabled). The RAG options component displays an LLM selector.
- **Serialization:** The serializers (`AiCustomToolSerializer`, `AiCustomToolListSerializer`, `LocalizedAiPersonaSerializer`) have been updated to include the new `rag_llm_model_id`, `default_llm_id` and `question_consolidator_llm_id` attributes.
**2. PDF and Image Support for RAG:**
- **Site Setting:** Introduces a new hidden site setting, `ai_rag_pdf_images_enabled`, to control whether PDF and image files can be indexed for RAG. This defaults to `false`.
- **File Upload Validation:** The `RagDocumentFragmentsController` now checks the `ai_rag_pdf_images_enabled` setting and allows PDF, PNG, JPG, and JPEG files if enabled. Error handling is included for cases where PDF/image indexing is attempted with the setting disabled.
- **PDF Processing:** Adds a new utility class, `DiscourseAi::Utils::PdfToImages`, which uses ImageMagick (`magick`) to convert PDF pages into individual PNG images. A maximum PDF size and conversion timeout are enforced.
- **Image Processing:** A new utility class, `DiscourseAi::Utils::ImageToText`, is included to handle OCR for the images and PDFs.
- **RAG Digestion Job:** The `DigestRagUpload` job now handles PDF and image uploads. It uses `PdfToImages` and `ImageToText` to extract text and create document fragments.
- **UI Updates:** The RAG uploader component now accepts PDF and image file types if `ai_rag_pdf_images_enabled` is true. The UI text is adjusted to indicate supported file types.
**3. Refactoring and Improvements:**
- **LLM Enumeration:** The `DiscourseAi::Configuration::LlmEnumerator` now provides a `values_for_serialization` method, which returns a simplified array of LLM data (id, name, vision_enabled) suitable for use in serializers. This avoids exposing unnecessary details to the frontend.
- **AI Helper:** The `AiHelper::Assistant` now takes optional `helper_llm` and `image_caption_llm` parameters in its constructor, allowing for greater flexibility.
- **Bot and Persona Updates:** Several updates were made across the codebase, replacing the string-based LLM association with the new model-based one.
- **Audit Logs:** The `DiscourseAi::Completions::Endpoints::Base` now formats raw request payloads as pretty JSON for easier auditing.
- **Eval Script:** An evaluation script is included.
**4. Testing:**
- The PR introduces a new eval system for LLMs, which allows us to test how functionality works across various LLM providers. It lives in `/evals`.
2025-02-14 12:15:07 +11:00
|
|
|
|
2025-02-18 09:22:57 +11:00
|
|
|
# Hidden boolean; off by default.
ai_rag_images_enabled:
  hidden: true
  default: false
|
2025-05-01 12:21:07 +10:00
|
|
|
|
|
|
|
# Boolean; on by default and flagged `client: true`.
ai_bot_enable_dedicated_ux:
  client: true
  default: true
|
|
|
|
|
2025-05-12 12:12:30 -03:00
|
|
|
# Translation settings.
ai_translation_enabled:
  validator: "DiscourseAi::Configuration::LlmDependencyValidator"
  client: true
  default: false
# Enum whose values come from the enumerator class; empty by default.
ai_translation_model:
  type: enum
  enum: "DiscourseAi::Configuration::LlmEnumerator"
  default: ""
  allow_any: false
  validator: "DiscourseAi::Configuration::LlmValidator"
|