2023-05-05 15:28:31 -03:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
RSpec.describe DiscourseAi::AiBot::EntryPoint do
|
|
|
|
describe "#inject_into" do
|
|
|
|
describe "subscribes to the post_created event" do
|
2024-03-05 16:48:28 +01:00
|
|
|
# Fabricated records used by the examples in this group.
fab!(:admin)
fab!(:bot_allowed_group) do
  Fabricate(:group)
end

# One LLM model record per bot exercised below.
fab!(:gpt_4) do
  Fabricate(:llm_model, name: "gpt-4")
end
fab!(:claude_2) do
  Fabricate(:llm_model, name: "claude-2")
end

# The user record associated with the gpt-4 model, re-read from the database.
let(:gpt_bot) { gpt_4.reload.user }
|
|
|
|
|
2023-05-05 15:28:31 -03:00
|
|
|
# PostCreator arguments for a private message addressed only to the gpt-4 bot.
# Examples merge into (or mutate) this hash to vary the recipients or archetype.
let(:post_args) do
  recipients = [gpt_bot.username]

  {
    title: "Dear AI, I want to ask a question",
    raw: "Hello, Can you please tell me a story?",
    archetype: Archetype.private_message,
    target_usernames: recipients.join(","),
  }
end
|
|
|
|
|
|
|
|
# Common setup: hand both models to toggle_enabled_bots, switch the AI bot
# feature on, and restrict it to a single group that admin is a member of.
before do
  bots = [gpt_4, claude_2]
  toggle_enabled_bots(bots: bots)

  SiteSetting.ai_bot_enabled = true
  SiteSetting.ai_bot_allowed_groups = bot_allowed_group.id

  bot_allowed_group.add(admin)
end
|
|
|
|
|
2024-04-15 23:22:06 +10:00
|
|
|
it "adds a can_debug_ai_bot_conversations method to current user" do
  # admin was added to bot_allowed_group during setup, so allowing that group
  # to debug is expected to surface the flag for admin.
  SiteSetting.ai_bot_debugging_allowed_groups = bot_allowed_group.id.to_s

  guardian = Guardian.new(admin)
  payload = CurrentUserSerializer.new(admin, scope: guardian).as_json

  expect(payload[:current_user][:can_debug_ai_bot_conversations]).to eq(true)
end
|
|
|
|
|
2025-04-24 11:17:24 -05:00
|
|
|
describe "adding TOPIC_AI_BOT_PM_FIELD to topic custom fields" do
  # Name of the topic custom field under test.
  let(:pm_field) { DiscourseAi::AiBot::TOPIC_AI_BOT_PM_FIELD }

  it "is added when user PMs a single bot" do
    created_post = PostCreator.create!(admin, post_args)

    # Reload so the custom field is read back from the database.
    stored_fields = created_post.topic.reload.custom_fields
    expect(stored_fields[pm_field]).to eq("t")
  end

  it "is not added when user PMs a bot and another user" do
    other_user = Fabricate(:user)
    recipients = [gpt_bot.username, other_user.username].join(",")

    created_post = PostCreator.create!(admin, post_args.merge(target_usernames: recipients))

    stored_fields = created_post.topic.reload.custom_fields
    expect(stored_fields[pm_field]).to be_nil
  end
end
|
|
|
|
|
2024-10-16 07:20:31 +11:00
|
|
|
it "adds information about forcing default llm to current_user_serializer" do
  Group.refresh_automatic_groups!

  # The persona points at its default LLM by model id (default_llm_id) and is
  # flagged to force that model.
  persona_attrs = {
    enabled: true,
    allowed_group_ids: [bot_allowed_group.id],
    default_llm_id: claude_2.id,
    force_default_llm: true,
  }
  persona = Fabricate(:ai_persona, persona_attrs)
  persona.create_user!

  payload = CurrentUserSerializer.new(admin, scope: Guardian.new(admin)).as_json
  enabled_bots = payload[:current_user][:ai_enabled_chat_bots]

  # Locate the serialized entry that belongs to the persona's user.
  persona_entry = enabled_bots.find { |entry| entry["id"] == persona.user_id }

  expect(persona_entry["username"]).to eq(persona.user.username)
  expect(persona_entry["force_default_llm"]).to eq(true)
end
|
|
|
|
|
|
|
|
it "includes user ids for all personas in the serializer" do
  Group.refresh_automatic_groups!

  persona = Fabricate(:ai_persona, enabled: true, allowed_group_ids: [bot_allowed_group.id])
  persona.create_user!

  payload = CurrentUserSerializer.new(admin, scope: Guardian.new(admin)).as_json
  enabled_bots = payload[:current_user][:ai_enabled_chat_bots]

  # Locate the serialized entry that belongs to the persona's user.
  persona_entry = enabled_bots.find { |entry| entry["id"] == persona.user_id }

  expect(persona_entry["username"]).to eq(persona.user.username)
  # force_default_llm was not set on this persona, so it is expected to be false.
  expect(persona_entry["force_default_llm"]).to eq(false)
end
|
|
|
|
|
2023-05-05 15:28:31 -03:00
|
|
|
it "queues a job to generate a reply by the AI" do
  reply_jobs = Jobs::CreateAiReply.jobs

  # A PM from an allowed user to the bot should enqueue exactly one reply job.
  expect { PostCreator.create!(admin, post_args) }.to change(reply_jobs, :size).by(1)
end
|
|
|
|
|
2023-05-20 17:45:54 +10:00
|
|
|
it "does not queue a job for small actions" do
  post = PostCreator.create!(admin, post_args)

  # Each of these post types is added to the bot PM in turn; none of them
  # should enqueue an AI reply job.
  %i[small_action moderator_action whisper].each do |type_name|
    expect {
      post.topic.add_moderator_post(
        admin,
        "this is a small action",
        post_type: Post.types[type_name],
      )
    }.not_to change(Jobs::CreateAiReply.jobs, :size)
  end
end
|
|
|
|
|
2023-05-11 10:03:03 -03:00
|
|
|
it "includes the bot's user_id" do
  claude_bot = DiscourseAi::AiBot::EntryPoint.find_user_from_model("claude-2")
  pm_to_claude = post_args.merge(target_usernames: [claude_bot.username].join(","))
  reply_jobs = Jobs::CreateAiReply.jobs

  expect { PostCreator.create!(admin, pm_to_claude) }.to change(reply_jobs, :size).by(1)

  # The most recently enqueued job must target the bot that was messaged.
  enqueued_args = Jobs::CreateAiReply.jobs.last["args"].first
  expect(enqueued_args["bot_user_id"]).to eq(claude_bot.id)
end
|
|
|
|
|
2023-05-05 15:28:31 -03:00
|
|
|
context "when the post is not from a PM" do
  it "does nothing" do
    # Same arguments as the bot PM, but created with the default archetype.
    regular_topic_args = post_args.merge(archetype: Archetype.default)
    reply_jobs = Jobs::CreateAiReply.jobs

    expect { PostCreator.create!(admin, regular_topic_args) }.not_to change(reply_jobs, :size)
  end
end
|
|
|
|
|
|
|
|
context "when the bot doesn't have access to the PM" do
  it "does nothing" do
    # The PM is addressed to another human only; the bot is not a recipient.
    human_recipient = Fabricate(:user)
    human_only_pm = post_args.merge(target_usernames: [human_recipient.username])
    reply_jobs = Jobs::CreateAiReply.jobs

    expect { PostCreator.create!(admin, human_only_pm) }.not_to change(reply_jobs, :size)
  end
end
|
|
|
|
|
|
|
|
context "when the user is not allowed to interact with the bot" do
  it "does nothing" do
    # Undo the group membership granted during setup.
    bot_allowed_group.remove(admin)
    reply_jobs = Jobs::CreateAiReply.jobs

    expect { PostCreator.create!(admin, post_args) }.not_to change(reply_jobs, :size)
  end
end
|
|
|
|
|
|
|
|
context "when the post was created by the bot" do
  it "does nothing" do
    pm_topic_id = PostCreator.create!(admin, post_args).topic_id

    # Drop the topic-creation keys so the same arguments describe a reply
    # inside the existing PM.
    bot_reply_args =
      post_args.except(:archetype, :target_usernames, :title).merge(topic_id: pm_topic_id)
    reply_jobs = Jobs::CreateAiReply.jobs

    expect { PostCreator.create!(gpt_bot, bot_reply_args) }.not_to change(reply_jobs, :size)
  end
end
|
|
|
|
end
|
2025-03-10 14:17:58 -07:00
|
|
|
|
|
|
|
it "will include ai_search_discoveries field in the user_option if discover persona is enabled" do
  SiteSetting.ai_bot_enabled = true
  SiteSetting.ai_bot_discover_persona = Fabricate(:ai_persona).id

  # Use one user as both the serialized object and the guardian scope, in line
  # with the other CurrentUserSerializer examples in this spec. Fabricating a
  # second user for the guardian scoped the serializer to an unrelated account.
  user = Fabricate(:user)
  serializer = CurrentUserSerializer.new(user, scope: Guardian.new(user))

  expect(serializer.user_option.ai_search_discoveries).to eq(true)
end
|
2023-05-05 15:28:31 -03:00
|
|
|
end
|
|
|
|
end
|