# Source path: discourse-ai/spec/lib/modules/ai_bot/entry_point_spec.rb
# (Repository viewer header: 189 lines, 6.5 KiB, Ruby; Raw / Permalink / Normal View / History.)

# frozen_string_literal: true
# Specs for DiscourseAi::AiBot::EntryPoint, grouped under "#inject_into".
#
# The examples cover three areas:
# * attributes exposed on the current-user serializer,
# * the TOPIC_AI_BOT_PM_FIELD topic custom field set for single-bot PMs,
# * whether creating a post enqueues a Jobs::CreateAiReply job.
RSpec.describe DiscourseAi::AiBot::EntryPoint do
  describe "#inject_into" do
    describe "subscribes to the post_created event" do
      fab!(:admin)
      fab!(:bot_allowed_group) { Fabricate(:group) }
      fab!(:gpt_4) { Fabricate(:llm_model, name: "gpt-4") }
      # NOTE(review): presumably the bot user only exists once the bot has been
      # enabled in the `before` hook, hence the reload - confirm.
      let(:gpt_bot) { gpt_4.reload.user }
      fab!(:claude_2) { Fabricate(:llm_model, name: "claude-2") }

      # Arguments for a private message addressed to the gpt-4 bot user only.
      let(:post_args) do
        {
          title: "Dear AI, I want to ask a question",
          raw: "Hello, Can you please tell me a story?",
          archetype: Archetype.private_message,
          target_usernames: [gpt_bot.username].join(","),
        }
      end

      before do
        toggle_enabled_bots(bots: [gpt_4, claude_2])
        SiteSetting.ai_bot_enabled = true
        SiteSetting.ai_bot_allowed_groups = bot_allowed_group.id
        bot_allowed_group.add(admin)
      end

      it "adds a can_debug_ai_bot_conversations method to current user" do
        SiteSetting.ai_bot_debugging_allowed_groups = bot_allowed_group.id.to_s

        json = CurrentUserSerializer.new(admin, scope: Guardian.new(admin)).as_json

        expect(json[:current_user][:can_debug_ai_bot_conversations]).to eq(true)
      end

      describe "adding TOPIC_AI_BOT_PM_FIELD to topic custom fields" do
        it "is added when user PMs a single bot" do
          topic = PostCreator.create!(admin, post_args).topic

          expect(topic.reload.custom_fields[DiscourseAi::AiBot::TOPIC_AI_BOT_PM_FIELD]).to eq("t")
        end

        it "is not added when user PMs a bot and another user" do
          user = Fabricate(:user)
          post_args[:target_usernames] = [gpt_bot.username, user.username].join(",")

          topic = PostCreator.create!(admin, post_args).topic

          expect(topic.reload.custom_fields[DiscourseAi::AiBot::TOPIC_AI_BOT_PM_FIELD]).to be_nil
        end
      end

      it "adds information about forcing default llm to current_user_serializer" do
        Group.refresh_automatic_groups!

        persona =
          Fabricate(
            :ai_persona,
            enabled: true,
            allowed_group_ids: [bot_allowed_group.id],
            # The persona references its default LLM by record id.
            default_llm_id: claude_2.id,
            force_default_llm: true,
          )
        persona.create_user!

        json = CurrentUserSerializer.new(admin, scope: Guardian.new(admin)).as_json
        bots = json[:current_user][:ai_enabled_chat_bots]
        persona_bot = bots.find { |bot| bot["id"] == persona.user_id }

        expect(persona_bot["username"]).to eq(persona.user.username)
        expect(persona_bot["force_default_llm"]).to eq(true)
      end

      it "includes user ids for all personas in the serializer" do
        Group.refresh_automatic_groups!

        persona = Fabricate(:ai_persona, enabled: true, allowed_group_ids: [bot_allowed_group.id])
        persona.create_user!

        json = CurrentUserSerializer.new(admin, scope: Guardian.new(admin)).as_json
        bots = json[:current_user][:ai_enabled_chat_bots]
        persona_bot = bots.find { |bot| bot["id"] == persona.user_id }

        expect(persona_bot["username"]).to eq(persona.user.username)
        expect(persona_bot["force_default_llm"]).to eq(false)
      end

      it "queues a job to generate a reply by the AI" do
        expect { PostCreator.create!(admin, post_args) }.to change(
          Jobs::CreateAiReply.jobs,
          :size,
        ).by(1)
      end

      it "does not queue a job for small actions" do
        post = PostCreator.create!(admin, post_args)

        # None of these non-regular post types should enqueue a reply job.
        %i[small_action moderator_action whisper].each do |post_type|
          expect {
            post.topic.add_moderator_post(
              admin,
              "this is a small action",
              post_type: Post.types[post_type],
            )
          }.not_to change(Jobs::CreateAiReply.jobs, :size)
        end
      end

      it "includes the bot's user_id" do
        claude_bot = DiscourseAi::AiBot::EntryPoint.find_user_from_model("claude-2")
        claude_post_attrs = post_args.merge(target_usernames: [claude_bot.username].join(","))

        expect { PostCreator.create!(admin, claude_post_attrs) }.to change(
          Jobs::CreateAiReply.jobs,
          :size,
        ).by(1)

        job_args = Jobs::CreateAiReply.jobs.last["args"].first
        expect(job_args["bot_user_id"]).to eq(claude_bot.id)
      end

      context "when the post is not from a PM" do
        it "does nothing" do
          expect {
            PostCreator.create!(admin, post_args.merge(archetype: Archetype.default))
          }.not_to change(Jobs::CreateAiReply.jobs, :size)
        end
      end

      context "when the bot doesn't have access to the PM" do
        it "does nothing" do
          user_2 = Fabricate(:user)

          expect {
            PostCreator.create!(admin, post_args.merge(target_usernames: [user_2.username]))
          }.not_to change(Jobs::CreateAiReply.jobs, :size)
        end
      end

      context "when the user is not allowed to interact with the bot" do
        it "does nothing" do
          bot_allowed_group.remove(admin)

          expect { PostCreator.create!(admin, post_args) }.not_to change(
            Jobs::CreateAiReply.jobs,
            :size,
          )
        end
      end

      context "when the post was created by the bot" do
        it "does nothing" do
          gpt_topic_id = PostCreator.create!(admin, post_args).topic_id
          # Reply inside the existing PM topic, authored by the bot itself.
          reply_args =
            post_args.except(:archetype, :target_usernames, :title).merge(topic_id: gpt_topic_id)

          expect { PostCreator.create!(gpt_bot, reply_args) }.not_to change(
            Jobs::CreateAiReply.jobs,
            :size,
          )
        end
      end
    end

    it "will include ai_search_discoveries field in the user_option if discover persona is enabled" do
      SiteSetting.ai_bot_enabled = true
      SiteSetting.ai_bot_discover_persona = Fabricate(:ai_persona).id

      # Serialize one user with a guardian for that same user, matching the
      # other serializer examples in this file.
      user = Fabricate(:user)
      serializer = CurrentUserSerializer.new(user, scope: Guardian.new(user))

      expect(serializer.user_option.ai_search_discoveries).to eq(true)
    end
  end
end