mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-07-31 10:23:27 +00:00
Adds context length controls to researcher (max tokens per post and batch). Allows picking the LLM for researcher. Fixes a bug where unicode usernames were not working. Fixes documentation of OR logic.
185 lines
6.4 KiB
Ruby
185 lines
6.4 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
RSpec.describe DiscourseAi::Personas::Tools::Researcher do
|
|
# Search indexing is enabled for each example and disabled again afterwards.
before do
  SearchIndexer.enable
end

after do
  SearchIndexer.disable
end

fab!(:llm_model)

# Bot user resolved from the fabricated LLM model's name.
let(:bot_user) do
  DiscourseAi::AiBot::EntryPoint.find_user_from_model(llm_model.name)
end

# LLM proxy addressed through the "custom:<id>" identifier of the fabricated model.
let(:llm) do
  DiscourseAi::Completions::Llm.proxy("custom:#{llm_model.id}")
end

# No-op progress callback handed to #invoke by examples that ignore progress output.
let(:progress_blk) { proc {} }

fab!(:admin)
fab!(:user)

# Category and tags that the filter strings in the examples refer to by name.
fab!(:category) do
  Fabricate(:category, name: "research-category")
end
fab!(:tag_research) do
  Fabricate(:tag, name: "research")
end
fab!(:tag_data) do
  Fabricate(:tag, name: "data")
end

# One topic in the research category carrying both tags, with a single post.
fab!(:topic_with_tags) do
  Fabricate(:topic, category: category, tags: [tag_research, tag_data])
end
fab!(:post) do
  Fabricate(:post, topic: topic_with_tags)
end

# A post fabricated without the research category or tags.
fab!(:another_post) do
  Fabricate(:post)
end

before do
  SiteSetting.ai_bot_enabled = true
end
|
|
|
|
# Exercises the "researcher_llm", "max_tokens_per_post" and
# "max_tokens_per_batch" persona options: the tool is built with `llm: nil`,
# canned responses are prepared for a secondary model, and the prompt that
# was recorded is inspected to confirm the long post was cut down.
it "uses custom researcher_llm and applies token limits correctly" do
  # A second LLM model to test the researcher_llm option.
  secondary_llm_model = Fabricate(:llm_model, name: "secondary_model")

  # Test content with long text so the per-post token limit below is exceeded.
  topic = Fabricate(:topic, category: category, tags: [tag_research])
  long_content = "zz " * 100
  Fabricate(:post, topic: topic, raw: long_content, user: user, skip_validation: true)

  # Captured from inside the block so the prompts can be inspected afterwards.
  prompts = nil

  DiscourseAi::Completions::Llm.with_prepared_responses(
    [["Research completed"]],
    llm: secondary_llm_model,
  ) do |_, _, recorded_prompts|
    researcher =
      described_class.new(
        { filter: "category:research-category", goals: "analyze test content", dry_run: false },
        persona_options: {
          "researcher_llm" => secondary_llm_model.id,
          "max_tokens_per_post" => 50, # very small to force truncation
          "max_tokens_per_batch" => 8000,
        },
        bot_user: bot_user,
        llm: nil,
        context: DiscourseAi::Personas::BotContext.new(user: user, post: post),
      )

    results = researcher.invoke(&progress_blk)

    expect(results[:dry_run]).to eq(false)
    expect(results[:results]).to be_present

    prompts = recorded_prompts
  end

  expect(prompts).to be_present

  user_message = prompts.first.messages.find { |m| m[:type] == :user }
  expect(user_message[:content]).to be_present

  # Count how many times "zz " appears in the content (a bit of token magic:
  # we lose a couple because we redact).
  expect(user_message[:content].scan("zz ").count).to eq(48)
end
|
|
|
|
describe "#invoke" do
|
|
# Dry run restricted to topic_with_tags, which holds exactly one fabricated post.
it "can correctly filter to a topic id" do
  tool_args = {
    dry_run: true,
    filter: "topic:#{topic_with_tags.id}",
    goals: "analyze topic content",
  }

  researcher =
    described_class.new(
      tool_args,
      bot_user: bot_user,
      llm: llm,
      context: DiscourseAi::Personas::BotContext.new(user: user, post: post),
    )

  post_count = researcher.invoke(&progress_blk)[:number_of_posts]

  expect(post_count).to eq(1)
end
|
|
|
|
# A dry run echoes back the filter and goals it was given and reports a
# positive post count, both in the returned hash and on the tool instance.
it "returns filter information and result count" do
  filter = "tag:research after:2023"
  goals = "analyze post patterns"

  researcher =
    described_class.new(
      { filter: filter, goals: goals, dry_run: true },
      bot_user: bot_user,
      llm: llm,
      context: DiscourseAi::Personas::BotContext.new(user: user, post: post),
    )

  outcome = researcher.invoke(&progress_blk)

  # Values reported in the returned hash.
  expect(outcome[:filter]).to eq(filter)
  expect(outcome[:goals]).to eq(goals)
  expect(outcome[:dry_run]).to eq(true)
  expect(outcome[:number_of_posts]).to be > 0

  # The same information exposed on the tool instance.
  expect(researcher.filter).to eq(filter)
  expect(researcher.result_count).to be > 0
end
|
|
|
|
# Invoking the tool without a :filter argument yields an error entry
# in the returned hash.
it "handles empty filters" do
  tool_args = { goals: "analyze all content" }
  researcher = described_class.new(tool_args, bot_user: bot_user, llm: llm)

  outcome = researcher.invoke(&progress_blk)

  expect(outcome[:error]).to eq("No filter provided")
end
|
|
|
|
# The "max_results" persona option is supplied as the string "50" and is
# expected to be readable from #options as the integer 50.
it "accepts max_results option" do
  persona_options = { "max_results" => "50" }

  researcher =
    described_class.new(
      { filter: "category:research-category" },
      persona_options: persona_options,
      bot_user: bot_user,
      llm: llm,
    )

  max_results = researcher.options[:max_results]

  expect(max_results).to eq(50)
end
|
|
|
|
# A filter containing a fragment the tool does not recognise ("invalidfilter")
# is reported through the :error entry of the returned hash.
it "returns error for invalid filter fragments" do
  tool_args = { filter: "invalidfilter tag:research", goals: "analyze content" }

  researcher =
    described_class.new(
      tool_args,
      bot_user: bot_user,
      llm: llm,
      context: DiscourseAi::Personas::BotContext.new(user: user, post: post),
    )

  outcome = researcher.invoke(&progress_blk)

  expect(outcome[:error]).to include("Invalid filter fragment")
end
|
|
|
|
# Full (non-dry) run over posts filtered by category and username: checks the
# last progress message, the echoed arguments and the first LLM-produced result.
it "returns correct results for non-dry-run with filtered posts" do
  # Stage 2 topics, each with 2 posts (one by `user`, one by `admin`).
  topics = Array.new(2) { Fabricate(:topic, category: category, tags: [tag_research]) }
  topics.each do |topic|
    Fabricate(:post, topic: topic, raw: "Relevant content 1", user: user)
    Fabricate(:post, topic: topic, raw: "Relevant content 2", user: admin)
  end

  # Filter to posts by user in research-category.
  researcher =
    described_class.new(
      {
        filter: "category:research-category username:#{user.username}",
        goals: "find relevant content",
        dry_run: false,
      },
      bot_user: bot_user,
      llm: llm,
      context: DiscourseAi::Personas::BotContext.new(user: user, post: post),
    )

  responses = Array.new(10) { |i| ["Found: Relevant content #{i + 1}"] }
  results = nil

  # Remember only the most recent progress message passed to the callback.
  last_progress = nil
  capture_progress = proc { |response| last_progress = response }

  DiscourseAi::Completions::Llm.with_prepared_responses(responses) do
    researcher.llm = llm_model.to_llm
    results = researcher.invoke(&capture_progress)
  end

  expect(last_progress).to include("find relevant content")
  expect(last_progress).to include("category:research-category")

  expect(results[:dry_run]).to eq(false)
  expect(results[:goals]).to eq("find relevant content")
  expect(results[:filter]).to eq("category:research-category username:#{user.username}")
  expect(results[:results].first).to include("Found: Relevant content 1")
end
|
|
end
|
|
end
|