discourse-ai/spec/lib/utils/research/llm_formatter_spec.rb
Sam c34fcc8a95
FEATURE: forum researcher persona for deep research (#1313)
This commit introduces a new Forum Researcher persona specialized in deep forum content analysis along with comprehensive improvements to our AI infrastructure.

Key additions:

    New Forum Researcher persona with advanced filtering and analysis capabilities
    Robust filtering system supporting tags, categories, dates, users, and keywords
    LLM formatter to efficiently process and chunk research results

Infrastructure improvements:

    Implemented CancelManager class to centrally manage AI completion cancellations
    Replaced callback-based cancellation with a more robust pattern
    Added systematic cancellation monitoring with callbacks

Other improvements:

    Added configurable default_enabled flag to control which personas are enabled by default
    Updated translation strings for the new researcher functionality
    Added comprehensive specs for the new components

    Renames Researcher -> Web Researcher

This change makes our AI platform more stable while adding powerful research capabilities that can analyze forum trends and surface relevant content.
2025-05-14 12:36:16 +10:00

75 lines
2.5 KiB
Ruby

# frozen_string_literal: true
#
# Specs for the research LLM formatter's private helpers: per-post truncation
# (`truncate_if_needed`) and single-post rendering (`format_post`). Both are
# invoked through `send` because the examples call them as non-public methods.
describe DiscourseAi::Utils::Research::LlmFormatter do
  fab!(:user) { Fabricate(:user, username: "test_user") }
  fab!(:topic) { Fabricate(:topic, title: "This is a Test Topic", user: user) }
  # Not referenced by name in the examples below; presumably gives the user an
  # existing post for the filter to match -- confirm before removing.
  fab!(:post) { Fabricate(:post, topic: topic, user: user) }

  let(:tokenizer) { DiscourseAi::Tokenizer::OpenAiTokenizer }
  let(:filter) { DiscourseAi::Utils::Research::Filter.new("@#{user.username}") }

  # 200 repetitions of "word" separated by single spaces.
  let(:long_text) { ("word " * 200).strip }
  # Marker the examples expect when 200 "word"s meet a 50-token per-post limit
  # (assumes the tokenizer counts each repetition as one token).
  let(:elision_marker) { "... elided 150 tokens ..." }

  # Builds a formatter over the shared filter and tokenizer with a fixed
  # 1000-token batch budget; only the per-post limit varies between examples.
  def formatter_limited_to(per_post_limit)
    described_class.new(
      filter,
      max_tokens_per_batch: 1000,
      tokenizer: tokenizer,
      max_tokens_per_post: per_post_limit,
    )
  end

  describe "#truncate_if_needed" do
    it "returns original content when under token limit" do
      short_text = "This is a short post"
      formatter = formatter_limited_to(100)

      result = formatter.send(:truncate_if_needed, short_text)

      expect(result).to eq(short_text)
    end

    it "truncates content when over token limit" do
      formatter = formatter_limited_to(50)

      truncated = formatter.send(:truncate_if_needed, long_text)

      expect(truncated).to include(elision_marker)
      expect(truncated).not_to eq(long_text)

      # Blank-line separated sections: index 0 is the kept head and index 2 the
      # kept tail; each should hold about half of the 50-token per-post limit.
      sections = truncated.split("\n\n")
      head_words = sections[0].split(" ")
      expect(head_words.length).to be_within(5).of(25)
      tail_words = sections[2].split(" ")
      expect(tail_words.length).to be_within(5).of(25)
    end
  end

  describe "#format_post" do
    it "formats posts with truncation for long content" do
      # The post body is long enough to exceed the 50-token per-post limit.
      long_post = Fabricate(:post, raw: long_text, topic: topic, user: user)
      formatter = formatter_limited_to(50)

      formatted = formatter.send(:format_post, long_post)

      # Header and relative URL identify the author and the post.
      expect(formatted).to include("## Post by #{user.username}")
      expect(formatted).to include("Post url: /t/-/#{long_post.topic_id}/#{long_post.post_number}")
      # The over-long body must carry the truncation marker.
      expect(formatted).to include(elision_marker)
    end
  end
end