mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-03-09 11:48:47 +00:00
This change introduces a job that automatically summarizes topics and caches the results. Settings control how many topics are backfilled per hour and the minimum word count a topic needs to qualify. Topics without a summary are prioritized over topics whose summary is outdated.
104 lines
3.3 KiB
Ruby
104 lines
3.3 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Specs for the Jobs::SummariesBackfill job. They cover three public entry
# points: the remaining hourly budget (#current_budget), the selection and
# ordering of topics to summarize (#backfill_candidates), and an end-to-end
# run of the job (#execute) against canned LLM responses.
RSpec.describe Jobs::SummariesBackfill do
  # Same object RSpec's implicit subject would build; named for readability.
  subject(:job) { described_class.new }

  # Baseline candidate: 200 words and two posts.
  fab!(:topic) { Fabricate(:topic, word_count: 200, highest_post_number: 2) }

  let(:limit) { 24 } # guarantee two summaries per batch

  before do
    assign_fake_provider_to(:ai_summarization_model)
    SiteSetting.ai_summarization_enabled = true
    SiteSetting.ai_summary_backfill_maximum_topics_per_hour = limit
  end

  describe "#current_budget" do
    context "when no summary has been backfilled yet" do
      it "returns the full budget" do
        expect(job.current_budget).to eq(limit)
      end

      it "ignores summaries generated by users" do
        Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:human])

        expect(job.current_budget).to eq(limit)
      end

      it "only accounts for complete type summaries" do
        # NOTE(review): this gist is fabricated with a human origin, so the
        # origin filter alone may be enough to make this example pass.
        # Consider a system-origin gist to exercise the summary-type filter
        # on its own -- confirm against the job implementation first.
        Fabricate(:topic_ai_gist, target: topic, origin: AiSummary.origins[:human])

        expect(job.current_budget).to eq(limit)
      end
    end

    context "when we already backfilled stuff" do
      fab!(:backfilled_summary) do
        Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:system])
      end

      context "if it was within the budget window" do
        it "reduces our budget" do
          expect(job.current_budget).to eq(limit - 1)
        end
      end

      context "if it wasn't within the budget window" do
        # Jump two hours ahead so the fabricated summary is older than the
        # budget window. One freeze in the hook is sufficient; the example
        # itself does not need to move the clock again.
        before { freeze_time(2.hours.from_now) }

        it "returns the full budget" do
          expect(job.current_budget).to eq(limit)
        end
      end
    end
  end

  describe "#backfill_candidates" do
    it "only selects posts with enough words" do
      # Presumably below the configured minimum word count -- the threshold
      # itself is defined outside this file.
      topic.update!(word_count: 100)

      expect(job.backfill_candidates).to be_empty
    end

    it "ignores up to date summaries" do
      # The summary already covers posts 1..2, matching highest_post_number.
      Fabricate(:ai_summary, target: topic, content_range: (1..2))

      expect(job.backfill_candidates).to be_empty
    end

    it "orders candidates by topic#last_posted_at" do
      topic.update!(last_posted_at: 1.minute.ago)
      topic_2 = Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago)

      # `eq` (not `contain_exactly`) so the ordering is actually asserted:
      # the most recently posted topic is expected first.
      expect(job.backfill_candidates.map(&:id)).to eq([topic.id, topic_2.id])
    end

    it "prioritizes topics without summaries" do
      topic_2 =
        Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)
      # Outdated summary: covers only post 1 while the topic has two posts.
      Fabricate(:ai_summary, target: topic, content_range: (1..1))

      # `eq` (not `contain_exactly`) so the priority is actually asserted:
      # the never-summarized topic comes before the outdated one, even though
      # the outdated one was posted in more recently.
      expect(job.backfill_candidates.map(&:id)).to eq([topic_2.id, topic.id])
    end
  end

  describe "#execute" do
    it "backfills a batch" do
      topic_2 =
        Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)
      # Pre-existing, outdated summary for `topic`, created three hours ago.
      Fabricate(:ai_summary, target: topic, created_at: 3.hours.ago, content_range: (1..1))

      summary_1 = "Summary of topic_2"
      summary_2 = "Summary of topic"

      # Presumably the prepared responses are handed out in order, so the
      # expectations below also reflect which topic is processed first.
      DiscourseAi::Completions::Llm.with_prepared_responses([summary_1, summary_2]) do
        job.execute({})
      end

      expect(AiSummary.find_by(target: topic_2).summarized_text).to eq(summary_1)
      expect(AiSummary.find_by(target: topic).summarized_text).to eq(summary_2)
    end
  end
end
|