mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-03-08 18:29:32 +00:00
Before this change, a summary was only considered outdated when new content appeared or, for topics with "best replies", when the query returned different results. The intent behind this change is to also detect when a summary is outdated as a result of an edit. Additionally, we are changing the backfill candidates query to compare "ai_summary_backfill_topic_max_age_days" against "last_posted_at" instead of "created_at", to catch long-lived, active topics. This was discussed here: https://meta.discourse.org/t/ai-summarization-backfill-is-stuck-keeps-regenerating-the-same-topic/347088/14?u=roman_rizzi
134 lines
5.0 KiB
Ruby
134 lines
5.0 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Specs for the summaries backfill job, covering three entry points:
#   * #current_budget      - how many summaries of a type may be generated per run
#   * #backfill_candidates - which topics are queued for (re)summarization
#   * #execute             - an end-to-end batch run
RSpec.describe Jobs::SummariesBackfill do
  # Baseline topic shared by the examples; individual examples adjust it
  # through `topic.update!` as needed.
  fab!(:topic) do
    Fabricate(:topic, word_count: 200, highest_post_number: 2, last_posted_at: 2.hours.ago)
  end

  let(:limit) { 24 } # guarantee two summaries per batch
  let(:intervals) { 12 } # budget is split into intervals. Job runs every five minutes.

  before do
    assign_fake_provider_to(:ai_summarization_model)
    SiteSetting.ai_summarization_enabled = true
    SiteSetting.ai_summary_backfill_maximum_topics_per_hour = limit
    SiteSetting.ai_summary_gists_enabled = true
  end

  describe "#current_budget" do
    let(:type) { AiSummary.summary_types[:complete] }

    context "when no summary has been backfilled yet" do
      it "returns the full budget" do
        expect(subject.current_budget(type)).to eq(limit / intervals)
      end

      it "ignores summaries generated by users" do
        Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:human])

        expect(subject.current_budget(type)).to eq(limit / intervals)
      end

      it "only accounts for summaries of the given type" do
        # NOTE(review): this gist is fabricated with a human origin, exactly like
        # the summary in the example above. If the budget already skips
        # human-originated records, this example passes regardless of the type
        # filter. Consider fabricating it with a non-human origin - TODO confirm
        # the available origin values.
        Fabricate(:topic_ai_gist, target: topic, origin: AiSummary.origins[:human])

        expect(subject.current_budget(type)).to eq(limit / intervals)
      end
    end
  end

  describe "#backfill_candidates" do
    let(:type) { AiSummary.summary_types[:complete] }

    it "only selects posts with enough words" do
      topic.update!(word_count: 100)

      expect(subject.backfill_candidates(type)).to be_empty
    end

    it "ignores up to date summaries" do
      # The summary already covers the topic's highest post number (2).
      Fabricate(:ai_summary, target: topic, highest_target_number: 2, updated_at: 10.minutes.ago)

      expect(subject.backfill_candidates(type)).to be_empty
    end

    it "ignores outdated summaries updated less than five minutes ago" do
      # Outdated (covers post 1 of 2) but refreshed only four minutes ago.
      Fabricate(:ai_summary, target: topic, highest_target_number: 1, updated_at: 4.minutes.ago)

      expect(subject.backfill_candidates(type)).to be_empty
    end

    it "orders candidates by topic#last_posted_at" do
      topic.update!(last_posted_at: 1.minute.ago)
      topic_2 = Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago)

      # `eq` against an array checks membership AND order; `contain_exactly`
      # would accept any ordering and therefore could not catch a regression in
      # the sort. The expected order (most recently posted topic first) keeps
      # the order the ids were originally listed in.
      # NOTE(review): confirm it matches the query's ORDER BY.
      expect(subject.backfill_candidates(type).map(&:id)).to eq([topic.id, topic_2.id])
    end

    it "prioritizes topics without summaries" do
      topic_2 =
        Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)
      Fabricate(:ai_summary, target: topic, updated_at: 1.hour.ago, highest_target_number: 1)

      # Order-sensitive on purpose: topic_2 has no summary, so it must come
      # before `topic` even though `topic` was posted in more recently.
      expect(subject.backfill_candidates(type).map(&:id)).to eq([topic_2.id, topic.id])
    end

    it "respects max age setting" do
      SiteSetting.ai_summary_backfill_topic_max_age_days = 1
      topic.update!(last_posted_at: 2.days.ago)

      expect(subject.backfill_candidates(type)).to be_empty
    end
  end

  describe "#execute" do
    it "backfills a batch" do
      # topic_2 has no summaries yet; `topic` has an outdated summary and gist.
      topic_2 =
        Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)
      Fabricate(:ai_summary, target: topic, updated_at: 3.hours.ago, highest_target_number: 1)
      Fabricate(:topic_ai_gist, target: topic, updated_at: 3.hours.ago, highest_target_number: 1)

      summary_1 = "Summary of topic_2"
      gist_1 = "Gist of topic_2"
      summary_2 = "Updated summary of topic"
      gist_2 = "Updated gist of topic"

      # Per the expectations below, the responses map to: gists before complete
      # summaries, and topic_2 before topic within each pair (presumably the
      # prepared responses are consumed in the order given).
      DiscourseAi::Completions::Llm.with_prepared_responses(
        [gist_1, gist_2, summary_1, summary_2],
      ) { subject.execute({}) }

      expect(AiSummary.complete.find_by(target: topic_2).summarized_text).to eq(summary_1)
      expect(AiSummary.gist.find_by(target: topic_2).summarized_text).to eq(gist_1)
      expect(AiSummary.complete.find_by(target: topic).summarized_text).to eq(summary_2)
      expect(AiSummary.gist.find_by(target: topic).summarized_text).to eq(gist_2)

      # Queue has to be empty if we just generated all summaries
      expect(subject.backfill_candidates(AiSummary.summary_types[:complete])).to be_empty
      expect(subject.backfill_candidates(AiSummary.summary_types[:gist])).to be_empty

      # Queue still empty when they are up to date and time passes.
      AiSummary.update_all(updated_at: 20.minutes.ago)
      expect(subject.backfill_candidates(AiSummary.summary_types[:complete])).to be_empty
      expect(subject.backfill_candidates(AiSummary.summary_types[:gist])).to be_empty
    end

    it "updates the highest_target_number if the summary turned to be up to date" do
      og_highest_post_number = topic.highest_post_number
      existing_summary =
        Fabricate(
          :ai_summary,
          target: topic,
          updated_at: 3.hours.ago,
          highest_target_number: og_highest_post_number,
        )
      # Bump the topic past the summary so it is picked up as a candidate.
      topic.update!(highest_post_number: og_highest_post_number + 1)

      # No prepared responses here. We don't perform a completion call.
      subject.execute({})

      expect(existing_summary.reload.highest_target_number).to eq(og_highest_post_number + 1)
    end
  end
end
|