mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-10-24 02:58:39 +00:00
Before this change, a summary was only considered outdated when new content appeared or, for topics with "best replies", when the query returned different results. The intent behind this change is to also detect when a summary is outdated as a result of an edit. Additionally, we are changing the backfill candidates query to compare "ai_summary_backfill_topic_max_age_days" against "last_posted_at" instead of "created_at", to catch long-lived, active topics. This was discussed here: https://meta.discourse.org/t/ai-summarization-backfill-is-stuck-keeps-regenerating-the-same-topic/347088/14?u=roman_rizzi
90 lines
2.9 KiB
Ruby
90 lines
2.9 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Specs for the summarizer built by DiscourseAi::Summarization.topic_summary.
# Covers two things: producing a summary with one or more LLM completions, and
# reporting whether an already stored summary is outdated.
RSpec.describe DiscourseAi::Summarization::FoldContent do
  subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) }

  let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }

  fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
  fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }

  before do
    SiteSetting.ai_summarization_enabled = true
  end

  describe "#summarize" do
    before do
      # Size the model's prompt budget so that each piece of content fits in
      # exactly one chunk.
      # 700 is the number of tokens reserved for the prompt.
      reserved_prompt_tokens = 700
      post_tokens =
        DiscourseAi::Tokenizer::OpenAiTokenizer.size(
          "(1 #{post_1.user.username_lower} said: This is a text ",
        )

      # NOTE(review): the trailing 3 extra tokens are undocumented in this
      # file; presumably a small headroom — confirm before changing.
      llm_model.update!(max_prompt_tokens: reserved_prompt_tokens + post_tokens + 3)
    end

    let(:one_shot_summary) { "single" }
    let(:extended_summary) { "this is a concatenated summary" }

    fab!(:user)

    context "when the content to summarize fits in a single call" do
      it "does one call to summarize content" do
        prepared = [one_shot_summary]

        summary =
          DiscourseAi::Completions::Llm.with_prepared_responses(prepared) do |llm_spy|
            generated = summarizer.summarize(user)
            expect(llm_spy.completions).to eq(1)
            generated
          end

        expect(summary.summarized_text).to eq(one_shot_summary)
      end
    end

    context "when the content to summarize doesn't fit in a single call" do
      fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") }

      it "keeps extending the summary until there is nothing else to process" do
        # One prepared response per expected completion.
        prepared = [one_shot_summary, extended_summary]

        summary =
          DiscourseAi::Completions::Llm.with_prepared_responses(prepared) do |llm_spy|
            generated = summarizer.summarize(user)
            expect(llm_spy.completions).to eq(2)
            generated
          end

        expect(summary.summarized_text).to eq(extended_summary)
      end
    end
  end

  describe "#existing_summary" do
    context "when a summary already exists" do
      # Stored summary whose highest target number matches the topic's
      # highest_post_number and whose content sha is built from "1".
      fab!(:ai_summary) do
        Fabricate(
          :ai_summary,
          target: topic,
          highest_target_number: topic.highest_post_number,
          original_content_sha: AiSummary.build_sha("1"),
        )
      end

      it "doesn't mark it as outdated" do
        existing = summarizer.existing_summary

        expect(existing.outdated).to eq(false)
      end

      context "when it's outdated because there are new targets" do
        before do
          Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text")
        end

        it "marks it as outdated" do
          existing = summarizer.existing_summary

          expect(existing.outdated).to eq(true)
        end
      end

      context "when it's outdated because existing content changes" do
        it "marks it as outdated" do
          # The summary was last updated before the post's latest version.
          ai_summary.update!(updated_at: 20.minutes.ago)
          post_1.update!(last_version_at: 5.minutes.ago)

          existing = summarizer.existing_summary

          expect(existing.outdated).to eq(true)
        end
      end
    end
  end
end
|