mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-02-14 15:34:42 +00:00
FEATURE: Changes to summaries' outdated logic. (#1108)
Before this change, a summary was only considered outdated when new content appeared or, for topics with "best replies", when the query returned different results. The intent behind this change is to also detect when a summary is outdated as a result of an edit. Additionally, we are changing the backfill candidates query to compare "ai_summary_backfill_topic_max_age_days" against "last_posted_at" instead of "created_at", to catch long-lived, active topics. This was discussed here: https://meta.discourse.org/t/ai-summarization-backfill-is-stuck-keeps-regenerating-the-same-topic/347088/14?u=roman_rizzi
This commit is contained in:
parent
d3b93f984d
commit
1b1b44353b
@ -57,7 +57,7 @@ module ::Jobs
|
||||
ais.target_type = 'Topic' AND
|
||||
ais.summary_type = '#{summary_type}'
|
||||
SQL
|
||||
.where("topics.created_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'")
|
||||
.where("topics.last_posted_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'")
|
||||
.where(
|
||||
<<~SQL, # (1..1) gets stored as (1..2).
|
||||
ais.id IS NULL OR (
|
||||
|
@ -58,8 +58,9 @@ module DiscourseAi
|
||||
if summary
|
||||
@existing_summary = summary
|
||||
|
||||
if existing_summary.original_content_sha != latest_sha
|
||||
@existing_summary.mark_as_outdated
|
||||
if summary.original_content_sha != latest_sha ||
|
||||
content_to_summarize.any? { |cts| cts[:last_version_at] > summary.updated_at }
|
||||
summary.mark_as_outdated
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -23,8 +23,8 @@ module DiscourseAi
|
||||
.where("chat_messages.created_at > ?", since.hours.ago)
|
||||
.includes(:user)
|
||||
.order(created_at: :asc)
|
||||
.pluck(:id, :username_lower, :message)
|
||||
.map { { id: _1, poster: _2, text: _3 } }
|
||||
.pluck(:id, :username_lower, :message, :updated_at)
|
||||
.map { { id: _1, poster: _2, text: _3, last_version_at: _4 } }
|
||||
end
|
||||
|
||||
def summary_extension_prompt(summary, contents)
|
||||
|
@ -49,16 +49,16 @@ module DiscourseAi
|
||||
.joins(:user)
|
||||
.where("post_number IN (?)", recent_hot_posts << op_post_number)
|
||||
.order(:post_number)
|
||||
.pluck(:post_number, :raw, :username)
|
||||
.pluck(:post_number, :raw, :username, :last_version_at)
|
||||
|
||||
posts_data.reduce([]) do |memo, (pn, raw, username)|
|
||||
posts_data.reduce([]) do |memo, (pn, raw, username, last_version_at)|
|
||||
raw_text = raw
|
||||
|
||||
if pn == 1 && target.topic_embed&.embed_content_cache.present?
|
||||
raw_text = target.topic_embed&.embed_content_cache
|
||||
end
|
||||
|
||||
memo << { poster: username, id: pn, text: raw_text }
|
||||
memo << { poster: username, id: pn, text: raw_text, last_version_at: last_version_at }
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -18,16 +18,17 @@ module DiscourseAi
|
||||
:post_number,
|
||||
:raw,
|
||||
:username,
|
||||
:last_version_at,
|
||||
)
|
||||
|
||||
posts_data.reduce([]) do |memo, (pn, raw, username)|
|
||||
posts_data.reduce([]) do |memo, (pn, raw, username, last_version_at)|
|
||||
raw_text = raw
|
||||
|
||||
if pn == 1 && target.topic_embed&.embed_content_cache.present?
|
||||
raw_text = target.topic_embed&.embed_content_cache
|
||||
end
|
||||
|
||||
memo << { poster: username, id: pn, text: raw_text }
|
||||
memo << { poster: username, id: pn, text: raw_text, last_version_at: last_version_at }
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -1,7 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
RSpec.describe Jobs::SummariesBackfill do
|
||||
fab!(:topic) { Fabricate(:topic, word_count: 200, highest_post_number: 2) }
|
||||
fab!(:topic) do
|
||||
Fabricate(:topic, word_count: 200, highest_post_number: 2, last_posted_at: 2.hours.ago)
|
||||
end
|
||||
let(:limit) { 24 } # guarantee two summaries per batch
|
||||
let(:intervals) { 12 } # budget is split into intervals. Job runs every five minutes.
|
||||
|
||||
@ -73,7 +75,7 @@ RSpec.describe Jobs::SummariesBackfill do
|
||||
|
||||
it "respects max age setting" do
|
||||
SiteSetting.ai_summary_backfill_topic_max_age_days = 1
|
||||
topic.update!(created_at: 2.days.ago)
|
||||
topic.update!(last_posted_at: 2.days.ago)
|
||||
|
||||
expect(subject.backfill_candidates(type)).to be_empty
|
||||
end
|
||||
@ -112,14 +114,14 @@ RSpec.describe Jobs::SummariesBackfill do
|
||||
end
|
||||
|
||||
it "updates the highest_target_number if the summary turned to be up to date" do
|
||||
og_highest_post_number = topic.highest_post_number
|
||||
existing_summary =
|
||||
Fabricate(
|
||||
:ai_summary,
|
||||
target: topic,
|
||||
updated_at: 3.hours.ago,
|
||||
highest_target_number: topic.highest_post_number,
|
||||
highest_target_number: og_highest_post_number,
|
||||
)
|
||||
og_highest_post_number = topic.highest_post_number
|
||||
topic.update!(highest_post_number: og_highest_post_number + 1)
|
||||
|
||||
# No prepared responses here. We don't perform a completion call.
|
||||
|
@ -3,15 +3,15 @@
|
||||
RSpec.describe DiscourseAi::Summarization::FoldContent do
|
||||
subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) }
|
||||
|
||||
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
|
||||
|
||||
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
|
||||
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }
|
||||
|
||||
before { SiteSetting.ai_summarization_enabled = true }
|
||||
|
||||
describe "#summarize" do
|
||||
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
|
||||
|
||||
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
|
||||
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }
|
||||
|
||||
before do
|
||||
SiteSetting.ai_summarization_enabled = true
|
||||
|
||||
# Make sure each content fits in a single chunk.
|
||||
# 700 is the number of tokens reserved for the prompt.
|
||||
model_tokens =
|
||||
@ -52,4 +52,38 @@ RSpec.describe DiscourseAi::Summarization::FoldContent do
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "#existing_summary" do
|
||||
context "when a summary already exists" do
|
||||
fab!(:ai_summary) do
|
||||
Fabricate(
|
||||
:ai_summary,
|
||||
target: topic,
|
||||
highest_target_number: topic.highest_post_number,
|
||||
original_content_sha: AiSummary.build_sha("1"),
|
||||
)
|
||||
end
|
||||
|
||||
it "doesn't mark it as outdated" do
|
||||
expect(summarizer.existing_summary.outdated).to eq(false)
|
||||
end
|
||||
|
||||
context "when it's outdated because there are new targets" do
|
||||
before { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") }
|
||||
|
||||
it "marks it as outdated" do
|
||||
expect(summarizer.existing_summary.outdated).to eq(true)
|
||||
end
|
||||
end
|
||||
|
||||
context "when it's outdated because existing content changes" do
|
||||
it "marks it as outdated" do
|
||||
ai_summary.update!(updated_at: 20.minutes.ago)
|
||||
post_1.update!(last_version_at: 5.minutes.ago)
|
||||
|
||||
expect(summarizer.existing_summary.outdated).to eq(true)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
Loading…
x
Reference in New Issue
Block a user