mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-06-26 09:32:40 +00:00
FEATURE: Changes to summaries' outdated logic. (#1108)
Before this change, a summary was only considered outdated when new content appeared or, for topics with "best replies", when the query returned different results. The intent behind this change is to also detect when a summary is outdated as a result of an edit. Additionally, we are changing the backfill candidates query to compare "ai_summary_backfill_topic_max_age_days" against "last_posted_at" instead of "created_at", to catch long-lived, active topics. This was discussed here: https://meta.discourse.org/t/ai-summarization-backfill-is-stuck-keeps-regenerating-the-same-topic/347088/14?u=roman_rizzi
This commit is contained in:
parent
d3b93f984d
commit
1b1b44353b
@ -57,7 +57,7 @@ module ::Jobs
|
|||||||
ais.target_type = 'Topic' AND
|
ais.target_type = 'Topic' AND
|
||||||
ais.summary_type = '#{summary_type}'
|
ais.summary_type = '#{summary_type}'
|
||||||
SQL
|
SQL
|
||||||
.where("topics.created_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'")
|
.where("topics.last_posted_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'")
|
||||||
.where(
|
.where(
|
||||||
<<~SQL, # (1..1) gets stored ad (1..2).
|
<<~SQL, # (1..1) gets stored ad (1..2).
|
||||||
ais.id IS NULL OR (
|
ais.id IS NULL OR (
|
||||||
|
@ -58,8 +58,9 @@ module DiscourseAi
|
|||||||
if summary
|
if summary
|
||||||
@existing_summary = summary
|
@existing_summary = summary
|
||||||
|
|
||||||
if existing_summary.original_content_sha != latest_sha
|
if summary.original_content_sha != latest_sha ||
|
||||||
@existing_summary.mark_as_outdated
|
content_to_summarize.any? { |cts| cts[:last_version_at] > summary.updated_at }
|
||||||
|
summary.mark_as_outdated
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -23,8 +23,8 @@ module DiscourseAi
|
|||||||
.where("chat_messages.created_at > ?", since.hours.ago)
|
.where("chat_messages.created_at > ?", since.hours.ago)
|
||||||
.includes(:user)
|
.includes(:user)
|
||||||
.order(created_at: :asc)
|
.order(created_at: :asc)
|
||||||
.pluck(:id, :username_lower, :message)
|
.pluck(:id, :username_lower, :message, :updated_at)
|
||||||
.map { { id: _1, poster: _2, text: _3 } }
|
.map { { id: _1, poster: _2, text: _3, last_version_at: _4 } }
|
||||||
end
|
end
|
||||||
|
|
||||||
def summary_extension_prompt(summary, contents)
|
def summary_extension_prompt(summary, contents)
|
||||||
|
@ -49,16 +49,16 @@ module DiscourseAi
|
|||||||
.joins(:user)
|
.joins(:user)
|
||||||
.where("post_number IN (?)", recent_hot_posts << op_post_number)
|
.where("post_number IN (?)", recent_hot_posts << op_post_number)
|
||||||
.order(:post_number)
|
.order(:post_number)
|
||||||
.pluck(:post_number, :raw, :username)
|
.pluck(:post_number, :raw, :username, :last_version_at)
|
||||||
|
|
||||||
posts_data.reduce([]) do |memo, (pn, raw, username)|
|
posts_data.reduce([]) do |memo, (pn, raw, username, last_version_at)|
|
||||||
raw_text = raw
|
raw_text = raw
|
||||||
|
|
||||||
if pn == 1 && target.topic_embed&.embed_content_cache.present?
|
if pn == 1 && target.topic_embed&.embed_content_cache.present?
|
||||||
raw_text = target.topic_embed&.embed_content_cache
|
raw_text = target.topic_embed&.embed_content_cache
|
||||||
end
|
end
|
||||||
|
|
||||||
memo << { poster: username, id: pn, text: raw_text }
|
memo << { poster: username, id: pn, text: raw_text, last_version_at: last_version_at }
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -18,16 +18,17 @@ module DiscourseAi
|
|||||||
:post_number,
|
:post_number,
|
||||||
:raw,
|
:raw,
|
||||||
:username,
|
:username,
|
||||||
|
:last_version_at,
|
||||||
)
|
)
|
||||||
|
|
||||||
posts_data.reduce([]) do |memo, (pn, raw, username)|
|
posts_data.reduce([]) do |memo, (pn, raw, username, last_version_at)|
|
||||||
raw_text = raw
|
raw_text = raw
|
||||||
|
|
||||||
if pn == 1 && target.topic_embed&.embed_content_cache.present?
|
if pn == 1 && target.topic_embed&.embed_content_cache.present?
|
||||||
raw_text = target.topic_embed&.embed_content_cache
|
raw_text = target.topic_embed&.embed_content_cache
|
||||||
end
|
end
|
||||||
|
|
||||||
memo << { poster: username, id: pn, text: raw_text }
|
memo << { poster: username, id: pn, text: raw_text, last_version_at: last_version_at }
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
RSpec.describe Jobs::SummariesBackfill do
|
RSpec.describe Jobs::SummariesBackfill do
|
||||||
fab!(:topic) { Fabricate(:topic, word_count: 200, highest_post_number: 2) }
|
fab!(:topic) do
|
||||||
|
Fabricate(:topic, word_count: 200, highest_post_number: 2, last_posted_at: 2.hours.ago)
|
||||||
|
end
|
||||||
let(:limit) { 24 } # guarantee two summaries per batch
|
let(:limit) { 24 } # guarantee two summaries per batch
|
||||||
let(:intervals) { 12 } # budget is split into intervals. Job runs every five minutes.
|
let(:intervals) { 12 } # budget is split into intervals. Job runs every five minutes.
|
||||||
|
|
||||||
@ -73,7 +75,7 @@ RSpec.describe Jobs::SummariesBackfill do
|
|||||||
|
|
||||||
it "respects max age setting" do
|
it "respects max age setting" do
|
||||||
SiteSetting.ai_summary_backfill_topic_max_age_days = 1
|
SiteSetting.ai_summary_backfill_topic_max_age_days = 1
|
||||||
topic.update!(created_at: 2.days.ago)
|
topic.update!(last_posted_at: 2.days.ago)
|
||||||
|
|
||||||
expect(subject.backfill_candidates(type)).to be_empty
|
expect(subject.backfill_candidates(type)).to be_empty
|
||||||
end
|
end
|
||||||
@ -112,14 +114,14 @@ RSpec.describe Jobs::SummariesBackfill do
|
|||||||
end
|
end
|
||||||
|
|
||||||
it "updates the highest_target_number if the summary turned to be up to date" do
|
it "updates the highest_target_number if the summary turned to be up to date" do
|
||||||
|
og_highest_post_number = topic.highest_post_number
|
||||||
existing_summary =
|
existing_summary =
|
||||||
Fabricate(
|
Fabricate(
|
||||||
:ai_summary,
|
:ai_summary,
|
||||||
target: topic,
|
target: topic,
|
||||||
updated_at: 3.hours.ago,
|
updated_at: 3.hours.ago,
|
||||||
highest_target_number: topic.highest_post_number,
|
highest_target_number: og_highest_post_number,
|
||||||
)
|
)
|
||||||
og_highest_post_number = topic.highest_post_number
|
|
||||||
topic.update!(highest_post_number: og_highest_post_number + 1)
|
topic.update!(highest_post_number: og_highest_post_number + 1)
|
||||||
|
|
||||||
# No prepared responses here. We don't perform a completion call.
|
# No prepared responses here. We don't perform a completion call.
|
||||||
|
@ -3,15 +3,15 @@
|
|||||||
RSpec.describe DiscourseAi::Summarization::FoldContent do
|
RSpec.describe DiscourseAi::Summarization::FoldContent do
|
||||||
subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) }
|
subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) }
|
||||||
|
|
||||||
|
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
|
||||||
|
|
||||||
|
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
|
||||||
|
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }
|
||||||
|
|
||||||
|
before { SiteSetting.ai_summarization_enabled = true }
|
||||||
|
|
||||||
describe "#summarize" do
|
describe "#summarize" do
|
||||||
let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) }
|
|
||||||
|
|
||||||
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
|
|
||||||
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") }
|
|
||||||
|
|
||||||
before do
|
before do
|
||||||
SiteSetting.ai_summarization_enabled = true
|
|
||||||
|
|
||||||
# Make sure each content fits in a single chunk.
|
# Make sure each content fits in a single chunk.
|
||||||
# 700 is the number of tokens reserved for the prompt.
|
# 700 is the number of tokens reserved for the prompt.
|
||||||
model_tokens =
|
model_tokens =
|
||||||
@ -52,4 +52,38 @@ RSpec.describe DiscourseAi::Summarization::FoldContent do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "#existing_summary" do
|
||||||
|
context "when a summary already exists" do
|
||||||
|
fab!(:ai_summary) do
|
||||||
|
Fabricate(
|
||||||
|
:ai_summary,
|
||||||
|
target: topic,
|
||||||
|
highest_target_number: topic.highest_post_number,
|
||||||
|
original_content_sha: AiSummary.build_sha("1"),
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "doesn't mark it as outdated" do
|
||||||
|
expect(summarizer.existing_summary.outdated).to eq(false)
|
||||||
|
end
|
||||||
|
|
||||||
|
context "when it's outdated because there are new targets" do
|
||||||
|
before { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") }
|
||||||
|
|
||||||
|
it "marks it as outdated" do
|
||||||
|
expect(summarizer.existing_summary.outdated).to eq(true)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
context "when it's outdated because existing content changes" do
|
||||||
|
it "marks it as outdated" do
|
||||||
|
ai_summary.update!(updated_at: 20.minutes.ago)
|
||||||
|
post_1.update!(last_version_at: 5.minutes.ago)
|
||||||
|
|
||||||
|
expect(summarizer.existing_summary.outdated).to eq(true)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
Loading…
x
Reference in New Issue
Block a user