diff --git a/app/jobs/scheduled/summaries_backfill.rb b/app/jobs/scheduled/summaries_backfill.rb index 10dcc719..f6114d24 100644 --- a/app/jobs/scheduled/summaries_backfill.rb +++ b/app/jobs/scheduled/summaries_backfill.rb @@ -57,7 +57,7 @@ module ::Jobs ais.target_type = 'Topic' AND ais.summary_type = '#{summary_type}' SQL - .where("topics.created_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'") + .where("topics.last_posted_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'") .where( <<~SQL, # (1..1) gets stored ad (1..2). ais.id IS NULL OR ( diff --git a/lib/summarization/fold_content.rb b/lib/summarization/fold_content.rb index 831d311e..7cacc8c0 100644 --- a/lib/summarization/fold_content.rb +++ b/lib/summarization/fold_content.rb @@ -58,8 +58,9 @@ module DiscourseAi if summary @existing_summary = summary - if existing_summary.original_content_sha != latest_sha - @existing_summary.mark_as_outdated + if summary.original_content_sha != latest_sha || + content_to_summarize.any? { |cts| cts[:last_version_at] > summary.updated_at } + summary.mark_as_outdated end end end diff --git a/lib/summarization/strategies/chat_messages.rb b/lib/summarization/strategies/chat_messages.rb index 2f073d71..20163464 100644 --- a/lib/summarization/strategies/chat_messages.rb +++ b/lib/summarization/strategies/chat_messages.rb @@ -23,8 +23,8 @@ module DiscourseAi .where("chat_messages.created_at > ?", since.hours.ago) .includes(:user) .order(created_at: :asc) - .pluck(:id, :username_lower, :message) - .map { { id: _1, poster: _2, text: _3 } } + .pluck(:id, :username_lower, :message, :updated_at) + .map { { id: _1, poster: _2, text: _3, last_version_at: _4 } } end def summary_extension_prompt(summary, contents) diff --git a/lib/summarization/strategies/hot_topic_gists.rb b/lib/summarization/strategies/hot_topic_gists.rb index b3e88876..24835817 100644 --- a/lib/summarization/strategies/hot_topic_gists.rb +++ b/lib/summarization/strategies/hot_topic_gists.rb @@ -49,16 +49,16 @@ module DiscourseAi .joins(:user) .where("post_number IN (?)", recent_hot_posts << op_post_number) .order(:post_number) - .pluck(:post_number, :raw, :username) + .pluck(:post_number, :raw, :username, :last_version_at) - posts_data.reduce([]) do |memo, (pn, raw, username)| + posts_data.reduce([]) do |memo, (pn, raw, username, last_version_at)| raw_text = raw if pn == 1 && target.topic_embed&.embed_content_cache.present? raw_text = target.topic_embed&.embed_content_cache end - memo << { poster: username, id: pn, text: raw_text } + memo << { poster: username, id: pn, text: raw_text, last_version_at: last_version_at } end end diff --git a/lib/summarization/strategies/topic_summary.rb b/lib/summarization/strategies/topic_summary.rb index e97c391a..4dfca68c 100644 --- a/lib/summarization/strategies/topic_summary.rb +++ b/lib/summarization/strategies/topic_summary.rb @@ -18,16 +18,17 @@ module DiscourseAi :post_number, :raw, :username, + :last_version_at, ) - posts_data.reduce([]) do |memo, (pn, raw, username)| + posts_data.reduce([]) do |memo, (pn, raw, username, last_version_at)| raw_text = raw if pn == 1 && target.topic_embed&.embed_content_cache.present? raw_text = target.topic_embed&.embed_content_cache end - memo << { poster: username, id: pn, text: raw_text } + memo << { poster: username, id: pn, text: raw_text, last_version_at: last_version_at } end end diff --git a/spec/jobs/scheduled/summaries_backfill_spec.rb b/spec/jobs/scheduled/summaries_backfill_spec.rb index 401f4d20..ccf23b06 100644 --- a/spec/jobs/scheduled/summaries_backfill_spec.rb +++ b/spec/jobs/scheduled/summaries_backfill_spec.rb @@ -1,7 +1,9 @@ # frozen_string_literal: true RSpec.describe Jobs::SummariesBackfill do - fab!(:topic) { Fabricate(:topic, word_count: 200, highest_post_number: 2) } + fab!(:topic) do + Fabricate(:topic, word_count: 200, highest_post_number: 2, last_posted_at: 2.hours.ago) + end let(:limit) { 24 } # guarantee two summaries per batch let(:intervals) { 12 } # budget is split into intervals. Job runs every five minutes. @@ -73,7 +75,7 @@ RSpec.describe Jobs::SummariesBackfill do it "respects max age setting" do SiteSetting.ai_summary_backfill_topic_max_age_days = 1 - topic.update!(created_at: 2.days.ago) + topic.update!(last_posted_at: 2.days.ago) expect(subject.backfill_candidates(type)).to be_empty end @@ -112,14 +114,14 @@ RSpec.describe Jobs::SummariesBackfill do end it "updates the highest_target_number if the summary turned to be up to date" do + og_highest_post_number = topic.highest_post_number existing_summary = Fabricate( :ai_summary, target: topic, updated_at: 3.hours.ago, - highest_target_number: topic.highest_post_number, + highest_target_number: og_highest_post_number, ) - og_highest_post_number = topic.highest_post_number topic.update!(highest_post_number: og_highest_post_number + 1) # No prepared responses here. We don't perform a completion call. diff --git a/spec/lib/modules/summarization/fold_content_spec.rb b/spec/lib/modules/summarization/fold_content_spec.rb index e1972bf6..40bf9cd1 100644 --- a/spec/lib/modules/summarization/fold_content_spec.rb +++ b/spec/lib/modules/summarization/fold_content_spec.rb @@ -3,15 +3,15 @@ RSpec.describe DiscourseAi::Summarization::FoldContent do subject(:summarizer) { DiscourseAi::Summarization.topic_summary(topic) } + let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) } + + fab!(:topic) { Fabricate(:topic, highest_post_number: 2) } + fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") } + + before { SiteSetting.ai_summarization_enabled = true } + describe "#summarize" do - let!(:llm_model) { assign_fake_provider_to(:ai_summarization_model) } - - fab!(:topic) { Fabricate(:topic, highest_post_number: 2) } - fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1, raw: "This is a text") } - before do - SiteSetting.ai_summarization_enabled = true - # Make sure each content fits in a single chunk. # 700 is the number of tokens reserved for the prompt. model_tokens = @@ -52,4 +52,38 @@ RSpec.describe DiscourseAi::Summarization::FoldContent do end end end + + describe "#existing_summary" do + context "when a summary already exists" do + fab!(:ai_summary) do + Fabricate( + :ai_summary, + target: topic, + highest_target_number: topic.highest_post_number, + original_content_sha: AiSummary.build_sha("1"), + ) + end + + it "doesn't mark it as outdated" do + expect(summarizer.existing_summary.outdated).to eq(false) + end + + context "when it's outdated because there are new targets" do + before { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") } + + it "marks it as outdated" do + expect(summarizer.existing_summary.outdated).to eq(true) + end + end + + context "when it's outdated because existing content changes" do + it "marks it as outdated" do + ai_summary.update!(updated_at: 20.minutes.ago) + post_1.update!(last_version_at: 5.minutes.ago) + + expect(summarizer.existing_summary.outdated).to eq(true) + end + end + end + end end