diff --git a/app/jobs/scheduled/summaries_backfill.rb b/app/jobs/scheduled/summaries_backfill.rb
index 2d9a3fdf..4b3205cb 100644
--- a/app/jobs/scheduled/summaries_backfill.rb
+++ b/app/jobs/scheduled/summaries_backfill.rb
@@ -12,22 +12,27 @@ module ::Jobs
 
       system_user = Discourse.system_user
 
+      if SiteSetting.ai_summary_gists_enabled
+        gist_t = AiSummary.summary_types[:gist]
+        backfill_candidates(gist_t)
+          .limit(current_budget(gist_t))
+          .each do |topic|
+            DiscourseAi::Summarization.topic_gist(topic).force_summarize(system_user)
+          end
+      end
+
       complete_t = AiSummary.summary_types[:complete]
       backfill_candidates(complete_t)
         .limit(current_budget(complete_t))
         .each do |topic|
           DiscourseAi::Summarization.topic_summary(topic).force_summarize(system_user)
         end
-
-      return unless SiteSetting.ai_summary_gists_enabled
-
-      gist_t = AiSummary.summary_types[:gist]
-      backfill_candidates(gist_t)
-        .limit(current_budget(gist_t))
-        .each { |topic| DiscourseAi::Summarization.topic_gist(topic).force_summarize(system_user) }
     end
 
     def backfill_candidates(summary_type)
+      # Only topics created within the last N days are backfill candidates.
+      max_age_days = SiteSetting.ai_summary_backfill_topic_max_age_days
+
       Topic
         .where("topics.word_count >= ?", SiteSetting.ai_summary_backfill_minimum_word_count)
         .joins(<<~SQL)
@@ -36,6 +41,7 @@ module ::Jobs
             ais.target_type = 'Topic' AND
             ais.summary_type = '#{summary_type}'
         SQL
+        .where("topics.created_at > ?", max_age_days.days.ago)
         .where(
           "ais.id IS NULL OR UPPER(ais.content_range) < topics.highest_post_number + 1",
         ) # (1..1) gets stored ad (1..2).
diff --git a/config/settings.yml b/config/settings.yml
index 210dc6e1..443c8851 100644
--- a/config/settings.yml
+++ b/config/settings.yml
@@ -228,17 +228,17 @@ discourse_ai:
   ai_summary_gists_allowed_groups:
     type: group_list
     list_type: compact
-    default: ""
-  ai_summarization_strategy: # TODO(roman): Deprecated. Remove by Sept 2024
-    type: enum
-    default: ""
+    default: "0" # everyone
     hidden: true
-    choices: "DiscourseAi::Configuration::LlmEnumerator.old_summarization_options + ['']"
   ai_summarization_model_allowed_seeded_models:
     default: ""
     hidden: true
     type: list
     list_type: compact
+  ai_summary_backfill_topic_max_age_days:
+    default: 30
+    min: 1
+    max: 10000
   ai_summary_backfill_maximum_topics_per_hour:
     default: 0
     min: 0
diff --git a/spec/jobs/scheduled/summaries_backfill_spec.rb b/spec/jobs/scheduled/summaries_backfill_spec.rb
index ebf6831f..2a4ee49a 100644
--- a/spec/jobs/scheduled/summaries_backfill_spec.rb
+++ b/spec/jobs/scheduled/summaries_backfill_spec.rb
@@ -64,6 +64,13 @@ RSpec.describe Jobs::SummariesBackfill do
 
       expect(subject.backfill_candidates(type).map(&:id)).to contain_exactly(topic_2.id, topic.id)
     end
+
+    it "respects max age setting" do
+      SiteSetting.ai_summary_backfill_topic_max_age_days = 1
+      topic.update!(created_at: 2.days.ago)
+
+      expect(subject.backfill_candidates(type)).to be_empty
+    end
   end
 
   describe "#execute" do
@@ -80,7 +87,7 @@ RSpec.describe Jobs::SummariesBackfill do
       gist_2 = "Gist of topic"
 
       DiscourseAi::Completions::Llm.with_prepared_responses(
-        [summary_1, summary_2, gist_1, gist_2],
+        [gist_1, gist_2, summary_1, summary_2],
       ) { subject.execute({}) }
 
       expect(AiSummary.complete.find_by(target: topic_2).summarized_text).to eq(summary_1)
diff --git a/spec/lib/modules/summarization/entry_point_spec.rb b/spec/lib/modules/summarization/entry_point_spec.rb
index 035e57b3..723eed69 100644
--- a/spec/lib/modules/summarization/entry_point_spec.rb
+++ b/spec/lib/modules/summarization/entry_point_spec.rb
@@ -47,6 +47,7 @@ RSpec.describe DiscourseAi::Summarization::EntryPoint do
 
     describe "topic_list_item serializer's ai_summary" do
       context "when hot topic summarization is disabled" do
+        before { SiteSetting.ai_summary_gists_enabled = false }
         it "doesn't include summaries" do
           gist_topic = topic_query.list_hot.topics.find { |t| t.id == topic_ai_gist.target_id }