2024-11-04 15:48:11 -05:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module ::Jobs
  # Scheduled job that generates AI summaries for topics that either have no
  # summary of a given type yet, or whose stored summary no longer covers the
  # topic's latest posts.
  #
  # Work is rate limited per summary type through the
  # `ai_summary_backfill_maximum_topics_per_hour` site setting.
  class SummariesBackfill < ::Jobs::Scheduled
    every 5.minutes
    cluster_concurrency 1

    # Entry point invoked by the scheduler.
    #
    # Does nothing when the plugin or summarization is disabled, or when the
    # hourly backfill budget is configured as zero. Otherwise it summarizes
    # gist candidates (only if gists are enabled) followed by complete-summary
    # candidates, always acting as the system user.
    #
    # @param _args [Object] scheduler arguments; unused.
    def execute(_args)
      return unless SiteSetting.discourse_ai_enabled
      return unless SiteSetting.ai_summarization_enabled
      return if SiteSetting.ai_summary_backfill_maximum_topics_per_hour.zero?

      summarizer_user = Discourse.system_user

      if SiteSetting.ai_summary_gists_enabled
        gist_type = AiSummary.summary_types[:gist]
        gist_topics = backfill_candidates(gist_type).limit(current_budget(gist_type))

        gist_topics.each do |topic|
          DiscourseAi::Summarization.topic_gist(topic).force_summarize(summarizer_user)
        end
      end

      complete_type = AiSummary.summary_types[:complete]
      complete_topics = backfill_candidates(complete_type).limit(current_budget(complete_type))

      complete_topics.each do |topic|
        DiscourseAi::Summarization.topic_summary(topic).force_summarize(summarizer_user)
      end
    end

    # Builds the query for topics that should be (re)summarized.
    #
    # A topic qualifies when it meets the minimum word count, was created
    # within the configured maximum age, and either has no summary of the
    # requested type or has one that stops short of the topic's highest post.
    # Topics without a summary are listed first, then the most recently
    # summarized, with ties broken by the latest activity.
    #
    # @param summary_type [Object] value matched against `ai_summaries.summary_type`
    #   (callers in this class pass entries of `AiSummary.summary_types`).
    # @return the ordered, unlimited query; callers apply their own `limit`.
    def backfill_candidates(summary_type)
      max_age_days = SiteSetting.ai_summary_backfill_topic_max_age_days
      min_word_count = SiteSetting.ai_summary_backfill_minimum_word_count

      summaries_join = <<~SQL
        LEFT OUTER JOIN ai_summaries ais ON
        topics.id = ais.target_id AND
        ais.target_type = 'Topic' AND
        ais.summary_type = '#{summary_type}'
      SQL

      # `to_i` guarantees only an integer is interpolated into the interval.
      recent_enough = "topics.created_at > current_timestamp - INTERVAL '#{max_age_days.to_i} DAY'"

      # The upper bound of the stored range is one past the last covered post:
      # (1..1) gets stored as (1..2), hence the `+ 1` on highest_post_number.
      missing_or_outdated =
        "ais.id IS NULL OR UPPER(ais.content_range) < topics.highest_post_number + 1"

      Topic
        .where("topics.word_count >= ?", min_word_count)
        .joins(summaries_join)
        .where(recent_enough)
        .where(missing_or_outdated)
        .order("ais.created_at DESC NULLS FIRST, topics.last_posted_at DESC")
    end

    # Computes how many topics this run may summarize for the given type.
    #
    # The hourly budget is split into 12 slices (the job runs every 5
    # minutes), with each slice being at least one topic. The result is the
    # smaller of that slice and what remains of the hourly budget after
    # subtracting summaries of this type returned by `AiSummary.system` that
    # were created within the last hour. It is never negative.
    #
    # @param type [Object] summary type used to filter `AiSummary` records.
    # @return [Integer] number of topics allowed for this run (>= 0).
    def current_budget(type)
      hourly_budget = SiteSetting.ai_summary_backfill_maximum_topics_per_hour
      per_run_cap = [hourly_budget, 12].max / 12

      spent_this_hour =
        AiSummary.system.where("created_at > ?", 1.hour.ago).where(summary_type: type).count

      remaining = hourly_budget - spent_this_hour
      allowance = [remaining, per_run_cap].min

      # Floor at zero so an overspent hour never yields a negative limit.
      [allowance, 0].max
    end
  end
end
|