mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-08-04 20:23:30 +00:00
This change introduces a job that automatically summarizes topics and caches the results. One setting controls how many topics we'll backfill per hour, and another sets the minimum word count a topic needs to qualify. Topics without a summary are prioritized over topics whose summary is outdated.
44 lines
1.5 KiB
Ruby
44 lines
1.5 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module ::Jobs
|
|
class SummariesBackfill < ::Jobs::Scheduled
|
|
every 5.minutes
|
|
cluster_concurrency 1
|
|
|
|
def execute(_args)
|
|
return if !SiteSetting.discourse_ai_enabled
|
|
return if !SiteSetting.ai_summarization_enabled
|
|
return if SiteSetting.ai_summary_backfill_maximum_topics_per_hour.zero?
|
|
|
|
# Split budget in 12 intervals, but make sure is at least one.
|
|
limit_per_job = [SiteSetting.ai_summary_backfill_maximum_topics_per_hour, 12].max / 12
|
|
budget = [current_budget, limit_per_job].min
|
|
|
|
backfill_candidates
|
|
.limit(budget)
|
|
.each do |topic|
|
|
DiscourseAi::Summarization.topic_summary(topic).force_summarize(Discourse.system_user)
|
|
end
|
|
end
|
|
|
|
def backfill_candidates
|
|
Topic
|
|
.where("topics.word_count >= ?", SiteSetting.ai_summary_backfill_minimum_word_count)
|
|
.joins(
|
|
"LEFT OUTER JOIN ai_summaries ais ON topics.id = ais.target_id AND ais.target_type = 'Topic'",
|
|
)
|
|
.where(
|
|
"ais.id IS NULL OR UPPER(ais.content_range) < topics.highest_post_number + 1",
|
|
) # (1..1) gets stored ad (1..2).
|
|
.order("ais.created_at DESC NULLS FIRST, topics.last_posted_at DESC")
|
|
end
|
|
|
|
def current_budget
|
|
base_budget = SiteSetting.ai_summary_backfill_maximum_topics_per_hour
|
|
used_budget = AiSummary.complete.system.where("created_at > ?", 1.hour.ago).count
|
|
|
|
base_budget - used_budget
|
|
end
|
|
end
|
|
end
|