FEATURE: Extend summary backfill to also generate gists (#896)
Updates default batch size to 0 and max to 10000
This commit is contained in:
parent
c421f713a3
commit
fbc74c7467
|
@ -10,34 +10,48 @@ module ::Jobs
|
|||
return if !SiteSetting.ai_summarization_enabled
|
||||
return if SiteSetting.ai_summary_backfill_maximum_topics_per_hour.zero?
|
||||
|
||||
# Split the budget into 12 intervals, but make sure each one is at least 1.
|
||||
limit_per_job = [SiteSetting.ai_summary_backfill_maximum_topics_per_hour, 12].max / 12
|
||||
budget = [current_budget, limit_per_job].min
|
||||
system_user = Discourse.system_user
|
||||
|
||||
backfill_candidates
|
||||
.limit(budget)
|
||||
complete_t = AiSummary.summary_types[:complete]
|
||||
backfill_candidates(complete_t)
|
||||
.limit(current_budget(complete_t))
|
||||
.each do |topic|
|
||||
DiscourseAi::Summarization.topic_summary(topic).force_summarize(Discourse.system_user)
|
||||
end
|
||||
DiscourseAi::Summarization.topic_summary(topic).force_summarize(system_user)
|
||||
end
|
||||
|
||||
def backfill_candidates
|
||||
gist_t = AiSummary.summary_types[:gist]
|
||||
backfill_candidates(gist_t)
|
||||
.limit(current_budget(gist_t))
|
||||
.each { |topic| DiscourseAi::Summarization.topic_gist(topic).force_summarize(system_user) }
|
||||
end
|
||||
|
||||
def backfill_candidates(summary_type)
|
||||
Topic
|
||||
.where("topics.word_count >= ?", SiteSetting.ai_summary_backfill_minimum_word_count)
|
||||
.joins(
|
||||
"LEFT OUTER JOIN ai_summaries ais ON topics.id = ais.target_id AND ais.target_type = 'Topic'",
|
||||
)
|
||||
.joins(<<~SQL)
|
||||
LEFT OUTER JOIN ai_summaries ais ON
|
||||
topics.id = ais.target_id AND
|
||||
ais.target_type = 'Topic' AND
|
||||
ais.summary_type = '#{summary_type}'
|
||||
SQL
|
||||
.where(
|
||||
"ais.id IS NULL OR UPPER(ais.content_range) < topics.highest_post_number + 1",
|
||||
) # (1..1) gets stored as (1..2).
|
||||
.order("ais.created_at DESC NULLS FIRST, topics.last_posted_at DESC")
|
||||
end
|
||||
|
||||
def current_budget
|
||||
def current_budget(type)
|
||||
# Split the budget into 12 intervals, but make sure each one is at least 1.
|
||||
base_budget = SiteSetting.ai_summary_backfill_maximum_topics_per_hour
|
||||
used_budget = AiSummary.complete.system.where("created_at > ?", 1.hour.ago).count
|
||||
limit_per_job = [base_budget, 12].max / 12
|
||||
|
||||
base_budget - used_budget
|
||||
used_budget =
|
||||
AiSummary.system.where("created_at > ?", 1.hour.ago).where(summary_type: type).count
|
||||
|
||||
current_budget = [(base_budget - used_budget), limit_per_job].min
|
||||
return 0 if current_budget < 0
|
||||
|
||||
current_budget
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -376,9 +376,9 @@ discourse_ai:
|
|||
type: list
|
||||
list_type: compact
|
||||
ai_summary_backfill_maximum_topics_per_hour:
|
||||
default: 10
|
||||
default: 0
|
||||
min: 0
|
||||
max: 1000
|
||||
max: 10000
|
||||
ai_summary_backfill_minimum_word_count:
|
||||
default: 200
|
||||
hidden: true
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
RSpec.describe Jobs::SummariesBackfill do
|
||||
fab!(:topic) { Fabricate(:topic, word_count: 200, highest_post_number: 2) }
|
||||
let(:limit) { 24 } # guarantee two summaries per batch
|
||||
let(:intervals) { 12 } # budget is split into intervals. Job runs every five minutes.
|
||||
|
||||
before do
|
||||
assign_fake_provider_to(:ai_summarization_model)
|
||||
|
@ -11,65 +12,47 @@ RSpec.describe Jobs::SummariesBackfill do
|
|||
end
|
||||
|
||||
describe "#current_budget" do
|
||||
let(:type) { AiSummary.summary_types[:complete] }
|
||||
|
||||
context "when no summary has been backfilled yet" do
|
||||
it "returns the full budget" do
|
||||
expect(subject.current_budget).to eq(limit)
|
||||
expect(subject.current_budget(type)).to eq(limit / intervals)
|
||||
end
|
||||
|
||||
it "ignores summaries generated by users" do
|
||||
Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:human])
|
||||
|
||||
expect(subject.current_budget).to eq(limit)
|
||||
expect(subject.current_budget(type)).to eq(limit / intervals)
|
||||
end
|
||||
|
||||
it "only accounts for complete type summaries" do
|
||||
it "only accounts for summaries of the given type" do
|
||||
Fabricate(:topic_ai_gist, target: topic, origin: AiSummary.origins[:human])
|
||||
|
||||
expect(subject.current_budget).to eq(limit)
|
||||
end
|
||||
end
|
||||
|
||||
context "when we already backfilled stuff" do
|
||||
fab!(:backfilled_summary) do
|
||||
Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:system])
|
||||
end
|
||||
|
||||
context "if it was within the budget window" do
|
||||
it "reduces our budget" do
|
||||
expect(subject.current_budget).to eq(limit - 1)
|
||||
end
|
||||
end
|
||||
|
||||
context "if it wasn't within the budget window" do
|
||||
before { freeze_time(2.hours.from_now) }
|
||||
|
||||
it "returns the full budget" do
|
||||
freeze_time(2.hours.from_now)
|
||||
|
||||
expect(subject.current_budget).to eq(limit)
|
||||
end
|
||||
expect(subject.current_budget(type)).to eq(limit / intervals)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "#backfill_candidates" do
|
||||
let(:type) { AiSummary.summary_types[:complete] }
|
||||
|
||||
it "only selects posts with enough words" do
|
||||
topic.update!(word_count: 100)
|
||||
|
||||
expect(subject.backfill_candidates).to be_empty
|
||||
expect(subject.backfill_candidates(type)).to be_empty
|
||||
end
|
||||
|
||||
it "ignores up to date summaries" do
|
||||
Fabricate(:ai_summary, target: topic, content_range: (1..2))
|
||||
|
||||
expect(subject.backfill_candidates).to be_empty
|
||||
expect(subject.backfill_candidates(type)).to be_empty
|
||||
end
|
||||
|
||||
it "orders candidates by topic#last_posted_at" do
|
||||
topic.update!(last_posted_at: 1.minute.ago)
|
||||
topic_2 = Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago)
|
||||
|
||||
expect(subject.backfill_candidates.map(&:id)).to contain_exactly(topic.id, topic_2.id)
|
||||
expect(subject.backfill_candidates(type).map(&:id)).to contain_exactly(topic.id, topic_2.id)
|
||||
end
|
||||
|
||||
it "prioritizes topics without summaries" do
|
||||
|
@ -78,7 +61,7 @@ RSpec.describe Jobs::SummariesBackfill do
|
|||
topic.update!(last_posted_at: 1.minute.ago)
|
||||
Fabricate(:ai_summary, target: topic, content_range: (1..1))
|
||||
|
||||
expect(subject.backfill_candidates.map(&:id)).to contain_exactly(topic_2.id, topic.id)
|
||||
expect(subject.backfill_candidates(type).map(&:id)).to contain_exactly(topic_2.id, topic.id)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -88,16 +71,21 @@ RSpec.describe Jobs::SummariesBackfill do
|
|||
Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
|
||||
topic.update!(last_posted_at: 1.minute.ago)
|
||||
Fabricate(:ai_summary, target: topic, created_at: 3.hours.ago, content_range: (1..1))
|
||||
Fabricate(:topic_ai_gist, target: topic, created_at: 3.hours.ago, content_range: (1..1))
|
||||
|
||||
summary_1 = "Summary of topic_2"
|
||||
gist_1 = "Gist of topic_2"
|
||||
summary_2 = "Summary of topic"
|
||||
gist_2 = "Gist of topic"
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([summary_1, summary_2]) do
|
||||
subject.execute({})
|
||||
end
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(
|
||||
[summary_1, summary_2, gist_1, gist_2],
|
||||
) { subject.execute({}) }
|
||||
|
||||
expect(AiSummary.find_by(target: topic_2).summarized_text).to eq(summary_1)
|
||||
expect(AiSummary.find_by(target: topic).summarized_text).to eq(summary_2)
|
||||
expect(AiSummary.complete.find_by(target: topic_2).summarized_text).to eq(summary_1)
|
||||
expect(AiSummary.gist.find_by(target: topic_2).summarized_text).to eq(gist_1)
|
||||
expect(AiSummary.complete.find_by(target: topic).summarized_text).to eq(summary_2)
|
||||
expect(AiSummary.gist.find_by(target: topic).summarized_text).to eq(gist_2)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue