discourse-ai/spec/jobs/scheduled/summaries_backfill_spec.rb

# frozen_string_literal: true

# Specs for the scheduled job that backfills AI summaries for topics.
#
# Three public methods of the job are exercised:
# - #current_budget      -> how much of the configured hourly topic limit is left
# - #backfill_candidates -> which topics are selected, and in what order
# - #execute             -> end-to-end run that stores generated summaries
RSpec.describe Jobs::SummariesBackfill do
  # Named subject; equivalent to the implicit `described_class.new` subject.
  subject(:job) { described_class.new }

  # Baseline topic: has a word count of 200 and two posts, with no summary yet.
  fab!(:topic) { Fabricate(:topic, word_count: 200, highest_post_number: 2) }
  let(:limit) { 24 } # guarantee two summaries per batch

  before do
    assign_fake_provider_to(:ai_summarization_model)
    SiteSetting.ai_summarization_enabled = true
    SiteSetting.ai_summary_backfill_maximum_topics_per_hour = limit
  end

  describe "#current_budget" do
    context "when no summary has been backfilled yet" do
      it "returns the full budget" do
        expect(job.current_budget).to eq(limit)
      end

      it "ignores summaries generated by users" do
        Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:human])

        expect(job.current_budget).to eq(limit)
      end

      it "only accounts for complete type summaries" do
        # NOTE(review): this gist uses the human origin, which the example above
        # already shows is ignored. Fabricating it with the system origin would
        # isolate the summary-type filter - confirm against the job before changing.
        Fabricate(:topic_ai_gist, target: topic, origin: AiSummary.origins[:human])

        expect(job.current_budget).to eq(limit)
      end
    end

    context "when we already backfilled stuff" do
      fab!(:backfilled_summary) do
        Fabricate(:ai_summary, target: topic, origin: AiSummary.origins[:system])
      end

      context "if it was within the budget window" do
        it "reduces our budget" do
          expect(job.current_budget).to eq(limit - 1)
        end
      end

      context "if it wasn't within the budget window" do
        # Jump two hours ahead so the summary fabricated above is two hours old.
        before { freeze_time(2.hours.from_now) }

        it "returns the full budget" do
          expect(job.current_budget).to eq(limit)
        end
      end
    end
  end

  describe "#backfill_candidates" do
    it "only selects topics with enough words" do
      topic.update!(word_count: 100)

      expect(job.backfill_candidates).to be_empty
    end

    it "ignores up to date summaries" do
      # The summary's range ends at post 2, matching the topic's highest_post_number.
      Fabricate(:ai_summary, target: topic, content_range: (1..2))

      expect(job.backfill_candidates).to be_empty
    end

    it "orders candidates by topic#last_posted_at" do
      topic.update!(last_posted_at: 1.minute.ago)
      topic_2 = Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago)

      # `eq` (rather than the order-insensitive `contain_exactly`) so the ordering
      # is actually asserted: the topic with the most recent post comes first.
      expect(job.backfill_candidates.map(&:id)).to eq([topic.id, topic_2.id])
    end

    it "prioritizes topics without summaries" do
      topic_2 =
        Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)
      # Outdated summary: it ends at post 1 while the topic has two posts.
      Fabricate(:ai_summary, target: topic, content_range: (1..1))

      # topic_2 has no summary, so it must come first even though its last post
      # is older. `eq` asserts the order; `contain_exactly` would not.
      expect(job.backfill_candidates.map(&:id)).to eq([topic_2.id, topic.id])
    end
  end

  describe "#execute" do
    it "backfills a batch" do
      topic_2 =
        Fabricate(:topic, word_count: 200, last_posted_at: 2.minutes.ago, highest_post_number: 1)
      topic.update!(last_posted_at: 1.minute.ago)
      # Pre-existing, outdated summary for `topic`, created three hours ago.
      Fabricate(:ai_summary, target: topic, created_at: 3.hours.ago, content_range: (1..1))

      summary_1 = "Summary of topic_2"
      summary_2 = "Summary of topic"

      # The responses are listed in the order the topics are expected to be
      # processed (topic_2 first) - presumably they are consumed sequentially.
      DiscourseAi::Completions::Llm.with_prepared_responses([summary_1, summary_2]) do
        job.execute({})
      end

      expect(AiSummary.find_by(target: topic_2).summarized_text).to eq(summary_1)
      expect(AiSummary.find_by(target: topic).summarized_text).to eq(summary_2)
    end
  end
end