mirror of
				https://github.com/discourse/discourse-ai.git
				synced 2025-11-04 00:18:39 +00:00 
			
		
		
		
	Single- and multi-chunk summaries end up using different prompts for the last summary. This change detects when the summarized content fits in a single chunk and uses a slightly different prompt, which leads to more consistent summary formats. This PR also moves the chunk-splitting step to the `FoldContent` strategy as preparation for implementing streamed summaries.
		
			
				
	
	
		
			96 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			96 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
# frozen_string_literal: true
 | 
						|
 | 
						|
# Specs for the Discourse-hosted summarization model backend: chunked
# summaries, summary concatenation, and truncation-based summarization.
RSpec.describe DiscourseAi::Summarization::Models::Discourse do
  subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }

  let(:model_name) { "bart-large-cnn-samsum" }
  let(:max_tokens) { 20 }

  let(:content) do
    {
      resource_path: "/t/1/POST_NUMBER",
      content_title: "This is a title",
      contents: [{ poster: "asd", id: 1, text: "This is a text" }],
    }
  end

  before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" }

  # Stubs the classify endpoint so that POSTing `prompt` for the current
  # model returns `response` as the summary text.
  def stub_request(prompt, response)
    endpoint = SiteSetting.ai_summarization_discourse_service_api_endpoint

    WebMock
      .stub_request(:post, "#{endpoint}/api/v1/classify")
      .with(body: JSON.dump(model: model_name, content: prompt))
      .to_return(status: 200, body: JSON.dump(summary_text: response))
  end

  # Builds the flattened prompt string the model is expected to send for
  # the given content items. `opts` mirrors the production call signature
  # but is not used when composing the prompt.
  def expected_messages(contents, opts)
    contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }.join
  end

  # Wraps a single content item in the chunk shape produced by the
  # chunk-splitting step.
  def as_chunk(item)
    { ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
  end

  describe "#summarize_in_chunks" do
    context "when the content fits in a single chunk" do
      it "performs a request to summarize" do
        opts = content.except(:contents)

        stub_request(expected_messages(content[:contents], opts), "This is summary 1")

        chunks = content[:contents].map { |item| as_chunk(item) }
        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |chunk| chunk[:summary] }

        expect(summarized_chunks).to contain_exactly("This is summary 1")
      end
    end

    context "when the content fits in multiple chunks" do
      it "performs a request for each one to summarize" do
        content[:contents] << {
          poster: "asd2",
          id: 2,
          text: "This is a different text to summarize",
        }
        opts = content.except(:contents)

        # Each chunk gets its own stubbed request/response pair.
        content[:contents].each_with_index do |item, idx|
          stub_request(expected_messages([item], opts), "This is summary #{idx + 1}")
        end

        chunks = content[:contents].map { |item| as_chunk(item) }
        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |chunk| chunk[:summary] }

        expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
      end
    end
  end

  describe "#concatenate_summaries" do
    it "combines all the different summaries into a single one" do
      messages = ["summary 1", "summary 2"].join("\n")

      stub_request(messages, "concatenated summary")

      expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
    end
  end

  describe "#summarize_with_truncation" do
    # Force a tiny budget so truncation must kick in.
    let(:max_tokens) { 9 }

    it "truncates the context to meet the token limit" do
      opts = content.except(:contents)

      stub_request("( 1 asd said : this is", "truncated summary")

      expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
    end
  end
end
 |