mirror of
				https://github.com/discourse/discourse-ai.git
				synced 2025-10-30 22:18:38 +00:00 
			
		
		
		
	* DEV: Better strategies for summarization. The strategy's responsibility is: "Given a collection of texts, I know how to summarize them most efficiently, using the minimum number of requests and maximizing token usage". Each model has a different token limit, so it all boils down to two strategies: (1) fold all the texts into a single one by summarizing them in chunks and then building a final summary from those chunk summaries; (2) combine the texts into a single prompt and truncate it according to the model's token limit. While the latter is less than ideal, we need it for "bart-large-cnn-samsum" and "flan-t5-base-samsum", both of which have low limits. The rest will rely on folding. * Expose summarized chunks to users
		
			
				
	
	
		
			31 lines
		
	
	
		
			845 B
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			31 lines
		
	
	
		
			845 B
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| # frozen_string_literal: true
 | |
| 
 | |
module DiscourseAi
  module Summarization
    module Strategies
      # Summarization strategy that has the completion model summarize the
      # given contents in chunks and, when more than one chunk summary comes
      # back, asks the same model to concatenate them into a single summary.
      class FoldContent < ::Summarization::Base
        attr_reader :completion_model

        # Configuration and presentation queries are answered by the wrapped
        # completion model rather than by the strategy itself.
        delegate :correctly_configured?,
                 :display_name,
                 :configuration_hint,
                 :model,
                 to: :completion_model

        # @param completion_model the object that performs the actual
        #   summarization calls; it must respond to `summarize_in_chunks`
        #   and `concatenate_summaries`, plus the delegated methods above.
        def initialize(completion_model)
          @completion_model = completion_model
        end

        # Summarizes `content[:contents]`, forwarding every other key of
        # `content` to the completion model as options.
        #
        # @param content [Hash] hash-like object with a `:contents` entry.
        # @return [Hash] `{ summary:, chunks: }`. When exactly one chunk
        #   summary is produced, its `:summary` is returned directly and
        #   `chunks` is empty; otherwise `summary` is the concatenation built
        #   by the completion model and `chunks` holds the chunk summaries.
        def summarize(content)
          extra_options = content.except(:contents)
          chunk_summaries =
            completion_model.summarize_in_chunks(content[:contents], extra_options)

          if chunk_summaries.length == 1
            # A single chunk already is the final summary; no folding needed.
            { summary: chunk_summaries.first[:summary], chunks: [] }
          else
            {
              summary: completion_model.concatenate_summaries(chunk_summaries),
              chunks: chunk_summaries,
            }
          end
        end
      end
    end
  end
end
 |