mirror of
				https://github.com/discourse/discourse-ai.git
				synced 2025-11-04 00:18:39 +00:00 
			
		
		
		
	Single and multi-chunk summaries end up using different prompts for the last summary. This change detects when the summarized content fits in a single chunk and uses a slightly different prompt, which leads to more consistent summary formats. This PR also moves the chunk-splitting step to the `FoldContent` strategy as preparation for implementing streamed summaries.
		
			
				
	
	
		
			74 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			74 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
# frozen_string_literal: true

module DiscourseAi
  module Summarization
    module Models
      # Abstract base class for summarization model adapters.
      #
      # Subclasses must implement the configuration checks
      # (#correctly_configured?, #display_name, #configuration_hint) and the
      # summarization primitives (#summarize_chunk, #concatenate_summaries,
      # #summarize_with_truncation, #summarize_single, #tokenizer).
      class Base
        # @param model [Object] identifier of the underlying model
        # @param max_tokens [Integer] total token budget of the model's context window
        def initialize(model, max_tokens:)
          @model = model
          @max_tokens = max_tokens
        end

        # @return [Boolean] whether the model is usable with the current settings.
        def correctly_configured?
          # FIX: was `raise NotImplemented` — that constant does not exist in
          # Ruby, so the call raised NameError instead of NotImplementedError.
          raise NotImplementedError
        end

        # @return [String] human-readable model name.
        def display_name
          raise NotImplementedError
        end

        # @return [String] hint shown when the model is misconfigured.
        def configuration_hint
          raise NotImplementedError
        end

        # Summarizes each chunk in place, replacing its :summary value with the
        # model-generated summary.
        #
        # @param chunks [Array<Hash>] each hash carries its text under :summary
        # @param opts [Hash] options forwarded to #summarize_chunk
        # @return [Array<Hash>] the same chunk hashes, mutated with summaries
        def summarize_in_chunks(chunks, opts)
          chunks.map do |chunk|
            chunk[:summary] = summarize_chunk(chunk[:summary], opts)
            chunk
          end
        end

        # Combines multiple chunk summaries into a single final summary.
        def concatenate_summaries(_summaries)
          raise NotImplementedError
        end

        # Summarizes content truncated to fit the available token budget.
        def summarize_with_truncation(_contents, _opts)
          raise NotImplementedError
        end

        # Summarizes content that fits in a single chunk.
        # (Params underscored for consistency with the other abstract stubs.)
        def summarize_single(_chunk_text, _opts)
          raise NotImplementedError
        end

        # Formats one content item for inclusion in a prompt.
        # NOTE(review): the leading "(" is never closed — prompt templates may
        # depend on this exact shape, so it is preserved as-is; confirm before
        # changing.
        def format_content_item(item)
          "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
        end

        # Tokens left for the content itself after reserving room for the
        # response and the base prompt.
        def available_tokens
          max_tokens - reserved_tokens
        end

        attr_reader :model, :max_tokens

        protected

        def reserved_tokens
          # Reserve tokens for the response and the base prompt
          # ~500 words
          700
        end

        # Summarizes a single chunk of text; must be provided by subclasses.
        def summarize_chunk(_chunk_text, _opts)
          raise NotImplementedError
        end

        # @return [Object] tokenizer used to measure/expand token budgets.
        def tokenizer
          raise NotImplementedError
        end

        # ActiveSupport delegation (available in the Discourse host app).
        delegate :can_expand_tokens?, to: :tokenizer
      end
    end
  end
end