2023-06-27 12:26:33 -03:00
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class FoldContent < :: Summarization :: Base
# Builds the strategy around a completion model.
#
# The model object is stored as-is; the rest of this class reads `model` and
# `available_tokens` from it and delegates a few configuration readers to it.
#
# @param completion_model [Object] model wrapper used for every summarization call
def initialize ( completion_model )
@completion_model = completion_model
end
# Read access to the completion model given to the constructor.
attr_reader :completion_model
# Forward these configuration readers to the completion model so the strategy
# can be queried directly. NOTE(review): `delegate ... to:` is not defined in
# this file; presumably ActiveSupport's Module#delegate — confirm.
delegate :correctly_configured? ,
:display_name ,
:configuration_hint ,
:model ,
to : :completion_model
2023-11-23 12:58:54 -03:00
# Summarizes the given content, folding it chunk by chunk when it is split
# into more than one chunk.
#
# Every key of `content` other than :contents is passed along as prompt
# options. When the content ends up as exactly one chunk it is summarized
# directly; otherwise each chunk is summarized on its own and the partial
# summaries are merged by a final completion.
#
# @param content [Hash] holds the items under :contents plus prompt options
# @param user [Object] forwarded untouched to the completion calls
# @yield forwarded to the completion that produces the final summary
# @return [Hash] { summary: <final summary>, chunks: <per-chunk summaries> };
#   :chunks is an empty array on the single-chunk path
def summarize(content, user, &on_partial_blk)
  llm = DiscourseAi::Completions::LLM.proxy(completion_model.model)
  opts = content.except(:contents)
  chunks = split_into_chunks(llm.tokenizer, content[:contents])

  # Exactly one chunk: a single completion is enough, nothing to fold.
  if chunks.size == 1
    single_text = chunks.first[:summary]
    return { summary: summarize_single(llm, single_text, user, opts, &on_partial_blk), chunks: [] }
  end

  summaries = summarize_in_chunks(llm, chunks, user, opts)
  partial_texts = summaries.map { |partial| partial[:summary] }

  {
    summary: concatenate_summaries(llm, partial_texts, user, &on_partial_blk),
    chunks: summaries,
  }
end
private
2023-11-23 12:58:54 -03:00
# Renders one content item as a fragment of prompt text.
#
# The layout is "<id>) <poster> said: <text> ", i.e. the
# "<POST_NUMBER>) <USERNAME> ..." shape that summarization_prompt describes to
# the model and uses in its examples. The trailing space keeps consecutive
# items apart when fragments are concatenated into a chunk.
#
# @param item [Hash] read via item[:id], item[:poster] and item[:text]
# @return [String]
def format_content_item(item)
  "#{item[:id]}) #{item[:poster]} said: #{item[:text]} "
end
# Groups the content items into chunks of formatted text.
#
# Items are appended to the current chunk for as long as
# tokenizer.can_expand_tokens?(current_text, new_text, limit) answers true,
# where the limit is completion_model.available_tokens. When it answers false
# the current chunk is closed and the item starts a new one.
#
# A chunk is only emitted when it holds text. Without that check an item that
# does not fit on its own as the very first item would cause the still-empty
# initial chunk to be emitted, and an empty chunk would later be summarized.
#
# @param tokenizer [#can_expand_tokens?] decides whether text can still be appended
# @param contents [Enumerable<Hash>] items readable by format_content_item, each with :id
# @return [Array<Hash>] chunks shaped as { ids: [...], summary: "<formatted text>" }
def split_into_chunks(tokenizer, contents)
  chunks = []
  current = { ids: [], summary: "" }

  contents.each do |item|
    new_content = format_content_item(item)

    if tokenizer.can_expand_tokens?(
         current[:summary],
         new_content,
         completion_model.available_tokens,
       )
      # += builds a new string, so the (possibly frozen) initial literal is never mutated.
      current[:summary] += new_content
      current[:ids] << item[:id]
    else
      chunks << current if current[:summary].present?
      current = { ids: [item[:id]], summary: new_content }
    end
  end

  chunks << current if current[:summary].present?
  chunks
end
2023-11-23 12:58:54 -03:00
# Summarizes a text with one completion call.
#
# @param llm [#completion!] object that runs the completion
# @param text [String] text to summarize
# @param user [Object] forwarded untouched to llm.completion!
# @param opts [Hash] prompt options handed to summarization_prompt
# @yield forwarded to llm.completion!
# @return whatever llm.completion! returns
def summarize_single(llm, text, user, opts, &on_partial_blk)
  llm.completion!(summarization_prompt(text, opts), user, &on_partial_blk)
end
# Summarizes every chunk with its own completion call.
#
# Each chunk's :summary (the text gathered for that chunk) is overwritten in
# place with the completion result, so the hashes passed in are the same
# objects that come back, collected in a new array.
#
# @param llm [#completion!] object that runs the completion
# @param chunks [Array<Hash>] chunks carrying their text under :summary
# @param user [Object] forwarded untouched to llm.completion!
# @param opts [Hash] prompt options handed to summarization_prompt
# @return [Array<Hash>] the chunks, each with :summary replaced
def summarize_in_chunks(llm, chunks, user, opts)
  chunks.map do |chunk|
    chunk_prompt = summarization_prompt(chunk[:summary], opts)
    # Cap the length of the partial summaries.
    chunk_prompt[:post_insts] = "Don't use more than 400 words for the summary."

    chunk.tap { |current| current[:summary] = llm.completion!(chunk_prompt, user) }
  end
end
# Merges partial summaries into one narrative with a final completion call.
#
# The summaries are joined with newlines and wrapped by summarization_prompt
# (with empty options); the prompt's instructions are then replaced by ones
# that ask for concatenation instead of summarization.
#
# @param llm [#completion!] object that runs the completion
# @param summaries [Array<String>] partial summaries, in order
# @param user [Object] forwarded untouched to llm.completion!
# @yield forwarded to llm.completion!
# @return whatever llm.completion! returns
def concatenate_summaries(llm, summaries, user, &on_partial_blk)
  joined_summaries = summaries.join("\n")

  merge_prompt = summarization_prompt(joined_summaries, {})
  merge_prompt[:insts] = <<~TEXT
    You are a bot that can concatenate disjoint summaries, creating a cohesive narrative.
    Keep the resulting summary in the same language used in the text below.
  TEXT

  llm.completion!(merge_prompt, user, &on_partial_blk)
end
# Builds the prompt hash used for a summarization completion.
#
# The result always has :insts (system-style instructions) and :input (the
# text wrapped in <input> tags). When opts[:resource_path] is set, the
# instructions also explain how to link to individual posts and :examples
# holds two input/output pairs demonstrating those links. When
# opts[:content_title] is set, the title is appended to the instructions.
#
# The examples must agree with their own inputs, otherwise they teach the
# model the wrong thing: the sentiment attributed to each user matches what
# that user said, and every link targets the post number that user wrote.
#
# @param input [String] text to summarize
# @param opts [Hash] optional :resource_path and :content_title
# @return [Hash] prompt with :insts, :input and, optionally, :examples
def summarization_prompt(input, opts)
  resource_path = opts[:resource_path]

  insts = <<~TEXT
    You are a summarization bot that effectively summarizes any text.
    Your replies contain ONLY a summarized version of the text I provided and you, using the same language.
    You understand and generate Discourse forum Markdown.
    You format the response, including links, using Markdown.
    Your summaries are always a cohesive narrative in the form of one or multiple paragraphs.
  TEXT

  if resource_path
    insts += <<~TEXT
      Each post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE> "
      Try generating links as well the format is #{resource_path}/<POST_NUMBER>
      For example, a link to the 3rd post in the topic would be [post 3](#{resource_path}/3)
    TEXT
  end

  insts += "The discussion title is: #{opts[:content_title]}.\n" if opts[:content_title]

  wrapped_input = <<~TEXT
    Here is a list of posts, inside <input></input> XML tags:
    <input>
    #{input}
    </input>
  TEXT

  prompt = { insts: insts, input: wrapped_input }

  if resource_path
    prompt[:examples] = [
      [
        "<input>1) user1 said: I love Mondays 2) user2 said: I hate Mondays</input>",
        "Two users are sharing their feelings toward Mondays. [user1](#{resource_path}/1) loves them, while [user2](#{resource_path}/2) hates them.",
      ],
      [
        "<input>3) usuario1: Amo los lunes 6) usuario2: Odio los lunes</input>",
        "Dos usuarios charlan sobre los lunes. [usuario1](#{resource_path}/3) dice que los ama, mientras que [usuario2](#{resource_path}/6) los odia.",
      ],
    ]
  end

  prompt
end
2023-06-27 12:26:33 -03:00
end
end
end
end