2023-06-27 11:26:33 -04:00
# frozen_string_literal: true
require_relative " ../../../../support/openai_completions_inference_stubs "
RSpec . describe DiscourseAi :: Summarization :: Models :: OpenAi do
2023-06-27 13:42:33 -04:00
subject ( :model ) { described_class . new ( model_name , max_tokens : max_tokens ) }
2023-06-27 11:26:33 -04:00
2023-06-27 13:42:33 -04:00
let ( :model_name ) { " gpt-3.5-turbo " }
let ( :max_tokens ) { 720 }
2023-06-27 11:26:33 -04:00
let ( :content ) do
{
resource_path : " /t/1/POST_NUMBER " ,
content_title : " This is a title " ,
contents : [ { poster : " asd " , id : 1 , text : " This is a text " } ] ,
}
end
2023-07-13 16:05:41 -04:00
def as_chunk ( item )
{ ids : [ item [ :id ] ] , summary : " ( #{ item [ :id ] } #{ item [ :poster ] } said: #{ item [ :text ] } " }
end
2023-06-27 11:26:33 -04:00
def expected_messages ( contents , opts )
base_prompt = << ~ TEXT
You are a summarization bot .
You effectively summarise any text and reply ONLY with ONLY the summarized text .
You condense it into a shorter version .
You understand and generate Discourse forum Markdown .
2023-08-21 14:40:32 -04:00
You format the response , including links , using markdown .
2023-06-27 11:26:33 -04:00
Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)
The discussion title is : #{opts[:content_title]}.
TEXT
messages = [ { role : " system " , content : base_prompt } ]
text =
contents . reduce ( " " ) do | memo , item |
memo += " ( #{ item [ :id ] } #{ item [ :poster ] } said: #{ item [ :text ] } "
end
2023-08-21 14:40:32 -04:00
messages << {
role : " user " ,
content :
" Summarize the following in 400 words. Keep the summary in the same language used in the text below. \n #{ text } " ,
}
2023-06-27 11:26:33 -04:00
end
describe " # summarize_in_chunks " do
context " when the content fits in a single chunk " do
it " performs a request to summarize " do
opts = content . except ( :contents )
OpenAiCompletionsInferenceStubs . stub_response (
expected_messages ( content [ :contents ] , opts ) ,
" This is summary 1 " ,
)
2023-07-13 16:05:41 -04:00
chunks = content [ :contents ] . map { | c | as_chunk ( c ) }
summarized_chunks = model . summarize_in_chunks ( chunks , opts ) . map { | c | c [ :summary ] }
2023-06-27 11:26:33 -04:00
expect ( summarized_chunks ) . to contain_exactly ( " This is summary 1 " )
end
end
context " when the content fits in multiple chunks " do
it " performs a request for each one to summarize " do
content [ :contents ] << {
poster : " asd2 " ,
id : 2 ,
text : " This is a different text to summarize " ,
}
opts = content . except ( :contents )
content [ :contents ] . each_with_index do | item , idx |
OpenAiCompletionsInferenceStubs . stub_response (
expected_messages ( [ item ] , opts ) ,
" This is summary #{ idx + 1 } " ,
)
end
2023-07-13 16:05:41 -04:00
chunks = content [ :contents ] . map { | c | as_chunk ( c ) }
summarized_chunks = model . summarize_in_chunks ( chunks , opts ) . map { | c | c [ :summary ] }
2023-06-27 11:26:33 -04:00
expect ( summarized_chunks ) . to contain_exactly ( " This is summary 1 " , " This is summary 2 " )
end
end
end
describe " # concatenate_summaries " do
it " combines all the different summaries into a single one " do
messages = [
{ role : " system " , content : " You are a helpful bot " } ,
{
role : " user " ,
content :
2023-08-21 14:40:32 -04:00
" Concatenate these disjoint summaries, creating a cohesive narrative. Keep the summary in the same language used in the text below. \n summary 1 \n summary 2 " ,
2023-06-27 11:26:33 -04:00
} ,
]
OpenAiCompletionsInferenceStubs . stub_response ( messages , " concatenated summary " )
2023-06-27 13:42:33 -04:00
expect ( model . concatenate_summaries ( [ " summary 1 " , " summary 2 " ] ) ) . to eq ( " concatenated summary " )
2023-06-27 11:26:33 -04:00
end
end
describe " # summarize_with_truncation " do
let ( :max_tokens ) { 709 }
it " truncates the context to meet the token limit " do
opts = content . except ( :contents )
truncated_version = expected_messages ( content [ :contents ] , opts )
truncated_version . last [
:content
2023-08-21 14:40:32 -04:00
] = " Summarize the following in 400 words. Keep the summary in the same language used in the text below. \n (1 asd said: This is a "
2023-06-27 11:26:33 -04:00
OpenAiCompletionsInferenceStubs . stub_response ( truncated_version , " truncated summary " )
2023-06-27 13:42:33 -04:00
expect ( model . summarize_with_truncation ( content [ :contents ] , opts ) ) . to eq ( " truncated summary " )
2023-06-27 11:26:33 -04:00
end
end
end