FIX: Make FoldContent strategy more resilient when using models with low token count. (#341)

We'll recursively summarize the content into smaller chunks until we are sure we can concatenate
them without going over the token limit.
This commit is contained in:
Roman Rizzi 2023-12-06 19:00:24 -03:00 committed by GitHub
parent c8352f21ce
commit 450ec915d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 79 additions and 28 deletions

View File

@ -21,52 +21,71 @@ module DiscourseAi
llm = DiscourseAi::Completions::Llm.proxy(completion_model.model)
chunks = split_into_chunks(llm.tokenizer, content[:contents])
initial_chunks =
rebalance_chunks(
llm.tokenizer,
content[:contents].map { |c| { ids: [c[:id]], summary: format_content_item(c) } },
)
if chunks.length == 1
# Special case where we can do all the summarization in one pass.
if initial_chunks.length == 1
{
summary: summarize_single(llm, chunks.first[:summary], user, opts, &on_partial_blk),
summary:
summarize_single(llm, initial_chunks.first[:summary], user, opts, &on_partial_blk),
chunks: [],
}
else
summaries = summarize_in_chunks(llm, chunks, user, opts)
{
summary:
concatenate_summaries(
llm,
summaries.map { |s| s[:summary] },
user,
&on_partial_blk
),
chunks: summaries,
}
summarize_chunks(llm, initial_chunks, user, opts, &on_partial_blk)
end
end
private
# Recursively folds per-chunk summaries until their combined text fits the
# model's token budget, then concatenates them into one cohesive summary.
#
# llm            - LLM proxy built by DiscourseAi::Completions::Llm.proxy.
# chunks         - Array of { ids: [...], summary: String } hashes (the caller
#                  short-circuits the single-chunk case, so there is always > 1).
# opts           - options forwarded to summarize_in_chunks.
# on_partial_blk - streaming block forwarded to the final concatenation call.
#
# Returns { summary: String, chunks: Array } matching the single-pass shape.
def summarize_chunks(llm, chunks, user, opts, &on_partial_blk)
# Safely assume we always have more than one chunk.
summarized_chunks = summarize_in_chunks(llm, chunks, user, opts)
# Token size of all per-chunk summaries joined together — decides whether a
# single concatenation request will fit in the model's available context.
total_summaries_size =
llm.tokenizer.size(summarized_chunks.map { |s| s[:summary].to_s }.join)
if total_summaries_size < completion_model.available_tokens
# Chunks are small enough, we can concatenate them.
{
summary:
concatenate_summaries(
llm,
summarized_chunks.map { |s| s[:summary] },
user,
&on_partial_blk
),
chunks: summarized_chunks,
}
else
# We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
# NOTE(review): termination relies on each pass producing strictly smaller
# text than its input — confirm summarize_in_chunks guarantees that.
rebalanced_chunks = rebalance_chunks(llm.tokenizer, summarized_chunks)
summarize_chunks(llm, rebalanced_chunks, user, opts, &on_partial_blk)
end
end
# Renders one content item as "(<id> <poster> said: <text> ".
# The leading "(" and trailing space are deliberate — they act as per-item
# delimiters in the concatenated chunk text (the spec asserts this exact shape).
def format_content_item(item)
parts = ["(", item[:id], " ", item[:poster], " said: ", item[:text], " "]
parts.join
end
def split_into_chunks(tokenizer, contents)
def rebalance_chunks(tokenizer, chunks)
section = { ids: [], summary: "" }
chunks =
contents.reduce([]) do |sections, item|
new_content = format_content_item(item)
chunks.reduce([]) do |sections, chunk|
if tokenizer.can_expand_tokens?(
section[:summary],
new_content,
chunk[:summary],
completion_model.available_tokens,
)
section[:summary] += new_content
section[:ids] << item[:id]
section[:summary] += chunk[:summary]
section[:ids] = section[:ids].concat(chunk[:ids])
else
sections << section
section = { ids: [item[:id]], summary: new_content }
section = chunk
end
sections
@ -94,10 +113,22 @@ module DiscourseAi
end
def concatenate_summaries(llm, summaries, user, &on_partial_blk)
prompt = summarization_prompt(summaries.join("\n"), {})
prompt = {}
prompt[:insts] = <<~TEXT
You are a bot that can concatenate disjoint summaries, creating a cohesive narrative.
Keep the resulting summary in the same language used in the text below.
You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative.
The narrative you create is in the form of one or multiple paragraphs.
Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
You understand and generate Discourse forum Markdown.
You format the response, including links, using Markdown.
TEXT
prompt[:input] = <<~TEXT
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:
<input>
#{summaries.join("\n")}
</input>
TEXT
llm.completion!(prompt, user, &on_partial_blk)
@ -106,7 +137,8 @@ module DiscourseAi
def summarization_prompt(input, opts)
insts = <<~TEXT
You are a summarization bot that effectively summarize any text
Your replies contain ONLY a summarized version of the text I provided and you, using the same language.
Your reply MUST BE a summarized version of the posts I provided, using the first language you detect.
I'm NOT interested in anything other than the summary, don't include additional text or comments.
You understand and generate Discourse forum Markdown.
You format the response, including links, using Markdown.
Your summaries are always a cohesive narrative in the form of one or multiple paragraphs.
@ -122,7 +154,7 @@ module DiscourseAi
insts += "The discussion title is: #{opts[:content_title]}.\n" if opts[:content_title]
prompt = { insts: insts, input: <<~TEXT }
Here is the a list of posts, inside <input></input> XML tags:
Here are the posts, inside <input></input> XML tags:
<input>
#{input}

View File

@ -44,6 +44,25 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
expect(result[:summary]).to eq(concatenated_summary)
end
# Exercises the recursive fold path: the first summarization pass yields
# summaries that are presumably still too large to concatenate (given the
# spec's stubbed token limits — confirm against the surrounding setup), so the
# strategy rebalances and summarizes again before the final concatenation.
it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
max_length_response = "(1 asd said: This is a text "
chunk_of_chunks = "I'm smol"
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
[
max_length_response,
max_length_response,
chunk_of_chunks,
chunk_of_chunks,
concatenated_summary,
],
# 5 completions total: 2 first-pass summaries + 2 second-pass summaries
# + 1 final concatenation (matches the 5 prepared responses above).
) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(5) } }
expect(result[:summary]).to eq(concatenated_summary)
end
end
end
end