FEATURE: streaming mode for the FoldContent strategy. (#134)

Roman Rizzi 2023-08-11 15:08:54 -03:00 committed by GitHub
parent 7077c31ab8
commit b076e43d67
9 changed files with 139 additions and 62 deletions
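
Taken together, the change threads an optional streaming block (&on_partial_blk) from the two summarization strategies down through every completion backend to the low-level inference clients. A minimal sketch of how a caller might consume it; the strategy wiring and the content hash are illustrative assumptions, not part of this diff:

  # Hypothetical caller; constants and setup assumed for illustration.
  strategy = DiscourseAi::Summarization::Strategies::FoldContent.new(completion_model)

  result =
    strategy.summarize(content) do |partial|
      print partial # each partial is a String fragment of the summary, streamed as generated
    end

  puts result[:summary] # the complete summary, available once streaming finishes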


@@ -20,7 +20,7 @@ module DiscourseAi
 )
 end
-def concatenate_summaries(summaries)
+def concatenate_summaries(summaries, &on_partial_blk)
 instructions = <<~TEXT
 Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
 Include only the summary inside <ai> tags.
@@ -29,10 +29,10 @@ module DiscourseAi
 instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" }
 instructions += "Assistant:\n"
-completion(instructions)
+completion(instructions, &on_partial_blk)
 end
-def summarize_with_truncation(contents, opts)
+def summarize_with_truncation(contents, opts, &on_partial_blk)
 instructions = build_base_prompt(opts)
 text_to_summarize = contents.map { |c| format_content_item(c) }.join
@@ -40,17 +40,20 @@ module DiscourseAi
 instructions += "<input>#{truncated_content}</input>\nAssistant:\n"
-completion(instructions)
+completion(instructions, &on_partial_blk)
 end
-def summarize_single(chunk_text, opts)
-summarize_chunk(chunk_text, opts.merge(single_chunk: true))
+def summarize_single(chunk_text, opts, &on_partial_blk)
+summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
 end
 private
-def summarize_chunk(chunk_text, opts)
-completion(build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n")
+def summarize_chunk(chunk_text, opts, &on_partial_blk)
+completion(
+build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n",
+&on_partial_blk
+)
 end
 def build_base_prompt(opts)
@@ -79,9 +82,33 @@ module DiscourseAi
 base_prompt
 end
-def completion(prompt)
-response =
-::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(:completion)
+def completion(prompt, &on_partial_blk)
+# We need to discard any text that might come before the <ai> tag.
+# Instructing the model to reply only with the summary seems impossible.
+pre_tag_partial = +""
+if on_partial_blk
+on_partial_read =
+Proc.new do |partial|
+if pre_tag_partial.include?("<ai>")
+on_partial_blk.call(partial[:completion])
+else
+pre_tag_partial << partial[:completion]
+end
+end
+response =
+::DiscourseAi::Inference::AnthropicCompletions.perform!(
+prompt,
+model,
+&on_partial_read
+)
+else
+response =
+::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(
+:completion,
+)
+end
 Nokogiri::HTML5.fragment(response).at("ai").text
 end
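
The buffering in completion above deserves a note: Claude is told to wrap the summary in <ai> tags, and since it cannot reliably be stopped from emitting preamble, partials are held back until the opening tag has streamed past. Observe that the chunk containing <ai> itself is appended to the buffer rather than forwarded, so any summary text arriving in that same chunk reaches the caller only through the final Nokogiri parse, not the stream. The filtering idea, reduced to a standalone sketch (illustrative, not the committed code):

  pre_tag = +""
  filter =
    lambda do |chunk, on_text|
      if pre_tag.include?("<ai>")
        on_text.call(chunk) # opening tag already seen: forward everything from now on
      else
        pre_tag << chunk # still waiting for the tag: buffer and stay silent
      end
    end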


@@ -20,8 +20,8 @@ module DiscourseAi
 )
 end
-def concatenate_summaries(summaries)
-completion(<<~TEXT)
+def concatenate_summaries(summaries, &on_partial_blk)
+prompt = <<~TEXT
 [INST] <<SYS>>
 You are a helpful bot
 <</SYS>>
@@ -29,13 +29,15 @@ module DiscourseAi
 Concatenate these disjoint summaries, creating a cohesive narrative:
 #{summaries.join("\n")} [/INST]
 TEXT
+completion(prompt, &on_partial_blk)
 end
-def summarize_with_truncation(contents, opts)
+def summarize_with_truncation(contents, opts, &on_partial_blk)
 text_to_summarize = contents.map { |c| format_content_item(c) }.join
 truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
-completion(<<~TEXT)
+prompt = <<~TEXT
 [INST] <<SYS>>
 #{build_base_prompt(opts)}
 <</SYS>>
@@ -44,15 +46,17 @@ module DiscourseAi
 #{truncated_content} [/INST]
 Here is a summary of the above topic:
 TEXT
+completion(prompt, &on_partial_blk)
 end
-def summarize_single(chunk_text, opts)
-summarize_chunk(chunk_text, opts.merge(single_chunk: true))
+def summarize_single(chunk_text, opts, &on_partial_blk)
+summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
 end
 private
-def summarize_chunk(chunk_text, opts)
+def summarize_chunk(chunk_text, opts, &on_partial_blk)
 summary_instruction =
 if opts[:single_chunk]
 "Summarize the following forum discussion, creating a cohesive narrative:"
@@ -60,7 +64,7 @@ module DiscourseAi
 "Summarize the following in up to 400 words:"
 end
-completion(<<~TEXT)
+prompt = <<~TEXT
 [INST] <<SYS>>
 #{build_base_prompt(opts)}
 <</SYS>>
@@ -69,6 +73,8 @@ module DiscourseAi
 #{chunk_text} [/INST]
 Here is a summary of the above topic:
 TEXT
+completion(prompt, &on_partial_blk)
 end
 def build_base_prompt(opts)
@@ -91,10 +97,21 @@ module DiscourseAi
 base_prompt
 end
-def completion(prompt)
-::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
-:generated_text,
-)
+def completion(prompt, &on_partial_blk)
+if on_partial_blk
+on_partial_read =
+Proc.new { |partial| on_partial_blk.call(partial.dig(:token, :text).to_s) }
+::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
+prompt,
+model,
+&on_partial_read
+)
+else
+::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
+:generated_text,
+)
+end
 end
 def tokenizer
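
When streaming, the HuggingFace text-generation endpoint emits one event per token instead of a single generated_text payload, which is why the streaming branch unwraps dig(:token, :text). A quick illustration of the event shape this code assumes:

  event = { token: { text: " narrative", special: false } }
  event.dig(:token, :text).to_s #=> " narrative"
  {}.dig(:token, :text).to_s #=> "" (dig is nil-safe, so a malformed event streams nothing)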


@@ -8,8 +8,8 @@ module DiscourseAi
 "Llama2FineTunedOrcaStyle's #{SiteSetting.ai_hugging_face_model_display_name.presence || model}"
 end
-def concatenate_summaries(summaries)
-completion(<<~TEXT)
+def concatenate_summaries(summaries, &on_partial_blk)
+prompt = <<~TEXT
 ### System:
 You are a helpful bot
@@ -19,28 +19,32 @@ module DiscourseAi
 ### Assistant:
 TEXT
+completion(prompt, &on_partial_blk)
 end
-def summarize_with_truncation(contents, opts)
+def summarize_with_truncation(contents, opts, &on_partial_blk)
 text_to_summarize = contents.map { |c| format_content_item(c) }.join
 truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
-completion(<<~TEXT)
-### System:
-#{build_base_prompt(opts)}
+prompt = <<~TEXT
+### System:
+#{build_base_prompt(opts)}
-### User:
-Summarize the following in up to 400 words:
-#{truncated_content}
+### User:
+Summarize the following in up to 400 words:
+#{truncated_content}
-### Assistant:
-Here is a summary of the above topic:
-TEXT
+### Assistant:
+Here is a summary of the above topic:
+TEXT
+completion(prompt, &on_partial_blk)
 end
 private
-def summarize_chunk(chunk_text, opts)
+def summarize_chunk(chunk_text, opts, &on_partial_blk)
 summary_instruction =
 if opts[:single_chunk]
 "Summarize the following forum discussion, creating a cohesive narrative:"
@@ -48,7 +52,7 @@ module DiscourseAi
 "Summarize the following in up to 400 words:"
 end
-completion(<<~TEXT)
+prompt = <<~TEXT
 ### System:
 #{build_base_prompt(opts)}
@@ -59,6 +63,8 @@ module DiscourseAi
 ### Assistant:
 Here is a summary of the above topic:
 TEXT
+completion(prompt, &on_partial_blk)
 end
 end
 end
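
By this point the pattern is uniform across backends: capture the caller's block with &on_partial_blk in the method signature, then re-pass it with & so the same block flows down the call chain untouched. The Ruby idiom in isolation:

  def outer(&blk)
    inner(&blk) # forward the captured block unchanged
  end

  def inner
    yield "partial" if block_given?
  end

  outer { |p| puts p } # prints "partial"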


@@ -20,7 +20,7 @@ module DiscourseAi
 )
 end
-def concatenate_summaries(summaries)
+def concatenate_summaries(summaries, &on_partial_blk)
 messages = [
 { role: "system", content: "You are a helpful bot" },
 {
@@ -30,10 +30,10 @@ module DiscourseAi
 },
 ]
-completion(messages)
+completion(messages, &on_partial_blk)
 end
-def summarize_with_truncation(contents, opts)
+def summarize_with_truncation(contents, opts, &on_partial_blk)
 messages = [{ role: "system", content: build_base_prompt(opts) }]
 text_to_summarize = contents.map { |c| format_content_item(c) }.join
@@ -44,16 +44,16 @@ module DiscourseAi
 content: "Summarize the following in 400 words:\n#{truncated_content}",
 }
-completion(messages)
+completion(messages, &on_partial_blk)
 end
-def summarize_single(chunk_text, opts)
-summarize_chunk(chunk_text, opts.merge(single_chunk: true))
+def summarize_single(chunk_text, opts, &on_partial_blk)
+summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
 end
 private
-def summarize_chunk(chunk_text, opts)
+def summarize_chunk(chunk_text, opts, &on_partial_blk)
 summary_instruction =
 if opts[:single_chunk]
 "Summarize the following forum discussion, creating a cohesive narrative:"
@@ -66,6 +66,7 @@ module DiscourseAi
 { role: "system", content: build_base_prompt(opts) },
 { role: "user", content: "#{summary_instruction}\n#{chunk_text}" },
 ],
+&on_partial_blk
 )
 end
@@ -89,13 +90,22 @@ module DiscourseAi
 base_prompt
 end
-def completion(prompt)
-::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
-:choices,
-0,
-:message,
-:content,
-)
+def completion(prompt, &on_partial_blk)
+if on_partial_blk
+on_partial_read =
+Proc.new do |partial|
+on_partial_blk.call(partial.dig(:choices, 0, :delta, :content).to_s)
+end
+::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, &on_partial_read)
+else
+::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
+:choices,
+0,
+:message,
+:content,
+)
+end
 end
 def tokenizer
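
The two branches of the OpenAI completion mirror the two response shapes of the chat API: a non-streamed response carries the text at choices[0].message.content, while each streamed SSE chunk carries an increment at choices[0].delta.content. For example:

  full = { choices: [{ message: { content: "A summary." } }] }
  chunk = { choices: [{ delta: { content: "A " } }] }

  full.dig(:choices, 0, :message, :content) #=> "A summary."
  chunk.dig(:choices, 0, :delta, :content) #=> "A "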


@@ -16,17 +16,24 @@ module DiscourseAi
 :model,
 to: :completion_model
-def summarize(content)
+def summarize(content, &on_partial_blk)
 opts = content.except(:contents)
 chunks = split_into_chunks(content[:contents])
 if chunks.length == 1
-{ summary: completion_model.summarize_single(chunks.first[:summary], opts), chunks: [] }
+{
+summary:
+completion_model.summarize_single(chunks.first[:summary], opts, &on_partial_blk),
+chunks: [],
+}
 else
 summaries = completion_model.summarize_in_chunks(chunks, opts)
-{ summary: completion_model.concatenate_summaries(summaries), chunks: summaries }
+{
+summary: completion_model.concatenate_summaries(summaries, &on_partial_blk),
+chunks: summaries,
+}
 end
 end
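
Worth noting: in the multi-chunk branch, summarize_in_chunks still runs without the block, so intermediate per-chunk summaries never stream; only the final concatenate_summaries pass yields partials to the caller. In use:

  # Single chunk: every generated fragment reaches the block.
  # Many chunks: the block only sees the final concatenation pass.
  strategy.summarize(content) { |partial| print partial }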


@@ -16,11 +16,12 @@ module DiscourseAi
 :model,
 to: :completion_model
-def summarize(content)
+def summarize(content, &on_partial_blk)
 opts = content.except(:contents)
 {
-summary: completion_model.summarize_with_truncation(content[:contents], opts),
+summary:
+completion_model.summarize_with_truncation(content[:contents], opts, &on_partial_blk),
 chunks: [],
 }
 end


@@ -68,10 +68,11 @@ module ::DiscourseAi
 return parsed_response
 end
+response_data = +""
 begin
 cancelled = false
 cancel = lambda { cancelled = true }
-response_data = +""
 response_raw = +""
 response.read_body do |chunk|
@@ -111,6 +112,8 @@ module ::DiscourseAi
 )
 end
 end
+return response_data
 end
 end
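
Hoisting response_data above the begin (and adding the explicit return after it) is what lets the streaming path hand back the fully accumulated text once the response body has been consumed; the same change is applied to the HuggingFace and OpenAI clients below. The skeleton of the pattern, with extract_text as a stand-in for the client-specific parsing:

  def stream_and_collect(response)
    response_data = +"" # declared before begin so it survives to the method's return

    begin
      response.read_body do |chunk|
        response_data << extract_text(chunk) # accumulate while also yielding partials
      end
    ensure
      # client-specific logging / cleanup happens here
    end

    return response_data
  end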


@@ -85,10 +85,11 @@ module ::DiscourseAi
 return parsed_response
 end
+response_data = +""
 begin
 cancelled = false
 cancel = lambda { cancelled = true }
-response_data = +""
 response_raw = +""
 response.read_body do |chunk|
@@ -102,7 +103,7 @@ module ::DiscourseAi
 chunk
 .split("\n")
 .each do |line|
-data = line.split("data: ", 2)[1]
+data = line.split("data:", 2)[1]
 next if !data || data.squish == "[DONE]"
 if !cancelled
@@ -113,7 +114,7 @@ module ::DiscourseAi
 # this is the last chunk and contains the full response
 next if partial[:token][:special] == true
-response_data = partial[:token][:text].to_s
+response_data << partial[:token][:text].to_s
 yield partial, cancel
 rescue JSON::ParserError
@@ -131,6 +132,8 @@ module ::DiscourseAi
 )
 end
 end
+return response_data
 end
 end
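
The = to << change above is a genuine bug fix: assignment overwrote response_data on every token, so the accumulated response (which this commit now returns to the caller) would have held only the final token instead of the whole generation. Appending accumulates correctly:

  response_data = +""
  [" a", " b", " c"].each { |token| response_data << token }
  response_data #=> " a b c"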


@@ -121,10 +121,11 @@ module ::DiscourseAi
 return parsed_response
 end
+response_data = +""
 begin
 cancelled = false
 cancel = lambda { cancelled = true }
-response_data = +""
 response_raw = +""
 leftover = ""
@@ -170,6 +171,8 @@ module ::DiscourseAi
 )
 end
 end
+return response_data
 end
 end
 end