diff --git a/lib/modules/summarization/models/anthropic.rb b/lib/modules/summarization/models/anthropic.rb
index 4c928706..092da8e2 100644
--- a/lib/modules/summarization/models/anthropic.rb
+++ b/lib/modules/summarization/models/anthropic.rb
@@ -20,7 +20,7 @@ module DiscourseAi
           )
         end
 
-        def concatenate_summaries(summaries)
+        def concatenate_summaries(summaries, &on_partial_blk)
           instructions = <<~TEXT
             Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
             Include only the summary inside <ai> tags.
@@ -29,10 +29,10 @@ module DiscourseAi
           instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" }
           instructions += "Assistant:\n"
 
-          completion(instructions)
+          completion(instructions, &on_partial_blk)
         end
 
-        def summarize_with_truncation(contents, opts)
+        def summarize_with_truncation(contents, opts, &on_partial_blk)
           instructions = build_base_prompt(opts)
 
           text_to_summarize = contents.map { |c| format_content_item(c) }.join
@@ -40,17 +40,20 @@ module DiscourseAi
           instructions += "<input>#{truncated_content}</input>\nAssistant:\n"
 
-          completion(instructions)
+          completion(instructions, &on_partial_blk)
         end
 
-        def summarize_single(chunk_text, opts)
-          summarize_chunk(chunk_text, opts.merge(single_chunk: true))
+        def summarize_single(chunk_text, opts, &on_partial_blk)
+          summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
         end
 
         private
 
-        def summarize_chunk(chunk_text, opts)
-          completion(build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n")
+        def summarize_chunk(chunk_text, opts, &on_partial_blk)
+          completion(
+            build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n",
+            &on_partial_blk
+          )
         end
 
         def build_base_prompt(opts)
@@ -79,9 +82,33 @@ module DiscourseAi
           base_prompt
         end
 
-        def completion(prompt)
-          response =
-            ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(:completion)
+        def completion(prompt, &on_partial_blk)
+          # We need to discard any text that might come before the <ai> tag.
+          # Instructing the model to reply only with the summary seems impossible.
+          pre_tag_partial = +""
+
+          if on_partial_blk
+            on_partial_read =
+              Proc.new do |partial|
+                if pre_tag_partial.include?("<ai>")
+                  on_partial_blk.call(partial[:completion])
+                else
+                  pre_tag_partial << partial[:completion]
+                end
+              end
+
+            response =
+              ::DiscourseAi::Inference::AnthropicCompletions.perform!(
+                prompt,
+                model,
+                &on_partial_read
+              )
+          else
+            response =
+              ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(
+                :completion,
+              )
+          end
 
           Nokogiri::HTML5.fragment(response).at("ai").text
         end
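Note: the gating in completion above buffers everything until "<ai>" has been seen, so any model preamble never reaches the caller; streamed partials still include the closing tag, which is only stripped from the final Nokogiri-parsed response. A minimal standalone sketch of that logic (hand-written partials with illustrative text, not real API output):

pre_tag_partial = +""
streamed = +""

partials = [
  { completion: "Sure! Here is the summary: <ai>" },
  { completion: "A cohesive" },
  { completion: " narrative.</ai>" },
]

partials.each do |partial|
  if pre_tag_partial.include?("<ai>")
    streamed << partial[:completion] # stands in for on_partial_blk.call
  else
    pre_tag_partial << partial[:completion] # still buffering the preamble
  end
end

puts streamed # => "A cohesive narrative.</ai>"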
diff --git a/lib/modules/summarization/models/llama2.rb b/lib/modules/summarization/models/llama2.rb
index 55cf3ac0..1dc96088 100644
--- a/lib/modules/summarization/models/llama2.rb
+++ b/lib/modules/summarization/models/llama2.rb
@@ -20,8 +20,8 @@ module DiscourseAi
           )
         end
 
-        def concatenate_summaries(summaries)
-          completion(<<~TEXT)
+        def concatenate_summaries(summaries, &on_partial_blk)
+          prompt = <<~TEXT
             [INST] <<SYS>>
             You are a helpful bot
             <</SYS>>
@@ -29,13 +29,15 @@ module DiscourseAi
             Concatenate these disjoint summaries, creating a cohesive narrative:
             #{summaries.join("\n")} [/INST]
           TEXT
+
+          completion(prompt, &on_partial_blk)
         end
 
-        def summarize_with_truncation(contents, opts)
+        def summarize_with_truncation(contents, opts, &on_partial_blk)
           text_to_summarize = contents.map { |c| format_content_item(c) }.join
           truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
 
-          completion(<<~TEXT)
+          prompt = <<~TEXT
             [INST] <<SYS>>
             #{build_base_prompt(opts)}
             <</SYS>>
@@ -44,15 +46,17 @@ module DiscourseAi
             Summarize the following in up to 400 words:
             #{truncated_content} [/INST]
             Here is a summary of the above topic:
           TEXT
+
+          completion(prompt, &on_partial_blk)
         end
 
-        def summarize_single(chunk_text, opts)
-          summarize_chunk(chunk_text, opts.merge(single_chunk: true))
+        def summarize_single(chunk_text, opts, &on_partial_blk)
+          summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
         end
 
         private
 
-        def summarize_chunk(chunk_text, opts)
+        def summarize_chunk(chunk_text, opts, &on_partial_blk)
           summary_instruction =
             if opts[:single_chunk]
               "Summarize the following forum discussion, creating a cohesive narrative:"
@@ -60,7 +64,7 @@ module DiscourseAi
               "Summarize the following in up to 400 words:"
             end
 
-          completion(<<~TEXT)
+          prompt = <<~TEXT
             [INST] <<SYS>>
             #{build_base_prompt(opts)}
             <</SYS>>
@@ -69,6 +73,8 @@ module DiscourseAi
             #{chunk_text} [/INST]
             Here is a summary of the above topic:
           TEXT
+
+          completion(prompt, &on_partial_blk)
         end
 
         def build_base_prompt(opts)
@@ -91,10 +97,21 @@ module DiscourseAi
           base_prompt
         end
 
-        def completion(prompt)
-          ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
-            :generated_text,
-          )
+        def completion(prompt, &on_partial_blk)
+          if on_partial_blk
+            on_partial_read =
+              Proc.new { |partial| on_partial_blk.call(partial.dig(:token, :text).to_s) }
+
+            ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
+              prompt,
+              model,
+              &on_partial_read
+            )
+          else
+            ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
+              :generated_text,
+            )
+          end
         end
 
         def tokenizer
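Note: the Proc wrapping on_partial_blk above reads the streamed token via partial.dig(:token, :text).to_s, so an event without a nested token text becomes "" instead of raising. A small sketch with hand-written event hashes (the payload shapes are assumptions about the HuggingFace text-generation-inference stream, not captured output):

buffer = +""
on_partial_blk = ->(text) { buffer << text }

events = [
  { token: { text: "Hello", special: false } },
  { generated_text: "Hello" }, # hypothetical event with no :token key
  { token: { text: " world", special: false } },
]

events.each { |partial| on_partial_blk.call(partial.dig(:token, :text).to_s) }
puts buffer # => "Hello world"; nil.to_s contributes "" for the tokenless event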
diff --git a/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb b/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
index 1ed1c130..acc682f6 100644
--- a/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
+++ b/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
@@ -8,8 +8,8 @@ module DiscourseAi
           "Llama2FineTunedOrcaStyle's #{SiteSetting.ai_hugging_face_model_display_name.presence || model}"
         end
 
-        def concatenate_summaries(summaries)
-          completion(<<~TEXT)
+        def concatenate_summaries(summaries, &on_partial_blk)
+          prompt = <<~TEXT
             ### System:
             You are a helpful bot
 
@@ -19,28 +19,32 @@ module DiscourseAi
             ### Assistant:
           TEXT
+
+          completion(prompt, &on_partial_blk)
         end
 
-        def summarize_with_truncation(contents, opts)
+        def summarize_with_truncation(contents, opts, &on_partial_blk)
           text_to_summarize = contents.map { |c| format_content_item(c) }.join
           truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
 
-          completion(<<~TEXT)
-            ### System:
-            #{build_base_prompt(opts)}
-
-            ### User:
-            Summarize the following in up to 400 words:
-            #{truncated_content}
+          prompt = <<~TEXT
+            ### System:
+            #{build_base_prompt(opts)}
+
+            ### User:
+            Summarize the following in up to 400 words:
+            #{truncated_content}
 
-            ### Assistant:
-            Here is a summary of the above topic:
-          TEXT
+            ### Assistant:
+            Here is a summary of the above topic:
+          TEXT
+
+          completion(prompt, &on_partial_blk)
         end
 
         private
 
-        def summarize_chunk(chunk_text, opts)
+        def summarize_chunk(chunk_text, opts, &on_partial_blk)
           summary_instruction =
             if opts[:single_chunk]
               "Summarize the following forum discussion, creating a cohesive narrative:"
@@ -48,7 +52,7 @@ module DiscourseAi
               "Summarize the following in up to 400 words:"
             end
 
-          completion(<<~TEXT)
+          prompt = <<~TEXT
             ### System:
             #{build_base_prompt(opts)}
@@ -59,6 +63,8 @@ module DiscourseAi
             ### Assistant:
             Here is a summary of the above topic:
           TEXT
+
+          completion(prompt, &on_partial_blk)
         end
       end
     end
diff --git a/lib/modules/summarization/models/open_ai.rb b/lib/modules/summarization/models/open_ai.rb
index 54ac8c5f..20310f5a 100644
--- a/lib/modules/summarization/models/open_ai.rb
+++ b/lib/modules/summarization/models/open_ai.rb
@@ -20,7 +20,7 @@ module DiscourseAi
           )
         end
 
-        def concatenate_summaries(summaries)
+        def concatenate_summaries(summaries, &on_partial_blk)
           messages = [
             { role: "system", content: "You are a helpful bot" },
             {
@@ -30,10 +30,10 @@ module DiscourseAi
             },
           ]
 
-          completion(messages)
+          completion(messages, &on_partial_blk)
         end
 
-        def summarize_with_truncation(contents, opts)
+        def summarize_with_truncation(contents, opts, &on_partial_blk)
           messages = [{ role: "system", content: build_base_prompt(opts) }]
 
           text_to_summarize = contents.map { |c| format_content_item(c) }.join
@@ -44,16 +44,16 @@ module DiscourseAi
             content: "Summarize the following in 400 words:\n#{truncated_content}",
           }
 
-          completion(messages)
+          completion(messages, &on_partial_blk)
         end
 
-        def summarize_single(chunk_text, opts)
-          summarize_chunk(chunk_text, opts.merge(single_chunk: true))
+        def summarize_single(chunk_text, opts, &on_partial_blk)
+          summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
         end
 
         private
 
-        def summarize_chunk(chunk_text, opts)
+        def summarize_chunk(chunk_text, opts, &on_partial_blk)
           summary_instruction =
             if opts[:single_chunk]
               "Summarize the following forum discussion, creating a cohesive narrative:"
@@ -66,6 +66,7 @@ module DiscourseAi
               { role: "system", content: build_base_prompt(opts) },
               { role: "user", content: "#{summary_instruction}\n#{chunk_text}" },
             ],
+            &on_partial_blk
           )
         end
@@ -89,13 +90,22 @@ module DiscourseAi
           base_prompt
         end
 
-        def completion(prompt)
-          ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
-            :choices,
-            0,
-            :message,
-            :content,
-          )
+        def completion(prompt, &on_partial_blk)
+          if on_partial_blk
+            on_partial_read =
+              Proc.new do |partial|
+                on_partial_blk.call(partial.dig(:choices, 0, :delta, :content).to_s)
+              end
+
+            ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, &on_partial_read)
+          else
+            ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
+              :choices,
+              0,
+              :message,
+              :content,
+            )
+          end
         end
 
         def tokenizer
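Note: OpenAI streams completions as chunks whose text lives under choices[0].delta, and some chunks carry no :content (for example a role-only first chunk or the final stop chunk), which is why the Proc above calls .to_s on the dig result. A sketch with hand-written chunks for illustration:

summary = +""

chunks = [
  { choices: [{ delta: { role: "assistant" } }] },
  { choices: [{ delta: { content: "A summary" } }] },
  { choices: [{ delta: {} }] },
]

chunks.each { |partial| summary << partial.dig(:choices, 0, :delta, :content).to_s }
puts summary # => "A summary"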
diff --git a/lib/modules/summarization/strategies/fold_content.rb b/lib/modules/summarization/strategies/fold_content.rb
index f6d13c23..39fbb055 100644
--- a/lib/modules/summarization/strategies/fold_content.rb
+++ b/lib/modules/summarization/strategies/fold_content.rb
@@ -16,17 +16,24 @@ module DiscourseAi
                  :model,
                  to: :completion_model
 
-        def summarize(content)
+        def summarize(content, &on_partial_blk)
           opts = content.except(:contents)
 
           chunks = split_into_chunks(content[:contents])
 
           if chunks.length == 1
-            { summary: completion_model.summarize_single(chunks.first[:summary], opts), chunks: [] }
+            {
+              summary:
+                completion_model.summarize_single(chunks.first[:summary], opts, &on_partial_blk),
+              chunks: [],
+            }
           else
             summaries = completion_model.summarize_in_chunks(chunks, opts)
 
-            { summary: completion_model.concatenate_summaries(summaries), chunks: summaries }
+            {
+              summary: completion_model.concatenate_summaries(summaries, &on_partial_blk),
+              chunks: summaries,
+            }
           end
         end
diff --git a/lib/modules/summarization/strategies/truncate_content.rb b/lib/modules/summarization/strategies/truncate_content.rb
index 7634dd65..af4620a7 100644
--- a/lib/modules/summarization/strategies/truncate_content.rb
+++ b/lib/modules/summarization/strategies/truncate_content.rb
@@ -16,11 +16,12 @@ module DiscourseAi
                  :model,
                  to: :completion_model
 
-        def summarize(content)
+        def summarize(content, &on_partial_blk)
           opts = content.except(:contents)
 
           {
-            summary: completion_model.summarize_with_truncation(content[:contents], opts),
+            summary:
+              completion_model.summarize_with_truncation(content[:contents], opts, &on_partial_blk),
             chunks: [],
           }
         end
diff --git a/lib/shared/inference/anthropic_completions.rb b/lib/shared/inference/anthropic_completions.rb
index 13cf75eb..db9f6496 100644
--- a/lib/shared/inference/anthropic_completions.rb
+++ b/lib/shared/inference/anthropic_completions.rb
@@ -68,10 +68,11 @@ module ::DiscourseAi
             return parsed_response
           end
 
+          response_data = +""
+
           begin
             cancelled = false
             cancel = lambda { cancelled = true }
-            response_data = +""
             response_raw = +""
 
             response.read_body do |chunk|
@@ -111,6 +112,8 @@ module ::DiscourseAi
               )
             end
           end
+
+          return response_data
         end
       end
diff --git a/lib/shared/inference/hugging_face_text_generation.rb b/lib/shared/inference/hugging_face_text_generation.rb
index fac27a03..f3753a21 100644
--- a/lib/shared/inference/hugging_face_text_generation.rb
+++ b/lib/shared/inference/hugging_face_text_generation.rb
@@ -85,10 +85,11 @@ module ::DiscourseAi
             return parsed_response
           end
 
+          response_data = +""
+
           begin
             cancelled = false
             cancel = lambda { cancelled = true }
-            response_data = +""
             response_raw = +""
 
             response.read_body do |chunk|
@@ -102,7 +103,7 @@ module ::DiscourseAi
               chunk
                 .split("\n")
                 .each do |line|
-                  data = line.split("data: ", 2)[1]
+                  data = line.split("data:", 2)[1]
                   next if !data || data.squish == "[DONE]"
 
                   if !cancelled
@@ -113,7 +114,7 @@ module ::DiscourseAi
                       # this is the last chunk and contains the full response
                       next if partial[:token][:special] == true
 
-                      response_data = partial[:token][:text].to_s
+                      response_data << partial[:token][:text].to_s
 
                       yield partial, cancel
                     rescue JSON::ParserError
@@ -131,6 +132,8 @@ module ::DiscourseAi
               )
             end
           end
+
+          return response_data
         end
       end
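Note: hoisting response_data out of the begin block and returning it means perform! now both yields partials and returns the accumulated text, instead of returning nil when a block is given; the change from = to << is what makes the accumulation work. The split("data:", 2) change also tolerates SSE lines without a space after the colon, and JSON.parse accepts the leading space when it is present. A sketch of that framing (hand-written line, assuming the SSE shape these endpoints emit):

require "json"

line = "data: {\"token\":{\"text\":\"Hi\",\"special\":false}}"
data = line.split("data:", 2)[1] # => " {\"token\":...}", leading space included
partial = JSON.parse(data, symbolize_names: true)
puts partial.dig(:token, :text) # => "Hi"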
diff --git a/lib/shared/inference/openai_completions.rb b/lib/shared/inference/openai_completions.rb
index cdfb826b..d521214c 100644
--- a/lib/shared/inference/openai_completions.rb
+++ b/lib/shared/inference/openai_completions.rb
@@ -121,10 +121,11 @@ module ::DiscourseAi
             return parsed_response
           end
 
+          response_data = +""
+
           begin
             cancelled = false
             cancel = lambda { cancelled = true }
-            response_data = +""
             response_raw = +""
 
             leftover = ""
@@ -170,6 +171,8 @@ module ::DiscourseAi
               )
             end
           end
+
+          return response_data
         end
       end
     end
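Note: with all three backends returning the accumulated response_data, callers can stream and still keep the final result. A sketch of how the strategy entry point might be consumed (completion_model and content are assumed to be in scope; the constant name mirrors the file above but is not verified here):

strategy = DiscourseAi::Summarization::Strategies::TruncateContent.new(completion_model)

result =
  strategy.summarize(content) do |partial_summary|
    print partial_summary # e.g. push each fragment to the client as it arrives
  end

puts result[:summary] # the complete summary, also available when streaming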