From 9a79afcdbf981ee5151073566c084e73f37e1b4f Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Tue, 27 Jun 2023 12:26:33 -0300 Subject: [PATCH] DEV: Better strategies for summarization (#88) * DEV: Better strategies for summarization A strategy's responsibility is: "Given a collection of texts, I know how to summarize them most efficiently, using the minimum number of requests and maximizing token usage". Each model has a different token limit, so it all boils down to two strategies: Folding: summarize the texts in chunks, then build a final summary from those chunk summaries. Truncation: combine the texts into a single prompt and truncate it to the model's token limit. While the latter is less than ideal, we need it for "bart-large-cnn-samsum" and "flan-t5-base-samsum", both of which have low limits. The rest rely on folding. * Expose summarized chunks to users --- lib/modules/summarization/entry_point.rb | 43 ++++--- lib/modules/summarization/models/anthropic.rb | 84 +++++++++++++ lib/modules/summarization/models/base.rb | 82 +++++++++++++ .../discourse_ai.rb => models/discourse.rb} | 46 ++++--- lib/modules/summarization/models/open_ai.rb | 96 +++++++++++++++ .../summarization/strategies/anthropic.rb | 57 --------- .../summarization/strategies/fold_content.rb | 30 +++++ .../summarization/strategies/open_ai.rb | 56 --------- .../strategies/truncate_content.rb | 30 +++++ lib/shared/inference/anthropic_completions.rb | 2 +- lib/shared/tokenizer/tokenizer.rb | 70 +++++++---- .../summarization/models/anthropic_spec.rb | 116 ++++++++++++++++++ .../summarization/models/discourse_spec.rb | 93 ++++++++++++++ .../summarization/models/open_ai_spec.rb | 116 ++++++++++++++++++ .../strategies/anthropic_spec.rb | 26 ---- .../strategies/discourse_spec.rb | 25 ---- .../strategies/fold_content_spec.rb | 38 ++++++ .../summarization/strategies/open_ai_spec.rb | 21 ---- .../strategies/truncate_content_spec.rb | 26 ++++ .../inference/anthropic_completions_spec.rb | 6 +- .../summarization/dummy_completion_model.rb | 54 ++++++++ 21 files changed, 872 insertions(+), 245 deletions(-) create mode 100644 lib/modules/summarization/models/anthropic.rb create mode 100644 lib/modules/summarization/models/base.rb rename lib/modules/summarization/{strategies/discourse_ai.rb => models/discourse.rb} (58%) create mode 100644 lib/modules/summarization/models/open_ai.rb delete mode 100644 lib/modules/summarization/strategies/anthropic.rb create mode 100644 lib/modules/summarization/strategies/fold_content.rb delete mode 100644 lib/modules/summarization/strategies/open_ai.rb create mode 100644 lib/modules/summarization/strategies/truncate_content.rb create mode 100644 spec/lib/modules/summarization/models/anthropic_spec.rb create mode 100644 spec/lib/modules/summarization/models/discourse_spec.rb create mode 100644 spec/lib/modules/summarization/models/open_ai_spec.rb delete mode 100644 spec/lib/modules/summarization/strategies/anthropic_spec.rb delete mode 100644 spec/lib/modules/summarization/strategies/discourse_spec.rb create mode 100644 spec/lib/modules/summarization/strategies/fold_content_spec.rb delete mode 100644 spec/lib/modules/summarization/strategies/open_ai_spec.rb create mode 100644 spec/lib/modules/summarization/strategies/truncate_content_spec.rb create mode 100644 spec/support/summarization/dummy_completion_model.rb diff --git a/lib/modules/summarization/entry_point.rb b/lib/modules/summarization/entry_point.rb index 93b27ea0..d1af8117 100644 ---
a/lib/modules/summarization/entry_point.rb +++ b/lib/modules/summarization/entry_point.rb @@ -4,23 +4,38 @@ module DiscourseAi module Summarization class EntryPoint def load_files - require_relative "strategies/anthropic" - require_relative "strategies/discourse_ai" - require_relative "strategies/open_ai" + require_relative "models/base" + require_relative "models/anthropic" + require_relative "models/discourse" + require_relative "models/open_ai" + + require_relative "strategies/fold_content" + require_relative "strategies/truncate_content" end def inject_into(plugin) - [ - Strategies::OpenAi.new("gpt-4"), - Strategies::OpenAi.new("gpt-4-32k"), - Strategies::OpenAi.new("gpt-3.5-turbo"), - Strategies::OpenAi.new("gpt-3.5-turbo-16k"), - Strategies::DiscourseAi.new("bart-large-cnn-samsum"), - Strategies::DiscourseAi.new("flan-t5-base-samsum"), - Strategies::DiscourseAi.new("long-t5-tglobal-base-16384-book-summary"), - Strategies::Anthropic.new("claude-v1"), - Strategies::Anthropic.new("claude-v1-100k"), - ].each { |strategy| plugin.register_summarization_strategy(strategy) } + foldable_models = [ + Models::OpenAi.new("gpt-4", max_tokens: 8192), + Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768), + Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096), + Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384), + Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384), + Models::Anthropic.new("claude-v1", max_tokens: 9000), + Models::Anthropic.new("claude-v1-100k", max_tokens: 100_000), + ] + + foldable_models.each do |model| + plugin.register_summarization_strategy(Strategies::FoldContent.new(model)) + end + + truncable_models = [ + Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024), + Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512), + ] + + truncable_models.each do |model| + plugin.register_summarization_strategy(Strategies::TruncateContent.new(model)) + end end end end diff --git a/lib/modules/summarization/models/anthropic.rb b/lib/modules/summarization/models/anthropic.rb new file mode 100644 index 00000000..d038385e --- /dev/null +++ b/lib/modules/summarization/models/anthropic.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Models + class Anthropic < Base + def display_name + "Anthropic's #{model}" + end + + def correctly_configured? + SiteSetting.ai_anthropic_api_key.present? + end + + def configuration_hint + I18n.t( + "discourse_ai.summarization.configuration_hint", + count: 1, + setting: "ai_anthropic_api_key", + ) + end + + def concatenate_summaries(summaries) + instructions = <<~TEXT + Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative. + Include only the summary inside <ai> tags.
+ TEXT + + instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" } + instructions += "Assistant:\n" + + completion(instructions) + end + + def summarize_with_truncation(contents, opts) + instructions = build_base_prompt(opts) + + text_to_summarize = contents.map { |c| format_content_item(c) }.join + truncated_content = tokenizer.truncate(text_to_summarize, max_tokens - reserved_tokens) + + instructions += "<input>#{truncated_content}</input>\nAssistant:\n" + + completion(instructions) + end + + private + + def summarize_chunk(chunk_text, opts) + completion(build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n") + end + + def build_base_prompt(opts) + base_prompt = <<~TEXT + Human: Summarize the following forum discussion inside the given <input> tag. + Include only the summary inside <ai> tags. + TEXT + + if opts[:resource_path] + base_prompt += "Try generating links as well the format is #{opts[:resource_path]}.\n" + end + + base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[ + :content_title + ] + + base_prompt += "Don't use more than 400 words.\n" + end + + def completion(prompt) + response = + ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(:completion) + + Nokogiri::HTML5.fragment(response).at("ai").text + end + + def tokenizer + DiscourseAi::Tokenizer::AnthropicTokenizer + end + + attr_reader :max_tokens + end + end + end +end diff --git a/lib/modules/summarization/models/base.rb b/lib/modules/summarization/models/base.rb new file mode 100644 index 00000000..558006a8 --- /dev/null +++ b/lib/modules/summarization/models/base.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Models + class Base + def initialize(model, max_tokens:) + @model = model + @max_tokens = max_tokens + end + + def correctly_configured? + raise NotImplementedError + end + + def display_name + raise NotImplementedError + end + + def configuration_hint + raise NotImplementedError + end + + def summarize_in_chunks(contents, opts) + chunks = [] + + section = { ids: [], summary: "" } + + contents.each do |item| + new_content = format_content_item(item) + + if tokenizer.can_expand_tokens?( + section[:summary], + new_content, + max_tokens - reserved_tokens, + ) + section[:summary] += new_content + section[:ids] << item[:id] + else + chunks << section + section = { ids: [item[:id]], summary: new_content } + end + end + + chunks << section if section[:summary].present?
+ + chunks.each { |chunk| chunk[:summary] = summarize_chunk(chunk[:summary], opts) } + + chunks + end + + def concatenate_summaries(_summaries) + raise NotImplementedError + end + + def summarize_with_truncation(_contents, _opts) + raise NotImplementedError + end + + attr_reader :model + + protected + + attr_reader :max_tokens + + def summarize_chunk(_chunk_text, _opts) + raise NotImplementedError + end + + def format_content_item(item) + "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + end + + def reserved_tokens + # Reserve tokens for the response and the base prompt + # ~500 words + 700 + end + end + end + end +end diff --git a/lib/modules/summarization/strategies/discourse_ai.rb b/lib/modules/summarization/models/discourse.rb similarity index 58% rename from lib/modules/summarization/strategies/discourse_ai.rb rename to lib/modules/summarization/models/discourse.rb index 363b40b8..240f1fb8 100644 --- a/lib/modules/summarization/strategies/discourse_ai.rb +++ b/lib/modules/summarization/models/discourse.rb @@ -2,8 +2,8 @@ module DiscourseAi module Summarization - module Strategies - class DiscourseAi < ::Summarization::Base + module Models + class Discourse < Base def display_name "Discourse AI's #{model}" end @@ -22,29 +22,39 @@ module DiscourseAi ) end - def summarize(content_text) - ::DiscourseAi::Inference::DiscourseClassifier.perform!( - "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify", - model, - prompt(content_text), - SiteSetting.ai_summarization_discourse_service_api_key, - ).dig(:summary_text) + def concatenate_summaries(summaries) + completion(summaries.join("\n")) end - def prompt(text) - ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text, max_length) + def summarize_with_truncation(contents, opts) + text_to_summarize = contents.map { |c| format_content_item(c) }.join + truncated_content = + ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text_to_summarize, max_tokens) + + completion(truncated_content) end private - def max_length - lengths = { - "bart-large-cnn-samsum" => 1024, - "flan-t5-base-samsum" => 512, - "long-t5-tglobal-base-16384-book-summary" => 16_384, - } + def summarize_chunk(chunk_text, _opts) + completion(chunk_text) + end - lengths[model] + def reserved_tokens + 0 + end + + def completion(prompt) + ::DiscourseAi::Inference::DiscourseClassifier.perform!( + "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify", + model, + prompt, + SiteSetting.ai_summarization_discourse_service_api_key, + ).dig(:summary_text) + end + + def tokenizer + DiscourseAi::Tokenizer::BertTokenizer end end end diff --git a/lib/modules/summarization/models/open_ai.rb b/lib/modules/summarization/models/open_ai.rb new file mode 100644 index 00000000..cd91d31e --- /dev/null +++ b/lib/modules/summarization/models/open_ai.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Models + class OpenAi < Base + def display_name + "Open AI's #{model}" + end + + def correctly_configured? + SiteSetting.ai_openai_api_key.present?
+ end + + def configuration_hint + I18n.t( + "discourse_ai.summarization.configuration_hint", + count: 1, + setting: "ai_openai_api_key", + ) + end + + def concatenate_summaries(summaries) + messages = [ + { role: "system", content: "You are a helpful bot" }, + { + role: "user", + content: + "Concatenate these disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}", + }, + ] + + completion(messages) + end + + def summarize_with_truncation(contents, opts) + messages = [{ role: "system", content: build_base_prompt(opts) }] + + text_to_summarize = contents.map { |c| format_content_item(c) }.join + truncated_content = tokenizer.truncate(text_to_summarize, max_tokens - reserved_tokens) + + messages << { + role: "user", + content: "Summarize the following in 400 words:\n#{truncated_content}", + } + + completion(messages) + end + + private + + def summarize_chunk(chunk_text, opts) + completion( + [ + { role: "system", content: build_base_prompt(opts) }, + { role: "user", content: "Summarize the following in 400 words:\n#{chunk_text}" }, + ], + ) + end + + def build_base_prompt(opts) + base_prompt = <<~TEXT + You are a summarization bot. + You effectively summarise any text and reply ONLY with ONLY the summarized text. + You condense it into a shorter version. + You understand and generate Discourse forum Markdown. + TEXT + + if opts[:resource_path] + base_prompt += + "Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n" + end + + base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[ + :content_title + ] + + base_prompt + end + + def completion(prompt) + ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig( + :choices, + 0, + :message, + :content, + ) + end + + def tokenizer + DiscourseAi::Tokenizer::OpenAiTokenizer + end + end + end + end +end diff --git a/lib/modules/summarization/strategies/anthropic.rb b/lib/modules/summarization/strategies/anthropic.rb deleted file mode 100644 index 57e08285..00000000 --- a/lib/modules/summarization/strategies/anthropic.rb +++ /dev/null @@ -1,57 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - module Summarization - module Strategies - class Anthropic < ::Summarization::Base - def display_name - "Anthropic's #{model}" - end - - def correctly_configured? - SiteSetting.ai_anthropic_api_key.present? - end - - def configuration_hint - I18n.t( - "discourse_ai.summarization.configuration_hint", - count: 1, - setting: "ai_anthropic_api_key", - ) - end - - def summarize(content_text) - response = - ::DiscourseAi::Inference::AnthropicCompletions.perform!( - prompt(content_text), - model, - ).dig(:completion) - - Nokogiri::HTML5.fragment(response).at("ai").text - end - - def prompt(content) - truncated_content = - ::DiscourseAi::Tokenizer::AnthropicTokenizer.truncate(content, max_length - 50) - - "Human: Summarize the following article that is inside <input> tags. - Please include only the summary inside <ai> tags.
- - <input>##{truncated_content}</input> - - - Assistant: - " - end - - private - - def max_length - lengths = { "claude-v1" => 9000, "claude-v1-100k" => 100_000 } - - lengths[model] - end - end - end - end -end diff --git a/lib/modules/summarization/strategies/fold_content.rb b/lib/modules/summarization/strategies/fold_content.rb new file mode 100644 index 00000000..2f4508d6 --- /dev/null +++ b/lib/modules/summarization/strategies/fold_content.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Strategies + class FoldContent < ::Summarization::Base + def initialize(completion_model) + @completion_model = completion_model + end + + attr_reader :completion_model + + delegate :correctly_configured?, + :display_name, + :configuration_hint, + :model, + to: :completion_model + + def summarize(content) + opts = content.except(:contents) + summaries = completion_model.summarize_in_chunks(content[:contents], opts) + + return { summary: summaries.first[:summary], chunks: [] } if summaries.length == 1 + + { summary: completion_model.concatenate_summaries(summaries), chunks: summaries } + end + end + end + end +end diff --git a/lib/modules/summarization/strategies/open_ai.rb b/lib/modules/summarization/strategies/open_ai.rb deleted file mode 100644 index 35011b66..00000000 --- a/lib/modules/summarization/strategies/open_ai.rb +++ /dev/null @@ -1,56 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - module Summarization - module Strategies - class OpenAi < ::Summarization::Base - def display_name - "Open AI's #{model}" - end - - def correctly_configured? - SiteSetting.ai_openai_api_key.present? - end - - def configuration_hint - I18n.t( - "discourse_ai.summarization.configuration_hint", - count: 1, - setting: "ai_openai_api_key", - ) - end - - def summarize(content_text) - ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt(content_text), model).dig( - :choices, - 0, - :message, - :content, - ) - end - - def prompt(content) - truncated_content = - ::DiscourseAi::Tokenizer::OpenAiTokenizer.truncate(content, max_length - 50) - - messages = [{ role: "system", content: <<~TEXT }] - Summarize the following article:\n\n#{truncated_content} - TEXT - end - - private - - def max_length - lengths = { - "gpt-3.5-turbo" => 4096, - "gpt-4" => 8192, - "gpt-3.5-turbo-16k" => 16_384, - "gpt-4-32k" => 32_768, - } - - lengths[model] - end - end - end - end -end diff --git a/lib/modules/summarization/strategies/truncate_content.rb b/lib/modules/summarization/strategies/truncate_content.rb new file mode 100644 index 00000000..7634dd65 --- /dev/null +++ b/lib/modules/summarization/strategies/truncate_content.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Strategies + class TruncateContent < ::Summarization::Base + def initialize(completion_model) + @completion_model = completion_model + end + + attr_reader :completion_model + + delegate :correctly_configured?, + :display_name, + :configuration_hint, + :model, + to: :completion_model + + def summarize(content) + opts = content.except(:contents) + + { + summary: completion_model.summarize_with_truncation(content[:contents], opts), + chunks: [], + } + end + end + end + end +end diff --git a/lib/shared/inference/anthropic_completions.rb b/lib/shared/inference/anthropic_completions.rb index 8af3be02..18fd4418 100644 --- a/lib/shared/inference/anthropic_completions.rb +++ b/lib/shared/inference/anthropic_completions.rb @@ -24,7 +24,7 @@ module ::DiscourseAi
payload[:temperature] = temperature if temperature payload[:top_p] = top_p if top_p - payload[:max_tokens_to_sample] = max_tokens || 300 + payload[:max_tokens_to_sample] = max_tokens if max_tokens payload[:stream] = true if block_given? Net::HTTP.start( diff --git a/lib/shared/tokenizer/tokenizer.rb b/lib/shared/tokenizer/tokenizer.rb index 0fdcf2c9..01178614 100644 --- a/lib/shared/tokenizer/tokenizer.rb +++ b/lib/shared/tokenizer/tokenizer.rb @@ -3,21 +3,31 @@ module DiscourseAi module Tokenizer class BasicTokenizer - def self.tokenizer - raise NotImplementedError - end + class << self + def tokenizer + raise NotImplementedError + end - def self.tokenize(text) - tokenizer.encode(text).tokens - end - def self.size(text) - tokenize(text).size - end - def self.truncate(text, max_length) - # Fast track the common case where the text is already short enough. - return text if text.size < max_length + def tokenize(text) + tokenizer.encode(text).tokens + end - tokenizer.decode(tokenizer.encode(text).ids.take(max_length)) + def size(text) + tokenize(text).size + end + + def truncate(text, max_length) + # Fast track the common case where the text is already short enough. + return text if text.size < max_length + + tokenizer.decode(tokenizer.encode(text).ids.take(max_length)) + end + + def can_expand_tokens?(text, addition, max_length) + return true if text.size + addition.size < max_length + + tokenizer.encode(text).ids.length + tokenizer.encode(addition).ids.length < max_length + end end end @@ -36,22 +46,30 @@ module DiscourseAi end class OpenAiTokenizer < BasicTokenizer - def self.tokenizer - @@tokenizer ||= Tiktoken.get_encoding("cl100k_base") - end + class << self + def tokenizer + @@tokenizer ||= Tiktoken.get_encoding("cl100k_base") + end - def self.tokenize(text) - tokenizer.encode(text) - end + def tokenize(text) + tokenizer.encode(text) + end - def self.truncate(text, max_length) - # Fast track the common case where the text is already short enough. - return text if text.size < max_length + def truncate(text, max_length) + # Fast track the common case where the text is already short enough. + return text if text.size < max_length - tokenizer.decode(tokenize(text).take(max_length)) - rescue Tiktoken::UnicodeError - max_length = max_length - 1 - retry + tokenizer.decode(tokenize(text).take(max_length)) + rescue Tiktoken::UnicodeError + max_length = max_length - 1 + retry + end + + def can_expand_tokens?(text, addition, max_length) + return true if text.size + addition.size < max_length + + tokenizer.encode(text).length + tokenizer.encode(addition).length < max_length + end end end end diff --git a/spec/lib/modules/summarization/models/anthropic_spec.rb b/spec/lib/modules/summarization/models/anthropic_spec.rb new file mode 100644 index 00000000..2ce99a3a --- /dev/null +++ b/spec/lib/modules/summarization/models/anthropic_spec.rb @@ -0,0 +1,116 @@ +# frozen_string_literal: true + +require_relative "../../../../support/anthropic_completion_stubs" + +RSpec.describe DiscourseAi::Summarization::Models::Anthropic do + let(:model) { "claude-v1" } + let(:max_tokens) { 720 } + + subject { described_class.new(model, max_tokens: max_tokens) } + + let(:content) do + { + resource_path: "/t/1/POST_NUMBER", + content_title: "This is a title", + contents: [{ poster: "asd", id: 1, text: "This is a text" }], + } + end + + def expected_messages(contents, opts) + base_prompt = <<~TEXT + Human: Summarize the following forum discussion inside the given <input> tag. + Include only the summary inside <ai> tags.
+ Try generating links as well the format is #{opts[:resource_path]}. + The discussion title is: #{opts[:content_title]}. + Don't use more than 400 words. + TEXT + + text = + contents.reduce("") do |memo, item| + memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + end + + base_prompt += "<input>#{text}</input>\nAssistant:\n" + end + + describe "#summarize_in_chunks" do + context "when the content fits in a single chunk" do + it "performs a request to summarize" do + opts = content.except(:contents) + + AnthropicCompletionStubs.stub_response( + expected_messages(content[:contents], opts), + "<ai>This is summary 1</ai>", + ) + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1") + end + end + + context "when the content fits in multiple chunks" do + it "performs a request for each one to summarize" do + content[:contents] << { + poster: "asd2", + id: 2, + text: "This is a different text to summarize", + } + opts = content.except(:contents) + + content[:contents].each_with_index do |item, idx| + AnthropicCompletionStubs.stub_response( + expected_messages([item], opts), + "<ai>This is summary #{idx + 1}</ai>", + ) + end + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2") + end + end + end + + describe "#concatenate_summaries" do + it "combines all the different summaries into a single one" do + messages = <<~TEXT + Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative. + Include only the summary inside <ai> tags. + <input>summary 1</input> + <input>summary 2</input> + Assistant: + TEXT + + AnthropicCompletionStubs.stub_response(messages, "<ai>concatenated summary</ai>") + + expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq( + "concatenated summary", + ) + end + end + + describe "#summarize_with_truncation" do + let(:max_tokens) { 709 } + + it "truncates the context to meet the token limit" do + opts = content.except(:contents) + + instructions = <<~TEXT + Human: Summarize the following forum discussion inside the given <input> tag. + Include only the summary inside <ai> tags. + Try generating links as well the format is #{opts[:resource_path]}. + The discussion title is: #{opts[:content_title]}. + Don't use more than 400 words.
<input>(1 asd said: This is a</input> + Assistant: + TEXT + + AnthropicCompletionStubs.stub_response(instructions, "<ai>truncated summary</ai>") + + expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary") + end + end +end diff --git a/spec/lib/modules/summarization/models/discourse_spec.rb b/spec/lib/modules/summarization/models/discourse_spec.rb new file mode 100644 index 00000000..c505da1c --- /dev/null +++ b/spec/lib/modules/summarization/models/discourse_spec.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +RSpec.describe DiscourseAi::Summarization::Models::Discourse do + let(:model) { "bart-large-cnn-samsum" } + let(:max_tokens) { 20 } + + subject { described_class.new(model, max_tokens: max_tokens) } + + let(:content) do + { + resource_path: "/t/1/POST_NUMBER", + content_title: "This is a title", + contents: [{ poster: "asd", id: 1, text: "This is a text" }], + } + end + + before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" } + + def stub_request(prompt, response) + WebMock + .stub_request( + :post, + "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify", + ) + .with(body: JSON.dump(model: model, content: prompt)) + .to_return(status: 200, body: JSON.dump(summary_text: response)) + end + + def expected_messages(contents, opts) + contents.reduce("") do |memo, item| + memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + end + end + + describe "#summarize_in_chunks" do + context "when the content fits in a single chunk" do + it "performs a request to summarize" do + opts = content.except(:contents) + + stub_request(expected_messages(content[:contents], opts), "This is summary 1") + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1") + end + end + + context "when the content fits in multiple chunks" do + it "performs a request for each one to summarize" do + content[:contents] << { + poster: "asd2", + id: 2, + text: "This is a different text to summarize", + } + opts = content.except(:contents) + + content[:contents].each_with_index do |item, idx| + stub_request(expected_messages([item], opts), "This is summary #{idx + 1}") + end + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2") + end + end + end + + describe "#concatenate_summaries" do + it "combines all the different summaries into a single one" do + messages = ["summary 1", "summary 2"].join("\n") + + stub_request(messages, "concatenated summary") + + expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq( + "concatenated summary", + ) + end + end + + describe "#summarize_with_truncation" do + let(:max_tokens) { 9 } + + it "truncates the context to meet the token limit" do + opts = content.except(:contents) + + stub_request("( 1 asd said : this is", "truncated summary") + + expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary") + end + end +end diff --git a/spec/lib/modules/summarization/models/open_ai_spec.rb b/spec/lib/modules/summarization/models/open_ai_spec.rb new file mode 100644 index 00000000..d01fd287 --- /dev/null +++ b/spec/lib/modules/summarization/models/open_ai_spec.rb @@ -0,0 +1,116 @@ +# frozen_string_literal: true + +require_relative "../../../../support/openai_completions_inference_stubs" +
+RSpec.describe DiscourseAi::Summarization::Models::OpenAi do + let(:model) { "gpt-3.5-turbo" } + let(:max_tokens) { 720 } + + subject { described_class.new(model, max_tokens: max_tokens) } + + let(:content) do + { + resource_path: "/t/1/POST_NUMBER", + content_title: "This is a title", + contents: [{ poster: "asd", id: 1, text: "This is a text" }], + } + end + + def expected_messages(contents, opts) + base_prompt = <<~TEXT + You are a summarization bot. + You effectively summarise any text and reply ONLY with ONLY the summarized text. + You condense it into a shorter version. + You understand and generate Discourse forum Markdown. + Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77) + The discussion title is: #{opts[:content_title]}. + TEXT + + messages = [{ role: "system", content: base_prompt }] + + text = + contents.reduce("") do |memo, item| + memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + end + + messages << { role: "user", content: "Summarize the following in 400 words:\n#{text}" } + end + + describe "#summarize_in_chunks" do + context "when the content fits in a single chunk" do + it "performs a request to summarize" do + opts = content.except(:contents) + + OpenAiCompletionsInferenceStubs.stub_response( + expected_messages(content[:contents], opts), + "This is summary 1", + ) + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1") + end + end + + context "when the content fits in multiple chunks" do + it "performs a request for each one to summarize" do + content[:contents] << { + poster: "asd2", + id: 2, + text: "This is a different text to summarize", + } + opts = content.except(:contents) + + content[:contents].each_with_index do |item, idx| + OpenAiCompletionsInferenceStubs.stub_response( + expected_messages([item], opts), + "This is summary #{idx + 1}", + ) + end + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2") + end + end + end + + describe "#concatenate_summaries" do + it "combines all the different summaries into a single one" do + messages = [ + { role: "system", content: "You are a helpful bot" }, + { + role: "user", + content: + "Concatenate these disjoint summaries, creating a cohesive narrative:\nsummary 1\nsummary 2", + }, + ] + + OpenAiCompletionsInferenceStubs.stub_response(messages, "concatenated summary") + + expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq( + "concatenated summary", + ) + end + end + + describe "#summarize_with_truncation" do + let(:max_tokens) { 709 } + + it "truncates the context to meet the token limit" do + opts = content.except(:contents) + + truncated_version = expected_messages(content[:contents], opts) + + truncated_version.last[ + :content + ] = "Summarize the following in 400 words:\n(1 asd said: This is a" + + OpenAiCompletionsInferenceStubs.stub_response(truncated_version, "truncated summary") + + expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary") + end + end +end diff --git a/spec/lib/modules/summarization/strategies/anthropic_spec.rb b/spec/lib/modules/summarization/strategies/anthropic_spec.rb deleted file mode 100644 index afd64f13..00000000 --- a/spec/lib/modules/summarization/strategies/anthropic_spec.rb +++ /dev/null @@ -1,26 +0,0 
@@ -# frozen_string_literal: true - -require_relative "../../../../support/anthropic_completion_stubs" - -RSpec.describe DiscourseAi::Summarization::Strategies::Anthropic do - describe "#summarize" do - let(:model) { "claude-v1" } - - subject { described_class.new(model) } - - it "asks an Anthropic's model to summarize the content" do - summarization_text = "This is a text" - expected_response = "This is a summary" - - AnthropicCompletionStubs.stub_response( - subject.prompt(summarization_text), - "<ai>#{expected_response}</ai>", - req_opts: { - max_tokens_to_sample: 300, - }, - ) - - expect(subject.summarize(summarization_text)).to eq(expected_response) - end - end -end diff --git a/spec/lib/modules/summarization/strategies/discourse_spec.rb b/spec/lib/modules/summarization/strategies/discourse_spec.rb deleted file mode 100644 index 8d52e069..00000000 --- a/spec/lib/modules/summarization/strategies/discourse_spec.rb +++ /dev/null @@ -1,25 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe DiscourseAi::Summarization::Strategies::DiscourseAi do - describe "#summarize" do - let(:model) { "bart-large-cnn-samsum" } - - subject { described_class.new(model) } - - it "asks a Discourse's model to summarize the content" do - SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" - summarization_text = "This is a text" - expected_response = "This is a summary" - - WebMock - .stub_request( - :post, - "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify", - ) - .with(body: JSON.dump(model: model, content: subject.prompt(summarization_text))) - .to_return(status: 200, body: JSON.dump(summary_text: expected_response)) - - expect(subject.summarize(summarization_text)).to eq(expected_response) - end - end -end diff --git a/spec/lib/modules/summarization/strategies/fold_content_spec.rb b/spec/lib/modules/summarization/strategies/fold_content_spec.rb new file mode 100644 index 00000000..655a7855 --- /dev/null +++ b/spec/lib/modules/summarization/strategies/fold_content_spec.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require_relative "../../../../support/summarization/dummy_completion_model" + +RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do + describe "#summarize" do + let(:summarize_text) { "This is a text" } + let(:model) { DummyCompletionModel.new(model_tokens) } + let(:model_tokens) do + # Make sure each content fits in a single chunk.
+ DiscourseAi::Tokenizer::BertTokenizer.size("(1 asd said: This is a text ") + 3 + end + + subject { described_class.new(model) } + + let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } } + + context "when the content to summarize fits in a single call" do + it "does one call to summarize content" do + result = subject.summarize(content) + + expect(model.summarization_calls).to eq(1) + expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY) + end + end + + context "when the content to summarize doesn't fit in a single call" do + it "summarizes each chunk and then concatenates them" do + content[:contents] << { poster: "asd2", id: 2, text: summarize_text } + + result = subject.summarize(content) + + expect(model.summarization_calls).to eq(3) + expect(result[:summary]).to eq(DummyCompletionModel::CONCATENATED_SUMMARIES) + end + end + end +end diff --git a/spec/lib/modules/summarization/strategies/open_ai_spec.rb b/spec/lib/modules/summarization/strategies/open_ai_spec.rb deleted file mode 100644 index a83ca4db..00000000 --- a/spec/lib/modules/summarization/strategies/open_ai_spec.rb +++ /dev/null @@ -1,21 +0,0 @@ -# frozen_string_literal: true - -require_relative "../../../../support/openai_completions_inference_stubs" - -RSpec.describe DiscourseAi::Summarization::Strategies::OpenAi do - let(:model) { "gpt-3.5-turbo" } - - subject { described_class.new(model) } - - it "asks a OpenAI's model to summarize the content" do - summarization_text = "This is a text" - expected_response = "This is a summary" - - OpenAiCompletionsInferenceStubs.stub_response( - subject.prompt(summarization_text), - expected_response, - ) - - expect(subject.summarize(summarization_text)).to eq(expected_response) - end -end diff --git a/spec/lib/modules/summarization/strategies/truncate_content_spec.rb b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb new file mode 100644 index 00000000..4b6f1584 --- /dev/null +++ b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require_relative "../../../../support/summarization/dummy_completion_model" + +RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do + describe "#summarize" do + let(:summarize_text) { "This is a text" } + let(:model_tokens) { summarize_text.length } + let(:model) { DummyCompletionModel.new(model_tokens) } + + subject { described_class.new(model) } + + let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } } + + context "when the content to summarize doesn't fit in a single call" do + it "summarizes a truncated version" do + content[:contents] << { poster: "asd2", id: 2, text: summarize_text } + + result = subject.summarize(content) + + expect(model.summarization_calls).to eq(1) + expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY) + end + end + end +end diff --git a/spec/shared/inference/anthropic_completions_spec.rb b/spec/shared/inference/anthropic_completions_spec.rb index 30e5037a..6ab081a7 100644 --- a/spec/shared/inference/anthropic_completions_spec.rb +++ b/spec/shared/inference/anthropic_completions_spec.rb @@ -45,7 +45,11 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do AnthropicCompletionStubs.stub_streamed_response(prompt, deltas, req_opts: req_opts) - DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, "claude-v1") do |partial, cancel| + DiscourseAi::Inference::AnthropicCompletions.perform!( + prompt, + "claude-v1", + max_tokens: 
req_opts[:max_tokens_to_sample], + ) do |partial, cancel| data = partial[:completion] content = data if data cancel.call if content.split(" ").length == 2 diff --git a/spec/support/summarization/dummy_completion_model.rb b/spec/support/summarization/dummy_completion_model.rb new file mode 100644 index 00000000..3c4136c0 --- /dev/null +++ b/spec/support/summarization/dummy_completion_model.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +class DummyCompletionModel + SINGLE_SUMMARY = "this is a single summary" + CONCATENATED_SUMMARIES = "this is a concatenated summary" + + def initialize(prompt_length) + @max_length = prompt_length + @summarization_calls = 0 + end + + attr_reader :max_length, :summarization_calls + + def summarize_in_chunks(contents, opts) + chunks = [] + + section = { ids: [], summary: "" } + + contents.each do |item| + new_content = "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + + if tokenizer.can_expand_tokens?(section[:summary], new_content, max_length) + section[:summary] += new_content + section[:ids] << item[:id] + else + chunks << section + section = { ids: [item[:id]], summary: new_content } + end + end + + chunks << section if section[:summary].present? + + chunks.each do |chunk| + chunk[:summary] = SINGLE_SUMMARY + @summarization_calls += 1 + end + + chunks + end + + def concatenate_summaries(summaries) + @summarization_calls += 1 + CONCATENATED_SUMMARIES + end + + def summarize_with_truncation(_contents, _opts) + @summarization_calls += 1 + SINGLE_SUMMARY + end + + def tokenizer + DiscourseAi::Tokenizer::BertTokenizer + end
+end
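
Editor's note (not part of the patch): the sketch below shows how the pieces introduced above compose. It is a minimal sketch that assumes a Discourse instance with this plugin loaded; EchoModel, its names, and its trivial "summaries" are hypothetical, invented for illustration. Only the Models::Base, FoldContent, and TruncateContent APIs come from the patch itself.

# frozen_string_literal: true

# A hypothetical completion model: it "summarizes" a chunk by echoing its
# first 100 characters. A real model would perform an inference request here.
class EchoModel < DiscourseAi::Summarization::Models::Base
  def display_name
    "Echo #{model}"
  end

  def correctly_configured?
    true # real models check a site setting, e.g. an API key
  end

  def configuration_hint
    "no configuration required"
  end

  def concatenate_summaries(summaries)
    summaries.join(" ")
  end

  def summarize_with_truncation(contents, _opts)
    text = contents.map { |c| format_content_item(c) }.join
    tokenizer.truncate(text, max_tokens)
  end

  private

  def summarize_chunk(chunk_text, _opts)
    chunk_text[0, 100]
  end

  def reserved_tokens
    0
  end

  def tokenizer
    DiscourseAi::Tokenizer::BertTokenizer
  end
end

model = EchoModel.new("echo", max_tokens: 512)

# Folding: split contents into chunks by token budget, summarize each chunk,
# then combine the chunk summaries into a final summary.
fold = DiscourseAi::Summarization::Strategies::FoldContent.new(model)
result =
  fold.summarize(
    content_title: "Example topic",
    contents: [{ poster: "alice", id: 1, text: "Hello world" }],
  )
result[:summary] # => "(1 alice said: Hello world "
result[:chunks]  # => [] because everything fit in a single chunk

# Truncation: a single request over content cut down to the token limit.
truncate = DiscourseAi::Summarization::Strategies::TruncateContent.new(model)
truncate.summarize(contents: [{ poster: "alice", id: 1, text: "Hello world" }])

Because summarize_in_chunks is implemented once in Models::Base, a model only supplies summarize_chunk, concatenate_summaries, summarize_with_truncation, and a tokenizer; the patch's DummyCompletionModel hand-rolls the same interface for the strategy specs.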