DEV: Better strategies for summarization (#88)

* DEV: Better strategies for summarization

The strategy's responsibility is: "Given a collection of texts, I know how to summarize them most efficiently, using the minimum number of requests and maximizing token usage."

There are different token limits for each model, so it all boils down to two strategies:

- Fold: summarize the texts in chunks, then build a final summary from those chunk summaries.
- Truncate: combine the texts into a single prompt and truncate it to fit the model's token limit.

While the latter is less than ideal, we need it for "bart-large-cnn-samsum" and "flan-t5-base-samsum", both of which have low token limits. The rest will rely on folding.
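In code, the two strategies reduce to roughly the following (a minimal sketch of the FoldContent and TruncateContent classes added below; completion_model stands for any model wrapper exposing the summarize_in_chunks, concatenate_summaries, and summarize_with_truncation methods this commit introduces):

class FoldContent
  def initialize(completion_model)
    @completion_model = completion_model
  end

  # One request per chunk, plus a final request to merge the chunk
  # summaries (skipped when everything fit in a single chunk).
  def summarize(content)
    opts = content.except(:contents)
    summaries = @completion_model.summarize_in_chunks(content[:contents], opts)
    return { summary: summaries.first[:summary], chunks: [] } if summaries.length == 1

    { summary: @completion_model.concatenate_summaries(summaries), chunks: summaries }
  end
end

class TruncateContent
  def initialize(completion_model)
    @completion_model = completion_model
  end

  # A single request over content truncated to the model's token limit.
  def summarize(content)
    opts = content.except(:contents)
    { summary: @completion_model.summarize_with_truncation(content[:contents], opts), chunks: [] }
  end
end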

* Expose summarized chunks to users
Roman Rizzi, 2023-06-27 12:26:33 -03:00 (committed by GitHub)
parent 9390fba768
commit 9a79afcdbf
21 changed files with 872 additions and 245 deletions

@@ -4,23 +4,38 @@ module DiscourseAi
module Summarization
class EntryPoint
def load_files
require_relative "strategies/anthropic"
require_relative "strategies/discourse_ai"
require_relative "strategies/open_ai"
require_relative "models/base"
require_relative "models/anthropic"
require_relative "models/discourse"
require_relative "models/open_ai"
require_relative "strategies/fold_content"
require_relative "strategies/truncate_content"
end
def inject_into(plugin)
[
Strategies::OpenAi.new("gpt-4"),
Strategies::OpenAi.new("gpt-4-32k"),
Strategies::OpenAi.new("gpt-3.5-turbo"),
Strategies::OpenAi.new("gpt-3.5-turbo-16k"),
Strategies::DiscourseAi.new("bart-large-cnn-samsum"),
Strategies::DiscourseAi.new("flan-t5-base-samsum"),
Strategies::DiscourseAi.new("long-t5-tglobal-base-16384-book-summary"),
Strategies::Anthropic.new("claude-v1"),
Strategies::Anthropic.new("claude-v1-100k"),
].each { |strategy| plugin.register_summarization_strategy(strategy) }
foldable_models = [
Models::OpenAi.new("gpt-4", max_tokens: 8192),
Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
Models::Anthropic.new("claude-v1", max_tokens: 9000),
Models::Anthropic.new("claude-v1-100k", max_tokens: 100_000),
]
foldable_models.each do |model|
plugin.register_summarization_strategy(Strategies::FoldContent.new(model))
end
truncable_models = [
Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024),
Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512),
]
truncable_models.each do |model|
plugin.register_summarization_strategy(Strategies::TruncateContent.new(model))
end
end
end
end

@@ -0,0 +1,84 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Models
class Anthropic < Base
def display_name
"Anthropic's #{model}"
end
def correctly_configured?
SiteSetting.ai_anthropic_api_key.present?
end
def configuration_hint
I18n.t(
"discourse_ai.summarization.configuration_hint",
count: 1,
setting: "ai_anthropic_api_key",
)
end
def concatenate_summaries(summaries)
instructions = <<~TEXT
Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
Include only the summary inside <ai> tags.
TEXT
instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" }
instructions += "Assistant:\n"
completion(instructions)
end
def summarize_with_truncation(contents, opts)
instructions = build_base_prompt(opts)
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content = tokenizer.truncate(text_to_summarize, max_tokens - reserved_tokens)
instructions += "<input>#{truncated_content}</input>\nAssistant:\n"
completion(instructions)
end
private
def summarize_chunk(chunk_text, opts)
completion(build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n")
end
def build_base_prompt(opts)
base_prompt = <<~TEXT
Human: Summarize the following forum discussion inside the given <input> tag.
Include only the summary inside <ai> tags.
TEXT
if opts[:resource_path]
base_prompt += "Try generating links as well the format is #{opts[:resource_path]}.\n"
end
base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
:content_title
]
base_prompt += "Don't use more than 400 words.\n"
end
def completion(prompt)
response =
::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(:completion)
Nokogiri::HTML5.fragment(response).at("ai").text
end
def tokenizer
DiscourseAi::Tokenizer::AnthropicTokenizer
end
attr_reader :max_tokens
end
end
end
end

@@ -0,0 +1,82 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Models
class Base
def initialize(model, max_tokens:)
@model = model
@max_tokens = max_tokens
end
def correctly_configured?
raise NotImplementedError
end
def display_name
raise NotImplementedError
end
def configuration_hint
raise NotImplementedError
end
def summarize_in_chunks(contents, opts)
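# Greedily pack contents into sections that fit within
# max_tokens - reserved_tokens, then issue one summarization
# request per section.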
chunks = []
section = { ids: [], summary: "" }
contents.each do |item|
new_content = format_content_item(item)
if tokenizer.can_expand_tokens?(
section[:summary],
new_content,
max_tokens - reserved_tokens,
)
section[:summary] += new_content
section[:ids] << item[:id]
else
chunks << section
section = { ids: [item[:id]], summary: new_content }
end
end
chunks << section if section[:summary].present?
chunks.each { |chunk| chunk[:summary] = summarize_chunk(chunk[:summary], opts) }
chunks
end
def concatenate_summaries(_summaries)
raise NotImplementedError
end
def summarize_with_truncation(_contents, _opts)
raise NotImplementedError
end
attr_reader :model
protected
attr_reader :max_tokens
def summarize_chunk(_chunk_text, _opts)
raise NotImplementedError
end
def format_content_item(item)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
def reserved_tokens
# Reserve tokens for the response and the base prompt
# ~500 words
700
end
end
end
end
end

@@ -2,8 +2,8 @@
module DiscourseAi
module Summarization
module Strategies
class DiscourseAi < ::Summarization::Base
module Models
class Discourse < Base
def display_name
"Discourse AI's #{model}"
end
@@ -22,29 +22,39 @@ module DiscourseAi
)
end
def summarize(content_text)
::DiscourseAi::Inference::DiscourseClassifier.perform!(
"#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
model,
prompt(content_text),
SiteSetting.ai_summarization_discourse_service_api_key,
).dig(:summary_text)
def concatenate_summaries(summaries)
completion(summaries.join("\n"))
end
def prompt(text)
::DiscourseAi::Tokenizer::BertTokenizer.truncate(text, max_length)
def summarize_with_truncation(contents, opts)
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content =
::DiscourseAi::Tokenizer::BertTokenizer.truncate(text_to_summarize, max_tokens)
completion(truncated_content)
end
private
def max_length
lengths = {
"bart-large-cnn-samsum" => 1024,
"flan-t5-base-samsum" => 512,
"long-t5-tglobal-base-16384-book-summary" => 16_384,
}
def summarize_chunk(chunk_text, _opts)
completion(chunk_text)
end
lengths[model]
def reserved_tokens
0
end
def completion(prompt)
::DiscourseAi::Inference::DiscourseClassifier.perform!(
"#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
model,
prompt,
SiteSetting.ai_summarization_discourse_service_api_key,
).dig(:summary_text)
end
def tokenizer
DiscourseAi::Tokenizer::BertTokenizer
end
end
end

@@ -0,0 +1,96 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Models
class OpenAi < Base
def display_name
"Open AI's #{model}"
end
def correctly_configured?
SiteSetting.ai_openai_api_key.present?
end
def configuration_hint
I18n.t(
"discourse_ai.summarization.configuration_hint",
count: 1,
setting: "ai_openai_api_key",
)
end
def concatenate_summaries(summaries)
messages = [
{ role: "system", content: "You are a helpful bot" },
{
role: "user",
content:
"Concatenate these disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}",
},
]
completion(messages)
end
def summarize_with_truncation(contents, opts)
messages = [{ role: "system", content: build_base_prompt(opts) }]
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content = tokenizer.truncate(text_to_summarize, max_tokens - reserved_tokens)
messages << {
role: "user",
content: "Summarize the following in 400 words:\n#{truncated_content}",
}
completion(messages)
end
private
def summarize_chunk(chunk_text, opts)
completion(
[
{ role: "system", content: build_base_prompt(opts) },
{ role: "user", content: "Summarize the following in 400 words:\n#{chunk_text}" },
],
)
end
def build_base_prompt(opts)
base_prompt = <<~TEXT
You are a summarization bot.
You effectively summarise any text and reply ONLY with ONLY the summarized text.
You condense it into a shorter version.
You understand and generate Discourse forum Markdown.
TEXT
if opts[:resource_path]
base_prompt +=
"Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
end
base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
:content_title
]
base_prompt
end
def completion(prompt)
::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
:choices,
0,
:message,
:content,
)
end
def tokenizer
DiscourseAi::Tokenizer::OpenAiTokenizer
end
end
end
end
end

@@ -1,57 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class Anthropic < ::Summarization::Base
def display_name
"Anthropic's #{model}"
end
def correctly_configured?
SiteSetting.ai_anthropic_api_key.present?
end
def configuration_hint
I18n.t(
"discourse_ai.summarization.configuration_hint",
count: 1,
setting: "ai_anthropic_api_key",
)
end
def summarize(content_text)
response =
::DiscourseAi::Inference::AnthropicCompletions.perform!(
prompt(content_text),
model,
).dig(:completion)
Nokogiri::HTML5.fragment(response).at("ai").text
end
def prompt(content)
truncated_content =
::DiscourseAi::Tokenizer::AnthropicTokenizer.truncate(content, max_length - 50)
"Human: Summarize the following article that is inside <input> tags.
Please include only the summary inside <ai> tags.
<input>##{truncated_content}</input>
Assistant:
"
end
private
def max_length
lengths = { "claude-v1" => 9000, "claude-v1-100k" => 100_000 }
lengths[model]
end
end
end
end
end

@@ -0,0 +1,30 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class FoldContent < ::Summarization::Base
def initialize(completion_model)
@completion_model = completion_model
end
attr_reader :completion_model
delegate :correctly_configured?,
:display_name,
:configuration_hint,
:model,
to: :completion_model
def summarize(content)
opts = content.except(:contents)
summaries = completion_model.summarize_in_chunks(content[:contents], opts)
return { summary: summaries.first[:summary], chunks: [] } if summaries.length == 1
{ summary: completion_model.concatenate_summaries(summaries), chunks: summaries }
end
end
end
end
end

@@ -1,56 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class OpenAi < ::Summarization::Base
def display_name
"Open AI's #{model}"
end
def correctly_configured?
SiteSetting.ai_openai_api_key.present?
end
def configuration_hint
I18n.t(
"discourse_ai.summarization.configuration_hint",
count: 1,
setting: "ai_openai_api_key",
)
end
def summarize(content_text)
::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt(content_text), model).dig(
:choices,
0,
:message,
:content,
)
end
def prompt(content)
truncated_content =
::DiscourseAi::Tokenizer::OpenAiTokenizer.truncate(content, max_length - 50)
messages = [{ role: "system", content: <<~TEXT }]
Summarize the following article:\n\n#{truncated_content}
TEXT
end
private
def max_length
lengths = {
"gpt-3.5-turbo" => 4096,
"gpt-4" => 8192,
"gpt-3.5-turbo-16k" => 16_384,
"gpt-4-32k" => 32_768,
}
lengths[model]
end
end
end
end
end

@@ -0,0 +1,30 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class TruncateContent < ::Summarization::Base
def initialize(completion_model)
@completion_model = completion_model
end
attr_reader :completion_model
delegate :correctly_configured?,
:display_name,
:configuration_hint,
:model,
to: :completion_model
def summarize(content)
opts = content.except(:contents)
{
summary: completion_model.summarize_with_truncation(content[:contents], opts),
chunks: [],
}
end
end
end
end
end

@@ -24,7 +24,7 @@ module ::DiscourseAi
payload[:temperature] = temperature if temperature
payload[:top_p] = top_p if top_p
payload[:max_tokens_to_sample] = max_tokens || 300
payload[:max_tokens_to_sample] = max_tokens if max_tokens
payload[:stream] = true if block_given?
Net::HTTP.start(

@@ -3,21 +3,31 @@
module DiscourseAi
module Tokenizer
class BasicTokenizer
def self.tokenizer
raise NotImplementedError
end
class << self
def tokenizer
raise NotImplementedError
end
def self.tokenize(text)
tokenizer.encode(text).tokens
end
def self.size(text)
tokenize(text).size
end
def self.truncate(text, max_length)
# Fast track the common case where the text is already short enough.
return text if text.size < max_length
def tokenize(text)
tokenizer.encode(text).tokens
end
tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
def size(text)
tokenize(text).size
end
def truncate(text, max_length)
# Fast track the common case where the text is already short enough.
return text if text.size < max_length
tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
end
def can_expand_tokens?(text, addition, max_length)
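# Fast path: when the combined character count already fits,
# assume the token count fits too and skip encoding.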
return true if text.size + addition.size < max_length
tokenizer.encode(text).ids.length + tokenizer.encode(addition).ids.length < max_length
end
end
end
@@ -36,22 +46,30 @@ module DiscourseAi
end
class OpenAiTokenizer < BasicTokenizer
def self.tokenizer
@@tokenizer ||= Tiktoken.get_encoding("cl100k_base")
end
class << self
def tokenizer
@@tokenizer ||= Tiktoken.get_encoding("cl100k_base")
end
def self.tokenize(text)
tokenizer.encode(text)
end
def tokenize(text)
tokenizer.encode(text)
end
def self.truncate(text, max_length)
# Fast track the common case where the text is already short enough.
return text if text.size < max_length
def truncate(text, max_length)
# Fast track the common case where the text is already short enough.
return text if text.size < max_length
tokenizer.decode(tokenize(text).take(max_length))
rescue Tiktoken::UnicodeError
max_length = max_length - 1
retry
tokenizer.decode(tokenize(text).take(max_length))
rescue Tiktoken::UnicodeError
max_length = max_length - 1
retry
end
def can_expand_tokens?(text, addition, max_length)
return true if text.size + addition.size < max_length
tokenizer.encode(text).length + tokenizer.encode(addition).length < max_length
end
end
end
end

@@ -0,0 +1,116 @@
# frozen_string_literal: true
require_relative "../../../../support/anthropic_completion_stubs"
RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
let(:model) { "claude-v1" }
let(:max_tokens) { 720 }
subject { described_class.new(model, max_tokens: max_tokens) }
let(:content) do
{
resource_path: "/t/1/POST_NUMBER",
content_title: "This is a title",
contents: [{ poster: "asd", id: 1, text: "This is a text" }],
}
end
def expected_messages(contents, opts)
base_prompt = <<~TEXT
Human: Summarize the following forum discussion inside the given <input> tag.
Include only the summary inside <ai> tags.
Try generating links as well the format is #{opts[:resource_path]}.
The discussion title is: #{opts[:content_title]}.
Don't use more than 400 words.
TEXT
text =
contents.reduce("") do |memo, item|
memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
base_prompt += "<input>#{text}</input>\nAssistant:\n"
end
describe "#summarize_in_chunks" do
context "when the content fits in a single chunk" do
it "performs a request to summarize" do
opts = content.except(:contents)
AnthropicCompletionStubs.stub_response(
expected_messages(content[:contents], opts),
"<ai>This is summary 1</ai>",
)
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1")
end
end
context "when the content fits in multiple chunks" do
it "performs a request for each one to summarize" do
content[:contents] << {
poster: "asd2",
id: 2,
text: "This is a different text to summarize",
}
opts = content.except(:contents)
content[:contents].each_with_index do |item, idx|
AnthropicCompletionStubs.stub_response(
expected_messages([item], opts),
"<ai>This is summary #{idx + 1}</ai>",
)
end
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
end
end
end
describe "#concatenate_summaries" do
it "combines all the different summaries into a single one" do
messages = <<~TEXT
Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
Include only the summary inside <ai> tags.
<input>summary 1</input>
<input>summary 2</input>
Assistant:
TEXT
AnthropicCompletionStubs.stub_response(messages, "<ai>concatenated summary</ai>")
expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq(
"concatenated summary",
)
end
end
describe "#summarize_with_truncation" do
let(:max_tokens) { 709 }
it "truncates the context to meet the token limit" do
opts = content.except(:contents)
instructions = <<~TEXT
Human: Summarize the following forum discussion inside the given <input> tag.
Include only the summary inside <ai> tags.
Try generating links as well the format is #{opts[:resource_path]}.
The discussion title is: #{opts[:content_title]}.
Don't use more than 400 words.
<input>(1 asd said: This is a</input>
Assistant:
TEXT
AnthropicCompletionStubs.stub_response(instructions, "<ai>truncated summary</ai>")
expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
end
end
end

@@ -0,0 +1,93 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::Models::Discourse do
let(:model) { "bart-large-cnn-samsum" }
let(:max_tokens) { 20 }
subject { described_class.new(model, max_tokens: max_tokens) }
let(:content) do
{
resource_path: "/t/1/POST_NUMBER",
content_title: "This is a title",
contents: [{ poster: "asd", id: 1, text: "This is a text" }],
}
end
before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" }
def stub_request(prompt, response)
WebMock
.stub_request(
:post,
"#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
)
.with(body: JSON.dump(model: model, content: prompt))
.to_return(status: 200, body: JSON.dump(summary_text: response))
end
def expected_messages(contents, opts)
contents.reduce("") do |memo, item|
memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
end
describe "#summarize_in_chunks" do
context "when the content fits in a single chunk" do
it "performs a request to summarize" do
opts = content.except(:contents)
stub_request(expected_messages(content[:contents], opts), "This is summary 1")
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1")
end
end
context "when the content fits in multiple chunks" do
it "performs a request for each one to summarize" do
content[:contents] << {
poster: "asd2",
id: 2,
text: "This is a different text to summarize",
}
opts = content.except(:contents)
content[:contents].each_with_index do |item, idx|
stub_request(expected_messages([item], opts), "This is summary #{idx + 1}")
end
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
end
end
end
describe "#concatenate_summaries" do
it "combines all the different summaries into a single one" do
messages = ["summary 1", "summary 2"].join("\n")
stub_request(messages, "concatenated summary")
expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq(
"concatenated summary",
)
end
end
describe "#summarize_with_truncation" do
let(:max_tokens) { 9 }
it "truncates the context to meet the token limit" do
opts = content.except(:contents)
stub_request("( 1 asd said : this is", "truncated summary")
expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
end
end
end

@@ -0,0 +1,116 @@
# frozen_string_literal: true
require_relative "../../../../support/openai_completions_inference_stubs"
RSpec.describe DiscourseAi::Summarization::Models::OpenAi do
let(:model) { "gpt-3.5-turbo" }
let(:max_tokens) { 720 }
subject { described_class.new(model, max_tokens: max_tokens) }
let(:content) do
{
resource_path: "/t/1/POST_NUMBER",
content_title: "This is a title",
contents: [{ poster: "asd", id: 1, text: "This is a text" }],
}
end
def expected_messages(contents, opts)
base_prompt = <<~TEXT
You are a summarization bot.
You effectively summarise any text and reply ONLY with ONLY the summarized text.
You condense it into a shorter version.
You understand and generate Discourse forum Markdown.
Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)
The discussion title is: #{opts[:content_title]}.
TEXT
messages = [{ role: "system", content: base_prompt }]
text =
contents.reduce("") do |memo, item|
memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
messages << { role: "user", content: "Summarize the following in 400 words:\n#{text}" }
end
describe "#summarize_in_chunks" do
context "when the content fits in a single chunk" do
it "performs a request to summarize" do
opts = content.except(:contents)
OpenAiCompletionsInferenceStubs.stub_response(
expected_messages(content[:contents], opts),
"This is summary 1",
)
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1")
end
end
context "when the content fits in multiple chunks" do
it "performs a request for each one to summarize" do
content[:contents] << {
poster: "asd2",
id: 2,
text: "This is a different text to summarize",
}
opts = content.except(:contents)
content[:contents].each_with_index do |item, idx|
OpenAiCompletionsInferenceStubs.stub_response(
expected_messages([item], opts),
"This is summary #{idx + 1}",
)
end
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
end
end
end
describe "#concatenate_summaries" do
it "combines all the different summaries into a single one" do
messages = [
{ role: "system", content: "You are a helpful bot" },
{
role: "user",
content:
"Concatenate these disjoint summaries, creating a cohesive narrative:\nsummary 1\nsummary 2",
},
]
OpenAiCompletionsInferenceStubs.stub_response(messages, "concatenated summary")
expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq(
"concatenated summary",
)
end
end
describe "#summarize_with_truncation" do
let(:max_tokens) { 709 }
it "truncates the context to meet the token limit" do
opts = content.except(:contents)
truncated_version = expected_messages(content[:contents], opts)
truncated_version.last[
:content
] = "Summarize the following in 400 words:\n(1 asd said: This is a"
OpenAiCompletionsInferenceStubs.stub_response(truncated_version, "truncated summary")
expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
end
end
end

@@ -1,26 +0,0 @@
# frozen_string_literal: true
require_relative "../../../../support/anthropic_completion_stubs"
RSpec.describe DiscourseAi::Summarization::Strategies::Anthropic do
describe "#summarize" do
let(:model) { "claude-v1" }
subject { described_class.new(model) }
it "asks an Anthropic's model to summarize the content" do
summarization_text = "This is a text"
expected_response = "This is a summary"
AnthropicCompletionStubs.stub_response(
subject.prompt(summarization_text),
"<ai>#{expected_response}</ai>",
req_opts: {
max_tokens_to_sample: 300,
},
)
expect(subject.summarize(summarization_text)).to eq(expected_response)
end
end
end

@@ -1,25 +0,0 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::Strategies::DiscourseAi do
describe "#summarize" do
let(:model) { "bart-large-cnn-samsum" }
subject { described_class.new(model) }
it "asks a Discourse's model to summarize the content" do
SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com"
summarization_text = "This is a text"
expected_response = "This is a summary"
WebMock
.stub_request(
:post,
"#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
)
.with(body: JSON.dump(model: model, content: subject.prompt(summarization_text)))
.to_return(status: 200, body: JSON.dump(summary_text: expected_response))
expect(subject.summarize(summarization_text)).to eq(expected_response)
end
end
end

@@ -0,0 +1,38 @@
# frozen_string_literal: true
require_relative "../../../../support/summarization/dummy_completion_model"
RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
describe "#summarize" do
let(:summarize_text) { "This is a text" }
let(:model) { DummyCompletionModel.new(model_tokens) }
let(:model_tokens) do
# Make sure each content fits in a single chunk.
DiscourseAi::Tokenizer::BertTokenizer.size("(1 asd said: This is a text ") + 3
end
subject { described_class.new(model) }
let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
context "when the content to summarize fits in a single call" do
it "does one call to summarize content" do
result = subject.summarize(content)
expect(model.summarization_calls).to eq(1)
expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
end
end
context "when the content to summarize doesn't fit in a single call" do
it "summarizes each chunk and then concatenates them" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
result = subject.summarize(content)
expect(model.summarization_calls).to eq(3)
expect(result[:summary]).to eq(DummyCompletionModel::CONCATENATED_SUMMARIES)
end
end
end
end

@@ -1,21 +0,0 @@
# frozen_string_literal: true
require_relative "../../../../support/openai_completions_inference_stubs"
RSpec.describe DiscourseAi::Summarization::Strategies::OpenAi do
let(:model) { "gpt-3.5-turbo" }
subject { described_class.new(model) }
it "asks a OpenAI's model to summarize the content" do
summarization_text = "This is a text"
expected_response = "This is a summary"
OpenAiCompletionsInferenceStubs.stub_response(
subject.prompt(summarization_text),
expected_response,
)
expect(subject.summarize(summarization_text)).to eq(expected_response)
end
end

@@ -0,0 +1,26 @@
# frozen_string_literal: true
require_relative "../../../../support/summarization/dummy_completion_model"
RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do
describe "#summarize" do
let(:summarize_text) { "This is a text" }
let(:model_tokens) { summarize_text.length }
let(:model) { DummyCompletionModel.new(model_tokens) }
subject { described_class.new(model) }
let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
context "when the content to summarize doesn't fit in a single call" do
it "summarizes a truncated version" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
result = subject.summarize(content)
expect(model.summarization_calls).to eq(1)
expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
end
end
end
end

@@ -45,7 +45,11 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
AnthropicCompletionStubs.stub_streamed_response(prompt, deltas, req_opts: req_opts)
DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, "claude-v1") do |partial, cancel|
DiscourseAi::Inference::AnthropicCompletions.perform!(
prompt,
"claude-v1",
max_tokens: req_opts[:max_tokens_to_sample],
) do |partial, cancel|
data = partial[:completion]
content = data if data
cancel.call if content.split(" ").length == 2

@@ -0,0 +1,54 @@
# frozen_string_literal: true
class DummyCompletionModel
SINGLE_SUMMARY = "this is a single summary"
CONCATENATED_SUMMARIES = "this is a concatenated summary"
def initialize(prompt_length)
@max_length = prompt_length
@summarization_calls = 0
end
attr_reader :max_length, :summarization_calls
def summarize_in_chunks(contents, opts)
chunks = []
section = { ids: [], summary: "" }
contents.each do |item|
new_content = "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
if tokenizer.can_expand_tokens?(section[:summary], new_content, max_length)
section[:summary] += new_content
section[:ids] << item[:id]
else
chunks << section
section = { ids: [item[:id]], summary: new_content }
end
end
chunks << section if section[:summary].present?
chunks.each do |chunk|
chunk[:summary] = SINGLE_SUMMARY
@summarization_calls += 1
end
chunks
end
def concatenate_summaries(summaries)
@summarization_calls += 1
CONCATENATED_SUMMARIES
end
def summarize_with_truncation(_contents, _opts)
@summarization_calls += 1
SINGLE_SUMMARY
end
def tokenizer
DiscourseAi::Tokenizer::BertTokenizer
end
end