From 9a79afcdbf981ee5151073566c084e73f37e1b4f Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Tue, 27 Jun 2023 12:26:33 -0300 Subject: [PATCH] DEV: Better strategies for summarization (#88) * DEV: Better strategies for summarization A strategy's responsibility is: "Given a collection of texts, I know how to summarize them most efficiently, using the minimum number of requests and maximizing token usage". Each model has a different token limit, so it all boils down to two strategies: Folding: summarize the texts in chunks, then build a final summary from those chunk summaries. Truncation: combine the texts into a single prompt and truncate it to the model's token limit. While the latter is less than ideal, we need it for "bart-large-cnn-samsum" and "flan-t5-base-samsum", both of which have low limits. The rest rely on folding. * Expose summarized chunks to users --- lib/modules/summarization/entry_point.rb | 43 ++++--- lib/modules/summarization/models/anthropic.rb | 84 +++++++++++++ lib/modules/summarization/models/base.rb | 82 +++++++++++++ .../discourse_ai.rb => models/discourse.rb} | 46 ++++--- lib/modules/summarization/models/open_ai.rb | 96 +++++++++++++++ .../summarization/strategies/anthropic.rb | 57 --------- .../summarization/strategies/fold_content.rb | 30 +++++ .../summarization/strategies/open_ai.rb | 56 --------- .../strategies/truncate_content.rb | 30 +++++ lib/shared/inference/anthropic_completions.rb | 2 +- lib/shared/tokenizer/tokenizer.rb | 70 +++++++---- .../summarization/models/anthropic_spec.rb | 116 ++++++++++++++++++ .../summarization/models/discourse_spec.rb | 93 ++++++++++++++ .../summarization/models/open_ai_spec.rb | 116 ++++++++++++++++++ .../strategies/anthropic_spec.rb | 26 ---- .../strategies/discourse_spec.rb | 25 ---- .../strategies/fold_content_spec.rb | 38 ++++++ .../summarization/strategies/open_ai_spec.rb | 21 ---- .../strategies/truncate_content_spec.rb | 26 ++++ .../inference/anthropic_completions_spec.rb | 6 +- .../summarization/dummy_completion_model.rb | 54 ++++++++ 21 files changed, 872 insertions(+), 245 deletions(-) create mode 100644 lib/modules/summarization/models/anthropic.rb create mode 100644 lib/modules/summarization/models/base.rb rename lib/modules/summarization/{strategies/discourse_ai.rb => models/discourse.rb} (58%) create mode 100644 lib/modules/summarization/models/open_ai.rb delete mode 100644 lib/modules/summarization/strategies/anthropic.rb create mode 100644 lib/modules/summarization/strategies/fold_content.rb delete mode 100644 lib/modules/summarization/strategies/open_ai.rb create mode 100644 lib/modules/summarization/strategies/truncate_content.rb create mode 100644 spec/lib/modules/summarization/models/anthropic_spec.rb create mode 100644 spec/lib/modules/summarization/models/discourse_spec.rb create mode 100644 spec/lib/modules/summarization/models/open_ai_spec.rb delete mode 100644 spec/lib/modules/summarization/strategies/anthropic_spec.rb delete mode 100644 spec/lib/modules/summarization/strategies/discourse_spec.rb create mode 100644 spec/lib/modules/summarization/strategies/fold_content_spec.rb delete mode 100644 spec/lib/modules/summarization/strategies/open_ai_spec.rb create mode 100644 spec/lib/modules/summarization/strategies/truncate_content_spec.rb create mode 100644 spec/support/summarization/dummy_completion_model.rb diff --git a/lib/modules/summarization/entry_point.rb b/lib/modules/summarization/entry_point.rb index 93b27ea0..d1af8117 100644 ---
a/lib/modules/summarization/entry_point.rb +++ b/lib/modules/summarization/entry_point.rb @@ -4,23 +4,38 @@ module DiscourseAi module Summarization class EntryPoint def load_files - require_relative "strategies/anthropic" - require_relative "strategies/discourse_ai" - require_relative "strategies/open_ai" + require_relative "models/base" + require_relative "models/anthropic" + require_relative "models/discourse" + require_relative "models/open_ai" + + require_relative "strategies/fold_content" + require_relative "strategies/truncate_content" end def inject_into(plugin) - [ - Strategies::OpenAi.new("gpt-4"), - Strategies::OpenAi.new("gpt-4-32k"), - Strategies::OpenAi.new("gpt-3.5-turbo"), - Strategies::OpenAi.new("gpt-3.5-turbo-16k"), - Strategies::DiscourseAi.new("bart-large-cnn-samsum"), - Strategies::DiscourseAi.new("flan-t5-base-samsum"), - Strategies::DiscourseAi.new("long-t5-tglobal-base-16384-book-summary"), - Strategies::Anthropic.new("claude-v1"), - Strategies::Anthropic.new("claude-v1-100k"), - ].each { |strategy| plugin.register_summarization_strategy(strategy) } + foldable_models = [ + Models::OpenAi.new("gpt-4", max_tokens: 8192), + Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768), + Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096), + Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384), + Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384), + Models::Anthropic.new("claude-v1", max_tokens: 9000), + Models::Anthropic.new("claude-v1-100k", max_tokens: 100_000), + ] + + foldable_models.each do |model| + plugin.register_summarization_strategy(Strategies::FoldContent.new(model)) + end + + truncable_models = [ + Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024), + Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512), + ] + + truncable_models.each do |model| + plugin.register_summarization_strategy(Strategies::TruncateContent.new(model)) + end end end end diff --git a/lib/modules/summarization/models/anthropic.rb b/lib/modules/summarization/models/anthropic.rb new file mode 100644 index 00000000..d038385e --- /dev/null +++ b/lib/modules/summarization/models/anthropic.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Models + class Anthropic < Base + def display_name + "Anthropic's #{model}" + end + + def correctly_configured? + SiteSetting.ai_anthropic_api_key.present? + end + + def configuration_hint + I18n.t( + "discourse_ai.summarization.configuration_hint", + count: 1, + setting: "ai_anthropic_api_key", + ) + end + + def concatenate_summaries(summaries) + instructions = <<~TEXT + Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative. + Include only the summary inside <ai> tags.
+ TEXT + + instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" } + instructions += "Assistant:\n" + + completion(instructions) + end + + def summarize_with_truncation(contents, opts) + instructions = build_base_prompt(opts) + + text_to_summarize = contents.map { |c| format_content_item(c) }.join + truncated_content = tokenizer.truncate(text_to_summarize, max_tokens - reserved_tokens) + + instructions += "<input>#{truncated_content}</input>\nAssistant:\n" + + completion(instructions) + end + + private + + def summarize_chunk(chunk_text, opts) + completion(build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n") + end + + def build_base_prompt(opts) + base_prompt = <<~TEXT + Human: Summarize the following forum discussion inside the given <input> tag. + Include only the summary inside <ai> tags. + TEXT + + if opts[:resource_path] + base_prompt += "Try generating links as well the format is #{opts[:resource_path]}.\n" + end + + base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[ + :content_title + ] + + base_prompt += "Don't use more than 400 words.\n" + end + + def completion(prompt) + response = + ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(:completion) + + Nokogiri::HTML5.fragment(response).at("ai").text + end + + def tokenizer + DiscourseAi::Tokenizer::AnthropicTokenizer + end + + attr_reader :max_tokens + end + end + end +end diff --git a/lib/modules/summarization/models/base.rb b/lib/modules/summarization/models/base.rb new file mode 100644 index 00000000..558006a8 --- /dev/null +++ b/lib/modules/summarization/models/base.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Models + class Base + def initialize(model, max_tokens:) + @model = model + @max_tokens = max_tokens + end + + def correctly_configured? + raise NotImplementedError + end + + def display_name + raise NotImplementedError + end + + def configuration_hint + raise NotImplementedError + end + + def summarize_in_chunks(contents, opts) + chunks = [] + + section = { ids: [], summary: "" } + + contents.each do |item| + new_content = format_content_item(item) + + if tokenizer.can_expand_tokens?( + section[:summary], + new_content, + max_tokens - reserved_tokens, + ) + section[:summary] += new_content + section[:ids] << item[:id] + else + chunks << section + section = { ids: [item[:id]], summary: new_content } + end + end + + chunks << section if section[:summary].present?
+ + chunks.each { |chunk| chunk[:summary] = summarize_chunk(chunk[:summary], opts) } + + chunks + end + + def concatenate_summaries(_summaries) + raise NotImplementedError + end + + def summarize_with_truncation(_contents, _opts) + raise NotImplementedError + end + + attr_reader :model + + protected + + attr_reader :max_tokens + + def summarize_chunk(_chunk_text, _opts) + raise NotImplementedError + end + + def format_content_item(item) + "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + end + + def reserved_tokens + # Reserve tokens for the response and the base prompt + # ~500 words + 700 + end + end + end + end +end diff --git a/lib/modules/summarization/strategies/discourse_ai.rb b/lib/modules/summarization/models/discourse.rb similarity index 58% rename from lib/modules/summarization/strategies/discourse_ai.rb rename to lib/modules/summarization/models/discourse.rb index 363b40b8..240f1fb8 100644 --- a/lib/modules/summarization/strategies/discourse_ai.rb +++ b/lib/modules/summarization/models/discourse.rb @@ -2,8 +2,8 @@ module DiscourseAi module Summarization - module Strategies - class DiscourseAi < ::Summarization::Base + module Models + class Discourse < Base def display_name "Discourse AI's #{model}" end @@ -22,29 +22,39 @@ module DiscourseAi ) end - def summarize(content_text) - ::DiscourseAi::Inference::DiscourseClassifier.perform!( - "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify", - model, - prompt(content_text), - SiteSetting.ai_summarization_discourse_service_api_key, - ).dig(:summary_text) + def concatenate_summaries(summaries) + completion(summaries.join("\n")) end - def prompt(text) - ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text, max_length) + def summarize_with_truncation(contents, opts) + text_to_summarize = contents.map { |c| format_content_item(c) }.join + truncated_content = + ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text_to_summarize, max_tokens) + + completion(truncated_content) end private - def max_length - lengths = { - "bart-large-cnn-samsum" => 1024, - "flan-t5-base-samsum" => 512, - "long-t5-tglobal-base-16384-book-summary" => 16_384, - } + def summarize_chunk(chunk_text, _opts) + completion(chunk_text) + end - lengths[model] + def reserved_tokens + 0 + end + + def completion(prompt) + ::DiscourseAi::Inference::DiscourseClassifier.perform!( + "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify", + model, + prompt, + SiteSetting.ai_summarization_discourse_service_api_key, + ).dig(:summary_text) + end + + def tokenizer + DiscourseAi::Tokenizer::BertTokenizer end end end diff --git a/lib/modules/summarization/models/open_ai.rb b/lib/modules/summarization/models/open_ai.rb new file mode 100644 index 00000000..cd91d31e --- /dev/null +++ b/lib/modules/summarization/models/open_ai.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Models + class OpenAi < Base + def display_name + "Open AI's #{model}" + end + + def correctly_configured? + SiteSetting.ai_openai_api_key.present?
+ end + + def configuration_hint + I18n.t( + "discourse_ai.summarization.configuration_hint", + count: 1, + setting: "ai_openai_api_key", + ) + end + + def concatenate_summaries(summaries) + messages = [ + { role: "system", content: "You are a helpful bot" }, + { + role: "user", + content: + "Concatenate these disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}", + }, + ] + + completion(messages) + end + + def summarize_with_truncation(contents, opts) + messages = [{ role: "system", content: build_base_prompt(opts) }] + + text_to_summarize = contents.map { |c| format_content_item(c) }.join + truncated_content = tokenizer.truncate(text_to_summarize, max_tokens - reserved_tokens) + + messages << { + role: "user", + content: "Summarize the following in 400 words:\n#{truncated_content}", + } + + completion(messages) + end + + private + + def summarize_chunk(chunk_text, opts) + completion( + [ + { role: "system", content: build_base_prompt(opts) }, + { role: "user", content: "Summarize the following in 400 words:\n#{chunk_text}" }, + ], + ) + end + + def build_base_prompt(opts) + base_prompt = <<~TEXT + You are a summarization bot. + You effectively summarise any text and reply ONLY with ONLY the summarized text. + You condense it into a shorter version. + You understand and generate Discourse forum Markdown. + TEXT + + if opts[:resource_path] + base_prompt += + "Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n" + end + + base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[ + :content_title + ] + + base_prompt + end + + def completion(prompt) + ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig( + :choices, + 0, + :message, + :content, + ) + end + + def tokenizer + DiscourseAi::Tokenizer::OpenAiTokenizer + end + end + end + end +end diff --git a/lib/modules/summarization/strategies/anthropic.rb b/lib/modules/summarization/strategies/anthropic.rb deleted file mode 100644 index 57e08285..00000000 --- a/lib/modules/summarization/strategies/anthropic.rb +++ /dev/null @@ -1,57 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - module Summarization - module Strategies - class Anthropic < ::Summarization::Base - def display_name - "Anthropic's #{model}" - end - - def correctly_configured? - SiteSetting.ai_anthropic_api_key.present? - end - - def configuration_hint - I18n.t( - "discourse_ai.summarization.configuration_hint", - count: 1, - setting: "ai_anthropic_api_key", - ) - end - - def summarize(content_text) - response = - ::DiscourseAi::Inference::AnthropicCompletions.perform!( - prompt(content_text), - model, - ).dig(:completion) - - Nokogiri::HTML5.fragment(response).at("ai").text - end - - def prompt(content) - truncated_content = - ::DiscourseAi::Tokenizer::AnthropicTokenizer.truncate(content, max_length - 50) - - "Human: Summarize the following article that is inside <input> tags. - Please include only the summary inside <ai> tags.
- - <input>##{truncated_content}</input> - - - Assistant: - " - end - - private - - def max_length - lengths = { "claude-v1" => 9000, "claude-v1-100k" => 100_000 } - - lengths[model] - end - end - end - end -end diff --git a/lib/modules/summarization/strategies/fold_content.rb b/lib/modules/summarization/strategies/fold_content.rb new file mode 100644 index 00000000..2f4508d6 --- /dev/null +++ b/lib/modules/summarization/strategies/fold_content.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Strategies + class FoldContent < ::Summarization::Base + def initialize(completion_model) + @completion_model = completion_model + end + + attr_reader :completion_model + + delegate :correctly_configured?, + :display_name, + :configuration_hint, + :model, + to: :completion_model + + def summarize(content) + opts = content.except(:contents) + summaries = completion_model.summarize_in_chunks(content[:contents], opts) + + return { summary: summaries.first[:summary], chunks: [] } if summaries.length == 1 + + { summary: completion_model.concatenate_summaries(summaries), chunks: summaries } + end + end + end + end +end diff --git a/lib/modules/summarization/strategies/open_ai.rb b/lib/modules/summarization/strategies/open_ai.rb deleted file mode 100644 index 35011b66..00000000 --- a/lib/modules/summarization/strategies/open_ai.rb +++ /dev/null @@ -1,56 +0,0 @@ -# frozen_string_literal: true - -module DiscourseAi - module Summarization - module Strategies - class OpenAi < ::Summarization::Base - def display_name - "Open AI's #{model}" - end - - def correctly_configured? - SiteSetting.ai_openai_api_key.present? - end - - def configuration_hint - I18n.t( - "discourse_ai.summarization.configuration_hint", - count: 1, - setting: "ai_openai_api_key", - ) - end - - def summarize(content_text) - ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt(content_text), model).dig( - :choices, - 0, - :message, - :content, - ) - end - - def prompt(content) - truncated_content = - ::DiscourseAi::Tokenizer::OpenAiTokenizer.truncate(content, max_length - 50) - - messages = [{ role: "system", content: <<~TEXT }] - Summarize the following article:\n\n#{truncated_content} - TEXT - end - - private - - def max_length - lengths = { - "gpt-3.5-turbo" => 4096, - "gpt-4" => 8192, - "gpt-3.5-turbo-16k" => 16_384, - "gpt-4-32k" => 32_768, - } - - lengths[model] - end - end - end - end -end diff --git a/lib/modules/summarization/strategies/truncate_content.rb b/lib/modules/summarization/strategies/truncate_content.rb new file mode 100644 index 00000000..7634dd65 --- /dev/null +++ b/lib/modules/summarization/strategies/truncate_content.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module DiscourseAi + module Summarization + module Strategies + class TruncateContent < ::Summarization::Base + def initialize(completion_model) + @completion_model = completion_model + end + + attr_reader :completion_model + + delegate :correctly_configured?, + :display_name, + :configuration_hint, + :model, + to: :completion_model + + def summarize(content) + opts = content.except(:contents) + + { + summary: completion_model.summarize_with_truncation(content[:contents], opts), + chunks: [], + } + end + end + end + end +end diff --git a/lib/shared/inference/anthropic_completions.rb b/lib/shared/inference/anthropic_completions.rb index 8af3be02..18fd4418 100644 --- a/lib/shared/inference/anthropic_completions.rb +++ b/lib/shared/inference/anthropic_completions.rb @@ -24,7 +24,7 @@ module ::DiscourseAi
payload[:temperature] = temperature if temperature payload[:top_p] = top_p if top_p - payload[:max_tokens_to_sample] = max_tokens || 300 + payload[:max_tokens_to_sample] = max_tokens if max_tokens payload[:stream] = true if block_given? Net::HTTP.start( diff --git a/lib/shared/tokenizer/tokenizer.rb b/lib/shared/tokenizer/tokenizer.rb index 0fdcf2c9..01178614 100644 --- a/lib/shared/tokenizer/tokenizer.rb +++ b/lib/shared/tokenizer/tokenizer.rb @@ -3,21 +3,31 @@ module DiscourseAi module Tokenizer class BasicTokenizer - def self.tokenizer - raise NotImplementedError - end + class << self + def tokenizer + raise NotImplementedError + end - def self.tokenize(text) - tokenizer.encode(text).tokens - end - def self.size(text) - tokenize(text).size - end - def self.truncate(text, max_length) - # Fast track the common case where the text is already short enough. - return text if text.size < max_length + def tokenize(text) + tokenizer.encode(text).tokens + end - tokenizer.decode(tokenizer.encode(text).ids.take(max_length)) + def size(text) + tokenize(text).size + end + + def truncate(text, max_length) + # Fast track the common case where the text is already short enough. + return text if text.size < max_length + + tokenizer.decode(tokenizer.encode(text).ids.take(max_length)) + end + + def can_expand_tokens?(text, addition, max_length) + return true if text.size + addition.size < max_length + + tokenizer.encode(text).ids.length + tokenizer.encode(addition).ids.length < max_length + end end end @@ -36,22 +46,30 @@ module DiscourseAi end class OpenAiTokenizer < BasicTokenizer - def self.tokenizer - @@tokenizer ||= Tiktoken.get_encoding("cl100k_base") - end + class << self + def tokenizer + @@tokenizer ||= Tiktoken.get_encoding("cl100k_base") + end - def self.tokenize(text) - tokenizer.encode(text) - end + def tokenize(text) + tokenizer.encode(text) + end - def self.truncate(text, max_length) - # Fast track the common case where the text is already short enough. - return text if text.size < max_length + def truncate(text, max_length) + # Fast track the common case where the text is already short enough. + return text if text.size < max_length - tokenizer.decode(tokenize(text).take(max_length)) - rescue Tiktoken::UnicodeError - max_length = max_length - 1 - retry + tokenizer.decode(tokenize(text).take(max_length)) + rescue Tiktoken::UnicodeError + max_length = max_length - 1 + retry + end + + def can_expand_tokens?(text, addition, max_length) + return true if text.size + addition.size < max_length + + tokenizer.encode(text).length + tokenizer.encode(addition).length < max_length + end end end end diff --git a/spec/lib/modules/summarization/models/anthropic_spec.rb b/spec/lib/modules/summarization/models/anthropic_spec.rb new file mode 100644 index 00000000..2ce99a3a --- /dev/null +++ b/spec/lib/modules/summarization/models/anthropic_spec.rb @@ -0,0 +1,116 @@ +# frozen_string_literal: true + +require_relative "../../../../support/anthropic_completion_stubs" + +RSpec.describe DiscourseAi::Summarization::Models::Anthropic do + let(:model) { "claude-v1" } + let(:max_tokens) { 720 } + + subject { described_class.new(model, max_tokens: max_tokens) } + + let(:content) do + { + resource_path: "/t/1/POST_NUMBER", + content_title: "This is a title", + contents: [{ poster: "asd", id: 1, text: "This is a text" }], + } + end + + def expected_messages(contents, opts) + base_prompt = <<~TEXT + Human: Summarize the following forum discussion inside the given <input> tag. + Include only the summary inside <ai> tags.
+ Try generating links as well the format is #{opts[:resource_path]}. + The discussion title is: #{opts[:content_title]}. + Don't use more than 400 words. + TEXT + + text = + contents.reduce("") do |memo, item| + memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + end + + base_prompt += "<input>#{text}</input>\nAssistant:\n" + end + + describe "#summarize_in_chunks" do + context "when the content fits in a single chunk" do + it "performs a request to summarize" do + opts = content.except(:contents) + + AnthropicCompletionStubs.stub_response( + expected_messages(content[:contents], opts), + "<ai>This is summary 1</ai>", + ) + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1") + end + end + + context "when the content fits in multiple chunks" do + it "performs a request for each one to summarize" do + content[:contents] << { + poster: "asd2", + id: 2, + text: "This is a different text to summarize", + } + opts = content.except(:contents) + + content[:contents].each_with_index do |item, idx| + AnthropicCompletionStubs.stub_response( + expected_messages([item], opts), + "<ai>This is summary #{idx + 1}</ai>", + ) + end + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2") + end + end + end + + describe "#concatenate_summaries" do + it "combines all the different summaries into a single one" do + messages = <<~TEXT + Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative. + Include only the summary inside <ai> tags. + <input>summary 1</input> + <input>summary 2</input> + Assistant: + TEXT + + AnthropicCompletionStubs.stub_response(messages, "<ai>concatenated summary</ai>") + + expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq( + "concatenated summary", + ) + end + end + + describe "#summarize_with_truncation" do + let(:max_tokens) { 709 } + + it "truncates the context to meet the token limit" do + opts = content.except(:contents) + + instructions = <<~TEXT + Human: Summarize the following forum discussion inside the given <input> tag. + Include only the summary inside <ai> tags. + Try generating links as well the format is #{opts[:resource_path]}. + The discussion title is: #{opts[:content_title]}. + Don't use more than 400 words.
<input>(1 asd said: This is a</input> + Assistant: + TEXT + + AnthropicCompletionStubs.stub_response(instructions, "<ai>truncated summary</ai>") + + expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary") + end + end +end diff --git a/spec/lib/modules/summarization/models/discourse_spec.rb b/spec/lib/modules/summarization/models/discourse_spec.rb new file mode 100644 index 00000000..c505da1c --- /dev/null +++ b/spec/lib/modules/summarization/models/discourse_spec.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +RSpec.describe DiscourseAi::Summarization::Models::Discourse do + let(:model) { "bart-large-cnn-samsum" } + let(:max_tokens) { 20 } + + subject { described_class.new(model, max_tokens: max_tokens) } + + let(:content) do + { + resource_path: "/t/1/POST_NUMBER", + content_title: "This is a title", + contents: [{ poster: "asd", id: 1, text: "This is a text" }], + } + end + + before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" } + + def stub_request(prompt, response) + WebMock + .stub_request( + :post, + "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify", + ) + .with(body: JSON.dump(model: model, content: prompt)) + .to_return(status: 200, body: JSON.dump(summary_text: response)) + end + + def expected_messages(contents, opts) + contents.reduce("") do |memo, item| + memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + end + end + + describe "#summarize_in_chunks" do + context "when the content fits in a single chunk" do + it "performs a request to summarize" do + opts = content.except(:contents) + + stub_request(expected_messages(content[:contents], opts), "This is summary 1") + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1") + end + end + + context "when the content fits in multiple chunks" do + it "performs a request for each one to summarize" do + content[:contents] << { + poster: "asd2", + id: 2, + text: "This is a different text to summarize", + } + opts = content.except(:contents) + + content[:contents].each_with_index do |item, idx| + stub_request(expected_messages([item], opts), "This is summary #{idx + 1}") + end + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2") + end + end + end + + describe "#concatenate_summaries" do + it "combines all the different summaries into a single one" do + messages = ["summary 1", "summary 2"].join("\n") + + stub_request(messages, "concatenated summary") + + expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq( + "concatenated summary", + ) + end + end + + describe "#summarize_with_truncation" do + let(:max_tokens) { 9 } + + it "truncates the context to meet the token limit" do + opts = content.except(:contents) + + stub_request("( 1 asd said : this is", "truncated summary") + + expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary") + end + end +end diff --git a/spec/lib/modules/summarization/models/open_ai_spec.rb b/spec/lib/modules/summarization/models/open_ai_spec.rb new file mode 100644 index 00000000..d01fd287 --- /dev/null +++ b/spec/lib/modules/summarization/models/open_ai_spec.rb @@ -0,0 +1,116 @@ +# frozen_string_literal: true + +require_relative "../../../../support/openai_completions_inference_stubs" +
+RSpec.describe DiscourseAi::Summarization::Models::OpenAi do + let(:model) { "gpt-3.5-turbo" } + let(:max_tokens) { 720 } + + subject { described_class.new(model, max_tokens: max_tokens) } + + let(:content) do + { + resource_path: "/t/1/POST_NUMBER", + content_title: "This is a title", + contents: [{ poster: "asd", id: 1, text: "This is a text" }], + } + end + + def expected_messages(contents, opts) + base_prompt = <<~TEXT + You are a summarization bot. + You effectively summarise any text and reply ONLY with ONLY the summarized text. + You condense it into a shorter version. + You understand and generate Discourse forum Markdown. + Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77) + The discussion title is: #{opts[:content_title]}. + TEXT + + messages = [{ role: "system", content: base_prompt }] + + text = + contents.reduce("") do |memo, item| + memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + end + + messages << { role: "user", content: "Summarize the following in 400 words:\n#{text}" } + end + + describe "#summarize_in_chunks" do + context "when the content fits in a single chunk" do + it "performs a request to summarize" do + opts = content.except(:contents) + + OpenAiCompletionsInferenceStubs.stub_response( + expected_messages(content[:contents], opts), + "This is summary 1", + ) + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1") + end + end + + context "when the content fits in multiple chunks" do + it "performs a request for each one to summarize" do + content[:contents] << { + poster: "asd2", + id: 2, + text: "This is a different text to summarize", + } + opts = content.except(:contents) + + content[:contents].each_with_index do |item, idx| + OpenAiCompletionsInferenceStubs.stub_response( + expected_messages([item], opts), + "This is summary #{idx + 1}", + ) + end + + summarized_chunks = + subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] } + + expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2") + end + end + end + + describe "#concatenate_summaries" do + it "combines all the different summaries into a single one" do + messages = [ + { role: "system", content: "You are a helpful bot" }, + { + role: "user", + content: + "Concatenate these disjoint summaries, creating a cohesive narrative:\nsummary 1\nsummary 2", + }, + ] + + OpenAiCompletionsInferenceStubs.stub_response(messages, "concatenated summary") + + expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq( + "concatenated summary", + ) + end + end + + describe "#summarize_with_truncation" do + let(:max_tokens) { 709 } + + it "truncates the context to meet the token limit" do + opts = content.except(:contents) + + truncated_version = expected_messages(content[:contents], opts) + + truncated_version.last[ + :content + ] = "Summarize the following in 400 words:\n(1 asd said: This is a" + + OpenAiCompletionsInferenceStubs.stub_response(truncated_version, "truncated summary") + + expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary") + end + end +end diff --git a/spec/lib/modules/summarization/strategies/anthropic_spec.rb b/spec/lib/modules/summarization/strategies/anthropic_spec.rb deleted file mode 100644 index afd64f13..00000000 --- a/spec/lib/modules/summarization/strategies/anthropic_spec.rb +++ /dev/null @@ -1,26 +0,0 
@@ -# frozen_string_literal: true - -require_relative "../../../../support/anthropic_completion_stubs" - -RSpec.describe DiscourseAi::Summarization::Strategies::Anthropic do - describe "#summarize" do - let(:model) { "claude-v1" } - - subject { described_class.new(model) } - - it "asks an Anthropic's model to summarize the content" do - summarization_text = "This is a text" - expected_response = "This is a summary" - - AnthropicCompletionStubs.stub_response( - subject.prompt(summarization_text), - "<ai>#{expected_response}</ai>", - req_opts: { - max_tokens_to_sample: 300, - }, - ) - - expect(subject.summarize(summarization_text)).to eq(expected_response) - end - end -end diff --git a/spec/lib/modules/summarization/strategies/discourse_spec.rb b/spec/lib/modules/summarization/strategies/discourse_spec.rb deleted file mode 100644 index 8d52e069..00000000 --- a/spec/lib/modules/summarization/strategies/discourse_spec.rb +++ /dev/null @@ -1,25 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe DiscourseAi::Summarization::Strategies::DiscourseAi do - describe "#summarize" do - let(:model) { "bart-large-cnn-samsum" } - - subject { described_class.new(model) } - - it "asks a Discourse's model to summarize the content" do - SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" - summarization_text = "This is a text" - expected_response = "This is a summary" - - WebMock - .stub_request( - :post, - "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify", - ) - .with(body: JSON.dump(model: model, content: subject.prompt(summarization_text))) - .to_return(status: 200, body: JSON.dump(summary_text: expected_response)) - - expect(subject.summarize(summarization_text)).to eq(expected_response) - end - end -end diff --git a/spec/lib/modules/summarization/strategies/fold_content_spec.rb b/spec/lib/modules/summarization/strategies/fold_content_spec.rb new file mode 100644 index 00000000..655a7855 --- /dev/null +++ b/spec/lib/modules/summarization/strategies/fold_content_spec.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +require_relative "../../../../support/summarization/dummy_completion_model" + +RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do + describe "#summarize" do + let(:summarize_text) { "This is a text" } + let(:model) { DummyCompletionModel.new(model_tokens) } + let(:model_tokens) do + # Make sure each content fits in a single chunk.
+ DiscourseAi::Tokenizer::BertTokenizer.size("(1 asd said: This is a text ") + 3 + end + + subject { described_class.new(model) } + + let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } } + + context "when the content to summarize fits in a single call" do + it "does one call to summarize content" do + result = subject.summarize(content) + + expect(model.summarization_calls).to eq(1) + expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY) + end + end + + context "when the content to summarize doesn't fit in a single call" do + it "summarizes each chunk and then concatenates them" do + content[:contents] << { poster: "asd2", id: 2, text: summarize_text } + + result = subject.summarize(content) + + expect(model.summarization_calls).to eq(3) + expect(result[:summary]).to eq(DummyCompletionModel::CONCATENATED_SUMMARIES) + end + end + end +end diff --git a/spec/lib/modules/summarization/strategies/open_ai_spec.rb b/spec/lib/modules/summarization/strategies/open_ai_spec.rb deleted file mode 100644 index a83ca4db..00000000 --- a/spec/lib/modules/summarization/strategies/open_ai_spec.rb +++ /dev/null @@ -1,21 +0,0 @@ -# frozen_string_literal: true - -require_relative "../../../../support/openai_completions_inference_stubs" - -RSpec.describe DiscourseAi::Summarization::Strategies::OpenAi do - let(:model) { "gpt-3.5-turbo" } - - subject { described_class.new(model) } - - it "asks a OpenAI's model to summarize the content" do - summarization_text = "This is a text" - expected_response = "This is a summary" - - OpenAiCompletionsInferenceStubs.stub_response( - subject.prompt(summarization_text), - expected_response, - ) - - expect(subject.summarize(summarization_text)).to eq(expected_response) - end -end diff --git a/spec/lib/modules/summarization/strategies/truncate_content_spec.rb b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb new file mode 100644 index 00000000..4b6f1584 --- /dev/null +++ b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require_relative "../../../../support/summarization/dummy_completion_model" + +RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do + describe "#summarize" do + let(:summarize_text) { "This is a text" } + let(:model_tokens) { summarize_text.length } + let(:model) { DummyCompletionModel.new(model_tokens) } + + subject { described_class.new(model) } + + let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } } + + context "when the content to summarize doesn't fit in a single call" do + it "summarizes a truncated version" do + content[:contents] << { poster: "asd2", id: 2, text: summarize_text } + + result = subject.summarize(content) + + expect(model.summarization_calls).to eq(1) + expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY) + end + end + end +end diff --git a/spec/shared/inference/anthropic_completions_spec.rb b/spec/shared/inference/anthropic_completions_spec.rb index 30e5037a..6ab081a7 100644 --- a/spec/shared/inference/anthropic_completions_spec.rb +++ b/spec/shared/inference/anthropic_completions_spec.rb @@ -45,7 +45,11 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do AnthropicCompletionStubs.stub_streamed_response(prompt, deltas, req_opts: req_opts) - DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, "claude-v1") do |partial, cancel| + DiscourseAi::Inference::AnthropicCompletions.perform!( + prompt, + "claude-v1", + max_tokens: 
req_opts[:max_tokens_to_sample], + ) do |partial, cancel| data = partial[:completion] content = data if data cancel.call if content.split(" ").length == 2 diff --git a/spec/support/summarization/dummy_completion_model.rb b/spec/support/summarization/dummy_completion_model.rb new file mode 100644 index 00000000..3c4136c0 --- /dev/null +++ b/spec/support/summarization/dummy_completion_model.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +class DummyCompletionModel + SINGLE_SUMMARY = "this is a single summary" + CONCATENATED_SUMMARIES = "this is a concatenated summary" + + def initialize(prompt_length) + @max_length = prompt_length + @summarization_calls = 0 + end + + attr_reader :max_length, :summarization_calls + + def summarize_in_chunks(contents, opts) + chunks = [] + + section = { ids: [], summary: "" } + + contents.each do |item| + new_content = "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " + + if tokenizer.can_expand_tokens?(section[:summary], new_content, max_length) + section[:summary] += new_content + section[:ids] << item[:id] + else + chunks << section + section = { ids: [item[:id]], summary: new_content } + end + end + + chunks << section if section[:summary].present? + + chunks.each do |chunk| + chunk[:summary] = SINGLE_SUMMARY + @summarization_calls += 1 + end + + chunks + end + + def concatenate_summaries(summaries) + @summarization_calls += 1 + CONCATENATED_SUMMARIES + end + + def summarize_with_truncation(_contents, _opts) + @summarization_calls += 1 + SINGLE_SUMMARY + end + + def tokenizer + DiscourseAi::Tokenizer::BertTokenizer + end
+end
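
Editor's note (not part of the patch): the sketch below shows how the pieces introduced above compose. It is a minimal sketch that assumes a Discourse instance with this plugin loaded; EchoModel, its names, and its trivial "summaries" are hypothetical, invented for illustration. Only the Models::Base, FoldContent, and TruncateContent APIs come from the patch itself.

# frozen_string_literal: true

# A hypothetical completion model: it "summarizes" a chunk by echoing its
# first 100 characters. A real model would perform an inference request here.
class EchoModel < DiscourseAi::Summarization::Models::Base
  def display_name
    "Echo #{model}"
  end

  def correctly_configured?
    true # real models check a site setting, e.g. an API key
  end

  def configuration_hint
    "no configuration required"
  end

  def concatenate_summaries(summaries)
    summaries.join(" ")
  end

  def summarize_with_truncation(contents, _opts)
    text = contents.map { |c| format_content_item(c) }.join
    tokenizer.truncate(text, max_tokens)
  end

  private

  def summarize_chunk(chunk_text, _opts)
    chunk_text[0, 100]
  end

  def reserved_tokens
    0
  end

  def tokenizer
    DiscourseAi::Tokenizer::BertTokenizer
  end
end

model = EchoModel.new("echo", max_tokens: 512)

# Folding: split contents into chunks by token budget, summarize each chunk,
# then combine the chunk summaries into a final summary.
fold = DiscourseAi::Summarization::Strategies::FoldContent.new(model)
result =
  fold.summarize(
    content_title: "Example topic",
    contents: [{ poster: "alice", id: 1, text: "Hello world" }],
  )
result[:summary] # => "(1 alice said: Hello world "
result[:chunks]  # => [] because everything fit in a single chunk

# Truncation: a single request over content cut down to the token limit.
truncate = DiscourseAi::Summarization::Strategies::TruncateContent.new(model)
truncate.summarize(contents: [{ poster: "alice", id: 1, text: "Hello world" }])

Because summarize_in_chunks is implemented once in Models::Base, a model only supplies summarize_chunk, concatenate_summaries, summarize_with_truncation, and a tokenizer; the patch's DummyCompletionModel hand-rolls the same interface for the strategy specs.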