FIX/REFACTOR: FoldContent revamp (#866)

* FIX/REFACTOR: FoldContent revamp

We hit a snag with our hot topic gist strategy: the regex we used to split the content didn't work, so we couldn't send the original post separately. This was important for letting the model focus on what's new in the topic.

The algorithm doesn’t give us full control over how prompts are written, and figuring out how to format the content isn't straightforward. This means we're having to use more complicated workarounds, like regex.

To tackle this, I'm suggesting we simplify the approach a bit. Let's focus on summarizing as much as we can upfront, then gradually add new content until there's nothing left to summarize.

Also, the "extend" part is mostly for models with small context windows, which shouldn't pose a problem 99% of the time with the content volume we're dealing with.

* Fix fold docs

* Use #shift instead of #pop to get the first elem, not the last
This commit is contained in:
Roman Rizzi 2024-10-25 11:51:17 -03:00 committed by GitHub
parent 12869f2146
commit ec97996905
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 227 additions and 259 deletions

View File

@ -26,7 +26,7 @@ module DiscourseAi
strategy = DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, since) strategy = DiscourseAi::Summarization::Strategies::ChatMessages.new(channel, since)
summarized_text = summarized_text =
if strategy.targets_data[:contents].empty? if strategy.targets_data.empty?
I18n.t("discourse_ai.summarization.chat.no_targets") I18n.t("discourse_ai.summarization.chat.no_targets")
else else
summarizer.summarize(current_user)&.summarized_text summarizer.summarize(current_user)&.summarized_text

View File

@ -18,35 +18,18 @@ module DiscourseAi
attr_reader :llm, :strategy attr_reader :llm, :strategy
# @param user { User } - User object used for auditing usage. # @param user { User } - User object used for auditing usage.
#
# @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function. # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
# Note: The block is only called with results of the final summary, not intermediate summaries. # Note: The block is only called with results of the final summary, not intermediate summaries.
# #
# @returns { AiSummary } - Resulting summary. # @returns { AiSummary } - Resulting summary.
def summarize(user, &on_partial_blk) def summarize(user, &on_partial_blk)
opts = content_to_summarize.except(:contents) base_summary = ""
initial_pos = 0
initial_chunks = folded_summary =
rebalance_chunks( fold(content_to_summarize, base_summary, initial_pos, user, &on_partial_blk)
content_to_summarize[:contents].map do |c|
{ ids: [c[:id]], summary: format_content_item(c) }
end,
)
# Special case where we can do all the summarization in one pass.
result =
if initial_chunks.length == 1
{
summary:
summarize_single(initial_chunks.first[:summary], user, opts, &on_partial_blk),
chunks: [],
}
else
summarize_chunks(initial_chunks, user, opts, &on_partial_blk)
end
clean_summary = clean_summary =
Nokogiri::HTML5.fragment(result[:summary]).css("ai")&.first&.text || result[:summary] Nokogiri::HTML5.fragment(folded_summary).css("ai")&.first&.text || folded_summary
if persist_summaries if persist_summaries
AiSummary.store!( AiSummary.store!(
@ -54,7 +37,7 @@ module DiscourseAi
strategy.type, strategy.type,
llm_model.name, llm_model.name,
clean_summary, clean_summary,
content_to_summarize[:contents].map { |c| c[:id] }, content_to_summarize.map { |c| c[:id] },
) )
else else
AiSummary.new(summarized_text: clean_summary) AiSummary.new(summarized_text: clean_summary)
@ -96,90 +79,58 @@ module DiscourseAi
end end
def latest_sha def latest_sha
@latest_sha ||= AiSummary.build_sha(content_to_summarize[:contents].map { |c| c[:id] }.join) @latest_sha ||= AiSummary.build_sha(content_to_summarize.map { |c| c[:id] }.join)
end end
def summarize_chunks(chunks, user, opts, &on_partial_blk) # @param items { Array<Hash> } - Content to summarize. Structure will be: { poster: who wrote the content, id: a way to order content, text: content }
# Safely assume we always have more than one chunk. # @param summary { String } - Intermediate summaries that we'll keep extending as part of our "folding" algorithm.
summarized_chunks = summarize_in_chunks(chunks, user, opts) # @param cursor { Integer } - Idx to know how much we already summarized.
total_summaries_size = # @param user { User } - User object used for auditing usage.
llm_model.tokenizer_class.size(summarized_chunks.map { |s| s[:summary].to_s }.join) # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
# Note: The block is only called with results of the final summary, not intermediate summaries.
#
# The summarization algorithm.
# The idea is to build an initial summary packing as much content as we can. Once we have the initial summary, we'll keep extending using the leftover
# content until there is nothing left.
#
# @returns { String } - Resulting summary.
def fold(items, summary, cursor, user, &on_partial_blk)
tokenizer = llm_model.tokenizer_class
tokens_left = available_tokens - tokenizer.size(summary)
iteration_content = []
if total_summaries_size < available_tokens items.each_with_index do |item, idx|
# Chunks are small enough, we can concatenate them. next if idx < cursor
{
summary:
concatenate_summaries(
summarized_chunks.map { |s| s[:summary] },
user,
&on_partial_blk
),
chunks: summarized_chunks,
}
else
# We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
rebalanced_chunks = rebalance_chunks(summarized_chunks)
summarize_chunks(rebalanced_chunks, user, opts, &on_partial_blk) as_text = "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
end
def format_content_item(item) if tokenizer.below_limit?(as_text, tokens_left)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} " iteration_content << item
end tokens_left -= tokenizer.size(as_text)
cursor += 1
def rebalance_chunks(chunks) else
section = { ids: [], summary: "" } break
chunks =
chunks.reduce([]) do |sections, chunk|
if llm_model.tokenizer_class.can_expand_tokens?(
section[:summary],
chunk[:summary],
available_tokens,
)
section[:summary] += chunk[:summary]
section[:ids] = section[:ids].concat(chunk[:ids])
else
sections << section
section = chunk
end
sections
end end
end
chunks << section if section[:summary].present? prompt =
(
chunks if summary.blank?
end strategy.first_summary_prompt(iteration_content)
else
def summarize_single(text, user, opts, &on_partial_blk) strategy.summary_extension_prompt(summary, iteration_content)
prompt = strategy.summarize_single_prompt(text, opts) end
llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
end
def summarize_in_chunks(chunks, user, opts)
chunks.map do |chunk|
prompt = strategy.summarize_single_prompt(chunk[:summary], opts)
chunk[:summary] = llm.generate(
prompt,
user: user,
max_tokens: 300,
feature_name: "summarize",
) )
chunk if cursor == items.length
llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
else
latest_summary =
llm.generate(prompt, user: user, max_tokens: 600, feature_name: "summarize")
fold(items, latest_summary, cursor, user, &on_partial_blk)
end end
end end
def concatenate_summaries(texts_to_summarize, user, &on_partial_blk)
prompt = strategy.concatenation_prompt(texts_to_summarize)
llm.generate(prompt, user: user, &on_partial_blk)
end
def available_tokens def available_tokens
# Reserve tokens for the response and the base prompt # Reserve tokens for the response and the base prompt
# ~500 words # ~500 words

View File

@ -11,7 +11,7 @@ module DiscourseAi
@target = target @target = target
end end
attr_reader :target attr_reader :target, :opts
# The summary type differentiates instances of `AiSummary` pointing to a single target. # The summary type differentiates instances of `AiSummary` pointing to a single target.
# See the `summary_type` enum for available options. # See the `summary_type` enum for available options.
@ -19,11 +19,9 @@ module DiscourseAi
raise NotImplementedError raise NotImplementedError
end end
# @returns { Hash } - Content to summarize. # @returns { Array<Hash> } - Content to summarize.
# #
# This method returns a hash with the content to summarize and additional information. # This method returns an array of hashes with the content to summarize using the following structure:
# The only mandatory key is `contents`, which must be an array of hashes with
# the following structure:
# #
# { # {
# poster: A way to tell who write the content, # poster: A way to tell who write the content,
@ -31,26 +29,17 @@ module DiscourseAi
# text: Text to summarize # text: Text to summarize
# } # }
# #
# Additionally, you could add more context, which will be available in the prompt. e.g.:
#
# {
# resource_path: "#{Discourse.base_path}/t/-/#{target.id}",
# content_title: target.title,
# contents: [...]
# }
#
def targets_data def targets_data
raise NotImplementedError raise NotImplementedError
end end
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when concatenating multiple chunks. # @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when extending an existing summary.
def contatenation_prompt(_texts_to_summarize) def summary_extension_prompt(_summary, _texts_to_summarize)
raise NotImplementedError raise NotImplementedError
end end
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM on each chunk, # @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM for summarizing a single chunk of content.
# and when the whole content fits in one call. def first_summary_prompt(_input)
def summarize_single_prompt(_input, _opts)
raise NotImplementedError raise NotImplementedError
end end
end end

View File

@ -14,38 +14,60 @@ module DiscourseAi
end end
def targets_data def targets_data
content = { content_title: target.name } target
content[:contents] = target
.chat_messages .chat_messages
.where("chat_messages.created_at > ?", since.hours.ago) .where("chat_messages.created_at > ?", since.hours.ago)
.includes(:user) .includes(:user)
.order(created_at: :asc) .order(created_at: :asc)
.pluck(:id, :username_lower, :message) .pluck(:id, :username_lower, :message)
.map { { id: _1, poster: _2, text: _3 } } .map { { id: _1, poster: _2, text: _3 } }
content
end end
def contatenation_prompt(texts_to_summarize) def summary_extension_prompt(summary, contents)
input =
contents
.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
.join("\n")
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot tasked with creating a cohesive narrative by intelligently merging multiple disjointed summaries. You are a summarization bot tasked with expanding on an existing summary by incorporating new chat messages.
Your response should consist of well-structured paragraphs that combines these summaries into a clear and comprehensive overview. Your goal is to seamlessly integrate the additional information into the existing summary, preserving the clarity and insights of the original while reflecting any new developments, themes, or conclusions.
Avoid adding any additional text or commentary. Format your output using Discourse forum Markdown. Analyze the new messages to identify key themes, participants' intentions, and any significant decisions or resolutions.
Update the summary to include these aspects in a way that remains concise, comprehensive, and accessible to someone with no prior context of the conversation.
### Guidelines:
- Merge the new information naturally with the existing summary without redundancy.
- Only include the updated summary, WITHOUT additional commentary.
- Don't mention the channel title. Avoid extraneous details or subjective opinions.
- Maintain the original language of the text being summarized.
- The same user could write multiple messages in a row, don't treat them as different persons.
- Aim for summaries to be extended by a reasonable amount, but strive to maintain a total length of 400 words or less, unless absolutely necessary for comprehensiveness.
TEXT TEXT
prompt.push(type: :user, content: <<~TEXT.strip) prompt.push(type: :user, content: <<~TEXT.strip)
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags: ### Context:
<input> This is the existing summary:
#{texts_to_summarize.join("\n")}
</input> #{summary}
These are the new chat messages:
#{input}
Intengrate the new messages into the existing summary.
TEXT TEXT
prompt prompt
end end
def summarize_single_prompt(input, opts) def first_summary_prompt(contents)
content_title = target.name
input =
contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }.join
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot designed to generate clear and insightful paragraphs that conveys the main topics You are a summarization bot designed to generate clear and insightful paragraphs that conveys the main topics
and developments from a series of chat messages within a user-selected time window. and developments from a series of chat messages within a user-selected time window.
@ -62,7 +84,7 @@ module DiscourseAi
TEXT TEXT
prompt.push(type: :user, content: <<~TEXT.strip) prompt.push(type: :user, content: <<~TEXT.strip)
#{opts[:content_title].present? ? "The name of the channel is: " + opts[:content_title] + ".\n" : ""} #{content_title.present? ? "The name of the channel is: " + content_title + ".\n" : ""}
Here are the messages, inside <input></input> XML tags: Here are the messages, inside <input></input> XML tags:

View File

@ -9,8 +9,6 @@ module DiscourseAi
end end
def targets_data def targets_data
content = { content_title: target.title, contents: [] }
op_post_number = 1 op_post_number = 1
hot_topics_recent_cutoff = Time.zone.now - SiteSetting.hot_topics_recent_days.days hot_topics_recent_cutoff = Time.zone.now - SiteSetting.hot_topics_recent_days.days
@ -44,44 +42,62 @@ module DiscourseAi
.order(:post_number) .order(:post_number)
.pluck(:post_number, :raw, :username) .pluck(:post_number, :raw, :username)
posts_data.each do |(pn, raw, username)| posts_data.reduce([]) do |memo, (pn, raw, username)|
raw_text = raw raw_text = raw
if pn == 1 && target.topic_embed&.embed_content_cache.present? if pn == 1 && target.topic_embed&.embed_content_cache.present?
raw_text = target.topic_embed&.embed_content_cache raw_text = target.topic_embed&.embed_content_cache
end end
content[:contents] << { poster: username, id: pn, text: raw_text } memo << { poster: username, id: pn, text: raw_text }
end end
content
end end
def concatenation_prompt(texts_to_summarize) def summary_extension_prompt(summary, contents)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) statements =
You are a summarization bot tasked with creating a single, concise sentence by merging disjointed summaries into a cohesive statement. contents
Your response should strictly be this single, comprehensive sentence, without any additional text or comments. .to_a
.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
.join("\n")
- Focus on the central theme or issue being addressed, maintaining an objective and neutral tone. prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
- Exclude extraneous details or subjective opinions. You are an advanced summarization bot. Your task is to update an existing single-sentence summary by integrating new developments from a conversation.
Analyze the most recent messages to identify key updates or shifts in the main topic and reflect these in the updated summary.
Emphasize new significant information or developments within the context of the initial conversation theme.
### Guidelines:
- Ensure the revised summary remains concise and objective, maintaining a focus on the central theme or issue.
- Omit extraneous details or subjective opinions.
- Use the original language of the text. - Use the original language of the text.
- Begin directly with the main topic or issue, avoiding introductory phrases. - Begin directly with the main topic or issue, avoiding introductory phrases.
- Limit the summary to a maximum of 20 words. - Limit the updated summary to a maximum of 20 words.
- Return the 20-word summary inside <ai></ai> tags.
TEXT TEXT
prompt.push(type: :user, content: <<~TEXT.strip) prompt.push(type: :user, content: <<~TEXT.strip)
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags: ### Context:
<input> This is the existing single-sentence summary:
#{texts_to_summarize.join("\n")}
</input> #{summary}
And these are the new developments in the conversation:
#{statements}
Your task is to update an existing single-sentence summary by integrating new developments from a conversation.
Return the 20-word summary inside <ai></ai> tags.
TEXT TEXT
prompt prompt
end end
def summarize_single_prompt(input, opts) def first_summary_prompt(contents)
statements = input.split(/(?=\d+\) \w+ said:)/) content_title = target.title
statements =
contents.to_a.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are an advanced summarization bot. Analyze a given conversation and produce a concise, You are an advanced summarization bot. Analyze a given conversation and produce a concise,
@ -95,25 +111,25 @@ module DiscourseAi
- Use the original language of the text. - Use the original language of the text.
- Begin directly with the main topic or issue, avoiding introductory phrases. - Begin directly with the main topic or issue, avoiding introductory phrases.
- Limit the summary to a maximum of 20 words. - Limit the summary to a maximum of 20 words.
- Return the 20-word summary inside <ai></ai> tags.
Return the 20-word summary inside <ai></ai> tags.
TEXT TEXT
context = +<<~TEXT context = +<<~TEXT
### Context: ### Context:
#{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""} #{content_title.present? ? "The discussion title is: " + content_title + ".\n" : ""}
The conversation began with the following statement: The conversation began with the following statement:
#{statements&.pop}\n #{statements.shift}\n
TEXT TEXT
if statements.present? if statements.present?
context << <<~TEXT context << <<~TEXT
Subsequent discussion includes the following: Subsequent discussion includes the following:
#{statements&.join("\n")} #{statements.join("\n")}
Your task is to focus on these latest messages, capturing their meaning in the context of the initial statement. Your task is to focus on these latest messages, capturing their meaning in the context of the initial statement.
TEXT TEXT

View File

@ -9,12 +9,6 @@ module DiscourseAi
end end
def targets_data def targets_data
content = {
resource_path: "#{Discourse.base_path}/t/-/#{target.id}",
content_title: target.title,
contents: [],
}
posts_data = posts_data =
(target.has_summary? ? best_replies : pick_selection).pluck( (target.has_summary? ? best_replies : pick_selection).pluck(
:post_number, :post_number,
@ -22,85 +16,102 @@ module DiscourseAi
:username, :username,
) )
posts_data.each do |(pn, raw, username)| posts_data.reduce([]) do |memo, (pn, raw, username)|
raw_text = raw raw_text = raw
if pn == 1 && target.topic_embed&.embed_content_cache.present? if pn == 1 && target.topic_embed&.embed_content_cache.present?
raw_text = target.topic_embed&.embed_content_cache raw_text = target.topic_embed&.embed_content_cache
end end
content[:contents] << { poster: username, id: pn, text: raw_text } memo << { poster: username, id: pn, text: raw_text }
end end
content
end end
def concatenation_prompt(texts_to_summarize) def summary_extension_prompt(summary, contents)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) resource_path = "#{Discourse.base_path}/t/-/#{target.id}"
You are a summarization bot that effectively concatenates disjointed summaries, creating a cohesive narrative. content_title = target.title
The narrative you create is in the form of one or multiple paragraphs. input =
Your reply MUST BE a single concatenated summary using the summaries I'll provide to you. contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]})" }.join
I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
You understand and generate Discourse forum Markdown. prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT)
You format the response, including links, using Markdown. You are an advanced summarization bot tasked with enhancing an existing summary by incorporating additional posts.
### Guidelines:
- Only include the enhanced summary, without any additional commentary.
- Understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
- Maintain the original language of the text being summarized.
- Aim for summaries to be 400 words or less.
- Each new post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>"
- Cite specific noteworthy posts using the format [NAME](#{resource_path}/POST_NUMBER)
- Example: link to the 3rd post by sam: [sam](#{resource_path}/3)
- Example: link to the 6th post by jane: [agreed with](#{resource_path}/6)
- Example: link to the 13th post by joe: [#13](#{resource_path}/13)
- When formatting usernames either use @USERNAME or [USERNAME](#{resource_path}/POST_NUMBER)
TEXT TEXT
prompt.push(type: :user, content: <<~TEXT.strip) prompt.push(type: :user, content: <<~TEXT.strip)
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags: ### Context:
#{content_title.present? ? "The discussion title is: " + content_title + ".\n" : ""}
Here is the existing summary:
#{summary}
Here are the new posts, inside <input></input> XML tags:
<input> <input>
#{texts_to_summarize.join("\n")} #{input}
</input> </input>
Integrate the new information to generate an enhanced concise and coherent summary.
TEXT TEXT
prompt prompt
end end
def summarize_single_prompt(input, opts) def first_summary_prompt(contents)
insts = +<<~TEXT resource_path = "#{Discourse.base_path}/t/-/#{target.id}"
You are an advanced summarization bot that generates concise, coherent summaries of provided text. content_title = target.title
input =
contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }.join
- Only include the summary, without any additional commentary. prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
- You understand and generate Discourse forum Markdown; including links, _italics_, **bold**. You are an advanced summarization bot that generates concise, coherent summaries of provided text.
- Maintain the original language of the text being summarized.
- Aim for summaries to be 400 words or less.
TEXT - Only include the summary, without any additional commentary.
- You understand and generate Discourse forum Markdown; including links, _italics_, **bold**.
- Maintain the original language of the text being summarized.
- Aim for summaries to be 400 words or less.
- Each post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>"
- Cite specific noteworthy posts using the format [NAME](#{resource_path}/POST_NUMBER)
- Example: link to the 3rd post by sam: [sam](#{resource_path}/3)
- Example: link to the 6th post by jane: [agreed with](#{resource_path}/6)
- Example: link to the 13th post by joe: [#13](#{resource_path}/13)
- When formatting usernames either use @USERNMAE OR [USERNAME](#{resource_path}/POST_NUMBER)
TEXT
insts << <<~TEXT if opts[:resource_path] prompt.push(
- Each post is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>" type: :user,
- Cite specific noteworthy posts using the format [NAME](#{opts[:resource_path]}/POST_NUMBER) content:
- Example: link to the 3rd post by sam: [sam](#{opts[:resource_path]}/3) "Here are the posts inside <input></input> XML tags:\n\n<input>1) user1 said: I love Mondays 2) user2 said: I hate Mondays</input>\n\nGenerate a concise, coherent summary of the text above maintaining the original language.",
- Example: link to the 6th post by jane: [agreed with](#{opts[:resource_path]}/6) )
- Example: link to the 13th post by joe: [#13](#{opts[:resource_path]}/13) prompt.push(
- When formatting usernames either use @USERNMAE OR [USERNAME](#{opts[:resource_path]}/POST_NUMBER) type: :model,
TEXT content:
"Two users are sharing their feelings toward Mondays. [user1](#{resource_path}/1) hates them, while [user2](#{resource_path}/2) loves them.",
prompt = DiscourseAi::Completions::Prompt.new(insts.strip) )
if opts[:resource_path]
prompt.push(
type: :user,
content:
"Here are the posts inside <input></input> XML tags:\n\n<input>1) user1 said: I love Mondays 2) user2 said: I hate Mondays</input>\n\nGenerate a concise, coherent summary of the text above maintaining the original language.",
)
prompt.push(
type: :model,
content:
"Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.",
)
end
prompt.push(type: :user, content: <<~TEXT.strip) prompt.push(type: :user, content: <<~TEXT.strip)
#{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""} #{content_title.present? ? "The discussion title is: " + content_title + ".\n" : ""}
Here are the posts, inside <input></input> XML tags: Here are the posts, inside <input></input> XML tags:
<input> <input>
#{input} #{input}
</input> </input>
Generate a concise, coherent summary of the text above maintaining the original language. Generate a concise, coherent summary of the text above maintaining the original language.
TEXT TEXT
prompt prompt
end end

View File

@ -40,14 +40,12 @@ module DiscourseAi
tokenizer.decode(tokenizer.encode(text).ids.take(max_length)) tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
end end
def can_expand_tokens?(text, addition, max_length) def below_limit?(text, limit)
# fast track common case, /2 to handle unicode chars # fast track common case, /2 to handle unicode chars
# than can take more than 1 token per char # than can take more than 1 token per char
if !SiteSetting.ai_strict_token_counting && text.size + addition.size < max_length / 2 return true if !SiteSetting.ai_strict_token_counting && text.size < limit / 2
return true
end
tokenizer.encode(text).ids.length + tokenizer.encode(addition).ids.length < max_length tokenizer.encode(text).ids.length < limit
end end
end end
end end

View File

@ -31,14 +31,12 @@ module DiscourseAi
retry retry
end end
def can_expand_tokens?(text, addition, max_length) def below_limit?(text, limit)
# fast track common case, /2 to handle unicode chars # fast track common case, /2 to handle unicode chars
# than can take more than 1 token per char # than can take more than 1 token per char
if !SiteSetting.ai_strict_token_counting && text.size + addition.size < max_length / 2 return true if !SiteSetting.ai_strict_token_counting && text.size < limit / 2
return true
end
tokenizer.encode(text).length + tokenizer.encode(addition).length < max_length tokenizer.encode(text).length < limit
end end
end end
end end

View File

@ -15,12 +15,15 @@ RSpec.describe DiscourseAi::Summarization::FoldContent do
# Make sure each content fits in a single chunk. # Make sure each content fits in a single chunk.
# 700 is the number of tokens reserved for the prompt. # 700 is the number of tokens reserved for the prompt.
model_tokens = model_tokens =
700 + DiscourseAi::Tokenizer::OpenAiTokenizer.size("(1 asd said: This is a text ") + 3 700 +
DiscourseAi::Tokenizer::OpenAiTokenizer.size(
"(1 #{post_1.user.username_lower} said: This is a text ",
) + 3
llm_model.update!(max_prompt_tokens: model_tokens) llm_model.update!(max_prompt_tokens: model_tokens)
end end
let(:single_summary) { "this is a single summary" } let(:single_summary) { "single" }
let(:concatenated_summary) { "this is a concatenated summary" } let(:concatenated_summary) { "this is a concatenated summary" }
let(:user) { User.new } let(:user) { User.new }
@ -39,29 +42,11 @@ RSpec.describe DiscourseAi::Summarization::FoldContent do
context "when the content to summarize doesn't fit in a single call" do context "when the content to summarize doesn't fit in a single call" do
fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") } fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2, raw: "This is a text") }
it "summarizes each chunk and then concatenates them" do it "keeps extending the summary until there is nothing else to process" do
result = result =
DiscourseAi::Completions::Llm.with_prepared_responses( DiscourseAi::Completions::Llm.with_prepared_responses(
[single_summary, single_summary, concatenated_summary], [single_summary, concatenated_summary],
) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(3) } } ) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(2) } }
expect(result.summarized_text).to eq(concatenated_summary)
end
it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do
max_length_response = "(1 asd said: This is a text "
chunk_of_chunks = "I'm smol"
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
[
max_length_response,
max_length_response,
chunk_of_chunks,
chunk_of_chunks,
concatenated_summary,
],
) { |spy| summarizer.summarize(user).tap { expect(spy.completions).to eq(5) } }
expect(result.summarized_text).to eq(concatenated_summary) expect(result.summarized_text).to eq(concatenated_summary)
end end

View File

@ -12,7 +12,7 @@ RSpec.describe DiscourseAi::Summarization::Strategies::HotTopicGists do
post_2.update(created_at: (SiteSetting.hot_topics_recent_days + 1).days.ago) post_2.update(created_at: (SiteSetting.hot_topics_recent_days + 1).days.ago)
Fabricate(:post, topic: topic, post_number: 3) Fabricate(:post, topic: topic, post_number: 3)
post_numbers = gist.targets_data[:contents].map { |c| c[:id] } post_numbers = gist.targets_data.map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1, 3) expect(post_numbers).to contain_exactly(1, 3)
end end
@ -20,7 +20,7 @@ RSpec.describe DiscourseAi::Summarization::Strategies::HotTopicGists do
it "only includes visible posts" do it "only includes visible posts" do
post_2.update!(hidden: true) post_2.update!(hidden: true)
post_numbers = gist.targets_data[:contents].map { |c| c[:id] } post_numbers = gist.targets_data.map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1) expect(post_numbers).to contain_exactly(1)
end end
@ -28,7 +28,7 @@ RSpec.describe DiscourseAi::Summarization::Strategies::HotTopicGists do
it "doesn't include posts without users" do it "doesn't include posts without users" do
post_2.update!(user_id: nil) post_2.update!(user_id: nil)
post_numbers = gist.targets_data[:contents].map { |c| c[:id] } post_numbers = gist.targets_data.map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1) expect(post_numbers).to contain_exactly(1)
end end
@ -36,7 +36,7 @@ RSpec.describe DiscourseAi::Summarization::Strategies::HotTopicGists do
it "doesn't include whispers" do it "doesn't include whispers" do
post_2.update!(post_type: Post.types[:whisper]) post_2.update!(post_type: Post.types[:whisper])
post_numbers = gist.targets_data[:contents].map { |c| c[:id] } post_numbers = gist.targets_data.map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1) expect(post_numbers).to contain_exactly(1)
end end
@ -51,8 +51,7 @@ RSpec.describe DiscourseAi::Summarization::Strategies::HotTopicGists do
) )
content = gist.targets_data content = gist.targets_data
op_content = content.first[:text]
op_content = content[:contents].first[:text]
expect(op_content).to include(topic_embed.embed_content_cache) expect(op_content).to include(topic_embed.embed_content_cache)
end end

View File

@ -12,7 +12,7 @@ RSpec.describe DiscourseAi::Summarization::Strategies::TopicSummary do
it "only includes visible posts" do it "only includes visible posts" do
post_2.update!(hidden: true) post_2.update!(hidden: true)
post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] } post_numbers = topic_summary.targets_data.map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1) expect(post_numbers).to contain_exactly(1)
end end
@ -20,7 +20,7 @@ RSpec.describe DiscourseAi::Summarization::Strategies::TopicSummary do
it "doesn't include posts without users" do it "doesn't include posts without users" do
post_2.update!(user_id: nil) post_2.update!(user_id: nil)
post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] } post_numbers = topic_summary.targets_data.map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1) expect(post_numbers).to contain_exactly(1)
end end
@ -28,7 +28,7 @@ RSpec.describe DiscourseAi::Summarization::Strategies::TopicSummary do
it "doesn't include whispers" do it "doesn't include whispers" do
post_2.update!(post_type: Post.types[:whisper]) post_2.update!(post_type: Post.types[:whisper])
post_numbers = topic_summary.targets_data[:contents].map { |c| c[:id] } post_numbers = topic_summary.targets_data.map { |c| c[:id] }
expect(post_numbers).to contain_exactly(1) expect(post_numbers).to contain_exactly(1)
end end
@ -56,8 +56,7 @@ RSpec.describe DiscourseAi::Summarization::Strategies::TopicSummary do
) )
content = topic_summary.targets_data content = topic_summary.targets_data
op_content = content.first[:text]
op_content = content[:contents].first[:text]
expect(op_content).to include(topic_embed.embed_content_cache) expect(op_content).to include(topic_embed.embed_content_cache)
end end

View File

@ -90,21 +90,21 @@ describe DiscourseAi::Tokenizer::OpenAiTokenizer do
end end
end end
describe "#can_expand_tokens?" do describe "#below_limit?" do
it "returns true when the tokens can be expanded" do it "returns true when the tokens can be expanded" do
expect(described_class.can_expand_tokens?("foo bar", "baz qux", 6)).to eq(true) expect(described_class.below_limit?("foo bar baz qux", 6)).to eq(true)
end end
it "returns false when the tokens cannot be expanded" do it "returns false when the tokens cannot be expanded" do
expect(described_class.can_expand_tokens?("foo bar", "baz qux", 3)).to eq(false) expect(described_class.below_limit?("foo bar baz qux", 3)).to eq(false)
end end
it "returns false when the tokens cannot be expanded due to multibyte unicode characters" do it "returns false when the tokens cannot be expanded due to multibyte unicode characters" do
expect(described_class.can_expand_tokens?("foo bar 👨🏿", "baz qux", 6)).to eq(false) expect(described_class.below_limit?("foo bar 👨🏿 baz qux", 6)).to eq(false)
end end
it "handles unicode characters properly when they use more than one token per char" do it "handles unicode characters properly when they use more than one token per char" do
expect(described_class.can_expand_tokens?("我喜欢吃比萨", "", 10)).to eq(false) expect(described_class.below_limit?("我喜欢吃比萨", 10)).to eq(false)
end end
end end
end end