FIX: Make FoldContent strategy more resilient when using models with low token count. (#341)

We'll recursively summarize the content into smaller chunks until we are sure we can concatenate
them without going over the token limit.
This commit is contained in:
Roman Rizzi 2023-12-06 19:00:24 -03:00 committed by GitHub
parent c8352f21ce
commit 450ec915d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 79 additions and 28 deletions

View File

@ -21,52 +21,71 @@ module DiscourseAi
llm = DiscourseAi::Completions::Llm.proxy(completion_model.model)
chunks = split_into_chunks(llm.tokenizer, content[:contents])
initial_chunks =
rebalance_chunks(
llm.tokenizer,
content[:contents].map { |c| { ids: [c[:id]], summary: format_content_item(c) } },
)
if chunks.length == 1
# Special case where we can do all the summarization in one pass.
if initial_chunks.length == 1
{
summary: summarize_single(llm, chunks.first[:summary], user, opts, &on_partial_blk),
summary:
summarize_single(llm, initial_chunks.first[:summary], user, opts, &on_partial_blk),
chunks: [],
}
else
summaries = summarize_in_chunks(llm, chunks, user, opts)
{
summary:
concatenate_summaries(
llm,
summaries.map { |s| s[:summary] },
user,
&on_partial_blk
),
chunks: summaries,
}
summarize_chunks(llm, initial_chunks, user, opts, &on_partial_blk)
end
end
private
# Recursively folds per-chunk summaries until their combined text fits the
# model's token budget, then concatenates them into one cohesive summary.
#
# llm            - LLM proxy built by DiscourseAi::Completions::Llm.proxy.
# chunks         - Array of { ids: [...], summary: String } hashes (the caller
#                  short-circuits the single-chunk case, so there is always > 1).
# opts           - options forwarded to summarize_in_chunks.
# on_partial_blk - streaming block forwarded to the final concatenation call.
#
# Returns { summary: String, chunks: Array } matching the single-pass shape.
def summarize_chunks(llm, chunks, user, opts, &on_partial_blk)
# Safely assume we always have more than one chunk.
summarized_chunks = summarize_in_chunks(llm, chunks, user, opts)
# Token size of all per-chunk summaries joined together — decides whether a
# single concatenation request will fit in the model's available context.
total_summaries_size =
llm.tokenizer.size(summarized_chunks.map { |s| s[:summary].to_s }.join)
if total_summaries_size < completion_model.available_tokens
# Chunks are small enough, we can concatenate them.
{
summary:
concatenate_summaries(
llm,
summarized_chunks.map { |s| s[:summary] },
user,
&on_partial_blk
),
chunks: summarized_chunks,
}
else
# We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
# NOTE(review): termination relies on each pass producing strictly smaller
# text than its input — confirm summarize_in_chunks guarantees that.
rebalanced_chunks = rebalance_chunks(llm.tokenizer, summarized_chunks)
summarize_chunks(llm, rebalanced_chunks, user, opts, &on_partial_blk)
end
end
# Renders one content item as "(<id> <poster> said: <text> ".
# The leading "(" and trailing space are deliberate — they act as per-item
# delimiters in the concatenated chunk text (the spec asserts this exact shape).
def format_content_item(item)
parts = ["(", item[:id], " ", item[:poster], " said: ", item[:text], " "]
parts.join
end
def split_into_chunks(tokenizer, contents)
def rebalance_chunks(tokenizer, chunks)
section = { ids: [], summary: "" }
chunks =
contents.reduce([]) do |sections, item|
new_content = format_content_item(item)
chunks.reduce([]) do |sections, chunk|
if tokenizer.can_expand_tokens?(
section[:summary],
new_content,
chunk[:summary],
completion_model.available_tokens,
)
section[:summary] += new_content
section[:ids] << item[:id]
section[:summary] += chunk[:summary]
section[:ids] = section[:ids].concat(chunk[:ids])
else
sections << section
section = { ids: [item[:id]], summary: new_content }
section = chunk
end
sections
@ -94,10 +113,22 @@ module DiscourseAi
end
def concatenate_summaries(llm, summaries, user, &on_partial_blk)
prompt = summarization_prompt(summaries.join("\n"), {})
prompt = {}
prompt[:insts] = <<~TEXT
You are a bot that can concatenate disjoint summaries, creating a cohesive narrative.
Keep the resulting summary in the same language used in the text below.
You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative.
The narrative you create is in the form of one or multiple paragraphs.
Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
You understand and generate Discourse forum Markdown.
You format the response, including links, using Markdown.
TEXT
prompt[:input] = <<~TEXT
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:
<input>
#{summaries.join("\n")}
</input>
TEXT
llm.completion!(prompt, user, &on_partial_blk)
@ -106,7 +137,8 @@ module DiscourseAi
def summarization_prompt(input, opts)
insts = <<~TEXT
You are a summarization bot that effectively summarize any text
Your replies contain ONLY a summarized version of the text I provided and you, using the same language.
Your reply MUST BE a summarized version of the posts I provided, using the first language you detect.
I'm NOT interested in anything other than the summary, don't include additional text or comments.
You understand and generate Discourse forum Markdown.
You format the response, including links, using Markdown.
Your summaries are always a cohesive narrative in the form of one or multiple paragraphs.
@ -122,7 +154,7 @@ module DiscourseAi
insts += "The discussion title is: #{opts[:content_title]}.\n" if opts[:content_title]
prompt = { insts: insts, input: <<~TEXT }
Here is the a list of posts, inside <input></input> XML tags:
Here are the posts, inside <input></input> XML tags:
<input>
#{input}

View File

@ -44,6 +44,25 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
expect(result[:summary]).to eq(concatenated_summary)
end
# Exercises the recursive fold path: the first summarization pass yields
# summaries that are presumably still too large to concatenate (given the
# spec's stubbed token limits — confirm against the surrounding setup), so the
# strategy rebalances and summarizes again before the final concatenation.
it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
max_length_response = "(1 asd said: This is a text "
chunk_of_chunks = "I'm smol"
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
[
max_length_response,
max_length_response,
chunk_of_chunks,
chunk_of_chunks,
concatenated_summary,
],
# 5 completions total: 2 first-pass summaries + 2 second-pass summaries
# + 1 final concatenation (matches the 5 prepared responses above).
) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(5) } }
expect(result[:summary]).to eq(concatenated_summary)
end
end
end
end