FIX: improve token counting (#145)
Previously we were not counting functions correctly, and we were not accounting for the minimum token count per message. This corrects both issues and improves the internal documentation.
parent ea5a443588
commit 78f61914c8
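
The fix in one sentence: each chat message costs a few tokens of framing overhead on top of its text, so a prompt-size estimate has to charge that overhead per message. A minimal sketch of the counting rule, assuming a hypothetical tokenize helper that returns an array of tokens:

    # Sketch only: tokenize stands in for the bot's real tokenizer.
    OVERHEAD_PER_MESSAGE = 4 # rough framing cost per message in OpenAI's chat format

    def estimated_prompt_tokens(texts)
      texts.sum { |text| tokenize(text).length + OVERHEAD_PER_MESSAGE }
    end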
@@ -182,13 +182,17 @@ module DiscourseAi
         Discourse.warn_exception(e, message: "ai-bot: Reply failed")
       end

+      def extra_tokens_per_message
+        0
+      end
+
       def bot_prompt_with_topic_context(post, prompt: "topic")
         messages = []
         conversation = conversation_context(post)

         rendered_system_prompt = system_prompt(post)

-        total_prompt_tokens = tokenize(rendered_system_prompt).length
+        total_prompt_tokens = tokenize(rendered_system_prompt).length + extra_tokens_per_message

         messages =
           conversation.reduce([]) do |memo, (raw, username, function)|
@@ -196,18 +200,20 @@ module DiscourseAi

             tokens = tokenize(raw.to_s)

-            while !raw.blank? && tokens.length + total_prompt_tokens > prompt_limit
+            while !raw.blank? &&
+                tokens.length + total_prompt_tokens + extra_tokens_per_message > prompt_limit
               raw = raw[0..-100] || ""
               tokens = tokenize(raw.to_s)
             end

             next(memo) if raw.blank?

-            total_prompt_tokens += tokens.length
+            total_prompt_tokens += tokens.length + extra_tokens_per_message
             memo.unshift(build_message(username, raw, function: !!function))
           end

         messages.unshift(build_message(bot_user.username, rendered_system_prompt, system: true))

         messages
       end
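The base class above charges zero overhead per message and leaves extra_tokens_per_message as a hook for provider-specific subclasses, such as the OpenAI bot changed in the hunks that follow. For readers who want the trimming logic without the surrounding plumbing, here is a condensed, self-contained sketch; tokenize and limit are stand-ins for the bot's tokenizer and prompt_limit:

    # Condensed sketch of the budget loop in the hunks above.
    def trim(conversation, limit, extra_tokens_per_message)
      total = 0
      conversation.reduce([]) do |memo, raw|
        tokens = tokenize(raw)
        # Shave roughly 100 characters at a time until the message, plus its
        # per-message overhead, fits in what is left of the budget.
        while !raw.empty? && tokens.length + total + extra_tokens_per_message > limit
          raw = raw[0..-100] || ""
          tokens = tokenize(raw)
        end
        next(memo) if raw.empty?
        total += tokens.length + extra_tokens_per_message
        memo.unshift(raw) # prepend, so the result reads oldest-first
      end
    end
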
@@ -13,16 +13,16 @@ module DiscourseAi
       end

       def prompt_limit
         # note GPT counts both reply and request tokens in limits...
-        # also allow for an extra 500 or so spare tokens
-        #
-        # 2500 are the max reply tokens
-        # Then we have 450 or so for the full function suite
-        # 100 additional for growth around function calls
+        # note this is about 100 tokens over, OpenAI have a more optimal representation
+        @function_size ||= tokenize(available_functions.to_json).length
+
+        # provide a buffer of 50 tokens in case our counting is off
+        buffer = @function_size + reply_params[:max_tokens] + 50
         if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
-          8192 - 3050
+          8192 - buffer
         else
-          16_384 - 3050
+          16_384 - buffer
         end
       end

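Rough arithmetic for the new limit, using only numbers visible in this diff: the old constant 3050 was 2500 (max reply tokens) + 450 (function suite) + 100 (growth), while the new buffer is measured at runtime. Taking the old ~450 estimate as the function-suite size purely for illustration:

    # Illustration only; 450 is the estimate quoted in the removed comments,
    # not a measured @function_size.
    function_size = 450
    buffer = function_size + 2500 + 50 # reply_params[:max_tokens] is 2500
    gpt4_limit = 8192 - buffer         # => 5192 tokens left for the prompt
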
@@ -32,6 +32,11 @@ module DiscourseAi
         { temperature: 0.4, top_p: 0.9, max_tokens: 2500 }
       end

+      def extra_tokens_per_message
+        # open ai defines about 4 tokens per message of overhead
+        4
+      end
+
       def submit_prompt(
         prompt,
         prefer_low_cost: false,
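
Together the two definitions form a small template-method pair: the base bot returns 0 and the OpenAI bot overrides it with 4. A stripped-down sketch of that relationship (class names here are illustrative, not the plugin's real ones):

    class BaseBot
      def extra_tokens_per_message
        0
      end
    end

    class OpenAiBot < BaseBot
      def extra_tokens_per_message
        4 # open ai defines about 4 tokens per message of overhead
      end
    end

    OpenAiBot.new.extra_tokens_per_message # => 4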