FIX: Add a max token limit based on the text to be translated (#1507)

We're seeing some LLMs use 65,000+ tokens to translate raw text that is only 10-1000 characters long.

This PR adds a max_tokens value, sized from the length of the input text, that is passed to the LLM API for each translation.
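In short: inputs under 100 characters are capped at 500 tokens, inputs under 500 characters at 1,000 tokens, and anything longer at two tokens per character. A standalone sketch of the heuristic (mirroring `get_max_tokens` in the diff below), with worked examples:

    # Token-cap heuristic added in this PR: fixed caps for short and
    # medium inputs, roughly two output tokens per input character beyond that.
    def get_max_tokens(text)
      if text.length < 100
        500
      elsif text.length < 500
        1000
      else
        text.length * 2
      end
    end

    get_max_tokens("Hallo Welt") # => 500   (10 chars: short tier)
    get_max_tokens("a" * 200)    # => 1000  (medium tier)
    get_max_tokens("a" * 600)    # => 1200  (600 * 2)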
Author: Natalie Tay, 2025-07-17 17:47:15 +08:00 (committed by GitHub)
Commit: 5d80a34589 (parent: 8630bc145e)
2 changed files with 70 additions and 1 deletion


@@ -46,12 +46,24 @@ module DiscourseAi
           topic: @topic,
           post: @post,
         )
+        max_tokens = get_max_tokens(text)
+        llm_args = { max_tokens: }

         result = +""
-        bot.reply(context) { |partial| result << partial }
+        bot.reply(context, llm_args:) { |partial| result << partial }
         result
       end

+      def get_max_tokens(text)
+        if text.length < 100
+          500
+        elsif text.length < 500
+          1000
+        else
+          text.length * 2
+        end
+      end
+
       def persona_setting
         raise NotImplementedError
       end
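Two bits of Ruby syntax in the hunk above worth calling out: `{ max_tokens: }` and `bot.reply(context, llm_args:)` use Ruby 3.1's hash value omission, where a bare `key:` picks up the local variable of the same name; and the unary `+` in `result = +""` returns a mutable copy of the (frozen) string literal so the streamed partials can be appended with `<<`. A minimal illustration:

    # Ruby 3.1+ hash value omission: `max_tokens:` expands to
    # `max_tokens: max_tokens`.
    max_tokens = 500
    llm_args = { max_tokens: }              # => { max_tokens: 500 }
    llm_args == { max_tokens: max_tokens }  # => true

    # Unary + yields an unfrozen string, safe to mutate with <<.
    result = +""
    result << "partial chunk"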


@@ -20,6 +20,7 @@ describe DiscourseAi::Translation::BaseTranslator do
     let(:target_locale) { "de" }
     let(:llm_response) { "hur dur hur dur!" }
     fab!(:post)
+    fab!(:topic) { post.topic }

     it "creates the correct prompt" do
       post_translator =
@@ -36,6 +37,62 @@ describe DiscourseAi::Translation::BaseTranslator do
       end
     end

+    it "creates BotContext with the correct parameters and calls bot.reply with correct args" do
+      post_translator =
+        DiscourseAi::Translation::PostRawTranslator.new(text:, target_locale:, post:, topic:)
+      expected_content = { content: text, target_locale: target_locale }.to_json
+
+      bot_context = instance_double(DiscourseAi::Personas::BotContext)
+      allow(DiscourseAi::Personas::BotContext).to receive(:new).and_return(bot_context)
+
+      mock_bot = instance_double(DiscourseAi::Personas::Bot)
+      allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(mock_bot)
+      allow(mock_bot).to receive(:reply).and_yield(llm_response)
+
+      post_translator.translate
+
+      expect(DiscourseAi::Personas::BotContext).to have_received(:new).with(
+        user: an_instance_of(User),
+        skip_tool_details: true,
+        feature_name: "translation",
+        messages: [{ type: :user, content: expected_content }],
+        topic: topic,
+        post: post,
+      )
+      expect(DiscourseAi::Personas::Bot).to have_received(:as)
+      expect(mock_bot).to have_received(:reply).with(bot_context, llm_args: { max_tokens: 500 })
+    end
+
+    it "sets max_tokens correctly based on text length" do
+      test_cases = [
+        ["Short text", 500], # short text (< 100 chars)
+        ["a" * 200, 1000],   # medium text (100-500 chars)
+        ["a" * 600, 1200],   # long text (> 500 chars, 600 * 2 = 1200)
+      ]
+
+      test_cases.each do |text, expected_max_tokens|
+        translator = DiscourseAi::Translation::PostRawTranslator.new(text: text, target_locale:)
+
+        bot_context = instance_double(DiscourseAi::Personas::BotContext)
+        allow(DiscourseAi::Personas::BotContext).to receive(:new).and_return(bot_context)
+
+        mock_bot = instance_double(DiscourseAi::Personas::Bot)
+        allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(mock_bot)
+        allow(mock_bot).to receive(:reply).and_yield("translated #{text[0..10]}")
+
+        translator.translate
+
+        expect(mock_bot).to have_received(:reply).with(
+          bot_context,
+          llm_args: {
+            max_tokens: expected_max_tokens,
+          },
+        )
+      end
+    end
+
     it "returns the translation from the llm's response" do
       DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do
         expect(