FIX: Add a max token limit based on the text to be translated (#1507)

We're seeing some LLMs use 65,000+ tokens to translate raw text that is only 10-1000 characters long.

This PR adds a max_tokens value, sized from the length of the input text, that is passed to the LLM API for each translation.
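In short: inputs under 100 characters are capped at 500 tokens, inputs under 500 characters at 1,000 tokens, and anything longer at two tokens per character. A standalone sketch of the heuristic (mirroring `get_max_tokens` in the diff below), with worked examples:

    # Token-cap heuristic added in this PR: fixed caps for short and
    # medium inputs, roughly two output tokens per input character beyond that.
    def get_max_tokens(text)
      if text.length < 100
        500
      elsif text.length < 500
        1000
      else
        text.length * 2
      end
    end

    get_max_tokens("Hallo Welt") # => 500   (10 chars: short tier)
    get_max_tokens("a" * 200)    # => 1000  (medium tier)
    get_max_tokens("a" * 600)    # => 1200  (600 * 2)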
Author: Natalie Tay, 2025-07-17 17:47:15 +08:00 (committed by GitHub)
Commit: 5d80a34589 (parent: 8630bc145e)
2 changed files with 70 additions and 1 deletion


@@ -46,12 +46,24 @@ module DiscourseAi
           topic: @topic,
           post: @post,
         )
+        max_tokens = get_max_tokens(text)
+        llm_args = { max_tokens: }

         result = +""
-        bot.reply(context) { |partial| result << partial }
+        bot.reply(context, llm_args:) { |partial| result << partial }
         result
       end

+      def get_max_tokens(text)
+        if text.length < 100
+          500
+        elsif text.length < 500
+          1000
+        else
+          text.length * 2
+        end
+      end
+
       def persona_setting
         raise NotImplementedError
       end
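Two bits of Ruby syntax in the hunk above worth calling out: `{ max_tokens: }` and `bot.reply(context, llm_args:)` use Ruby 3.1's hash value omission, where a bare `key:` picks up the local variable of the same name; and the unary `+` in `result = +""` returns a mutable copy of the (frozen) string literal so the streamed partials can be appended with `<<`. A minimal illustration:

    # Ruby 3.1+ hash value omission: `max_tokens:` expands to
    # `max_tokens: max_tokens`.
    max_tokens = 500
    llm_args = { max_tokens: }              # => { max_tokens: 500 }
    llm_args == { max_tokens: max_tokens }  # => true

    # Unary + yields an unfrozen string, safe to mutate with <<.
    result = +""
    result << "partial chunk"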


@@ -20,6 +20,7 @@ describe DiscourseAi::Translation::BaseTranslator do
     let(:target_locale) { "de" }
     let(:llm_response) { "hur dur hur dur!" }
     fab!(:post)
+    fab!(:topic) { post.topic }

     it "creates the correct prompt" do
       post_translator =
@@ -36,6 +37,62 @@ describe DiscourseAi::Translation::BaseTranslator do
       end
     end

+    it "creates BotContext with the correct parameters and calls bot.reply with correct args" do
+      post_translator =
+        DiscourseAi::Translation::PostRawTranslator.new(text:, target_locale:, post:, topic:)
+      expected_content = { content: text, target_locale: target_locale }.to_json
+
+      bot_context = instance_double(DiscourseAi::Personas::BotContext)
+      allow(DiscourseAi::Personas::BotContext).to receive(:new).and_return(bot_context)
+
+      mock_bot = instance_double(DiscourseAi::Personas::Bot)
+      allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(mock_bot)
+      allow(mock_bot).to receive(:reply).and_yield(llm_response)
+
+      post_translator.translate
+
+      expect(DiscourseAi::Personas::BotContext).to have_received(:new).with(
+        user: an_instance_of(User),
+        skip_tool_details: true,
+        feature_name: "translation",
+        messages: [{ type: :user, content: expected_content }],
+        topic: topic,
+        post: post,
+      )
+      expect(DiscourseAi::Personas::Bot).to have_received(:as)
+      expect(mock_bot).to have_received(:reply).with(bot_context, llm_args: { max_tokens: 500 })
+    end
+
+    it "sets max_tokens correctly based on text length" do
+      test_cases = [
+        ["Short text", 500], # short text (< 100 chars)
+        ["a" * 200, 1000],   # medium text (100-500 chars)
+        ["a" * 600, 1200],   # long text (> 500 chars, 600 * 2 = 1200)
+      ]
+
+      test_cases.each do |text, expected_max_tokens|
+        translator = DiscourseAi::Translation::PostRawTranslator.new(text: text, target_locale:)
+
+        bot_context = instance_double(DiscourseAi::Personas::BotContext)
+        allow(DiscourseAi::Personas::BotContext).to receive(:new).and_return(bot_context)
+
+        mock_bot = instance_double(DiscourseAi::Personas::Bot)
+        allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(mock_bot)
+        allow(mock_bot).to receive(:reply).and_yield("translated #{text[0..10]}")
+
+        translator.translate
+
+        expect(mock_bot).to have_received(:reply).with(
+          bot_context,
+          llm_args: {
+            max_tokens: expected_max_tokens,
+          },
+        )
+      end
+    end
+
     it "returns the translation from the llm's response" do
       DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do
         expect(