FIX: Add a max token limit based on the text to be translated (#1507)
We're seeing that some LLMs are using 65,000+ tokens to translate raw text that is only 10-1000 characters long. This PR passes a max_tokens limit to the LLM API for each translation, sized according to the length of the text.
parent 8630bc145e
commit 5d80a34589
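In short: text under 100 characters gets a 500-token cap, text under 500 characters gets 1000, and anything longer is capped at twice its character length. A minimal sketch of the tier logic, mirroring get_max_tokens from the diff below (calling it standalone like this is for illustration only; in the actual change it is an instance method on the translator):

    # Tiered max_tokens cap, as introduced in this commit.
    def get_max_tokens(text)
      if text.length < 100
        500              # short text: flat 500-token cap
      elsif text.length < 500
        1000             # medium text: flat 1000-token cap
      else
        text.length * 2  # long text: scale the cap with input length
      end
    end

    get_max_tokens("Short text") # => 500
    get_max_tokens("a" * 200)    # => 1000
    get_max_tokens("a" * 600)    # => 1200

Note that `llm_args = { max_tokens: }` in the diff uses Ruby 3.1+ hash shorthand for `{ max_tokens: max_tokens }`; the hash is forwarded to bot.reply.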
@@ -46,12 +46,24 @@ module DiscourseAi
             topic: @topic,
             post: @post,
           )
 
+        max_tokens = get_max_tokens(text)
+        llm_args = { max_tokens: }
+
         result = +""
-        bot.reply(context) { |partial| result << partial }
+        bot.reply(context, llm_args:) { |partial| result << partial }
         result
       end
 
+      def get_max_tokens(text)
+        if text.length < 100
+          500
+        elsif text.length < 500
+          1000
+        else
+          text.length * 2
+        end
+      end
+
       def persona_setting
         raise NotImplementedError
       end
@@ -20,6 +20,7 @@ describe DiscourseAi::Translation::BaseTranslator do
   let(:target_locale) { "de" }
+  let(:llm_response) { "hur dur hur dur!" }
   fab!(:post)
   fab!(:topic) { post.topic }
 
   it "creates the correct prompt" do
     post_translator =
@@ -36,6 +37,62 @@ describe DiscourseAi::Translation::BaseTranslator do
     end
   end
 
+  it "creates BotContext with the correct parameters and calls bot.reply with correct args" do
+    post_translator =
+      DiscourseAi::Translation::PostRawTranslator.new(text:, target_locale:, post:, topic:)
+
+    expected_content = { content: text, target_locale: target_locale }.to_json
+
+    bot_context = instance_double(DiscourseAi::Personas::BotContext)
+    allow(DiscourseAi::Personas::BotContext).to receive(:new).and_return(bot_context)
+
+    mock_bot = instance_double(DiscourseAi::Personas::Bot)
+    allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(mock_bot)
+    allow(mock_bot).to receive(:reply).and_yield(llm_response)
+
+    post_translator.translate
+
+    expect(DiscourseAi::Personas::BotContext).to have_received(:new).with(
+      user: an_instance_of(User),
+      skip_tool_details: true,
+      feature_name: "translation",
+      messages: [{ type: :user, content: expected_content }],
+      topic: topic,
+      post: post,
+    )
+
+    expect(DiscourseAi::Personas::Bot).to have_received(:as)
+    expect(mock_bot).to have_received(:reply).with(bot_context, llm_args: { max_tokens: 500 })
+  end
+
+  it "sets max_tokens correctly based on text length" do
+    test_cases = [
+      ["Short text", 500], # Short text (< 100 chars)
+      ["a" * 200, 1000], # Medium text (100-500 chars)
+      ["a" * 600, 1200], # Long text (> 500 chars, 600*2=1200)
+    ]
+
+    test_cases.each do |text, expected_max_tokens|
+      translator = DiscourseAi::Translation::PostRawTranslator.new(text: text, target_locale:)
+
+      bot_context = instance_double(DiscourseAi::Personas::BotContext)
+      allow(DiscourseAi::Personas::BotContext).to receive(:new).and_return(bot_context)
+
+      mock_bot = instance_double(DiscourseAi::Personas::Bot)
+      allow(DiscourseAi::Personas::Bot).to receive(:as).and_return(mock_bot)
+      allow(mock_bot).to receive(:reply).and_yield("translated #{text[0..10]}")
+
+      translator.translate
+
+      expect(mock_bot).to have_received(:reply).with(
+        bot_context,
+        llm_args: {
+          max_tokens: expected_max_tokens,
+        },
+      )
+    end
+  end
+
   it "returns the translation from the llm's response" do
     DiscourseAi::Completions::Llm.with_prepared_responses([llm_response]) do
       expect(