mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-07-16 02:53:29 +00:00
FEATURE: support configurable thinking tokens for Gemini (#1322)
This commit is contained in:
parent
851ca57866
commit
2a62658248
@ -63,6 +63,8 @@ class LlmModel < ActiveRecord::Base
|
||||
},
|
||||
google: {
|
||||
disable_native_tools: :checkbox,
|
||||
enable_thinking: :checkbox,
|
||||
thinking_tokens: :number,
|
||||
},
|
||||
azure: {
|
||||
disable_native_tools: :checkbox,
|
||||
|
@ -533,6 +533,8 @@ en:
|
||||
disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
|
||||
reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
|
||||
enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
|
||||
enable_thinking: "Enable thinking (only on applicable models eg: flash 2.5)"
|
||||
thinking_tokens: "Number of tokens used for thinking"
|
||||
reasoning_tokens: "Number of tokens used for reasoning"
|
||||
disable_temperature: "Disable temperature (some thinking models don't support temperature)"
|
||||
disable_top_p: "Disable top P (some thinking models don't support top P)"
|
||||
|
@ -94,6 +94,12 @@ module DiscourseAi
|
||||
end
|
||||
end
|
||||
|
||||
if llm_model.lookup_custom_param("enable_thinking")
|
||||
thinking_tokens = llm_model.lookup_custom_param("thinking_tokens").to_i
|
||||
thinking_tokens = thinking_tokens.clamp(0, 24_576)
|
||||
payload[:generationConfig][:thinkingConfig] = { thinkingBudget: thinking_tokens }
|
||||
end
|
||||
|
||||
payload
|
||||
end
|
||||
|
||||
|
@ -153,6 +153,84 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
|
||||
}
|
||||
end
|
||||
|
||||
it "correctly configures thinking when enabled" do
  # Provider params are stored as strings here; the endpoint is expected to
  # coerce thinking_tokens to an integer before building the payload.
  model.update!(provider_params: { enable_thinking: "true", thinking_tokens: "10000" })

  canned_response = gemini_mock.response("Using thinking mode").to_json
  llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
  url = "#{model.url}:generateContent?key=123"

  # Capture the raw outgoing request body so the payload can be inspected
  # below. The matcher always returns true so the stub accepts any body.
  req_body = nil
  stub_request(:post, url).with(
    body:
      proc do |body|
        req_body = body
        true
      end,
  ).to_return(status: 200, body: canned_response)

  llm.generate("Hello", user: user)

  parsed = JSON.parse(req_body, symbolize_names: true)

  # Verify thinking config is properly set with the token limit
  expect(parsed.dig(:generationConfig, :thinkingConfig)).to eq({ thinkingBudget: 10_000 })
end
|
||||
|
||||
it "clamps thinking tokens within allowed limits" do
  # 30_000 is above the 24_576 ceiling asserted at the end of this example.
  model.update!(provider_params: { enable_thinking: "true", thinking_tokens: "30000" })

  canned_response = gemini_mock.response("Thinking tokens clamped").to_json
  llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
  url = "#{model.url}:generateContent?key=123"

  # Capture the raw outgoing request body so the payload can be inspected
  # below. The matcher always returns true so the stub accepts any body.
  req_body = nil
  stub_request(:post, url).with(
    body:
      proc do |body|
        req_body = body
        true
      end,
  ).to_return(status: 200, body: canned_response)

  llm.generate("Hello", user: user)

  parsed = JSON.parse(req_body, symbolize_names: true)

  # Verify thinking tokens are clamped to 24_576
  expect(parsed.dig(:generationConfig, :thinkingConfig)).to eq({ thinkingBudget: 24_576 })
end
|
||||
|
||||
it "does not add thinking config when disabled" do
  # thinking_tokens is still configured; only the enable flag is off, so the
  # token value alone must not cause a thinkingConfig to be sent.
  model.update!(provider_params: { enable_thinking: false, thinking_tokens: "10000" })

  canned_response = gemini_mock.response("No thinking mode").to_json
  llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
  url = "#{model.url}:generateContent?key=123"

  # Capture the raw outgoing request body so the payload can be inspected
  # below. The matcher always returns true so the stub accepts any body.
  req_body = nil
  stub_request(:post, url).with(
    body:
      proc do |body|
        req_body = body
        true
      end,
  ).to_return(status: 200, body: canned_response)

  llm.generate("Hello", user: user)

  parsed = JSON.parse(req_body, symbolize_names: true)

  # Verify thinking config is not present
  expect(parsed.dig(:generationConfig, :thinkingConfig)).to be_nil
end
|
||||
|
||||
# By default Gemini is meant to use AUTO mode; however, new experimental models
|
||||
# appear to require this to be explicitly set
|
||||
it "Explicitly specifies tool config" do
|
||||
|
Loading…
x
Reference in New Issue
Block a user