FEATURE: full support for Sonnet 3.7 (#1151)

* FEATURE: full support for Sonnet 3.7

- Adds support for Sonnet 3.7 with reasoning on Bedrock and Anthropic
- Fixes regression where provider params were not populated

Note: the reasoning token budget is clamped to a hardcoded minimum of 100 and maximum of 65,536.
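
A minimal sketch of the clamping rule (illustrative values only; the real logic lives in the Anthropic and Bedrock endpoint classes in this diff):

    # any configured reasoning_tokens value is clamped into [100, 65_536],
    # and max_tokens is padded by 30_000 so output has room beyond reasoning
    [50, 10_000, 100_000].each do |requested|
      budget = requested.to_i.clamp(100, 65_536)
      max_tokens = budget + 30_000
      puts "requested=#{requested} budget=#{budget} max_tokens=#{max_tokens}"
    end
    # requested=50     budget=100    max_tokens=30100
    # requested=10000  budget=10000  max_tokens=40000
    # requested=100000 budget=65536  max_tokens=95536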

* FIX: OpenAI non-reasoning models need to use the deprecated max_tokens param
Sam 2025-02-25 17:32:12 +11:00 committed by GitHub
parent 84e791a941
commit fe19133dd4
GPG Key ID: B5690EEEBB952194
11 changed files with 184 additions and 15 deletions

View File

@@ -26,9 +26,13 @@ class LlmModel < ActiveRecord::Base
         access_key_id: :text,
         region: :text,
         disable_native_tools: :checkbox,
+        enable_reasoning: :checkbox,
+        reasoning_tokens: :number,
       },
       anthropic: {
         disable_native_tools: :checkbox,
+        enable_reasoning: :checkbox,
+        reasoning_tokens: :number,
       },
       open_ai: {
         organization: :text,

View File

@@ -61,7 +61,10 @@ export default class AiLlmEditorForm extends Component {
       provider: model.provider,
       enabled_chat_bot: model.enabled_chat_bot,
       vision_enabled: model.vision_enabled,
-      provider_params: this.computeProviderParams(model.provider),
+      provider_params: this.computeProviderParams(
+        model.provider,
+        model.provider_params
+      ),
       llm_quotas: model.llm_quotas,
     };
   }
@@ -128,12 +131,12 @@ export default class AiLlmEditorForm extends Component {
     return !this.args.model.isNew;
   }

-  computeProviderParams(provider) {
+  computeProviderParams(provider, currentParams = {}) {
     const params = this.args.llms.resultSetMeta.provider_params[provider] ?? {};
     return Object.fromEntries(
       Object.entries(params).map(([k, v]) => [
         k,
-        v?.type === "enum" ? v.default : null,
+        currentParams[k] ?? (v?.type === "enum" ? v.default : null),
       ])
     );
   }

View File

@@ -390,7 +390,7 @@ en:
           model_description:
             none: "General settings that work for most language models"
-            anthropic-claude-3-5-sonnet: "Anthropic's most intelligent model"
+            anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
             anthropic-claude-3-5-haiku: "Fast and cost-effective"
             anthropic-claude-3-opus: "Excels at writing and complex tasks"
             google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
@@ -459,6 +459,8 @@ en:
             provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
             disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
             reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
+            enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
+            reasoning_tokens: "Number of tokens used for reasoning"
           related_topics:
             title: "Related topics"

View File

@@ -38,6 +38,15 @@ module DiscourseAi
           options = { model: mapped_model, max_tokens: max_tokens }

+          if llm_model.lookup_custom_param("enable_reasoning")
+            reasoning_tokens =
+              llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+
+            # this allows for lots of tokens beyond reasoning
+            options[:max_tokens] = reasoning_tokens + 30_000
+            options[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+          end
+
           options[:stop_sequences] = ["</function_calls>"] if !dialect.native_tool_support? &&
             dialect.prompt.has_tools?
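
For illustration, with reasoning enabled and a 10,000-token budget, the options built above come out as follows (mirroring the request body asserted in the Anthropic spec later in this diff; the model value is whatever mapped_model resolves to):

    {
      model: mapped_model,
      max_tokens: 40_000, # 10_000 reasoning budget + 30_000 headroom
      thinking: {
        type: "enabled",
        budget_tokens: 10_000,
      },
    }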

View File

@@ -26,7 +26,18 @@ module DiscourseAi
            max_tokens = 4096
            max_tokens = 8192 if bedrock_model_id.match?(/3.5/)

-           { max_tokens: max_tokens, anthropic_version: "bedrock-2023-05-31" }
+           result = { anthropic_version: "bedrock-2023-05-31" }
+
+           if llm_model.lookup_custom_param("enable_reasoning")
+             reasoning_tokens =
+               llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+
+             # this allows for ample tokens beyond reasoning
+             max_tokens = reasoning_tokens + 30_000
+             result[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+           end
+
+           result[:max_tokens] = max_tokens
+           result
          else
            {}
          end
@@ -66,6 +77,8 @@ module DiscourseAi
             "anthropic.claude-3-5-sonnet-20241022-v2:0"
           when "claude-3-5-haiku"
             "anthropic.claude-3-5-haiku-20241022-v1:0"
+          when "claude-3-7-sonnet"
+            "anthropic.claude-3-7-sonnet-20250219-v1:0"
           else
             llm_model.name
           end

View File

@@ -11,9 +11,13 @@ module DiscourseAi
         def normalize_model_params(model_params)
           model_params = model_params.dup

-          # max_tokens is deprecated and is not functional on reasoning models
-          max_tokens = model_params.delete(:max_tokens)
-          model_params[:max_completion_tokens] = max_tokens if max_tokens
+          # max_tokens is deprecated however we still need to support it
+          # on older OpenAI models and older Azure models, so we will only normalize
+          # if our model name starts with o (to denote all the reasoning models)
+          if llm_model.name.starts_with?("o")
+            max_tokens = model_params.delete(:max_tokens)
+            model_params[:max_completion_tokens] = max_tokens if max_tokens
+          end

           # temperature is already supported
           if model_params[:stop_sequences]
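
A standalone sketch of the new rule, using a hypothetical normalize helper (in the plugin the name comes from llm_model.name, and starts_with? is the Rails alias of Ruby's start_with?):

    # remap max_tokens -> max_completion_tokens only for reasoning models,
    # i.e. model names starting with "o" (o1, o3-mini, ...)
    def normalize(model_name, params)
      params = params.dup
      if model_name.start_with?("o")
        max_tokens = params.delete(:max_tokens)
        params[:max_completion_tokens] = max_tokens if max_tokens
      end
      params
    end

    normalize("o3-mini", { max_tokens: 1000 }) # => { max_completion_tokens: 1000 }
    normalize("gpt-4", { max_tokens: 1000 })   # => { max_tokens: 1000 }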

View File

@@ -27,9 +27,9 @@ module DiscourseAi
           id: "anthropic",
           models: [
             {
-              name: "claude-3-5-sonnet",
+              name: "claude-3-7-sonnet",
               tokens: 200_000,
-              display_name: "Claude 3.5 Sonnet",
+              display_name: "Claude 3.7 Sonnet",
             },
             { name: "claude-3-5-haiku", tokens: 200_000, display_name: "Claude 3.5 Haiku" },
             { name: "claude-3-opus", tokens: 200_000, display_name: "Claude 3 Opus" },

View File

@@ -334,6 +334,68 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
     expect(requested_body).to eq(request_body)
   end

+  it "can support reasoning" do
+    body = <<~STRING
+      {
+        "content": [
+          {
+            "text": "Hello!",
+            "type": "text"
+          }
+        ],
+        "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
+        "model": "claude-3-opus-20240229",
+        "role": "assistant",
+        "stop_reason": "end_turn",
+        "stop_sequence": null,
+        "type": "message",
+        "usage": {
+          "input_tokens": 10,
+          "output_tokens": 25
+        }
+      }
+    STRING
+
+    parsed_body = nil
+    stub_request(:post, url).with(
+      body:
+        proc do |req_body|
+          parsed_body = JSON.parse(req_body, symbolize_names: true)
+          true
+        end,
+      headers: {
+        "Content-Type" => "application/json",
+        "X-Api-Key" => "123",
+        "Anthropic-Version" => "2023-06-01",
+      },
+    ).to_return(status: 200, body: body)
+
+    model.provider_params["enable_reasoning"] = true
+    model.provider_params["reasoning_tokens"] = 10_000
+    model.save!
+
+    proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+    result = proxy.generate(prompt, user: Discourse.system_user)
+    expect(result).to eq("Hello!")
+
+    expected_body = {
+      model: "claude-3-opus-20240229",
+      max_tokens: 40_000,
+      thinking: {
+        type: "enabled",
+        budget_tokens: 10_000,
+      },
+      messages: [{ role: "user", content: "user1: hello" }],
+      system: "You are hello bot",
+    }
+    expect(parsed_body).to eq(expected_body)
+
+    log = AiApiAuditLog.order(:id).last
+    expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
+    expect(log.request_tokens).to eq(10)
+    expect(log.response_tokens).to eq(25)
+  end
+
   it "can operate in regular mode" do
     body = <<~STRING
       {

View File

@@ -335,6 +335,57 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
       expect(log.response_tokens).to eq(20)
     end

+    it "supports thinking" do
+      model.provider_params["enable_reasoning"] = true
+      model.provider_params["reasoning_tokens"] = 10_000
+      model.save!
+
+      proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+      request = nil
+      content = {
+        content: [text: "hello sam"],
+        usage: {
+          input_tokens: 10,
+          output_tokens: 20,
+        },
+      }.to_json
+
+      stub_request(
+        :post,
+        "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
+      )
+        .with do |inner_request|
+          request = inner_request
+          true
+        end
+        .to_return(status: 200, body: content)
+
+      response = proxy.generate("hello world", user: user)
+
+      expect(request.headers["Authorization"]).to be_present
+      expect(request.headers["X-Amz-Content-Sha256"]).to be_present
+
+      expected = {
+        "max_tokens" => 40_000,
+        "thinking" => {
+          "type" => "enabled",
+          "budget_tokens" => 10_000,
+        },
+        "anthropic_version" => "bedrock-2023-05-31",
+        "messages" => [{ "role" => "user", "content" => "hello world" }],
+        "system" => "You are a helpful bot",
+      }
+
+      expect(JSON.parse(request.body)).to eq(expected)
+      expect(response).to eq("hello sam")
+
+      log = AiApiAuditLog.order(:id).last
+      expect(log.request_tokens).to eq(10)
+      expect(log.response_tokens).to eq(20)
+    end
+
     it "supports claude 3 streaming" do
       proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")

View File

@@ -285,6 +285,23 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
       end
     end

+    describe "max tokens remapping" do
+      it "remaps max_tokens to max_completion_tokens for reasoning models" do
+        model.update!(name: "o3-mini")
+        llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+        body_parsed = nil
+        stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
+          body: ->(body) { body_parsed = JSON.parse(body) },
+        ).to_return(status: 200, body: { choices: [{ message: { content: "hello" } }] }.to_json)
+
+        llm.generate("test", user: user, max_tokens: 1000)
+
+        expect(body_parsed["max_completion_tokens"]).to eq(1000)
+        expect(body_parsed["max_tokens"]).to be_nil
+      end
+    end
+
     describe "forced tool use" do
       it "can properly force tool use" do
         llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
@@ -346,9 +363,11 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
           body: proc { |body| body_json = JSON.parse(body, symbolize_names: true) },
         ).to_return(body: response)

-        result = llm.generate(prompt, user: user)
+        result = llm.generate(prompt, user: user, max_tokens: 1000)

         expect(body_json[:tool_choice]).to eq({ type: "function", function: { name: "echo" } })
+        # we expect this not to be remapped on older non reasoning models
+        expect(body_json[:max_tokens]).to eq(1000)

         log = AiApiAuditLog.order(:id).last
         expect(log.request_tokens).to eq(55)

View File

@@ -73,13 +73,15 @@ RSpec.describe "Managing LLM configurations", type: :system, js: true do
     context "when changing the provider" do
       it "has the correct provider params when visiting the edit page" do
-        llm = Fabricate(:llm_model, provider: "open_ai", provider_params: {})
+        llm =
+          Fabricate(:llm_model, provider: "anthropic", provider_params: { enable_reasoning: true })

         visit "/admin/plugins/discourse-ai/ai-llms/#{llm.id}/edit"

-        expect(form).to have_field_with_name("provider_params.organization")
         expect(form).to have_field_with_name("provider_params.disable_native_tools")
-        expect(form).to have_field_with_name("provider_params.disable_streaming")
-        expect(form).to have_field_with_name("provider_params.reasoning_effort")
+        expect(form).to have_field_with_name("provider_params.reasoning_tokens")
+
+        reasoning = form.field("provider_params.enable_reasoning")
+        expect(reasoning).to be_checked
       end

     it "correctly changes the provider params" do
       visit "/admin/plugins/discourse-ai/ai-llms"