mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-03-01 14:59:22 +00:00
FEATURE: full support for Sonnet 3.7 (#1151)
* FEATURE: full support for Sonnet 3.7
  - Adds support for Sonnet 3.7 with reasoning on Bedrock and Anthropic
  - Fixes a regression where provider params were not populated
  Note: reasoning tokens are clamped to a minimum of 100 and a maximum of 65536.
* FIX: OpenAI non-reasoning models need to use the deprecated max_tokens parameter
This commit is contained in:
parent
84e791a941
commit
fe19133dd4
@ -26,9 +26,13 @@ class LlmModel < ActiveRecord::Base
|
|||||||
access_key_id: :text,
|
access_key_id: :text,
|
||||||
region: :text,
|
region: :text,
|
||||||
disable_native_tools: :checkbox,
|
disable_native_tools: :checkbox,
|
||||||
|
enable_reasoning: :checkbox,
|
||||||
|
reasoning_tokens: :number,
|
||||||
},
|
},
|
||||||
anthropic: {
|
anthropic: {
|
||||||
disable_native_tools: :checkbox,
|
disable_native_tools: :checkbox,
|
||||||
|
enable_reasoning: :checkbox,
|
||||||
|
reasoning_tokens: :number,
|
||||||
},
|
},
|
||||||
open_ai: {
|
open_ai: {
|
||||||
organization: :text,
|
organization: :text,
|
||||||
|
@ -61,7 +61,10 @@ export default class AiLlmEditorForm extends Component {
|
|||||||
provider: model.provider,
|
provider: model.provider,
|
||||||
enabled_chat_bot: model.enabled_chat_bot,
|
enabled_chat_bot: model.enabled_chat_bot,
|
||||||
vision_enabled: model.vision_enabled,
|
vision_enabled: model.vision_enabled,
|
||||||
provider_params: this.computeProviderParams(model.provider),
|
provider_params: this.computeProviderParams(
|
||||||
|
model.provider,
|
||||||
|
model.provider_params
|
||||||
|
),
|
||||||
llm_quotas: model.llm_quotas,
|
llm_quotas: model.llm_quotas,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -128,12 +131,12 @@ export default class AiLlmEditorForm extends Component {
|
|||||||
return !this.args.model.isNew;
|
return !this.args.model.isNew;
|
||||||
}
|
}
|
||||||
|
|
||||||
computeProviderParams(provider) {
|
computeProviderParams(provider, currentParams = {}) {
|
||||||
const params = this.args.llms.resultSetMeta.provider_params[provider] ?? {};
|
const params = this.args.llms.resultSetMeta.provider_params[provider] ?? {};
|
||||||
return Object.fromEntries(
|
return Object.fromEntries(
|
||||||
Object.entries(params).map(([k, v]) => [
|
Object.entries(params).map(([k, v]) => [
|
||||||
k,
|
k,
|
||||||
v?.type === "enum" ? v.default : null,
|
currentParams[k] ?? (v?.type === "enum" ? v.default : null),
|
||||||
])
|
])
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -390,7 +390,7 @@ en:
|
|||||||
|
|
||||||
model_description:
|
model_description:
|
||||||
none: "General settings that work for most language models"
|
none: "General settings that work for most language models"
|
||||||
anthropic-claude-3-5-sonnet: "Anthropic's most intelligent model"
|
anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
|
||||||
anthropic-claude-3-5-haiku: "Fast and cost-effective"
|
anthropic-claude-3-5-haiku: "Fast and cost-effective"
|
||||||
anthropic-claude-3-opus: "Excels at writing and complex tasks"
|
anthropic-claude-3-opus: "Excels at writing and complex tasks"
|
||||||
google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
|
google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
|
||||||
@ -459,6 +459,8 @@ en:
|
|||||||
provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
|
provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
|
||||||
disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
|
disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
|
||||||
reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
|
reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
|
||||||
|
enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
|
||||||
|
reasoning_tokens: "Number of tokens used for reasoning"
|
||||||
|
|
||||||
related_topics:
|
related_topics:
|
||||||
title: "Related topics"
|
title: "Related topics"
|
||||||
|
@ -38,6 +38,15 @@ module DiscourseAi
|
|||||||
|
|
||||||
options = { model: mapped_model, max_tokens: max_tokens }
|
options = { model: mapped_model, max_tokens: max_tokens }
|
||||||
|
|
||||||
|
if llm_model.lookup_custom_param("enable_reasoning")
|
||||||
|
reasoning_tokens =
|
||||||
|
llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
|
||||||
|
|
||||||
|
# this allows for lots of tokens beyond reasoning
|
||||||
|
options[:max_tokens] = reasoning_tokens + 30_000
|
||||||
|
options[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
|
||||||
|
end
|
||||||
|
|
||||||
options[:stop_sequences] = ["</function_calls>"] if !dialect.native_tool_support? &&
|
options[:stop_sequences] = ["</function_calls>"] if !dialect.native_tool_support? &&
|
||||||
dialect.prompt.has_tools?
|
dialect.prompt.has_tools?
|
||||||
|
|
||||||
|
@ -26,7 +26,18 @@ module DiscourseAi
|
|||||||
max_tokens = 4096
|
max_tokens = 4096
|
||||||
max_tokens = 8192 if bedrock_model_id.match?(/3.5/)
|
max_tokens = 8192 if bedrock_model_id.match?(/3.5/)
|
||||||
|
|
||||||
{ max_tokens: max_tokens, anthropic_version: "bedrock-2023-05-31" }
|
result = { anthropic_version: "bedrock-2023-05-31" }
|
||||||
|
if llm_model.lookup_custom_param("enable_reasoning")
|
||||||
|
reasoning_tokens =
|
||||||
|
llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
|
||||||
|
|
||||||
|
# this allows for ample tokens beyond reasoning
|
||||||
|
max_tokens = reasoning_tokens + 30_000
|
||||||
|
result[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
|
||||||
|
end
|
||||||
|
result[:max_tokens] = max_tokens
|
||||||
|
|
||||||
|
result
|
||||||
else
|
else
|
||||||
{}
|
{}
|
||||||
end
|
end
|
||||||
@ -66,6 +77,8 @@ module DiscourseAi
|
|||||||
"anthropic.claude-3-5-sonnet-20241022-v2:0"
|
"anthropic.claude-3-5-sonnet-20241022-v2:0"
|
||||||
when "claude-3-5-haiku"
|
when "claude-3-5-haiku"
|
||||||
"anthropic.claude-3-5-haiku-20241022-v1:0"
|
"anthropic.claude-3-5-haiku-20241022-v1:0"
|
||||||
|
when "claude-3-7-sonnet"
|
||||||
|
"anthropic.claude-3-7-sonnet-20250219-v1:0"
|
||||||
else
|
else
|
||||||
llm_model.name
|
llm_model.name
|
||||||
end
|
end
|
||||||
|
@ -11,9 +11,13 @@ module DiscourseAi
|
|||||||
def normalize_model_params(model_params)
|
def normalize_model_params(model_params)
|
||||||
model_params = model_params.dup
|
model_params = model_params.dup
|
||||||
|
|
||||||
# max_tokens is deprecated and is not functional on reasoning models
|
# max_tokens is deprecated however we still need to support it
|
||||||
max_tokens = model_params.delete(:max_tokens)
|
# on older OpenAI models and older Azure models, so we will only normalize
|
||||||
model_params[:max_completion_tokens] = max_tokens if max_tokens
|
# if our model name starts with o (to denote all the reasoning models)
|
||||||
|
if llm_model.name.starts_with?("o")
|
||||||
|
max_tokens = model_params.delete(:max_tokens)
|
||||||
|
model_params[:max_completion_tokens] = max_tokens if max_tokens
|
||||||
|
end
|
||||||
|
|
||||||
# temperature is already supported
|
# temperature is already supported
|
||||||
if model_params[:stop_sequences]
|
if model_params[:stop_sequences]
|
||||||
|
@ -27,9 +27,9 @@ module DiscourseAi
|
|||||||
id: "anthropic",
|
id: "anthropic",
|
||||||
models: [
|
models: [
|
||||||
{
|
{
|
||||||
name: "claude-3-5-sonnet",
|
name: "claude-3-7-sonnet",
|
||||||
tokens: 200_000,
|
tokens: 200_000,
|
||||||
display_name: "Claude 3.5 Sonnet",
|
display_name: "Claude 3.7 Sonnet",
|
||||||
},
|
},
|
||||||
{ name: "claude-3-5-haiku", tokens: 200_000, display_name: "Claude 3.5 Haiku" },
|
{ name: "claude-3-5-haiku", tokens: 200_000, display_name: "Claude 3.5 Haiku" },
|
||||||
{ name: "claude-3-opus", tokens: 200_000, display_name: "Claude 3 Opus" },
|
{ name: "claude-3-opus", tokens: 200_000, display_name: "Claude 3 Opus" },
|
||||||
|
@ -334,6 +334,68 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
|
|||||||
expect(requested_body).to eq(request_body)
|
expect(requested_body).to eq(request_body)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "can support reasoning" do
|
||||||
|
body = <<~STRING
|
||||||
|
{
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"text": "Hello!",
|
||||||
|
"type": "text"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
|
||||||
|
"model": "claude-3-opus-20240229",
|
||||||
|
"role": "assistant",
|
||||||
|
"stop_reason": "end_turn",
|
||||||
|
"stop_sequence": null,
|
||||||
|
"type": "message",
|
||||||
|
"usage": {
|
||||||
|
"input_tokens": 10,
|
||||||
|
"output_tokens": 25
|
||||||
|
}
|
||||||
|
}
|
||||||
|
STRING
|
||||||
|
|
||||||
|
parsed_body = nil
|
||||||
|
stub_request(:post, url).with(
|
||||||
|
body:
|
||||||
|
proc do |req_body|
|
||||||
|
parsed_body = JSON.parse(req_body, symbolize_names: true)
|
||||||
|
true
|
||||||
|
end,
|
||||||
|
headers: {
|
||||||
|
"Content-Type" => "application/json",
|
||||||
|
"X-Api-Key" => "123",
|
||||||
|
"Anthropic-Version" => "2023-06-01",
|
||||||
|
},
|
||||||
|
).to_return(status: 200, body: body)
|
||||||
|
|
||||||
|
model.provider_params["enable_reasoning"] = true
|
||||||
|
model.provider_params["reasoning_tokens"] = 10_000
|
||||||
|
model.save!
|
||||||
|
|
||||||
|
proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
|
||||||
|
result = proxy.generate(prompt, user: Discourse.system_user)
|
||||||
|
expect(result).to eq("Hello!")
|
||||||
|
|
||||||
|
expected_body = {
|
||||||
|
model: "claude-3-opus-20240229",
|
||||||
|
max_tokens: 40_000,
|
||||||
|
thinking: {
|
||||||
|
type: "enabled",
|
||||||
|
budget_tokens: 10_000,
|
||||||
|
},
|
||||||
|
messages: [{ role: "user", content: "user1: hello" }],
|
||||||
|
system: "You are hello bot",
|
||||||
|
}
|
||||||
|
expect(parsed_body).to eq(expected_body)
|
||||||
|
|
||||||
|
log = AiApiAuditLog.order(:id).last
|
||||||
|
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
|
||||||
|
expect(log.request_tokens).to eq(10)
|
||||||
|
expect(log.response_tokens).to eq(25)
|
||||||
|
end
|
||||||
|
|
||||||
it "can operate in regular mode" do
|
it "can operate in regular mode" do
|
||||||
body = <<~STRING
|
body = <<~STRING
|
||||||
{
|
{
|
||||||
|
@ -335,6 +335,57 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
|
|||||||
expect(log.response_tokens).to eq(20)
|
expect(log.response_tokens).to eq(20)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "supports thinking" do
|
||||||
|
model.provider_params["enable_reasoning"] = true
|
||||||
|
model.provider_params["reasoning_tokens"] = 10_000
|
||||||
|
model.save!
|
||||||
|
|
||||||
|
proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
|
||||||
|
|
||||||
|
request = nil
|
||||||
|
|
||||||
|
content = {
|
||||||
|
content: [text: "hello sam"],
|
||||||
|
usage: {
|
||||||
|
input_tokens: 10,
|
||||||
|
output_tokens: 20,
|
||||||
|
},
|
||||||
|
}.to_json
|
||||||
|
|
||||||
|
stub_request(
|
||||||
|
:post,
|
||||||
|
"https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
|
||||||
|
)
|
||||||
|
.with do |inner_request|
|
||||||
|
request = inner_request
|
||||||
|
true
|
||||||
|
end
|
||||||
|
.to_return(status: 200, body: content)
|
||||||
|
|
||||||
|
response = proxy.generate("hello world", user: user)
|
||||||
|
|
||||||
|
expect(request.headers["Authorization"]).to be_present
|
||||||
|
expect(request.headers["X-Amz-Content-Sha256"]).to be_present
|
||||||
|
|
||||||
|
expected = {
|
||||||
|
"max_tokens" => 40_000,
|
||||||
|
"thinking" => {
|
||||||
|
"type" => "enabled",
|
||||||
|
"budget_tokens" => 10_000,
|
||||||
|
},
|
||||||
|
"anthropic_version" => "bedrock-2023-05-31",
|
||||||
|
"messages" => [{ "role" => "user", "content" => "hello world" }],
|
||||||
|
"system" => "You are a helpful bot",
|
||||||
|
}
|
||||||
|
expect(JSON.parse(request.body)).to eq(expected)
|
||||||
|
|
||||||
|
expect(response).to eq("hello sam")
|
||||||
|
|
||||||
|
log = AiApiAuditLog.order(:id).last
|
||||||
|
expect(log.request_tokens).to eq(10)
|
||||||
|
expect(log.response_tokens).to eq(20)
|
||||||
|
end
|
||||||
|
|
||||||
it "supports claude 3 streaming" do
|
it "supports claude 3 streaming" do
|
||||||
proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
|
proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
|
||||||
|
|
||||||
|
@ -285,6 +285,23 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "max tokens remapping" do
|
||||||
|
it "remaps max_tokens to max_completion_tokens for reasoning models" do
|
||||||
|
model.update!(name: "o3-mini")
|
||||||
|
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
|
||||||
|
|
||||||
|
body_parsed = nil
|
||||||
|
stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
|
||||||
|
body: ->(body) { body_parsed = JSON.parse(body) },
|
||||||
|
).to_return(status: 200, body: { choices: [{ message: { content: "hello" } }] }.to_json)
|
||||||
|
|
||||||
|
llm.generate("test", user: user, max_tokens: 1000)
|
||||||
|
|
||||||
|
expect(body_parsed["max_completion_tokens"]).to eq(1000)
|
||||||
|
expect(body_parsed["max_tokens"]).to be_nil
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
describe "forced tool use" do
|
describe "forced tool use" do
|
||||||
it "can properly force tool use" do
|
it "can properly force tool use" do
|
||||||
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
|
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
|
||||||
@ -346,9 +363,11 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
|
|||||||
body: proc { |body| body_json = JSON.parse(body, symbolize_names: true) },
|
body: proc { |body| body_json = JSON.parse(body, symbolize_names: true) },
|
||||||
).to_return(body: response)
|
).to_return(body: response)
|
||||||
|
|
||||||
result = llm.generate(prompt, user: user)
|
result = llm.generate(prompt, user: user, max_tokens: 1000)
|
||||||
|
|
||||||
expect(body_json[:tool_choice]).to eq({ type: "function", function: { name: "echo" } })
|
expect(body_json[:tool_choice]).to eq({ type: "function", function: { name: "echo" } })
|
||||||
|
# we expect this not to be remapped on older non reasoning models
|
||||||
|
expect(body_json[:max_tokens]).to eq(1000)
|
||||||
|
|
||||||
log = AiApiAuditLog.order(:id).last
|
log = AiApiAuditLog.order(:id).last
|
||||||
expect(log.request_tokens).to eq(55)
|
expect(log.request_tokens).to eq(55)
|
||||||
|
@ -73,13 +73,15 @@ RSpec.describe "Managing LLM configurations", type: :system, js: true do
|
|||||||
|
|
||||||
context "when changing the provider" do
|
context "when changing the provider" do
|
||||||
it "has the correct provider params when visiting the edit page" do
|
it "has the correct provider params when visiting the edit page" do
|
||||||
llm = Fabricate(:llm_model, provider: "open_ai", provider_params: {})
|
llm =
|
||||||
|
Fabricate(:llm_model, provider: "anthropic", provider_params: { enable_reasoning: true })
|
||||||
visit "/admin/plugins/discourse-ai/ai-llms/#{llm.id}/edit"
|
visit "/admin/plugins/discourse-ai/ai-llms/#{llm.id}/edit"
|
||||||
|
|
||||||
expect(form).to have_field_with_name("provider_params.organization")
|
|
||||||
expect(form).to have_field_with_name("provider_params.disable_native_tools")
|
expect(form).to have_field_with_name("provider_params.disable_native_tools")
|
||||||
expect(form).to have_field_with_name("provider_params.disable_streaming")
|
expect(form).to have_field_with_name("provider_params.reasoning_tokens")
|
||||||
expect(form).to have_field_with_name("provider_params.reasoning_effort")
|
|
||||||
|
reasoning = form.field("provider_params.enable_reasoning")
|
||||||
|
expect(reasoning).to be_checked
|
||||||
end
|
end
|
||||||
it "correctly changes the provider params" do
|
it "correctly changes the provider params" do
|
||||||
visit "/admin/plugins/discourse-ai/ai-llms"
|
visit "/admin/plugins/discourse-ai/ai-llms"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user