FEATURE: full support for Sonnet 3.7 (#1151)

* FEATURE: full support for Sonnet 3.7

- Adds support for Sonnet 3.7 with reasoning on Bedrock and Anthropic
- Fixes regression where provider params were not populated

Note: the reasoning token budget is clamped to a hardcoded minimum of 100 and maximum of 65,536.
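
A minimal sketch of the clamping rule (illustrative values only; the real logic lives in the Anthropic and Bedrock endpoint classes in this diff):

    # any configured reasoning_tokens value is clamped into [100, 65_536],
    # and max_tokens is padded by 30_000 so output has room beyond reasoning
    [50, 10_000, 100_000].each do |requested|
      budget = requested.to_i.clamp(100, 65_536)
      max_tokens = budget + 30_000
      puts "requested=#{requested} budget=#{budget} max_tokens=#{max_tokens}"
    end
    # requested=50     budget=100    max_tokens=30100
    # requested=10000  budget=10000  max_tokens=40000
    # requested=100000 budget=65536  max_tokens=95536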

* FIX: OpenAI non-reasoning models need to use the deprecated max_tokens param
Sam 2025-02-25 17:32:12 +11:00 committed by GitHub
parent 84e791a941
commit fe19133dd4
GPG Key ID: B5690EEEBB952194
11 changed files with 184 additions and 15 deletions

View File

@@ -26,9 +26,13 @@ class LlmModel < ActiveRecord::Base
         access_key_id: :text,
         region: :text,
         disable_native_tools: :checkbox,
+        enable_reasoning: :checkbox,
+        reasoning_tokens: :number,
       },
       anthropic: {
         disable_native_tools: :checkbox,
+        enable_reasoning: :checkbox,
+        reasoning_tokens: :number,
       },
       open_ai: {
         organization: :text,

View File

@@ -61,7 +61,10 @@ export default class AiLlmEditorForm extends Component {
       provider: model.provider,
       enabled_chat_bot: model.enabled_chat_bot,
       vision_enabled: model.vision_enabled,
-      provider_params: this.computeProviderParams(model.provider),
+      provider_params: this.computeProviderParams(
+        model.provider,
+        model.provider_params
+      ),
       llm_quotas: model.llm_quotas,
     };
   }
@@ -128,12 +131,12 @@ export default class AiLlmEditorForm extends Component {
     return !this.args.model.isNew;
   }

-  computeProviderParams(provider) {
+  computeProviderParams(provider, currentParams = {}) {
     const params = this.args.llms.resultSetMeta.provider_params[provider] ?? {};
     return Object.fromEntries(
       Object.entries(params).map(([k, v]) => [
         k,
-        v?.type === "enum" ? v.default : null,
+        currentParams[k] ?? (v?.type === "enum" ? v.default : null),
       ])
     );
   }

View File

@@ -390,7 +390,7 @@ en:
           model_description:
             none: "General settings that work for most language models"
-            anthropic-claude-3-5-sonnet: "Anthropic's most intelligent model"
+            anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
             anthropic-claude-3-5-haiku: "Fast and cost-effective"
             anthropic-claude-3-opus: "Excels at writing and complex tasks"
             google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
@@ -459,6 +459,8 @@ en:
             provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
             disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
             reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
+            enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
+            reasoning_tokens: "Number of tokens used for reasoning"
           related_topics:
             title: "Related topics"

View File

@@ -38,6 +38,15 @@ module DiscourseAi
           options = { model: mapped_model, max_tokens: max_tokens }

+          if llm_model.lookup_custom_param("enable_reasoning")
+            reasoning_tokens =
+              llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+
+            # this allows for lots of tokens beyond reasoning
+            options[:max_tokens] = reasoning_tokens + 30_000
+            options[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+          end
+
           options[:stop_sequences] = ["</function_calls>"] if !dialect.native_tool_support? &&
             dialect.prompt.has_tools?
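
For illustration, with reasoning enabled and a 10,000-token budget, the options built above come out as follows (mirroring the request body asserted in the Anthropic spec later in this diff; the model value is whatever mapped_model resolves to):

    {
      model: mapped_model,
      max_tokens: 40_000, # 10_000 reasoning budget + 30_000 headroom
      thinking: {
        type: "enabled",
        budget_tokens: 10_000,
      },
    }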

View File

@@ -26,7 +26,18 @@ module DiscourseAi
            max_tokens = 4096
            max_tokens = 8192 if bedrock_model_id.match?(/3.5/)

-           { max_tokens: max_tokens, anthropic_version: "bedrock-2023-05-31" }
+           result = { anthropic_version: "bedrock-2023-05-31" }
+
+           if llm_model.lookup_custom_param("enable_reasoning")
+             reasoning_tokens =
+               llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+
+             # this allows for ample tokens beyond reasoning
+             max_tokens = reasoning_tokens + 30_000
+             result[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+           end
+
+           result[:max_tokens] = max_tokens
+           result
          else
            {}
          end
@@ -66,6 +77,8 @@ module DiscourseAi
             "anthropic.claude-3-5-sonnet-20241022-v2:0"
           when "claude-3-5-haiku"
             "anthropic.claude-3-5-haiku-20241022-v1:0"
+          when "claude-3-7-sonnet"
+            "anthropic.claude-3-7-sonnet-20250219-v1:0"
           else
             llm_model.name
           end

View File

@@ -11,9 +11,13 @@ module DiscourseAi
         def normalize_model_params(model_params)
           model_params = model_params.dup

-          # max_tokens is deprecated and is not functional on reasoning models
-          max_tokens = model_params.delete(:max_tokens)
-          model_params[:max_completion_tokens] = max_tokens if max_tokens
+          # max_tokens is deprecated however we still need to support it
+          # on older OpenAI models and older Azure models, so we will only normalize
+          # if our model name starts with o (to denote all the reasoning models)
+          if llm_model.name.starts_with?("o")
+            max_tokens = model_params.delete(:max_tokens)
+            model_params[:max_completion_tokens] = max_tokens if max_tokens
+          end

           # temperature is already supported
           if model_params[:stop_sequences]
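
A standalone sketch of the new rule, using a hypothetical normalize helper (in the plugin the name comes from llm_model.name, and starts_with? is the Rails alias of Ruby's start_with?):

    # remap max_tokens -> max_completion_tokens only for reasoning models,
    # i.e. model names starting with "o" (o1, o3-mini, ...)
    def normalize(model_name, params)
      params = params.dup
      if model_name.start_with?("o")
        max_tokens = params.delete(:max_tokens)
        params[:max_completion_tokens] = max_tokens if max_tokens
      end
      params
    end

    normalize("o3-mini", { max_tokens: 1000 }) # => { max_completion_tokens: 1000 }
    normalize("gpt-4", { max_tokens: 1000 })   # => { max_tokens: 1000 }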

View File

@@ -27,9 +27,9 @@ module DiscourseAi
           id: "anthropic",
           models: [
             {
-              name: "claude-3-5-sonnet",
+              name: "claude-3-7-sonnet",
               tokens: 200_000,
-              display_name: "Claude 3.5 Sonnet",
+              display_name: "Claude 3.7 Sonnet",
             },
             { name: "claude-3-5-haiku", tokens: 200_000, display_name: "Claude 3.5 Haiku" },
             { name: "claude-3-opus", tokens: 200_000, display_name: "Claude 3 Opus" },

View File

@@ -334,6 +334,68 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
     expect(requested_body).to eq(request_body)
   end

+  it "can support reasoning" do
+    body = <<~STRING
+      {
+        "content": [
+          {
+            "text": "Hello!",
+            "type": "text"
+          }
+        ],
+        "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
+        "model": "claude-3-opus-20240229",
+        "role": "assistant",
+        "stop_reason": "end_turn",
+        "stop_sequence": null,
+        "type": "message",
+        "usage": {
+          "input_tokens": 10,
+          "output_tokens": 25
+        }
+      }
+    STRING
+
+    parsed_body = nil
+    stub_request(:post, url).with(
+      body:
+        proc do |req_body|
+          parsed_body = JSON.parse(req_body, symbolize_names: true)
+          true
+        end,
+      headers: {
+        "Content-Type" => "application/json",
+        "X-Api-Key" => "123",
+        "Anthropic-Version" => "2023-06-01",
+      },
+    ).to_return(status: 200, body: body)
+
+    model.provider_params["enable_reasoning"] = true
+    model.provider_params["reasoning_tokens"] = 10_000
+    model.save!
+
+    proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+    result = proxy.generate(prompt, user: Discourse.system_user)
+    expect(result).to eq("Hello!")
+
+    expected_body = {
+      model: "claude-3-opus-20240229",
+      max_tokens: 40_000,
+      thinking: {
+        type: "enabled",
+        budget_tokens: 10_000,
+      },
+      messages: [{ role: "user", content: "user1: hello" }],
+      system: "You are hello bot",
+    }
+    expect(parsed_body).to eq(expected_body)
+
+    log = AiApiAuditLog.order(:id).last
+    expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
+    expect(log.request_tokens).to eq(10)
+    expect(log.response_tokens).to eq(25)
+  end
+
   it "can operate in regular mode" do
     body = <<~STRING
       {

View File

@@ -335,6 +335,57 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
       expect(log.response_tokens).to eq(20)
     end

+    it "supports thinking" do
+      model.provider_params["enable_reasoning"] = true
+      model.provider_params["reasoning_tokens"] = 10_000
+      model.save!
+
+      proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+      request = nil
+      content = {
+        content: [text: "hello sam"],
+        usage: {
+          input_tokens: 10,
+          output_tokens: 20,
+        },
+      }.to_json
+
+      stub_request(
+        :post,
+        "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
+      )
+        .with do |inner_request|
+          request = inner_request
+          true
+        end
+        .to_return(status: 200, body: content)
+
+      response = proxy.generate("hello world", user: user)
+
+      expect(request.headers["Authorization"]).to be_present
+      expect(request.headers["X-Amz-Content-Sha256"]).to be_present
+
+      expected = {
+        "max_tokens" => 40_000,
+        "thinking" => {
+          "type" => "enabled",
+          "budget_tokens" => 10_000,
+        },
+        "anthropic_version" => "bedrock-2023-05-31",
+        "messages" => [{ "role" => "user", "content" => "hello world" }],
+        "system" => "You are a helpful bot",
+      }
+
+      expect(JSON.parse(request.body)).to eq(expected)
+      expect(response).to eq("hello sam")
+
+      log = AiApiAuditLog.order(:id).last
+      expect(log.request_tokens).to eq(10)
+      expect(log.response_tokens).to eq(20)
+    end
+
     it "supports claude 3 streaming" do
       proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")

View File

@@ -285,6 +285,23 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
       end
     end

+    describe "max tokens remapping" do
+      it "remaps max_tokens to max_completion_tokens for reasoning models" do
+        model.update!(name: "o3-mini")
+        llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+        body_parsed = nil
+        stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
+          body: ->(body) { body_parsed = JSON.parse(body) },
+        ).to_return(status: 200, body: { choices: [{ message: { content: "hello" } }] }.to_json)
+
+        llm.generate("test", user: user, max_tokens: 1000)
+
+        expect(body_parsed["max_completion_tokens"]).to eq(1000)
+        expect(body_parsed["max_tokens"]).to be_nil
+      end
+    end
+
     describe "forced tool use" do
       it "can properly force tool use" do
         llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
@@ -346,9 +363,11 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
           body: proc { |body| body_json = JSON.parse(body, symbolize_names: true) },
         ).to_return(body: response)

-        result = llm.generate(prompt, user: user)
+        result = llm.generate(prompt, user: user, max_tokens: 1000)

         expect(body_json[:tool_choice]).to eq({ type: "function", function: { name: "echo" } })
+        # we expect this not to be remapped on older non reasoning models
+        expect(body_json[:max_tokens]).to eq(1000)

         log = AiApiAuditLog.order(:id).last
         expect(log.request_tokens).to eq(55)

View File

@@ -73,13 +73,15 @@ RSpec.describe "Managing LLM configurations", type: :system, js: true do
     context "when changing the provider" do
       it "has the correct provider params when visiting the edit page" do
-        llm = Fabricate(:llm_model, provider: "open_ai", provider_params: {})
+        llm =
+          Fabricate(:llm_model, provider: "anthropic", provider_params: { enable_reasoning: true })

         visit "/admin/plugins/discourse-ai/ai-llms/#{llm.id}/edit"

-        expect(form).to have_field_with_name("provider_params.organization")
         expect(form).to have_field_with_name("provider_params.disable_native_tools")
-        expect(form).to have_field_with_name("provider_params.disable_streaming")
-        expect(form).to have_field_with_name("provider_params.reasoning_effort")
+        expect(form).to have_field_with_name("provider_params.reasoning_tokens")
+
+        reasoning = form.field("provider_params.enable_reasoning")
+        expect(reasoning).to be_checked
       end

     it "correctly changes the provider params" do
       visit "/admin/plugins/discourse-ai/ai-llms"