mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-06-26 17:42:15 +00:00
FEATURE: allow disabling of top_p and temp for thinking models (#1184)
thinking models such as Claude 3.7 Thinking and o1 / o3 do not support top_p or temp. Previously you would have to carefully remove it from everywhere by having it be a provider param; we now support blanket removal without forcing people to update automation rules or personas
This commit is contained in:
parent
f4708d4178
commit
8f4cd2fcbd
@ -26,17 +26,23 @@ class LlmModel < ActiveRecord::Base
|
|||||||
access_key_id: :text,
|
access_key_id: :text,
|
||||||
region: :text,
|
region: :text,
|
||||||
disable_native_tools: :checkbox,
|
disable_native_tools: :checkbox,
|
||||||
|
disable_temperature: :checkbox,
|
||||||
|
disable_top_p: :checkbox,
|
||||||
enable_reasoning: :checkbox,
|
enable_reasoning: :checkbox,
|
||||||
reasoning_tokens: :number,
|
reasoning_tokens: :number,
|
||||||
},
|
},
|
||||||
anthropic: {
|
anthropic: {
|
||||||
disable_native_tools: :checkbox,
|
disable_native_tools: :checkbox,
|
||||||
|
disable_temperature: :checkbox,
|
||||||
|
disable_top_p: :checkbox,
|
||||||
enable_reasoning: :checkbox,
|
enable_reasoning: :checkbox,
|
||||||
reasoning_tokens: :number,
|
reasoning_tokens: :number,
|
||||||
},
|
},
|
||||||
open_ai: {
|
open_ai: {
|
||||||
organization: :text,
|
organization: :text,
|
||||||
disable_native_tools: :checkbox,
|
disable_native_tools: :checkbox,
|
||||||
|
disable_temperature: :checkbox,
|
||||||
|
disable_top_p: :checkbox,
|
||||||
disable_streaming: :checkbox,
|
disable_streaming: :checkbox,
|
||||||
reasoning_effort: {
|
reasoning_effort: {
|
||||||
type: :enum,
|
type: :enum,
|
||||||
@ -69,6 +75,8 @@ class LlmModel < ActiveRecord::Base
|
|||||||
provider_order: :text,
|
provider_order: :text,
|
||||||
provider_quantizations: :text,
|
provider_quantizations: :text,
|
||||||
disable_streaming: :checkbox,
|
disable_streaming: :checkbox,
|
||||||
|
disable_temperature: :checkbox,
|
||||||
|
disable_top_p: :checkbox,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
@ -488,6 +488,8 @@ en:
|
|||||||
reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
|
reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
|
||||||
enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
|
enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
|
||||||
reasoning_tokens: "Number of tokens used for reasoning"
|
reasoning_tokens: "Number of tokens used for reasoning"
|
||||||
|
disable_temperature: "Disable temperature (some thinking models don't support temperature)"
|
||||||
|
disable_top_p: "Disable top P (some thinking models don't support top P)"
|
||||||
|
|
||||||
related_topics:
|
related_topics:
|
||||||
title: "Related topics"
|
title: "Related topics"
|
||||||
|
@ -10,6 +10,9 @@ module DiscourseAi
|
|||||||
|
|
||||||
def normalize_model_params(model_params)
|
def normalize_model_params(model_params)
|
||||||
# max_tokens, temperature, stop_sequences are already supported
|
# max_tokens, temperature, stop_sequences are already supported
|
||||||
|
model_params = model_params.dup
|
||||||
|
model_params.delete(:top_p) if llm_model.lookup_custom_param("disable_top_p")
|
||||||
|
model_params.delete(:temperature) if llm_model.lookup_custom_param("disable_temperature")
|
||||||
model_params
|
model_params
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -16,6 +16,9 @@ module DiscourseAi
|
|||||||
model_params = model_params.dup
|
model_params = model_params.dup
|
||||||
|
|
||||||
# max_tokens, temperature, stop_sequences, top_p are already supported
|
# max_tokens, temperature, stop_sequences, top_p are already supported
|
||||||
|
#
|
||||||
|
model_params.delete(:top_p) if llm_model.lookup_custom_param("disable_top_p")
|
||||||
|
model_params.delete(:temperature) if llm_model.lookup_custom_param("disable_temperature")
|
||||||
|
|
||||||
model_params
|
model_params
|
||||||
end
|
end
|
||||||
|
@ -24,6 +24,9 @@ module DiscourseAi
|
|||||||
model_params[:stop] = model_params.delete(:stop_sequences)
|
model_params[:stop] = model_params.delete(:stop_sequences)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
model_params.delete(:top_p) if llm_model.lookup_custom_param("disable_top_p")
|
||||||
|
model_params.delete(:temperature) if llm_model.lookup_custom_param("disable_temperature")
|
||||||
|
|
||||||
model_params
|
model_params
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -16,6 +16,9 @@ module DiscourseAi
|
|||||||
model_params[:stop] = model_params.delete(:stop_sequences)
|
model_params[:stop] = model_params.delete(:stop_sequences)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
model_params.delete(:top_p) if llm_model.lookup_custom_param("disable_top_p")
|
||||||
|
model_params.delete(:temperature) if llm_model.lookup_custom_param("disable_temperature")
|
||||||
|
|
||||||
model_params
|
model_params
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -664,4 +664,54 @@ data: {"type":"content_block_start","index":0,"content_block":{"type":"redacted_
|
|||||||
expect(log.feature_name).to eq("testing")
|
expect(log.feature_name).to eq("testing")
|
||||||
expect(log.response_tokens).to eq(30)
|
expect(log.response_tokens).to eq(30)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "parameter disabling" do
|
||||||
|
it "excludes disabled parameters from the request" do
|
||||||
|
model.update!(provider_params: { disable_top_p: true, disable_temperature: true })
|
||||||
|
|
||||||
|
parsed_body = nil
|
||||||
|
stub_request(:post, url).with(
|
||||||
|
body:
|
||||||
|
proc do |req_body|
|
||||||
|
parsed_body = JSON.parse(req_body, symbolize_names: true)
|
||||||
|
true
|
||||||
|
end,
|
||||||
|
headers: {
|
||||||
|
"Content-Type" => "application/json",
|
||||||
|
"X-Api-Key" => "123",
|
||||||
|
"Anthropic-Version" => "2023-06-01",
|
||||||
|
},
|
||||||
|
).to_return(
|
||||||
|
status: 200,
|
||||||
|
body: {
|
||||||
|
id: "msg_123",
|
||||||
|
type: "message",
|
||||||
|
role: "assistant",
|
||||||
|
content: [{ type: "text", text: "test response" }],
|
||||||
|
model: "claude-3-opus-20240229",
|
||||||
|
usage: {
|
||||||
|
input_tokens: 10,
|
||||||
|
output_tokens: 5,
|
||||||
|
},
|
||||||
|
}.to_json,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Request with parameters that should be ignored
|
||||||
|
llm.generate(
|
||||||
|
prompt,
|
||||||
|
user: Discourse.system_user,
|
||||||
|
top_p: 0.9,
|
||||||
|
temperature: 0.8,
|
||||||
|
max_tokens: 500,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify disabled parameters aren't included
|
||||||
|
expect(parsed_body).not_to have_key(:top_p)
|
||||||
|
expect(parsed_body).not_to have_key(:temperature)
|
||||||
|
|
||||||
|
# Verify other parameters still work
|
||||||
|
expect(parsed_body).to have_key(:max_tokens)
|
||||||
|
expect(parsed_body[:max_tokens]).to eq(500)
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
@ -436,4 +436,52 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "parameter disabling" do
|
||||||
|
it "excludes disabled parameters from the request" do
|
||||||
|
model.update!(
|
||||||
|
provider_params: {
|
||||||
|
access_key_id: "123",
|
||||||
|
region: "us-east-1",
|
||||||
|
disable_top_p: true,
|
||||||
|
disable_temperature: true,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
|
||||||
|
request = nil
|
||||||
|
|
||||||
|
content = {
|
||||||
|
content: [text: "test response"],
|
||||||
|
usage: {
|
||||||
|
input_tokens: 10,
|
||||||
|
output_tokens: 5,
|
||||||
|
},
|
||||||
|
}.to_json
|
||||||
|
|
||||||
|
stub_request(
|
||||||
|
:post,
|
||||||
|
"https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
|
||||||
|
)
|
||||||
|
.with do |inner_request|
|
||||||
|
request = inner_request
|
||||||
|
true
|
||||||
|
end
|
||||||
|
.to_return(status: 200, body: content)
|
||||||
|
|
||||||
|
# Request with parameters that should be ignored
|
||||||
|
proxy.generate("test prompt", user: user, top_p: 0.9, temperature: 0.8, max_tokens: 500)
|
||||||
|
|
||||||
|
# Parse the request body
|
||||||
|
request_body = JSON.parse(request.body)
|
||||||
|
|
||||||
|
# Verify disabled parameters aren't included
|
||||||
|
expect(request_body).not_to have_key("top_p")
|
||||||
|
expect(request_body).not_to have_key("temperature")
|
||||||
|
|
||||||
|
# Verify other parameters still work
|
||||||
|
expect(request_body).to have_key("max_tokens")
|
||||||
|
expect(request_body["max_tokens"]).to eq(500)
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
@ -395,6 +395,37 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "parameter disabling" do
|
||||||
|
it "excludes disabled parameters from the request" do
|
||||||
|
model.update!(provider_params: { disable_top_p: true, disable_temperature: true })
|
||||||
|
|
||||||
|
parsed_body = nil
|
||||||
|
stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
|
||||||
|
body:
|
||||||
|
proc do |req_body|
|
||||||
|
parsed_body = JSON.parse(req_body, symbolize_names: true)
|
||||||
|
true
|
||||||
|
end,
|
||||||
|
).to_return(
|
||||||
|
status: 200,
|
||||||
|
body: { choices: [{ message: { content: "test response" } }] }.to_json,
|
||||||
|
)
|
||||||
|
|
||||||
|
dialect = compliance.dialect(prompt: compliance.generic_prompt)
|
||||||
|
|
||||||
|
# Request with parameters that should be ignored
|
||||||
|
endpoint.perform_completion!(dialect, user, { top_p: 0.9, temperature: 0.8, max_tokens: 100 })
|
||||||
|
|
||||||
|
# Verify disabled parameters aren't included
|
||||||
|
expect(parsed_body).not_to have_key(:top_p)
|
||||||
|
expect(parsed_body).not_to have_key(:temperature)
|
||||||
|
|
||||||
|
# Verify other parameters still work
|
||||||
|
expect(parsed_body).to have_key(:max_tokens)
|
||||||
|
expect(parsed_body[:max_tokens]).to eq(100)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
describe "image support" do
|
describe "image support" do
|
||||||
it "can handle images" do
|
it "can handle images" do
|
||||||
model = Fabricate(:llm_model, vision_enabled: true)
|
model = Fabricate(:llm_model, vision_enabled: true)
|
||||||
|
@ -44,4 +44,35 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenRouter do
|
|||||||
|
|
||||||
expect(parsed_body).to eq(expected)
|
expect(parsed_body).to eq(expected)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "excludes disabled parameters from the request" do
|
||||||
|
open_router_model.update!(provider_params: { disable_top_p: true, disable_temperature: true })
|
||||||
|
|
||||||
|
parsed_body = nil
|
||||||
|
stub_request(:post, open_router_model.url).with(
|
||||||
|
body: proc { |body| parsed_body = JSON.parse(body, symbolize_names: true) },
|
||||||
|
headers: {
|
||||||
|
"Content-Type" => "application/json",
|
||||||
|
"X-Title" => "Discourse AI",
|
||||||
|
"HTTP-Referer" => "https://www.discourse.org/ai",
|
||||||
|
"Authorization" => "Bearer 123",
|
||||||
|
},
|
||||||
|
).to_return(
|
||||||
|
status: 200,
|
||||||
|
body: { "choices" => [message: { role: "assistant", content: "test response" }] }.to_json,
|
||||||
|
)
|
||||||
|
|
||||||
|
proxy = DiscourseAi::Completions::Llm.proxy("custom:#{open_router_model.id}")
|
||||||
|
|
||||||
|
# Request with parameters that should be ignored
|
||||||
|
proxy.generate("test", user: user, top_p: 0.9, temperature: 0.8, max_tokens: 500)
|
||||||
|
|
||||||
|
# Verify disabled parameters aren't included
|
||||||
|
expect(parsed_body).not_to have_key(:top_p)
|
||||||
|
expect(parsed_body).not_to have_key(:temperature)
|
||||||
|
|
||||||
|
# Verify other parameters still work
|
||||||
|
expect(parsed_body).to have_key(:max_tokens)
|
||||||
|
expect(parsed_body[:max_tokens]).to eq(500)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
Loading…
x
Reference in New Issue
Block a user