From d97307e99bcc559e041e9484762e154ef210e18e Mon Sep 17 00:00:00 2001
From: Sam
Date: Wed, 11 Jun 2025 17:12:25 +1000
Subject: [PATCH] FEATURE: optionally support OpenAI responses API (#1423)

OpenAI ships a new API for completions called the "Responses API".

Certain models (o3-pro) require this API.
Additionally, certain features are only made available via the new API.

This allows enabling it per LLM.

see: https://platform.openai.com/docs/api-reference/responses
---
 app/models/llm_model.rb                            |   1 +
 config/locales/client.en.yml                       |   1 +
 lib/completions/dialects/chat_gpt.rb               |  37 ++-
 lib/completions/dialects/open_ai_tools.rb          |  65 +++--
 lib/completions/endpoints/open_ai.rb               |  40 ++-
 .../open_ai_responses_message_processor.rb         | 160 ++++++++++++
 .../endpoints/open_ai_responses_api_spec.rb        | 240 ++++++++++++++++++
 7 files changed, 510 insertions(+), 34 deletions(-)
 create mode 100644 lib/completions/open_ai_responses_message_processor.rb
 create mode 100644 spec/lib/completions/endpoints/open_ai_responses_api_spec.rb

diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb
index 5efbbb0f..3613183e 100644
--- a/app/models/llm_model.rb
+++ b/app/models/llm_model.rb
@@ -52,6 +52,7 @@ class LlmModel < ActiveRecord::Base
         disable_temperature: :checkbox,
         disable_top_p: :checkbox,
         disable_streaming: :checkbox,
+        enable_responses_api: :checkbox,
         reasoning_effort: {
           type: :enum,
           values: %w[default low medium high],
diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index fd1f673a..a6dcb112 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -579,6 +579,7 @@ en:
             reasoning_tokens: "Number of tokens used for reasoning"
             disable_temperature: "Disable temperature (some thinking models don't support temperature)"
             disable_top_p: "Disable top P (some thinking models don't support top P)"
+            enable_responses_api: "Enable responses API (required on certain OpenAI models)"
 
       related_topics:
         title: "Related topics"
diff --git a/lib/completions/dialects/chat_gpt.rb b/lib/completions/dialects/chat_gpt.rb
index d2d7245c..3e29dcdd 100644
--- a/lib/completions/dialects/chat_gpt.rb
+++ b/lib/completions/dialects/chat_gpt.rb
@@ -20,12 +20,19 @@ module DiscourseAi
       def embed_user_ids?
         return @embed_user_ids if defined?(@embed_user_ids)
 
-        @embed_user_ids =
+        @embed_user_ids = true if responses_api?
+
+        @embed_user_ids ||=
           prompt.messages.any? do |m|
             m[:id] && m[:type] == :user && !m[:id].to_s.match?(VALID_ID_REGEX)
           end
       end
 
+      def responses_api?
+        return @responses_api if defined?(@responses_api)
+        @responses_api = llm_model.lookup_custom_param("enable_responses_api")
+      end
+
       def max_prompt_tokens
         # provide a buffer of 120 tokens - our function counting is not
         # 100% accurate and getting numbers to align exactly is very hard
@@ -51,7 +58,11 @@ module DiscourseAi
         if disable_native_tools?
           super
         else
-          @tools_dialect ||= DiscourseAi::Completions::Dialects::OpenAiTools.new(prompt.tools)
+          @tools_dialect ||=
+            DiscourseAi::Completions::Dialects::OpenAiTools.new(
+              prompt.tools,
+              responses_api: responses_api?,
+            )
         end
       end
 
@@ -120,7 +131,7 @@ module DiscourseAi
           to_encoded_content_array(
             content: content_array.flatten,
             image_encoder: ->(details) { image_node(details) },
-            text_encoder: ->(text) { { type: "text", text: text } },
+            text_encoder: ->(text) { text_node(text) },
             allow_vision: vision_support?,
           )
 
@@ -136,13 +147,21 @@ module DiscourseAi
         end
       end
 
+      def text_node(text)
+        if responses_api?
+          { type: "input_text", text: text }
+        else
+          { type: "text", text: text }
+        end
+      end
+
       def image_node(details)
-        {
-          type: "image_url",
-          image_url: {
-            url: "data:#{details[:mime_type]};base64,#{details[:base64]}",
-          },
-        }
+        encoded_image = "data:#{details[:mime_type]};base64,#{details[:base64]}"
+        if responses_api?
+          { type: "input_image", image_url: encoded_image }
+        else
+          { type: "image_url", image_url: { url: encoded_image } }
+        end
       end
 
       def per_message_overhead
diff --git a/lib/completions/dialects/open_ai_tools.rb b/lib/completions/dialects/open_ai_tools.rb
index 4cdc97e5..3411dbfe 100644
--- a/lib/completions/dialects/open_ai_tools.rb
+++ b/lib/completions/dialects/open_ai_tools.rb
@@ -4,20 +4,32 @@ module DiscourseAi
   module Completions
     module Dialects
       class OpenAiTools
-        def initialize(tools)
+        def initialize(tools, responses_api: false)
+          @responses_api = responses_api
           @raw_tools = tools
         end
 
         def translated_tools
-          raw_tools.map do |tool|
-            {
-              type: "function",
-              function: {
+          if @responses_api
+            raw_tools.map do |tool|
+              {
+                type: "function",
                 name: tool.name,
                 description: tool.description,
                 parameters: tool.parameters_json_schema,
-              },
-            }
+              }
+            end
+          else
+            raw_tools.map do |tool|
+              {
+                type: "function",
+                function: {
+                  name: tool.name,
+                  description: tool.description,
+                  parameters: tool.parameters_json_schema,
+                },
+              }
+            end
           end
         end
@@ -30,20 +42,37 @@
           call_details[:arguments] = call_details[:arguments].to_json
           call_details[:name] = raw_message[:name]
 
-          {
-            role: "assistant",
-            content: nil,
-            tool_calls: [{ type: "function", function: call_details, id: raw_message[:id] }],
-          }
+          if @responses_api
+            {
+              type: "function_call",
+              call_id: raw_message[:id],
+              name: call_details[:name],
+              arguments: call_details[:arguments],
+            }
+          else
+            {
+              role: "assistant",
+              content: nil,
+              tool_calls: [{ type: "function", function: call_details, id: raw_message[:id] }],
+            }
+          end
         end
 
         def from_raw_tool(raw_message)
-          {
-            role: "tool",
-            tool_call_id: raw_message[:id],
-            content: raw_message[:content],
-            name: raw_message[:name],
-          }
+          if @responses_api
+            {
+              type: "function_call_output",
+              call_id: raw_message[:id],
+              output: raw_message[:content],
+            }
+          else
+            {
+              role: "tool",
+              tool_call_id: raw_message[:id],
+              content: raw_message[:content],
+              name: raw_message[:name],
+            }
+          end
         end
 
         private
diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb
index 070d391c..36e49a4a 100644
--- a/lib/completions/endpoints/open_ai.rb
+++ b/lib/completions/endpoints/open_ai.rb
@@ -89,6 +89,7 @@ module DiscourseAi
           # We'll fallback to guess this using the tokenizer.
           payload[:stream_options] = { include_usage: true } if llm_model.provider == "open_ai"
         end
+
         if !xml_tools_enabled?
           if dialect.tools.present?
             payload[:tools] = dialect.tools
@@ -96,19 +97,39 @@
             if dialect.tool_choice == :none
               payload[:tool_choice] = "none"
             else
-              payload[:tool_choice] = {
-                type: "function",
-                function: {
-                  name: dialect.tool_choice,
-                },
-              }
+              if responses_api?
+                payload[:tool_choice] = { type: "function", name: dialect.tool_choice }
+              else
+                payload[:tool_choice] = {
+                  type: "function",
+                  function: {
+                    name: dialect.tool_choice,
+                  },
+                }
+              end
             end
           end
         end
+
+        convert_payload_to_responses_api!(payload) if responses_api?
+
         payload
       end
 
+      def responses_api?
+        return @responses_api if defined?(@responses_api)
+        @responses_api = llm_model.lookup_custom_param("enable_responses_api")
+      end
+
+      def convert_payload_to_responses_api!(payload)
+        payload[:input] = payload.delete(:messages)
+        completion_tokens = payload.delete(:max_completion_tokens) || payload.delete(:max_tokens)
+        payload[:max_output_tokens] = completion_tokens if completion_tokens
+        # not supported in responses api
+        payload.delete(:stream_options)
+      end
+
       def prepare_request(payload)
         headers = { "Content-Type" => "application/json" }
         api_key = llm_model.api_key
@@ -159,7 +180,12 @@
        private
 
        def processor
-         @processor ||= OpenAiMessageProcessor.new(partial_tool_calls: partial_tool_calls)
+         @processor ||=
+           if responses_api?
+             OpenAiResponsesMessageProcessor.new(partial_tool_calls: partial_tool_calls)
+           else
+             OpenAiMessageProcessor.new(partial_tool_calls: partial_tool_calls)
+           end
        end
      end
   end
diff --git a/lib/completions/open_ai_responses_message_processor.rb b/lib/completions/open_ai_responses_message_processor.rb
new file mode 100644
index 00000000..51381df7
--- /dev/null
+++ b/lib/completions/open_ai_responses_message_processor.rb
@@ -0,0 +1,160 @@
+# frozen_string_literal: true
+module DiscourseAi::Completions
+  class OpenAiResponsesMessageProcessor
+    attr_reader :prompt_tokens, :completion_tokens, :cached_tokens
+
+    def initialize(partial_tool_calls: false)
+      @tool = nil # currently streaming ToolCall
+      @tool_arguments = +""
+      @prompt_tokens = nil
+      @completion_tokens = nil
+      @cached_tokens = nil
+      @partial_tool_calls = partial_tool_calls
+      @streaming_parser = nil # JsonStreamingTracker, if used
+      @has_new_data = false
+    end
+
+    # @param json [Hash] full JSON response from responses.create / retrieve
+    # @return [Array] pieces in the order they were produced
+    def process_message(json)
+      result = []
+
+      (json[:output] || []).each do |item|
+        type = item[:type]
+
+        case type
+        when "function_call"
+          result << build_tool_call_from_item(item)
+        when "message"
+          text = extract_text(item)
+          result << text if text
+        end
+      end
+
+      update_usage(json)
+      result
+    end
+
+    # @param json [Hash] a single streamed event, already parsed from ND-JSON
+    # @return [String, ToolCall, nil] only when a complete chunk is ready
+    def process_streamed_message(json)
+      rval = nil
+      event_type = json[:type] || json["type"]
+
+      case event_type
+      when "response.output_text.delta"
+        delta = json[:delta] || json["delta"]
+        rval = delta if !delta.empty?
+      when "response.output_item.added"
+        item = json[:item]
+        if item && item[:type] == "function_call"
+          handle_tool_stream(:start, item) { |finished| rval = finished }
+        end
+      when "response.function_call_arguments.delta"
+        delta = json[:delta]
+        handle_tool_stream(:progress, delta) { |finished| rval = finished } if delta
+      when "response.output_item.done"
+        item = json[:item]
+        if item && item[:type] == "function_call"
+          handle_tool_stream(:done, item) { |finished| rval = finished }
+        end
+      end
+
+      update_usage(json)
+      rval
+    end
+
+    # Called by JsonStreamingTracker when partial JSON arguments are parsed
+    def notify_progress(key, value)
+      if @tool
+        @tool.partial = true
+        @tool.parameters[key.to_sym] = value
+        @has_new_data = true
+      end
+    end
+
+    def current_tool_progress
+      if @has_new_data
+        @has_new_data = false
+        @tool
+      end
+    end
+
+    def finish
+      rval = []
+      if @tool
+        process_arguments
+        rval << @tool
+        @tool = nil
+      end
+      rval
+    end
+
+    private
+
+    def extract_text(message_item)
+      (message_item[:content] || message_item["content"] || [])
+        .filter { |c| (c[:type] || c["type"]) == "output_text" }
+        .map { |c| c[:text] || c["text"] }
+        .join
+    end
+
+    def build_tool_call_from_item(item)
+      id = item[:call_id]
+      name = item[:name]
+      arguments = item[:arguments] || ""
+      params = arguments.empty? ? {} : JSON.parse(arguments, symbolize_names: true)
+
+      ToolCall.new(id: id, name: name, parameters: params)
+    end
+
+    def handle_tool_stream(event_type, json)
+      if event_type == :start
+        start_tool_stream(json)
+      elsif event_type == :progress
+        @streaming_parser << json if @streaming_parser
+        yield current_tool_progress
+      elsif event_type == :done
+        @tool_arguments << json[:arguments].to_s
+        process_arguments
+        finished = @tool
+        @tool = nil
+        yield finished
+      end
+    end
+
+    def start_tool_stream(data)
+      # important note... streaming API has both id and call_id
+      # both seem to work as identifiers, api examples seem to favor call_id
+      # so I am using it here
+      id = data[:call_id]
+      name = data[:name]
+
+      @tool_arguments = +""
+      @tool = ToolCall.new(id: id, name: name)
+      @streaming_parser = JsonStreamingTracker.new(self) if @partial_tool_calls
+    end
+
+    # Parse accumulated @tool_arguments once we have a complete JSON blob
+    def process_arguments
+      return if @tool_arguments.to_s.empty?
+      parsed = JSON.parse(@tool_arguments, symbolize_names: true)
+      @tool.parameters = parsed
+      @tool.partial = false
+      @tool_arguments = nil
+    rescue JSON::ParserError
+      # leave arguments empty; caller can decide how to handle
+    end
+
+    def update_usage(json)
+      usage = json.dig(:response, :usage)
+      return if !usage
+
+      cached_tokens = usage.dig(:input_tokens_details, :cached_tokens).to_i
+
+      @prompt_tokens ||= usage[:input_tokens] - cached_tokens
+      @completion_tokens ||= usage[:output_tokens]
+      @cached_tokens ||= cached_tokens
+    end
+  end
+end
diff --git a/spec/lib/completions/endpoints/open_ai_responses_api_spec.rb b/spec/lib/completions/endpoints/open_ai_responses_api_spec.rb
new file mode 100644
index 00000000..f426da29
--- /dev/null
+++ b/spec/lib/completions/endpoints/open_ai_responses_api_spec.rb
@@ -0,0 +1,240 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
+  subject(:endpoint) { described_class.new(model) }
+
+  fab!(:model) do
+    Fabricate(
+      :llm_model,
+      provider: "open_ai",
+      url: "https://api.openai.com/v1/responses",
+      provider_params: {
+        enable_responses_api: true,
+      },
+    )
+  end
+
+  let(:prompt_with_tools) do
+    prompt = DiscourseAi::Completions::Prompt.new("echo: Hello")
+    prompt.tools = [
+      DiscourseAi::Completions::ToolDefinition.new(
+        name: "echo",
+        description: "Used for testing of llms, will echo the param given to it",
+        parameters: [
+          DiscourseAi::Completions::ToolDefinition::ParameterDefinition.from_hash(
+            { name: "string", description: "string to echo", type: :string, required: true },
+          ),
+        ],
+      ),
+    ]
+    prompt
+  end
+
+  it "can perform simple streaming completion" do
+    response_payload = <<~TEXT
+      event: response.created
+      data: {"type":"response.created","sequence_number":0,"response":{"id":"resp_6848d84bee44819d98e5f4f5103562090333bc932679b022","object":"response","created_at":1749604427,"status":"in_progress","background":false,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"model":"gpt-4.1-nano-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"reasoning":{"effort":null,"summary":null},"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"}},"tool_choice":"auto","tools":[],"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}}}

+      event: response.in_progress
+      data: {"type":"response.in_progress","sequence_number":1,"response":{"id":"resp_6848d84bee44819d98e5f4f5103562090333bc932679b022","object":"response","created_at":1749604427,"status":"in_progress","background":false,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"model":"gpt-4.1-nano-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"reasoning":{"effort":null,"summary":null},"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"}},"tool_choice":"auto","tools":[],"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}}}

+      event: response.output_item.added
+      data: {"type":"response.output_item.added","sequence_number":2,"output_index":0,"item":{"id":"msg_6848d84c3bc8819dace0eadec6e205090333bc932679b022","type":"message","status":"in_progress","content":[],"role":"assistant"}}

+      event: response.content_part.added
+      data: {"type":"response.content_part.added","sequence_number":3,"item_id":"msg_6848d84c3bc8819dace0eadec6e205090333bc932679b022","output_index":0,"content_index":0,"part":{"type":"output_text","annotations":[],"text":""}}

+      event: response.output_text.delta
+      data: {"type":"response.output_text.delta","sequence_number":4,"item_id":"msg_6848d84c3bc8819dace0eadec6e205090333bc932679b022","output_index":0,"content_index":0,"delta":"Hello"}

+      event: response.output_text.delta
+      data: {"type":"response.output_text.delta","sequence_number":5,"item_id":"msg_6848d84c3bc8819dace0eadec6e205090333bc932679b022","output_index":0,"content_index":0,"delta":" "}

+      event: response.output_text.delta
+      data: {"type":"response.output_text.delta","sequence_number":5,"item_id":"msg_6848d84c3bc8819dace0eadec6e205090333bc932679b022","output_index":0,"content_index":0,"delta":"World"}

+      event: response.output_text.done
+      data: {"type":"response.output_text.done","sequence_number":5,"item_id":"msg_6848d84c3bc8819dace0eadec6e205090333bc932679b022","output_index":0,"content_index":0,"text":"Hello World"}

+      event: response.content_part.done
+      data: {"type":"response.content_part.done","sequence_number":6,"item_id":"msg_6848d84c3bc8819dace0eadec6e205090333bc932679b022","output_index":0,"content_index":0,"part":{"type":"output_text","annotations":[],"text":"Hello World"}}

+      event: response.output_item.done
+      data: {"type":"response.output_item.done","sequence_number":7,"output_index":0,"item":{"id":"msg_6848d84c3bc8819dace0eadec6e205090333bc932679b022","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Hello World"}],"role":"assistant"}}

+      event: response.completed
+      data: {"type":"response.completed","sequence_number":8,"response":{"id":"resp_6848d84bee44819d98e5f4f5103562090333bc932679b022","object":"response","created_at":1749604427,"status":"completed","background":false,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"model":"gpt-4.1-nano-2025-04-14","output":[{"id":"msg_6848d84c3bc8819dace0eadec6e205090333bc932679b022","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"text":"Hello"}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"reasoning":{"effort":null,"summary":null},"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"}},"tool_choice":"auto","tools":[],"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":35,"input_tokens_details":{"cached_tokens":5},"output_tokens":9,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":37},"user":null,"metadata":{}}}
+    TEXT
+
+    partials = []
+
+    stub_request(:post, "https://api.openai.com/v1/responses").to_return(
+      status: 200,
+      body: response_payload,
+    )
+
+    model
+      .to_llm
+      .generate("Say: Hello World", user: Discourse.system_user) { |partial| partials << partial }
+
+    expect(partials).to eq(["Hello", " ", "World"])
+
+    log = AiApiAuditLog.last
+
+    # note: our report counts cache and request tokens separately, see: DiscourseAi::Completions::Report
+    expect(log).to be_present
+    expect(log.request_tokens).to eq(30)
+    expect(log.response_tokens).to eq(9)
+    expect(log.cached_tokens).to eq(5)
+  end
+
+  it "can properly stream tool calls" do
+    response_payload = <<~TEXT
+      event: response.created
+      data: {"type":"response.created","sequence_number":0,"response":{"id":"resp_684910c81eec81a3a9222aa336d9fcf202d35c1819a50f63","object":"response","created_at":1749618888,"status":"in_progress","background":false,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"model":"gpt-4.1-nano-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"reasoning":{"effort":null,"summary":null},"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"}},"tool_choice":{"type":"function","name":"echo"},"tools":[{"type":"function","description":"Used for testing of llms, will echo the param given to it","name":"echo","parameters":{"type":"object","properties":{"string":{"type":"string","description":"string to echo"}},"required":["string"]},"strict":true}],"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}}}

+      event: response.in_progress
+      data: {"type":"response.in_progress","sequence_number":1,"response":{"id":"resp_684910c81eec81a3a9222aa336d9fcf202d35c1819a50f63","object":"response","created_at":1749618888,"status":"in_progress","background":false,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"model":"gpt-4.1-nano-2025-04-14","output":[],"parallel_tool_calls":true,"previous_response_id":null,"reasoning":{"effort":null,"summary":null},"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"}},"tool_choice":{"type":"function","name":"echo"},"tools":[{"type":"function","description":"Used for testing of llms, will echo the param given to it","name":"echo","parameters":{"type":"object","properties":{"string":{"type":"string","description":"string to echo"}},"required":["string"]},"strict":true}],"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}}}

+      event: response.output_item.added
+      data: {"type":"response.output_item.added","sequence_number":2,"output_index":0,"item":{"id":"fc_684910c8b68881a3b43610e1d57ef00702d35c1819a50f63","type":"function_call","status":"in_progress","arguments":"","call_id":"call_TQyfNmFnKblzXl5rlcGeIsg5","name":"echo"}}

+      event: response.function_call_arguments.delta
+      data: {"type":"response.function_call_arguments.delta","sequence_number":3,"item_id":"fc_684910c8b68881a3b43610e1d57ef00702d35c1819a50f63","output_index":0,"delta":"{\\""}

+      event: response.function_call_arguments.delta
+      data: {"type":"response.function_call_arguments.delta","sequence_number":4,"item_id":"fc_684910c8b68881a3b43610e1d57ef00702d35c1819a50f63","output_index":0,"delta":"string"}

+      event: response.function_call_arguments.delta
+      data: {"type":"response.function_call_arguments.delta","sequence_number":5,"item_id":"fc_684910c8b68881a3b43610e1d57ef00702d35c1819a50f63","output_index":0,"delta":"\\":\\""}

+      event: response.function_call_arguments.delta
+      data: {"type":"response.function_call_arguments.delta","sequence_number":6,"item_id":"fc_684910c8b68881a3b43610e1d57ef00702d35c1819a50f63","output_index":0,"delta":"hello"}

+      event: response.function_call_arguments.delta
+      data: {"type":"response.function_call_arguments.delta","sequence_number":7,"item_id":"fc_684910c8b68881a3b43610e1d57ef00702d35c1819a50f63","output_index":0,"delta":"\\"}"}

+      event: response.function_call_arguments.done
+      data: {"type":"response.function_call_arguments.done","sequence_number":8,"item_id":"fc_684910c8b68881a3b43610e1d57ef00702d35c1819a50f63","output_index":0,"arguments":"{\\"string\\":\\"hello\\"}"}

+      event: response.output_item.done
+      data: {"type":"response.output_item.done","sequence_number":9,"output_index":0,"item":{"id":"fc_684910c8b68881a3b43610e1d57ef00702d35c1819a50f63","type":"function_call","status":"completed","arguments":"{\\"string\\":\\"hello\\"}","call_id":"call_TQyfNmFnKblzXl5rlcGeIsg5","name":"echo"}}

+      event: response.completed
+      data: {"type":"response.completed","sequence_number":10,"response":{"id":"resp_684910c81eec81a3a9222aa336d9fcf202d35c1819a50f63","object":"response","created_at":1749618888,"status":"completed","background":false,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":null,"model":"gpt-4.1-nano-2025-04-14","output":[{"id":"fc_684910c8b68881a3b43610e1d57ef00702d35c1819a50f63","type":"function_call","status":"completed","arguments":"{\\"string\\":\\"hello\\"}","call_id":"call_TQyfNmFnKblzXl5rlcGeIsg5","name":"echo"}],"parallel_tool_calls":true,"previous_response_id":null,"reasoning":{"effort":null,"summary":null},"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"}},"tool_choice":{"type":"function","name":"echo"},"tools":[{"type":"function","description":"Used for testing of llms, will echo the param given to it","name":"echo","parameters":{"type":"object","properties":{"string":{"type":"string","description":"string to echo"}},"required":["string"]},"strict":true}],"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":71,"input_tokens_details":{"cached_tokens":0},"output_tokens":6,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":77},"user":null,"metadata":{}}}

+    TEXT
+
+    partials = []
+
+    stub_request(:post, "https://api.openai.com/v1/responses").to_return(
+      status: 200,
+      body: response_payload,
+    )
+
+    model
+      .to_llm
+      .generate(
+        prompt_with_tools,
+        user: Discourse.system_user,
+        partial_tool_calls: true,
+      ) { |partial| partials << partial.dup }
+
+    # the partial tools are deduped
+    expect(partials.length).to eq(1)
+
+    expect(partials.first).to be_a(DiscourseAi::Completions::ToolCall)
+    expect(partials.first.name).to eq("echo")
+    expect(partials.first.parameters).to eq({ string: "hello" })
+    expect(partials.first.id).to eq("call_TQyfNmFnKblzXl5rlcGeIsg5")
+  end
+
+  it "can handle non streaming tool calls" do
+    response_object = {
+      id: "resp_68491ed72974819f94652a73fb58109c08901d75ebf6c66e",
+      object: "response",
+      created_at: 1_749_622_487,
+      status: "completed",
+      background: false,
+      error: nil,
+      incomplete_details: nil,
+      instructions: nil,
+      max_output_tokens: nil,
+      model: "gpt-4.1-nano-2025-04-14",
+      output: [
+        {
+          id: "fc_68491ed75e0c819f87462ff642c58d2e08901d75ebf6c66e",
+          type: "function_call",
+          status: "completed",
+          arguments: "{\"string\":\"sam\"}",
+          call_id: "call_UdxBpinIVc5nRZ0VnWJIgneA",
+          name: "echo",
+        },
+      ],
+      parallel_tool_calls: true,
+      previous_response_id: nil,
+      reasoning: {
+        effort: nil,
+        summary: nil,
+      },
+      service_tier: "default",
+      store: true,
+      temperature: 1.0,
+      text: {
+        format: {
+          type: "text",
+        },
+      },
+      tool_choice: {
+        type: "function",
+        name: "echo",
+      },
+      tools: [
+        {
+          type: "function",
+          description: "Used for testing of llms, will echo the param given to it",
+          name: "echo",
+          parameters: {
+            type: "object",
+            properties: {
+              string: {
+                type: "string",
+                description: "string to echo",
+              },
+            },
+            required: ["string"],
+          },
+          strict: true,
+        },
+      ],
+      top_p: 1.0,
+      truncation: "disabled",
+      usage: {
+        input_tokens: 73,
+        input_tokens_details: {
+          cached_tokens: 0,
+        },
+        output_tokens: 6,
+        output_tokens_details: {
+          reasoning_tokens: 0,
+        },
+        total_tokens: 79,
+      },
+      user: nil,
+      metadata: {
+      },
+    }
+
+    stub_request(:post, "https://api.openai.com/v1/responses").to_return(
+      status: 200,
+      body: response_object.to_json,
+    )
+
+    result = model.to_llm.generate(prompt_with_tools, user: Discourse.system_user)
+
+    expect(result).to be_a(DiscourseAi::Completions::ToolCall)
+    expect(result.name).to eq("echo")
+    expect(result.parameters).to eq({ string: "sam" })
+    expect(result.id).to eq("call_UdxBpinIVc5nRZ0VnWJIgneA")
+  end
+end
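
Note (editor's addition, not part of the patch): a minimal standalone sketch of the payload reshaping that convert_payload_to_responses_api! performs. The payload values and model name here are hypothetical placeholders; the three transformations are the ones the patch applies: messages becomes input, the max-token key becomes max_output_tokens, and stream_options is dropped because the Responses API does not support it.

# Illustrative sketch only -- mirrors convert_payload_to_responses_api! above.
payload = {
  model: "gpt-4.1-nano", # placeholder model name
  messages: [{ role: "user", content: "Say: Hello World" }],
  max_completion_tokens: 256,
  stream_options: { include_usage: true },
}

# messages -> input
payload[:input] = payload.delete(:messages)

# max_completion_tokens (or legacy max_tokens) -> max_output_tokens
completion_tokens = payload.delete(:max_completion_tokens) || payload.delete(:max_tokens)
payload[:max_output_tokens] = completion_tokens if completion_tokens

# not supported in the responses API
payload.delete(:stream_options)

payload
# => {
#      model: "gpt-4.1-nano",
#      input: [{ role: "user", content: "Say: Hello World" }],
#      max_output_tokens: 256,
#    }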