discourse-ai/lib/completions/endpoints/canned_response.rb
Sam 823e8ef490
FEATURE: partial tool call support for OpenAI and Anthropic (#908)
Implement streaming tool call implementation for Anthropic and Open AI.

When calling:

llm.generate(..., partial_tool_calls: true) do ...
Partials may contain ToolCall instances with partial: true. These tool calls are partially populated with the JSON parsed so far.

So for example when performing a search you may get:

ToolCall(..., {search: "hello" })
ToolCall(..., {search: "hello world" })

The library used to parse json is:

https://github.com/dgraham/json-stream

We use a fork because we need access to the internal buffer.

This prepares internals to perform partial tool calls, but does not implement it yet.
2024-11-14 06:58:24 +11:00

79 lines
2.0 KiB
Ruby

# frozen_string_literal: true
module DiscourseAi
  module Completions
    module Endpoints
      # Test double for an LLM endpoint: instead of calling a real provider,
      # it replays a fixed list of canned responses. Each call to
      # #perform_completion! consumes the next entry in +responses+.
      class CannedResponse
        # Raised when more completions are requested than canned responses exist.
        CANNED_RESPONSE_ERROR = Class.new(StandardError)

        # @param responses [Array] entries may be Strings, ToolCall instances,
        #   Arrays of those (streamed as a sequence), or StandardError
        #   instances (which are raised when reached).
        def initialize(responses)
          @responses = responses
          @completions = 0
          @dialect = nil
        end

        # No-op normalization: canned responses accept the params as-is.
        def normalize_model_params(model_params)
          # max_tokens, temperature, stop_sequences are already supported
          model_params
        end

        attr_reader :responses, :completions, :dialect

        # Messages from the dialect captured by the last #perform_completion! call.
        def prompt_messages
          dialect.prompt.messages
        end

        # Returns the next canned response. When a block is given, strings are
        # streamed character by character while tool calls are yielded whole;
        # the block also receives a cancel lambda that stops the char stream.
        # A single-element Array response is unwrapped before being returned.
        def perform_completion!(
          dialect,
          _user,
          _model_params,
          feature_name: nil,
          feature_context: nil,
          partial_tool_calls: false
        )
          @dialect = dialect
          response = responses[completions]
          if response.nil?
            raise CANNED_RESPONSE_ERROR,
                  "The number of completions you requested exceed the number of canned responses"
          end

          raise response if response.is_a?(StandardError)

          @completions += 1
          if block_given?
            cancelled = false
            cancel_fn = lambda { cancelled = true }

            # We buffer and return tool invocations in one go.
            # The block parameter is named `chunk` so it does not shadow the
            # outer `response` local, which is still read after this loop.
            as_array = response.is_a?(Array) ? response : [response]
            as_array.each do |chunk|
              if is_tool?(chunk)
                yield(chunk, cancel_fn)
              else
                chunk.each_char do |char|
                  break if cancelled
                  yield(char, cancel_fn)
                end
              end
            end
          end

          response = response.first if response.is_a?(Array) && response.length == 1
          response
        end

        def tokenizer
          DiscourseAi::Tokenizer::OpenAiTokenizer
        end

        private

        def is_tool?(response)
          response.is_a?(DiscourseAi::Completions::ToolCall)
        end
      end
    end
  end
end