Mirror of https://github.com/discourse/discourse-ai.git, synced 2025-03-01 23:09:19 +00:00
Implement streaming tool calls for Anthropic and OpenAI. When calling:

  llm.generate(..., partial_tool_calls: true) do ... end

partials may contain ToolCall instances with partial: true. These tool calls are populated incrementally from partially parsed JSON, so when performing a search you may get:

  ToolCall(..., { search: "hello" })
  ToolCall(..., { search: "hello world" })

The library used to parse the JSON is https://github.com/dgraham/json-stream. We use a fork because we need access to the internal buffer.

This prepares the internals to perform partial tool calls, but does not implement it yet.
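As an illustration, here is a rough sketch of how a caller might consume these partials. It is a sketch only: the Llm.proxy lookup, the Prompt constructor, the user: keyword, and the ToolCall#name / #parameters / #partial accessors are assumptions drawn from typical usage, not something this change confirms.

  # Hedged sketch: model id, prompt construction and accessor names are illustrative.
  prompt = DiscourseAi::Completions::Prompt.new("You are a helpful bot") # assumed constructor
  llm = DiscourseAi::Completions::Llm.proxy("anthropic:claude-3-5-sonnet") # assumed helper and model id

  llm.generate(prompt, user: Discourse.system_user, partial_tool_calls: true) do |partial, cancel|
    if partial.is_a?(DiscourseAi::Completions::ToolCall)
      # Partial tool calls arrive with progressively more complete parameters,
      # e.g. { search: "hello" } followed by { search: "hello world" }.
      puts "tool #{partial.name} (partial: #{partial.partial}) -> #{partial.parameters.inspect}"
    else
      print partial # plain streamed text
    end
  end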
79 lines · 2.0 KiB · Ruby
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Endpoints
      # Fake endpoint that replays a scripted list of canned responses instead
      # of calling a real LLM provider.
      class CannedResponse
        CANNED_RESPONSE_ERROR = Class.new(StandardError)

        def initialize(responses)
          @responses = responses
          @completions = 0
          @dialect = nil
        end

        def normalize_model_params(model_params)
          # max_tokens, temperature, stop_sequences are already supported
          model_params
        end

        attr_reader :responses, :completions, :dialect

        def prompt_messages
          dialect.prompt.messages
        end

        def perform_completion!(
          dialect,
          _user,
          _model_params,
          feature_name: nil,
          feature_context: nil,
          partial_tool_calls: false
        )
          @dialect = dialect
          response = responses[completions]
          if response.nil?
            raise CANNED_RESPONSE_ERROR,
                  "The number of completions you requested exceeds the number of canned responses"
          end

          raise response if response.is_a?(StandardError)

          @completions += 1
          if block_given?
            cancelled = false
            cancel_fn = lambda { cancelled = true }

            # We buffer and return tool invocations in one go; plain strings
            # are streamed to the block one character at a time.
            as_array = response.is_a?(Array) ? response : [response]
            as_array.each do |response|
              if is_tool?(response)
                yield(response, cancel_fn)
              else
                response.each_char do |char|
                  break if cancelled
                  yield(char, cancel_fn)
                end
              end
            end
          end

          response = response.first if response.is_a?(Array) && response.length == 1
          response
        end

        def tokenizer
          DiscourseAi::Tokenizer::OpenAiTokenizer
        end

        private

        def is_tool?(response)
          response.is_a?(DiscourseAi::Completions::ToolCall)
        end
      end
    end
  end
end
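For reference, a minimal sketch of driving the canned endpoint above directly, for example from a test. The nil dialect and the ToolCall constructor keywords are assumptions; only the behaviour of the class shown here is relied on.

  # Minimal sketch: nil is passed for the dialect because prompt_messages is never read.
  canned =
    DiscourseAi::Completions::Endpoints::CannedResponse.new(
      [
        "Hello world",
        DiscourseAi::Completions::ToolCall.new(id: "1", name: "search", parameters: { query: "ruby" }), # assumed keywords
      ],
    )

  # First canned response is a plain string: streamed to the block one character at a time.
  canned.perform_completion!(nil, nil, {}) { |chunk, _cancel| print chunk }

  # Second canned response is a ToolCall: buffered and yielded to the block in one go.
  canned.perform_completion!(nil, nil, {}) { |tool_call, _cancel| puts tool_call.inspect }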