discourse-ai/lib/completions/endpoints/canned_response.rb
Sam e817b7dc11
FEATURE: improve tool support (#904)
This re-implements tool support in DiscourseAi::Completions::Llm #generate

Previously tool support was always returned via XML and it would be the responsibility of the caller to parse XML

New implementation has the endpoints return ToolCall objects.

Additionally this simplifies the Llm endpoint interface and gives it more clarity. Llms must implement

decode, decode_chunk (for streaming)

It is the implementer's responsibility to figure out how to decode chunks; the base class no longer implements this. To make this easy we ship a flexible JSON decoder which is easy to wire up.

Also (new)

    Better debugging for PMs, we now have a next / previous button to see all the Llm messages associated with a PM
    Token accounting is fixed for vllm (we were not correctly counting tokens)
2024-11-12 08:14:30 +11:00

78 lines
2.0 KiB
Ruby

# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Endpoints
      # Test double for LLM endpoints: replays a preset list of responses
      # instead of calling a real model. Each successful call to
      # #perform_completion! consumes the next entry of +responses+.
      class CannedResponse
        CANNED_RESPONSE_ERROR = Class.new(StandardError)

        # @param responses [Array] canned replies; entries may be Strings,
        #   tool-call objects, StandardError instances (raised when reached),
        #   or Arrays mixing the above (streamed element by element).
        def initialize(responses)
          @responses = responses
          @completions = 0
          @dialect = nil
        end

        def normalize_model_params(model_params)
          # max_tokens, temperature, stop_sequences are already supported
          model_params
        end

        attr_reader :responses, :completions, :dialect

        # Messages from the prompt of the most recently used dialect
        # (captured by #perform_completion!).
        def prompt_messages
          dialect.prompt.messages
        end

        # Returns the next canned response. When a block is given, it is also
        # streamed: tool calls are yielded whole, strings char by char.
        #
        # Raises CANNED_RESPONSE_ERROR when the canned list is exhausted, and
        # re-raises any StandardError instance stored as a response. A raised
        # response does NOT advance the counter, so it repeats on every call.
        def perform_completion!(
          dialect,
          _user,
          _model_params,
          feature_name: nil,
          feature_context: nil
        )
          @dialect = dialect
          response = responses[completions]
          if response.nil?
            raise CANNED_RESPONSE_ERROR,
                  "The number of completions you requested exceed the number of canned responses"
          end

          raise response if response.is_a?(StandardError)

          @completions += 1
          if block_given?
            cancelled = false
            cancel_fn = lambda { cancelled = true }

            # We buffer and return tool invocations in one go.
            as_array = response.is_a?(Array) ? response : [response]
            # `chunk` (not `response`) so the outer local is not shadowed.
            as_array.each do |chunk|
              if is_tool?(chunk)
                yield(chunk, cancel_fn)
              else
                chunk.each_char do |char|
                  break if cancelled
                  yield(char, cancel_fn)
                end
              end
            end
          end

          # A single-element array collapses to its sole member so callers
          # get a bare value for the common one-response case.
          response = response.first if response.is_a?(Array) && response.length == 1
          response
        end

        def tokenizer
          DiscourseAi::Tokenizer::OpenAiTokenizer
        end

        private

        def is_tool?(response)
          response.is_a?(DiscourseAi::Completions::ToolCall)
        end
      end
    end
  end
end