This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt: the context includes bot messages, user messages, and tool invocations, which we trim to make sure the prompt limit isn't exceeded, then translate to the correct dialect. Finally, it adds some buffering when reading chunks to handle cases where streaming is extremely slow.
115 lines · 4.5 KiB · Ruby
# frozen_string_literal: true

# A facade that abstracts multiple LLMs behind a single interface.
#
# Internally, it consists of the combination of a dialect and an endpoint.
# After receiving a prompt using our generic format, it translates it to
# the target model and routes the completion request through the correct gateway.
#
# Use the .proxy method to instantiate an object.
# It chooses the best dialect and endpoint for the model you want to interact with.
#
# Tests of modules that perform LLM calls can use .with_prepared_responses to return canned responses
# instead of relying on WebMock stubs like we did in the past.
#
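# A minimal usage sketch (the model name, prompt contents, and user variable
# below are illustrative assumptions, not project defaults):
#
#   llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo")
#   reply = llm.completion!({ insts: "You are a helpful bot", input: "Hi!" }, user)
#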
module DiscourseAi
  module Completions
    class Llm
      UNKNOWN_MODEL = Class.new(StandardError)
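
      # A sketch of a test leaning on canned responses instead of WebMock
      # stubs (the response string and the code under test are illustrative
      # assumptions):
      #
      #   DiscourseAi::Completions::Llm.with_prepared_responses(["canned reply"]) do
      #     # Code under test that calls Llm.proxy internally now receives
      #     # "canned reply" instead of hitting a real endpoint.
      #   end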
      def self.with_prepared_responses(responses)
        @canned_response = DiscourseAi::Completions::Endpoints::CannedResponse.new(responses)

        yield(@canned_response).tap { @canned_response = nil }
      end

      def self.proxy(model_name)
        dialect_klass = DiscourseAi::Completions::Dialects::Dialect.dialect_for(model_name)

        return new(dialect_klass, @canned_response, model_name) if @canned_response

        gateway =
          DiscourseAi::Completions::Endpoints::Base.endpoint_for(model_name).new(
            model_name,
            dialect_klass.tokenizer,
          )

        new(dialect_klass, gateway, model_name)
      end

      def initialize(dialect_klass, gateway, model_name)
        @dialect_klass = dialect_klass
        @gateway = gateway
        @model_name = model_name
      end

      delegate :tokenizer, to: :dialect_klass

      # @param generic_prompt { Hash } - Prompt using our generic format.
      # We use the following keys from the hash:
      # - insts: String with instructions for the LLM.
      # - input: String containing user input
      # - examples (optional): Array of arrays with examples of input and responses. Each array is an input/response pair like [[example1, response1], [example2, response2]].
      # - post_insts (optional): Additional instructions for the LLM. Some dialects like Claude add these at the end of the prompt.
      # - conversation_context (optional): Array of hashes to provide context about an ongoing conversation with the model.
      #   We translate the array in reverse order, meaning the first element would be the most recent message in the conversation.
      #   Example:
      #
      #   [
      #     { type: "user", name: "user1", content: "This is a new message by a user" },
      #     { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" },
      #     { type: "tool", name: "tool_id", content: "I'm a tool result" },
      #   ]
      #
      # - tools (optional - only functions supported): Array of functions a model can call. Each function is defined as a hash. Example:
      #
      #   {
      #     name: "get_weather",
      #     description: "Get the weather in a city",
      #     parameters: [
      #       { name: "location", type: "string", description: "the city name", required: true },
      #       {
      #         name: "unit",
      #         type: "string",
      #         description: "the unit of measurement celsius c or fahrenheit f",
      #         enum: %w[c f],
      #         required: true,
      #       },
      #     ],
      #   }
      #
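      # Putting those keys together, a complete generic_prompt might look like
      # this (contents are illustrative assumptions, not defaults):
      #
      #   {
      #     insts: "You are a helpful weather bot",
      #     input: "What's the weather in Sydney?",
      #     conversation_context: [{ type: "user", name: "user1", content: "Hi!" }],
      #     tools: [{ name: "get_weather", description: "Get the weather in a city", parameters: [...] }],
      #   }
      #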
      # @param user { User } - User requesting the completion.
      #
      # @param &partial_read_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
      #
      # @returns { String } - Completion result.
      #
      # When the model invokes a tool, we'll wait until the endpoint finishes replying and feed you a fully-formed invocation,
      # even if you passed a partial_read_blk block. Invocations are strings that look like this:
      #
      # <function_calls>
      # <invoke>
      # <tool_name>get_weather</tool_name>
      # <tool_id>get_weather</tool_id>
      # <parameters>
      # <location>Sydney</location>
      # <unit>c</unit>
      # </parameters>
      # </invoke>
      # </function_calls>
      #
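      # A hypothetical streaming call (the stop condition is an illustrative
      # assumption; cancel is the function described above):
      #
      #   llm.completion!(generic_prompt, user) do |partial, cancel|
      #     print partial
      #     cancel.call if some_stop_condition
      #   end
      #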
      def completion!(generic_prompt, user, &partial_read_blk)
        model_params = generic_prompt.dig(:params, model_name) || {}

        dialect = dialect_klass.new(generic_prompt, model_name, opts: model_params)

        gateway.perform_completion!(dialect, user, model_params, &partial_read_blk)
      end

      private

      attr_reader :dialect_klass, :gateway, :model_name
    end
  end
end