FEATURE: implement command framework for non Open AI (#147)

OpenAI supports function calling, but it has a very specific shape
that other LLMs have not quite adopted.

This simulates a command framework using system prompts on LLMs
other than OpenAI.

Features include:

- Smart system prompt to steer the LLM
- Parameter validation (we ensure all the params are specified correctly)

This is being tested on Anthropic at the moment and initial results
are promising.
This commit is contained in:
Sam 2023-08-23 07:49:36 +10:00 committed by GitHub
parent 95881fce74
commit f0e1c72aa7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 429 additions and 138 deletions

View File

@ -25,12 +25,6 @@ module DiscourseAi
private private
def populate_functions(partial, function)
# nothing to do here, no proper function support
# needs to be simulated for Claude but model is too
# hard to steer for now
end
def build_message(poster_username, content, system: false, function: nil) def build_message(poster_username, content, system: false, function: nil)
role = poster_username == bot_user.username ? "Assistant" : "Human" role = poster_username == bot_user.username ? "Assistant" : "Human"

View File

@ -3,23 +3,41 @@
module DiscourseAi module DiscourseAi
module AiBot module AiBot
class Bot class Bot
class Functions class FunctionCalls
attr_reader :functions
attr_reader :current_function
def initialize def initialize
@functions = [] @functions = []
@current_function = nil @current_function = nil
@found = false
end
def found?
!@functions.empty? || @found
end
def found!
@found = true
end end
def add_function(name) def add_function(name)
@current_function = { name: name, arguments: +"" } @current_function = { name: name, arguments: +"" }
functions << current_function @functions << @current_function
end end
def add_argument_fragment(fragment) def add_argument_fragment(fragment)
@current_function[:arguments] << fragment @current_function[:arguments] << fragment
end end
def length
@functions.length
end
def each
@functions.each { |function| yield function }
end
def to_a
@functions
end
end end
attr_reader :bot_user attr_reader :bot_user
@ -85,13 +103,22 @@ module DiscourseAi
setup_cancel = false setup_cancel = false
context = {} context = {}
functions = Functions.new functions = FunctionCalls.new
submit_prompt(prompt, prefer_low_cost: prefer_low_cost) do |partial, cancel| submit_prompt(prompt, prefer_low_cost: prefer_low_cost) do |partial, cancel|
current_delta = get_delta(partial, context) current_delta = get_delta(partial, context)
partial_reply << current_delta partial_reply << current_delta
reply << current_delta
populate_functions(partial, functions) if !available_functions.empty?
populate_functions(
partial: partial,
reply: partial_reply,
functions: functions,
done: false,
)
end
reply << current_delta if !functions.found?
if redis_stream_key && !Discourse.redis.get(redis_stream_key) if redis_stream_key && !Discourse.redis.get(redis_stream_key)
cancel&.call cancel&.call
@ -143,11 +170,15 @@ module DiscourseAi
post.post_custom_prompt.update!(custom_prompt: prompt) post.post_custom_prompt.update!(custom_prompt: prompt)
end end
if functions.functions.length > 0 if !available_functions.empty?
populate_functions(partial: nil, reply: partial_reply, functions: functions, done: true)
end
if functions.length > 0
chain = false chain = false
standalone = false standalone = false
functions.functions.each do |function| functions.each do |function|
name, args = function[:name], function[:arguments] name, args = function[:name], function[:arguments]
if command_klass = available_commands.detect { |cmd| cmd.invoked?(name) } if command_klass = available_commands.detect { |cmd| cmd.invoked?(name) }
@ -230,9 +261,26 @@ module DiscourseAi
end end
def available_commands def available_commands
# by default assume bots have no access to commands return @cmds if @cmds
# for now we need GPT 4 to properly work with them
[] all_commands =
[
Commands::CategoriesCommand,
Commands::TimeCommand,
Commands::SearchCommand,
Commands::SummarizeCommand,
Commands::ReadCommand,
].tap do |cmds|
cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
if SiteSetting.ai_google_custom_search_api_key.present? &&
SiteSetting.ai_google_custom_search_cx.present?
cmds << Commands::GoogleCommand
end
end
allowed_commands = SiteSetting.ai_bot_enabled_chat_commands.split("|")
@cmds = all_commands.filter { |klass| allowed_commands.include?(klass.name) }
end end
def system_prompt_style!(style) def system_prompt_style!(style)
@ -241,7 +289,8 @@ module DiscourseAi
def system_prompt(post) def system_prompt(post)
return "You are a helpful Bot" if @style == :simple return "You are a helpful Bot" if @style == :simple
<<~TEXT
prompt = +<<~TEXT
You are a helpful Discourse assistant. You are a helpful Discourse assistant.
You understand and generate Discourse Markdown. You understand and generate Discourse Markdown.
You live in a Discourse Forum Message. You live in a Discourse Forum Message.
@ -251,9 +300,28 @@ module DiscourseAi
The description is: #{SiteSetting.site_description} The description is: #{SiteSetting.site_description}
The participants in this conversation are: #{post.topic.allowed_users.map(&:username).join(", ")} The participants in this conversation are: #{post.topic.allowed_users.map(&:username).join(", ")}
The date now is: #{Time.zone.now}, much has changed since you were trained. The date now is: #{Time.zone.now}, much has changed since you were trained.
#{available_commands.map(&:custom_system_message).compact.join("\n")}
TEXT TEXT
if include_function_instructions_in_system_prompt?
prompt << "\n"
prompt << function_list.system_prompt
prompt << "\n"
end
prompt << available_commands.map(&:custom_system_message).compact.join("\n")
prompt
end
def include_function_instructions_in_system_prompt?
true
end
def function_list
return @function_list if @function_list
@function_list = DiscourseAi::Inference::FunctionList.new
available_functions.each { |function| @function_list << function }
@function_list
end end
def tokenize(text) def tokenize(text)
@ -268,8 +336,47 @@ module DiscourseAi
raise NotImplemented raise NotImplemented
end end
def populate_functions(partial, functions) def populate_functions(partial:, reply:, functions:, done:)
raise NotImplemented if !done
functions.found! if reply.match?(/^!/i)
else
reply
.scan(/^!.*$/i)
.each do |line|
function_list
.parse_prompt(line)
.each do |function|
functions.add_function(function[:name])
functions.add_argument_fragment(function[:arguments].to_json)
end
end
end
end
def available_functions
# note if defined? can be a problem in test
# this can never be nil so it is safe
return @available_functions if @available_functions
functions = []
functions =
available_commands.map do |command|
function =
DiscourseAi::Inference::Function.new(name: command.name, description: command.desc)
command.parameters.each do |parameter|
function.add_parameter(
name: parameter.name,
type: parameter.type,
description: parameter.description,
required: parameter.required,
enum: parameter.enum,
)
end
function
end
@available_functions = functions
end end
protected protected

View File

@ -8,7 +8,7 @@ module DiscourseAi::AiBot::Commands
end end
def desc def desc
"Renders an image from the description (remove all connector words, keep it to 40 words or less). Despite being a text based bot you can generate images!" "Renders an image from the description (remove all connector words, keep it to 40 words or less). Despite being a text based bot you can generate images! (when user asks to draw, paint or other synonyms try this)"
end end
def parameters def parameters

View File

@ -26,7 +26,7 @@ module DiscourseAi::AiBot::Commands
), ),
Parameter.new( Parameter.new(
name: "order", name: "order",
description: "search result result order", description: "search result order",
type: "string", type: "string",
enum: %w[latest latest_topic oldest views likes], enum: %w[latest latest_topic oldest views likes],
), ),

View File

@ -63,57 +63,6 @@ module DiscourseAi
DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(text) DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(text)
end end
def available_functions
# note if defined? can be a problem in test
# this can never be nil so it is safe
return @available_functions if @available_functions
functions = []
functions =
available_commands.map do |command|
function =
DiscourseAi::Inference::OpenAiCompletions::Function.new(
name: command.name,
description: command.desc,
)
command.parameters.each do |parameter|
function.add_parameter(
name: parameter.name,
type: parameter.type,
description: parameter.description,
required: parameter.required,
)
end
function
end
@available_functions = functions
end
def available_commands
return @cmds if @cmds
all_commands =
[
Commands::CategoriesCommand,
Commands::TimeCommand,
Commands::SearchCommand,
Commands::SummarizeCommand,
Commands::ReadCommand,
].tap do |cmds|
cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
if SiteSetting.ai_google_custom_search_api_key.present? &&
SiteSetting.ai_google_custom_search_cx.present?
cmds << Commands::GoogleCommand
end
end
allowed_commands = SiteSetting.ai_bot_enabled_chat_commands.split("|")
@cmds = all_commands.filter { |klass| allowed_commands.include?(klass.name) }
end
def model_for(low_cost: false) def model_for(low_cost: false)
return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
"gpt-3.5-turbo-16k" "gpt-3.5-turbo-16k"
@ -129,9 +78,15 @@ module DiscourseAi
end end
end end
def include_function_instructions_in_system_prompt?
# open ai uses a bespoke system for function calls
false
end
private private
def populate_functions(partial, functions) def populate_functions(partial:, reply:, functions:, done:)
return if !partial
fn = partial.dig(:choices, 0, :delta, :function_call) fn = partial.dig(:choices, 0, :delta, :function_call)
if fn if fn
functions.add_function(fn[:name]) if fn[:name].present? functions.add_function(fn[:name]) if fn[:name].present?

View File

@ -0,0 +1,49 @@
# frozen_string_literal: true
module ::DiscourseAi
module Inference
# Describes a single callable function (name, description and parameter
# list) and serializes it into a JSON-schema style hash.
class Function
  attr_reader :name, :description, :parameters, :type

  # name        - function identifier
  # description - human readable explanation of what the function does
  # type        - schema type of the parameter container, "object" when omitted
  def initialize(name:, description:, type: nil)
    @name = name
    @description = description
    @parameters = []
    @type = type || "object"
  end

  # Appends one parameter definition and returns the parameter list.
  def add_parameter(name:, type:, description:, enum: nil, required: false)
    entry = { name: name, type: type, description: description, enum: enum, required: required }
    @parameters << entry
  end

  # Serializes the hash produced by #as_json.
  def to_json(*args)
    as_json.to_json(*args)
  end

  # Builds { name:, description:, parameters: { type:, properties:, required: } }.
  # The :enum key is only emitted for parameters that declare one, and
  # :required is only emitted when at least one parameter is required.
  def as_json
    properties =
      parameters.each_with_object({}) do |param, props|
        schema = { type: param[:type], description: param[:description] }
        schema[:enum] = param[:enum] if param[:enum]
        props[param[:name]] = schema
      end

    required_names = parameters.select { |param| param[:required] }.map { |param| param[:name] }

    schema = { type: @type, properties: properties }
    schema[:required] = required_names if required_names.present?

    { name: name, description: description, parameters: schema }
  end
end
end
end

View File

@ -0,0 +1,122 @@
# frozen_string_literal: true
module ::DiscourseAi
module Inference
# Collection of functions exposed to an LLM through a plain-text
# "!function_name(param: value)" convention. It renders the instructions
# that go into the system prompt and parses such calls out of a reply.
#
# Registered functions are expected to respond to #name, #description and
# #parameters (an array of hashes with :name, :type, :description, :enum
# and :required keys).
class FunctionList
  def initialize
    @functions = []
  end

  # Registers a function so it is listed by #system_prompt and recognised
  # by #parse_prompt.
  def <<(function)
    @functions << function
  end

  # Extracts function calls from +prompt+, one call per line.
  #
  # Only lines starting with "!" are considered. Calls to unknown
  # functions, unknown argument names and enum values outside the declared
  # list are dropped. A call is only returned when every required
  # parameter ends up with a non-blank value.
  #
  # Malformed calls (no parentheses, an argument without ":", an empty
  # argument between two commas) are tolerated instead of raising.
  #
  # Returns an array of { name: String, arguments: Hash } where the
  # argument keys are symbols and all values are strings.
  def parse_prompt(prompt)
    parsed = []

    prompt
      .split("\n")
      .each do |line|
        line.strip!
        next if line.blank?
        next if !line.start_with?("!")

        name, arguments = line.split("(", 2)
        name = name[1..-1].strip

        function = @functions.find { |f| f.name == name }
        next if function.nil?

        parsed_arguments = parse_arguments(function, arguments)

        # ensure parsed_arguments has all required arguments
        all_good =
          function.parameters.all? do |parameter|
            !parameter[:required] || parsed_arguments[parameter[:name].to_sym].present?
          end

        parsed << { name: name, arguments: parsed_arguments } if all_good
      end

    parsed
  end

  # Renders the system prompt fragment: general guidance, one entry per
  # registered function (description comment followed by its signature)
  # and an example of the call syntax.
  def system_prompt
    prompt = +<<~PROMPT
      - You are able to execute the following external functions on real data!
      - Never say that you are in a hypothetical situation, just run functions you need to run!
      - When you run a command/function you will gain access to real information in a subsequant call!
      - NEVER EVER pretend to know stuff, you ALWAYS lean on functions to discover the truth!
      - You have direct access to data on this forum using !functions
      {
    PROMPT

    @functions.each do |function|
      prompt << " // #{function.description}\n"
      prompt << " #{function.name}"
      if function.parameters.present?
        prompt << "("
        function.parameters.each_with_index do |parameter, index|
          prompt << ", " if index > 0
          prompt << "#{parameter[:name]}: #{parameter[:type]}"
          prompt << (parameter[:required] ? " [required]" : " [optional]")

          # dup so the caller's description string is never mutated; unary +
          # returns the very same object when the string is not frozen
          description = (parameter[:description] || "").dup
          description << " [valid values: #{parameter[:enum].join(",")}]" if parameter[:enum]
          description.strip!

          prompt << " /* #{description} */" if description.present?
        end
        prompt << ")"
      end
      # always terminate the entry, including functions without parameters
      prompt << "\n"
    end

    prompt << <<~PROMPT
      }
      \n\nTo execute a function, use the following syntax:
      !function_name(param1: "value1", param2: 2)
      For example for a function defined as:
      {
      // echo a string
      echo(message: string [required])
      }
      You can execute with:
      !echo(message: "hello world")
    PROMPT

    prompt
  end

  private

  # Turns the raw text following "(" into a hash of validated arguments.
  # +raw_arguments+ is nil when the call was written without parentheses.
  def parse_arguments(function, raw_arguments)
    raw = raw_arguments.to_s
    raw = raw[0..-2] if raw.end_with?(")")

    raw
      .split(",")
      .each_with_object({}) do |argument, result|
        key, value = argument.strip.split(":", 2)
        # skip empty arguments ("a: 1,, b: 2") and arguments without a value
        next if key.blank? || value.nil?

        key = key.strip
        # remove stuff that bypasses the spec
        param = function.parameters.find { |p| p[:name] == key }
        next if !param

        # strip one pair of surrounding single or double quotes
        value = value.strip.gsub(/(\A"(.*)"\Z)|(\A'(.*)'\Z)/m, '\2\4') if value.present?

        next if param[:enum] && !param[:enum].include?(value)

        result[key.to_sym] = value.strip
      end
  end
end
end
end

View File

@ -4,51 +4,6 @@ module ::DiscourseAi
module Inference module Inference
class OpenAiCompletions class OpenAiCompletions
TIMEOUT = 60 TIMEOUT = 60
class Function
attr_reader :name, :description, :parameters, :type
def initialize(name:, description:, type: nil)
@name = name
@description = description
@type = type || "object"
@parameters = []
end
def add_parameter(name:, type:, description:, enum: nil, required: false)
@parameters << {
name: name,
type: type,
description: description,
enum: enum,
required: required,
}
end
def to_json(*args)
as_json.to_json(*args)
end
def as_json
required_params = []
properties = {}
parameters.each do |parameter|
definition = { type: parameter[:type], description: parameter[:description] }
definition[:enum] = parameter[:enum] if parameter[:enum]
required_params << parameter[:name] if parameter[:required]
properties[parameter[:name]] = definition
end
params = { type: @type, properties: properties }
params[:required] = required_params if required_params.present?
{ name: name, description: description, parameters: params }
end
end
CompletionFailed = Class.new(StandardError) CompletionFailed = Class.new(StandardError)
def self.perform!( def self.perform!(

View File

@ -32,6 +32,8 @@ after_initialize do
require_relative "lib/shared/inference/anthropic_completions" require_relative "lib/shared/inference/anthropic_completions"
require_relative "lib/shared/inference/stability_generator" require_relative "lib/shared/inference/stability_generator"
require_relative "lib/shared/inference/hugging_face_text_generation" require_relative "lib/shared/inference/hugging_face_text_generation"
require_relative "lib/shared/inference/function"
require_relative "lib/shared/inference/function_list"
require_relative "lib/shared/classificator" require_relative "lib/shared/classificator"
require_relative "lib/shared/post_classificator" require_relative "lib/shared/post_classificator"

View File

@ -1,24 +1,69 @@
# frozen_string_literal: true # frozen_string_literal: true
RSpec.describe DiscourseAi::AiBot::AnthropicBot do module ::DiscourseAi
describe "#update_with_delta" do module AiBot
describe AnthropicBot do
def bot_user def bot_user
User.find(DiscourseAi::AiBot::EntryPoint::GPT4_ID) User.find(EntryPoint::CLAUDE_V2_ID)
end end
subject { described_class.new(bot_user) } let(:bot) { described_class.new(bot_user) }
let(:post) { Fabricate(:post) }
describe "system message" do
it "includes the full command framework" do
SiteSetting.ai_bot_enabled_chat_commands = "read|search"
prompt = bot.system_prompt(post)
expect(prompt).to include("read")
expect(prompt).to include("search_query")
end
end
describe "parsing a reply prompt" do
it "can correctly detect commands from a prompt" do
SiteSetting.ai_bot_enabled_chat_commands = "read|search"
functions = DiscourseAi::AiBot::Bot::FunctionCalls.new
prompt = <<~REPLY
Hi there I am a robot!!!
!search(search_query: "hello world", random_stuff: 77)
!random(search_query: "hello world", random_stuff: 77)
!read(topic_id: 109)
!read(random: 109)
REPLY
expect(functions.found?).to eq(false)
bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: false)
expect(functions.found?).to eq(true)
bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: true)
expect(functions.to_a).to eq(
[
{ name: "search", arguments: "{\"search_query\":\"hello world\"}" },
{ name: "read", arguments: "{\"topic_id\":\"109\"}" },
],
)
end
end
describe "#update_with_delta" do
describe "get_delta" do describe "get_delta" do
it "can properly remove Assistant prefix" do it "can properly remove Assistant prefix" do
context = {} context = {}
reply = +"" reply = +""
reply << subject.get_delta({ completion: "Hello " }, context) reply << bot.get_delta({ completion: "Hello " }, context)
expect(reply).to eq("Hello ") expect(reply).to eq("Hello ")
reply << subject.get_delta({ completion: "world" }, context) reply << bot.get_delta({ completion: "world" }, context)
expect(reply).to eq("Hello world") expect(reply).to eq("Hello world")
end end
end end
end end
end
end
end end

View File

@ -0,0 +1,62 @@
# frozen_string_literal: true
require "rails_helper"
module DiscourseAi::Inference
# Specs for FunctionList: parsing "!function(...)" calls out of free text and
# rendering the function listing that goes into the system prompt.
describe FunctionList do
# A list holding a single get_weather function with a required free-form
# "location" and an optional "unit" restricted to the values c and f.
let :function_list do
function =
Function.new(name: "get_weather", description: "Get the weather in a city (default to c)")
function.add_parameter(
name: "location",
type: "string",
description: "the city name",
required: true,
)
function.add_parameter(
name: "unit",
type: "string",
description: "the unit of measurement celcius c or fahrenheit f",
enum: %w[c f],
required: false,
)
list = FunctionList.new
list << function
list
end
# The four calls below cover, in order:
# 1. a well formed call with quoted values
# 2. a space before "(" and an unquoted value
# 3. a space before ":", a single quoted value containing a quote, an enum
#    value outside the allowed list ("m") and an undeclared parameter; only
#    location is expected to survive
# 4. a call missing the required location, which is expected to be dropped
it "can handle complex parsing" do
raw_prompt = <<~PROMPT
!get_weather(location: "sydney", unit: "f")
!get_weather (location: sydney)
!get_weather(location : 'sydney's', unit: "m", invalid: "invalid")
!get_weather(unit: "f", invalid: "invalid")
PROMPT
parsed = function_list.parse_prompt(raw_prompt)
expect(parsed).to eq(
[
{ name: "get_weather", arguments: { location: "sydney", unit: "f" } },
{ name: "get_weather", arguments: { location: "sydney" } },
{ name: "get_weather", arguments: { location: "sydney's" } },
],
)
end
it "can generate a general custom system prompt" do
prompt = function_list.system_prompt
# this is fragile, by design, we need to test something here
# (only the rendered function listing is asserted, via include, not the
# surrounding instructions)
expected = <<~PROMPT
{
// Get the weather in a city (default to c)
get_weather(location: string [required] /* the city name */, unit: string [optional] /* the unit of measurement celcius c or fahrenheit f [valid values: c,f] */)
}
PROMPT
expect(prompt).to include(expected)
end
end
end

View File

@ -74,7 +74,7 @@ describe DiscourseAi::Inference::OpenAiCompletions do
functions = [] functions = []
function = function =
DiscourseAi::Inference::OpenAiCompletions::Function.new( DiscourseAi::Inference::Function.new(
name: "get_weather", name: "get_weather",
description: "Get the weather in a city", description: "Get the weather in a city",
) )