From f0e1c72aa7f62c970a3576332dc45fcf4922d21c Mon Sep 17 00:00:00 2001 From: Sam Date: Wed, 23 Aug 2023 07:49:36 +1000 Subject: [PATCH] FEATURE: implement command framework for non Open AI (#147) Open AI supports function calling; this has a very specific shape that other LLMs have not quite adopted. This simulates a command framework using system prompts on LLMs that are not Open AI. Features include: - Smart system prompt to steer the LLM - Parameter validation (we ensure all the params are specified correctly) This is being tested on Anthropic at the moment and initial results are promising. --- lib/modules/ai_bot/anthropic_bot.rb | 6 - lib/modules/ai_bot/bot.rb | 143 +++++++++++++++--- lib/modules/ai_bot/commands/image_command.rb | 2 +- lib/modules/ai_bot/commands/search_command.rb | 2 +- lib/modules/ai_bot/open_ai_bot.rb | 59 +------- lib/shared/inference/function.rb | 49 ++++++ lib/shared/inference/function_list.rb | 122 +++++++++++++++ lib/shared/inference/openai_completions.rb | 45 ------ plugin.rb | 2 + spec/lib/modules/ai_bot/anthropic_bot_spec.rb | 73 +++++++-- spec/shared/inference/function_list_spec.rb | 62 ++++++++ .../inference/openai_completions_spec.rb | 2 +- 12 files changed, 429 insertions(+), 138 deletions(-) create mode 100644 lib/shared/inference/function.rb create mode 100644 lib/shared/inference/function_list.rb create mode 100644 spec/shared/inference/function_list_spec.rb diff --git a/lib/modules/ai_bot/anthropic_bot.rb b/lib/modules/ai_bot/anthropic_bot.rb index f388e87d..799b0932 100644 --- a/lib/modules/ai_bot/anthropic_bot.rb +++ b/lib/modules/ai_bot/anthropic_bot.rb @@ -25,12 +25,6 @@ module DiscourseAi private - def populate_functions(partial, function) - # nothing to do here, no proper function support - # needs to be simulated for Claude but model is too - # hard to steer for now - end - def build_message(poster_username, content, system: false, function: nil) role = poster_username == bot_user.username ? 
"Assistant" : "Human" diff --git a/lib/modules/ai_bot/bot.rb b/lib/modules/ai_bot/bot.rb index 1e35d43b..36b40350 100644 --- a/lib/modules/ai_bot/bot.rb +++ b/lib/modules/ai_bot/bot.rb @@ -3,23 +3,41 @@ module DiscourseAi module AiBot class Bot - class Functions - attr_reader :functions - attr_reader :current_function - + class FunctionCalls def initialize @functions = [] @current_function = nil + @found = false + end + + def found? + !@functions.empty? || @found + end + + def found! + @found = true end def add_function(name) @current_function = { name: name, arguments: +"" } - functions << current_function + @functions << @current_function end def add_argument_fragment(fragment) @current_function[:arguments] << fragment end + + def length + @functions.length + end + + def each + @functions.each { |function| yield function } + end + + def to_a + @functions + end end attr_reader :bot_user @@ -85,13 +103,22 @@ module DiscourseAi setup_cancel = false context = {} - functions = Functions.new + functions = FunctionCalls.new submit_prompt(prompt, prefer_low_cost: prefer_low_cost) do |partial, cancel| current_delta = get_delta(partial, context) partial_reply << current_delta - reply << current_delta - populate_functions(partial, functions) + + if !available_functions.empty? + populate_functions( + partial: partial, + reply: partial_reply, + functions: functions, + done: false, + ) + end + + reply << current_delta if !functions.found? if redis_stream_key && !Discourse.redis.get(redis_stream_key) cancel&.call @@ -143,11 +170,15 @@ module DiscourseAi post.post_custom_prompt.update!(custom_prompt: prompt) end - if functions.functions.length > 0 + if !available_functions.empty? 
+ populate_functions(partial: nil, reply: partial_reply, functions: functions, done: true) + end + + if functions.length > 0 chain = false standalone = false - functions.functions.each do |function| + functions.each do |function| name, args = function[:name], function[:arguments] if command_klass = available_commands.detect { |cmd| cmd.invoked?(name) } @@ -230,9 +261,26 @@ module DiscourseAi end def available_commands - # by default assume bots have no access to commands - # for now we need GPT 4 to properly work with them - [] + return @cmds if @cmds + + all_commands = + [ + Commands::CategoriesCommand, + Commands::TimeCommand, + Commands::SearchCommand, + Commands::SummarizeCommand, + Commands::ReadCommand, + ].tap do |cmds| + cmds << Commands::TagsCommand if SiteSetting.tagging_enabled + cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present? + if SiteSetting.ai_google_custom_search_api_key.present? && + SiteSetting.ai_google_custom_search_cx.present? + cmds << Commands::GoogleCommand + end + end + + allowed_commands = SiteSetting.ai_bot_enabled_chat_commands.split("|") + @cmds = all_commands.filter { |klass| allowed_commands.include?(klass.name) } end def system_prompt_style!(style) @@ -241,7 +289,8 @@ module DiscourseAi def system_prompt(post) return "You are a helpful Bot" if @style == :simple - <<~TEXT + + prompt = +<<~TEXT You are a helpful Discourse assistant. You understand and generate Discourse Markdown. You live in a Discourse Forum Message. @@ -251,9 +300,28 @@ module DiscourseAi The description is: #{SiteSetting.site_description} The participants in this conversation are: #{post.topic.allowed_users.map(&:username).join(", ")} The date now is: #{Time.zone.now}, much has changed since you were trained. - - #{available_commands.map(&:custom_system_message).compact.join("\n")} TEXT + + if include_function_instructions_in_system_prompt? 
+ prompt << "\n" + prompt << function_list.system_prompt + prompt << "\n" + end + + prompt << available_commands.map(&:custom_system_message).compact.join("\n") + prompt + end + + def include_function_instructions_in_system_prompt? + true + end + + def function_list + return @function_list if @function_list + + @function_list = DiscourseAi::Inference::FunctionList.new + available_functions.each { |function| @function_list << function } + @function_list end def tokenize(text) @@ -268,8 +336,47 @@ module DiscourseAi raise NotImplemented end - def populate_functions(partial, functions) - raise NotImplemented + def populate_functions(partial:, reply:, functions:, done:) + if !done + functions.found! if reply.match?(/^!/i) + else + reply + .scan(/^!.*$/i) + .each do |line| + function_list + .parse_prompt(line) + .each do |function| + functions.add_function(function[:name]) + functions.add_argument_fragment(function[:arguments].to_json) + end + end + end + end + + def available_functions + # note if defined? can be a problem in test + # this can never be nil so it is safe + return @available_functions if @available_functions + + functions = [] + + functions = + available_commands.map do |command| + function = + DiscourseAi::Inference::Function.new(name: command.name, description: command.desc) + command.parameters.each do |parameter| + function.add_parameter( + name: parameter.name, + type: parameter.type, + description: parameter.description, + required: parameter.required, + enum: parameter.enum, + ) + end + function + end + + @available_functions = functions end protected diff --git a/lib/modules/ai_bot/commands/image_command.rb b/lib/modules/ai_bot/commands/image_command.rb index 4eb82934..044321a6 100644 --- a/lib/modules/ai_bot/commands/image_command.rb +++ b/lib/modules/ai_bot/commands/image_command.rb @@ -8,7 +8,7 @@ module DiscourseAi::AiBot::Commands end def desc - "Renders an image from the description (remove all connector words, keep it to 40 words or less). 
Despite being a text based bot you can generate images!" + "Renders an image from the description (remove all connector words, keep it to 40 words or less). Despite being a text based bot you can generate images! (when user asks to draw, paint or other synonyms try this)" end def parameters diff --git a/lib/modules/ai_bot/commands/search_command.rb b/lib/modules/ai_bot/commands/search_command.rb index dd7b794c..330a7d97 100644 --- a/lib/modules/ai_bot/commands/search_command.rb +++ b/lib/modules/ai_bot/commands/search_command.rb @@ -26,7 +26,7 @@ module DiscourseAi::AiBot::Commands ), Parameter.new( name: "order", - description: "search result result order", + description: "search result order", type: "string", enum: %w[latest latest_topic oldest views likes], ), diff --git a/lib/modules/ai_bot/open_ai_bot.rb b/lib/modules/ai_bot/open_ai_bot.rb index 47772874..2d0025b1 100644 --- a/lib/modules/ai_bot/open_ai_bot.rb +++ b/lib/modules/ai_bot/open_ai_bot.rb @@ -63,57 +63,6 @@ module DiscourseAi DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(text) end - def available_functions - # note if defined? 
can be a problem in test - # this can never be nil so it is safe - return @available_functions if @available_functions - - functions = [] - - functions = - available_commands.map do |command| - function = - DiscourseAi::Inference::OpenAiCompletions::Function.new( - name: command.name, - description: command.desc, - ) - command.parameters.each do |parameter| - function.add_parameter( - name: parameter.name, - type: parameter.type, - description: parameter.description, - required: parameter.required, - ) - end - function - end - - @available_functions = functions - end - - def available_commands - return @cmds if @cmds - - all_commands = - [ - Commands::CategoriesCommand, - Commands::TimeCommand, - Commands::SearchCommand, - Commands::SummarizeCommand, - Commands::ReadCommand, - ].tap do |cmds| - cmds << Commands::TagsCommand if SiteSetting.tagging_enabled - cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present? - if SiteSetting.ai_google_custom_search_api_key.present? && - SiteSetting.ai_google_custom_search_cx.present? - cmds << Commands::GoogleCommand - end - end - - allowed_commands = SiteSetting.ai_bot_enabled_chat_commands.split("|") - @cmds = all_commands.filter { |klass| allowed_commands.include?(klass.name) } - end - def model_for(low_cost: false) return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost "gpt-3.5-turbo-16k" @@ -129,9 +78,15 @@ module DiscourseAi end end + def include_function_instructions_in_system_prompt? + # open ai uses a bespoke system for function calls + false + end + private - def populate_functions(partial, functions) + def populate_functions(partial:, reply:, functions:, done:) + return if !partial fn = partial.dig(:choices, 0, :delta, :function_call) if fn functions.add_function(fn[:name]) if fn[:name].present? 
diff --git a/lib/shared/inference/function.rb b/lib/shared/inference/function.rb new file mode 100644 index 00000000..616229a3 --- /dev/null +++ b/lib/shared/inference/function.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module ::DiscourseAi + module Inference + class Function + attr_reader :name, :description, :parameters, :type + + def initialize(name:, description:, type: nil) + @name = name + @description = description + @type = type || "object" + @parameters = [] + end + + def add_parameter(name:, type:, description:, enum: nil, required: false) + @parameters << { + name: name, + type: type, + description: description, + enum: enum, + required: required, + } + end + + def to_json(*args) + as_json.to_json(*args) + end + + def as_json + required_params = [] + + properties = {} + parameters.each do |parameter| + definition = { type: parameter[:type], description: parameter[:description] } + definition[:enum] = parameter[:enum] if parameter[:enum] + + required_params << parameter[:name] if parameter[:required] + properties[parameter[:name]] = definition + end + + params = { type: @type, properties: properties } + + params[:required] = required_params if required_params.present? + + { name: name, description: description, parameters: params } + end + end + end +end diff --git a/lib/shared/inference/function_list.rb b/lib/shared/inference/function_list.rb new file mode 100644 index 00000000..f1aa3fe0 --- /dev/null +++ b/lib/shared/inference/function_list.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true + +module ::DiscourseAi + module Inference + class FunctionList + def initialize + @functions = [] + end + + def <<(function) + @functions << function + end + + def parse_prompt(prompt) + parsed = [] + + prompt + .split("\n") + .each do |line| + line.strip! + next if line.blank? 
+ next if !line.start_with?("!") + + name, arguments = line.split("(", 2) + name = name[1..-1].strip + + function = @functions.find { |f| f.name == name } + next if function.blank? + + arguments = arguments[0..-2] if arguments.end_with?(")") + arguments = arguments.split(",").map(&:strip) + + parsed_arguments = {} + arguments.each do |argument| + key, value = argument.split(":", 2) + # remove stuff that is bypasses spec + param = function.parameters.find { |p| p[:name] == key.strip } + next if !param + + value = value.strip.gsub(/(\A"(.*)"\Z)|(\A'(.*)'\Z)/m, '\2\4') if value.present? + + if param[:enum] + next if !param[:enum].include?(value) + end + + parsed_arguments[key.strip.to_sym] = value.strip + end + + # ensure parsed_arguments has all required arguments + all_good = true + function.parameters.each do |parameter| + next if !parameter[:required] + next if parsed_arguments[parameter[:name].to_sym].present? + + all_good = false + break + end + + parsed << { name: name, arguments: parsed_arguments } if all_good + end + + parsed + end + + def system_prompt + prompt = +<<~PROMPT + - You are able to execute the following external functions on real data! + - Never say that you are in a hypothetical situation, just run functions you need to run! + - When you run a command/function you will gain access to real information in a subsequant call! + - NEVER EVER pretend to know stuff, you ALWAYS lean on functions to discover the truth! + - You have direct access to data on this forum using !functions + + { + PROMPT + + @functions.each do |function| + prompt << " // #{function.description}\n" + prompt << " #{function.name}" + if function.parameters.present? 
+ prompt << "(" + function.parameters.each_with_index do |parameter, index| + prompt << ", " if index > 0 + prompt << "#{parameter[:name]}: #{parameter[:type]}" + if parameter[:required] + prompt << " [required]" + else + prompt << " [optional]" + end + + description = +(parameter[:description] || "") + description << " [valid values: #{parameter[:enum].join(",")}]" if parameter[:enum] + + description.strip! + + prompt << " /* #{description} */" if description.present? + end + prompt << ")\n" + end + end + + prompt << <<~PROMPT + } + \n\nTo execute a function, use the following syntax: + + !function_name(param1: "value1", param2: 2) + + For example for a function defined as: + + { + // echo a string + echo(message: string [required]) + } + + You can execute with: + !echo(message: "hello world") + PROMPT + + prompt + end + end + end +end diff --git a/lib/shared/inference/openai_completions.rb b/lib/shared/inference/openai_completions.rb index 1381c6b8..5a445c30 100644 --- a/lib/shared/inference/openai_completions.rb +++ b/lib/shared/inference/openai_completions.rb @@ -4,51 +4,6 @@ module ::DiscourseAi module Inference class OpenAiCompletions TIMEOUT = 60 - - class Function - attr_reader :name, :description, :parameters, :type - - def initialize(name:, description:, type: nil) - @name = name - @description = description - @type = type || "object" - @parameters = [] - end - - def add_parameter(name:, type:, description:, enum: nil, required: false) - @parameters << { - name: name, - type: type, - description: description, - enum: enum, - required: required, - } - end - - def to_json(*args) - as_json.to_json(*args) - end - - def as_json - required_params = [] - - properties = {} - parameters.each do |parameter| - definition = { type: parameter[:type], description: parameter[:description] } - definition[:enum] = parameter[:enum] if parameter[:enum] - - required_params << parameter[:name] if parameter[:required] - properties[parameter[:name]] = definition - end - - 
params = { type: @type, properties: properties } - - params[:required] = required_params if required_params.present? - - { name: name, description: description, parameters: params } - end - end - CompletionFailed = Class.new(StandardError) def self.perform!( diff --git a/plugin.rb b/plugin.rb index fbee3e0e..cf01059c 100644 --- a/plugin.rb +++ b/plugin.rb @@ -32,6 +32,8 @@ after_initialize do require_relative "lib/shared/inference/anthropic_completions" require_relative "lib/shared/inference/stability_generator" require_relative "lib/shared/inference/hugging_face_text_generation" + require_relative "lib/shared/inference/function" + require_relative "lib/shared/inference/function_list" require_relative "lib/shared/classificator" require_relative "lib/shared/post_classificator" diff --git a/spec/lib/modules/ai_bot/anthropic_bot_spec.rb b/spec/lib/modules/ai_bot/anthropic_bot_spec.rb index 90086ae4..718fd59e 100644 --- a/spec/lib/modules/ai_bot/anthropic_bot_spec.rb +++ b/spec/lib/modules/ai_bot/anthropic_bot_spec.rb @@ -1,23 +1,68 @@ # frozen_string_literal: true -RSpec.describe DiscourseAi::AiBot::AnthropicBot do - describe "#update_with_delta" do - def bot_user - User.find(DiscourseAi::AiBot::EntryPoint::GPT4_ID) - end +module ::DiscourseAi + module AiBot + describe AnthropicBot do + def bot_user + User.find(EntryPoint::CLAUDE_V2_ID) + end - subject { described_class.new(bot_user) } + let(:bot) { described_class.new(bot_user) } + let(:post) { Fabricate(:post) } - describe "get_delta" do - it "can properly remove Assistant prefix" do - context = {} - reply = +"" + describe "system message" do + it "includes the full command framework" do + SiteSetting.ai_bot_enabled_chat_commands = "read|search" + prompt = bot.system_prompt(post) - reply << subject.get_delta({ completion: "Hello " }, context) - expect(reply).to eq("Hello ") + expect(prompt).to include("read") + expect(prompt).to include("search_query") + end + end - reply << subject.get_delta({ completion: "world" 
}, context) - expect(reply).to eq("Hello world") + describe "parsing a reply prompt" do + it "can correctly detect commands from a prompt" do + SiteSetting.ai_bot_enabled_chat_commands = "read|search" + functions = DiscourseAi::AiBot::Bot::FunctionCalls.new + + prompt = <<~REPLY + Hi there I am a robot!!! + + !search(search_query: "hello world", random_stuff: 77) + !random(search_query: "hello world", random_stuff: 77) + !read(topic_id: 109) + !read(random: 109) + REPLY + + expect(functions.found?).to eq(false) + + bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: false) + expect(functions.found?).to eq(true) + + bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: true) + + expect(functions.to_a).to eq( + [ + { name: "search", arguments: "{\"search_query\":\"hello world\"}" }, + { name: "read", arguments: "{\"topic_id\":\"109\"}" }, + ], + ) + end + end + + describe "#update_with_delta" do + describe "get_delta" do + it "can properly remove Assistant prefix" do + context = {} + reply = +"" + + reply << bot.get_delta({ completion: "Hello " }, context) + expect(reply).to eq("Hello ") + + reply << bot.get_delta({ completion: "world" }, context) + expect(reply).to eq("Hello world") + end + end end end end diff --git a/spec/shared/inference/function_list_spec.rb b/spec/shared/inference/function_list_spec.rb new file mode 100644 index 00000000..81d0d89a --- /dev/null +++ b/spec/shared/inference/function_list_spec.rb @@ -0,0 +1,62 @@ +# frozen_string_literal: true +require "rails_helper" + +module DiscourseAi::Inference + describe FunctionList do + let :function_list do + function = + Function.new(name: "get_weather", description: "Get the weather in a city (default to c)") + + function.add_parameter( + name: "location", + type: "string", + description: "the city name", + required: true, + ) + + function.add_parameter( + name: "unit", + type: "string", + description: "the unit of measurement celcius c or fahrenheit 
f", + enum: %w[c f], + required: false, + ) + + list = FunctionList.new + list << function + list + end + + it "can handle complex parsing" do + raw_prompt = <<~PROMPT + !get_weather(location: "sydney", unit: "f") + !get_weather (location: sydney) + !get_weather(location : 'sydney's', unit: "m", invalid: "invalid") + !get_weather(unit: "f", invalid: "invalid") + PROMPT + parsed = function_list.parse_prompt(raw_prompt) + + expect(parsed).to eq( + [ + { name: "get_weather", arguments: { location: "sydney", unit: "f" } }, + { name: "get_weather", arguments: { location: "sydney" } }, + { name: "get_weather", arguments: { location: "sydney's" } }, + ], + ) + end + + it "can generate a general custom system prompt" do + prompt = function_list.system_prompt + + # this is fragile, by design, we need to test something here + # + expected = <<~PROMPT + { + // Get the weather in a city (default to c) + get_weather(location: string [required] /* the city name */, unit: string [optional] /* the unit of measurement celcius c or fahrenheit f [valid values: c,f] */) + } + PROMPT + expect(prompt).to include(expected) + end + end +end diff --git a/spec/shared/inference/openai_completions_spec.rb b/spec/shared/inference/openai_completions_spec.rb index 5c9d8504..ed4d8908 100644 --- a/spec/shared/inference/openai_completions_spec.rb +++ b/spec/shared/inference/openai_completions_spec.rb @@ -74,7 +74,7 @@ describe DiscourseAi::Inference::OpenAiCompletions do functions = [] function = - DiscourseAi::Inference::OpenAiCompletions::Function.new( + DiscourseAi::Inference::Function.new( name: "get_weather", description: "Get the weather in a city", )