FEATURE: implement command framework for non-OpenAI LLMs (#147)

OpenAI supports function calling, but it has a very specific shape that other LLMs have not quite adopted. This commit simulates a command framework using system prompts on LLMs that are not OpenAI. Features include: - Smart system prompt to steer the LLM - Parameter validation (we ensure all the params are specified correctly) This is being tested on Anthropic at the moment and initial results are promising.
2023-08-23 07:49:36 +10:00 · 2023-08-23 07:49:36 +10:00 · f0e1c72aa7
parent 95881fce74
commit f0e1c72aa7
12 changed files with 429 additions and 138 deletions
--- a/lib/modules/ai_bot/anthropic_bot.rb
+++ b/lib/modules/ai_bot/anthropic_bot.rb
@ -25,12 +25,6 @@ module DiscourseAi

      private

-      def populate_functions(partial, function)
-        # nothing to do here, no proper function support
-        # needs to be simulated for Claude but model is too
-        # hard to steer for now
-      end
-
      def build_message(poster_username, content, system: false, function: nil)
        role = poster_username == bot_user.username ? "Assistant" : "Human"

--- a/lib/modules/ai_bot/bot.rb
+++ b/lib/modules/ai_bot/bot.rb
@ -3,23 +3,41 @@
 module DiscourseAi
  module AiBot
    class Bot
-      class Functions
-        attr_reader :functions
-        attr_reader :current_function
-
+      class FunctionCalls
        def initialize
          @functions = []
          @current_function = nil
+          @found = false
+        end
+
+        def found?
+          !@functions.empty? || @found
+        end
+
+        def found!
+          @found = true
        end

        def add_function(name)
          @current_function = { name: name, arguments: +"" }
-          functions << current_function
+          @functions << @current_function
        end

        def add_argument_fragment(fragment)
          @current_function[:arguments] << fragment
        end
+
+        def length
+          @functions.length
+        end
+
+        def each
+          @functions.each { |function| yield function }
+        end
+
+        def to_a
+          @functions
+        end
      end

      attr_reader :bot_user
@ -85,13 +103,22 @@ module DiscourseAi

        setup_cancel = false
        context = {}
-        functions = Functions.new
+        functions = FunctionCalls.new

        submit_prompt(prompt, prefer_low_cost: prefer_low_cost) do |partial, cancel|
          current_delta = get_delta(partial, context)
          partial_reply << current_delta
-          reply << current_delta
-          populate_functions(partial, functions)
+
+          if !available_functions.empty?
+            populate_functions(
+              partial: partial,
+              reply: partial_reply,
+              functions: functions,
+              done: false,
+            )
+          end
+
+          reply << current_delta if !functions.found?

          if redis_stream_key && !Discourse.redis.get(redis_stream_key)
            cancel&.call
@ -143,11 +170,15 @@ module DiscourseAi
          post.post_custom_prompt.update!(custom_prompt: prompt)
        end

-        if functions.functions.length > 0
+        if !available_functions.empty?
+          populate_functions(partial: nil, reply: partial_reply, functions: functions, done: true)
+        end
+
+        if functions.length > 0
          chain = false
          standalone = false

-          functions.functions.each do |function|
+          functions.each do |function|
            name, args = function[:name], function[:arguments]

            if command_klass = available_commands.detect { |cmd| cmd.invoked?(name) }
@ -230,9 +261,26 @@ module DiscourseAi
      end

      def available_commands
-        # by default assume bots have no access to commands
-        # for now we need GPT 4 to properly work with them
-        []
+        return @cmds if @cmds
+
+        all_commands =
+          [
+            Commands::CategoriesCommand,
+            Commands::TimeCommand,
+            Commands::SearchCommand,
+            Commands::SummarizeCommand,
+            Commands::ReadCommand,
+          ].tap do |cmds|
+            cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
+            cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
+            if SiteSetting.ai_google_custom_search_api_key.present? &&
+                 SiteSetting.ai_google_custom_search_cx.present?
+              cmds << Commands::GoogleCommand
+            end
+          end
+
+        allowed_commands = SiteSetting.ai_bot_enabled_chat_commands.split("|")
+        @cmds = all_commands.filter { |klass| allowed_commands.include?(klass.name) }
      end

      def system_prompt_style!(style)
@ -241,7 +289,8 @@ module DiscourseAi

      def system_prompt(post)
        return "You are a helpful Bot" if @style == :simple
-        <<~TEXT
+
+        prompt = +<<~TEXT
          You are a helpful Discourse assistant.
          You understand and generate Discourse Markdown.
          You live in a Discourse Forum Message.
@ -251,9 +300,28 @@ module DiscourseAi
          The description is: #{SiteSetting.site_description}
          The participants in this conversation are: #{post.topic.allowed_users.map(&:username).join(", ")}
          The date now is: #{Time.zone.now}, much has changed since you were trained.
-
-          #{available_commands.map(&:custom_system_message).compact.join("\n")}
        TEXT
+
+        if include_function_instructions_in_system_prompt?
+          prompt << "\n"
+          prompt << function_list.system_prompt
+          prompt << "\n"
+        end
+
+        prompt << available_commands.map(&:custom_system_message).compact.join("\n")
+        prompt
+      end
+
+      def include_function_instructions_in_system_prompt?
+        true
+      end
+
+      def function_list
+        return @function_list if @function_list
+
+        @function_list = DiscourseAi::Inference::FunctionList.new
+        available_functions.each { |function| @function_list << function }
+        @function_list
      end

      def tokenize(text)
@ -268,8 +336,47 @@ module DiscourseAi
        raise NotImplemented
      end

-      def populate_functions(partial, functions)
-        raise NotImplemented
+      def populate_functions(partial:, reply:, functions:, done:)
+        if !done
+          functions.found! if reply.match?(/^!/i)
+        else
+          reply
+            .scan(/^!.*$/i)
+            .each do |line|
+              function_list
+                .parse_prompt(line)
+                .each do |function|
+                  functions.add_function(function[:name])
+                  functions.add_argument_fragment(function[:arguments].to_json)
+                end
+            end
+        end
+      end
+
+      def available_functions
+        # note if defined? can be a problem in test
+        # this can never be nil so it is safe
+        return @available_functions if @available_functions
+
+        functions = []
+
+        functions =
+          available_commands.map do |command|
+            function =
+              DiscourseAi::Inference::Function.new(name: command.name, description: command.desc)
+            command.parameters.each do |parameter|
+              function.add_parameter(
+                name: parameter.name,
+                type: parameter.type,
+                description: parameter.description,
+                required: parameter.required,
+                enum: parameter.enum,
+              )
+            end
+            function
+          end
+
+        @available_functions = functions
      end

      protected
--- a/lib/modules/ai_bot/commands/image_command.rb
+++ b/lib/modules/ai_bot/commands/image_command.rb
@ -8,7 +8,7 @@ module DiscourseAi::AiBot::Commands
      end

      def desc
-        "Renders an image from the description (remove all connector words, keep it to 40 words or less). Despite being a text based bot you can generate images!"
+        "Renders an image from the description (remove all connector words, keep it to 40 words or less). Despite being a text based bot you can generate images! (when user asks to draw, paint or other synonyms try this)"
      end

      def parameters
--- a/lib/modules/ai_bot/commands/search_command.rb
+++ b/lib/modules/ai_bot/commands/search_command.rb
@ -26,7 +26,7 @@ module DiscourseAi::AiBot::Commands
          ),
          Parameter.new(
            name: "order",
-            description: "search result result order",
+            description: "search result order",
            type: "string",
            enum: %w[latest latest_topic oldest views likes],
          ),
--- a/lib/modules/ai_bot/open_ai_bot.rb
+++ b/lib/modules/ai_bot/open_ai_bot.rb
@ -63,57 +63,6 @@ module DiscourseAi
        DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(text)
      end

-      def available_functions
-        # note if defined? can be a problem in test
-        # this can never be nil so it is safe
-        return @available_functions if @available_functions
-
-        functions = []
-
-        functions =
-          available_commands.map do |command|
-            function =
-              DiscourseAi::Inference::OpenAiCompletions::Function.new(
-                name: command.name,
-                description: command.desc,
-              )
-            command.parameters.each do |parameter|
-              function.add_parameter(
-                name: parameter.name,
-                type: parameter.type,
-                description: parameter.description,
-                required: parameter.required,
-              )
-            end
-            function
-          end
-
-        @available_functions = functions
-      end
-
-      def available_commands
-        return @cmds if @cmds
-
-        all_commands =
-          [
-            Commands::CategoriesCommand,
-            Commands::TimeCommand,
-            Commands::SearchCommand,
-            Commands::SummarizeCommand,
-            Commands::ReadCommand,
-          ].tap do |cmds|
-            cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
-            cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
-            if SiteSetting.ai_google_custom_search_api_key.present? &&
-                 SiteSetting.ai_google_custom_search_cx.present?
-              cmds << Commands::GoogleCommand
-            end
-          end
-
-        allowed_commands = SiteSetting.ai_bot_enabled_chat_commands.split("|")
-        @cmds = all_commands.filter { |klass| allowed_commands.include?(klass.name) }
-      end
-
      def model_for(low_cost: false)
        return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
        "gpt-3.5-turbo-16k"
@ -129,9 +78,15 @@ module DiscourseAi
        end
      end

+      def include_function_instructions_in_system_prompt?
+        # open ai uses a bespoke system for function calls
+        false
+      end
+
      private

-      def populate_functions(partial, functions)
+      def populate_functions(partial:, reply:, functions:, done:)
+        return if !partial
        fn = partial.dig(:choices, 0, :delta, :function_call)
        if fn
          functions.add_function(fn[:name]) if fn[:name].present?
--- a/lib/shared/inference/function.rb
+++ b/lib/shared/inference/function.rb
@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module ::DiscourseAi
+  module Inference
+    class Function
+      attr_reader :name, :description, :parameters, :type
+
+      def initialize(name:, description:, type: nil)
+        @name = name
+        @description = description
+        @type = type || "object"
+        @parameters = []
+      end
+
+      def add_parameter(name:, type:, description:, enum: nil, required: false)
+        @parameters << {
+          name: name,
+          type: type,
+          description: description,
+          enum: enum,
+          required: required,
+        }
+      end
+
+      def to_json(*args)
+        as_json.to_json(*args)
+      end
+
+      def as_json
+        required_params = []
+
+        properties = {}
+        parameters.each do |parameter|
+          definition = { type: parameter[:type], description: parameter[:description] }
+          definition[:enum] = parameter[:enum] if parameter[:enum]
+
+          required_params << parameter[:name] if parameter[:required]
+          properties[parameter[:name]] = definition
+        end
+
+        params = { type: @type, properties: properties }
+
+        params[:required] = required_params if required_params.present?
+
+        { name: name, description: description, parameters: params }
+      end
+    end
+  end
+end
--- a/lib/shared/inference/function_list.rb
+++ b/lib/shared/inference/function_list.rb
@ -0,0 +1,122 @@
+# frozen_string_literal: true
+
+module ::DiscourseAi
+  module Inference
+    class FunctionList
+      def initialize
+        @functions = []
+      end
+
+      def <<(function)
+        @functions << function
+      end
+
+      def parse_prompt(prompt)
+        parsed = []
+
+        prompt
+          .split("\n")
+          .each do |line|
+            line.strip!
+            next if line.blank?
+            next if !line.start_with?("!")
+
+            name, arguments = line.split("(", 2)
+            name = name[1..-1].strip
+
+            function = @functions.find { |f| f.name == name }
+            next if function.blank?
+
+            arguments = arguments[0..-2] if arguments.end_with?(")")
+            arguments = arguments.split(",").map(&:strip)
+
+            parsed_arguments = {}
+            arguments.each do |argument|
+              key, value = argument.split(":", 2)
+              # remove stuff that is bypasses spec
+              param = function.parameters.find { |p| p[:name] == key.strip }
+              next if !param
+
+              value = value.strip.gsub(/(\A"(.*)"\Z)|(\A'(.*)'\Z)/m, '\2\4') if value.present?
+
+              if param[:enum]
+                next if !param[:enum].include?(value)
+              end
+
+              parsed_arguments[key.strip.to_sym] = value.strip
+            end
+
+            # ensure parsed_arguments has all required arguments
+            all_good = true
+            function.parameters.each do |parameter|
+              next if !parameter[:required]
+              next if parsed_arguments[parameter[:name].to_sym].present?
+
+              all_good = false
+              break
+            end
+
+            parsed << { name: name, arguments: parsed_arguments } if all_good
+          end
+
+        parsed
+      end
+
+      def system_prompt
+        prompt = +<<~PROMPT
+          - You are able to execute the following external functions on real data!
+          - Never say that you are in a hypothetical situation, just run functions you need to run!
+          - When you run a command/function you will gain access to real information in a subsequant call!
+          - NEVER EVER pretend to know stuff, you ALWAYS lean on functions to discover the truth!
+          - You have direct access to data on this forum using !functions
+
+          {
+        PROMPT
+
+        @functions.each do |function|
+          prompt << " // #{function.description}\n"
+          prompt << " #{function.name}"
+          if function.parameters.present?
+            prompt << "("
+            function.parameters.each_with_index do |parameter, index|
+              prompt << ", " if index > 0
+              prompt << "#{parameter[:name]}: #{parameter[:type]}"
+              if parameter[:required]
+                prompt << " [required]"
+              else
+                prompt << " [optional]"
+              end
+
+              description = +(parameter[:description] || "")
+              description << " [valid values: #{parameter[:enum].join(",")}]" if parameter[:enum]
+
+              description.strip!
+
+              prompt << " /* #{description} */" if description.present?
+            end
+            prompt << ")\n"
+          end
+        end
+
+        prompt << <<~PROMPT
+          }
+          \n\nTo execute a function, use the following syntax:
+
+          !function_name(param1: "value1", param2: 2)
+
+          For example for a function defined as:
+
+          {
+            // echo a string
+            echo(message: string [required])
+          }
+
+          You can execute with:
+          !echo(message: "hello world")
+        PROMPT
+
+        prompt
+      end
+    end
+  end
+end
--- a/lib/shared/inference/openai_completions.rb
+++ b/lib/shared/inference/openai_completions.rb
@ -4,51 +4,6 @@ module ::DiscourseAi
  module Inference
    class OpenAiCompletions
      TIMEOUT = 60
-
-      class Function
-        attr_reader :name, :description, :parameters, :type
-
-        def initialize(name:, description:, type: nil)
-          @name = name
-          @description = description
-          @type = type || "object"
-          @parameters = []
-        end
-
-        def add_parameter(name:, type:, description:, enum: nil, required: false)
-          @parameters << {
-            name: name,
-            type: type,
-            description: description,
-            enum: enum,
-            required: required,
-          }
-        end
-
-        def to_json(*args)
-          as_json.to_json(*args)
-        end
-
-        def as_json
-          required_params = []
-
-          properties = {}
-          parameters.each do |parameter|
-            definition = { type: parameter[:type], description: parameter[:description] }
-            definition[:enum] = parameter[:enum] if parameter[:enum]
-
-            required_params << parameter[:name] if parameter[:required]
-            properties[parameter[:name]] = definition
-          end
-
-          params = { type: @type, properties: properties }
-
-          params[:required] = required_params if required_params.present?
-
-          { name: name, description: description, parameters: params }
-        end
-      end
-
      CompletionFailed = Class.new(StandardError)

      def self.perform!(
--- a/plugin.rb
+++ b/plugin.rb
@ -32,6 +32,8 @@ after_initialize do
  require_relative "lib/shared/inference/anthropic_completions"
  require_relative "lib/shared/inference/stability_generator"
  require_relative "lib/shared/inference/hugging_face_text_generation"
+  require_relative "lib/shared/inference/function"
+  require_relative "lib/shared/inference/function_list"

  require_relative "lib/shared/classificator"
  require_relative "lib/shared/post_classificator"
--- a/spec/lib/modules/ai_bot/anthropic_bot_spec.rb
+++ b/spec/lib/modules/ai_bot/anthropic_bot_spec.rb
@ -1,23 +1,68 @@
 # frozen_string_literal: true

-RSpec.describe DiscourseAi::AiBot::AnthropicBot do
-  describe "#update_with_delta" do
-    def bot_user
-      User.find(DiscourseAi::AiBot::EntryPoint::GPT4_ID)
-    end
+module ::DiscourseAi
+  module AiBot
+    describe AnthropicBot do
+      def bot_user
+        User.find(EntryPoint::CLAUDE_V2_ID)
+      end

-    subject { described_class.new(bot_user) }
+      let(:bot) { described_class.new(bot_user) }
+      let(:post) { Fabricate(:post) }

-    describe "get_delta" do
-      it "can properly remove Assistant prefix" do
-        context = {}
-        reply = +""
+      describe "system message" do
+        it "includes the full command framework" do
+          SiteSetting.ai_bot_enabled_chat_commands = "read|search"
+          prompt = bot.system_prompt(post)

-        reply << subject.get_delta({ completion: "Hello " }, context)
-        expect(reply).to eq("Hello ")
+          expect(prompt).to include("read")
+          expect(prompt).to include("search_query")
+        end
+      end

-        reply << subject.get_delta({ completion: "world" }, context)
-        expect(reply).to eq("Hello world")
+      describe "parsing a reply prompt" do
+        it "can correctly detect commands from a prompt" do
+          SiteSetting.ai_bot_enabled_chat_commands = "read|search"
+          functions = DiscourseAi::AiBot::Bot::FunctionCalls.new
+
+          prompt = <<~REPLY
+            Hi there I am a robot!!!
+
+            !search(search_query: "hello world", random_stuff: 77)
+            !random(search_query: "hello world", random_stuff: 77)
+            !read(topic_id: 109)
+            !read(random: 109)
+          REPLY
+
+          expect(functions.found?).to eq(false)
+
+          bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: false)
+          expect(functions.found?).to eq(true)
+
+          bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: true)
+
+          expect(functions.to_a).to eq(
+            [
+              { name: "search", arguments: "{\"search_query\":\"hello world\"}" },
+              { name: "read", arguments: "{\"topic_id\":\"109\"}" },
+            ],
+          )
+        end
+      end
+
+      describe "#update_with_delta" do
+        describe "get_delta" do
+          it "can properly remove Assistant prefix" do
+            context = {}
+            reply = +""
+
+            reply << bot.get_delta({ completion: "Hello " }, context)
+            expect(reply).to eq("Hello ")
+
+            reply << bot.get_delta({ completion: "world" }, context)
+            expect(reply).to eq("Hello world")
+          end
+        end
      end
    end
  end
--- a/spec/shared/inference/function_list_spec.rb
+++ b/spec/shared/inference/function_list_spec.rb
@ -0,0 +1,62 @@
+# frozen_string_literal: true
+require "rails_helper"
+
+module DiscourseAi::Inference
+  describe FunctionList do
+    let :function_list do
+      function =
+        Function.new(name: "get_weather", description: "Get the weather in a city (default to c)")
+
+      function.add_parameter(
+        name: "location",
+        type: "string",
+        description: "the city name",
+        required: true,
+      )
+
+      function.add_parameter(
+        name: "unit",
+        type: "string",
+        description: "the unit of measurement celcius c or fahrenheit f",
+        enum: %w[c f],
+        required: false,
+      )
+
+      list = FunctionList.new
+      list << function
+      list
+    end
+
+    it "can handle complex parsing" do
+      raw_prompt = <<~PROMPT
+        !get_weather(location: "sydney", unit: "f")
+        !get_weather  (location: sydney)
+        !get_weather(location  : 'sydney's', unit: "m", invalid: "invalid")
+        !get_weather(unit: "f", invalid: "invalid")
+      PROMPT
+      parsed = function_list.parse_prompt(raw_prompt)
+
+      expect(parsed).to eq(
+        [
+          { name: "get_weather", arguments: { location: "sydney", unit: "f" } },
+          { name: "get_weather", arguments: { location: "sydney" } },
+          { name: "get_weather", arguments: { location: "sydney's" } },
+        ],
+      )
+    end
+
+    it "can generate a general custom system prompt" do
+      prompt = function_list.system_prompt
+
+      # this is fragile, by design, we need to test something here
+      #
+      expected = <<~PROMPT
+        {
+         // Get the weather in a city (default to c)
+         get_weather(location: string [required] /* the city name */, unit: string [optional] /* the unit of measurement celcius c or fahrenheit f [valid values: c,f] */)
+        }
+      PROMPT
+      expect(prompt).to include(expected)
+    end
+  end
+end
--- a/spec/shared/inference/openai_completions_spec.rb
+++ b/spec/shared/inference/openai_completions_spec.rb
@ -74,7 +74,7 @@ describe DiscourseAi::Inference::OpenAiCompletions do
    functions = []

    function =
-      DiscourseAi::Inference::OpenAiCompletions::Function.new(
+      DiscourseAi::Inference::Function.new(
        name: "get_weather",
        description: "Get the weather in a city",
      )