FIX: improve bot behavior (#408)

* FIX: improve bot behavior

  - Provide more information to Gemini context post function execution
  - Use system prompts for Claude (fixes DALL-E)
  - Ensure Assistant is properly separated
  - Teach Claude to return arrays in JSON vs XML

  Also refactors tests so we do not copy the tool preamble everywhere.

* System msg is claude-2 only. Fix typo.

---------

Co-authored-by: Roman Rizzi <rizziromanalejandro@gmail.com>
parent 6124f910c1
commit b0a0cbe3ca
@@ -15,7 +15,8 @@ module DiscourseAi
         end
 
         def translate
-          claude_prompt = +"Human: #{prompt[:insts]}\n"
+          claude_prompt = uses_system_message? ? +"" : +"Human: "
+          claude_prompt << prompt[:insts] << "\n"
 
           claude_prompt << build_tools_prompt if prompt[:tools]
@@ -27,6 +28,7 @@ module DiscourseAi
           claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts]
 
           claude_prompt << "\n\n"
           claude_prompt << "Assistant:"
+          claude_prompt << " #{prompt[:final_insts]}:" if prompt[:final_insts]
           claude_prompt << "\n"
         end
@@ -70,6 +72,10 @@ module DiscourseAi
 
         private
 
+        def uses_system_message?
+          model_name == "claude-2"
+        end
+
         def build_examples(examples_arr)
           examples_arr.reduce("") do |memo, example|
             memo += "<example>\nH: #{example[0]}\nA: #{example[1]}\n</example>\n"
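Taken together, the three hunks above change how the Claude prompt opens and closes. A minimal standalone sketch of the new assembly (hypothetical code outside the real dialect class, with prompt reduced to a plain hash and uses_system_message? inlined):

    # Sketch of the reworked assembly: claude-2 gets a bare system-style
    # opening, older Claude models keep the "Human: " prefix.
    def translate(prompt, model_name:)
      uses_system_message = model_name == "claude-2"

      claude_prompt = uses_system_message ? +"" : +"Human: "
      claude_prompt << prompt[:insts] << "\n"
      claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts]
      claude_prompt << "\n\n"
      claude_prompt << "Assistant:"
      claude_prompt << " #{prompt[:final_insts]}:" if prompt[:final_insts]
      claude_prompt << "\n"
    end

    translate({ insts: "You are a bot." }, model_name: "claude-2")
    # => "You are a bot.\n\n\nAssistant:\n"
    translate({ insts: "You are a bot." }, model_name: "claude-instant-1")
    # => "Human: You are a bot.\n\n\nAssistant:\n"
    translate({ insts: "You are a bot.", final_insts: "Reply in Spanish" }, model_name: "claude-2")
    # => "You are a bot.\n\n\nAssistant: Reply in Spanish:\n"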
@@ -142,9 +142,7 @@ module DiscourseAi
           self.class.tokenizer.size(context[:content].to_s)
         end
 
-        def build_tools_prompt
-          return "" if prompt[:tools].blank?
-
+        def self.tool_preamble
           <<~TEXT
             In this environment you have access to a set of tools you can use to answer the user's question.
             You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
@@ -158,8 +156,18 @@ module DiscourseAi
             </invoke>
             </function_calls>
 
-            Here are the tools available:
-            <tools>
-            #{tools}</tools>
-          TEXT
-        end
+            if a parameter type is an array, return a JSON array of values. For example:
+            [1,"two",3.0]
+
+            Here are the tools available:
+          TEXT
+        end
+
+        def build_tools_prompt
+          return "" if prompt[:tools].blank?
+
+          <<~TEXT
+            #{self.class.tool_preamble}
+            <tools>
+            #{tools}</tools>
+          TEXT
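With tool_preamble extracted, build_tools_prompt above simply wraps it around the serialized <tools> list. Under the new array instruction, a compliant tool call from the model would look roughly like this (the tool name and parameter are invented for illustration; the markup follows the preamble above):

    <function_calls>
    <invoke>
    <tool_name>search</tool_name>
    <parameters>
    <categories>["support","feature"]</categories>
    </parameters>
    </invoke>
    </function_calls>

The parameter is still XML-wrapped, but its array value is a JSON array rather than nested XML tags, which is what "return arrays in JSON vs XML" in the commit message refers to.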
@@ -128,19 +128,20 @@ module DiscourseAi
         private
 
         def flatten_context(context)
-          context.map do |a_context|
-            if a_context[:type] == "multi_turn"
-              # Some multi-turn, like the ones that generate images, doesn't chain a next
-              # response. We don't have an assistant call for those, so we use the tool_call instead.
-              # We cannot use tool since it confuses the model, making it stop calling tools in next responses,
-              # and replying with a JSON.
-              a_context[:content].find { |c| c[:type] == "assistant" } ||
-                a_context[:content].find { |c| c[:type] == "tool_call" }
+          flattened = []
+          context.each do |c|
+            if c[:type] == "multi_turn"
+              # gemini quirk
+              if c[:content].first[:type] == "tool"
+                flattened << { type: "assistant", content: "ok." }
+              end
+              flattened.concat(c[:content])
             else
-              a_context
+              flattened << c
             end
           end
+          flattened
         end
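A worked example of the new flattening (the method is copied from the hunk above; the context shapes and the dall_e entry are assumed for illustration):

    def flatten_context(context)
      flattened = []
      context.each do |c|
        if c[:type] == "multi_turn"
          # gemini quirk: a multi_turn that starts with a bare tool result gets
          # a synthetic assistant turn so the model sees a complete exchange
          flattened << { type: "assistant", content: "ok." } if c[:content].first[:type] == "tool"
          flattened.concat(c[:content])
        else
          flattened << c
        end
      end
      flattened
    end

    context = [
      { type: "user", content: "draw a cat" },
      { type: "multi_turn", content: [{ type: "tool", name: "dall_e", content: "[image]" }] },
    ]
    flatten_context(context)
    # => [
    #   { type: "user", content: "draw a cat" },
    #   { type: "assistant", content: "ok." },
    #   { type: "tool", name: "dall_e", content: "[image]" },
    # ]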
@@ -48,9 +48,11 @@ RSpec.describe DiscourseAi::Completions::Dialects::Claude do
   describe "#translate" do
     it "translates a prompt written in our generic format to Claude's format" do
       anthropic_version = <<~TEXT
-        Human: #{prompt[:insts]}
+        #{prompt[:insts]}
         #{prompt[:input]}
         #{prompt[:post_insts]}
+
+
         Assistant:
       TEXT
@@ -67,13 +69,15 @@ RSpec.describe DiscourseAi::Completions::Dialects::Claude do
         ],
       ]
       anthropic_version = <<~TEXT
-        Human: #{prompt[:insts]}
+        #{prompt[:insts]}
         <example>
         H: #{prompt[:examples][0][0]}
         A: #{prompt[:examples][0][1]}
         </example>
         #{prompt[:input]}
         #{prompt[:post_insts]}
+
+
         Assistant:
       TEXT
@@ -86,25 +90,14 @@ RSpec.describe DiscourseAi::Completions::Dialects::Claude do
       prompt[:tools] = [tool]
 
       anthropic_version = <<~TEXT
-        Human: #{prompt[:insts]}
-        In this environment you have access to a set of tools you can use to answer the user's question.
-        You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
-        <function_calls>
-        <invoke>
-        <tool_name>$TOOL_NAME</tool_name>
-        <parameters>
-        <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
-        ...
-        </parameters>
-        </invoke>
-        </function_calls>
-
-        Here are the tools available:
-
+        #{prompt[:insts]}
+        #{DiscourseAi::Completions::Dialects::Claude.tool_preamble}
         <tools>
         #{dialect.tools}</tools>
         #{prompt[:input]}
         #{prompt[:post_insts]}
+
+
         Assistant:
       TEXT
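A detail that is easy to misread in these expectations, here and in the dialect spec hunks further below: no newline is written between #{dialect.tools} and </tools> because the serialized tools string already ends with one. A tiny sketch, assuming a single serialized tool:

    tools = "<tool>get_weather</tool>\n" # assumed serialized form
    xml = <<~TEXT
      <tools>
      #{tools}</tools>
    TEXT
    xml # => "<tools>\n<tool>get_weather</tool>\n</tools>\n"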
@@ -150,14 +150,38 @@ RSpec.describe DiscourseAi::Completions::Dialects::Gemini do
 
       translated_context = dialect.conversation_context
 
-      expect(translated_context.size).to eq(1)
-      expect(translated_context.last[:role]).to eq("model")
-      expect(translated_context.last.dig(:parts, :functionCall)).to be_present
+      expected = [
+        {
+          role: "function",
+          parts: {
+            functionResponse: {
+              name: "get_weather",
+              response: {
+                content: "I'm a tool result",
+              },
+            },
+          },
+        },
+        {
+          role: "model",
+          parts: {
+            functionCall: {
+              name: "get_weather",
+              args: {
+                location: "Sydney",
+                unit: "c",
+              },
+            },
+          },
+        },
+      ]
+
+      expect(translated_context).to eq(expected)
     end
   end
 
   context "when the multi-turn is from a chainable tool" do
-    it "uses the assistand context" do
+    it "uses the assistant context" do
       prompt[:conversation_context] = [
         {
           type: "multi_turn",
@@ -181,9 +205,34 @@ RSpec.describe DiscourseAi::Completions::Dialects::Gemini do
 
       translated_context = dialect.conversation_context
 
-      expect(translated_context.size).to eq(1)
-      expect(translated_context.last[:role]).to eq("model")
-      expect(translated_context.last.dig(:parts, :text)).to be_present
+      expected = [
+        { role: "model", parts: { text: "I'm a bot reply!" } },
+        {
+          role: "function",
+          parts: {
+            functionResponse: {
+              name: "get_weather",
+              response: {
+                content: "I'm a tool result",
+              },
+            },
+          },
+        },
+        {
+          role: "model",
+          parts: {
+            functionCall: {
+              name: "get_weather",
+              args: {
+                location: "Sydney",
+                unit: "c",
+              },
+            },
+          },
+        },
+      ]
+
+      expect(translated_context).to eq(expected)
     end
   end
 end
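For readers unfamiliar with Gemini's function-calling wire format, the mapping these two expectations encode can be read straight off the arrays (names come from the specs; the general rule sketched here is an inference):

    # Generic context type -> Gemini turn, as the specs expect it:
    #   assistant -> { role: "model",    parts: { text: ... } }
    #   tool      -> { role: "function", parts: { functionResponse: { name: ..., response: { content: ... } } } }
    #   tool_call -> { role: "model",    parts: { functionCall: { name: ..., args: { ... } } } }
    # Note both expected arrays read newest-first: the functionResponse entry
    # precedes the functionCall that produced it.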
@@ -94,20 +94,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do
         [INST]
         <<SYS>>
         #{prompt[:insts]}
-        In this environment you have access to a set of tools you can use to answer the user's question.
-        You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
-        <function_calls>
-        <invoke>
-        <tool_name>$TOOL_NAME</tool_name>
-        <parameters>
-        <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
-        ...
-        </parameters>
-        </invoke>
-        </function_calls>
-
-        Here are the tools available:
-
+        #{DiscourseAi::Completions::Dialects::Llama2Classic.tool_preamble}
         <tools>
         #{dialect.tools}</tools>
         #{prompt[:post_insts]}
@@ -89,20 +89,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Mixtral do
       orca_style_version = <<~TEXT
         <s> [INST]
         #{prompt[:insts]}
-        In this environment you have access to a set of tools you can use to answer the user's question.
-        You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
-        <function_calls>
-        <invoke>
-        <tool_name>$TOOL_NAME</tool_name>
-        <parameters>
-        <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
-        ...
-        </parameters>
-        </invoke>
-        </function_calls>
-
-        Here are the tools available:
-
+        #{DiscourseAi::Completions::Dialects::Mixtral.tool_preamble}
         <tools>
         #{dialect.tools}</tools>
         #{prompt[:post_insts]}
@@ -93,20 +93,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do
       orca_style_version = <<~TEXT
         ### System:
         #{prompt[:insts]}
-        In this environment you have access to a set of tools you can use to answer the user's question.
-        You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
-        <function_calls>
-        <invoke>
-        <tool_name>$TOOL_NAME</tool_name>
-        <parameters>
-        <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
-        ...
-        </parameters>
-        </invoke>
-        </function_calls>
-
-        Here are the tools available:
-
+        #{DiscourseAi::Completions::Dialects::OrcaStyle.tool_preamble}
         <tools>
         #{dialect.tools}</tools>
         #{prompt[:post_insts]}