FIX: encode parameters returned from LLMs correctly (#889)

Fixes encoding of params on LLM function calls.

Previously we would return improperly encoded results if a function parameter contained an HTML tag.

Additionally, this adds the missing HTTP verbs (put, patch, delete) to the HTTP helpers available to tool calls.
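
For context, here is a minimal Ruby sketch of the problem and of the CGI.escapeHTML fix applied in the dialects and message processors below (the example values are made up):

require "cgi"

name = "search_query"
value = "s<a>m sam" # an LLM-supplied argument that happens to contain a tag

"<#{name}>#{value}</#{name}>"
# => "<search_query>s<a>m sam</search_query>" -- the stray tag corrupts the tool-call XML

"<#{name}>#{CGI.escapeHTML(value)}</#{name}>"
# => "<search_query>s&lt;a&gt;m sam</search_query>"
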
Sam 2024-11-04 10:07:17 +11:00 committed by GitHub
parent 7e3a543f6f
commit c352054d4e
10 changed files with 112 additions and 62 deletions

View File

@@ -73,6 +73,8 @@ class AiTool < ActiveRecord::Base
* Returns:
* { status: number, body: string }
*
* (also available: http.put, http.patch, http.delete)
*
* Note: Max 20 HTTP requests per execution.
*
* 2. llm

View File

@@ -44,10 +44,14 @@ module DiscourseAi
end
def framework_script
http_methods = %i[get post put patch delete].map { |method| <<~JS }.join("\n")
#{method}: function(url, options) {
return _http_#{method}(url, options);
},
JS
<<~JS
const http = {
get: function(url, options) { return _http_get(url, options) },
post: function(url, options) { return _http_post(url, options) },
#{http_methods}
};
const llm = {
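
As a standalone illustration (this snippet is not part of the diff), the heredoc interpolation above expands into one delegating wrapper per verb:

http_methods = %i[get post put patch delete].map { |method| <<~JS }.join("\n")
  #{method}: function(url, options) {
    return _http_#{method}(url, options);
  },
JS

puts <<~JS
  const http = {
  #{http_methods}
  };
JS
# Prints a `const http = { ... }` object whose get/post/put/patch/delete
# members delegate to the attached _http_* callbacks set up in the next hunk.
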
@@ -249,36 +253,44 @@ module DiscourseAi
end,
)
mini_racer_context.attach(
"_http_post",
->(url, options) do
begin
@http_requests_made += 1
if @http_requests_made > MAX_HTTP_REQUESTS
raise TooManyRequestsError.new("Tool made too many HTTP requests")
%i[post put patch delete].each do |method|
mini_racer_context.attach(
"_http_#{method}",
->(url, options) do
begin
@http_requests_made += 1
if @http_requests_made > MAX_HTTP_REQUESTS
raise TooManyRequestsError.new("Tool made too many HTTP requests")
end
self.running_attached_function = true
headers = (options && options["headers"]) || {}
body = options && options["body"]
result = {}
DiscourseAi::AiBot::Tools::Tool.send_http_request(
url,
method: method,
headers: headers,
body: body,
) do |response|
result[:body] = response.body
result[:status] = response.code.to_i
end
result
rescue => e
p url
p options
p e
puts e.backtrace
raise e
ensure
self.running_attached_function = false
end
self.running_attached_function = true
headers = (options && options["headers"]) || {}
body = options && options["body"]
result = {}
DiscourseAi::AiBot::Tools::Tool.send_http_request(
url,
method: :post,
headers: headers,
body: body,
) do |response|
result[:body] = response.body
result[:status] = response.code.to_i
end
result
ensure
self.running_attached_function = false
end
end,
)
end,
)
end
end
end
end
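
The %i[post put patch delete] loop above leans on mini_racer's ability to expose Ruby lambdas to the sandboxed JavaScript. A rough standalone sketch of the same pattern (the stubbed return values are illustrative, not the plugin's behaviour):

require "mini_racer"

ctx = MiniRacer::Context.new
%i[get post put patch delete].each do |method|
  # one attached callback per verb, mirroring the loop above
  ctx.attach("_http_#{method}", ->(url, options) { { status: 200, body: "stubbed #{method} #{url}" } })
end

ctx.eval('_http_put("https://example.com/api", {})')
# => { "status" => 200, "body" => "stubbed put https://example.com/api" }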

View File

@@ -188,6 +188,12 @@ module DiscourseAi
request = FinalDestination::HTTP::Get.new(uri)
elsif method == :post
request = FinalDestination::HTTP::Post.new(uri)
elsif method == :put
request = FinalDestination::HTTP::Put.new(uri)
elsif method == :patch
request = FinalDestination::HTTP::Patch.new(uri)
elsif method == :delete
request = FinalDestination::HTTP::Delete.new(uri)
end
raise ArgumentError, "Invalid method: #{method}" if !request
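
Based on the call sites in the tool runner above, the extended dispatch can be exercised roughly like this (a hedged sketch, not code from this commit):

result = {}
DiscourseAi::AiBot::Tools::Tool.send_http_request(
  "https://example.com/api",
  method: :patch, # :put and :delete now take the same path
  headers: { "Content-Type" => "application/json" },
  body: { data: "example" }.to_json,
) do |response|
  result[:body] = response.body
  result[:status] = response.code.to_i
end
# An unsupported method symbol still raises ArgumentError via the guard above.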

View File

@@ -39,7 +39,7 @@ class DiscourseAi::Completions::AnthropicMessageProcessor
)
params = JSON.parse(tool_call.raw_json, symbolize_names: true)
xml = params.map { |name, value| "<#{name}>#{value}</#{name}>" }.join("\n")
xml = params.map { |name, value| "<#{name}>#{CGI.escapeHTML(value)}</#{name}>" }.join("\n")
node.at("tool_name").content = tool_call.name
node.at("tool_id").content = tool_call.id

View File

@@ -179,7 +179,7 @@ module DiscourseAi
if partial[:args]
argument_fragments =
partial[:args].reduce(+"") do |memo, (arg_name, value)|
memo << "\n<#{arg_name}>#{value}</#{arg_name}>"
memo << "\n<#{arg_name}>#{CGI.escapeHTML(value)}</#{arg_name}>"
end
argument_fragments << "\n"

View File

@@ -173,7 +173,7 @@ module DiscourseAi
argument_fragments =
json_args.reduce(+"") do |memo, (arg_name, value)|
memo << "\n<#{arg_name}>#{value}</#{arg_name}>"
memo << "\n<#{arg_name}>#{CGI.escapeHTML(value)}</#{arg_name}>"
end
argument_fragments << "\n"

View File

@@ -74,7 +74,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"y\\": \\"s"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"am"} }
data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":"<a>m"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"input_json_delta","partial_json":" "} }
@@ -118,7 +118,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
<function_calls>
<invoke>
<tool_name>search</tool_name>
<parameters><search_query>sam sam</search_query>
<parameters><search_query>s&lt;a&gt;m sam</search_query>
<category>general</category></parameters>
<tool_id>toolu_01DjrShFRRHp9SnHYRFRc53F</tool_id>
</invoke>

View File

@@ -182,6 +182,34 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
expect(parsed[:tool_config]).to eq({ function_calling_config: { mode: "AUTO" } })
end
it "properly encodes tool calls" do
prompt = DiscourseAi::Completions::Prompt.new("Hello", tools: [echo_tool])
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
url = "#{model.url}:generateContent?key=123"
response_json = { "functionCall" => { name: "echo", args: { text: "<S>ydney" } } }
response = gemini_mock.response(response_json, tool_call: true).to_json
stub_request(:post, url).to_return(status: 200, body: response)
response = llm.generate(prompt, user: user)
expected = (<<~XML).strip
<function_calls>
<invoke>
<tool_name>echo</tool_name>
<parameters>
<text>&lt;S&gt;ydney</text>
</parameters>
<tool_id>tool_0</tool_id>
</invoke>
</function_calls>
XML
expect(response.strip).to eq(expected)
end
it "Supports Vision API" do
prompt =
DiscourseAi::Completions::Prompt.new(

View File

@@ -294,7 +294,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
type: "function",
function: {
name: "echo",
arguments: "{\"text\":\"hello\"}",
arguments: "{\"text\":\"h<e>llo\"}",
},
},
],
@@ -325,7 +325,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
<invoke>
<tool_name>echo</tool_name>
<parameters>
<text>hello</text>
<text>h&lt;e&gt;llo</text>
</parameters>
<tool_id>call_I8LKnoijVuhKOM85nnEQgWwd</tool_id>
</invoke>

View File

@@ -38,35 +38,37 @@ RSpec.describe AiTool do
expect(runner.invoke).to eq("query" => "test")
end
it "can perform POST HTTP requests" do
script = <<~JS
function invoke(params) {
result = http.post("https://example.com/api",
{
headers: { TestHeader: "TestValue" },
body: JSON.stringify({ data: params.data })
}
);
it "can perform HTTP requests with various verbs" do
%i[post put delete patch].each do |verb|
script = <<~JS
function invoke(params) {
result = http.#{verb}("https://example.com/api",
{
headers: { TestHeader: "TestValue" },
body: JSON.stringify({ data: params.data })
}
);
return result.body;
}
JS
return result.body;
}
JS
tool = create_tool(script: script)
runner = tool.runner({ "data" => "test data" }, llm: nil, bot_user: nil, context: {})
tool = create_tool(script: script)
runner = tool.runner({ "data" => "test data" }, llm: nil, bot_user: nil, context: {})
stub_request(:post, "https://example.com/api").with(
body: "{\"data\":\"test data\"}",
headers: {
"Accept" => "*/*",
"Testheader" => "TestValue",
"User-Agent" => "Discourse AI Bot 1.0 (https://www.discourse.org)",
},
).to_return(status: 200, body: "Success", headers: {})
stub_request(verb, "https://example.com/api").with(
body: "{\"data\":\"test data\"}",
headers: {
"Accept" => "*/*",
"Testheader" => "TestValue",
"User-Agent" => "Discourse AI Bot 1.0 (https://www.discourse.org)",
},
).to_return(status: 200, body: "Success", headers: {})
result = runner.invoke
result = runner.invoke
expect(result).to eq("Success")
expect(result).to eq("Success")
end
end
it "can perform GET HTTP requests, with 1 param" do