FIX: implement tools framework for Anthropic (#307)

Prior to this changeset we used a custom system for tools/command
support with Anthropic.

We defined commands using !command as the signal to execute them.

As of Anthropic Claude 2.1, there is an officially supported (beta)
syntax for tool execution.

e.g.:

```
<function_calls>
<invoke>
<tool_name>image</tool_name>
<parameters>
<prompts>
[
"an oil painting",
"a cute fluffy orange",
"3 apple's",
"a cat"
]
</prompts>
</parameters>
</invoke>
</function_calls>
```

This implements the spec per Anthropic; it should be stable enough
to also work on other LLMs.
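
For reference, a minimal sketch of how such a block can be parsed in
Ruby. It mirrors the Nokogiri-based parser this changeset adds to
FunctionList, but is illustrative rather than the literal implementation:

```
require "nokogiri"

# Sketch: extract tool invocations from a <function_calls> block.
def parse_function_calls(text)
  match = text.match(%r{<function_calls>(.*)</function_calls>}m)
  return [] if !match

  Nokogiri::XML(match[1]).xpath("//invoke").map do |invoke|
    arguments = {}
    invoke.at_xpath(".//parameters")&.element_children&.each do |node|
      # values arrive as raw text; the real parser coerces them to the
      # declared types (arrays via JSON, integers via to_i, and so on)
      arguments[node.name.to_sym] = node.text
    end
    { name: invoke.at_xpath(".//tool_name").text, arguments: arguments }
  end
end
```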

Keep in mind that OpenAI is not impacted here at all, as it has its
own custom system for function calls.

Additionally:

- Fixes the title system prompt so it works with the latest Anthropic
- Uses Anthropic's new spec for "system" messages
- Tweaks the forum helper persona to guide Anthropic a tiny bit better

Overall, results are pretty awesome and Anthropic Claude now performs
really well on Discourse.
Sam 2023-11-24 06:39:56 +11:00 committed by GitHub
parent 419c43592a
commit 6282b6d21f
11 changed files with 379 additions and 216 deletions

View File

@@ -38,7 +38,11 @@ module DiscourseAi
def build_message(poster_username, content, system: false, function: nil)
role = poster_username == bot_user.username ? "Assistant" : "Human"
"#{role}: #{content}"
if system || function
content
else
"#{role}: #{content}"
end
end
def model_for
@@ -61,6 +65,7 @@ module DiscourseAi
temperature: 0.4,
max_tokens: 3000,
post: post,
stop_sequences: ["\n\nHuman:", "</function_calls>"],
&blk
)
end
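
The "</function_calls>" stop sequence makes Claude halt right after it
closes a tool invocation, so the tool can run and its results can be fed
back before generation continues. A hedged sketch of the request these
settings produce (headers per Anthropic's 2023 completions API; the model
name here is an assumption):

```
require "net/http"
require "json"

uri = URI("https://api.anthropic.com/v1/complete")

request = Net::HTTP::Post.new(uri)
request["content-type"] = "application/json"
request["x-api-key"] = ENV["ANTHROPIC_API_KEY"]
request["anthropic-version"] = "2023-06-01"
request.body = {
  model: "claude-2.1", # assumption: any Claude 2.x model
  prompt: "\n\nHuman: what tags are on this forum?\n\nAssistant:",
  max_tokens_to_sample: 3000,
  temperature: 0.4,
  stream: true,
  # stop before the model invents a Human turn, and immediately after a
  # tool call closes so the tool can run before the conversation continues
  stop_sequences: ["\n\nHuman:", "</function_calls>"],
}.to_json

response = Net::HTTP.start(uri.host, uri.port, use_ssl: true) { |http| http.request(request) }
```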

View File

@@ -4,11 +4,20 @@ module DiscourseAi
module AiBot
class Bot
class FunctionCalls
attr_accessor :maybe_buffer, :maybe_found, :custom
def initialize
@functions = []
@current_function = nil
@found = false
@cancel_completion = false
@maybe_buffer = +""
@maybe_found = false
@custom = false
end
def custom?
@custom
end
def found?
@@ -19,6 +28,10 @@ module DiscourseAi
@found = true
end
def maybe_found?
@maybe_found
end
def cancel_completion?
@cancel_completion
end
@@ -47,24 +60,6 @@ module DiscourseAi
def to_a
@functions
end
def truncate(partial_reply)
lines = []
found_command = false
partial_reply
.split("\n")
.each do |line|
if line.match?(/^!/)
found_command = true
lines << line
elsif found_command && line.match(/^\s*[^!]+/)
break
else
lines << line
end
end
lines.join("\n")
end
end
attr_reader :bot_user, :persona
@@ -150,13 +145,19 @@
partial: partial,
reply: partial_reply,
functions: functions,
current_delta: current_delta,
done: false,
)
cancel&.call if functions.cancel_completion?
end
reply << current_delta if !functions.found?
if functions.maybe_buffer.present? && !functions.maybe_found?
reply << functions.maybe_buffer
functions.maybe_buffer = +""
end
reply << current_delta if !functions.found? && !functions.maybe_found?
if redis_stream_key && !Discourse.redis.get(redis_stream_key)
cancel&.call
@@ -189,6 +190,21 @@ module DiscourseAi
end
end
if !available_functions.empty?
populate_functions(
partial: nil,
reply: partial_reply,
current_delta: "",
functions: functions,
done: true,
)
end
if functions.maybe_buffer.present?
reply << functions.maybe_buffer
functions.maybe_buffer = +""
end
if bot_reply_post
publish_update(bot_reply_post, done: true)
@@ -204,9 +220,11 @@
truncated_reply = partial_reply
if functions.found? && functions.cancel_completion?
# we need to truncate the partial_reply
truncated_reply = functions.truncate(partial_reply)
# TODO: we may want to move this code
if functions.length > 0 && partial_reply.include?("</invoke>")
# recover stop word potentially
truncated_reply =
partial_reply.split("</invoke>").first + "</invoke>\n</function_calls>"
end
prompt << [truncated_reply, bot_user.username] if truncated_reply.present?
@@ -214,10 +232,6 @@
post.post_custom_prompt.update!(custom_prompt: prompt)
end
if !available_functions.empty?
populate_functions(partial: nil, reply: partial_reply, functions: functions, done: true)
end
if functions.length > 0
chain = false
standalone = false
@@ -227,7 +241,13 @@
if command_klass = available_commands.detect { |cmd| cmd.invoked?(name) }
command =
command_klass.new(bot: self, args: args, post: bot_reply_post, parent_post: post)
command_klass.new(
bot: self,
args: args,
post: bot_reply_post,
parent_post: post,
xml_format: !functions.custom?,
)
chain_intermediate, bot_reply_post = command.invoke!
chain ||= chain_intermediate
standalone ||= command.standalone?
@@ -292,13 +312,20 @@
end
def title_prompt(post)
[build_message(bot_user.username, <<~TEXT)]
prompt = <<~TEXT
You are titlebot. Given a topic you will figure out a title.
You will never respond with anything but a topic title.
You will never respond with anything but a 7 word topic title.
TEXT
messages = [build_message(bot_user.username, prompt, system: true)]
messages << build_message("User", <<~TEXT)
Suggest a 7 word title for the following topic without quoting any of it:
<content>
#{post.topic.posts.map(&:raw).join("\n\n")[0..prompt_limit(allow_commands: false)]}
</content>
TEXT
messages
end
def available_commands
@@ -351,23 +378,34 @@
raise NotImplemented
end
def populate_functions(partial:, reply:, functions:, done:)
def populate_functions(partial:, reply:, functions:, done:, current_delta:)
if !done
functions.found! if reply.match?(/^!/i)
search_length = "<function_calls>".length
index = -1
while index > -search_length
substr = reply[index..-1] || reply
index -= 1
functions.maybe_found = "<function_calls>".start_with?(substr)
break if functions.maybe_found?
end
functions.maybe_buffer << current_delta if functions.maybe_found?
functions.found! if reply.match?(/^<function_calls>/i)
if functions.found?
functions.cancel_completion! if reply.split("\n")[-1].match?(/^\s*[^!]+/)
functions.maybe_buffer = functions.maybe_buffer.to_s.split("<")[0..-2].join("<")
functions.cancel_completion! if reply.match?(%r{</function_calls>}i)
end
else
reply
.scan(/^!.*$/i)
.each do |line|
function_list
.parse_prompt(line)
.each do |function|
functions.add_function(function[:name])
functions.add_argument_fragment(function[:arguments].to_json)
end
end
functions_string = reply.scan(%r{(<function_calls>(.*?)</invoke>)}im)&.first&.first
if functions_string
function_list
.parse_prompt(functions_string + "</function_calls>")
.each do |function|
functions.add_function(function[:name])
functions.add_argument_fragment(function[:arguments].to_json)
end
end
end
end
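
The maybe_buffer machinery above deals with streaming: a delta such as
"<function" may or may not be the start of a tool call, so it is held back
from the user until the ambiguity resolves. A minimal sketch of the suffix
check, following the same logic as populate_functions:

```
# Returns true when the tail of the streamed reply could still turn into
# "<function_calls>", in which case the latest delta is buffered rather
# than published.
def maybe_function_call?(reply)
  target = "<function_calls>"
  (1..target.length).any? do |n|
    suffix = reply[-n..] || reply
    target.start_with?(suffix)
  end
end

maybe_function_call?("hello <func")  # => true, keep buffering
maybe_function_call?("hello world")  # => false, flush the buffer
```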

View File

@@ -42,12 +42,13 @@ module DiscourseAi
attr_reader :bot_user, :bot
def initialize(bot:, args:, post: nil, parent_post: nil)
def initialize(bot:, args:, post: nil, parent_post: nil, xml_format: false)
@bot = bot
@bot_user = bot&.bot_user
@args = args
@post = post
@parent_post = parent_post
@xml_format = xml_format
@placeholder = +(<<~HTML).strip
<details>
@@ -145,7 +146,18 @@ module DiscourseAi
parsed_args = JSON.parse(@args).symbolize_keys
prompt << [process(**parsed_args).to_json, self.class.name, "function"]
function_results = process(**parsed_args).to_json
function_results = <<~XML if @xml_format
<function_results>
<result>
<tool_name>#{self.class.name}</tool_name>
<json>
#{function_results}
</json>
</result>
</function_results>
XML
prompt << [function_results, self.class.name, "function"]
@post.post_custom_prompt.update!(custom_prompt: prompt)
raw = +(<<~HTML)
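
After a command executes, its JSON output is wrapped in Anthropic's
<function_results> envelope before being appended to the prompt, so the
follow-up completion sees real data. Roughly (a sketch; the tool name and
result values are invented for illustration):

```
tool_name = "search"
function_results = { rows: [%w[topic title]], count: 1 }.to_json

xml = <<~XML
  <function_results>
  <result>
  <tool_name>#{tool_name}</tool_name>
  <json>
  #{function_results}
  </json>
  </result>
  </function_results>
XML
```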

View File

@@ -96,12 +96,13 @@ module DiscourseAi
private
def populate_functions(partial:, reply:, functions:, done:)
def populate_functions(partial:, reply:, functions:, done:, current_delta:)
return if !partial
fn = partial.dig(:choices, 0, :delta, :function_call)
if fn
functions.add_function(fn[:name]) if fn[:name].present?
functions.add_argument_fragment(fn[:arguments]) if !fn[:arguments].nil?
functions.custom = true
end
end
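
For contrast, OpenAI signals a native function call inside the streamed
delta itself, which is why functions.custom is flipped here. A typical
chunk looks roughly like this (sketch of the 2023 function_call streaming
shape):

```
partial = {
  choices: [
    {
      delta: {
        function_call: {
          name: "search",                  # present on the first chunk only
          arguments: "{\"search_query\":", # arrives in fragments
        },
      },
    },
  ],
}

partial.dig(:choices, 0, :delta, :function_call)
# => { name: "search", arguments: "{\"search_query\":" }
```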

View File

@@ -19,7 +19,7 @@ module DiscourseAi
def system_prompt
<<~PROMPT
You are a helpful Discourse assistant.
You understand and generate Discourse Markdown.
You _understand_ and **generate** Discourse Markdown.
You live in a Discourse Forum Message.
You live in the forum with the URL: {site_url}

View File

@@ -12,136 +12,102 @@ module ::DiscourseAi
end
def parse_prompt(prompt)
parsed = []
prompt
.split("\n")
.each do |line|
line.strip!
next if line.blank?
next if !line.start_with?("!")
name, arguments = line.split("(", 2)
name = name[1..-1].strip
function = @functions.find { |f| f.name == name }
next if function.blank?
parsed_arguments = {}
if arguments
arguments = arguments[0..-2] if arguments.end_with?(")")
temp_string = +""
in_string = nil
replace = SecureRandom.hex(10)
arguments.each_char do |char|
if %w[" '].include?(char) && !in_string
in_string = char
elsif char == in_string
in_string = nil
elsif char == "," && in_string
char = replace
xml = prompt.sub(%r{<function_calls>(.*)</function_calls>}m, '\1')
if xml.present?
parsed = []
Nokogiri
.XML(xml)
.xpath("//invoke")
.each do |invoke_node|
function = { name: invoke_node.xpath("//tool_name").text, arguments: {} }
parsed << function
invoke_node
.xpath("//parameters")
.children
.each do |parameters_node|
if parameters_node.is_a?(Nokogiri::XML::Element) && name = parameters_node.name
function[:arguments][name.to_sym] = parameters_node.text
end
end
temp_string << char
end
end
coerce_arguments!(parsed)
end
end
arguments = temp_string.split(",").map { |s| s.gsub(replace, ",").strip }
def coerce_arguments!(parsed)
parsed.each do |function_call|
arguments = function_call[:arguments]
arguments.each do |argument|
key, value = argument.split(":", 2)
# remove stuff that bypasses the spec
param = function.parameters.find { |p| p[:name] == key.strip }
next if !param
function = @functions.find { |f| f.name == function_call[:name] }
next if !function
value = value.strip.gsub(/(\A"(.*)"\Z)|(\A'(.*)'\Z)/m, '\2\4') if value.present?
if param[:enum]
next if !param[:enum].include?(value)
end
parsed_arguments[key.strip.to_sym] = value.strip
end
arguments.each do |name, value|
parameter = function.parameters.find { |p| p[:name].to_s == name.to_s }
if !parameter
arguments.delete(name)
next
end
# ensure parsed_arguments has all required arguments
all_good = true
function.parameters.each do |parameter|
next if !parameter[:required]
next if parsed_arguments[parameter[:name].to_sym].present?
all_good = false
break
type = parameter[:type]
if type == "array"
arguments[name] = JSON.parse(value)
elsif type == "integer"
arguments[name] = value.to_i
elsif type == "float"
arguments[name] = value.to_f
end
parsed << { name: name, arguments: parsed_arguments } if all_good
end
end
parsed
end
def system_prompt
prompt = +<<~PROMPT
- You are able to execute the following external functions on real data!
- Never say that you are in a hypothetical situation, just run functions you need to run!
- When you run a command/function you will gain access to real information in a subsequent call!
- NEVER EVER pretend to know stuff, you ALWAYS lean on functions to discover the truth!
- You have direct access to data on this forum using !functions
- You are not a liar, liars are bad bots, you are a good bot!
- You always prefer to say "I don't know" as opposed to inventing a lie!
{
PROMPT
tools = +""
@functions.each do |function|
prompt << "// #{function.description}\n"
prompt << "!#{function.name}"
parameters = +""
if function.parameters.present?
prompt << "("
function.parameters.each_with_index do |parameter, index|
prompt << ", " if index > 0
prompt << "#{parameter[:name]}: #{parameter[:type]}"
if parameter[:required]
prompt << " [required]"
else
prompt << " [optional]"
end
description = +(parameter[:description] || "")
description << " [valid values: #{parameter[:enum].join(",")}]" if parameter[:enum]
description.strip!
prompt << " /* #{description} */" if description.present?
parameters << "\n"
function.parameters.each do |parameter|
parameters << <<~PARAMETER
<parameter>
<name>#{parameter[:name]}</name>
<type>#{parameter[:type]}</type>
<description>#{parameter[:description]}</description>
<required>#{parameter[:required]}</required>
PARAMETER
parameters << "<options>#{parameter[:enum].join(",")}</options>\n" if parameter[:enum]
parameters << "</parameter>\n"
end
prompt << ")"
end
prompt << "\n"
tools << <<~TOOLS
<tool_description>
<tool_name>#{function.name}</tool_name>
<description>#{function.description}</description>
<parameters>#{parameters}</parameters>
</tool_description>
TOOLS
end
prompt << <<~PROMPT
}
\n\nTo execute a function, use the following syntax:
<<~PROMPT
In this environment you have access to a set of tools you can use to answer the user's question.
You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
<function_calls>
<invoke>
<tool_name>$TOOL_NAME</tool_name>
<parameters>
<$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
...
</parameters>
</invoke>
</function_calls>
!function_name(param1: "value1", param2: 2)
For example for a function defined as:
{
// echo a string
!echo(message: string [required])
}
Human: please echo out "hello"
Assistant: !echo(message: "hello")
Human: please say "hello"
Assistant: !echo(message: "hello")
Here are the tools available:
<tools>
#{tools}</tools>
PROMPT
prompt
end
end
end
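
Putting the two halves together: parse_prompt pulls invocations out of the
XML, and coerce_arguments! converts the raw text into the declared types
while dropping parameters that are not in the spec. A hedged usage sketch
(constructor signatures taken from the specs further down; the "read"
function here is hypothetical):

```
function = Function.new(name: "read", description: "reads a topic")
function.add_parameter(name: "topic_id", type: "integer", required: true, description: "the topic id")

list = FunctionList.new
list << function

list.parse_prompt(<<~PROMPT)
  <function_calls>
  <invoke>
  <tool_name>read</tool_name>
  <parameters>
  <topic_id>109</topic_id>
  <junk>not in the spec, silently dropped</junk>
  </parameters>
  </invoke>
  </function_calls>
PROMPT
# => [{ name: "read", arguments: { topic_id: 109 } }]
```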

View File

@@ -13,7 +13,7 @@ module ::DiscourseAi
end
let(:bot) { described_class.new(bot_user) }
let(:post) { Fabricate(:post) }
fab!(:post)
describe "system message" do
it "includes the full command framework" do
@@ -24,60 +24,133 @@
end
end
it "does not include half parsed function calls in reply" do
completion1 = "<function"
completion2 = <<~REPLY
_calls>
<invoke>
<tool_name>search</tool_name>
<parameters>
<search_query>hello world</search_query>
</parameters>
</invoke>
</function_calls>
junk
REPLY
completion1 = { completion: completion1 }.to_json
completion2 = { completion: completion2 }.to_json
completion3 = { completion: "<func" }.to_json
request_number = 0
last_body = nil
stub_request(:post, "https://api.anthropic.com/v1/complete").with(
body:
lambda do |body|
last_body = body
request_number == 2
end,
).to_return(status: 200, body: lambda { |request| +"data: #{completion3}" })
stub_request(:post, "https://api.anthropic.com/v1/complete").with(
body:
lambda do |body|
request_number += 1
request_number == 1
end,
).to_return(
status: 200,
body: lambda { |request| +"data: #{completion1}\ndata: #{completion2}" },
)
bot.reply_to(post)
post.topic.reload
raw = post.topic.ordered_posts.last.raw
prompt = JSON.parse(last_body)["prompt"]
# function call is bundled into the Assistant prompt
expect(prompt.split("Human:").length).to eq(2)
# this should be stripped
expect(prompt).not_to include("junk")
expect(raw).to end_with("<func")
# leading <function_calls> should be stripped
expect(raw).to start_with("\n\n<details")
end
it "does not include Assistant: in front of the system prompt" do
prompt = nil
stub_request(:post, "https://api.anthropic.com/v1/complete").with(
body:
lambda do |body|
json = JSON.parse(body)
prompt = json["prompt"]
true
end,
).to_return(
status: 200,
body: lambda { |request| +"data: " << { completion: "Hello World" }.to_json },
)
bot.reply_to(post)
expect(prompt).not_to be_nil
expect(prompt).not_to start_with("Assistant:")
end
describe "parsing a reply prompt" do
it "can correctly predict that a completion needs to be cancelled" do
functions = DiscourseAi::AiBot::Bot::FunctionCalls.new
# note anthropic API has a silly leading space, we need to make sure we can handle that
prompt = +<<~REPLY.strip
hello world
!search(search_query: "hello world", random_stuff: 77)
!search(search_query: "hello world 2", random_stuff: 77
<function_calls>
<invoke>
<tool_name>search</tool_name>
<parameters>
<search_query>hello world</search_query>
<random_stuff>77</random_stuff>
</parameters>
</invoke>
</function_calls
REPLY
bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: false)
bot.populate_functions(
partial: nil,
reply: prompt,
functions: functions,
done: false,
current_delta: "",
)
expect(functions.found?).to eq(true)
expect(functions.cancel_completion?).to eq(false)
prompt << ")\n"
prompt << ">"
bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: false)
bot.populate_functions(
partial: nil,
reply: prompt,
functions: functions,
done: true,
current_delta: "",
)
expect(functions.found?).to eq(true)
expect(functions.cancel_completion?).to eq(false)
prompt << "a test test"
bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: false)
expect(functions.cancel_completion?).to eq(true)
end
it "can correctly detect commands from a prompt" do
functions = DiscourseAi::AiBot::Bot::FunctionCalls.new
# note anthropic API has a silly leading space, we need to make sure we can handle that
prompt = <<~REPLY
hello world
!search(search_query: "hello world", random_stuff: 77)
!random(search_query: "hello world", random_stuff: 77)
!read(topic_id: 109)
!read(random: 109)
REPLY
expect(functions.found?).to eq(false)
bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: false)
expect(functions.found?).to eq(true)
bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: true)
expect(functions.to_a.length).to eq(1)
expect(functions.to_a).to eq(
[
{ name: "search", arguments: "{\"search_query\":\"hello world\"}" },
{ name: "read", arguments: "{\"topic_id\":\"109\"}" },
],
[{ name: "search", arguments: "{\"search_query\":\"hello world\"}" }],
)
end
end

View File

@@ -48,7 +48,15 @@ describe FakeBot do
it "can handle command truncation for long messages" do
bot = FakeBot.new(bot_user)
bot.add_response(["hello this is a big test I am testing 123\n", "!tags\nabc"])
tags_command = <<~TEXT
<function_calls>
<invoke>
<tool_name>tags</tool_name>
</invoke>
</function_calls>
TEXT
bot.add_response(["hello this is a big test I am testing 123\n", "#{tags_command}\nabc"])
bot.add_response(["this is the reply"])
bot.reply_to(post)
@@ -59,14 +67,22 @@ describe FakeBot do
expect(reply.post_custom_prompt.custom_prompt.to_s).not_to include("abc")
expect(reply.post_custom_prompt.custom_prompt.length).to eq(3)
expect(reply.post_custom_prompt.custom_prompt[0][0]).to eq(
"hello this is a big test I am testing 123\n!tags",
"hello this is a big test I am testing 123\n#{tags_command.strip}",
)
end
it "can handle command truncation for short bot messages" do
bot = FakeBot.new(bot_user)
bot.add_response(["hello\n", "!tags\nabc"])
tags_command = <<~TEXT
_calls>
<invoke>
<tool_name>tags</tool_name>
</invoke>
</function_calls>
TEXT
bot.add_response(["hello\n<function", "#{tags_command}\nabc"])
bot.add_response(["this is the reply"])
bot.reply_to(post)
@@ -76,7 +92,12 @@ describe FakeBot do
expect(reply.raw).not_to include("abc")
expect(reply.post_custom_prompt.custom_prompt.to_s).not_to include("abc")
expect(reply.post_custom_prompt.custom_prompt.length).to eq(3)
expect(reply.post_custom_prompt.custom_prompt[0][0]).to eq("hello\n!tags")
expect(reply.post_custom_prompt.custom_prompt[0][0]).to eq(
"hello\n<function#{tags_command.strip}",
)
# we don't want function leftovers
expect(reply.raw).to start_with("hello\n\n<details>")
end
end
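
These truncation specs exercise the stop-word recovery in Bot#reply_to:
because "</function_calls>" is a stop sequence, the closing tags never
arrive in the completion, so the saved prompt is rebuilt up to the last
completed invocation. A minimal sketch of that recovery (the reply text
here is invented):

```
# Everything after the last completed </invoke> is junk the model emitted
# before the stop sequence cut in; re-append the closing tags by hand.
partial_reply = "hello\n<function_calls>\n<invoke>\n<tool_name>tags</tool_name>\n</invoke>\njunk"

truncated_reply =
  partial_reply.split("</invoke>").first + "</invoke>\n</function_calls>"
# => "hello\n<function_calls>\n<invoke>\n<tool_name>tags</tool_name>\n</invoke>\n</function_calls>"
```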

View File

@@ -93,6 +93,7 @@ RSpec.describe Jobs::CreateAiReply do
max_tokens_to_sample: 3000,
temperature: 0.4,
stream: true,
stop_sequences: ["\n\nHuman:", "</function_calls>"],
},
)
end

View File

@@ -62,16 +62,16 @@ module DiscourseAi::AiBot::Personas
expect(rendered).to include("test site description")
expect(rendered).to include("joe, jane")
expect(rendered).to include(Time.zone.now.to_s)
expect(rendered).to include("!search")
expect(rendered).to include("!tags")
expect(rendered).to include("<tool_name>search</tool_name>")
expect(rendered).to include("<tool_name>tags</tool_name>")
# needs to be configured so it is not available
expect(rendered).not_to include("!image")
expect(rendered).not_to include("<tool_name>image</tool_name>")
rendered =
persona.render_system_prompt(topic: topic_with_users, render_function_instructions: false)
expect(rendered).not_to include("!search")
expect(rendered).not_to include("!tags")
expect(rendered).not_to include("<tool_name>search</tool_name>")
expect(rendered).not_to include("<tool_name>tags</tool_name>")
end
describe "custom personas" do

View File

@@ -27,20 +27,49 @@ module DiscourseAi::Inference
list
end
it "can handle complex parsing" do
raw_prompt = <<~PROMPT
!get_weather(location: "sydney,melbourne", unit: "f")
!get_weather (location: sydney)
!get_weather(location : "sydney's", unit: "m", invalid: "invalid")
!get_weather(unit: "f", invalid: "invalid")
PROMPT
parsed = function_list.parse_prompt(raw_prompt)
let :image_function_list do
function = Function.new(name: "image", description: "generates an image")
function.add_parameter(
name: "prompts",
type: "array",
item_type: "string",
required: true,
description: "the prompts",
)
list = FunctionList.new
list << function
list
end
it "can handle function call parsing" do
raw_prompt = <<~PROMPT
<function_calls>
<invoke>
<tool_name>image</tool_name>
<parameters>
<prompts>
[
"an oil painting",
"a cute fluffy orange",
"3 apple's",
"a cat"
]
</prompts>
</parameters>
</invoke>
</function_calls>
PROMPT
parsed = image_function_list.parse_prompt(raw_prompt)
expect(parsed).to eq(
[
{ name: "get_weather", arguments: { location: "sydney,melbourne", unit: "f" } },
{ name: "get_weather", arguments: { location: "sydney" } },
{ name: "get_weather", arguments: { location: "sydney's" } },
{
name: "image",
arguments: {
prompts: ["an oil painting", "a cute fluffy orange", "3 apple's", "a cat"],
},
},
],
)
end
@@ -51,10 +80,27 @@
# this is fragile, by design, we need to test something here
#
expected = <<~PROMPT
{
// Get the weather in a city (default to c)
!get_weather(location: string [required] /* the city name */, unit: string [optional] /* the unit of measurement celcius c or fahrenheit f [valid values: c,f] */)
}
<tools>
<tool_description>
<tool_name>get_weather</tool_name>
<description>Get the weather in a city (default to c)</description>
<parameters>
<parameter>
<name>location</name>
<type>string</type>
<description>the city name</description>
<required>true</required>
</parameter>
<parameter>
<name>unit</name>
<type>string</type>
<description>the unit of measurement celcius c or fahrenheit f</description>
<required>false</required>
<options>c,f</options>
</parameter>
</parameters>
</tool_description>
</tools>
PROMPT
expect(prompt).to include(expected)
end