diff --git a/lib/modules/ai_bot/anthropic_bot.rb b/lib/modules/ai_bot/anthropic_bot.rb index d1a5dc5a..1549144b 100644 --- a/lib/modules/ai_bot/anthropic_bot.rb +++ b/lib/modules/ai_bot/anthropic_bot.rb @@ -38,7 +38,11 @@ module DiscourseAi def build_message(poster_username, content, system: false, function: nil) role = poster_username == bot_user.username ? "Assistant" : "Human" - "#{role}: #{content}" + if system || function + content + else + "#{role}: #{content}" + end end def model_for @@ -61,6 +65,7 @@ module DiscourseAi temperature: 0.4, max_tokens: 3000, post: post, + stop_sequences: ["\n\nHuman:", ""], &blk ) end diff --git a/lib/modules/ai_bot/bot.rb b/lib/modules/ai_bot/bot.rb index 15bdfe09..c6804e00 100644 --- a/lib/modules/ai_bot/bot.rb +++ b/lib/modules/ai_bot/bot.rb @@ -4,11 +4,20 @@ module DiscourseAi module AiBot class Bot class FunctionCalls + attr_accessor :maybe_buffer, :maybe_found, :custom + def initialize @functions = [] @current_function = nil @found = false @cancel_completion = false + @maybe_buffer = +"" + @maybe_found = false + @custom = false + end + + def custom? + @custom end def found? @@ -19,6 +28,10 @@ module DiscourseAi @found = true end + def maybe_found? + @maybe_found + end + def cancel_completion? @cancel_completion end @@ -47,24 +60,6 @@ module DiscourseAi def to_a @functions end - - def truncate(partial_reply) - lines = [] - found_command = false - partial_reply - .split("\n") - .each do |line| - if line.match?(/^!/) - found_command = true - lines << line - elsif found_command && line.match(/^\s*[^!]+/) - break - else - lines << line - end - end - lines.join("\n") - end end attr_reader :bot_user, :persona @@ -150,13 +145,19 @@ module DiscourseAi partial: partial, reply: partial_reply, functions: functions, + current_delta: current_delta, done: false, ) cancel&.call if functions.cancel_completion? end - reply << current_delta if !functions.found? + if functions.maybe_buffer.present? && !functions.maybe_found? + reply << functions.maybe_buffer + functions.maybe_buffer = +"" + end + + reply << current_delta if !functions.found? && !functions.maybe_found? if redis_stream_key && !Discourse.redis.get(redis_stream_key) cancel&.call @@ -189,6 +190,21 @@ module DiscourseAi end end + if !available_functions.empty? + populate_functions( + partial: nil, + reply: partial_reply, + current_delta: "", + functions: functions, + done: true, + ) + end + + if functions.maybe_buffer.present? + reply << functions.maybe_buffer + functions.maybe_buffer = +"" + end + if bot_reply_post publish_update(bot_reply_post, done: true) @@ -204,9 +220,11 @@ module DiscourseAi truncated_reply = partial_reply - if functions.found? && functions.cancel_completion? - # we need to truncate the partial_reply - truncated_reply = functions.truncate(partial_reply) + # TODO: we may want to move this code + if functions.length > 0 && partial_reply.include?("") + # recover stop word potentially + truncated_reply = + partial_reply.split("").first + "\n" end prompt << [truncated_reply, bot_user.username] if truncated_reply.present? @@ -214,10 +232,6 @@ module DiscourseAi post.post_custom_prompt.update!(custom_prompt: prompt) end - if !available_functions.empty? - populate_functions(partial: nil, reply: partial_reply, functions: functions, done: true) - end - if functions.length > 0 chain = false standalone = false @@ -227,7 +241,13 @@ module DiscourseAi if command_klass = available_commands.detect { |cmd| cmd.invoked?(name) } command = - command_klass.new(bot: self, args: args, post: bot_reply_post, parent_post: post) + command_klass.new( + bot: self, + args: args, + post: bot_reply_post, + parent_post: post, + xml_format: !functions.custom?, + ) chain_intermediate, bot_reply_post = command.invoke! chain ||= chain_intermediate standalone ||= command.standalone? @@ -292,13 +312,20 @@ module DiscourseAi end def title_prompt(post) - [build_message(bot_user.username, <<~TEXT)] + prompt = <<~TEXT You are titlebot. Given a topic you will figure out a title. - You will never respond with anything but a topic title. + You will never respond with anything but a 7 word topic title. + TEXT + messages = [build_message(bot_user.username, prompt, system: true)] + + messages << build_message("User", <<~TEXT) Suggest a 7 word title for the following topic without quoting any of it: + #{post.topic.posts.map(&:raw).join("\n\n")[0..prompt_limit(allow_commands: false)]} + TEXT + messages end def available_commands @@ -351,23 +378,34 @@ module DiscourseAi raise NotImplemented end - def populate_functions(partial:, reply:, functions:, done:) + def populate_functions(partial:, reply:, functions:, done:, current_delta:) if !done - functions.found! if reply.match?(/^!/i) + search_length = "".length + index = -1 + while index > -search_length + substr = reply[index..-1] || reply + index -= 1 + + functions.maybe_found = "".start_with?(substr) + break if functions.maybe_found? + end + + functions.maybe_buffer << current_delta if functions.maybe_found? + functions.found! if reply.match?(/^/i) if functions.found? - functions.cancel_completion! if reply.split("\n")[-1].match?(/^\s*[^!]+/) + functions.maybe_buffer = functions.maybe_buffer.to_s.split("<")[0..-2].join("<") + functions.cancel_completion! if reply.match?(%r{}i) end else - reply - .scan(/^!.*$/i) - .each do |line| - function_list - .parse_prompt(line) - .each do |function| - functions.add_function(function[:name]) - functions.add_argument_fragment(function[:arguments].to_json) - end - end + functions_string = reply.scan(%r{((.*?))}im)&.first&.first + if functions_string + function_list + .parse_prompt(functions_string + "") + .each do |function| + functions.add_function(function[:name]) + functions.add_argument_fragment(function[:arguments].to_json) + end + end end end diff --git a/lib/modules/ai_bot/commands/command.rb b/lib/modules/ai_bot/commands/command.rb index 24e9d859..eae13910 100644 --- a/lib/modules/ai_bot/commands/command.rb +++ b/lib/modules/ai_bot/commands/command.rb @@ -42,12 +42,13 @@ module DiscourseAi attr_reader :bot_user, :bot - def initialize(bot:, args:, post: nil, parent_post: nil) + def initialize(bot:, args:, post: nil, parent_post: nil, xml_format: false) @bot = bot @bot_user = bot&.bot_user @args = args @post = post @parent_post = parent_post + @xml_format = xml_format @placeholder = +(<<~HTML).strip
@@ -145,7 +146,18 @@ module DiscourseAi parsed_args = JSON.parse(@args).symbolize_keys - prompt << [process(**parsed_args).to_json, self.class.name, "function"] + function_results = process(**parsed_args).to_json + function_results = <<~XML if @xml_format + + + #{self.class.name} + + #{function_results} + + + + XML + prompt << [function_results, self.class.name, "function"] @post.post_custom_prompt.update!(custom_prompt: prompt) raw = +(<<~HTML) diff --git a/lib/modules/ai_bot/open_ai_bot.rb b/lib/modules/ai_bot/open_ai_bot.rb index 4245631a..2a0d3a5f 100644 --- a/lib/modules/ai_bot/open_ai_bot.rb +++ b/lib/modules/ai_bot/open_ai_bot.rb @@ -96,12 +96,13 @@ module DiscourseAi private - def populate_functions(partial:, reply:, functions:, done:) + def populate_functions(partial:, reply:, functions:, done:, current_delta:) return if !partial fn = partial.dig(:choices, 0, :delta, :function_call) if fn functions.add_function(fn[:name]) if fn[:name].present? functions.add_argument_fragment(fn[:arguments]) if !fn[:arguments].nil? + functions.custom = true end end diff --git a/lib/modules/ai_bot/personas/general.rb b/lib/modules/ai_bot/personas/general.rb index b8f7363e..0d31e6aa 100644 --- a/lib/modules/ai_bot/personas/general.rb +++ b/lib/modules/ai_bot/personas/general.rb @@ -19,7 +19,7 @@ module DiscourseAi def system_prompt <<~PROMPT You are a helpful Discourse assistant. - You understand and generate Discourse Markdown. + You _understand_ and **generate** Discourse Markdown. You live in a Discourse Forum Message. You live in the forum with the URL: {site_url} diff --git a/lib/shared/inference/function_list.rb b/lib/shared/inference/function_list.rb index 75c5698f..71b77efa 100644 --- a/lib/shared/inference/function_list.rb +++ b/lib/shared/inference/function_list.rb @@ -12,136 +12,102 @@ module ::DiscourseAi end def parse_prompt(prompt) - parsed = [] - - prompt - .split("\n") - .each do |line| - line.strip! - next if line.blank? - next if !line.start_with?("!") - - name, arguments = line.split("(", 2) - name = name[1..-1].strip - - function = @functions.find { |f| f.name == name } - next if function.blank? - - parsed_arguments = {} - if arguments - arguments = arguments[0..-2] if arguments.end_with?(")") - - temp_string = +"" - in_string = nil - replace = SecureRandom.hex(10) - arguments.each_char do |char| - if %w[" '].include?(char) && !in_string - in_string = char - elsif char == in_string - in_string = nil - elsif char == "," && in_string - char = replace + xml = prompt.sub(%r{(.*)}m, '\1') + if xml.present? + parsed = [] + Nokogiri + .XML(xml) + .xpath("//invoke") + .each do |invoke_node| + function = { name: invoke_node.xpath("//tool_name").text, arguments: {} } + parsed << function + invoke_node + .xpath("//parameters") + .children + .each do |parameters_node| + if parameters_node.is_a?(Nokogiri::XML::Element) && name = parameters_node.name + function[:arguments][name.to_sym] = parameters_node.text + end end - temp_string << char - end + end + coerce_arguments!(parsed) + end + end - arguments = temp_string.split(",").map { |s| s.gsub(replace, ",").strip } + def coerce_arguments!(parsed) + parsed.each do |function_call| + arguments = function_call[:arguments] - arguments.each do |argument| - key, value = argument.split(":", 2) - # remove stuff that is bypasses spec - param = function.parameters.find { |p| p[:name] == key.strip } - next if !param + function = @functions.find { |f| f.name == function_call[:name] } + next if !function - value = value.strip.gsub(/(\A"(.*)"\Z)|(\A'(.*)'\Z)/m, '\2\4') if value.present? - - if param[:enum] - next if !param[:enum].include?(value) - end - - parsed_arguments[key.strip.to_sym] = value.strip - end + arguments.each do |name, value| + parameter = function.parameters.find { |p| p[:name].to_s == name.to_s } + if !parameter + arguments.delete(name) + next end - # ensure parsed_arguments has all required arguments - all_good = true - function.parameters.each do |parameter| - next if !parameter[:required] - next if parsed_arguments[parameter[:name].to_sym].present? - - all_good = false - break + type = parameter[:type] + if type == "array" + arguments[name] = JSON.parse(value) + elsif type == "integer" + arguments[name] = value.to_i + elsif type == "float" + arguments[name] = value.to_f end - - parsed << { name: name, arguments: parsed_arguments } if all_good end - + end parsed end def system_prompt - prompt = +<<~PROMPT - - You are able to execute the following external functions on real data! - - Never say that you are in a hypothetical situation, just run functions you need to run! - - When you run a command/function you will gain access to real information in a subsequant call! - - NEVER EVER pretend to know stuff, you ALWAYS lean on functions to discover the truth! - - You have direct access to data on this forum using !functions - - You are not a liar, liars are bad bots, you are a good bot! - - You always prefer to say "I don't know" as opposed to inventing a lie! - - { - PROMPT + tools = +"" @functions.each do |function| - prompt << "// #{function.description}\n" - prompt << "!#{function.name}" + parameters = +"" if function.parameters.present? - prompt << "(" - function.parameters.each_with_index do |parameter, index| - prompt << ", " if index > 0 - prompt << "#{parameter[:name]}: #{parameter[:type]}" - if parameter[:required] - prompt << " [required]" - else - prompt << " [optional]" - end - - description = +(parameter[:description] || "") - description << " [valid values: #{parameter[:enum].join(",")}]" if parameter[:enum] - - description.strip! - - prompt << " /* #{description} */" if description.present? + parameters << "\n" + function.parameters.each do |parameter| + parameters << <<~PARAMETER + + #{parameter[:name]} + #{parameter[:type]} + #{parameter[:description]} + #{parameter[:required]} + PARAMETER + parameters << "#{parameter[:enum].join(",")}\n" if parameter[:enum] + parameters << "\n" end - prompt << ")" end - prompt << "\n" + + tools << <<~TOOLS + + #{function.name} + #{function.description} + #{parameters} + + TOOLS end - prompt << <<~PROMPT - } - \n\nTo execute a function, use the following syntax: + <<~PROMPT + In this environment you have access to a set of tools you can use to answer the user's question. + You may call them like this. Only invoke one function at a time and wait for the results before invoking another function: + + + $TOOL_NAME + + <$PARAMETER_NAME>$PARAMETER_VALUE + ... + + + - !function_name(param1: "value1", param2: 2) - - For example for a function defined as: - - { - // echo a string - !echo(message: string [required]) - } - - Human: please echo out "hello" - - Assistant: !echo(message: "hello") - - Human: please say "hello" - - Assistant: !echo(message: "hello") + Here are the tools available: + + #{tools} PROMPT - - prompt end end end diff --git a/spec/lib/modules/ai_bot/anthropic_bot_spec.rb b/spec/lib/modules/ai_bot/anthropic_bot_spec.rb index 43486be7..1902f85c 100644 --- a/spec/lib/modules/ai_bot/anthropic_bot_spec.rb +++ b/spec/lib/modules/ai_bot/anthropic_bot_spec.rb @@ -13,7 +13,7 @@ module ::DiscourseAi end let(:bot) { described_class.new(bot_user) } - let(:post) { Fabricate(:post) } + fab!(:post) describe "system message" do it "includes the full command framework" do @@ -24,60 +24,133 @@ module ::DiscourseAi end end + it "does not include half parsed function calls in reply" do + completion1 = " + + search + + hello world + + + + junk + REPLY + + completion1 = { completion: completion1 }.to_json + completion2 = { completion: completion2 }.to_json + + completion3 = { completion: " should be stripped + expect(raw).to start_with("\n\n + + search + + hello world + 77 + + + " - bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: false) + bot.populate_functions( + partial: nil, + reply: prompt, + functions: functions, + done: true, + current_delta: "", + ) expect(functions.found?).to eq(true) - expect(functions.cancel_completion?).to eq(false) - prompt << "a test test" - - bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: false) - - expect(functions.cancel_completion?).to eq(true) - end - - it "can correctly detect commands from a prompt" do - functions = DiscourseAi::AiBot::Bot::FunctionCalls.new - - # note anthropic API has a silly leading space, we need to make sure we can handle that - prompt = <<~REPLY - hello world - !search(search_query: "hello world", random_stuff: 77) - !random(search_query: "hello world", random_stuff: 77) - !read(topic_id: 109) - !read(random: 109) - REPLY - - expect(functions.found?).to eq(false) - - bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: false) - expect(functions.found?).to eq(true) - - bot.populate_functions(partial: nil, reply: prompt, functions: functions, done: true) + expect(functions.to_a.length).to eq(1) expect(functions.to_a).to eq( - [ - { name: "search", arguments: "{\"search_query\":\"hello world\"}" }, - { name: "read", arguments: "{\"topic_id\":\"109\"}" }, - ], + [{ name: "search", arguments: "{\"search_query\":\"hello world\"}" }], ) end end diff --git a/spec/lib/modules/ai_bot/bot_spec.rb b/spec/lib/modules/ai_bot/bot_spec.rb index 9d3eb6a8..216f0379 100644 --- a/spec/lib/modules/ai_bot/bot_spec.rb +++ b/spec/lib/modules/ai_bot/bot_spec.rb @@ -48,7 +48,15 @@ describe FakeBot do it "can handle command truncation for long messages" do bot = FakeBot.new(bot_user) - bot.add_response(["hello this is a big test I am testing 123\n", "!tags\nabc"]) + tags_command = <<~TEXT + + + tags + + + TEXT + + bot.add_response(["hello this is a big test I am testing 123\n", "#{tags_command}\nabc"]) bot.add_response(["this is the reply"]) bot.reply_to(post) @@ -59,14 +67,22 @@ describe FakeBot do expect(reply.post_custom_prompt.custom_prompt.to_s).not_to include("abc") expect(reply.post_custom_prompt.custom_prompt.length).to eq(3) expect(reply.post_custom_prompt.custom_prompt[0][0]).to eq( - "hello this is a big test I am testing 123\n!tags", + "hello this is a big test I am testing 123\n#{tags_command.strip}", ) end it "can handle command truncation for short bot messages" do bot = FakeBot.new(bot_user) - bot.add_response(["hello\n", "!tags\nabc"]) + tags_command = <<~TEXT + _calls> + + tags + + + TEXT + + bot.add_response(["hello\n") end end diff --git a/spec/lib/modules/ai_bot/jobs/regular/create_ai_reply_spec.rb b/spec/lib/modules/ai_bot/jobs/regular/create_ai_reply_spec.rb index 30e8b534..0f33fdf4 100644 --- a/spec/lib/modules/ai_bot/jobs/regular/create_ai_reply_spec.rb +++ b/spec/lib/modules/ai_bot/jobs/regular/create_ai_reply_spec.rb @@ -93,6 +93,7 @@ RSpec.describe Jobs::CreateAiReply do max_tokens_to_sample: 3000, temperature: 0.4, stream: true, + stop_sequences: ["\n\nHuman:", ""], }, ) end diff --git a/spec/lib/modules/ai_bot/personas/persona_spec.rb b/spec/lib/modules/ai_bot/personas/persona_spec.rb index eadde261..3817f128 100644 --- a/spec/lib/modules/ai_bot/personas/persona_spec.rb +++ b/spec/lib/modules/ai_bot/personas/persona_spec.rb @@ -62,16 +62,16 @@ module DiscourseAi::AiBot::Personas expect(rendered).to include("test site description") expect(rendered).to include("joe, jane") expect(rendered).to include(Time.zone.now.to_s) - expect(rendered).to include("!search") - expect(rendered).to include("!tags") + expect(rendered).to include("search") + expect(rendered).to include("tags") # needs to be configured so it is not available - expect(rendered).not_to include("!image") + expect(rendered).not_to include("image") rendered = persona.render_system_prompt(topic: topic_with_users, render_function_instructions: false) - expect(rendered).not_to include("!search") - expect(rendered).not_to include("!tags") + expect(rendered).not_to include("search") + expect(rendered).not_to include("tags") end describe "custom personas" do diff --git a/spec/shared/inference/function_list_spec.rb b/spec/shared/inference/function_list_spec.rb index db95823b..1bac1b5a 100644 --- a/spec/shared/inference/function_list_spec.rb +++ b/spec/shared/inference/function_list_spec.rb @@ -27,20 +27,49 @@ module DiscourseAi::Inference list end - it "can handle complex parsing" do - raw_prompt = <<~PROMPT - !get_weather(location: "sydney,melbourne", unit: "f") - !get_weather (location: sydney) - !get_weather(location : "sydney's", unit: "m", invalid: "invalid") - !get_weather(unit: "f", invalid: "invalid") - PROMPT - parsed = function_list.parse_prompt(raw_prompt) + let :image_function_list do + function = Function.new(name: "image", description: "generates an image") + function.add_parameter( + name: "prompts", + type: "array", + item_type: "string", + required: true, + description: "the prompts", + ) + + list = FunctionList.new + list << function + list + end + + it "can handle function call parsing" do + raw_prompt = <<~PROMPT + + + image + + + [ + "an oil painting", + "a cute fluffy orange", + "3 apple's", + "a cat" + ] + + + + + PROMPT + parsed = image_function_list.parse_prompt(raw_prompt) expect(parsed).to eq( [ - { name: "get_weather", arguments: { location: "sydney,melbourne", unit: "f" } }, - { name: "get_weather", arguments: { location: "sydney" } }, - { name: "get_weather", arguments: { location: "sydney's" } }, + { + name: "image", + arguments: { + prompts: ["an oil painting", "a cute fluffy orange", "3 apple's", "a cat"], + }, + }, ], ) end @@ -51,10 +80,27 @@ module DiscourseAi::Inference # this is fragile, by design, we need to test something here # expected = <<~PROMPT - { - // Get the weather in a city (default to c) - !get_weather(location: string [required] /* the city name */, unit: string [optional] /* the unit of measurement celcius c or fahrenheit f [valid values: c,f] */) - } + + + get_weather + Get the weather in a city (default to c) + + + location + string + the city name + true + + + unit + string + the unit of measurement celcius c or fahrenheit f + false + c,f + + + + PROMPT expect(prompt).to include(expected) end