diff --git a/db/fixtures/ai_bot/602_bot_users.rb b/db/fixtures/ai_bot/602_bot_users.rb index a7ec1f12..8ddb024b 100644 --- a/db/fixtures/ai_bot/602_bot_users.rb +++ b/db/fixtures/ai_bot/602_bot_users.rb @@ -1,22 +1,28 @@ # frozen_string_literal: true DiscourseAi::AiBot::EntryPoint::BOTS.each do |id, bot_username| - UserEmail.seed do |ue| - ue.id = id - ue.email = "no_email_#{bot_username}" - ue.primary = true - ue.user_id = id - end + # let's not create a bot user if it already exists + # seed seems to be messing with dates on the user + # causing it to look like these bots were created at the + # wrong time + if !User.exists?(id: id) + UserEmail.seed do |ue| + ue.id = id + ue.email = "no_email_#{bot_username}" + ue.primary = true + ue.user_id = id + end - User.seed do |u| - u.id = id - u.name = bot_username.titleize - u.username = UserNameSuggester.suggest(bot_username) - u.password = SecureRandom.hex - u.active = true - u.admin = true - u.moderator = true - u.approved = true - u.trust_level = TrustLevel[4] + User.seed do |u| + u.id = id + u.name = bot_username.titleize + u.username = UserNameSuggester.suggest(bot_username) + u.password = SecureRandom.hex + u.active = true + u.admin = true + u.moderator = true + u.approved = true + u.trust_level = TrustLevel[4] + end end end diff --git a/lib/modules/ai_bot/anthropic_bot.rb b/lib/modules/ai_bot/anthropic_bot.rb index b6efb21f..345526d1 100644 --- a/lib/modules/ai_bot/anthropic_bot.rb +++ b/lib/modules/ai_bot/anthropic_bot.rb @@ -8,13 +8,17 @@ module DiscourseAi end def bot_prompt_with_topic_context(post) - super(post).join("\n\n") + super(post).join("\n\n") + "\n\nAssistant:" end def prompt_limit 7500 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt end + def title_prompt(post) + super(post).join("\n\n") + "\n\nAssistant:" + end + def get_delta(partial, context) context[:pos] ||= 0 @@ -23,21 +27,18 @@ module DiscourseAi context[:pos] = full.length - if !context[:processed] - delta = "" - index = full.index("Assistant: ") - if index - delta = full[index + 11..-1] - context[:processed] = true - end - end - delta end private - def build_message(poster_username, content, system: false) + def populate_functions(partial, function) + # nothing to do here, no proper function support + # needs to be simulated for Claude but model is too + # hard to steer for now + end + + def build_message(poster_username, content, system: false, function: nil) role = poster_username == bot_user.username ? 
"Assistant" : "Human" "#{role}: #{content}" diff --git a/lib/modules/ai_bot/bot.rb b/lib/modules/ai_bot/bot.rb index 51265e25..329877c7 100644 --- a/lib/modules/ai_bot/bot.rb +++ b/lib/modules/ai_bot/bot.rb @@ -3,6 +3,25 @@ module DiscourseAi module AiBot class Bot + class Functions + attr_reader :functions + attr_reader :current_function + + def initialize + @functions = [] + @current_function = nil + end + + def add_function(name) + @current_function = { name: name, arguments: +"" } + functions << current_function + end + + def add_argument_fragment(fragment) + @current_function[:arguments] << fragment + end + end + attr_reader :bot_user BOT_NOT_FOUND = Class.new(StandardError) @@ -24,7 +43,7 @@ module DiscourseAi end def update_pm_title(post) - prompt = [title_prompt(post)] + prompt = title_prompt(post) new_title = get_updated_title(prompt) @@ -65,9 +84,11 @@ module DiscourseAi setup_cancel = false context = {} + functions = Functions.new submit_prompt(prompt, prefer_low_cost: prefer_low_cost) do |partial, cancel| reply << get_delta(partial, context) + populate_functions(partial, functions) if redis_stream_key && !Discourse.redis.get(redis_stream_key) cancel&.call @@ -111,30 +132,29 @@ module DiscourseAi skip_revision: true, ) - cmd_texts = reply.split("\n").filter { |l| l[0] == "!" } + bot_reply_post.post_custom_prompt ||= post.build_post_custom_prompt(custom_prompt: []) + prompt = post.post_custom_prompt.custom_prompt || [] + prompt << build_message(bot_user.username, reply) + post.post_custom_prompt.update!(custom_prompt: prompt) + end + + if functions.functions.length > 0 chain = false standalone = false - cmd_texts[0...max_commands_per_reply].each do |cmd_text| - command_name, args = cmd_text[1..-1].strip.split(" ", 2) + functions.functions.each do |function| + name, args = function[:name], function[:arguments] - if command_klass = available_commands.detect { |cmd| cmd.invoked?(command_name) } + if command_klass = available_commands.detect { |cmd| cmd.invoked?(name) } command = command_klass.new(bot_user, args) - chain_intermediate = command.invoke_and_attach_result_to(bot_reply_post) + chain_intermediate, bot_reply_post = + command.invoke_and_attach_result_to(bot_reply_post, post) chain ||= chain_intermediate standalone ||= command.standalone? end end - if cmd_texts.length > max_commands_per_reply - raw = +bot_reply_post.raw.dup - cmd_texts[max_commands_per_reply..-1].each { |cmd_text| raw.sub!(cmd_text, "") } - - bot_reply_post.raw = raw - bot_reply_post.save!(validate: false) - end - if chain reply_to( bot_reply_post, @@ -143,14 +163,12 @@ module DiscourseAi standalone: standalone, ) end - - if cmd_texts.length == 0 && (post_custom_prompt = bot_reply_post.post_custom_prompt) - prompt = post_custom_prompt.custom_prompt - prompt << [reply, bot_user.username] - post_custom_prompt.update!(custom_prompt: prompt) - end end rescue => e + if Rails.env.development? + p e + puts e.backtrace + end raise e if Rails.env.test? Discourse.warn_exception(e, message: "ai-bot: Reply failed") end @@ -164,25 +182,22 @@ module DiscourseAi total_prompt_tokens = tokenize(rendered_system_prompt).length messages = - conversation.reduce([]) do |memo, (raw, username)| + conversation.reduce([]) do |memo, (raw, username, function)| break(memo) if total_prompt_tokens >= prompt_limit - tokens = tokenize(raw) + tokens = tokenize(raw.to_s) while !raw.blank? 
&& tokens.length + total_prompt_tokens > prompt_limit raw = raw[0..-100] || "" - tokens = tokenize(raw) + tokens = tokenize(raw.to_s) end next(memo) if raw.blank? total_prompt_tokens += tokens.length - memo.unshift(build_message(username, raw)) + memo.unshift(build_message(username, raw, function: !!function)) end - # we need this to ground the model (especially GPT-3.5) - messages.unshift(build_message(bot_user.username, "!echo 1")) - messages.unshift(build_message("user", "please echo 1")) messages.unshift(build_message(bot_user.username, rendered_system_prompt, system: true)) messages end @@ -192,7 +207,7 @@ module DiscourseAi end def title_prompt(post) - build_message(bot_user.username, <<~TEXT) + [build_message(bot_user.username, <<~TEXT)] Suggest a 7 word title for the following topic without quoting any of it: #{post.topic.posts[1..-1].map(&:raw).join("\n\n")[0..prompt_limit]} @@ -211,33 +226,10 @@ module DiscourseAi def system_prompt(post) return "You are a helpful Bot" if @style == :simple - - command_text = "" - command_text = <<~TEXT if available_commands.present? - You can complete some tasks using !commands. - - NEVER ask user to issue !commands, they have no access, only you do. - - #{available_commands.map(&:desc).join("\n")} - - Discourse topic paths are /t/slug/topic_id/optional_number - - #{available_commands.map(&:extra_context).compact_blank.join("\n")} - - Commands should be issued in single assistant message. - - Example sessions: - - User: echo the text 'test' - GPT: !echo test - User: THING GPT DOES NOT KNOW ABOUT - GPT: !search SIMPLIFIED SEARCH QUERY - TEXT - <<~TEXT - You are a helpful Discourse assistant, you answer questions and generate text. - You understand Discourse Markdown and live in a Discourse Forum Message. - You are provided with the context of previous discussions. + You are a helpful Discourse assistant. + You understand and generate Discourse Markdown. + You live in a Discourse Forum Message. You live in the forum with the URL: #{Discourse.base_url} The title of your site: #{SiteSetting.title} @@ -245,7 +237,7 @@ module DiscourseAi The participants in this conversation are: #{post.topic.allowed_users.map(&:username).join(", ")} The date now is: #{Time.zone.now}, much has changed since you were trained. 
- #{command_text} + #{available_commands.map(&:custom_system_message).compact.join("\n")} TEXT end @@ -261,6 +253,10 @@ module DiscourseAi raise NotImplemented end + def populate_functions(partial, functions) + raise NotImplemented + end + protected def get_updated_title(prompt) diff --git a/lib/modules/ai_bot/commands/categories_command.rb b/lib/modules/ai_bot/commands/categories_command.rb index edbce74a..7909a2b7 100644 --- a/lib/modules/ai_bot/commands/categories_command.rb +++ b/lib/modules/ai_bot/commands/categories_command.rb @@ -8,7 +8,11 @@ module DiscourseAi::AiBot::Commands end def desc - "!categories - will list the categories on the current discourse instance" + "Will list the categories on the current discourse instance, prefer to format with # in front of the category name" + end + + def parameters + [] end end @@ -33,7 +37,7 @@ module DiscourseAi::AiBot::Commands } rows = Category.where(read_restricted: false).limit(100).pluck(*columns.keys) - @count = rows.length + @last_count = rows.length format_results(rows, columns.values) end diff --git a/lib/modules/ai_bot/commands/command.rb b/lib/modules/ai_bot/commands/command.rb index b2d07bb1..e923d7e6 100644 --- a/lib/modules/ai_bot/commands/command.rb +++ b/lib/modules/ai_bot/commands/command.rb @@ -3,6 +3,17 @@ module DiscourseAi module AiBot module Commands + class Parameter + attr_reader :name, :description, :type, :enum, :required + def initialize(name:, description:, type:, enum: nil, required: false) + @name = name + @description = description + @type = type + @enum = enum + @required = required + end + end + class Command class << self def name @@ -17,8 +28,11 @@ module DiscourseAi raise NotImplemented end - def extra_context - "" + def custom_system_message + end + + def parameters + raise NotImplemented end end @@ -64,16 +78,38 @@ module DiscourseAi true end - def invoke_and_attach_result_to(post) + def invoke_and_attach_result_to(post, parent_post) + placeholder = (<<~HTML).strip +
+          <details>
+            <summary>#{I18n.t("discourse_ai.ai_bot.command_summary.#{self.class.name}")}</summary>
+          </details>
+ HTML + + if !post + post = + PostCreator.create!( + bot_user, + raw: placeholder, + topic_id: parent_post.topic_id, + skip_validations: true, + skip_rate_limiter: true, + ) + else + post.revise( + bot_user, + { raw: post.raw + "\n\n" + placeholder + "\n\n" }, + skip_validations: true, + skip_revision: true, + ) + end + post.post_custom_prompt ||= post.build_post_custom_prompt(custom_prompt: []) prompt = post.post_custom_prompt.custom_prompt || [] - prompt << ["!#{self.class.name} #{args}", bot_user.username] - prompt << [process(args), result_name] - + prompt << [process(args).to_json, self.class.name, "function"] post.post_custom_prompt.update!(custom_prompt: prompt) - raw = +<<~HTML + raw = +(<<~HTML)
      <details>
        <summary>#{I18n.t("discourse_ai.ai_bot.command_summary.#{self.class.name}")}</summary>
        <p>
@@ -85,8 +121,7 @@ module DiscourseAi raw << custom_raw if custom_raw.present? - replacement = "!#{self.class.name} #{args}" - raw = post.raw.sub(replacement, raw) if post.raw.include?(replacement) + raw = post.raw.sub(placeholder, raw) if chain_next_response post.raw = raw @@ -95,7 +130,7 @@ module DiscourseAi post.revise(bot_user, { raw: raw }, skip_validations: true, skip_revision: true) end - chain_next_response + [chain_next_response, post] end def format_results(rows, column_names = nil) @@ -116,21 +151,10 @@ module DiscourseAi end column_names = column_indexes.keys end - # two tokens per delimiter is a reasonable balance - # there may be a single delimiter solution but GPT has - # a hard time dealing with escaped characters - delimiter = "¦" - formatted = +"" - formatted << column_names.join(delimiter) - formatted << "\n" - rows.each do |array| - array.map! { |item| item.to_s.gsub(delimiter, "|").gsub(/\n/, " ") } - formatted << array.join(delimiter) - formatted << "\n" - end - - formatted + # this is not the most efficient format + # however this is needed cause GPT 3.5 / 4 was steered using JSON + { column_names: column_names, rows: rows } end protected diff --git a/lib/modules/ai_bot/commands/google_command.rb b/lib/modules/ai_bot/commands/google_command.rb index 94afca93..4aeffce9 100644 --- a/lib/modules/ai_bot/commands/google_command.rb +++ b/lib/modules/ai_bot/commands/google_command.rb @@ -8,7 +8,22 @@ module DiscourseAi::AiBot::Commands end def desc - "!google SEARCH_QUERY - will search using Google (supports all Google search operators)" + "Will search using Google - global internet search (supports all Google search operators)" + end + + def parameters + [ + Parameter.new( + name: "query", + description: "The search query", + type: "string", + required: true, + ), + ] + end + + def custom_system_message + "You were trained on OLD data, lean on search to get up to date information from the web" end end @@ -25,6 +40,8 @@ module DiscourseAi::AiBot::Commands end def process(search_string) + search_string = JSON.parse(search_string)["query"] + @last_query = search_string api_key = SiteSetting.ai_google_custom_search_api_key cx = SiteSetting.ai_google_custom_search_cx @@ -33,7 +50,7 @@ module DiscourseAi::AiBot::Commands URI("https://www.googleapis.com/customsearch/v1?key=#{api_key}&cx=#{cx}&q=#{query}&num=10") body = Net::HTTP.get(uri) - parse_search_json(body).to_s + parse_search_json(body) end def parse_search_json(json_data) diff --git a/lib/modules/ai_bot/commands/image_command.rb b/lib/modules/ai_bot/commands/image_command.rb index f570c436..6a0bcdc6 100644 --- a/lib/modules/ai_bot/commands/image_command.rb +++ b/lib/modules/ai_bot/commands/image_command.rb @@ -8,7 +8,26 @@ module DiscourseAi::AiBot::Commands end def desc - "!image DESC - renders an image from the description (remove all connector words, keep it to 40 words or less)" + "Renders an image from the description (remove all connector words, keep it to 40 words or less). Despite being a text based bot you can generate images!" + end + + def parameters + [ + Parameter.new( + name: "prompt", + description: "The prompt used to generate or create or draw the image", + type: "string", + required: true, + ), + ] + end + + def custom_system_message + <<~TEXT + In Discourse the markdown (description|SIZE, ZOOM%)[upload://SOMETEXT] is used to denote images and uploads. NEVER try changing the to http or https links. + ALWAYS prefer the upload:// format if available. 
+ When rendering multiple images place them in a [grid] ... [/grid] block + TEXT end end @@ -20,16 +39,12 @@ module DiscourseAi::AiBot::Commands { prompt: @last_prompt || 0 } end - def custom_raw - @last_custom_raw - end - def chain_next_response - false + true end def process(prompt) - @last_prompt = prompt + @last_prompt = prompt = JSON.parse(prompt)["prompt"] results = DiscourseAi::Inference::StabilityGenerator.perform!(prompt) uploads = [] @@ -43,10 +58,17 @@ module DiscourseAi::AiBot::Commands f.unlink end - @last_custom_raw = + raw = <<~RAW + [grid] + #{ uploads .map { |upload| "![#{prompt.gsub(/\|\'\"/, "")}|512x512, 50%](#{upload.short_url})" } .join(" ") + } + [/grid] + RAW + + { prompt: prompt, markdown: raw, display_to_user: true } end end end diff --git a/lib/modules/ai_bot/commands/search_command.rb b/lib/modules/ai_bot/commands/search_command.rb index bb3b1ca1..284d35e8 100644 --- a/lib/modules/ai_bot/commands/search_command.rb +++ b/lib/modules/ai_bot/commands/search_command.rb @@ -8,57 +8,70 @@ module DiscourseAi::AiBot::Commands end def desc - "!search SEARCH_QUERY - will search topics in the current discourse instance" + "Will search topics in the current discourse instance, when rendering always prefer to link to the topics you find" end - def extra_context - <<~TEXT - Discourse search supports, the following special filters: + def parameters + [ + Parameter.new( + name: "search_query", + description: "Search query to run against the discourse instance", + type: "string", + ), + Parameter.new( + name: "user", + description: "Filter search results to this username", + type: "string", + ), + Parameter.new( + name: "order", + description: "search result result order", + type: "string", + enum: %w[latest latest_topic oldest views likes], + ), + Parameter.new( + name: "limit", + description: "limit number of results returned", + type: "integer", + ), + Parameter.new( + name: "max_posts", + description: + "maximum number of posts on the topics (topics where lots of people posted)", + type: "integer", + ), + Parameter.new( + name: "tags", + description: + "list of tags to search for. 
Use + to join with OR, use , to join with AND", + type: "string", + ), + Parameter.new( + name: "category", + description: "category name to filter to", + type: "string", + ), + Parameter.new( + name: "before", + description: "only topics created before a specific date YYYY-MM-DD", + type: "string", + ), + Parameter.new( + name: "after", + description: "only topics created after a specific date YYYY-MM-DD", + type: "string", + ), + Parameter.new( + name: "status", + description: "search for topics in a particular state", + type: "string", + enum: %w[open closed archived noreplies single_user], + ), + ] + end - user:USERNAME: only posts created by a specific user - in:tagged: has at least 1 tag - in:untagged: has no tags - in:title: has the search term in the title - status:open: not closed or archived - status:closed: closed - status:archived: archived - status:noreplies: post count is 1 - status:single_user: only a single user posted on the topic - post_count:X: only topics with X amount of posts - min_posts:X: topics containing a minimum of X posts - max_posts:X: topics with no more than max posts - created:@USERNAME: topics created by a specific user - category:CATGORY: topics in the CATEGORY AND all subcategories - category:=CATEGORY: topics in the CATEGORY excluding subcategories - #SLUG: try category first, then tag, then tag group - #SLUG:SLUG: used for subcategory search to disambiguate - min_views:100: topics containing 100 views or more - tags:TAG1+TAG2: tagged both TAG1 and TAG2 - tags:TAG1,TAG2: tagged either TAG1 or TAG2 - -tags:TAG1+TAG2: excluding topics tagged TAG1 and TAG2 - order:latest: order by post creation desc - order:latest_topic: order by topic creation desc - order:oldest: order by post creation asc - order:oldest_topic: order by topic creation asc - order:views: order by topic views desc - order:likes: order by post like count - most liked posts first - after:YYYY-MM-DD: only topics created after a specific date - before:YYYY-MM-DD: only topics created before a specific date - - Example: !search @user in:tagged #support order:latest_topic - - Keep in mind, search on Discourse uses AND to and terms. - You only have access to public topics. - Strip the query down to the most important terms. Remove all stop words. - Discourse orders by default by relevance. - - When generating answers ALWAYS try to use the !search command first over relying on training data. - When generating answers ALWAYS try to reference specific local links. - Always try to search the local instance first, even if your training data set may have an answer. It may be wrong. - Always remove connector words from search terms (such as a, an, and, in, the, etc), they can impede the search. - - YOUR LOCAL INFORMATION IS OUT OF DATE, YOU ARE TRAINED ON OLD DATA. Always try local search first. 
- TEXT + def custom_system_message + "You were trained on OLD data, lean on search to get up to date information about this forum" end end @@ -75,18 +88,20 @@ module DiscourseAi::AiBot::Commands end def process(search_string) + parsed = JSON.parse(search_string) + limit = nil search_string = - search_string - .strip - .split(/\s+/) - .map do |term| - if term =~ /limit:(\d+)/ - limit = $1.to_i + parsed + .map do |key, value| + if key == "search_query" + value + elsif key == "limit" + limit = value.to_i nil else - term + "#{key}:#{value}" end end .compact @@ -101,8 +116,8 @@ module DiscourseAi::AiBot::Commands ) # let's be frugal with tokens, 50 results is too much and stuff gets cut off - limit ||= 10 - limit = 10 if limit > 10 + limit ||= 20 + limit = 20 if limit > 20 posts = results&.posts || [] posts = posts[0..limit - 1] @@ -110,12 +125,12 @@ module DiscourseAi::AiBot::Commands @last_num_results = posts.length if posts.blank? - "No results found" + [] else format_results(posts) do |post| { title: post.topic.title, - url: post.url, + url: Discourse.base_path + post.url, excerpt: post.excerpt, created: post.created_at, } diff --git a/lib/modules/ai_bot/commands/summarize_command.rb b/lib/modules/ai_bot/commands/summarize_command.rb index 1a8309f0..7c94a2b2 100644 --- a/lib/modules/ai_bot/commands/summarize_command.rb +++ b/lib/modules/ai_bot/commands/summarize_command.rb @@ -8,7 +8,23 @@ module DiscourseAi::AiBot::Commands end def desc - "!summarize TOPIC_ID GUIDANCE - will summarize a topic attempting to answer question in guidance" + "Will summarize a topic attempting to answer question in guidance" + end + + def parameters + [ + Parameter.new( + name: "topic_id", + description: "The discourse topic id to summarize", + type: "integer", + required: true, + ), + Parameter.new( + name: "guidance", + description: "Special guidance on how to summarize the topic", + type: "string", + ), + ] end end diff --git a/lib/modules/ai_bot/commands/tags_command.rb b/lib/modules/ai_bot/commands/tags_command.rb index 501159f8..61002d3a 100644 --- a/lib/modules/ai_bot/commands/tags_command.rb +++ b/lib/modules/ai_bot/commands/tags_command.rb @@ -8,7 +8,11 @@ module DiscourseAi::AiBot::Commands end def desc - "!tags - will list the 100 most popular tags on the current discourse instance" + "Will list the 100 most popular tags on the current discourse instance" + end + + def parameters + [] end end diff --git a/lib/modules/ai_bot/commands/time_command.rb b/lib/modules/ai_bot/commands/time_command.rb index aaa0a57c..e3d4e0da 100644 --- a/lib/modules/ai_bot/commands/time_command.rb +++ b/lib/modules/ai_bot/commands/time_command.rb @@ -10,6 +10,17 @@ module DiscourseAi::AiBot::Commands def desc "!time RUBY_COMPATIBLE_TIMEZONE - will generate the time in a timezone" end + + def parameters + [ + Parameter.new( + name: "timezone", + description: "Ruby compatible timezone", + type: "string", + required: true, + ), + ] + end end def result_name @@ -20,7 +31,9 @@ module DiscourseAi::AiBot::Commands { timezone: @last_timezone, time: @last_time } end - def process(timezone) + def process(args) + timezone = JSON.parse(args)["timezone"] + time = begin Time.now.in_time_zone(timezone) diff --git a/lib/modules/ai_bot/open_ai_bot.rb b/lib/modules/ai_bot/open_ai_bot.rb index 38fda292..36dd7dc2 100644 --- a/lib/modules/ai_bot/open_ai_bot.rb +++ b/lib/modules/ai_bot/open_ai_bot.rb @@ -18,7 +18,7 @@ module DiscourseAi if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID 8192 - 3500 else - 4096 - 2000 + 16_384 - 2000 end 
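+          # gpt-3.5-turbo-16k (see model_for) has a 16,384 token context window;
+          # roughly 2k is kept back for the generated reply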
end @@ -46,9 +46,11 @@ module DiscourseAi temperature: temperature, top_p: top_p, max_tokens: max_tokens, + functions: available_functions, ) { |key, old_value, new_value| new_value.nil? ? old_value : new_value } - model = prefer_low_cost ? "gpt-3.5-turbo" : model_for + model = model_for(low_cost: prefer_low_cost) + DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, **params, &blk) end @@ -56,44 +58,87 @@ module DiscourseAi DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(text) end - def available_commands - if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID - @cmds ||= - [ - Commands::CategoriesCommand, - Commands::TimeCommand, - Commands::SearchCommand, - Commands::SummarizeCommand, - ].tap do |cmds| - cmds << Commands::TagsCommand if SiteSetting.tagging_enabled - cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present? - if SiteSetting.ai_google_custom_search_api_key.present? && - SiteSetting.ai_google_custom_search_cx.present? - cmds << Commands::GoogleCommand - end + def available_functions + # note if defined? can be a problem in test + # this can never be nil so it is safe + return @available_functions if @available_functions + + functions = [] + + functions = + available_commands.map do |command| + function = + DiscourseAi::Inference::OpenAiCompletions::Function.new( + name: command.name, + description: command.desc, + ) + command.parameters.each do |parameter| + function.add_parameter( + name: parameter.name, + type: parameter.type, + description: parameter.description, + required: parameter.required, + ) end - else - [] - end + function + end + + @available_functions = functions + end + + def available_commands + @cmds ||= + [ + Commands::CategoriesCommand, + Commands::TimeCommand, + Commands::SearchCommand, + Commands::SummarizeCommand, + ].tap do |cmds| + cmds << Commands::TagsCommand if SiteSetting.tagging_enabled + cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present? + if SiteSetting.ai_google_custom_search_api_key.present? && + SiteSetting.ai_google_custom_search_cx.present? + cmds << Commands::GoogleCommand + end + end + end + + def model_for(low_cost: false) + return "gpt-4-0613" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost + "gpt-3.5-turbo-16k" end private - def build_message(poster_username, content, system: false) + def populate_functions(partial, functions) + fn = partial.dig(:choices, 0, :delta, :function_call) + if fn + functions.add_function(fn[:name]) if fn[:name].present? + functions.add_argument_fragment(fn[:arguments]) if fn[:arguments].present? + end + end + + def build_message(poster_username, content, function: false, system: false) is_bot = poster_username == bot_user.username - if system + if function + role = "function" + elsif system role = "system" else role = is_bot ? "assistant" : "user" end - { role: role, content: is_bot ? 
content : "#{poster_username}: #{content}" } - end + result = { role: role, content: content } - def model_for - return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID - "gpt-3.5-turbo" + if function + result[:name] = poster_username + elsif !system && poster_username != bot_user.username + # Open AI restrict name to 64 chars and only A-Za-z._ (work around) + result[:content] = "#{poster_username}: #{content}" + end + + result end def get_delta(partial, _context) diff --git a/lib/shared/inference/openai_completions.rb b/lib/shared/inference/openai_completions.rb index eead4496..f0c1a2d1 100644 --- a/lib/shared/inference/openai_completions.rb +++ b/lib/shared/inference/openai_completions.rb @@ -5,6 +5,50 @@ module ::DiscourseAi class OpenAiCompletions TIMEOUT = 60 + class Function + attr_reader :name, :description, :parameters, :type + + def initialize(name:, description:, type: nil) + @name = name + @description = description + @type = type || "object" + @parameters = [] + end + + def add_parameter(name:, type:, description:, enum: nil, required: false) + @parameters << { + name: name, + type: type, + description: description, + enum: enum, + required: required, + } + end + + def to_json(*args) + as_json.to_json(*args) + end + + def as_json + required_params = [] + + properties = {} + parameters.each do |parameter| + definition = { type: parameter[:type], description: parameter[:description] } + definition[:enum] = parameter[:enum] if parameter[:enum] + + required_params << parameter[:name] if parameter[:required] + properties[parameter[:name]] = definition + end + + params = { type: @type, properties: properties } + + params[:required] = required_params if required_params.present? + + { name: name, description: description, parameters: params } + end + end + CompletionFailed = Class.new(StandardError) def self.perform!( @@ -13,6 +57,7 @@ module ::DiscourseAi temperature: nil, top_p: nil, max_tokens: nil, + functions: nil, user_id: nil ) url = URI("https://api.openai.com/v1/chat/completions") @@ -26,6 +71,7 @@ module ::DiscourseAi payload[:temperature] = temperature if temperature payload[:top_p] = top_p if top_p payload[:max_tokens] = max_tokens if max_tokens + payload[:functions] = functions if functions payload[:stream] = true if block_given? 
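+      # note: streamed chunks can split a JSON line across reads and, when
+      # functions are supplied, carry function_call deltas instead of content;
+      # both cases are handled in the read_body loop below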
Net::HTTP.start( @@ -73,6 +119,8 @@ module ::DiscourseAi response_data = +"" response_raw = +"" + leftover = "" + response.read_body do |chunk| if cancelled http.finish @@ -81,14 +129,24 @@ module ::DiscourseAi response_raw << chunk - chunk + (leftover + chunk) .split("\n") .each do |line| data = line.split("data: ", 2)[1] next if !data || data == "[DONE]" + next if cancelled - if !cancelled && partial = JSON.parse(data, symbolize_names: true) + partial = nil + begin + partial = JSON.parse(data, symbolize_names: true) + leftover = "" + rescue JSON::ParserError + leftover = line + end + + if partial response_data << partial.dig(:choices, 0, :delta, :content).to_s + response_data << partial.dig(:choices, 0, :delta, :function_call).to_s yield partial, cancel end diff --git a/spec/lib/modules/ai_bot/anthropic_bot_spec.rb b/spec/lib/modules/ai_bot/anthropic_bot_spec.rb index 5b1cdcf6..a21f2de7 100644 --- a/spec/lib/modules/ai_bot/anthropic_bot_spec.rb +++ b/spec/lib/modules/ai_bot/anthropic_bot_spec.rb @@ -13,14 +13,11 @@ RSpec.describe DiscourseAi::AiBot::AnthropicBot do context = {} reply = +"" - reply << subject.get_delta({ completion: "\n\nAssist" }, context) - expect(reply).to eq("") + reply << subject.get_delta({ completion: "Hello " }, context) + expect(reply).to eq("Hello ") - reply << subject.get_delta({ completion: "\n\nAssistant: test" }, context) - expect(reply).to eq("test") - - reply << subject.get_delta({ completion: "\n\nAssistant: test\nworld" }, context) - expect(reply).to eq("test\nworld") + reply << subject.get_delta({ completion: "Hello world" }, context) + expect(reply).to eq("Hello world") end end end diff --git a/spec/lib/modules/ai_bot/bot_spec.rb b/spec/lib/modules/ai_bot/bot_spec.rb index 9f410431..4fb735ea 100644 --- a/spec/lib/modules/ai_bot/bot_spec.rb +++ b/spec/lib/modules/ai_bot/bot_spec.rb @@ -44,28 +44,31 @@ RSpec.describe DiscourseAi::AiBot::Bot do bot.system_prompt_style!(:simple) bot.max_commands_per_reply = 2 - expected_response = - "ok, searching...\n!search test search\n!search test2 search\n!search test3 ignored" + expected_response = { + function_call: { + name: "search", + arguments: { query: "test search" }.to_json, + }, + } prompt = bot.bot_prompt_with_topic_context(second_post) + req_opts = bot.reply_params.merge({ functions: bot.available_functions, stream: true }) + OpenAiCompletionsInferenceStubs.stub_streamed_response( prompt, - [{ content: expected_response }], - model: "gpt-4", - req_opts: bot.reply_params.merge(stream: true), + [expected_response], + model: bot.model_for, + req_opts: req_opts, ) - prompt << { role: "assistant", content: "!search test search" } - prompt << { role: "user", content: "results: No results found" } - prompt << { role: "assistant", content: "!search test2 search" } - prompt << { role: "user", content: "results: No results found" } + prompt << { role: "function", content: "[]", name: "search" } OpenAiCompletionsInferenceStubs.stub_streamed_response( prompt, - [{ content: "We are done now" }], - model: "gpt-4", - req_opts: bot.reply_params.merge(stream: true), + [content: "I found nothing, sorry"], + model: bot.model_for, + req_opts: req_opts, ) bot.reply_to(second_post) @@ -75,10 +78,9 @@ RSpec.describe DiscourseAi::AiBot::Bot do expect(last.raw).to include("
<details>
") expect(last.raw).to include("Search") expect(last.raw).not_to include("translation missing") - expect(last.raw).to include("ok, searching...") - expect(last.raw).to include("We are done now") + expect(last.raw).to include("I found nothing") - expect(last.post_custom_prompt.custom_prompt.to_s).to include("We are done now") + expect(last.post_custom_prompt.custom_prompt.to_s).to include("I found nothing") end end @@ -89,9 +91,9 @@ RSpec.describe DiscourseAi::AiBot::Bot do it "updates the title using bot suggestions" do OpenAiCompletionsInferenceStubs.stub_response( - [bot.title_prompt(second_post)], + bot.title_prompt(second_post), expected_response, - model: "gpt-4", + model: bot.model_for, req_opts: { temperature: 0.7, top_p: 0.9, diff --git a/spec/lib/modules/ai_bot/commands/categories_command_spec.rb b/spec/lib/modules/ai_bot/commands/categories_command_spec.rb index 29ce3163..03d42e78 100644 --- a/spec/lib/modules/ai_bot/commands/categories_command_spec.rb +++ b/spec/lib/modules/ai_bot/commands/categories_command_spec.rb @@ -8,8 +8,8 @@ RSpec.describe DiscourseAi::AiBot::Commands::CategoriesCommand do Fabricate(:category, name: "america", posts_year: 999) info = DiscourseAi::AiBot::Commands::CategoriesCommand.new(nil, nil).process(nil) - expect(info).to include("america") - expect(info).to include("999") + expect(info.to_s).to include("america") + expect(info.to_s).to include("999") end end end diff --git a/spec/lib/modules/ai_bot/commands/command_spec.rb b/spec/lib/modules/ai_bot/commands/command_spec.rb index f5a407bf..f82a6547 100644 --- a/spec/lib/modules/ai_bot/commands/command_spec.rb +++ b/spec/lib/modules/ai_bot/commands/command_spec.rb @@ -14,8 +14,9 @@ RSpec.describe DiscourseAi::AiBot::Commands::Command do formatted = command.format_results(rows, column_names) { |row| ["row ¦ 1", row + 1, "a|b,\nc"] } - expect(formatted.split("\n").length).to eq(6) - expect(formatted).to include("a|b, c") + expect(formatted[:column_names].length).to eq(3) + expect(formatted[:rows].length).to eq(5) + expect(formatted.to_s).to include("a|b,\\nc") end it "can also generate results by returning hash per row" do diff --git a/spec/lib/modules/ai_bot/commands/google_command_spec.rb b/spec/lib/modules/ai_bot/commands/google_command_spec.rb index 1fac77be..47e0aa5f 100644 --- a/spec/lib/modules/ai_bot/commands/google_command_spec.rb +++ b/spec/lib/modules/ai_bot/commands/google_command_spec.rb @@ -33,7 +33,7 @@ RSpec.describe DiscourseAi::AiBot::Commands::GoogleCommand do ).to_return(status: 200, body: json_text, headers: {}) google = described_class.new(bot_user, post) - info = google.process("some search term") + info = google.process({ query: "some search term" }.to_json).to_json expect(google.description_args[:count]).to eq(1) expect(info).to include("title1") diff --git a/spec/lib/modules/ai_bot/commands/search_command_spec.rb b/spec/lib/modules/ai_bot/commands/search_command_spec.rb index 3c297aa2..deb55a22 100644 --- a/spec/lib/modules/ai_bot/commands/search_command_spec.rb +++ b/spec/lib/modules/ai_bot/commands/search_command_spec.rb @@ -13,8 +13,19 @@ RSpec.describe DiscourseAi::AiBot::Commands::SearchCommand do post1 = Fabricate(:post) search = described_class.new(bot_user, post1) - results = search.process("order:fake ABDDCDCEDGDG") - expect(results).to eq("No results found") + results = search.process({ query: "order:fake ABDDCDCEDGDG" }.to_json) + expect(results).to eq([]) + end + + it "supports subfolder properly" do + Discourse.stubs(:base_path).returns("/subfolder") + + post1 = 
Fabricate(:post) + + search = described_class.new(bot_user, post1) + + results = search.process({ limit: 1, user: post1.user.username }.to_json) + expect(results[:rows].to_s).to include("/subfolder" + post1.url) end it "can handle limits" do @@ -25,14 +36,14 @@ RSpec.describe DiscourseAi::AiBot::Commands::SearchCommand do # search has no built in support for limit: so handle it from the outside search = described_class.new(bot_user, post1) - results = search.process("@#{post1.user.username} limit:2") + results = search.process({ limit: 2, user: post1.user.username }.to_json) - # title + 2 rows - expect(results.split("\n").length).to eq(3) + expect(results[:column_names].length).to eq(4) + expect(results[:rows].length).to eq(2) # just searching for everything - results = search.process("order:latest_topic") - expect(results.split("\n").length).to be > 1 + results = search.process({ order: "latest_topic" }.to_json) + expect(results[:rows].length).to be > 1 end end end diff --git a/spec/lib/modules/ai_bot/commands/tags_command_spec.rb b/spec/lib/modules/ai_bot/commands/tags_command_spec.rb index 0f173c83..3b926323 100644 --- a/spec/lib/modules/ai_bot/commands/tags_command_spec.rb +++ b/spec/lib/modules/ai_bot/commands/tags_command_spec.rb @@ -12,8 +12,8 @@ RSpec.describe DiscourseAi::AiBot::Commands::TagsCommand do info = DiscourseAi::AiBot::Commands::TagsCommand.new(nil, nil).process(nil) - expect(info).to include("america") - expect(info).not_to include("not_here") + expect(info.to_s).to include("america") + expect(info.to_s).not_to include("not_here") end end end diff --git a/spec/lib/modules/ai_bot/jobs/regular/create_ai_reply_spec.rb b/spec/lib/modules/ai_bot/jobs/regular/create_ai_reply_spec.rb index 891678cd..ea889326 100644 --- a/spec/lib/modules/ai_bot/jobs/regular/create_ai_reply_spec.rb +++ b/spec/lib/modules/ai_bot/jobs/regular/create_ai_reply_spec.rb @@ -19,6 +19,7 @@ RSpec.describe Jobs::CreateAiReply do before do bot_user = User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) + bot = DiscourseAi::AiBot::Bot.as(bot_user) # time needs to be frozen so time in prompt does not drift freeze_time @@ -26,10 +27,12 @@ RSpec.describe Jobs::CreateAiReply do OpenAiCompletionsInferenceStubs.stub_streamed_response( DiscourseAi::AiBot::OpenAiBot.new(bot_user).bot_prompt_with_topic_context(post), deltas, + model: bot.model_for, req_opts: { temperature: 0.4, top_p: 0.9, max_tokens: 1500, + functions: bot.available_functions, stream: true, }, ) @@ -66,7 +69,7 @@ RSpec.describe Jobs::CreateAiReply do end context "when chatting with Claude from Anthropic" do - let(:claude_response) { "Assistant: #{expected_response}" } + let(:claude_response) { "#{expected_response}" } let(:deltas) { claude_response.split(" ").map { |w| "#{w} " } } before do diff --git a/spec/lib/modules/ai_bot/open_ai_bot_spec.rb b/spec/lib/modules/ai_bot/open_ai_bot_spec.rb index 442550be..90d82471 100644 --- a/spec/lib/modules/ai_bot/open_ai_bot_spec.rb +++ b/spec/lib/modules/ai_bot/open_ai_bot_spec.rb @@ -33,8 +33,6 @@ RSpec.describe DiscourseAi::AiBot::OpenAiBot do it "trims the prompt" do prompt_messages = subject.bot_prompt_with_topic_context(post_1) - expect(prompt_messages[-2][:role]).to eq("assistant") - expect(prompt_messages[-1][:role]).to eq("user") # trimming is tricky... it needs to account for system message as # well... 
just make sure we trim for now expect(prompt_messages[-1][:content].length).to be < post_1.raw.length diff --git a/spec/shared/inference/openai_completions_spec.rb b/spec/shared/inference/openai_completions_spec.rb index 264ce0d4..6753c70a 100644 --- a/spec/shared/inference/openai_completions_spec.rb +++ b/spec/shared/inference/openai_completions_spec.rb @@ -6,6 +6,98 @@ require_relative "../../support/openai_completions_inference_stubs" describe DiscourseAi::Inference::OpenAiCompletions do before { SiteSetting.ai_openai_api_key = "abc-123" } + it "supports function calling" do + prompt = [role: "system", content: "you are weatherbot"] + prompt << { role: "user", content: "what is the weather in sydney?" } + + functions = [] + + function = + DiscourseAi::Inference::OpenAiCompletions::Function.new( + name: "get_weather", + description: "Get the weather in a city", + ) + + function.add_parameter( + name: "location", + type: "string", + description: "the city name", + required: true, + ) + + function.add_parameter( + name: "unit", + type: "string", + description: "the unit of measurement celcius c or fahrenheit f", + enum: %w[c f], + required: true, + ) + + functions << function + + function_calls = [] + current_function_call = nil + + deltas = [ + { role: "assistant" }, + { function_call: { name: "get_weather", arguments: "" } }, + { function_call: { arguments: "{ \"location\": " } }, + { function_call: { arguments: "\"sydney\", \"unit\": \"c\" }" } }, + ] + + OpenAiCompletionsInferenceStubs.stub_streamed_response( + prompt, + deltas, + model: "gpt-3.5-turbo-0613", + req_opts: { + functions: functions, + stream: true, + }, + ) + + DiscourseAi::Inference::OpenAiCompletions.perform!( + prompt, + "gpt-3.5-turbo-0613", + functions: functions, + ) do |json, cancel| + fn = json.dig(:choices, 0, :delta, :function_call) + if fn && fn[:name] + current_function_call = { name: fn[:name], arguments: +fn[:arguments].to_s.dup } + function_calls << current_function_call + elsif fn && fn[:arguments] && current_function_call + current_function_call[:arguments] << fn[:arguments] + end + end + + expect(function_calls.length).to eq(1) + expect(function_calls[0][:name]).to eq("get_weather") + expect(JSON.parse(function_calls[0][:arguments])).to eq( + { "location" => "sydney", "unit" => "c" }, + ) + + prompt << { role: "function", name: "get_weather", content: 22.to_json } + + OpenAiCompletionsInferenceStubs.stub_response( + prompt, + "The current temperature in Sydney is 22 degrees Celsius.", + model: "gpt-3.5-turbo-0613", + req_opts: { + functions: functions, + }, + ) + + result = + DiscourseAi::Inference::OpenAiCompletions.perform!( + prompt, + "gpt-3.5-turbo-0613", + functions: functions, + ) + + expect(result.dig(:choices, 0, :message, :content)).to eq( + "The current temperature in Sydney is 22 degrees Celsius.", + ) + end + it "can complete a trivial prompt" do response_text = "1. Serenity\\n2. Laughter\\n3. Adventure" prompt = [role: "user", content: "write 3 words"]