From b14cb864dcd443ba2431421c1e85ecf1c15fc682 Mon Sep 17 00:00:00 2001
From: Sam
Date: Tue, 29 Aug 2023 10:43:58 +1000
Subject: [PATCH] FEATURE: add setting_context experimental command (#160)

This command can be used to extract information about a Discourse site
setting directly from source. To operate, it needs the rg binary in the
container.
---
 config/locales/server.en.yml                  |   4 +-
 config/settings.yml                           |   5 +-
 lib/modules/ai_bot/anthropic_bot.rb           |   4 +-
 lib/modules/ai_bot/bot.rb                     |   7 +-
 lib/modules/ai_bot/commands/command.rb        |   4 +
 lib/modules/ai_bot/commands/read_command.rb   |   2 +-
 .../commands/setting_context_command.rb       | 154 ++++++++++++++++++
 lib/modules/ai_bot/entry_point.rb             |   1 +
 lib/modules/ai_bot/open_ai_bot.rb             |   4 +-
 .../ai_bot/commands/search_command_spec.rb    |   2 +-
 .../commands/setting_context_command.rb       |  42 +++++
 spec/lib/modules/ai_bot/open_ai_bot_spec.rb   |   1 -
 12 files changed, 219 insertions(+), 11 deletions(-)
 create mode 100644 lib/modules/ai_bot/commands/setting_context_command.rb
 create mode 100644 spec/lib/modules/ai_bot/commands/setting_context_command.rb

diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index 8d87f97b..75e5d13a 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -63,7 +63,7 @@ en:
     ai_bot_enable_chat_warning: "Display a warning when PM chat is initiated. Can be overriden by editing the translation string: discourse_ai.ai_bot.pm_warning"
     ai_bot_allowed_groups: "When the GPT Bot has access to the PM, it will reply to members of these groups."
     ai_bot_enabled_chat_bots: "Available models to act as an AI Bot"
-    ai_bot_enabled_chat_commands: "Available GPT integrations used to provide external functionality to the model. Only works with GPT-4 and GPT-3.5"
+    ai_bot_enabled_chat_commands: "Available GPT integrations used to provide external functionality to the model."
     ai_helper_add_ai_pm_to_header: "Display a button in the header to start a PM with a AI Bot"
     ai_stability_api_key: "API key for the stability.ai API"
@@ -104,6 +104,7 @@ en:
         image: "Generate image"
         google: "Search Google"
         read: "Read topic"
+        setting_context: "Look up site setting context"
       command_description:
         read: "Reading: %{title}"
         time: "Time in %{timezone} is %{time}"
@@ -121,6 +122,7 @@ en:
         google:
          one: "Found %{count} result for '%{query}'"
          other: "Found %{count} results for '%{query}'"
+        setting_context: "Reading context for: %{setting_name}"
 
    summarization:
      configuration_hint:
diff --git a/config/settings.yml b/config/settings.yml
index 48952fe6..32956bb3 100644
--- a/config/settings.yml
+++ b/config/settings.yml
@@ -166,7 +166,7 @@ discourse_ai:
       - text-embedding-ada-002
       - multilingual-e5-large
   ai_embeddings_generate_for_pms: false
-  ai_embeddings_semantic_related_topics_enabled: 
+  ai_embeddings_semantic_related_topics_enabled:
     default: false
     client: true
   ai_embeddings_semantic_related_topics: 5
@@ -203,7 +203,7 @@ discourse_ai:
       - claude-2
   ai_bot_enabled_chat_commands:
     type: list
-    default: "categories|google|image|search|tags|time"
+    default: "categories|google|image|search|tags|time|read"
     client: true
     choices:
       - categories
@@ -214,6 +214,7 @@ discourse_ai:
       - read
       - tags
      - time
+      - setting_context
   ai_helper_add_ai_pm_to_header:
     default: true
     client: true
diff --git a/lib/modules/ai_bot/anthropic_bot.rb b/lib/modules/ai_bot/anthropic_bot.rb
index b9f7a072..2251cffc 100644
--- a/lib/modules/ai_bot/anthropic_bot.rb
+++ b/lib/modules/ai_bot/anthropic_bot.rb
@@ -54,8 +54,8 @@ module DiscourseAi
         )
       end
 
-      def tokenize(text)
-        DiscourseAi::Tokenizer::AnthropicTokenizer.tokenize(text)
+      def tokenizer
+        DiscourseAi::Tokenizer::AnthropicTokenizer
       end
     end
   end
diff --git a/lib/modules/ai_bot/bot.rb b/lib/modules/ai_bot/bot.rb
index d0159e78..97e3412e 100644
--- a/lib/modules/ai_bot/bot.rb
+++ b/lib/modules/ai_bot/bot.rb
@@ -274,6 +274,7 @@ module DiscourseAi
           Commands::SearchCommand,
           Commands::SummarizeCommand,
           Commands::ReadCommand,
+          Commands::SettingContextCommand,
         ].tap do |cmds|
           cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
           cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
@@ -328,10 +329,14 @@ module DiscourseAi
        @function_list
      end
 
-      def tokenize(text)
+      def tokenizer
        raise NotImplemented
      end
 
+      def tokenize(text)
+        tokenizer.tokenize(text)
+      end
+
      def submit_prompt(prompt, prefer_low_cost: false, &blk)
        raise NotImplemented
      end
diff --git a/lib/modules/ai_bot/commands/command.rb b/lib/modules/ai_bot/commands/command.rb
index b15437e0..07e54f60 100644
--- a/lib/modules/ai_bot/commands/command.rb
+++ b/lib/modules/ai_bot/commands/command.rb
@@ -64,6 +64,10 @@ module DiscourseAi
          @bot ||= DiscourseAi::AiBot::Bot.as(bot_user)
        end
 
+        def tokenizer
+          bot.tokenizer
+        end
+
        def standalone?
          false
        end
diff --git a/lib/modules/ai_bot/commands/read_command.rb b/lib/modules/ai_bot/commands/read_command.rb
index 70ab3727..22adbe26 100644
--- a/lib/modules/ai_bot/commands/read_command.rb
+++ b/lib/modules/ai_bot/commands/read_command.rb
@@ -55,7 +55,7 @@ module DiscourseAi::AiBot::Commands
       posts.each { |post| content << "\n\n#{post.username} said:\n\n#{post.raw}" }
 
       # TODO: 16k or 100k models can handle a lot more tokens
-      content = ::DiscourseAi::Tokenizer::BertTokenizer.truncate(content, 1500).squish
+      content = tokenizer.truncate(content, 1500).squish
 
       result = { topic_id: topic_id, content: content, complete: true }
       result[:post_number] = post_number if post_number
diff --git a/lib/modules/ai_bot/commands/setting_context_command.rb b/lib/modules/ai_bot/commands/setting_context_command.rb
new file mode 100644
index 00000000..65dd1cca
--- /dev/null
+++ b/lib/modules/ai_bot/commands/setting_context_command.rb
@@ -0,0 +1,154 @@
+# frozen_string_literal: true
+
+module DiscourseAi::AiBot::Commands
+  MAX_CONTEXT_TOKENS = 2000
+
+  class SettingContextCommand < Command
+    def self.rg_installed?
+      if defined?(@rg_installed)
+        @rg_installed
+      else
+        @rg_installed =
+          begin
+            Discourse::Utils.execute_command("which", "rg")
+            true
+          rescue Discourse::Utils::CommandError
+            false
+          end
+      end
+    end
+
+    class << self
+      def name
+        "setting_context"
+      end
+
+      def desc
+        "Will provide you with full context regarding a particular site setting in Discourse"
+      end
+
+      def parameters
+        [
+          Parameter.new(
+            name: "setting_name",
+            description: "The name of the site setting we need context for",
+            type: "string",
+            required: true,
+          ),
+        ]
+      end
+    end
+
+    def result_name
+      "context"
+    end
+
+    def description_args
+      { setting_name: @setting_name }
+    end
+
+    CODE_FILE_EXTENSIONS = "rb,js,gjs,hbs"
+
+    def process(setting_name:)
+      if !self.class.rg_installed?
+        return(
+          {
+            setting_name: setting_name,
+            context: "This command requires the rg command line tool to be installed on the server",
+          }
+        )
+      end
+
+      @setting_name = setting_name
+      if !SiteSetting.has_setting?(setting_name)
+        { setting_name: setting_name, context: "This setting does not exist" }
+      else
+        description = SiteSetting.description(setting_name)
+        result = +"# #{setting_name}\n#{description}\n\n"
+
+        setting_info =
+          find_setting_info(setting_name, [Rails.root.join("config", "site_settings.yml").to_s])
+        if !setting_info
+          setting_info =
+            find_setting_info(setting_name, Dir[Rails.root.join("plugins/**/settings.yml")])
+        end
+
+        result << setting_info
+        result << "\n\n"
+
+        %w[lib app plugins].each do |dir|
+          path = Rails.root.join(dir).to_s
+          result << Discourse::Utils.execute_command(
+            "rg",
+            setting_name,
+            path,
+            "-g",
+            "!**/spec/**",
+            "-g",
+            "!**/dist/**",
+            "-g",
+            "*.{#{CODE_FILE_EXTENSIONS}}",
+            "-C",
+            "10",
+            "--color",
+            "never",
+            "--heading",
+            "--no-ignore",
+            chdir: path,
+            success_status_codes: [0, 1],
+          )
+        end
+
+        result.gsub!(/^#{Regexp.escape(Rails.root.to_s)}/, "")
+
+        result = tokenizer.truncate(result, MAX_CONTEXT_TOKENS)
+
+        { setting_name: setting_name, context: result }
+      end
+    end
+
+    def find_setting_info(name, paths)
+      path, result = nil
+
+      paths.each do |search_path|
+        result =
+          Discourse::Utils.execute_command(
+            "rg",
+            name,
+            search_path,
+            "-g",
+            "*.{#{CODE_FILE_EXTENSIONS}}",
+            "-A",
+            "10",
+            "--color",
+            "never",
+            "--heading",
+            success_status_codes: [0, 1],
+          )
+        if !result.blank?
+          path = search_path
+          break
+        end
+      end
+
+      if result.blank?
+        nil
+      else
+        rows = result.split("\n")
+        leading_spaces = rows[0].match(/^\s*/)[0].length
+
+        filtered = []
+
+        rows.each do |row|
+          if !filtered.blank?
+            break if row.match(/^\s*/)[0].length <= leading_spaces
+          end
+          filtered << row
+        end
+
+        filtered.unshift("#{path}")
+        filtered.join("\n")
+      end
+    end
+  end
+end
diff --git a/lib/modules/ai_bot/entry_point.rb b/lib/modules/ai_bot/entry_point.rb
index cdbc8e89..a6aa336e 100644
--- a/lib/modules/ai_bot/entry_point.rb
+++ b/lib/modules/ai_bot/entry_point.rb
@@ -38,6 +38,7 @@ module DiscourseAi
       require_relative "commands/image_command"
       require_relative "commands/google_command"
       require_relative "commands/read_command"
+      require_relative "commands/setting_context_command"
     end
 
     def inject_into(plugin)
diff --git a/lib/modules/ai_bot/open_ai_bot.rb b/lib/modules/ai_bot/open_ai_bot.rb
index 2d0025b1..eb4bbcd3 100644
--- a/lib/modules/ai_bot/open_ai_bot.rb
+++ b/lib/modules/ai_bot/open_ai_bot.rb
@@ -59,8 +59,8 @@ module DiscourseAi
         DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, **params, &blk)
       end
 
-      def tokenize(text)
-        DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(text)
+      def tokenizer
+        DiscourseAi::Tokenizer::OpenAiTokenizer
       end
 
      def model_for(low_cost: false)
diff --git a/spec/lib/modules/ai_bot/commands/search_command_spec.rb b/spec/lib/modules/ai_bot/commands/search_command_spec.rb
index 073f5e76..9b4333ab 100644
--- a/spec/lib/modules/ai_bot/commands/search_command_spec.rb
+++ b/spec/lib/modules/ai_bot/commands/search_command_spec.rb
@@ -3,7 +3,7 @@
 require_relative "../../../../support/openai_completions_inference_stubs"
 
 RSpec.describe DiscourseAi::AiBot::Commands::SearchCommand do
-  fab!(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
+  let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
 
   before { SearchIndexer.enable }
   after { SearchIndexer.disable }
diff --git a/spec/lib/modules/ai_bot/commands/setting_context_command.rb b/spec/lib/modules/ai_bot/commands/setting_context_command.rb
new file mode 100644
index 00000000..dcf3620c
--- /dev/null
+++ b/spec/lib/modules/ai_bot/commands/setting_context_command.rb
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::AiBot::Commands::SettingContextCommand do
+  let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
+  let(:command) { described_class.new(bot_user: bot_user, args: nil) }
+
+  def has_rg?
+    if defined?(@has_rg)
+      @has_rg
+    else
+      @has_rg = system("which rg")
+    end
+  end
+
+  describe "#execute" do
+    it "returns the context for core setting" do
+      skip("rg is needed for these tests") if !has_rg?
+      result = command.process(setting_name: "moderators_view_emails")
+
+      expect(result[:setting_name]).to eq("moderators_view_emails")
+
+      expect(result[:context]).to include("site_settings.yml")
+      expect(result[:context]).to include("moderators_view_emails")
+    end
+
+    it "returns the context for plugin setting" do
+      skip("rg is needed for these tests") if !has_rg?
+      result = command.process(setting_name: "ai_bot_enabled")
+
+      expect(result[:setting_name]).to eq("ai_bot_enabled")
+      expect(result[:context]).to include("ai_bot_enabled:")
+    end
+
+    context "when the setting does not exist" do
+      it "returns an error message" do
+        skip("rg is needed for these tests") if !has_rg?
+        result = command.process(setting_name: "this_setting_does_not_exist")
+        expect(result[:context]).to eq("This setting does not exist")
+      end
+    end
+  end
+end
diff --git a/spec/lib/modules/ai_bot/open_ai_bot_spec.rb b/spec/lib/modules/ai_bot/open_ai_bot_spec.rb
index 54a01032..3bf61738 100644
--- a/spec/lib/modules/ai_bot/open_ai_bot_spec.rb
+++ b/spec/lib/modules/ai_bot/open_ai_bot_spec.rb
@@ -21,7 +21,6 @@ RSpec.describe DiscourseAi::AiBot::OpenAiBot do
       SiteSetting.ai_google_custom_search_api_key = "test"
       SiteSetting.ai_google_custom_search_cx = "test"
 
-      expect(subject.available_commands.length).to eq(6)
       expect(subject.available_commands.length).to eq(
        SiteSetting.ai_bot_enabled_chat_commands.split("|").length,
      )
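
For reference, a minimal sketch (not part of the patch itself) of how the new command
can be exercised from a Rails console once the plugin is loaded. It mirrors the spec
above; it assumes the rg binary is installed in the container and that the GPT-3.5 bot
user has been seeded. Variable names such as bot_user and result are purely illustrative.

    # Hypothetical console session exercising SettingContextCommand directly.
    bot_user = User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID)
    command =
      DiscourseAi::AiBot::Commands::SettingContextCommand.new(bot_user: bot_user, args: nil)

    result = command.process(setting_name: "ai_bot_enabled")
    result[:setting_name] # => "ai_bot_enabled"
    # Setting description plus rg source excerpts, truncated to MAX_CONTEXT_TOKENS (2000).
    puts result[:context]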