FEATURE: add setting_context experimental command (#160)

This command can be used to extract information about a Discourse site setting directly from the source code. To operate, it needs the rg (ripgrep) binary to be installed in the container.
2023-08-29 10:43:58 +10:00 · 2023-08-29 10:43:58 +10:00 · b14cb864dc
parent fba419f864
commit b14cb864dc
12 changed files with 219 additions and 11 deletions
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@ -63,7 +63,7 @@ en:
    ai_bot_enable_chat_warning: "Display a warning when PM chat is initiated. Can be overriden by editing the translation string: discourse_ai.ai_bot.pm_warning"
    ai_bot_allowed_groups: "When the GPT Bot has access to the PM, it will reply to members of these groups."
    ai_bot_enabled_chat_bots: "Available models to act as an AI Bot"
-    ai_bot_enabled_chat_commands: "Available GPT integrations used to provide external functionality to the model. Only works with GPT-4 and GPT-3.5"
+    ai_bot_enabled_chat_commands: "Available GPT integrations used to provide external functionality to the model."
    ai_helper_add_ai_pm_to_header: "Display a button in the header to start a PM with a AI Bot"

    ai_stability_api_key: "API key for the stability.ai API"
@ -104,6 +104,7 @@ en:
        image: "Generate image"
        google: "Search Google"
        read: "Read topic"
+        setting_context: "Look up site setting context"
      command_description:
        read: "Reading: <a href='%{url}'>%{title}</a>"
        time: "Time in %{timezone} is %{time}"
@ -121,6 +122,7 @@ en:
        google:
          one: "Found %{count} <a href='%{url}'>result</a> for '%{query}'"
          other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'"
+        setting_context: "Reading context for: %{setting_name}"

    summarization:
      configuration_hint:
--- a/config/settings.yml
+++ b/config/settings.yml
@ -166,7 +166,7 @@ discourse_ai:
     - text-embedding-ada-002
     - multilingual-e5-large
  ai_embeddings_generate_for_pms: false
-  ai_embeddings_semantic_related_topics_enabled: 
+  ai_embeddings_semantic_related_topics_enabled:
    default: false
    client: true
  ai_embeddings_semantic_related_topics: 5
@ -203,7 +203,7 @@ discourse_ai:
     - claude-2
  ai_bot_enabled_chat_commands:
    type: list
-    default: "categories|google|image|search|tags|time"
+    default: "categories|google|image|search|tags|time|read"
    client: true
    choices:
     - categories
@ -214,6 +214,7 @@ discourse_ai:
     - read
     - tags
     - time
+     - setting_context
  ai_helper_add_ai_pm_to_header:
    default: true
    client: true
--- a/lib/modules/ai_bot/anthropic_bot.rb
+++ b/lib/modules/ai_bot/anthropic_bot.rb
@ -54,8 +54,8 @@ module DiscourseAi
        )
      end

-      def tokenize(text)
-        DiscourseAi::Tokenizer::AnthropicTokenizer.tokenize(text)
+      def tokenizer
+        DiscourseAi::Tokenizer::AnthropicTokenizer
      end
    end
  end
--- a/lib/modules/ai_bot/bot.rb
+++ b/lib/modules/ai_bot/bot.rb
@ -274,6 +274,7 @@ module DiscourseAi
            Commands::SearchCommand,
            Commands::SummarizeCommand,
            Commands::ReadCommand,
+            Commands::SettingContextCommand,
          ].tap do |cmds|
            cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
            cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
@ -328,10 +329,14 @@ module DiscourseAi
        @function_list
      end

-      def tokenize(text)
+      def tokenizer
        raise NotImplemented
      end

+      # Tokenizes +text+ by delegating to the object returned by #tokenizer.
+      def tokenize(text)
+        tokenizer.tokenize(text)
+      end
+
      def submit_prompt(prompt, prefer_low_cost: false, &blk)
        raise NotImplemented
      end
--- a/lib/modules/ai_bot/commands/command.rb
+++ b/lib/modules/ai_bot/commands/command.rb
@ -64,6 +64,10 @@ module DiscourseAi
          @bot ||= DiscourseAi::AiBot::Bot.as(bot_user)
        end

+        # Tokenizer used by this command; delegates to the bot's tokenizer.
+        def tokenizer
+          bot.tokenizer
+        end
+
        def standalone?
          false
        end
--- a/lib/modules/ai_bot/commands/read_command.rb
+++ b/lib/modules/ai_bot/commands/read_command.rb
@ -55,7 +55,7 @@ module DiscourseAi::AiBot::Commands
      posts.each { |post| content << "\n\n#{post.username} said:\n\n#{post.raw}" }

      # TODO: 16k or 100k models can handle a lot more tokens
-      content = ::DiscourseAi::Tokenizer::BertTokenizer.truncate(content, 1500).squish
+      content = tokenizer.truncate(content, 1500).squish

      result = { topic_id: topic_id, content: content, complete: true }
      result[:post_number] = post_number if post_number
--- a/lib/modules/ai_bot/commands/setting_context_command.rb
+++ b/lib/modules/ai_bot/commands/setting_context_command.rb
@ -0,0 +1,154 @@
+#frozen_string_literal: true
+
+module DiscourseAi::AiBot::Commands
+  MAX_CONTEXT_TOKENS = 2000
+
+  class SettingContextCommand < Command
+    def self.rg_installed?
+      if defined?(@rg_installed)
+        @rg_installed
+      else
+        @rg_installed =
+          begin
+            Discourse::Utils.execute_command("which", "rg")
+            true
+          rescue Discourse::Utils::CommandError
+            false
+          end
+      end
+    end
+
+    # Static metadata describing this command: its name, its description and
+    # the parameters it accepts.
+    class << self
+      # Identifier of the command; the same string is listed among the
+      # ai_bot_enabled_chat_commands choices in config/settings.yml.
+      def name
+        "setting_context"
+      end
+
+      # Human-readable description of what the command provides.
+      def desc
+        "Will provide you with full context regarding a particular site setting in Discourse"
+      end
+
+      # Declares a single required string parameter, setting_name, which is
+      # the keyword argument #process receives.
+      def parameters
+        [
+          Parameter.new(
+            name: "setting_name",
+            description: "The name of the site setting we need context for",
+            type: "string",
+            required: true,
+          ),
+        ]
+      end
+    end
+
+    # Label for this command's result.
+    # NOTE(review): presumably consumed by the Command base class, which is
+    # not shown here - confirm.
+    def result_name
+      "context"
+    end
+
+    # Arguments exposing the setting name recorded by #process.
+    # NOTE(review): the key matches the %{setting_name} placeholder of the
+    # setting_context command_description locale string; presumably that is
+    # where it is interpolated - confirm.
+    def description_args
+      { setting_name: @setting_name }
+    end
+
+    CODE_FILE_EXTENSIONS = "rb,js,gjs,hbs"
+
+    def process(setting_name:)
+      if !self.class.rg_installed?
+        return(
+          {
+            setting_name: setting_name,
+            context: "This command requires the rg command line tool to be installed on the server",
+          }
+        )
+      end
+
+      @setting_name = setting_name
+      if !SiteSetting.has_setting?(setting_name)
+        { setting_name: setting_name, context: "This setting does not exist" }
+      else
+        description = SiteSetting.description(setting_name)
+        result = +"# #{setting_name}\n#{description}\n\n"
+
+        setting_info =
+          find_setting_info(setting_name, [Rails.root.join("config", "site_settings.yml").to_s])
+        if !setting_info
+          setting_info =
+            find_setting_info(setting_name, Dir[Rails.root.join("plugins/**/settings.yml")])
+        end
+
+        result << setting_info
+        result << "\n\n"
+
+        %w[lib app plugins].each do |dir|
+          path = Rails.root.join(dir).to_s
+          result << Discourse::Utils.execute_command(
+            "rg",
+            setting_name,
+            path,
+            "-g",
+            "!**/spec/**",
+            "-g",
+            "!**/dist/**",
+            "-g",
+            "*.{#{CODE_FILE_EXTENSIONS}}",
+            "-C",
+            "10",
+            "--color",
+            "never",
+            "--heading",
+            "--no-ignore",
+            chdir: path,
+            success_status_codes: [0, 1],
+          )
+        end
+
+        result.gsub!(/^#{Regexp.escape(Rails.root.to_s)}/, "")
+
+        result = tokenizer.truncate(result, MAX_CONTEXT_TOKENS)
+
+        { setting_name: setting_name, context: result }
+      end
+    end
+
+    def find_setting_info(name, paths)
+      path, result = nil
+
+      paths.each do |search_path|
+        result =
+          Discourse::Utils.execute_command(
+            "rg",
+            name,
+            search_path,
+            "-g",
+            "*.{#{CODE_FILE_EXTENSIONS}}",
+            "-A",
+            "10",
+            "--color",
+            "never",
+            "--heading",
+            success_status_codes: [0, 1],
+          )
+        if !result.blank?
+          path = search_path
+          break
+        end
+      end
+
+      if result.blank?
+        nil
+      else
+        rows = result.split("\n")
+        leading_spaces = rows[0].match(/^\s*/)[0].length
+
+        filtered = []
+
+        rows.each do |row|
+          if !filtered.blank?
+            break if row.match(/^\s*/)[0].length <= leading_spaces
+          end
+          filtered << row
+        end
+
+        filtered.unshift("#{path}")
+        filtered.join("\n")
+      end
+    end
+  end
+end
--- a/lib/modules/ai_bot/entry_point.rb
+++ b/lib/modules/ai_bot/entry_point.rb
@ -38,6 +38,7 @@ module DiscourseAi
        require_relative "commands/image_command"
        require_relative "commands/google_command"
        require_relative "commands/read_command"
+        require_relative "commands/setting_context_command"
      end

      def inject_into(plugin)
--- a/lib/modules/ai_bot/open_ai_bot.rb
+++ b/lib/modules/ai_bot/open_ai_bot.rb
@ -59,8 +59,8 @@ module DiscourseAi
        DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, **params, &blk)
      end

-      def tokenize(text)
-        DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(text)
+      def tokenizer
+        DiscourseAi::Tokenizer::OpenAiTokenizer
      end

      def model_for(low_cost: false)
--- a/spec/lib/modules/ai_bot/commands/search_command_spec.rb
+++ b/spec/lib/modules/ai_bot/commands/search_command_spec.rb
@ -3,7 +3,7 @@
 require_relative "../../../../support/openai_completions_inference_stubs"

 RSpec.describe DiscourseAi::AiBot::Commands::SearchCommand do
-  fab!(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
+  let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }

  before { SearchIndexer.enable }
  after { SearchIndexer.disable }
--- a/spec/lib/modules/ai_bot/commands/setting_context_command.rb
+++ b/spec/lib/modules/ai_bot/commands/setting_context_command.rb
@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+# NOTE(review): this file's name does not end in _spec.rb; confirm that the
+# test runner's file pattern actually picks it up.
+RSpec.describe DiscourseAi::AiBot::Commands::SettingContextCommand do
+  let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
+  let(:command) { described_class.new(bot_user: bot_user, args: nil) }
+
+  # True when the rg binary can be located with `which`.
+  def has_rg?
+    return @has_rg if defined?(@has_rg)
+
+    @has_rg = !!system("which rg")
+  end
+
+  describe "#execute" do
+    # has_rg? is an instance-level helper, so it has to be called from a hook
+    # (example scope) rather than from the group body; skip inside a hook
+    # marks the running example as skipped.
+    before { skip("rg is needed for these tests") if !has_rg? }
+
+    it "returns the context for core setting" do
+      result = command.process(setting_name: "moderators_view_emails")
+
+      expect(result[:setting_name]).to eq("moderators_view_emails")
+
+      expect(result[:context]).to include("site_settings.yml")
+      expect(result[:context]).to include("moderators_view_emails")
+    end
+
+    it "returns the context for plugin setting" do
+      result = command.process(setting_name: "ai_bot_enabled")
+
+      expect(result[:setting_name]).to eq("ai_bot_enabled")
+      expect(result[:context]).to include("ai_bot_enabled:")
+    end
+
+    context "when the setting does not exist" do
+      it "returns an error message" do
+        result = command.process(setting_name: "this_setting_does_not_exist")
+        expect(result[:context]).to eq("This setting does not exist")
+      end
+    end
+  end
+end
--- a/spec/lib/modules/ai_bot/open_ai_bot_spec.rb
+++ b/spec/lib/modules/ai_bot/open_ai_bot_spec.rb
@ -21,7 +21,6 @@ RSpec.describe DiscourseAi::AiBot::OpenAiBot do
        SiteSetting.ai_google_custom_search_api_key = "test"
        SiteSetting.ai_google_custom_search_cx = "test"

-        expect(subject.available_commands.length).to eq(6)
        expect(subject.available_commands.length).to eq(
          SiteSetting.ai_bot_enabled_chat_commands.split("|").length,
        )