FEATURE: better logging for automation reports (#853)

A new feature_context jsonb column was added to ai_api_audit_logs.

This allows us to store rich JSON context on any LLM request made.

For automation-triggered requests, this new field stores the automation id and name.

Additionally, llm_triage now allows specifying a maximum number of post tokens.

This means you can limit the cost of LLM triage by scanning only the
first N tokens of a post.
Sam 2024-10-23 16:49:56 +11:00 committed by GitHub
parent eae7716177
commit 059d3b6fd2
14 changed files with 138 additions and 15 deletions
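
For illustration, a hedged sketch of what an audit log row could look like once a triage automation runs with this change (the id and name values are made up; the model, column, and key names come from the diffs below):

log = AiApiAuditLog.order(:id).last
log.feature_name     # => "llm_triage"
log.feature_context  # => { "automation_id" => 42, "automation_name" => "Flag toxic posts" }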

View File

@@ -33,4 +33,4 @@ end
# post_id :integer
# feature_name :string(255)
# language_model :string(255)
#
# feature_context :jsonb

View File

@@ -81,6 +81,9 @@ en:
system_prompt:
label: "System Prompt"
description: "The prompt that will be used to triage, be sure for it to reply with a single word you can use to trigger the action"
max_post_tokens:
label: "Max Post Tokens"
description: "The maximum number of tokens to scan using LLM triage"
search_for_text:
label: "Search for text"
description: "If the following text appears in the llm reply, apply this actions"

View File

@@ -0,0 +1,7 @@
# frozen_string_literal: true
#
class AddFeatureContextToAiApiLog < ActiveRecord::Migration[7.1]
def change
add_column :ai_api_audit_logs, :feature_context, :jsonb
end
end
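
As a hedged usage sketch (the query itself is hypothetical and the automation variable is assumed; only the model, the new column, and the standard Postgres jsonb operator are relied on), the column can be filtered on once populated:

# Find every triage request made on behalf of a given automation.
# The ->> operator extracts the value as text, so the id is compared as a string.
AiApiAuditLog
  .where(feature_name: "llm_triage")
  .where("feature_context->>'automation_id' = ?", automation.id.to_s)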

View File

@@ -93,6 +93,7 @@ if defined?(DiscourseAutomation)
temperature: temperature,
top_p: top_p,
suppress_notifications: suppress_notifications,
automation: self.automation,
)
rescue => e
Discourse.warn_exception e, message: "Error running LLM report!"

View File

@@ -11,6 +11,7 @@ if defined?(DiscourseAutomation)
field :system_prompt, component: :message, required: false
field :search_for_text, component: :text, required: true
field :max_post_tokens, component: :text
field :model,
component: :choices,
required: true,
@@ -49,6 +50,9 @@ if defined?(DiscourseAutomation)
hide_topic = fields.dig("hide_topic", "value")
flag_post = fields.dig("flag_post", "value")
flag_type = fields.dig("flag_type", "value")
max_post_tokens = fields.dig("max_post_tokens", "value").to_i
max_post_tokens = nil if max_post_tokens <= 0
begin
RateLimiter.new(
@@ -77,6 +81,7 @@ if defined?(DiscourseAutomation)
hide_topic: hide_topic,
flag_post: flag_post,
flag_type: flag_type.to_s.to_sym,
max_post_tokens: max_post_tokens,
automation: self.automation,
)
rescue => e
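
The max_post_tokens handling above relies on String#to_i, so a blank or non-numeric field value simply disables truncation; a quick sketch of that coercion (example values only):

"".to_i    # => 0   -> max_post_tokens becomes nil and the post is sent in full
"100".to_i # => 100 -> the post content is truncated to roughly 100 tokens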

View File

@@ -15,21 +15,27 @@ module DiscourseAi
hide_topic: nil,
flag_post: nil,
flag_type: nil,
automation: nil
automation: nil,
max_post_tokens: nil
)
if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
flag_post.blank?
raise ArgumentError, "llm_triage: no action specified!"
end
llm = DiscourseAi::Completions::Llm.proxy(model)
s_prompt = system_prompt.to_s.sub("%%POST%%", "") # Backwards-compat. We no longer sub this.
prompt = DiscourseAi::Completions::Prompt.new(s_prompt)
prompt.push(type: :user, content: "title: #{post.topic.title}\n#{post.raw}")
content = "title: #{post.topic.title}\n#{post.raw}"
content = llm.tokenizer.truncate(content, max_post_tokens) if max_post_tokens.present?
prompt.push(type: :user, content: content)
result = nil
llm = DiscourseAi::Completions::Llm.proxy(model)
result =
llm.generate(
prompt,
@@ -37,6 +43,10 @@ module DiscourseAi
max_tokens: 700, # ~500 words
user: Discourse.system_user,
feature_name: "llm_triage",
feature_context: {
automation_id: automation&.id,
automation_name: automation&.name,
},
)&.strip
if result.present? && result.downcase.include?(search_for_text.downcase)
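
For illustration, a rough sketch of the truncation step in isolation (model and post are assumed locals, the 100-token limit is an example value, and the tokenizer interface is inferred from the call above):

llm = DiscourseAi::Completions::Llm.proxy(model)
long_content = "title: #{post.topic.title}\n" + ("hello " * 5000)
# Truncation counts tokens, not characters, so request cost stays bounded
# no matter how long the post is.
truncated_content = llm.tokenizer.truncate(long_content, 100)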

View File

@@ -53,7 +53,8 @@ module DiscourseAi
exclude_tags: nil,
top_p: 0.1,
temperature: 0.2,
suppress_notifications: false
suppress_notifications: false,
automation: nil
)
@sender = User.find_by(username: sender_username)
@receivers = User.where(username: receivers)
@@ -90,6 +91,7 @@ module DiscourseAi
if !@topic_id && !@receivers.present? && !@email_receivers.present?
raise ArgumentError, "Must specify topic_id or receivers"
end
@automation = automation
end
def run!
@@ -153,6 +155,10 @@ Follow the provided writing composition instructions carefully and precisely ste
top_p: @top_p,
user: Discourse.system_user,
feature_name: "ai_report",
feature_context: {
automation_id: @automation&.id,
automation_name: @automation&.name,
},
) do |response|
print response if Rails.env.development? && @debug_mode
result << response

View File

@@ -56,7 +56,14 @@ module DiscourseAi
[]
end
def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
def perform_completion!(
dialect,
user,
model_params = {},
feature_name: nil,
feature_context: nil,
&blk
)
allow_tools = dialect.prompt.has_tools?
model_params = normalize_model_params(model_params)
orig_blk = blk
@@ -111,6 +118,7 @@ module DiscourseAi
post_id: dialect.prompt.post_id,
feature_name: feature_name,
language_model: llm_model.name,
feature_context: feature_context.present? ? feature_context.as_json : nil,
)
if !@streaming_mode
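
The feature_context.as_json call is why the specs later in this commit compare against string keys; a small sketch of the conversion (standard ActiveSupport behavior, example values):

{ automation_id: 42, automation_name: "Flag spam" }.as_json
# => { "automation_id" => 42, "automation_name" => "Flag spam" }
# Symbol keys are stringified, so the hash read back from the jsonb column
# matches what the specs below assert.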

View File

@@ -23,7 +23,13 @@ module DiscourseAi
dialect.prompt.messages
end
def perform_completion!(dialect, _user, _model_params, feature_name: nil)
def perform_completion!(
dialect,
_user,
_model_params,
feature_name: nil,
feature_context: nil
)
@dialect = dialect
response = responses[completions]
if response.nil?

View File

@@ -100,7 +100,13 @@ module DiscourseAi
@last_call = params
end
def perform_completion!(dialect, user, model_params = {}, feature_name: nil)
def perform_completion!(
dialect,
user,
model_params = {},
feature_name: nil,
feature_context: nil
)
self.class.last_call = { dialect: dialect, user: user, model_params: model_params }
content = self.class.fake_content

View File

@@ -27,7 +27,14 @@ module DiscourseAi
AiApiAuditLog::Provider::OpenAI
end
def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
def perform_completion!(
dialect,
user,
model_params = {},
feature_name: nil,
feature_context: nil,
&blk
)
if dialect.respond_to?(:is_gpt_o?) && dialect.is_gpt_o? && block_given?
# we need to disable streaming and simulate it
blk.call "", lambda { |*| }

View File

@@ -191,6 +191,7 @@ module DiscourseAi
stop_sequences: nil,
user:,
feature_name: nil,
feature_context: nil,
&partial_read_blk
)
self.class.record_prompt(prompt)
@@ -224,6 +225,7 @@ module DiscourseAi
user,
model_params,
feature_name: feature_name,
feature_context: feature_context,
&partial_read_blk
)
end

View File

@@ -55,6 +55,40 @@ RSpec.describe DiscourseAi::Completions::Llm do
expect(log.topic_id).to eq(123)
expect(log.post_id).to eq(1)
end
it "can track feature_name and feature_context" do
body = {
model: "gpt-3.5-turbo-0301",
usage: {
prompt_tokens: 337,
completion_tokens: 162,
total_tokens: 499,
},
choices: [
{ message: { role: "assistant", content: "test" }, finish_reason: "stop", index: 0 },
],
}.to_json
WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
status: 200,
body: body,
)
result =
described_class.proxy("custom:#{model.id}").generate(
"Hello",
user: user,
feature_name: "llm_triage",
feature_context: {
foo: "bar",
},
)
expect(result).to eq("test")
log = AiApiAuditLog.order("id desc").first
expect(log.feature_name).to eq("llm_triage")
expect(log.feature_context).to eq({ "foo" => "bar" })
end
end
describe "#generate with fake model" do

View File

@@ -32,15 +32,31 @@ describe DiscourseAi::Automation::LlmTriage do
add_automation_field("flag_post", true, type: "boolean")
add_automation_field("canned_reply", "Yo this is a reply")
add_automation_field("canned_reply_user", reply_user.username, type: "user")
add_automation_field("max_post_tokens", 100)
end
it "can trigger via automation" do
post = Fabricate(:post)
post = Fabricate(:post, raw: "hello " * 5000)
DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
automation.running_in_background!
automation.trigger!({ "post" => post })
end
body = {
model: "gpt-3.5-turbo-0301",
usage: {
prompt_tokens: 337,
completion_tokens: 162,
total_tokens: 499,
},
choices: [
{ message: { role: "assistant", content: "bad" }, finish_reason: "stop", index: 0 },
],
}.to_json
WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
status: 200,
body: body,
)
automation.running_in_background!
automation.trigger!({ "post" => post })
topic = post.topic.reload
expect(topic.category_id).to eq(category.id)
@@ -49,6 +65,18 @@ describe DiscourseAi::Automation::LlmTriage do
reply = topic.posts.order(:post_number).last
expect(reply.raw).to eq("Yo this is a reply")
expect(reply.user.id).to eq(reply_user.id)
ai_log = AiApiAuditLog.order("id desc").first
expect(ai_log.feature_name).to eq("llm_triage")
expect(ai_log.feature_context).to eq(
{ "automation_id" => automation.id, "automation_name" => automation.name },
)
count = ai_log.raw_request_payload.scan("hello").size
# we could use the exact count here but it can get fragile
# as we change tokenizers, this will give us reasonable confidence
expect(count).to be <= (100)
expect(count).to be > (50)
end
it "does not reply to the canned_reply_user" do