FEATURE: better logging for automation reports (#853)
A new feature_context jsonb column was added to ai_api_audit_logs. This allows us to store rich JSON context on any LLM request made; the new field now stores the automation id and name. Additionally, llm_triage can now specify a maximum number of tokens, which means you can limit the cost of LLM triage by scanning only the first N tokens of a post.
commit 059d3b6fd2
parent eae7716177
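As an illustration of the new plumbing, here is a minimal sketch based on the spec added in this commit; the automation id/name values and the model id string are hypothetical:

    # Pass feature_context when generating a completion; the hash is persisted
    # as jsonb on the resulting AiApiAuditLog row alongside feature_name.
    llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
    llm.generate(
      "Hello",
      user: Discourse.system_user,
      feature_name: "llm_triage",
      feature_context: { automation_id: 42, automation_name: "Spam triage" }, # hypothetical values
    )

    log = AiApiAuditLog.order("id desc").first
    log.feature_context # => { "automation_id" => 42, "automation_name" => "Spam triage" }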
@@ -33,4 +33,4 @@ end
 # post_id :integer
 # feature_name :string(255)
 # language_model :string(255)
-#
+# feature_context :jsonb
@@ -81,6 +81,9 @@ en:
         system_prompt:
           label: "System Prompt"
           description: "The prompt that will be used to triage, be sure for it to reply with a single word you can use to trigger the action"
+        max_post_tokens:
+          label: "Max Post Tokens"
+          description: "The maximum number of tokens to scan using LLM triage"
         search_for_text:
           label: "Search for text"
           description: "If the following text appears in the llm reply, apply this actions"
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+#
+class AddFeatureContextToAiApiLog < ActiveRecord::Migration[7.1]
+  def change
+    add_column :ai_api_audit_logs, :feature_context, :jsonb
+  end
+end
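Because feature_context is a jsonb column, audit rows can be sliced by automation after the fact. A minimal sketch, not part of this commit, assuming PostgreSQL's ->> operator is available on the column:

    # Hypothetical report: count LLM calls per automation name.
    AiApiAuditLog
      .where(feature_name: "llm_triage")
      .group("feature_context->>'automation_name'")
      .count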
@@ -93,6 +93,7 @@ if defined?(DiscourseAutomation)
         temperature: temperature,
         top_p: top_p,
         suppress_notifications: suppress_notifications,
+        automation: self.automation,
       )
     rescue => e
       Discourse.warn_exception e, message: "Error running LLM report!"
@@ -11,6 +11,7 @@ if defined?(DiscourseAutomation)
 
   field :system_prompt, component: :message, required: false
   field :search_for_text, component: :text, required: true
+  field :max_post_tokens, component: :text
   field :model,
         component: :choices,
         required: true,
@@ -49,6 +50,9 @@ if defined?(DiscourseAutomation)
     hide_topic = fields.dig("hide_topic", "value")
     flag_post = fields.dig("flag_post", "value")
     flag_type = fields.dig("flag_type", "value")
+    max_post_tokens = fields.dig("max_post_tokens", "value").to_i
+
+    max_post_tokens = nil if max_post_tokens <= 0
 
     begin
       RateLimiter.new(
@@ -77,6 +81,7 @@ if defined?(DiscourseAutomation)
       hide_topic: hide_topic,
       flag_post: flag_post,
       flag_type: flag_type.to_s.to_sym,
+      max_post_tokens: max_post_tokens,
      automation: self.automation,
     )
   rescue => e
@@ -15,21 +15,27 @@ module DiscourseAi
         hide_topic: nil,
         flag_post: nil,
         flag_type: nil,
-        automation: nil
+        automation: nil,
+        max_post_tokens: nil
       )
         if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
             flag_post.blank?
           raise ArgumentError, "llm_triage: no action specified!"
         end
 
+        llm = DiscourseAi::Completions::Llm.proxy(model)
+
         s_prompt = system_prompt.to_s.sub("%%POST%%", "") # Backwards-compat. We no longer sub this.
         prompt = DiscourseAi::Completions::Prompt.new(s_prompt)
-        prompt.push(type: :user, content: "title: #{post.topic.title}\n#{post.raw}")
+
+        content = "title: #{post.topic.title}\n#{post.raw}"
+
+        content = llm.tokenizer.truncate(content, max_post_tokens) if max_post_tokens.present?
+
+        prompt.push(type: :user, content: content)
 
         result = nil
 
-        llm = DiscourseAi::Completions::Llm.proxy(model)
-
         result =
           llm.generate(
             prompt,
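The truncation above happens before the prompt is pushed, so only the first max_post_tokens tokens of the post are ever sent to the model. A minimal sketch of the effect (the values are illustrative, and exact token counts vary by tokenizer):

    # With max_post_tokens = 100, a very long post is cut to ~100 tokens
    # before triage, bounding the per-request cost.
    content = "title: Example topic\n" + ("hello " * 5000)
    content = llm.tokenizer.truncate(content, 100)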
@@ -37,6 +43,10 @@ module DiscourseAi
             max_tokens: 700, # ~500 words
             user: Discourse.system_user,
             feature_name: "llm_triage",
+            feature_context: {
+              automation_id: automation&.id,
+              automation_name: automation&.name,
+            },
           )&.strip
 
         if result.present? && result.downcase.include?(search_for_text.downcase)
@@ -53,7 +53,8 @@ module DiscourseAi
         exclude_tags: nil,
         top_p: 0.1,
         temperature: 0.2,
-        suppress_notifications: false
+        suppress_notifications: false,
+        automation: nil
       )
         @sender = User.find_by(username: sender_username)
         @receivers = User.where(username: receivers)
@@ -90,6 +91,7 @@ module DiscourseAi
         if !@topic_id && !@receivers.present? && !@email_receivers.present?
           raise ArgumentError, "Must specify topic_id or receivers"
         end
+        @automation = automation
       end
 
       def run!
@@ -153,6 +155,10 @@ Follow the provided writing composition instructions carefully and precisely ste
             top_p: @top_p,
             user: Discourse.system_user,
             feature_name: "ai_report",
+            feature_context: {
+              automation_id: @automation&.id,
+              automation_name: @automation&.name,
+            },
           ) do |response|
             print response if Rails.env.development? && @debug_mode
             result << response
@@ -56,7 +56,14 @@ module DiscourseAi
           []
         end
 
-      def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
+      def perform_completion!(
+        dialect,
+        user,
+        model_params = {},
+        feature_name: nil,
+        feature_context: nil,
+        &blk
+      )
         allow_tools = dialect.prompt.has_tools?
         model_params = normalize_model_params(model_params)
         orig_blk = blk
@@ -111,6 +118,7 @@ module DiscourseAi
             post_id: dialect.prompt.post_id,
             feature_name: feature_name,
             language_model: llm_model.name,
+            feature_context: feature_context.present? ? feature_context.as_json : nil,
           )
 
         if !@streaming_mode
@@ -23,7 +23,13 @@ module DiscourseAi
          dialect.prompt.messages
        end
 
-      def perform_completion!(dialect, _user, _model_params, feature_name: nil)
+      def perform_completion!(
+        dialect,
+        _user,
+        _model_params,
+        feature_name: nil,
+        feature_context: nil
+      )
        @dialect = dialect
        response = responses[completions]
        if response.nil?
@@ -100,7 +100,13 @@ module DiscourseAi
         @last_call = params
       end
 
-      def perform_completion!(dialect, user, model_params = {}, feature_name: nil)
+      def perform_completion!(
+        dialect,
+        user,
+        model_params = {},
+        feature_name: nil,
+        feature_context: nil
+      )
        self.class.last_call = { dialect: dialect, user: user, model_params: model_params }
 
        content = self.class.fake_content
@@ -27,7 +27,14 @@ module DiscourseAi
          AiApiAuditLog::Provider::OpenAI
        end
 
-      def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
+      def perform_completion!(
+        dialect,
+        user,
+        model_params = {},
+        feature_name: nil,
+        feature_context: nil,
+        &blk
+      )
        if dialect.respond_to?(:is_gpt_o?) && dialect.is_gpt_o? && block_given?
          # we need to disable streaming and simulate it
          blk.call "", lambda { |*| }
@@ -191,6 +191,7 @@ module DiscourseAi
        stop_sequences: nil,
        user:,
        feature_name: nil,
+       feature_context: nil,
        &partial_read_blk
      )
        self.class.record_prompt(prompt)
@@ -224,6 +225,7 @@ module DiscourseAi
          user,
          model_params,
          feature_name: feature_name,
+         feature_context: feature_context,
          &partial_read_blk
        )
      end
@@ -55,6 +55,40 @@ RSpec.describe DiscourseAi::Completions::Llm do
       expect(log.topic_id).to eq(123)
       expect(log.post_id).to eq(1)
     end
+
+    it "can track feature_name and feature_context" do
+      body = {
+        model: "gpt-3.5-turbo-0301",
+        usage: {
+          prompt_tokens: 337,
+          completion_tokens: 162,
+          total_tokens: 499,
+        },
+        choices: [
+          { message: { role: "assistant", content: "test" }, finish_reason: "stop", index: 0 },
+        ],
+      }.to_json
+
+      WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+        status: 200,
+        body: body,
+      )
+
+      result =
+        described_class.proxy("custom:#{model.id}").generate(
+          "Hello",
+          user: user,
+          feature_name: "llm_triage",
+          feature_context: {
+            foo: "bar",
+          },
+        )
+
+      expect(result).to eq("test")
+      log = AiApiAuditLog.order("id desc").first
+      expect(log.feature_name).to eq("llm_triage")
+      expect(log.feature_context).to eq({ "foo" => "bar" })
+    end
   end
 
   describe "#generate with fake model" do
@@ -32,15 +32,31 @@ describe DiscourseAi::Automation::LlmTriage do
     add_automation_field("flag_post", true, type: "boolean")
     add_automation_field("canned_reply", "Yo this is a reply")
     add_automation_field("canned_reply_user", reply_user.username, type: "user")
+    add_automation_field("max_post_tokens", 100)
   end
 
   it "can trigger via automation" do
-    post = Fabricate(:post)
+    post = Fabricate(:post, raw: "hello " * 5000)
 
-    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
-      automation.running_in_background!
-      automation.trigger!({ "post" => post })
-    end
+    body = {
+      model: "gpt-3.5-turbo-0301",
+      usage: {
+        prompt_tokens: 337,
+        completion_tokens: 162,
+        total_tokens: 499,
+      },
+      choices: [
+        { message: { role: "assistant", content: "bad" }, finish_reason: "stop", index: 0 },
+      ],
+    }.to_json
+
+    WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      body: body,
+    )
+
+    automation.running_in_background!
+    automation.trigger!({ "post" => post })
 
     topic = post.topic.reload
     expect(topic.category_id).to eq(category.id)
@@ -49,6 +65,18 @@ describe DiscourseAi::Automation::LlmTriage do
     reply = topic.posts.order(:post_number).last
     expect(reply.raw).to eq("Yo this is a reply")
     expect(reply.user.id).to eq(reply_user.id)
+
+    ai_log = AiApiAuditLog.order("id desc").first
+    expect(ai_log.feature_name).to eq("llm_triage")
+    expect(ai_log.feature_context).to eq(
+      { "automation_id" => automation.id, "automation_name" => automation.name },
+    )
+
+    count = ai_log.raw_request_payload.scan("hello").size
+    # we could use the exact count here but it can get fragile
+    # as we change tokenizers, this will give us reasonable confidence
+    expect(count).to be <= (100)
+    expect(count).to be > (50)
   end
 
   it "does not reply to the canned_reply_user" do