FEATURE: better logging for automation reports (#853)

A new feature_context jsonb column was added to ai_api_audit_logs.

This allows us to store rich JSON context on any LLM request made.

For automation-driven requests, this new field stores the automation id and name.
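
Once the column exists, audit rows for a single automation can be pulled back with an ordinary jsonb lookup. Below is a minimal sketch from a Rails console; the query itself (and the automation id used) is illustrative only and not part of this change:

# Hypothetical query: recent llm_triage calls recorded for one automation,
# filtered via the new feature_context jsonb column.
automation = DiscourseAutomation::Automation.find(42) # placeholder id
AiApiAuditLog
  .where(feature_name: "llm_triage")
  .where("feature_context ->> 'automation_id' = ?", automation.id.to_s)
  .order(created_at: :desc)
  .limit(20)
  .each { |log| puts "#{log.created_at} #{log.feature_context["automation_name"]}" }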

Additionally, llm_triage can now specify a maximum number of post tokens.

This means you can limit the cost of LLM triage by scanning only the
first N tokens of a post.
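
In practice the cap is applied by truncating the combined title and post body with the model's tokenizer before it is pushed onto the prompt, so only the first N tokens ever reach the model. A simplified sketch (mirroring the triage change below):

# Sketch: cap the triage input at max_post_tokens before building the prompt.
content = "title: #{post.topic.title}\n#{post.raw}"
content = llm.tokenizer.truncate(content, max_post_tokens) if max_post_tokens.present?
prompt.push(type: :user, content: content)

A blank or zero value disables the cap, since the automation script runs the field through to_i and drops anything that is not positive.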
Sam committed 2024-10-23 16:49:56 +11:00 (committed by GitHub)
commit 059d3b6fd2 · parent eae7716177
14 changed files with 138 additions and 15 deletions

View File

@@ -33,4 +33,4 @@ end
 #  post_id          :integer
 #  feature_name     :string(255)
 #  language_model   :string(255)
-#
+#  feature_context  :jsonb

View File

@@ -81,6 +81,9 @@ en:
             system_prompt:
               label: "System Prompt"
               description: "The prompt that will be used to triage, be sure for it to reply with a single word you can use to trigger the action"
+            max_post_tokens:
+              label: "Max Post Tokens"
+              description: "The maximum number of tokens to scan using LLM triage"
             search_for_text:
               label: "Search for text"
               description: "If the following text appears in the llm reply, apply this actions"

View File

@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+#
+class AddFeatureContextToAiApiLog < ActiveRecord::Migration[7.1]
+  def change
+    add_column :ai_api_audit_logs, :feature_context, :jsonb
+  end
+end

View File

@@ -93,6 +93,9 @@ if defined?(DiscourseAutomation)
           temperature: temperature,
           top_p: top_p,
           suppress_notifications: suppress_notifications,
+          automation: self.automation,
         )
       rescue => e
         Discourse.warn_exception e, message: "Error running LLM report!"

View File

@@ -11,6 +11,7 @@ if defined?(DiscourseAutomation)
     field :system_prompt, component: :message, required: false
     field :search_for_text, component: :text, required: true
+    field :max_post_tokens, component: :text
     field :model,
           component: :choices,
           required: true,
@@ -49,6 +50,9 @@
       hide_topic = fields.dig("hide_topic", "value")
       flag_post = fields.dig("flag_post", "value")
       flag_type = fields.dig("flag_type", "value")
+      max_post_tokens = fields.dig("max_post_tokens", "value").to_i
+      max_post_tokens = nil if max_post_tokens <= 0

       begin
         RateLimiter.new(
@@ -77,6 +81,7 @@
           hide_topic: hide_topic,
           flag_post: flag_post,
           flag_type: flag_type.to_s.to_sym,
+          max_post_tokens: max_post_tokens,
           automation: self.automation,
         )
       rescue => e

View File

@@ -15,21 +15,27 @@ module DiscourseAi
         hide_topic: nil,
         flag_post: nil,
         flag_type: nil,
-        automation: nil
+        automation: nil,
+        max_post_tokens: nil
       )
         if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
             flag_post.blank?
          raise ArgumentError, "llm_triage: no action specified!"
        end

+        llm = DiscourseAi::Completions::Llm.proxy(model)
+
        s_prompt = system_prompt.to_s.sub("%%POST%%", "") # Backwards-compat. We no longer sub this.
        prompt = DiscourseAi::Completions::Prompt.new(s_prompt)
-        prompt.push(type: :user, content: "title: #{post.topic.title}\n#{post.raw}")
+
+        content = "title: #{post.topic.title}\n#{post.raw}"
+        content = llm.tokenizer.truncate(content, max_post_tokens) if max_post_tokens.present?
+
+        prompt.push(type: :user, content: content)

        result = nil
-
-        llm = DiscourseAi::Completions::Llm.proxy(model)

        result =
          llm.generate(
            prompt,
@@ -37,6 +43,10 @@
            max_tokens: 700, # ~500 words
            user: Discourse.system_user,
            feature_name: "llm_triage",
+            feature_context: {
+              automation_id: automation&.id,
+              automation_name: automation&.name,
+            },
          )&.strip

        if result.present? && result.downcase.include?(search_for_text.downcase)

View File

@@ -53,7 +53,8 @@ module DiscourseAi
        exclude_tags: nil,
        top_p: 0.1,
        temperature: 0.2,
-       suppress_notifications: false
+       suppress_notifications: false,
+       automation: nil
      )
        @sender = User.find_by(username: sender_username)
        @receivers = User.where(username: receivers)
@@ -90,6 +91,7 @@
        if !@topic_id && !@receivers.present? && !@email_receivers.present?
          raise ArgumentError, "Must specify topic_id or receivers"
        end
+       @automation = automation
      end

      def run!
@@ -153,6 +155,10 @@ Follow the provided writing composition instructions carefully and precisely ste
            top_p: @top_p,
            user: Discourse.system_user,
            feature_name: "ai_report",
+            feature_context: {
+              automation_id: @automation&.id,
+              automation_name: @automation&.name,
+            },
          ) do |response|
            print response if Rails.env.development? && @debug_mode
            result << response

View File

@@ -56,7 +56,14 @@ module DiscourseAi
           []
         end

-        def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
+        def perform_completion!(
+          dialect,
+          user,
+          model_params = {},
+          feature_name: nil,
+          feature_context: nil,
+          &blk
+        )
           allow_tools = dialect.prompt.has_tools?
           model_params = normalize_model_params(model_params)
           orig_blk = blk
@@ -111,6 +118,7 @@
              post_id: dialect.prompt.post_id,
              feature_name: feature_name,
              language_model: llm_model.name,
+              feature_context: feature_context.present? ? feature_context.as_json : nil,
            )

           if !@streaming_mode

View File

@@ -23,7 +23,13 @@ module DiscourseAi
           dialect.prompt.messages
         end

-        def perform_completion!(dialect, _user, _model_params, feature_name: nil)
+        def perform_completion!(
+          dialect,
+          _user,
+          _model_params,
+          feature_name: nil,
+          feature_context: nil
+        )
           @dialect = dialect
           response = responses[completions]

           if response.nil?

View File

@@ -100,7 +100,13 @@ module DiscourseAi
           @last_call = params
         end

-        def perform_completion!(dialect, user, model_params = {}, feature_name: nil)
+        def perform_completion!(
+          dialect,
+          user,
+          model_params = {},
+          feature_name: nil,
+          feature_context: nil
+        )
           self.class.last_call = { dialect: dialect, user: user, model_params: model_params }

           content = self.class.fake_content

View File

@@ -27,7 +27,14 @@ module DiscourseAi
           AiApiAuditLog::Provider::OpenAI
         end

-        def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
+        def perform_completion!(
+          dialect,
+          user,
+          model_params = {},
+          feature_name: nil,
+          feature_context: nil,
+          &blk
+        )
           if dialect.respond_to?(:is_gpt_o?) && dialect.is_gpt_o? && block_given?
             # we need to disable streaming and simulate it
             blk.call "", lambda { |*| }

View File

@@ -191,6 +191,7 @@ module DiscourseAi
        stop_sequences: nil,
        user:,
        feature_name: nil,
+        feature_context: nil,
        &partial_read_blk
      )
        self.class.record_prompt(prompt)
@@ -224,6 +225,7 @@
          user,
          model_params,
          feature_name: feature_name,
+          feature_context: feature_context,
          &partial_read_blk
        )
      end

View File

@@ -55,6 +55,40 @@ RSpec.describe DiscourseAi::Completions::Llm do
       expect(log.topic_id).to eq(123)
       expect(log.post_id).to eq(1)
     end
+
+    it "can track feature_name and feature_context" do
+      body = {
+        model: "gpt-3.5-turbo-0301",
+        usage: {
+          prompt_tokens: 337,
+          completion_tokens: 162,
+          total_tokens: 499,
+        },
+        choices: [
+          { message: { role: "assistant", content: "test" }, finish_reason: "stop", index: 0 },
+        ],
+      }.to_json
+
+      WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+        status: 200,
+        body: body,
+      )
+
+      result =
+        described_class.proxy("custom:#{model.id}").generate(
+          "Hello",
+          user: user,
+          feature_name: "llm_triage",
+          feature_context: {
+            foo: "bar",
+          },
+        )
+
+      expect(result).to eq("test")
+
+      log = AiApiAuditLog.order("id desc").first
+      expect(log.feature_name).to eq("llm_triage")
+      expect(log.feature_context).to eq({ "foo" => "bar" })
+    end
   end

   describe "#generate with fake model" do

View File

@@ -32,15 +32,31 @@ describe DiscourseAi::Automation::LlmTriage do
     add_automation_field("flag_post", true, type: "boolean")
     add_automation_field("canned_reply", "Yo this is a reply")
     add_automation_field("canned_reply_user", reply_user.username, type: "user")
+    add_automation_field("max_post_tokens", 100)
   end

   it "can trigger via automation" do
-    post = Fabricate(:post)
+    post = Fabricate(:post, raw: "hello " * 5000)

-    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
-      automation.running_in_background!
-      automation.trigger!({ "post" => post })
-    end
+    body = {
+      model: "gpt-3.5-turbo-0301",
+      usage: {
+        prompt_tokens: 337,
+        completion_tokens: 162,
+        total_tokens: 499,
+      },
+      choices: [
+        { message: { role: "assistant", content: "bad" }, finish_reason: "stop", index: 0 },
+      ],
+    }.to_json
+
+    WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      body: body,
+    )
+
+    automation.running_in_background!
+    automation.trigger!({ "post" => post })

     topic = post.topic.reload
     expect(topic.category_id).to eq(category.id)
@@ -49,6 +65,18 @@
     reply = topic.posts.order(:post_number).last
     expect(reply.raw).to eq("Yo this is a reply")
     expect(reply.user.id).to eq(reply_user.id)
+
+    ai_log = AiApiAuditLog.order("id desc").first
+    expect(ai_log.feature_name).to eq("llm_triage")
+    expect(ai_log.feature_context).to eq(
+      { "automation_id" => automation.id, "automation_name" => automation.name },
+    )
+
+    count = ai_log.raw_request_payload.scan("hello").size
+    # we could use the exact count here but it can get fragile
+    # as we change tokenizers, this will give us reasonable confidence
+    expect(count).to be <= (100)
+    expect(count).to be > (50)
   end

   it "does not reply to the canned_reply_user" do