diff --git a/app/models/ai_api_audit_log.rb b/app/models/ai_api_audit_log.rb
index c38d68eb..2fa9f5c3 100644
--- a/app/models/ai_api_audit_log.rb
+++ b/app/models/ai_api_audit_log.rb
@@ -33,4 +33,4 @@ end
 #  post_id           :integer
 #  feature_name      :string(255)
 #  language_model    :string(255)
-#
+#  feature_context   :jsonb
diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index a58b987a..ef24a436 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -81,6 +81,9 @@ en:
             system_prompt:
               label: "System Prompt"
               description: "The prompt that will be used to triage, be sure for it to reply with a single word you can use to trigger the action"
+            max_post_tokens:
+              label: "Max Post Tokens"
+              description: "The maximum number of tokens to scan using LLM triage"
             search_for_text:
               label: "Search for text"
               description: "If the following text appears in the llm reply, apply this actions"
diff --git a/db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb b/db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb
new file mode 100644
index 00000000..5191541c
--- /dev/null
+++ b/db/migrate/20241023033955_add_feature_context_to_ai_api_log.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+#
+class AddFeatureContextToAiApiLog < ActiveRecord::Migration[7.1]
+  def change
+    add_column :ai_api_audit_logs, :feature_context, :jsonb
+  end
+end
diff --git a/discourse_automation/llm_report.rb b/discourse_automation/llm_report.rb
index c190af0f..e309fbbb 100644
--- a/discourse_automation/llm_report.rb
+++ b/discourse_automation/llm_report.rb
@@ -93,6 +93,7 @@ if defined?(DiscourseAutomation)
         temperature: temperature,
         top_p: top_p,
         suppress_notifications: suppress_notifications,
+        automation: self.automation,
       )
     rescue => e
       Discourse.warn_exception e, message: "Error running LLM report!"
diff --git a/discourse_automation/llm_triage.rb b/discourse_automation/llm_triage.rb
index ad90d361..6ef37079 100644
--- a/discourse_automation/llm_triage.rb
+++ b/discourse_automation/llm_triage.rb
@@ -11,6 +11,7 @@ if defined?(DiscourseAutomation)
 
     field :system_prompt, component: :message, required: false
     field :search_for_text, component: :text, required: true
+    field :max_post_tokens, component: :text
     field :model,
           component: :choices,
           required: true,
@@ -49,6 +50,9 @@
       hide_topic = fields.dig("hide_topic", "value")
       flag_post = fields.dig("flag_post", "value")
       flag_type = fields.dig("flag_type", "value")
+      max_post_tokens = fields.dig("max_post_tokens", "value").to_i
+
+      max_post_tokens = nil if max_post_tokens <= 0
 
       begin
         RateLimiter.new(
@@ -77,6 +81,7 @@
           hide_topic: hide_topic,
           flag_post: flag_post,
           flag_type: flag_type.to_s.to_sym,
+          max_post_tokens: max_post_tokens,
          automation: self.automation,
        )
      rescue => e
diff --git a/lib/automation/llm_triage.rb b/lib/automation/llm_triage.rb
index 103119e7..253a2724 100644
--- a/lib/automation/llm_triage.rb
+++ b/lib/automation/llm_triage.rb
@@ -15,21 +15,27 @@ module DiscourseAi
         hide_topic: nil,
         flag_post: nil,
         flag_type: nil,
-        automation: nil
+        automation: nil,
+        max_post_tokens: nil
       )
         if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
              flag_post.blank?
           raise ArgumentError, "llm_triage: no action specified!"
         end
 
+        llm = DiscourseAi::Completions::Llm.proxy(model)
+
         s_prompt = system_prompt.to_s.sub("%%POST%%", "") # Backwards-compat. We no longer sub this.
         prompt = DiscourseAi::Completions::Prompt.new(s_prompt)
-        prompt.push(type: :user, content: "title: #{post.topic.title}\n#{post.raw}")
+
+        content = "title: #{post.topic.title}\n#{post.raw}"
+
+        content = llm.tokenizer.truncate(content, max_post_tokens) if max_post_tokens.present?
+
+        prompt.push(type: :user, content: content)
 
         result = nil
 
-        llm = DiscourseAi::Completions::Llm.proxy(model)
-
         result =
           llm.generate(
             prompt,
@@ -37,6 +43,10 @@
             max_tokens: 700, # ~500 words
             user: Discourse.system_user,
             feature_name: "llm_triage",
+            feature_context: {
+              automation_id: automation&.id,
+              automation_name: automation&.name,
+            },
           )&.strip
 
         if result.present? && result.downcase.include?(search_for_text.downcase)
diff --git a/lib/automation/report_runner.rb b/lib/automation/report_runner.rb
index 842de0bf..02363a45 100644
--- a/lib/automation/report_runner.rb
+++ b/lib/automation/report_runner.rb
@@ -53,7 +53,8 @@
         exclude_tags: nil,
         top_p: 0.1,
         temperature: 0.2,
-        suppress_notifications: false
+        suppress_notifications: false,
+        automation: nil
       )
         @sender = User.find_by(username: sender_username)
         @receivers = User.where(username: receivers)
@@ -90,6 +91,7 @@
         if !@topic_id && !@receivers.present? && !@email_receivers.present?
           raise ArgumentError, "Must specify topic_id or receivers"
         end
+        @automation = automation
       end
 
       def run!
@@ -153,6 +155,10 @@ Follow the provided writing composition instructions carefully and precisely ste
             top_p: @top_p,
             user: Discourse.system_user,
             feature_name: "ai_report",
+            feature_context: {
+              automation_id: @automation&.id,
+              automation_name: @automation&.name,
+            },
           ) do |response|
             print response if Rails.env.development? && @debug_mode
             result << response
diff --git a/lib/completions/endpoints/base.rb b/lib/completions/endpoints/base.rb
index 3782d735..a0405b42 100644
--- a/lib/completions/endpoints/base.rb
+++ b/lib/completions/endpoints/base.rb
@@ -56,7 +56,14 @@ module DiscourseAi
          []
        end
 
-        def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
+        def perform_completion!(
+          dialect,
+          user,
+          model_params = {},
+          feature_name: nil,
+          feature_context: nil,
+          &blk
+        )
          allow_tools = dialect.prompt.has_tools?
          model_params = normalize_model_params(model_params)
          orig_blk = blk
@@ -111,6 +118,7 @@
              post_id: dialect.prompt.post_id,
              feature_name: feature_name,
              language_model: llm_model.name,
+              feature_context: feature_context.present? ? feature_context.as_json : nil,
            )
 
          if !@streaming_mode
diff --git a/lib/completions/endpoints/canned_response.rb b/lib/completions/endpoints/canned_response.rb
index ee0c9e5f..eaef21da 100644
--- a/lib/completions/endpoints/canned_response.rb
+++ b/lib/completions/endpoints/canned_response.rb
@@ -23,7 +23,13 @@ module DiscourseAi
          dialect.prompt.messages
        end
 
-        def perform_completion!(dialect, _user, _model_params, feature_name: nil)
+        def perform_completion!(
+          dialect,
+          _user,
+          _model_params,
+          feature_name: nil,
+          feature_context: nil
+        )
          @dialect = dialect
          response = responses[completions]
          if response.nil?
diff --git a/lib/completions/endpoints/fake.rb b/lib/completions/endpoints/fake.rb
index 72d24d57..2beec61a 100644
--- a/lib/completions/endpoints/fake.rb
+++ b/lib/completions/endpoints/fake.rb
@@ -100,7 +100,13 @@ module DiscourseAi
          @last_call = params
        end
 
-        def perform_completion!(dialect, user, model_params = {}, feature_name: nil)
+        def perform_completion!(
+          dialect,
+          user,
+          model_params = {},
+          feature_name: nil,
+          feature_context: nil
+        )
          self.class.last_call = { dialect: dialect, user: user, model_params: model_params }
 
          content = self.class.fake_content
diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb
index 35b3e724..a8cff595 100644
--- a/lib/completions/endpoints/open_ai.rb
+++ b/lib/completions/endpoints/open_ai.rb
@@ -27,7 +27,14 @@ module DiscourseAi
          AiApiAuditLog::Provider::OpenAI
        end
 
-        def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
+        def perform_completion!(
+          dialect,
+          user,
+          model_params = {},
+          feature_name: nil,
+          feature_context: nil,
+          &blk
+        )
          if dialect.respond_to?(:is_gpt_o?) && dialect.is_gpt_o? && block_given?
            # we need to disable streaming and simulate it
            blk.call "", lambda { |*| }
diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb
index 445bfc19..0d53b413 100644
--- a/lib/completions/llm.rb
+++ b/lib/completions/llm.rb
@@ -191,6 +191,7 @@
        stop_sequences: nil,
        user:,
        feature_name: nil,
+        feature_context: nil,
        &partial_read_blk
      )
        self.class.record_prompt(prompt)
@@ -224,6 +225,7 @@
          user,
          model_params,
          feature_name: feature_name,
+          feature_context: feature_context,
          &partial_read_blk
        )
      end
diff --git a/spec/lib/completions/llm_spec.rb b/spec/lib/completions/llm_spec.rb
index e6402b15..4f22c16f 100644
--- a/spec/lib/completions/llm_spec.rb
+++ b/spec/lib/completions/llm_spec.rb
@@ -55,6 +55,40 @@ RSpec.describe DiscourseAi::Completions::Llm do
       expect(log.topic_id).to eq(123)
       expect(log.post_id).to eq(1)
     end
+
+    it "can track feature_name and feature_context" do
+      body = {
+        model: "gpt-3.5-turbo-0301",
+        usage: {
+          prompt_tokens: 337,
+          completion_tokens: 162,
+          total_tokens: 499,
+        },
+        choices: [
+          { message: { role: "assistant", content: "test" }, finish_reason: "stop", index: 0 },
+        ],
+      }.to_json
+
+      WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+        status: 200,
+        body: body,
+      )
+
+      result =
+        described_class.proxy("custom:#{model.id}").generate(
+          "Hello",
+          user: user,
+          feature_name: "llm_triage",
+          feature_context: {
+            foo: "bar",
+          },
+        )
+
+      expect(result).to eq("test")
+      log = AiApiAuditLog.order("id desc").first
+      expect(log.feature_name).to eq("llm_triage")
+      expect(log.feature_context).to eq({ "foo" => "bar" })
+    end
   end
 
   describe "#generate with fake model" do
diff --git a/spec/lib/discourse_automation/llm_triage_spec.rb b/spec/lib/discourse_automation/llm_triage_spec.rb
index 0a896592..26aea4d4 100644
--- a/spec/lib/discourse_automation/llm_triage_spec.rb
+++ b/spec/lib/discourse_automation/llm_triage_spec.rb
@@ -32,15 +32,31 @@ describe DiscourseAi::Automation::LlmTriage do
     add_automation_field("flag_post", true, type: "boolean")
     add_automation_field("canned_reply", "Yo this is a reply")
     add_automation_field("canned_reply_user", reply_user.username, type: "user")
+    add_automation_field("max_post_tokens", 100)
   end
 
   it "can trigger via automation" do
-    post = Fabricate(:post)
+    post = Fabricate(:post, raw: "hello " * 5000)
 
-    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
-      automation.running_in_background!
-      automation.trigger!({ "post" => post })
-    end
+    body = {
+      model: "gpt-3.5-turbo-0301",
+      usage: {
+        prompt_tokens: 337,
+        completion_tokens: 162,
+        total_tokens: 499,
+      },
+      choices: [
+        { message: { role: "assistant", content: "bad" }, finish_reason: "stop", index: 0 },
+      ],
+    }.to_json
+
+    WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      body: body,
+    )
+
+    automation.running_in_background!
+    automation.trigger!({ "post" => post })
 
     topic = post.topic.reload
     expect(topic.category_id).to eq(category.id)
@@ -49,6 +65,18 @@ describe DiscourseAi::Automation::LlmTriage do
     reply = topic.posts.order(:post_number).last
     expect(reply.raw).to eq("Yo this is a reply")
     expect(reply.user.id).to eq(reply_user.id)
+
+    ai_log = AiApiAuditLog.order("id desc").first
+    expect(ai_log.feature_name).to eq("llm_triage")
+    expect(ai_log.feature_context).to eq(
+      { "automation_id" => automation.id, "automation_name" => automation.name },
+    )
+
+    count = ai_log.raw_request_payload.scan("hello").size
+    # we could use the exact count here but it can get fragile
+    # as we change tokenizers, this will give us reasonable confidence
+    expect(count).to be <= (100)
+    expect(count).to be > (50)
   end
 
   it "does not reply to the canned_reply_user" do