FEATURE: automation rule for triaging posts using LLM (#236)

The new automation rule can be used to perform LLM-based classification and categorization of topics. You specify a system prompt (which takes %%POST%% as an input); if the model returns a particular piece of text, we apply actions such as tagging, hiding, replying or categorizing. This can be used as a spam filter, an "oops, you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com>
2023-10-03 08:55:30 +11:00 · 2023-10-03 08:55:30 +11:00 · 0cbf14e343
parent 453928e7bb
commit 0cbf14e343
5 changed files with 326 additions and 0 deletions
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@ -5,6 +5,39 @@ en:
        categories:
          discourse_ai: "Discourse AI"
  js:
+    discourse_automation:
+      scriptables:
+        llm_triage:
+          models:
+            gpt_4: GPT 4
+            gpt_3_5_turbo: GPT 3.5 Turbo
+            claude_2: Claude 2
+          fields:
+            system_prompt:
+              label: "System Prompt"
+              description: "The prompt that will be used to triage; make sure it replies with a single word that you can use to trigger the action"
+            search_for_text:
+              label: "Search for text"
+              description: "If the LLM reply matches the following text (ignoring surrounding whitespace), apply these actions"
+            category:
+              label: "Category"
+              description: "Category to apply to the topic"
+            tags:
+              label: "Tags"
+              description: "Tags to apply to the topic"
+            canned_reply:
+              label: "Reply"
+              description: "Raw text of canned reply to post on the topic"
+            canned_reply_user:
+              label: "Reply User"
+              description: "Username of the user to post the canned reply"
+            hide_topic:
+              label: "Hide topic"
+              description: "Make topic non visible to the public if triggered"
+            model:
+              label: "Model"
+              description: "Either gpt-4 or gpt-3-5-turbo or claude-2"
+
    discourse_ai:
      modals:
        select_option: "Select an option..."
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@ -1,4 +1,10 @@
 en:
+  discourse_automation:
+    scriptables:
+      llm_triage:
+        title: Triage posts using AI
+        description: "Triage posts using a large language model"
+        system_prompt_missing_post_placeholder: "System prompt must contain a placeholder for the post: %%POST%%"
  site_settings:
    discourse_ai_enabled: "Enable the discourse AI plugin."
    ai_toxicity_enabled: "Enable the toxicity module."
--- a/lib/discourse_automation/llm_triage.rb
+++ b/lib/discourse_automation/llm_triage.rb
@ -0,0 +1,154 @@
+# frozen_string_literal: true
+
+if defined?(DiscourseAutomation)
+  module DiscourseAutomation::LlmTriage
+    def self.handle(
+      post:,
+      model:,
+      search_for_text:,
+      system_prompt:,
+      category_id: nil,
+      tags: nil,
+      canned_reply: nil,
+      canned_reply_user: nil,
+      hide_topic: nil
+    )
+      if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank?
+        raise ArgumentError, "llm_triage: no action specified!"
+      end
+
+      post_template = +""
+      post_template << "title: #{post.topic.title}\n"
+      post_template << "#{post.raw}"
+
+      filled_system_prompt = system_prompt.sub("%%POST%%", post_template)
+
+      if filled_system_prompt == system_prompt
+        raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder"
+      end
+
+      result =
+        DiscourseAi::Inference::OpenAiCompletions.perform!(
+          [{ :role => "system", "content" => filled_system_prompt }],
+          model,
+          temperature: 0.7,
+          top_p: 0.9,
+          max_tokens: 40,
+        ).dig(:choices, 0, :message, :content)
+
+      if result.strip == search_for_text.strip
+        user = User.find_by_username(canned_reply_user) if canned_reply_user.present?
+        user = user || Discourse.system_user
+        if canned_reply.present?
+          PostCreator.create!(
+            user,
+            topic_id: post.topic_id,
+            raw: canned_reply,
+            reply_to_post_number: post.post_number,
+            skip_validations: true,
+          )
+        end
+
+        changes = {}
+        changes[:category_id] = category_id if category_id.present?
+        changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present?
+
+        if changes.present?
+          first_post = post.topic.posts.where(post_number: 1).first
+          changes[:bypass_bump] = true
+          changes[:skip_validations] = true
+          first_post.revise(Discourse.system_user, changes)
+        end
+
+        post.topic.update!(visible: false) if hide_topic
+      end
+    end
+  end
+
+  DiscourseAutomation::Scriptable::LLM_TRIAGE = "llm_triage"
+
+  AVAILABLE_MODELS = [
+    {
+      id: "gpt-4",
+      name:
+        "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.gpt_4",
+    },
+    {
+      id: "gpt-3-5-turbo",
+      name:
+        "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.gpt_3_5_turbo",
+    },
+    {
+      id: "claude-2",
+      name:
+        "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.claude_2",
+    },
+  ]
+
+  DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_TRIAGE) do
+    version 1
+    run_in_background
+
+    placeholder :post
+
+    triggerables %i[post_created_edited]
+
+    field :system_prompt,
+          component: :message,
+          required: true,
+          validator: ->(input) {
+            if !input.include?("%%POST%%")
+              I18n.t(
+                "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.system_prompt_missing_post_placeholder",
+              )
+            end
+          },
+          accepts_placeholders: true
+    field :search_for_text, component: :text, required: true
+    field :model, component: :choices, required: true, extra: { content: AVAILABLE_MODELS }
+    field :category, component: :category
+    field :tags, component: :tags
+    field :hide_topic, component: :boolean
+    field :canned_reply, component: :message
+    field :canned_reply_user, component: :user
+
+    script do |context, fields, automation|
+      post = context["post"]
+      system_prompt = fields["system_prompt"]["value"]
+      search_for_text = fields["search_for_text"]["value"]
+      model = fields["model"]["value"]
+
+      if !%w[gpt-4 gpt-3-5-turbo].include?(model)
+        Rails.logger.warn("llm_triage: model #{model} is not supported")
+        next
+      end
+
+      category_id = fields.dig("category", "value")
+      tags = fields.dig("tags", "value")
+      hide_topic = fields.dig("hide_topic", "value")
+      canned_reply = fields.dig("canned_reply", "value")
+      canned_reply_user = fields.dig("canned_reply_user", "value")
+
+      if post.raw.strip == canned_reply.to_s.strip
+        # nothing to do if we already replied
+        next
+      end
+
+      begin
+        DiscourseAutomation::LlmTriage.handle(
+          post: post,
+          model: model,
+          search_for_text: search_for_text,
+          system_prompt: system_prompt,
+          category_id: category_id,
+          tags: tags,
+          canned_reply: canned_reply,
+          canned_reply_user: canned_reply_user,
+          hide_topic: hide_topic,
+        )
+      rescue => e
+        Discourse.warn_exception(e, message: "llm_triage: failed to run inference")
+      end
+    end
+  end
+end
--- a/plugin.rb
+++ b/plugin.rb
@ -53,6 +53,7 @@ after_initialize do
  require_relative "lib/modules/embeddings/entry_point"
  require_relative "lib/modules/summarization/entry_point"
  require_relative "lib/modules/ai_bot/entry_point"
+  require_relative "lib/discourse_automation/llm_triage"

  [
    DiscourseAi::Embeddings::EntryPoint.new,
--- a/spec/lib/discourse_automation/llm_triage_spec.rb
+++ b/spec/lib/discourse_automation/llm_triage_spec.rb
@ -0,0 +1,132 @@
+# frozen_string_literal: true
+
+return if !defined?(DiscourseAutomation)
+
+describe DiscourseAutomation::LlmTriage do
+  fab!(:post) { Fabricate(:post) }
+
+  def triage(**args)
+    DiscourseAutomation::LlmTriage.handle(**args)
+  end
+
+  it "does nothing if it does not pass triage" do
+    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      body: { choices: [{ message: { content: "good" } }] }.to_json,
+    )
+
+    triage(
+      post: post,
+      model: "gpt-4",
+      hide_topic: true,
+      system_prompt: "test %%POST%%",
+      search_for_text: "bad",
+    )
+
+    expect(post.topic.reload.visible).to eq(true)
+  end
+
+  it "can hide topics on triage" do
+    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      body: { choices: [{ message: { content: "bad" } }] }.to_json,
+    )
+
+    triage(
+      post: post,
+      model: "gpt-4",
+      hide_topic: true,
+      system_prompt: "test %%POST%%",
+      search_for_text: "bad",
+    )
+
+    expect(post.topic.reload.visible).to eq(false)
+  end
+
+  it "can categorize topics on triage" do
+    category = Fabricate(:category)
+
+    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      body: { choices: [{ message: { content: "bad" } }] }.to_json,
+    )
+
+    triage(
+      post: post,
+      model: "gpt-4",
+      category_id: category.id,
+      system_prompt: "test %%POST%%",
+      search_for_text: "bad",
+    )
+
+    expect(post.topic.reload.category_id).to eq(category.id)
+  end
+
+  it "can reply to topics on triage" do
+    user = Fabricate(:user)
+
+    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      body: { choices: [{ message: { content: "bad" } }] }.to_json,
+    )
+
+    triage(
+      post: post,
+      model: "gpt-4",
+      system_prompt: "test %%POST%%",
+      search_for_text: "bad",
+      canned_reply: "test canned reply 123",
+      canned_reply_user: user.username,
+    )
+
+    reply = post.topic.posts.order(:post_number).last
+
+    expect(reply.raw).to eq("test canned reply 123")
+    expect(reply.user.id).to eq(user.id)
+  end
+
+  let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) }
+
+  def add_automation_field(name, value, type: "text")
+    automation.fields.create!(
+      component: type,
+      name: name,
+      metadata: {
+        value: value,
+      },
+      target: "script",
+    )
+  end
+
+  it "can trigger via automation" do
+    SiteSetting.tagging_enabled = true
+
+    category = Fabricate(:category)
+    user = Fabricate(:user)
+
+    add_automation_field("system_prompt", "hello %%POST%%")
+    add_automation_field("search_for_text", "bad")
+    add_automation_field("model", "gpt-4")
+    add_automation_field("category", category.id, type: "category")
+    add_automation_field("tags", %w[aaa bbb], type: "tags")
+    add_automation_field("hide_topic", true, type: "boolean")
+    add_automation_field("canned_reply", "Yo this is a reply")
+    add_automation_field("canned_reply_user", user.username, type: "user")
+
+    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+      status: 200,
+      body: { choices: [{ message: { content: "bad" } }] }.to_json,
+    )
+
+    automation.running_in_background!
+    automation.trigger!({ "post" => post })
+
+    topic = post.topic.reload
+    expect(topic.category_id).to eq(category.id)
+    expect(topic.tags.pluck(:name)).to contain_exactly("aaa", "bbb")
+    expect(topic.visible).to eq(false)
+    reply = topic.posts.order(:post_number).last
+    expect(reply.raw).to eq("Yo this is a reply")
+    expect(reply.user.id).to eq(user.id)
+  end
+end