FEATURE: automation rule for triaging posts using LLM (#236)

The new automation rule can be used to perform LLM-based classification and categorization of topics. You specify a system prompt (which takes %%POST%% as an input); if the model returns a particular piece of text, we apply actions such as tagging, hiding, replying or categorizing. This can be used as a spam filter, an "oops, you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com>
2023-10-03 08:55:30 +11:00 · 2023-10-03 08:55:30 +11:00 · 0cbf14e343
parent 453928e7bb
commit 0cbf14e343
5 changed files with 326 additions and 0 deletions
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@ -5,6 +5,39 @@ en:
        categories:
          discourse_ai: "Discourse AI"
  js:
    discourse_automation:
      scriptables:
        llm_triage:
          models:
            gpt_4: GPT 4
            gpt_3_5_turbo: GPT 3.5 Turbo
            claude_2: Claude 2
          fields:
            system_prompt:
              label: "System Prompt"
              description: "The prompt that will be used to triage; make sure it replies with a single word you can use to trigger the action"
            search_for_text:
              label: "Search for text"
              description: "If the following text appears in the LLM reply, apply these actions"
            category:
              label: "Category"
              description: "Category to apply to the topic"
            tags:
              label: "Tags"
              description: "Tags to apply to the topic"
            canned_reply:
              label: "Reply"
              description: "Raw text of canned reply to post on the topic"
            canned_reply_user:
              label: "Reply User"
              description: "Username of the user to post the canned reply"
            hide_topic:
              label: "Hide topic"
              description: "Make topic non visible to the public if triggered"
            model:
              label: "Model"
              description: "Either gpt-4 or gpt-3-5-turbo or claude-2"
    discourse_ai:
      modals:
        select_option: "Select an option..."
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@ -1,4 +1,10 @@
 en:
  discourse_automation:
    scriptables:
      llm_triage:
        title: Triage posts using AI
        description: "Triage posts using a large language model"
        system_prompt_missing_post_placeholder: "System prompt must contain a placeholder for the post: %%POST%%"
  site_settings:
    discourse_ai_enabled: "Enable the discourse AI plugin."
    ai_toxicity_enabled: "Enable the toxicity module."
--- a/lib/discourse_automation/llm_triage.rb
+++ b/lib/discourse_automation/llm_triage.rb
@ -0,0 +1,154 @@
 # frozen_string_literal: true
 if defined?(DiscourseAutomation)
  # Classifies a post with an LLM and, when the model's reply equals the
  # configured trigger text, applies the configured actions to the topic.
  module DiscourseAutomation::LlmTriage
    # Runs the triage for a single post.
    #
    # @param post the post to classify; its topic title and raw text are sent to the model
    # @param model model identifier handed to the completions client
    # @param search_for_text [String] text the (stripped) model reply must equal to trigger the actions
    # @param system_prompt [String] prompt template; must contain the %%POST%% placeholder
    # @param category_id category to move the topic to (optional)
    # @param tags tags to set on the topic (optional, only applied when tagging is enabled)
    # @param canned_reply [String, nil] raw text of a reply to post on the topic (optional)
    # @param canned_reply_user [String, nil] username that authors the canned reply;
    #   falls back to the system user when blank or unknown
    # @param hide_topic when truthy, the topic is made invisible
    # @raise [ArgumentError] when no action is configured or the prompt lacks the placeholder
    def self.handle(
      post:,
      model:,
      search_for_text:,
      system_prompt:,
      category_id: nil,
      tags: nil,
      canned_reply: nil,
      canned_reply_user: nil,
      hide_topic: nil
    )
      if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank?
        raise ArgumentError, "llm_triage: no action specified!"
      end

      post_template = "title: #{post.topic.title}\n#{post.raw}"

      # Check for the placeholder explicitly instead of comparing the prompt
      # before and after substitution.
      unless system_prompt.include?("%%POST%%")
        raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder"
      end

      # Block form of #sub: with a string replacement, backslash sequences in the
      # post text (\0, \1, \\ ...) would be interpreted as back-references and
      # the post would be mangled before it reaches the model.
      filled_system_prompt = system_prompt.sub("%%POST%%") { post_template }

      result =
        DiscourseAi::Inference::OpenAiCompletions.perform!(
          [{ role: "system", content: filled_system_prompt }],
          model,
          temperature: 0.7,
          top_p: 0.9,
          max_tokens: 40,
        ).dig(:choices, 0, :message, :content)

      # A response without message content (nil) is treated as "no match"
      # rather than crashing on nil.strip.
      return if result.to_s.strip != search_for_text.strip

      if canned_reply.present?
        reply_author = User.find_by_username(canned_reply_user) if canned_reply_user.present?
        reply_author ||= Discourse.system_user

        PostCreator.create!(
          reply_author,
          topic_id: post.topic_id,
          raw: canned_reply,
          reply_to_post_number: post.post_number,
          skip_validations: true,
        )
      end

      changes = {}
      changes[:category_id] = category_id if category_id.present?
      changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present?

      if changes.present?
        # Category and tags are changed by revising the topic's first post.
        first_post = post.topic.posts.where(post_number: 1).first
        changes[:bypass_bump] = true
        changes[:skip_validations] = true
        first_post.revise(Discourse.system_user, changes)
      end

      post.topic.update!(visible: false) if hide_topic
    end
  end
  # Identifier under which the triage script is registered.
  DiscourseAutomation::Scriptable::LLM_TRIAGE = "llm_triage"

  # Choices offered by the :model field. Each entry pairs the model id with the
  # translation key of its display name; the key suffix is the id with dashes
  # turned into underscores (e.g. "gpt-3-5-turbo" -> "gpt_3_5_turbo").
  # NOTE(review): this constant is defined outside any module, so it lives at
  # top level — confirm that is intended.
  AVAILABLE_MODELS =
    %w[gpt-4 gpt-3-5-turbo claude-2].map do |model_id|
      {
        id: model_id,
        name:
          "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.#{model_id.tr("-", "_")}",
      }
    end
  # Registers the "llm_triage" automation script: declares its fields and the
  # script body that hands the triggering post to DiscourseAutomation::LlmTriage.
  DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_TRIAGE) do
    version 1
    run_in_background

    placeholder :post

    triggerables %i[post_created_edited]

    # The validator returns a translated error message when the prompt lacks
    # the %%POST%% placeholder, and nil otherwise.
    field :system_prompt,
          component: :message,
          required: true,
          validator: ->(input) {
            unless input.include?("%%POST%%")
              I18n.t(
                "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.system_prompt_missing_post_placeholder",
              )
            end
          },
          accepts_placeholders: true
    field :search_for_text, component: :text, required: true
    field :model, component: :choices, required: true, extra: { content: AVAILABLE_MODELS }
    field :category, component: :category
    field :tags, component: :tags
    field :hide_topic, component: :boolean
    field :canned_reply, component: :message
    field :canned_reply_user, component: :user

    script do |context, fields, automation|
      post = context["post"]
      system_prompt = fields["system_prompt"]["value"]
      search_for_text = fields["search_for_text"]["value"]
      model = fields["model"]["value"]

      # Only these two model ids are accepted here; any other choice is
      # logged and skipped.
      unless %w[gpt-4 gpt-3-5-turbo].include?(model)
        Rails.logger.warn("llm_triage: model #{model} is not supported")
        next
      end

      # Optional fields may be absent altogether, hence dig.
      optional_value = ->(field_name) { fields.dig(field_name, "value") }

      category_id = optional_value.call("category")
      tags = optional_value.call("tags")
      hide_topic = optional_value.call("hide_topic")
      canned_reply = optional_value.call("canned_reply")
      canned_reply_user = optional_value.call("canned_reply_user")

      # nothing to do if we already replied
      next if post.raw.strip == canned_reply.to_s.strip

      triage_args = {
        post: post,
        model: model,
        search_for_text: search_for_text,
        system_prompt: system_prompt,
        category_id: category_id,
        tags: tags,
        canned_reply: canned_reply,
        canned_reply_user: canned_reply_user,
        hide_topic: hide_topic,
      }

      begin
        DiscourseAutomation::LlmTriage.handle(**triage_args)
      rescue => e
        Discourse.warn_exception(e, message: "llm_triage: failed to run inference")
      end
    end
  end
 end
--- a/plugin.rb
+++ b/plugin.rb
@ -53,6 +53,7 @@ after_initialize do
  require_relative "lib/modules/embeddings/entry_point"
  require_relative "lib/modules/summarization/entry_point"
  require_relative "lib/modules/ai_bot/entry_point"
  require_relative "lib/discourse_automation/llm_triage"
  [
    DiscourseAi::Embeddings::EntryPoint.new,
--- a/spec/lib/discourse_automation/llm_triage_spec.rb
+++ b/spec/lib/discourse_automation/llm_triage_spec.rb
@ -0,0 +1,132 @@
 # frozen_string_literal: true
 return if !defined?(DiscourseAutomation)
 describe DiscourseAutomation::LlmTriage do
  fab!(:post) { Fabricate(:post) }

  let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) }

  # Arguments shared by every direct call to the triage handler.
  let(:base_triage_args) do
    { post: post, model: "gpt-4", system_prompt: "test %%POST%%", search_for_text: "bad" }
  end

  # Stubs the OpenAI chat completions endpoint so that it answers with +content+.
  def stub_llm_reply(content)
    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
      status: 200,
      body: { choices: [{ message: { content: content } }] }.to_json,
    )
  end

  def triage(**args)
    DiscourseAutomation::LlmTriage.handle(**args)
  end

  # Creates a script field on the automation under test.
  def add_automation_field(name, value, type: "text")
    automation.fields.create!(
      component: type,
      name: name,
      metadata: {
        value: value,
      },
      target: "script",
    )
  end

  it "does nothing if it does not pass triage" do
    stub_llm_reply("good")

    triage(**base_triage_args, hide_topic: true)

    expect(post.topic.reload.visible).to eq(true)
  end

  it "can hide topics on triage" do
    stub_llm_reply("bad")

    triage(**base_triage_args, hide_topic: true)

    expect(post.topic.reload.visible).to eq(false)
  end

  it "can categorize topics on triage" do
    category = Fabricate(:category)
    stub_llm_reply("bad")

    triage(**base_triage_args, category_id: category.id)

    expect(post.topic.reload.category_id).to eq(category.id)
  end

  it "can reply to topics on triage" do
    user = Fabricate(:user)
    stub_llm_reply("bad")

    triage(
      **base_triage_args,
      canned_reply: "test canned reply 123",
      canned_reply_user: user.username,
    )

    reply = post.topic.posts.order(:post_number).last

    expect(reply.raw).to eq("test canned reply 123")
    expect(reply.user.id).to eq(user.id)
  end

  it "can trigger via automation" do
    SiteSetting.tagging_enabled = true

    category = Fabricate(:category)
    user = Fabricate(:user)

    [
      ["system_prompt", "hello %%POST%%", "text"],
      %w[search_for_text bad text],
      %w[model gpt-4 text],
      ["category", category.id, "category"],
      ["tags", %w[aaa bbb], "tags"],
      ["hide_topic", true, "boolean"],
      ["canned_reply", "Yo this is a reply", "text"],
      ["canned_reply_user", user.username, "user"],
    ].each { |name, value, type| add_automation_field(name, value, type: type) }

    stub_llm_reply("bad")

    automation.running_in_background!
    automation.trigger!({ "post" => post })

    topic = post.topic.reload
    expect(topic.category_id).to eq(category.id)
    expect(topic.tags.pluck(:name)).to contain_exactly("aaa", "bbb")
    expect(topic.visible).to eq(false)

    reply = topic.posts.order(:post_number).last
    expect(reply.raw).to eq("Yo this is a reply")
    expect(reply.user.id).to eq(user.id)
  end
 end