FEATURE: automation rule for triaging posts using LLM (#236)

The new automation rule can be used to perform LLM-based classification and categorization of topics.

You specify a system prompt (which takes %%POST%% as a placeholder for the post). If the LLM returns a particular piece of text, we apply actions such as tagging, hiding, replying or categorizing.

This can be used as a spam filter, an "oops, you are in the wrong place" filter, and so on.

Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com>
This commit is contained in:
Sam 2023-10-03 08:55:30 +11:00 committed by GitHub
parent 453928e7bb
commit 0cbf14e343
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 326 additions and 0 deletions

View File

@ -5,6 +5,39 @@ en:
categories:
discourse_ai: "Discourse AI"
js:
discourse_automation:
scriptables:
llm_triage:
models:
gpt_4: GPT 4
gpt_3_5_turbo: GPT 3.5 Turbo
claude_2: Claude 2
fields:
system_prompt:
label: "System Prompt"
description: "The prompt that will be used to triage. Make sure it replies with a single word you can use to trigger the action"
search_for_text:
label: "Search for text"
description: "If the following text appears in the LLM reply, apply these actions"
category:
label: "Category"
description: "Category to apply to the topic"
tags:
label: "Tags"
description: "Tags to apply to the topic"
canned_reply:
label: "Reply"
description: "Raw text of canned reply to post on the topic"
canned_reply_user:
label: "Reply User"
description: "Username of the user to post the canned reply"
hide_topic:
label: "Hide topic"
description: "Make topic non visible to the public if triggered"
model:
label: "Model"
description: "Either gpt-4 or gpt-3-5-turbo or claude-2"
discourse_ai:
modals:
select_option: "Select an option..."

View File

@ -1,4 +1,10 @@
en:
discourse_automation:
scriptables:
llm_triage:
title: Triage posts using AI
description: "Triage posts using a large language model"
system_prompt_missing_post_placeholder: "System prompt must contain a placeholder for the post: %%POST%%"
site_settings:
discourse_ai_enabled: "Enable the discourse AI plugin."
ai_toxicity_enabled: "Enable the toxicity module."

View File

@ -0,0 +1,154 @@
# frozen_string_literal: true
if defined?(DiscourseAutomation)
module DiscourseAutomation::LlmTriage
  # Sends a post to the LLM for classification and, when the reply equals
  # `search_for_text` (both compared after stripping surrounding whitespace),
  # applies the configured actions to the post's topic.
  #
  # @param post the post to triage; its topic title and raw body are sent to the model
  # @param model [String] model identifier handed to the OpenAI completions call
  # @param search_for_text [String] model reply that triggers the actions
  # @param system_prompt [String] prompt template; must contain the %%POST%% placeholder
  # @param category_id category to assign through a revision of the first post (optional)
  # @param tags tags to assign through a revision of the first post; skipped
  #   unless tagging is enabled (optional)
  # @param canned_reply [String, nil] raw text posted as a reply to `post` (optional)
  # @param canned_reply_user [String, nil] username that authors the canned reply;
  #   the system user is used when it is blank or no such user is found
  # @param hide_topic when truthy, the topic is marked as not visible
  # @raise [ArgumentError] when no action is configured or when `system_prompt`
  #   lacks the %%POST%% placeholder
  def self.handle(
    post:,
    model:,
    search_for_text:,
    system_prompt:,
    category_id: nil,
    tags: nil,
    canned_reply: nil,
    canned_reply_user: nil,
    hide_topic: nil
  )
    if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank?
      raise ArgumentError, "llm_triage: no action specified!"
    end

    post_template = +""
    post_template << "title: #{post.topic.title}\n"
    post_template << "#{post.raw}"

    if !system_prompt.include?("%%POST%%")
      raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder"
    end

    # Block form on purpose: with a String replacement, String#sub interprets
    # backslash sequences (\0, \1, \\) inside it, which would corrupt post
    # content that happens to contain backslashes.
    filled_system_prompt = system_prompt.sub("%%POST%%") { post_template }

    result =
      DiscourseAi::Inference::OpenAiCompletions.perform!(
        [{ :role => "system", "content" => filled_system_prompt }],
        model,
        temperature: 0.7,
        top_p: 0.9,
        max_tokens: 40,
      ).dig(:choices, 0, :message, :content)

    # `dig` yields nil when the response carries no message content; such a
    # response cannot match, so bail out instead of calling `strip` on nil.
    return if result.nil?
    return if result.strip != search_for_text.strip

    if canned_reply.present?
      user = User.find_by_username(canned_reply_user) if canned_reply_user.present?
      user ||= Discourse.system_user

      PostCreator.create!(
        user,
        topic_id: post.topic_id,
        raw: canned_reply,
        reply_to_post_number: post.post_number,
        skip_validations: true,
      )
    end

    changes = {}
    changes[:category_id] = category_id if category_id.present?
    changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present?

    if changes.present?
      # Category and tags are applied by revising the topic's first post.
      first_post = post.topic.posts.where(post_number: 1).first
      changes[:bypass_bump] = true
      changes[:skip_validations] = true
      first_post.revise(Discourse.system_user, changes)
    end

    post.topic.update!(visible: false) if hide_topic
  end
end
# Name under which the triage script is registered; also used as the i18n
# namespace segment for its translations.
DiscourseAutomation::Scriptable::LLM_TRIAGE = "llm_triage"

# Choices offered by the script's `model` field. Each entry pairs the model id
# with the i18n key of its display name; the key suffix is the id with dashes
# turned into underscores (gpt-4 -> gpt_4).
#
# NOTE(review): "claude-2" is listed here, but the script's model check only
# accepts gpt-4 and gpt-3-5-turbo, so picking it results in a logged warning
# and no triage — confirm whether that is intended.
AVAILABLE_MODELS =
  ["gpt-4", "gpt-3-5-turbo", "claude-2"].map do |model_id|
    translation_suffix = model_id.tr("-", "_")
    {
      id: model_id,
      name:
        "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.#{translation_suffix}",
    }
  end
# Registers the "llm_triage" automation script: declares its fields and the
# code that runs for each triggering post.
DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_TRIAGE) do
  version 1
  run_in_background

  placeholder :post

  triggerables %i[post_created_edited]

  # The validator returns nil when the prompt is acceptable and a translated
  # error message when the %%POST%% placeholder is missing.
  field :system_prompt,
        component: :message,
        required: true,
        validator: ->(input) {
          unless input.include?("%%POST%%")
            I18n.t(
              "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.system_prompt_missing_post_placeholder",
            )
          end
        },
        accepts_placeholders: true
  field :search_for_text, component: :text, required: true
  field :model, component: :choices, required: true, extra: { content: AVAILABLE_MODELS }
  field :category, component: :category
  field :tags, component: :tags
  field :hide_topic, component: :boolean
  field :canned_reply, component: :message
  field :canned_reply_user, component: :user

  script do |context, fields, automation|
    post = context["post"]

    # Required fields are read with [] so a missing field fails loudly.
    system_prompt = fields["system_prompt"]["value"]
    search_for_text = fields["search_for_text"]["value"]
    model = fields["model"]["value"]

    unless %w[gpt-4 gpt-3-5-turbo].include?(model)
      Rails.logger.warn("llm_triage: model #{model} is not supported")
      next
    end

    # Optional fields may be absent altogether, hence dig.
    triage_args = {
      post: post,
      model: model,
      search_for_text: search_for_text,
      system_prompt: system_prompt,
      category_id: fields.dig("category", "value"),
      tags: fields.dig("tags", "value"),
      hide_topic: fields.dig("hide_topic", "value"),
      canned_reply: fields.dig("canned_reply", "value"),
      canned_reply_user: fields.dig("canned_reply_user", "value"),
    }

    # Skip posts whose body is the canned reply itself: nothing to do if we
    # already replied.
    next if post.raw.strip == triage_args[:canned_reply].to_s.strip

    begin
      DiscourseAutomation::LlmTriage.handle(**triage_args)
    rescue => e
      # Failures are reported as warnings rather than propagated.
      Discourse.warn_exception(e, message: "llm_triage: failed to run inference")
    end
  end
end
end

View File

@ -53,6 +53,7 @@ after_initialize do
require_relative "lib/modules/embeddings/entry_point"
require_relative "lib/modules/summarization/entry_point"
require_relative "lib/modules/ai_bot/entry_point"
require_relative "lib/discourse_automation/llm_triage"
[
DiscourseAi::Embeddings::EntryPoint.new,

View File

@ -0,0 +1,132 @@
# frozen_string_literal: true
return unless defined?(DiscourseAutomation)

describe DiscourseAutomation::LlmTriage do
  fab!(:post) { Fabricate(:post) }

  let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) }

  # Thin wrapper so examples read as "triage(...)".
  def triage(**args)
    DiscourseAutomation::LlmTriage.handle(**args)
  end

  # Stubs the OpenAI chat completions endpoint so that the model's reply
  # content is +content+.
  def stub_llm_reply(content)
    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
      status: 200,
      body: { choices: [{ message: { content: content } }] }.to_json,
    )
  end

  # Attaches a script field with the given value to the automation under test.
  def add_automation_field(name, value, type: "text")
    automation.fields.create!(
      component: type,
      name: name,
      metadata: {
        value: value,
      },
      target: "script",
    )
  end

  it "does nothing if it does not pass triage" do
    stub_llm_reply("good")

    triage(
      post: post,
      model: "gpt-4",
      hide_topic: true,
      system_prompt: "test %%POST%%",
      search_for_text: "bad",
    )

    expect(post.topic.reload.visible).to eq(true)
  end

  it "can hide topics on triage" do
    stub_llm_reply("bad")

    triage(
      post: post,
      model: "gpt-4",
      hide_topic: true,
      system_prompt: "test %%POST%%",
      search_for_text: "bad",
    )

    expect(post.topic.reload.visible).to eq(false)
  end

  it "can categorize topics on triage" do
    category = Fabricate(:category)
    stub_llm_reply("bad")

    triage(
      post: post,
      model: "gpt-4",
      category_id: category.id,
      system_prompt: "test %%POST%%",
      search_for_text: "bad",
    )

    expect(post.topic.reload.category_id).to eq(category.id)
  end

  it "can reply to topics on triage" do
    user = Fabricate(:user)
    stub_llm_reply("bad")

    triage(
      post: post,
      model: "gpt-4",
      system_prompt: "test %%POST%%",
      search_for_text: "bad",
      canned_reply: "test canned reply 123",
      canned_reply_user: user.username,
    )

    reply = post.topic.posts.order(:post_number).last

    expect(reply.raw).to eq("test canned reply 123")
    expect(reply.user.id).to eq(user.id)
  end

  it "can trigger via automation" do
    SiteSetting.tagging_enabled = true

    category = Fabricate(:category)
    user = Fabricate(:user)

    add_automation_field("system_prompt", "hello %%POST%%")
    add_automation_field("search_for_text", "bad")
    add_automation_field("model", "gpt-4")
    add_automation_field("category", category.id, type: "category")
    add_automation_field("tags", %w[aaa bbb], type: "tags")
    add_automation_field("hide_topic", true, type: "boolean")
    add_automation_field("canned_reply", "Yo this is a reply")
    add_automation_field("canned_reply_user", user.username, type: "user")

    stub_llm_reply("bad")

    automation.running_in_background!
    automation.trigger!({ "post" => post })

    topic = post.topic.reload
    expect(topic.category_id).to eq(category.id)
    expect(topic.tags.pluck(:name)).to contain_exactly("aaa", "bbb")
    expect(topic.visible).to eq(false)

    reply = topic.posts.order(:post_number).last
    expect(reply.raw).to eq("Yo this is a reply")
    expect(reply.user.id).to eq(user.id)
  end
end