discourse-ai/spec/lib/discourse_automation/llm_triage_spec.rb

# frozen_string_literal: true

# Stop loading this spec file when the DiscourseAutomation constant is not
# defined (presumably because the automation plugin is not loaded).
return unless defined?(DiscourseAutomation)

# Specs for the "llm_triage" automation script. Each example triggers a fully
# configured automation against a fabricated post and inspects the outcome.
describe DiscourseAi::Automation::LlmTriage do
  fab!(:category)
  fab!(:reply_user) { Fabricate(:user) }
  fab!(:llm_model)

  let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) }

  # Creates a field on the automation under test, targeted at the script.
  #
  # name  - field name
  # value - stored under the "value" key of the field metadata
  # type  - field component (defaults to "text")
  def add_automation_field(name, value, type: "text")
    field_attributes = {
      component: type,
      name: name,
      metadata: {
        value: value,
      },
      target: "script",
    }

    automation.fields.create!(field_attributes)
  end

  before do
    SiteSetting.tagging_enabled = true

    # Each row is [field name, value, component]; fields are created in order.
    [
      ["system_prompt", "hello %%POST%%", "text"],
      ["search_for_text", "bad", "text"],
      ["model", "custom:#{llm_model.id}", "text"],
      ["category", category.id, "category"],
      ["tags", %w[aaa bbb], "tags"],
      ["hide_topic", true, "boolean"],
      ["flag_post", true, "boolean"],
      ["canned_reply", "Yo this is a reply", "text"],
      ["canned_reply_user", reply_user.username, "user"],
      ["max_post_tokens", 100, "text"],
    ].each { |name, value, type| add_automation_field(name, value, type: type) }
  end

  it "can trigger via automation" do
    # Far more "hello"s than the configured max_post_tokens of 100.
    long_post = Fabricate(:post, raw: "hello " * 5000)

    token_usage = { prompt_tokens: 337, completion_tokens: 162, total_tokens: 499 }
    assistant_choice = {
      message: {
        role: "assistant",
        content: "bad",
      },
      finish_reason: "stop",
      index: 0,
    }
    completion_json = {
      model: "gpt-3.5-turbo-0301",
      usage: token_usage,
      choices: [assistant_choice],
    }.to_json

    # The stubbed completion answers "bad", which matches search_for_text.
    WebMock
      .stub_request(:post, "https://api.openai.com/v1/chat/completions")
      .to_return(status: 200, body: completion_json)

    automation.running_in_background!
    automation.trigger!({ "post" => long_post })

    triaged_topic = long_post.topic.reload
    expect(triaged_topic.category_id).to eq(category.id)
    expect(triaged_topic.tags.pluck(:name)).to contain_exactly("aaa", "bbb")
    expect(triaged_topic.visible).to eq(false)

    newest_post = triaged_topic.posts.order(:post_number).last
    expect(newest_post.raw).to eq("Yo this is a reply")
    expect(newest_post.user.id).to eq(reply_user.id)

    audit_entry = AiApiAuditLog.order("id desc").first
    expect(audit_entry.feature_name).to eq("llm_triage")
    expect(audit_entry.feature_context).to eq(
      { "automation_id" => automation.id, "automation_name" => automation.name },
    )

    hello_occurrences = audit_entry.raw_request_payload.scan("hello").size
    # An exact count would be fragile across tokenizer changes, so only a
    # range is asserted: more than 50 and at most 100 occurrences.
    expect(hello_occurrences).to be_between(51, 100).inclusive
  end

  it "does not reply to the canned_reply_user" do
    # The triggering post is authored by the configured canned reply user.
    authored_by_reply_user = Fabricate(:post, user: reply_user)

    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
      automation.running_in_background!
      automation.trigger!({ "post" => authored_by_reply_user })
    end

    # No reply was appended: the last post in the topic is still the original.
    final_post = authored_by_reply_user.topic.reload.posts.order(:post_number).last
    expect(final_post.raw).to eq(authored_by_reply_user.raw)
  end
end
FEATURE: automation rule for triaging posts using LLM (#236) The new automation rule can be used to perform llm based classification and categorization of topics. You specify a system prompt (which has %%POST%% as an input), if it returns a particular piece of text then we will apply rules such as tagging, hiding, replying or categorizing. This can be used as a spam filter, a "oops you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com> 2023-10-02 17:55:30 -04:00			`# frozen_string_literal: true`

			`return if !defined?(DiscourseAutomation)`

FEATURE: LLM based periodical summary report (#357) Introduce a Discourse Automation based periodical report. Depends on Discourse Automation. Report works best with very large context language models such as GPT-4-Turbo and Claude 2. - Introduces final_insts to generic llm format, for claude to work best it is better to guide the last assistant message (we should add this to other spots as well) - Adds GPT-4 turbo support to generic llm interface 2023-12-18 20:04:15 -05:00			`describe DiscourseAi::Automation::LlmTriage do`
FIX: Avoid replying to the reply user for llm_triage automation (#544) Avoid replying to the reply user. This causes an infinite conversation from the reply_user to the reply_user 2024-03-22 00:34:18 -04:00			`fab!(:category)`
			`fab!(:reply_user) { Fabricate(:user) }`
FEATURE: automation rule for triaging posts using LLM (#236) The new automation rule can be used to perform llm based classification and categorization of topics. You specify a system prompt (which has %%POST%% as an input), if it returns a particular piece of text then we will apply rules such as tagging, hiding, replying or categorizing. This can be used as a spam filter, a "oops you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com> 2023-10-02 17:55:30 -04:00
			`let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) }`

DEV: Remove old code now that features rely on LlmModels. (#729) * DEV: Remove old code now that features rely on LlmModels. * Hide old settings and migrate persona llm overrides * Remove shadowing special URL + seeding code. Use srv:// prefix instead. 2024-07-30 12:44:57 -04:00			`fab!(:llm_model)`

FEATURE: automation rule for triaging posts using LLM (#236) The new automation rule can be used to perform llm based classification and categorization of topics. You specify a system prompt (which has %%POST%% as an input), if it returns a particular piece of text then we will apply rules such as tagging, hiding, replying or categorizing. This can be used as a spam filter, a "oops you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com> 2023-10-02 17:55:30 -04:00			`def add_automation_field(name, value, type: "text")`
			`automation.fields.create!(`
			`component: type,`
			`name: name,`
			`metadata: {`
			`value: value,`
			`},`
			`target: "script",`
			`)`
			`end`

FIX: Avoid replying to the reply user for llm_triage automation (#544) Avoid replying to the reply user. This causes an infinite conversation from the reply_user to the reply_user 2024-03-22 00:34:18 -04:00			`before do`
FEATURE: automation rule for triaging posts using LLM (#236) The new automation rule can be used to perform llm based classification and categorization of topics. You specify a system prompt (which has %%POST%% as an input), if it returns a particular piece of text then we will apply rules such as tagging, hiding, replying or categorizing. This can be used as a spam filter, a "oops you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com> 2023-10-02 17:55:30 -04:00			`SiteSetting.tagging_enabled = true`
			`add_automation_field("system_prompt", "hello %%POST%%")`
			`add_automation_field("search_for_text", "bad")`
DEV: Remove old code now that features rely on LlmModels. (#729) * DEV: Remove old code now that features rely on LlmModels. * Hide old settings and migrate persona llm overrides * Remove shadowing special URL + seeding code. Use srv:// prefix instead. 2024-07-30 12:44:57 -04:00			`add_automation_field("model", "custom:#{llm_model.id}")`
FEATURE: automation rule for triaging posts using LLM (#236) The new automation rule can be used to perform llm based classification and categorization of topics. You specify a system prompt (which has %%POST%% as an input), if it returns a particular piece of text then we will apply rules such as tagging, hiding, replying or categorizing. This can be used as a spam filter, a "oops you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com> 2023-10-02 17:55:30 -04:00			`add_automation_field("category", category.id, type: "category")`
			`add_automation_field("tags", %w[aaa bbb], type: "tags")`
			`add_automation_field("hide_topic", true, type: "boolean")`
FEATURE: Option for AI triage to send a post to the review queue (#498) Option for AI triage to send a post to the review queue 2024-02-28 20:33:28 -05:00			`add_automation_field("flag_post", true, type: "boolean")`
FEATURE: automation rule for triaging posts using LLM (#236) The new automation rule can be used to perform llm based classification and categorization of topics. You specify a system prompt (which has %%POST%% as an input), if it returns a particular piece of text then we will apply rules such as tagging, hiding, replying or categorizing. This can be used as a spam filter, a "oops you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com> 2023-10-02 17:55:30 -04:00			`add_automation_field("canned_reply", "Yo this is a reply")`
FIX: Avoid replying to the reply user for llm_triage automation (#544) Avoid replying to the reply user. This causes an infinite conversation from the reply_user to the reply_user 2024-03-22 00:34:18 -04:00			`add_automation_field("canned_reply_user", reply_user.username, type: "user")`
FEATURE: better logging for automation reports (#853) A new feature_context json column was added to ai_api_audit_logs This allows us to store rich json like context on any LLM request made. This new field now stores automation id and name. Additionally allows llm_triage to specify maximum number of tokens This means that you can limit the cost of llm triage by scanning only first N tokens of a post. 2024-10-23 01:49:56 -04:00			`add_automation_field("max_post_tokens", 100)`
FIX: Avoid replying to the reply user for llm_triage automation (#544) Avoid replying to the reply user. This causes an infinite conversation from the reply_user to the reply_user 2024-03-22 00:34:18 -04:00			`end`

			`it "can trigger via automation" do`
FEATURE: better logging for automation reports (#853) A new feature_context json column was added to ai_api_audit_logs This allows us to store rich json like context on any LLM request made. This new field now stores automation id and name. Additionally allows llm_triage to specify maximum number of tokens This means that you can limit the cost of llm triage by scanning only first N tokens of a post. 2024-10-23 01:49:56 -04:00			`post = Fabricate(:post, raw: "hello " * 5000)`
FEATURE: automation rule for triaging posts using LLM (#236) The new automation rule can be used to perform llm based classification and categorization of topics. You specify a system prompt (which has %%POST%% as an input), if it returns a particular piece of text then we will apply rules such as tagging, hiding, replying or categorizing. This can be used as a spam filter, a "oops you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com> 2023-10-02 17:55:30 -04:00
FEATURE: better logging for automation reports (#853) A new feature_context json column was added to ai_api_audit_logs This allows us to store rich json like context on any LLM request made. This new field now stores automation id and name. Additionally allows llm_triage to specify maximum number of tokens This means that you can limit the cost of llm triage by scanning only first N tokens of a post. 2024-10-23 01:49:56 -04:00			`body = {`
			`model: "gpt-3.5-turbo-0301",`
			`usage: {`
			`prompt_tokens: 337,`
			`completion_tokens: 162,`
			`total_tokens: 499,`
			`},`
			`choices: [`
			`{ message: { role: "assistant", content: "bad" }, finish_reason: "stop", index: 0 },`
			`],`
			`}.to_json`

			`WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(`
			`status: 200,`
			`body: body,`
			`)`

			`automation.running_in_background!`
			`automation.trigger!({ "post" => post })`
FEATURE: automation rule for triaging posts using LLM (#236) The new automation rule can be used to perform llm based classification and categorization of topics. You specify a system prompt (which has %%POST%% as an input), if it returns a particular piece of text then we will apply rules such as tagging, hiding, replying or categorizing. This can be used as a spam filter, a "oops you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com> 2023-10-02 17:55:30 -04:00
			`topic = post.topic.reload`
			`expect(topic.category_id).to eq(category.id)`
			`expect(topic.tags.pluck(:name)).to contain_exactly("aaa", "bbb")`
			`expect(topic.visible).to eq(false)`
			`reply = topic.posts.order(:post_number).last`
			`expect(reply.raw).to eq("Yo this is a reply")`
FIX: Avoid replying to the reply user for llm_triage automation (#544) Avoid replying to the reply user. This causes an infinite conversation from the reply_user to the reply_user 2024-03-22 00:34:18 -04:00			`expect(reply.user.id).to eq(reply_user.id)`
FEATURE: better logging for automation reports (#853) A new feature_context json column was added to ai_api_audit_logs This allows us to store rich json like context on any LLM request made. This new field now stores automation id and name. Additionally allows llm_triage to specify maximum number of tokens This means that you can limit the cost of llm triage by scanning only first N tokens of a post. 2024-10-23 01:49:56 -04:00
			`ai_log = AiApiAuditLog.order("id desc").first`
			`expect(ai_log.feature_name).to eq("llm_triage")`
			`expect(ai_log.feature_context).to eq(`
			`{ "automation_id" => automation.id, "automation_name" => automation.name },`
			`)`

			`count = ai_log.raw_request_payload.scan("hello").size`
			`# we could use the exact count here but it can get fragile`
			`# as we change tokenizers, this will give us reasonable confidence`
			`expect(count).to be <= (100)`
			`expect(count).to be > (50)`
FIX: Avoid replying to the reply user for llm_triage automation (#544) Avoid replying to the reply user. This causes an infinite conversation from the reply_user to the reply_user 2024-03-22 00:34:18 -04:00			`end`

			`it "does not reply to the canned_reply_user" do`
			`post = Fabricate(:post, user: reply_user)`

			`DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do`
			`automation.running_in_background!`
			`automation.trigger!({ "post" => post })`
			`end`

			`last_post = post.topic.reload.posts.order(:post_number).last`
			`expect(last_post.raw).to eq post.raw`
FEATURE: automation rule for triaging posts using LLM (#236) The new automation rule can be used to perform llm based classification and categorization of topics. You specify a system prompt (which has %%POST%% as an input), if it returns a particular piece of text then we will apply rules such as tagging, hiding, replying or categorizing. This can be used as a spam filter, a "oops you are in the wrong place" filter and so on. Co-authored-by: Joffrey JAFFEUX <j.jaffeux@gmail.com> 2023-10-02 17:55:30 -04:00			`end`
			`end`