FEATURE: Use personas when scanning posts for spam (#1465)

Roman Rizzi 2025-06-27 10:35:47 -03:00 committed by GitHub
parent cc4e9e030f
commit b35f9bcc7c
17 changed files with 375 additions and 143 deletions

View File

@ -11,6 +11,13 @@ module DiscourseAi
def update
initial_settings = AiModerationSetting.spam
initial_data = {
custom_instructions: initial_settings&.data&.dig("custom_instructions"),
llm_model_id: initial_settings&.llm_model_id,
ai_persona_id: initial_settings&.ai_persona_id,
}
initial_custom_instructions = initial_settings&.data&.dig("custom_instructions")
initial_llm_model_id = initial_settings&.llm_model_id
@ -29,6 +36,22 @@ module DiscourseAi
)
end
end
if allowed_params.key?(:ai_persona_id)
updated_params[:ai_persona_id] = allowed_params[:ai_persona_id]
persona = AiPersona.find_by(id: allowed_params[:ai_persona_id])
if persona.nil? ||
persona.response_format.to_a.none? { |rf|
rf["key"] == "spam" && rf["type"] == "boolean"
}
return(
render_json_error(
I18n.t("discourse_ai.llm.configuration.invalid_persona_response_format"),
status: 422,
)
)
end
end
updated_params[:data] = {
custom_instructions: allowed_params[:custom_instructions],
} if allowed_params.key?(:custom_instructions)
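
For reference, a persona clears the validation above as long as its response_format contains one boolean entry keyed "spam" — a minimal sketch (the persona name is illustrative, not from this commit):

    persona = AiPersona.new(
      name: "Custom spam scanner", # hypothetical name, for illustration only
      response_format: [{ "key" => "spam", "type" => "boolean" }],
    )
    persona.response_format.any? { |rf| rf["key"] == "spam" && rf["type"] == "boolean" }
    # => true, so the controller accepts it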
@ -41,7 +64,7 @@ module DiscourseAi
AiModerationSetting.create!(updated_params.merge(setting_type: :spam))
end
log_ai_spam_update(initial_llm_model_id, initial_custom_instructions, allowed_params)
log_ai_spam_update(initial_data, allowed_params)
end
is_enabled = ActiveModel::Type::Boolean.new.cast(allowed_params[:is_enabled])
@ -119,9 +142,10 @@ module DiscourseAi
private
def log_ai_spam_update(initial_llm_model_id, initial_custom_instructions, params)
def log_ai_spam_update(initial_data, params)
changes_to_log = {}
initial_llm_model_id = initial_data[:llm_model_id]
if params.key?(:llm_model_id) && initial_llm_model_id.to_s != params[:llm_model_id].to_s
old_model_name =
LlmModel.find_by(id: initial_llm_model_id)&.display_name || initial_llm_model_id
@ -131,11 +155,22 @@ module DiscourseAi
changes_to_log[:llm_model_id] = "#{old_model_name} → #{new_model_name}"
end
initial_custom_instructions = initial_data[:custom_instructions]
if params.key?(:custom_instructions) &&
initial_custom_instructions != params[:custom_instructions]
changes_to_log[:custom_instructions] = params[:custom_instructions]
end
initial_ai_persona_id = initial_data[:ai_persona_id]
if params.key?(:ai_persona_id) && initial_ai_persona_id.to_s != params[:ai_persona_id].to_s
old_persona_name =
AiPersona.find_by(id: initial_ai_persona_id)&.name || initial_ai_persona_id
new_persona_name =
AiPersona.find_by(id: params[:ai_persona_id])&.name || params[:ai_persona_id]
changes_to_log[:ai_persona_id] = "#{old_persona_name} → #{new_persona_name}"
end
if changes_to_log.present?
changes_to_log[:subject] = I18n.t("discourse_ai.spam_detection.logging_subject")
logger = DiscourseAi::Utils::AiStaffActionLogger.new(current_user)
@ -144,7 +179,7 @@ module DiscourseAi
end
def allowed_params
params.permit(:is_enabled, :llm_model_id, :custom_instructions)
params.permit(:is_enabled, :llm_model_id, :custom_instructions, :ai_persona_id)
end
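
With :ai_persona_id added to the permit list, an admin update can pin the scanner to a persona. A sketch of the request, mirroring the controller spec later in this commit (llm_model and persona are assumed fixtures):

    put "/admin/plugins/discourse-ai/ai-spam.json",
        params: {
          is_enabled: true,
          llm_model_id: llm_model.id,
          ai_persona_id: persona.id,
          custom_instructions: "custom instructions",
        }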
def spam_config

View File

@ -1,6 +1,7 @@
# frozen_string_literal: true
class AiModerationSetting < ActiveRecord::Base
belongs_to :llm_model
belongs_to :ai_persona
validates :llm_model_id, presence: true
validates :setting_type, presence: true
@ -19,12 +20,13 @@ end
#
# Table name: ai_moderation_settings
#
# id :bigint not null, primary key
# setting_type :enum not null
# data :jsonb
# llm_model_id :bigint not null
# created_at :datetime not null
# updated_at :datetime not null
# id :bigint not null, primary key
# setting_type :enum not null
# data :jsonb
# llm_model_id :bigint not null
# created_at :datetime not null
# updated_at :datetime not null
# ai_persona_id :bigint default(-31), not null
#
# Indexes
#

View File

@ -8,7 +8,9 @@ class AiSpamSerializer < ApplicationSerializer
:stats,
:flagging_username,
:spam_score_type,
:spam_scanning_user
:spam_scanning_user,
:ai_persona_id,
:available_personas
def is_enabled
object[:enabled]
@ -18,6 +20,11 @@ class AiSpamSerializer < ApplicationSerializer
settings&.llm_model&.id
end
def ai_persona_id
settings&.ai_persona&.id ||
DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector]
end
def custom_instructions
settings&.custom_instructions
end
@ -28,6 +35,12 @@ class AiSpamSerializer < ApplicationSerializer
.map { |hash| { id: hash[:value], name: hash[:name] } }
end
def available_personas
DiscourseAi::Configuration::PersonaEnumerator.values.map do |h|
{ id: h[:value], name: h[:name] }
end
end
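
The two new attributes surface both the currently selected persona (falling back to the seeded SpamDetector id) and the list the admin UI can choose from. Roughly, the serialized payload gains (the custom persona entry here is hypothetical):

    "ai_persona_id" => -31,
    "available_personas" => [
      { id: -31, name: "Spam detector" },
      { id: 42, name: "Custom spam scanner" }, # hypothetical site persona
    ]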
def flagging_username
object[:flagging_username]
end

View File

@ -35,6 +35,7 @@ export default class AiSpam extends Component {
};
@tracked isEnabled = false;
@tracked selectedLLM = null;
@tracked selectedPersonaId = null;
@tracked customInstructions = "";
@tracked errors = [];
@ -98,6 +99,7 @@ export default class AiSpam extends Component {
}
this.customInstructions = model.custom_instructions;
this.stats = model.stats;
this.selectedPersonaId = model.ai_persona_id;
}
get availableLLMs() {
@ -133,6 +135,11 @@ export default class AiSpam extends Component {
this.selectedLLM = value;
}
@action
async updatePersona(value) {
this.selectedPersonaId = value;
}
@action
async save() {
try {
@ -141,6 +148,7 @@ export default class AiSpam extends Component {
data: {
llm_model_id: this.llmId,
custom_instructions: this.customInstructions,
ai_persona_id: this.selectedPersonaId,
},
});
this.toasts.success({
@ -256,6 +264,18 @@ export default class AiSpam extends Component {
{{/if}}
</div>
<div class="ai-spam__persona">
<label class="ai-spam__persona-label">{{i18n
"discourse_ai.spam.select_persona"
}}</label>
<ComboBox
@value={{this.selectedPersonaId}}
@content={{@model.available_personas}}
@onChange={{this.updatePersona}}
class="ai-spam__persona-selector"
/>
</div>
<div class="ai-spam__instructions">
<label class="ai-spam__instructions-label">
{{i18n "discourse_ai.spam.custom_instructions"}}

View File

@ -24,12 +24,14 @@
&__toggle,
&__llm,
&__persona,
&__instructions {
margin-bottom: 1em;
}
&__toggle-label,
&__llm-label,
&__persona-label,
&__instructions-label {
display: block;
margin-bottom: 0.5em;

View File

@ -260,6 +260,7 @@ en:
short_title: "Spam"
title: "Configure spam handling"
select_llm: "Select LLM"
select_persona: "Select persona"
custom_instructions: "Custom instructions"
custom_instructions_help: "Custom instructions specific to your site to help guide the AI in identifying spam, e.g. 'Be more aggressive about scanning posts not in English'."
last_seven_days: "Last 7 days"

View File

@ -391,6 +391,9 @@ en:
short_text_translator:
name: "Short text translator"
description: "Powers the translation feature by acting as a generic text translator, used for short texts like category names or tags"
spam_detector:
name: "Spam detector"
description: "Default persona powering our Spam detection feature"
topic_not_found: "Summary unavailable, topic not found!"
summarizing: "Summarizing topic"
@ -577,6 +580,7 @@ en:
set_llm_first: "Set %{setting} first"
model_unreachable: "We couldn't get a response from this model. Check your settings first."
invalid_seeded_model: "You can't use this model with this feature"
invalid_persona_response_format: "The selected persona must have a response format with a boolean field named \"spam\""
must_select_model: "You must select a LLM first"
endpoints:
not_configured: "%{display_name} (not configured)"

View File

@ -0,0 +1,6 @@
# frozen_string_literal: true
class AddPersonaToAiModerationSettings < ActiveRecord::Migration[7.2]
def change
add_column :ai_moderation_settings, :ai_persona_id, :bigint, null: false, default: -31
end
end
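
The -31 default lines up with the id assigned to the seeded SpamDetector in Persona.system_personas (added later in this commit), so existing settings rows are backfilled to point at the default persona:

    DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector]
    # => -31, the default written into ai_moderation_settings.ai_persona_id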

View File

@ -134,23 +134,32 @@ module DiscourseAi
def self.test_post(post, custom_instructions: nil, llm_id: nil)
settings = AiModerationSetting.spam
llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
llm = llm_model.to_llm
custom_instructions = custom_instructions || settings.custom_instructions.presence
context = build_context(post, post.topic || Topic.with_deleted.find_by(id: post.topic_id))
prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)
result =
llm.generate(
prompt,
temperature: 0.1,
max_tokens: 5,
user: Discourse.system_user,
target_msg =
build_target_content_msg(
post,
post.topic || Topic.with_deleted.find_by(id: post.topic_id),
)
custom_insts = custom_instructions || settings.custom_instructions.presence
if custom_insts.present?
custom_insts =
"\n\nAdditional site-specific instructions provided by Staff:\n#{custom_insts}"
end
ctx =
build_bot_context(
feature_name: "spam_detection_test",
feature_context: {
post_id: post.id,
},
)&.strip
messages: [target_msg],
custom_instructions: custom_insts,
)
bot = build_scanner_bot(settings: settings, llm_id: llm_id)
structured_output = nil
llm_args = { feature_context: { post_id: post.id } }
bot.reply(ctx, llm_args: llm_args) do |partial, _, type|
structured_output = partial if type == :structured_output
end
history = nil
AiSpamLog
@ -169,45 +178,46 @@ module DiscourseAi
log << "\n"
end
log << "LLM: #{llm_model.name}\n\n"
log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n"
log << "Context: #{context}\n\n"
used_llm = bot.model
log << "LLM: #{used_llm.name}\n\n"
is_spam = check_if_spam(result)
spam_persona = bot.persona
used_prompt = spam_persona.craft_prompt(ctx, llm: used_llm).system_message_text
log << "System Prompt: #{used_prompt}\n\n"
prompt.push(type: :model, content: result)
prompt.push(type: :user, content: "Explain your reasoning")
text_content =
if target_msg[:content].is_a?(Array)
target_msg[:content].first
else
target_msg[:content]
end
reasoning =
llm.generate(
prompt,
temperature: 0.1,
max_tokens: 100,
user: Discourse.system_user,
feature_name: "spam_detection_test",
feature_context: {
post_id: post.id,
},
)&.strip
log << "Context: #{text_content}\n\n"
log << "#{reasoning}"
is_spam = is_spam?(structured_output)
reasoning_insts = {
type: :user,
content: "Don't return JSON this time. Explain your reasoning in plain text.",
}
ctx.messages = [
target_msg,
{ type: :model, content: { spam: is_spam }.to_json },
reasoning_insts,
]
ctx.bypass_response_format = true
reasoning = +""
bot.reply(ctx, llm_args: llm_args.merge(max_tokens: 100)) do |partial, _, type|
reasoning << partial if type.blank?
end
log << "#{reasoning.strip}"
{ is_spam: is_spam, log: log }
end
def self.completion_prompt(post, context:, custom_instructions:)
system_prompt = build_system_prompt(custom_instructions)
prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
args = { type: :user, content: context }
upload_ids = post.upload_ids
if upload_ids.present?
args[:content] = [args[:content]]
upload_ids.take(3).each { |upload_id| args[:content] << { upload_id: upload_id } }
end
prompt.push(**args)
prompt
end
def self.perform_scan(post)
return if !should_scan_post?(post)
@ -217,29 +227,39 @@ module DiscourseAi
def self.perform_scan!(post)
return if !enabled?
settings = AiModerationSetting.spam
return if !settings || !settings.llm_model
return if !settings || !settings.llm_model || !settings.ai_persona
context = build_context(post)
llm = settings.llm_model.to_llm
target_msg = build_target_content_msg(post)
custom_instructions = settings.custom_instructions.presence
prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)
if custom_instructions.present?
custom_instructions =
"\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
end
ctx =
build_bot_context(
messages: [target_msg],
custom_instructions: custom_instructions,
user: self.flagging_user,
)
bot = build_scanner_bot(settings: settings, user: self.flagging_user)
structured_output = nil
begin
result =
llm.generate(
prompt,
temperature: 0.1,
max_tokens: 5,
user: Discourse.system_user,
feature_name: "spam_detection",
feature_context: {
post_id: post.id,
},
)&.strip
llm_args = { feature_context: { post_id: post.id } }
bot.reply(ctx, llm_args: llm_args) do |partial, _, type|
structured_output = partial if type == :structured_output
end
is_spam = check_if_spam(result)
is_spam = is_spam?(structured_output)
log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first
text_content =
if target_msg[:content].is_a?(Array)
target_msg[:content].first
else
target_msg[:content]
end
AiSpamLog.transaction do
log =
AiSpamLog.create!(
@ -247,7 +267,7 @@ module DiscourseAi
llm_model: settings.llm_model,
ai_api_audit_log: log,
is_spam: is_spam,
payload: context,
payload: text_content,
)
handle_spam(post, log) if is_spam
end
@ -273,11 +293,42 @@ module DiscourseAi
private
def self.check_if_spam(result)
(result.present? && result.strip.downcase.start_with?("spam"))
def self.build_bot_context(
feature_name: "spam_detection",
messages:,
custom_instructions: nil,
bypass_response_format: false,
user: Discourse.system_user
)
DiscourseAi::Personas::BotContext
.new(
user: user,
skip_tool_details: true,
feature_name: feature_name,
messages: messages,
bypass_response_format: bypass_response_format,
)
.tap { |ctx| ctx.custom_instructions = custom_instructions if custom_instructions }
end
def self.build_context(post, topic = nil)
def self.build_scanner_bot(
settings:,
use_structured_output: true,
llm_id: nil,
user: Discourse.system_user
)
persona = settings.ai_persona.class_instance&.new
llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
DiscourseAi::Personas::Bot.as(user, persona: persona, model: llm_model)
end
def self.is_spam?(structured_output)
structured_output.present? && structured_output.read_buffered_property(:spam)
end
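
Instead of string-matching a leading "SPAM" token, the scanner now reads the boolean straight off the streamed structured output. A sketch of what is_spam? consumes:

    # model streams: {"spam": true}
    structured_output.read_buffered_property(:spam) # => true
    # a nil or empty structured_output short-circuits to a falsy result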
def self.build_target_content_msg(post, topic = nil)
topic ||= post.topic
context = []
@ -318,7 +369,16 @@ module DiscourseAi
context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
context << post.raw[0..MAX_RAW_SCAN_LENGTH]
context.join("\n")
user_msg = { type: :user, content: context.join("\n") }
upload_ids = post.upload_ids
if upload_ids.present?
user_msg[:content] = [user_msg[:content]]
upload_ids.take(3).each { |upload_id| user_msg[:content] << { upload_id: upload_id } }
end
user_msg
end
def self.location_info(user)
@ -348,53 +408,6 @@ module DiscourseAi
nil
end
def self.build_system_prompt(custom_instructions)
base_prompt = +<<~PROMPT
You are a spam detection system. Analyze the following post content and context.
Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.
- ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API
Consider the post type carefully:
- For REPLY posts: Check if the response is relevant and topical to the thread
- For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion
A post is spam if it matches any of these criteria:
- Contains unsolicited commercial content or promotions
- Has suspicious or unrelated external links
- Shows patterns of automated/bot posting
- Contains irrelevant content or advertisements
- For replies: Completely unrelated to the discussion thread
- Uses excessive keywords or repetitive text patterns
- Shows suspicious formatting or character usage
Be especially strict with:
- Replies that ignore the previous conversation
- Posts containing multiple unrelated external links
- Generic responses that could be posted anywhere
Be fair to:
- New users making legitimate first contributions
- Non-native speakers making genuine efforts to participate
- Topic-relevant product mentions in appropriate contexts
PROMPT
base_prompt << "\n\n"
base_prompt << <<~SITE_SPECIFIC
Site Specific Information:
- Site name: #{SiteSetting.title}
- Site URL: #{Discourse.base_url}
- Site description: #{SiteSetting.site_description}
- Site top 10 categories: #{Category.where(read_restricted: false).order(posts_year: :desc).limit(10).pluck(:name).join(", ")}
SITE_SPECIFIC
if custom_instructions.present?
base_prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
end
base_prompt
end
def self.handle_spam(post, log)
url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url)

View File

@ -5,7 +5,7 @@ module DiscourseAi
class Prompt
INVALID_TURN = Class.new(StandardError)
attr_reader :messages, :tools
attr_reader :messages, :tools, :system_message_text
attr_accessor :topic_id, :post_id, :max_pixels, :tool_choice
def initialize(
@ -28,8 +28,10 @@ module DiscourseAi
@messages = []
if system_message_text
system_message = { type: :system, content: system_message_text }
@messages << system_message
@system_message_text = system_message_text
@messages << { type: :system, content: @system_message_text }
else
@system_message_text = messages.find { |m| m[:type] == :system }&.dig(:content)
end
@messages.concat(messages)
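
Keeping the system message text in its own reader lets callers (the spam-scan log above, for instance) recover the rendered system prompt without digging through @messages. A minimal sketch:

    prompt = DiscourseAi::Completions::Prompt.new("You are a spam detection system.")
    prompt.system_message_text # => "You are a spam detection system."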

View File

@ -37,7 +37,7 @@ module DiscourseAi
end
# Maybe we haven't read that part of the JSON yet.
return nil if @tracked[prop_name].blank?
return nil if @tracked[prop_name].nil?
# This means this property is a string and we want to return unread chunks.
if @property_cursors[prop_name].present?
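
The blank? → nil? switch is load-bearing here: in Rails, false.blank? is true, so a structured {"spam": false} answer would have been discarded as unread. For example:

    false.blank? # => true  — the old guard would return nil for a legitimate false
    false.nil?   # => false — the new guard lets the boolean through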

View File

@ -3,8 +3,6 @@
module DiscourseAi
module Personas
class Bot
attr_reader :model
BOT_NOT_FOUND = Class.new(StandardError)
# the future is agentic, allow for more turns
@ -24,7 +22,7 @@ module DiscourseAi
model || self.class.guess_model(bot_user) || LlmModel.find(@persona.class.default_llm_id)
end
attr_reader :bot_user
attr_reader :bot_user, :model
attr_accessor :persona
def llm
@ -69,9 +67,10 @@ module DiscourseAi
llm_kwargs[:user] = user
llm_kwargs[:temperature] = persona.temperature if persona.temperature
llm_kwargs[:top_p] = persona.top_p if persona.top_p
llm_kwargs[:response_format] = build_json_schema(
persona.response_format,
) if persona.response_format.present?
if !context.bypass_response_format && persona.response_format.present?
llm_kwargs[:response_format] = build_json_schema(persona.response_format)
end
needs_newlines = false
tools_ran = 0
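
With the bypass flag honored, the persona's JSON schema is only attached when the context doesn't opt out — exactly what the scanner's plain-text reasoning pass relies on. A sketch, assuming a bot and context built as in the scanner:

    ctx.bypass_response_format = true
    reasoning = +""
    bot.reply(ctx) { |partial, _, type| reasoning << partial if type.blank? }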

View File

@ -21,7 +21,8 @@ module DiscourseAi
:inferred_concepts,
:format_dates,
:temporal_context,
:user_language
:user_language,
:bypass_response_format
def initialize(
post: nil,
@ -42,7 +43,8 @@ module DiscourseAi
resource_url: nil,
cancel_manager: nil,
inferred_concepts: [],
format_dates: false
format_dates: false,
bypass_response_format: false
)
@participants = participants
@user = user
@ -66,6 +68,8 @@ module DiscourseAi
@cancel_manager = cancel_manager
@bypass_response_format = bypass_response_format
if post
@post_id = post.id
@topic_id = post.topic_id
@ -93,6 +97,7 @@ module DiscourseAi
inferred_concepts
user_language
temporal_context
top_categories
]
def lookup_template_param(key)
@ -119,6 +124,16 @@ module DiscourseAi
@private_message
end
def top_categories
@top_categories ||=
Category
.where(read_restricted: false)
.order(posts_year: :desc)
.limit(10)
.pluck(:name)
.join(", ")
end
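
top_categories backs the new {top_categories} template param referenced by the SpamDetector prompt below; because it is memoized and resolved through lookup_template_param, the category query only runs for personas that actually use the placeholder. Roughly:

    ctx = DiscourseAi::Personas::BotContext.new(messages: [])
    ctx.lookup_template_param("top_categories")
    # => "Support, Feature Requests, Bugs, ..." (top 10 public categories by yearly posts)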
def to_json
{
messages: @messages,
@ -142,6 +157,8 @@ module DiscourseAi
inferred_concepts: @inferred_concepts,
user_language: @user_language,
temporal_context: @temporal_context,
top_categories: @top_categories,
bypass_response_format: @bypass_response_format,
}
end
end

View File

@ -68,6 +68,7 @@ module DiscourseAi
PostRawTranslator => -28,
TopicTitleTranslator => -29,
ShortTextTranslator => -30,
SpamDetector => -31,
}
end

View File

@ -0,0 +1,62 @@
# frozen_string_literal: true
module DiscourseAi
module Personas
class SpamDetector < Persona
def self.default_enabled
false
end
def temperature
0.1
end
def system_prompt
<<~PROMPT
You are a spam detection system. Analyze the following post content and context.
Consider the post type carefully:
- For REPLY posts: Check if the response is relevant and topical to the thread
- For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion
A post is spam if it matches any of these criteria:
- Contains unsolicited commercial content or promotions
- Has suspicious or unrelated external links
- Shows patterns of automated/bot posting
- Contains irrelevant content or advertisements
- For replies: Completely unrelated to the discussion thread
- Uses excessive keywords or repetitive text patterns
- Shows suspicious formatting or character usage
Be especially strict with:
- Replies that ignore the previous conversation
- Posts containing multiple unrelated external links
- Generic responses that could be posted anywhere
Be fair to:
- New users making legitimate first contributions
- Non-native speakers making genuine efforts to participate
- Topic-relevant product mentions in appropriate contexts
Site Specific Information:
- Site name: {site_title}
- Site URL: {site_url}
- Site description: {site_description}
- Site top 10 categories: {top_categories}
Format your response as a JSON object with one key named "spam", which indicates whether the post is spam or legitimate.
Your output should be in the following format:
<output>
{"spam": "xx"}
</output>
Where "xx" is true if the post is spam, or false if it's legitimate.
PROMPT
end
def response_format
[{ "key" => "spam", "type" => "boolean" }]
end
end
end
end
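
Since the seeded persona ships with exactly the boolean format the controller validates, instantiating it shows the contract at a glance:

    persona = DiscourseAi::Personas::SpamDetector.new
    persona.response_format # => [{ "key" => "spam", "type" => "boolean" }]
    persona.temperature     # => 0.1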

View File

@ -248,7 +248,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
prompts = nil
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
["spam", "the reason is just because"],
[true, "the reason is just because"],
) do |_, _, _prompts|
prompts = _prompts
described_class.test_post(post, custom_instructions: "123")
@ -261,7 +261,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
["not_spam", "the reason is just because"],
[false, "the reason is just because"],
) do |_, _, _prompts|
prompts = _prompts
described_class.test_post(post, custom_instructions: "123")
@ -284,7 +284,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
described_class.new_post(post)
prompt = nil
DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do |_, _, _prompts|
DiscourseAi::Completions::Llm.with_prepared_responses([true]) do |_, _, _prompts|
# force a rebake so we actually scan
post.rebake!
prompt = _prompts.first
@ -336,7 +336,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
described_class.new_post(post)
DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do |_, _, _prompts|
DiscourseAi::Completions::Llm.with_prepared_responses([true]) do |_, _, _prompts|
# force a rebake so we actually scan
post.rebake!
end
@ -364,7 +364,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
prompts = nil
DiscourseAi::Completions::Llm.with_prepared_responses(
["spam", "just because"],
[true, "just because"],
) do |_, _, _prompts|
prompts = _prompts
described_class.test_post(post)

View File

@ -16,12 +16,17 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
params: {
is_enabled: true,
llm_model_id: llm_model.id,
ai_persona_id:
DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector],
custom_instructions: "custom instructions",
}
expect(response.status).to eq(200)
expect(SiteSetting.ai_spam_detection_enabled).to eq(true)
expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id)
expect(AiModerationSetting.spam.ai_persona_id).to eq(
DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector],
)
expect(AiModerationSetting.spam.data["custom_instructions"]).to eq("custom instructions")
end
@ -49,6 +54,33 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
expect(response.status).to eq(200)
end
it "validates the selected persona has a valid response format" do
ai_persona = Fabricate(:ai_persona, response_format: nil)
put "/admin/plugins/discourse-ai/ai-spam.json",
params: {
is_enabled: true,
llm_model_id: llm_model.id,
ai_persona_id: ai_persona.id,
custom_instructions: "custom instructions",
}
expect(response.status).to eq(422)
ai_persona.update!(response_format: [{ "key" => "spam", "type" => "boolean" }])
put "/admin/plugins/discourse-ai/ai-spam.json",
params: {
is_enabled: true,
llm_model_id: llm_model.id,
ai_persona_id: ai_persona.id,
custom_instructions: "custom instructions",
}
expect(response.status).to eq(200)
expect(AiModerationSetting.spam.ai_persona_id).to eq(ai_persona.id)
end
it "ensures that seeded llm ID is properly passed and allowed" do
seeded_llm = Fabricate(:seeded_model)
@ -158,6 +190,29 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
expect(history.details).to include("llm_model_id")
end
it "logs staff action when ai_persona_id changes" do
new_persona =
Fabricate(
:ai_persona,
name: "Updated Persona",
response_format: [{ "key" => "spam", "type" => "boolean" }],
)
put "/admin/plugins/discourse-ai/ai-spam.json", params: { ai_persona_id: new_persona.id }
expect(response.status).to eq(200)
# Verify the log was created with the right subject
history =
UserHistory.where(
action: UserHistory.actions[:custom_staff],
custom_type: "update_ai_spam_settings",
).last
expect(history).to be_present
expect(history.details).to include("ai_persona_id")
expect(history.details).to include(new_persona.name)
end
it "does not log staff action when only is_enabled changes" do
# Check initial count of logs
initial_count =
@ -231,7 +286,7 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
llm2 = Fabricate(:llm_model, name: "DiffLLM")
DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "just because"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([true, "just because"]) do
post "/admin/plugins/discourse-ai/ai-spam/test.json",
params: {
post_url: spam_post2.url,
@ -247,7 +302,7 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
end
it "can scan using post id" do
DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because apples"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([true, "because apples"]) do
post "/admin/plugins/discourse-ai/ai-spam/test.json",
params: {
post_url: spam_post.id.to_s,
@ -272,7 +327,7 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
AiSpamLog.create!(post: spam_post, llm_model: llm_model, is_spam: true, created_at: 1.day.ago)
DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because banana"]) do
DiscourseAi::Completions::Llm.with_prepared_responses([true, "because banana"]) do
post "/admin/plugins/discourse-ai/ai-spam/test.json",
params: {
post_url: spam_post.url,