FEATURE: Use Personas when scanning posts for spam (#1465)
This commit is contained in:
parent cc4e9e030f
commit b35f9bcc7c
@@ -11,6 +11,13 @@ module DiscourseAi
     def update
       initial_settings = AiModerationSetting.spam
-      initial_custom_instructions = initial_settings&.data&.dig("custom_instructions")
-      initial_llm_model_id = initial_settings&.llm_model_id
+
+      initial_data = {
+        custom_instructions: initial_settings&.data&.dig("custom_instructions"),
+        llm_model_id: initial_settings&.llm_model_id,
+        ai_persona_id: initial_settings&.ai_persona_id,
+      }
@@ -29,6 +36,22 @@ module DiscourseAi
           )
         end
       end
+
+      if allowed_params.key?(:ai_persona_id)
+        updated_params[:ai_persona_id] = allowed_params[:ai_persona_id]
+        persona = AiPersona.find_by(id: allowed_params[:ai_persona_id])
+        if persona.nil? ||
+             persona.response_format.to_a.none? { |rf|
+               rf["key"] == "spam" && rf["type"] == "boolean"
+             }
+          return(
+            render_json_error(
+              I18n.t("discourse_ai.llm.configuration.invalid_persona_response_format"),
+              status: 422,
+            )
+          )
+        end
+      end
       updated_params[:data] = {
         custom_instructions: allowed_params[:custom_instructions],
       } if allowed_params.key?(:custom_instructions)
@@ -41,7 +64,7 @@ module DiscourseAi
           AiModerationSetting.create!(updated_params.merge(setting_type: :spam))
         end
 
-        log_ai_spam_update(initial_llm_model_id, initial_custom_instructions, allowed_params)
+        log_ai_spam_update(initial_data, allowed_params)
       end
 
       is_enabled = ActiveModel::Type::Boolean.new.cast(allowed_params[:is_enabled])
@@ -119,9 +142,10 @@ module DiscourseAi
 
     private
 
-    def log_ai_spam_update(initial_llm_model_id, initial_custom_instructions, params)
+    def log_ai_spam_update(initial_data, params)
       changes_to_log = {}
 
+      initial_llm_model_id = initial_data[:llm_model_id]
       if params.key?(:llm_model_id) && initial_llm_model_id.to_s != params[:llm_model_id].to_s
         old_model_name =
           LlmModel.find_by(id: initial_llm_model_id)&.display_name || initial_llm_model_id
@@ -131,11 +155,22 @@ module DiscourseAi
         changes_to_log[:llm_model_id] = "#{old_model_name} → #{new_model_name}"
       end
 
+      initial_custom_instructions = initial_data[:custom_instructions]
       if params.key?(:custom_instructions) &&
           initial_custom_instructions != params[:custom_instructions]
         changes_to_log[:custom_instructions] = params[:custom_instructions]
       end
 
+      initial_ai_persona_id = initial_data[:ai_persona_id]
+      if params.key?(:ai_persona_id) && initial_ai_persona_id.to_s != params[:ai_persona_id].to_s
+        old_persona_name =
+          AiPersona.find_by(id: initial_ai_persona_id)&.name || initial_ai_persona_id
+        new_persona_name =
+          AiPersona.find_by(id: params[:ai_persona_id])&.name || params[:ai_persona_id]
+
+        changes_to_log[:ai_persona_id] = "#{old_persona_name} → #{new_persona_name}"
+      end
+
       if changes_to_log.present?
         changes_to_log[:subject] = I18n.t("discourse_ai.spam_detection.logging_subject")
         logger = DiscourseAi::Utils::AiStaffActionLogger.new(current_user)
@@ -144,7 +179,7 @@ module DiscourseAi
     end
 
     def allowed_params
-      params.permit(:is_enabled, :llm_model_id, :custom_instructions)
+      params.permit(:is_enabled, :llm_model_id, :custom_instructions, :ai_persona_id)
     end
 
     def spam_config
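The controller now only accepts a persona whose response_format declares a boolean "spam" key. As a minimal standalone sketch of that rule (valid_spam_persona? is a hypothetical helper, not part of this commit):

  # Hypothetical helper mirroring the check in `update` above.
  def valid_spam_persona?(persona)
    return false if persona.nil?
    # response_format is an array of { "key" => ..., "type" => ... } hashes.
    persona.response_format.to_a.any? { |rf| rf["key"] == "spam" && rf["type"] == "boolean" }
  end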
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 class AiModerationSetting < ActiveRecord::Base
   belongs_to :llm_model
+  belongs_to :ai_persona
 
   validates :llm_model_id, presence: true
   validates :setting_type, presence: true
@@ -19,12 +20,13 @@ end
 #
 # Table name: ai_moderation_settings
 #
-#  id           :bigint   not null, primary key
-#  setting_type :enum     not null
-#  data         :jsonb
-#  llm_model_id :bigint   not null
-#  created_at   :datetime not null
-#  updated_at   :datetime not null
+#  id            :bigint   not null, primary key
+#  setting_type  :enum     not null
+#  data          :jsonb
+#  llm_model_id  :bigint   not null
+#  created_at    :datetime not null
+#  updated_at    :datetime not null
+#  ai_persona_id :bigint   default(-31), not null
 #
 # Indexes
 #
@@ -8,7 +8,9 @@ class AiSpamSerializer < ApplicationSerializer
     :stats,
     :flagging_username,
     :spam_score_type,
-    :spam_scanning_user
+    :spam_scanning_user,
+    :ai_persona_id,
+    :available_personas
 
   def is_enabled
     object[:enabled]
@@ -18,6 +20,11 @@ class AiSpamSerializer < ApplicationSerializer
     settings&.llm_model&.id
   end
 
+  def ai_persona_id
+    settings&.ai_persona&.id ||
+      DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector]
+  end
+
   def custom_instructions
     settings&.custom_instructions
   end
@@ -28,6 +35,12 @@ class AiSpamSerializer < ApplicationSerializer
       .map { |hash| { id: hash[:value], name: hash[:name] } }
   end
 
+  def available_personas
+    DiscourseAi::Configuration::PersonaEnumerator.values.map do |h|
+      { id: h[:value], name: h[:name] }
+    end
+  end
+
   def flagging_username
     object[:flagging_username]
   end
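For orientation, the payload the admin UI consumes now looks roughly like this (illustrative values only, not taken from the commit):

  # Illustrative shape; -31 is the seeded SpamDetector persona id.
  {
    is_enabled: true,
    llm_id: 42,
    ai_persona_id: -31,
    available_personas: [{ id: -31, name: "Spam detector" }],
    custom_instructions: "Be more aggressive about posts not in English",
  }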
@@ -35,6 +35,7 @@ export default class AiSpam extends Component {
   };
   @tracked isEnabled = false;
   @tracked selectedLLM = null;
+  @tracked selectedPersonaId = null;
   @tracked customInstructions = "";
   @tracked errors = [];
 
@@ -98,6 +99,7 @@ export default class AiSpam extends Component {
     }
     this.customInstructions = model.custom_instructions;
     this.stats = model.stats;
+    this.selectedPersonaId = model.ai_persona_id;
   }
 
   get availableLLMs() {
@@ -133,6 +135,11 @@ export default class AiSpam extends Component {
     this.selectedLLM = value;
   }
 
+  @action
+  async updatePersona(value) {
+    this.selectedPersonaId = value;
+  }
+
   @action
   async save() {
     try {
@@ -141,6 +148,7 @@ export default class AiSpam extends Component {
         data: {
           llm_model_id: this.llmId,
          custom_instructions: this.customInstructions,
+          ai_persona_id: this.selectedPersonaId,
         },
       });
       this.toasts.success({
@@ -256,6 +264,18 @@ export default class AiSpam extends Component {
         {{/if}}
       </div>
 
+      <div class="ai-spam__persona">
+        <label class="ai-spam__persona-label">{{i18n
+            "discourse_ai.spam.select_persona"
+          }}</label>
+        <ComboBox
+          @value={{this.selectedPersonaId}}
+          @content={{@model.available_personas}}
+          @onChange={{this.updatePersona}}
+          class="ai-spam__persona-selector"
+        />
+      </div>
+
       <div class="ai-spam__instructions">
         <label class="ai-spam__instructions-label">
           {{i18n "discourse_ai.spam.custom_instructions"}}
@@ -24,12 +24,14 @@
 
   &__toggle,
   &__llm,
+  &__persona,
   &__instructions {
     margin-bottom: 1em;
   }
 
   &__toggle-label,
   &__llm-label,
+  &__persona-label,
   &__instructions-label {
     display: block;
     margin-bottom: 0.5em;
@@ -260,6 +260,7 @@ en:
       short_title: "Spam"
       title: "Configure spam handling"
       select_llm: "Select LLM"
+      select_persona: "Select persona"
       custom_instructions: "Custom instructions"
       custom_instructions_help: "Custom instructions specific to your site to help guide the AI in identifying spam, e.g. 'Be more aggressive about scanning posts not in English'."
       last_seven_days: "Last 7 days"
@@ -391,6 +391,9 @@ en:
       short_text_translator:
         name: "Short text translator"
         description: "Powers the translation feature as a generic text translator, used for short texts like category names or tags"
+      spam_detector:
+        name: "Spam detector"
+        description: "Default persona powering our Spam detection feature"
 
     topic_not_found: "Summary unavailable, topic not found!"
     summarizing: "Summarizing topic"
@@ -577,6 +580,7 @@ en:
       set_llm_first: "Set %{setting} first"
       model_unreachable: "We couldn't get a response from this model. Check your settings first."
       invalid_seeded_model: "You can't use this model with this feature"
+      invalid_persona_response_format: "The selected persona must have a response format with a boolean field named \"spam\""
       must_select_model: "You must select a LLM first"
     endpoints:
       not_configured: "%{display_name} (not configured)"
@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+class AddPersonaToAiModerationSettings < ActiveRecord::Migration[7.2]
+  def change
+    add_column :ai_moderation_settings, :ai_persona_id, :bigint, null: false, default: -31
+  end
+end
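The hard-coded default of -31 matches the id the plugin reserves for the seeded SpamDetector persona (see the system_personas mapping further down), so existing rows pick up the stock persona automatically:

  # Assumed lookup; per the mapping added in this commit it returns -31.
  DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector]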
@@ -134,23 +134,32 @@ module DiscourseAi
 
     def self.test_post(post, custom_instructions: nil, llm_id: nil)
       settings = AiModerationSetting.spam
-      llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
-      llm = llm_model.to_llm
-      custom_instructions = custom_instructions || settings.custom_instructions.presence
-      context = build_context(post, post.topic || Topic.with_deleted.find_by(id: post.topic_id))
-      prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)
-
-      result =
-        llm.generate(
-          prompt,
-          temperature: 0.1,
-          max_tokens: 5,
-          user: Discourse.system_user,
-          feature_context: {
-            post_id: post.id,
-          },
-        )&.strip
+      target_msg =
+        build_target_content_msg(
+          post,
+          post.topic || Topic.with_deleted.find_by(id: post.topic_id),
+        )
+      custom_insts = custom_instructions || settings.custom_instructions.presence
+      if custom_insts.present?
+        custom_insts =
+          "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_insts}"
+      end
+
+      ctx =
+        build_bot_context(
+          feature_name: "spam_detection_test",
+          messages: [target_msg],
+          custom_instructions: custom_insts,
+        )
+      bot = build_scanner_bot(settings: settings, llm_id: llm_id)
+
+      structured_output = nil
+      llm_args = { feature_context: { post_id: post.id } }
+      bot.reply(ctx, llm_args: llm_args) do |partial, _, type|
+        structured_output = partial if type == :structured_output
+      end
 
       history = nil
       AiSpamLog
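This pattern recurs throughout the commit: the scanner no longer calls llm.generate with a hand-built prompt, it asks a persona-backed bot to reply and collects the streamed structured output. A condensed sketch of reading the verdict, assuming the Bot and StructuredOutput APIs shown in this diff:

  structured_output = nil
  bot.reply(ctx, llm_args: { feature_context: { post_id: post.id } }) do |partial, _, type|
    # Only structured-output partials carry the buffered JSON verdict.
    structured_output = partial if type == :structured_output
  end
  structured_output&.read_buffered_property(:spam) # => true or false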
@@ -169,45 +178,46 @@ module DiscourseAi
         log << "\n"
       end
 
-      log << "LLM: #{llm_model.name}\n\n"
-      log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n"
-      log << "Context: #{context}\n\n"
+      used_llm = bot.model
+      log << "LLM: #{used_llm.name}\n\n"
 
-      is_spam = check_if_spam(result)
+      spam_persona = bot.persona
+      used_prompt = spam_persona.craft_prompt(ctx, llm: used_llm).system_message_text
+      log << "System Prompt: #{used_prompt}\n\n"
 
-      prompt.push(type: :model, content: result)
-      prompt.push(type: :user, content: "Explain your reasoning")
+      text_content =
+        if target_msg[:content].is_a?(Array)
+          target_msg[:content].first
+        else
+          target_msg[:content]
+        end
+      log << "Context: #{text_content}\n\n"
 
-      reasoning =
-        llm.generate(
-          prompt,
-          temperature: 0.1,
-          max_tokens: 100,
-          user: Discourse.system_user,
-          feature_name: "spam_detection_test",
-          feature_context: {
-            post_id: post.id,
-          },
-        )&.strip
+      is_spam = is_spam?(structured_output)
 
-      log << "#{reasoning}"
+      reasoning_insts = {
+        type: :user,
+        content: "Don't return a JSON this time. Explain your reasoning in plain text.",
+      }
+      ctx.messages = [
+        target_msg,
+        { type: :model, content: { spam: is_spam }.to_json },
+        reasoning_insts,
+      ]
+      ctx.bypass_response_format = true
+
+      reasoning = +""
+
+      bot.reply(ctx, llm_args: llm_args.merge(max_tokens: 100)) do |partial, _, type|
+        reasoning << partial if type.blank?
+      end
+
+      log << "#{reasoning.strip}"
 
       { is_spam: is_spam, log: log }
     end
 
-    def self.completion_prompt(post, context:, custom_instructions:)
-      system_prompt = build_system_prompt(custom_instructions)
-      prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
-      args = { type: :user, content: context }
-      upload_ids = post.upload_ids
-      if upload_ids.present?
-        args[:content] = [args[:content]]
-        upload_ids.take(3).each { |upload_id| args[:content] << { upload_id: upload_id } }
-      end
-      prompt.push(**args)
-      prompt
-    end
-
     def self.perform_scan(post)
       return if !should_scan_post?(post)
@@ -217,29 +227,39 @@ module DiscourseAi
     def self.perform_scan!(post)
       return if !enabled?
       settings = AiModerationSetting.spam
-      return if !settings || !settings.llm_model
+      return if !settings || !settings.llm_model || !settings.ai_persona
 
-      context = build_context(post)
-      llm = settings.llm_model.to_llm
+      target_msg = build_target_content_msg(post)
       custom_instructions = settings.custom_instructions.presence
-      prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)
+      if custom_instructions.present?
+        custom_instructions =
+          "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
+      end
+
+      ctx =
+        build_bot_context(
+          messages: [target_msg],
+          custom_instructions: custom_instructions,
+          user: self.flagging_user,
+        )
+      bot = build_scanner_bot(settings: settings, user: self.flagging_user)
+      structured_output = nil
 
       begin
-        result =
-          llm.generate(
-            prompt,
-            temperature: 0.1,
-            max_tokens: 5,
-            user: Discourse.system_user,
-            feature_name: "spam_detection",
-            feature_context: {
-              post_id: post.id,
-            },
-          )&.strip
+        llm_args = { feature_context: { post_id: post.id } }
+        bot.reply(ctx, llm_args: llm_args) do |partial, _, type|
+          structured_output = partial if type == :structured_output
+        end
 
-        is_spam = check_if_spam(result)
+        is_spam = is_spam?(structured_output)
 
         log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first
+        text_content =
+          if target_msg[:content].is_a?(Array)
+            target_msg[:content].first
+          else
+            target_msg[:content]
+          end
         AiSpamLog.transaction do
           log =
             AiSpamLog.create!(
@@ -247,7 +267,7 @@ module DiscourseAi
               llm_model: settings.llm_model,
               ai_api_audit_log: log,
               is_spam: is_spam,
-              payload: context,
+              payload: text_content,
             )
           handle_spam(post, log) if is_spam
         end
@@ -273,11 +293,42 @@ module DiscourseAi
 
     private
 
-    def self.check_if_spam(result)
-      (result.present? && result.strip.downcase.start_with?("spam"))
+    def self.build_bot_context(
+      feature_name: "spam_detection",
+      messages:,
+      custom_instructions: nil,
+      bypass_response_format: false,
+      user: Discourse.system_user
+    )
+      DiscourseAi::Personas::BotContext
+        .new(
+          user: user,
+          skip_tool_details: true,
+          feature_name: feature_name,
+          messages: messages,
+          bypass_response_format: bypass_response_format,
+        )
+        .tap { |ctx| ctx.custom_instructions = custom_instructions if custom_instructions }
+    end
+
+    def self.build_scanner_bot(
+      settings:,
+      use_structured_output: true,
+      llm_id: nil,
+      user: Discourse.system_user
+    )
+      persona = settings.ai_persona.class_instance&.new
+
+      llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
+
+      DiscourseAi::Personas::Bot.as(user, persona: persona, model: llm_model)
+    end
+
+    def self.is_spam?(structured_output)
+      structured_output.present? && structured_output.read_buffered_property(:spam)
     end
 
-    def self.build_context(post, topic = nil)
+    def self.build_target_content_msg(post, topic = nil)
       topic ||= post.topic
       context = []
@@ -318,7 +369,16 @@ module DiscourseAi
 
       context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
       context << post.raw[0..MAX_RAW_SCAN_LENGTH]
-      context.join("\n")
+
+      user_msg = { type: :user, content: context.join("\n") }
+
+      upload_ids = post.upload_ids
+      if upload_ids.present?
+        user_msg[:content] = [user_msg[:content]]
+        upload_ids.take(3).each { |upload_id| user_msg[:content] << { upload_id: upload_id } }
+      end
+
+      user_msg
     end
 
     def self.location_info(user)
@@ -348,53 +408,6 @@ module DiscourseAi
       nil
     end
 
-    def self.build_system_prompt(custom_instructions)
-      base_prompt = +<<~PROMPT
-        You are a spam detection system. Analyze the following post content and context.
-        Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.
-
-        - ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API
-
-        Consider the post type carefully:
-        - For REPLY posts: Check if the response is relevant and topical to the thread
-        - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion
-
-        A post is spam if it matches any of these criteria:
-        - Contains unsolicited commercial content or promotions
-        - Has suspicious or unrelated external links
-        - Shows patterns of automated/bot posting
-        - Contains irrelevant content or advertisements
-        - For replies: Completely unrelated to the discussion thread
-        - Uses excessive keywords or repetitive text patterns
-        - Shows suspicious formatting or character usage
-
-        Be especially strict with:
-        - Replies that ignore the previous conversation
-        - Posts containing multiple unrelated external links
-        - Generic responses that could be posted anywhere
-
-        Be fair to:
-        - New users making legitimate first contributions
-        - Non-native speakers making genuine efforts to participate
-        - Topic-relevant product mentions in appropriate contexts
-      PROMPT
-
-      base_prompt << "\n\n"
-      base_prompt << <<~SITE_SPECIFIC
-        Site Specific Information:
-        - Site name: #{SiteSetting.title}
-        - Site URL: #{Discourse.base_url}
-        - Site description: #{SiteSetting.site_description}
-        - Site top 10 categories: #{Category.where(read_restricted: false).order(posts_year: :desc).limit(10).pluck(:name).join(", ")}
-      SITE_SPECIFIC
-
-      if custom_instructions.present?
-        base_prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
-      end
-
-      base_prompt
-    end
-
     def self.handle_spam(post, log)
       url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
       reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url)
@@ -5,7 +5,7 @@ module DiscourseAi
     class Prompt
       INVALID_TURN = Class.new(StandardError)
 
-      attr_reader :messages, :tools
+      attr_reader :messages, :tools, :system_message_text
       attr_accessor :topic_id, :post_id, :max_pixels, :tool_choice
 
       def initialize(
@@ -28,8 +28,10 @@ module DiscourseAi
         @messages = []
 
         if system_message_text
-          system_message = { type: :system, content: system_message_text }
-          @messages << system_message
+          @system_message_text = system_message_text
+          @messages << { type: :system, content: @system_message_text }
+        else
+          @system_message_text = messages.find { |m| m[:type] == :system }&.dig(:content)
         end
 
         @messages.concat(messages)
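The new reader lets callers recover the system message whether it was passed directly or buried in a messages array; the spam scanner uses it above to log the exact prompt a persona produced. A rough usage sketch (the constructor signature here is inferred from the diff, not confirmed):

  prompt = DiscourseAi::Completions::Prompt.new("You are a spam detection system.")
  prompt.system_message_text # => "You are a spam detection system."

  # Assumed keyword form; the else-branch digs the system message out of messages.
  prompt = DiscourseAi::Completions::Prompt.new(messages: [{ type: :system, content: "sys" }])
  prompt.system_message_text # => "sys"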
@@ -37,7 +37,7 @@ module DiscourseAi
       end
 
       # Maybe we haven't read that part of the JSON yet.
-      return nil if @tracked[prop_name].blank?
+      return nil if @tracked[prop_name].nil?
 
       # This means this property is a string and we want to return unread chunks.
       if @property_cursors[prop_name].present?
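This one-word change is what makes a boolean response format workable: under Rails semantics `false.blank?` is true, so a buffered `{"spam": false}` verdict would have been misread as "not received yet". Concretely:

  # Rails blank? vs nil? on the values this buffer can hold:
  false.blank? # => true  -- a legitimate "not spam" verdict looks absent
  false.nil?   # => false -- nil? only rejects values that truly never arrived
  "".blank?    # => true  -- the old check also swallowed empty strings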
@@ -3,8 +3,6 @@
 module DiscourseAi
   module Personas
     class Bot
-      attr_reader :model
-
       BOT_NOT_FOUND = Class.new(StandardError)
 
       # the future is agentic, allow for more turns
@@ -24,7 +22,7 @@ module DiscourseAi
         model || self.class.guess_model(bot_user) || LlmModel.find(@persona.class.default_llm_id)
       end
 
-      attr_reader :bot_user
+      attr_reader :bot_user, :model
       attr_accessor :persona
 
      def llm
@@ -69,9 +67,10 @@ module DiscourseAi
         llm_kwargs[:user] = user
         llm_kwargs[:temperature] = persona.temperature if persona.temperature
         llm_kwargs[:top_p] = persona.top_p if persona.top_p
-        llm_kwargs[:response_format] = build_json_schema(
-          persona.response_format,
-        ) if persona.response_format.present?
+
+        if !context.bypass_response_format && persona.response_format.present?
+          llm_kwargs[:response_format] = build_json_schema(persona.response_format)
+        end
 
         needs_newlines = false
         tools_ran = 0
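bypass_response_format exists so one bot can serve both passes of the spam scan: a schema-constrained verdict first, then a free-form explanation. A hedged sketch of that second pass, reusing names from the diffs above:

  # Flip the flag on the same context to get a plain-text turn.
  ctx.bypass_response_format = true
  reasoning = +""
  bot.reply(ctx, llm_args: { max_tokens: 100 }) do |partial, _, type|
    reasoning << partial if type.blank?
  end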
@@ -21,7 +21,8 @@ module DiscourseAi
       :inferred_concepts,
       :format_dates,
       :temporal_context,
-      :user_language
+      :user_language,
+      :bypass_response_format
 
     def initialize(
       post: nil,
@@ -42,7 +43,8 @@ module DiscourseAi
       resource_url: nil,
       cancel_manager: nil,
       inferred_concepts: [],
-      format_dates: false
+      format_dates: false,
+      bypass_response_format: false
     )
       @participants = participants
       @user = user
@@ -66,6 +68,8 @@ module DiscourseAi
 
       @cancel_manager = cancel_manager
 
+      @bypass_response_format = bypass_response_format
+
       if post
         @post_id = post.id
         @topic_id = post.topic_id
@@ -93,6 +97,7 @@ module DiscourseAi
       inferred_concepts
       user_language
       temporal_context
+      top_categories
     ]
 
     def lookup_template_param(key)
@@ -119,6 +124,16 @@ module DiscourseAi
       @private_message
     end
 
+    def top_categories
+      @top_categories ||=
+        Category
+          .where(read_restricted: false)
+          .order(posts_year: :desc)
+          .limit(10)
+          .pluck(:name)
+          .join(", ")
+    end
+
     def to_json
       {
         messages: @messages,
@@ -142,6 +157,8 @@ module DiscourseAi
         inferred_concepts: @inferred_concepts,
         user_language: @user_language,
         temporal_context: @temporal_context,
+        top_categories: @top_categories,
+        bypass_response_format: @bypass_response_format,
       }
     end
   end
@@ -68,6 +68,7 @@ module DiscourseAi
           PostRawTranslator => -28,
           TopicTitleTranslator => -29,
           ShortTextTranslator => -30,
+          SpamDetector => -31,
         }
       end
lib/personas/spam_detector.rb (new file, 62 lines)
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Personas
+    class SpamDetector < Persona
+      def self.default_enabled
+        false
+      end
+
+      def temperature
+        0.1
+      end
+
+      def system_prompt
+        <<~PROMPT
+          You are a spam detection system. Analyze the following post content and context.
+
+          Consider the post type carefully:
+          - For REPLY posts: Check if the response is relevant and topical to the thread
+          - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion
+
+          A post is spam if it matches any of these criteria:
+          - Contains unsolicited commercial content or promotions
+          - Has suspicious or unrelated external links
+          - Shows patterns of automated/bot posting
+          - Contains irrelevant content or advertisements
+          - For replies: Completely unrelated to the discussion thread
+          - Uses excessive keywords or repetitive text patterns
+          - Shows suspicious formatting or character usage
+
+          Be especially strict with:
+          - Replies that ignore the previous conversation
+          - Posts containing multiple unrelated external links
+          - Generic responses that could be posted anywhere
+
+          Be fair to:
+          - New users making legitimate first contributions
+          - Non-native speakers making genuine efforts to participate
+          - Topic-relevant product mentions in appropriate contexts
+
+          Site Specific Information:
+          - Site name: {site_title}
+          - Site URL: {site_url}
+          - Site description: {site_description}
+          - Site top 10 categories: {top_categories}
+
+          Format your response as a JSON object with one key named "spam", which indicates if a post is spam or legitimate.
+          Your output should be in the following format:
+          <output>
+            {"spam": "xx"}
+          </output>
+
+          Where "xx" is true if the post is spam, or false if it's legitimate.
+        PROMPT
+      end
+
+      def response_format
+        [{ "key" => "spam", "type" => "boolean" }]
+      end
+    end
+  end
+end
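Putting the pieces together, the scanner resolves this class from the configured AiPersona row and hands it to a bot, as build_scanner_bot does above. A condensed sketch (assumes an llm_model is already loaded):

  persona = DiscourseAi::Personas::SpamDetector.new
  bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm_model)
  # persona.response_format drives the JSON schema, so replies stream a boolean "spam" key.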
@@ -248,7 +248,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
       prompts = nil
       result =
         DiscourseAi::Completions::Llm.with_prepared_responses(
-          ["spam", "the reason is just because"],
+          [true, "the reason is just because"],
         ) do |_, _, _prompts|
           prompts = _prompts
           described_class.test_post(post, custom_instructions: "123")
@@ -261,7 +261,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
 
       result =
         DiscourseAi::Completions::Llm.with_prepared_responses(
-          ["not_spam", "the reason is just because"],
+          [false, "the reason is just because"],
         ) do |_, _, _prompts|
           prompts = _prompts
           described_class.test_post(post, custom_instructions: "123")
@@ -284,7 +284,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
       described_class.new_post(post)
 
       prompt = nil
-      DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do |_, _, _prompts|
+      DiscourseAi::Completions::Llm.with_prepared_responses([true]) do |_, _, _prompts|
         # force a rebake so we actually scan
         post.rebake!
         prompt = _prompts.first
@@ -336,7 +336,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
 
       described_class.new_post(post)
 
-      DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do |_, _, _prompts|
+      DiscourseAi::Completions::Llm.with_prepared_responses([true]) do |_, _, _prompts|
         # force a rebake so we actually scan
         post.rebake!
       end
@@ -364,7 +364,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
 
       prompts = nil
       DiscourseAi::Completions::Llm.with_prepared_responses(
-        ["spam", "just because"],
+        [true, "just because"],
       ) do |_, _, _prompts|
         prompts = _prompts
         described_class.test_post(post)
@@ -16,12 +16,17 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
           params: {
             is_enabled: true,
             llm_model_id: llm_model.id,
+            ai_persona_id:
+              DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector],
             custom_instructions: "custom instructions",
           }
 
       expect(response.status).to eq(200)
       expect(SiteSetting.ai_spam_detection_enabled).to eq(true)
       expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id)
+      expect(AiModerationSetting.spam.ai_persona_id).to eq(
+        DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector],
+      )
       expect(AiModerationSetting.spam.data["custom_instructions"]).to eq("custom instructions")
     end
 
@@ -49,6 +54,33 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
       expect(response.status).to eq(200)
     end
 
+    it "validates the selected persona has a valid response format" do
+      ai_persona = Fabricate(:ai_persona, response_format: nil)
+
+      put "/admin/plugins/discourse-ai/ai-spam.json",
+          params: {
+            is_enabled: true,
+            llm_model_id: llm_model.id,
+            ai_persona_id: ai_persona.id,
+            custom_instructions: "custom instructions",
+          }
+
+      expect(response.status).to eq(422)
+
+      ai_persona.update!(response_format: [{ "key" => "spam", "type" => "boolean" }])
+
+      put "/admin/plugins/discourse-ai/ai-spam.json",
+          params: {
+            is_enabled: true,
+            llm_model_id: llm_model.id,
+            ai_persona_id: ai_persona.id,
+            custom_instructions: "custom instructions",
+          }
+
+      expect(response.status).to eq(200)
+      expect(AiModerationSetting.spam.ai_persona_id).to eq(ai_persona.id)
+    end
+
     it "ensures that seeded llm ID is properly passed and allowed" do
       seeded_llm = Fabricate(:seeded_model)
 
@@ -158,6 +190,29 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
       expect(history.details).to include("llm_model_id")
     end
 
+    it "logs staff action when ai_persona_id changes" do
+      new_persona =
+        Fabricate(
+          :ai_persona,
+          name: "Updated Persona",
+          response_format: [{ "key" => "spam", "type" => "boolean" }],
+        )
+
+      put "/admin/plugins/discourse-ai/ai-spam.json", params: { ai_persona_id: new_persona.id }
+
+      expect(response.status).to eq(200)
+
+      # Verify the log was created with the right subject
+      history =
+        UserHistory.where(
+          action: UserHistory.actions[:custom_staff],
+          custom_type: "update_ai_spam_settings",
+        ).last
+      expect(history).to be_present
+      expect(history.details).to include("ai_persona_id")
+      expect(history.details).to include(new_persona.name)
+    end
+
     it "does not log staff action when only is_enabled changes" do
       # Check initial count of logs
       initial_count =
@@ -231,7 +286,7 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
 
       llm2 = Fabricate(:llm_model, name: "DiffLLM")
 
-      DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "just because"]) do
+      DiscourseAi::Completions::Llm.with_prepared_responses([true, "just because"]) do
         post "/admin/plugins/discourse-ai/ai-spam/test.json",
             params: {
               post_url: spam_post2.url,
@@ -247,7 +302,7 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
     end
 
     it "can scan using post id" do
-      DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because apples"]) do
+      DiscourseAi::Completions::Llm.with_prepared_responses([true, "because apples"]) do
         post "/admin/plugins/discourse-ai/ai-spam/test.json",
             params: {
               post_url: spam_post.id.to_s,
@@ -272,7 +327,7 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
 
       AiSpamLog.create!(post: spam_post, llm_model: llm_model, is_spam: true, created_at: 1.day.ago)
 
-      DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because banana"]) do
+      DiscourseAi::Completions::Llm.with_prepared_responses([true, "because banana"]) do
         post "/admin/plugins/discourse-ai/ai-spam/test.json",
             params: {
               post_url: spam_post.url,