Previously, staff and bots would get scanned if their trust level was low. Additionally, if the spam scanner user was somehow blocked (deactivated, silenced, banned), it would stop the feature from working. This adds an override that unconditionally ensures the user is set up correctly prior to scanning.

# frozen_string_literal: true

module DiscourseAi
  module AiModeration
    class SpamScanner
      POSTS_TO_SCAN = 3
      MINIMUM_EDIT_DIFFERENCE = 10
      EDIT_DELAY_MINUTES = 10
      MAX_AGE_TO_SCAN = 1.day
      MAX_RAW_SCAN_LENGTH = 5000

      SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"

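      # A minimal usage sketch, assuming these hooks are wired to post events
      # elsewhere in the plugin and `post` is a freshly created Post:
      #
      #   DiscourseAi::AiModeration::SpamScanner.new_post(post)
      #   # marks the post via SHOULD_SCAN_POST_CUSTOM_FIELD; the scan itself
      #   # is enqueued later, from after_cooked_post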
      def self.new_post(post)
        return if !enabled?
        return if !should_scan_post?(post)

        flag_post_for_scanning(post)
      end

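      # Bot accounts in Discourse use negative ids (the system user is -1),
      # so the scanner user is created one below the smallest id above -20.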
      def self.ensure_flagging_user!
        if !SiteSetting.ai_spam_detection_user_id.present?
          User.transaction do
            # prefer a "high" id for this bot
            id = User.where("id > -20").minimum(:id) - 1
            id = User.minimum(:id) - 1 if id == -100

            user =
              User.create!(
                id: id,
                username: UserNameSuggester.suggest("discourse_ai_spam"),
                name: "Discourse AI Spam Scanner",
                email: "#{SecureRandom.hex(10)}@invalid.invalid",
                active: true,
                approved: true,
                trust_level: TrustLevel[4],
                admin: true,
              )
            Group.user_trust_level_change!(user.id, user.trust_level)

            SiteSetting.ai_spam_detection_user_id = user.id
          end
        end
      end

      def self.flagging_user
        user = nil
        if SiteSetting.ai_spam_detection_user_id.present?
          user = User.find_by(id: SiteSetting.ai_spam_detection_user_id)
          ensure_safe_flagging_user!(user)
        end
        user || Discourse.system_user
      end

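      # Repairs the bot in place so scanning keeps working even if the
      # account was tampered with, e.g.:
      #
      #   SpamScanner.flagging_user.silenced? # => false, any silence is
      #   # lifted before the user is returned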
      def self.ensure_safe_flagging_user!(user)
        # only repair bot users; if this is somehow set to a human, skip repairs
        return if !user&.bot?
        user.update!(silenced_till: nil) if user.silenced?
        user.update!(trust_level: TrustLevel[4]) if user.trust_level != TrustLevel[4]
        user.update!(suspended_till: nil, suspended_at: nil) if user.suspended?
        user.update!(active: true) if !user.active?
      end

      def self.after_cooked_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD]
        return if post.updated_at < MAX_AGE_TO_SCAN.ago

        last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first

        if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago
          # debounce rapid successive edits: if the last scan ran 4 minutes
          # ago, the rescan is enqueued roughly 6 minutes out (10 - 4)
          delay_minutes =
            ((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60
          Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
        else
          Jobs.enqueue(:ai_spam_scan, post_id: post.id)
        end
      end

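      # Rescans edited posts, at most 3 times per post and only when the raw
      # content changed materially (see significant_change? below).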
      def self.edited_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if scanned_max_times?(post)

        previous_version = post.revisions.last&.modifications&.dig("raw", 0)
        current_version = post.raw

        return if !significant_change?(previous_version, current_version)

        flag_post_for_scanning(post)
      end

      def self.flag_post_for_scanning(post)
        post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD] = "true"
        post.save_custom_fields
      end

      def self.enabled?
        SiteSetting.ai_spam_detection_enabled && SiteSetting.discourse_ai_enabled
      end

      def self.should_scan_post?(post)
        return false if !post.present?
        return false if post.user.trust_level > TrustLevel[1]
        return false if post.topic.private_message?
        return false if post.user.bot?
        return false if post.user.staff?

        # authors with more than 3 public (non-PM) posts have an established
        # track record, so skip them
        if Post
             .where(user_id: post.user_id)
             .joins(:topic)
             .where(topic: { archetype: Archetype.default })
             .limit(4)
             .count > 3
          return false
        end

        true
      end

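      # Caps lifetime scans per post: once 3 AiSpamLog rows exist, edit
      # rescans stop.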
      def self.scanned_max_times?(post)
        AiSpamLog.where(post_id: post.id).count >= 3
      end

      def self.significant_change?(previous_version, current_version)
        return true if previous_version.nil? # first edit should be scanned

        # Use Discourse's built-in levenshtein implementation; e.g. appending
        # " now!!!" to a post is a distance of 7, below the threshold of 10,
        # so that edit would not trigger a rescan
        distance =
          ScreenedEmail.levenshtein(previous_version.to_s[0...1000], current_version.to_s[0...1000])

        distance >= MINIMUM_EDIT_DIFFERENCE
      end

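      # Admin-facing test path. A console sketch (post id hypothetical):
      #
      #   SpamScanner.test_post(Post.find(123))
      #   # => { is_spam: false, log: "Scanning ...\n\nLLM: ..." }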
      def self.test_post(post, custom_instructions: nil, llm_id: nil)
        settings = AiModerationSetting.spam
        llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
        llm = llm_model.to_llm
        custom_instructions = custom_instructions || settings.custom_instructions.presence
        context = build_context(post, post.topic || Topic.with_deleted.find_by(id: post.topic_id))
        prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)

        result =
          llm.generate(
            prompt,
            temperature: 0.1,
            max_tokens: 5,
            user: Discourse.system_user,
            feature_name: "spam_detection_test",
            feature_context: {
              post_id: post.id,
            },
          )&.strip

        history = nil
        AiSpamLog
          .where(post: post)
          .order(:created_at)
          .limit(100)
          .each do |log|
            history ||= +"Scan History:\n"
            history << "date: #{log.created_at} is_spam: #{log.is_spam}\n"
          end

        log = +"Scanning #{post.url}\n\n"

        if history
          log << history
          log << "\n"
        end

        log << "LLM: #{llm_model.name}\n\n"
        log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n"
        log << "Context: #{context}\n\n"

        is_spam = check_if_spam(result)

        prompt.push(type: :model, content: result)
        prompt.push(type: :user, content: "Explain your reasoning")

        reasoning =
          llm.generate(
            prompt,
            temperature: 0.1,
            max_tokens: 100,
            user: Discourse.system_user,
            feature_name: "spam_detection_test",
            feature_context: {
              post_id: post.id,
            },
          )&.strip

        log << "#{reasoning}"

        { is_spam: is_spam, log: log }
      end

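      # When the post has uploads, the user message content becomes an array,
      # e.g. ["<context text>", { upload_id: 42 }, { upload_id: 43 }]
      # (ids hypothetical); at most 3 uploads are forwarded.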
      def self.completion_prompt(post, context:, custom_instructions:)
        system_prompt = build_system_prompt(custom_instructions)
        prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
        args = { type: :user, content: context }
        upload_ids = post.upload_ids
        if upload_ids.present?
          args[:content] = [args[:content]]
          upload_ids.take(3).each { |upload_id| args[:content] << { upload_id: upload_id } }
        end
        prompt.push(**args)
        prompt
      end

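      # Checks eligibility before delegating to the bang variant below.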
      def self.perform_scan(post)
        return if !should_scan_post?(post)

        perform_scan!(post)
      end

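      # Runs the scan itself: builds the context, asks the LLM for a
      # SPAM/NOT_SPAM verdict, records an AiSpamLog row, and on a positive
      # verdict flags the post via handle_spam.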
      def self.perform_scan!(post)
        return if !enabled?
        settings = AiModerationSetting.spam
        return if !settings || !settings.llm_model

        context = build_context(post)
        llm = settings.llm_model.to_llm
        custom_instructions = settings.custom_instructions.presence
        prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)

        begin
          result =
            llm.generate(
              prompt,
              temperature: 0.1,
              max_tokens: 5,
              user: Discourse.system_user,
              feature_name: "spam_detection",
              feature_context: {
                post_id: post.id,
              },
            )&.strip

          is_spam = check_if_spam(result)

          log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first
          AiSpamLog.transaction do
            log =
              AiSpamLog.create!(
                post: post,
                llm_model: settings.llm_model,
                ai_api_audit_log: log,
                is_spam: is_spam,
                payload: context,
              )
            handle_spam(post, log) if is_spam
          end
        rescue StandardError => e
          # re-raise so the job queue retries; otherwise failures would be dropped
          Discourse.warn_exception(
            e,
            message: "Discourse AI: Error in SpamScanner for post #{post.id}",
          )
          raise e
        end
      end

      def self.fix_spam_scanner_not_admin
        user = DiscourseAi::AiModeration::SpamScanner.flagging_user

        if user.present?
          user.update!(admin: true)
        else
          raise Discourse::NotFound
        end
      end

      private

      def self.check_if_spam(result)
        # check_if_spam("SPAM")     => true
        # check_if_spam("NOT_SPAM") => false
        # check_if_spam(nil)        => false
        (result.present? && result.strip.downcase.start_with?("spam"))
      end

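      # Builds the plain-text payload sent to the LLM. For a reply it looks
      # roughly like (values hypothetical):
      #
      #   REPLY POST ANALYSIS
      #   - In topic: Getting started
      #   - Category: support
      #   - Topic started by: alice
      #   ...
      #   Post Content (first 5000 chars):
      #   <post raw>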
      def self.build_context(post, topic = nil)
        topic ||= post.topic
        context = []

        # Clear distinction between reply and new topic
        if post.is_first_post?
          context << "NEW TOPIC POST ANALYSIS"
          context << "- Topic title: #{topic.title}"
          context << "- Category: #{topic.category&.name}"
        else
          context << "REPLY POST ANALYSIS"
          context << "- In topic: #{topic.title}"
          context << "- Category: #{topic.category&.name}"
          context << "- Topic started by: #{topic.user&.username}"

          if post.reply_to_post_number.present?
            parent =
              Post.with_deleted.find_by(topic_id: topic.id, post_number: post.reply_to_post_number)
            if parent
              context << "\nReplying to #{parent.user&.username}'s post:"
              context << "#{parent.raw[0..500]}..." if parent.raw.length > 500
              context << parent.raw if parent.raw.length <= 500
            end
          end
        end

        context << "\nPost Author Information:"
        if user = post.user # during test we may not have a user
          context << "- Username: #{user.username}\n"
          context << "- Email: #{user.email}\n"
          context << "- Account age: #{(Time.current - user.created_at).to_i / 86_400} days\n"
          context << "- Total posts: #{user.post_count}\n"
          context << "- Trust level: #{user.trust_level}\n"
          if info = location_info(user)
            context << "- Registration Location: #{info[:registration]}\n" if info[:registration]
            context << "- Last Location: #{info[:last]}\n" if info[:last]
          end
        end

        context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
        context << post.raw[0..MAX_RAW_SCAN_LENGTH]
        context.join("\n")
      end

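      # Resolves last-seen and registration locations via DiscourseIpInfo;
      # returns nil when nothing is resolvable or a lookup fails.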
      def self.location_info(user)
        registration, last = nil
        if user.ip_address.present?
          info = DiscourseIpInfo.get(user.ip_address, resolve_hostname: true)
          last = "#{info[:location]} (#{info[:organization]})" if info && info[:location].present?
        end
        if user.registration_ip_address.present?
          info = DiscourseIpInfo.get(user.registration_ip_address, resolve_hostname: true)
          registration = "#{info[:location]} (#{info[:organization]})" if info &&
            info[:location].present?
        end

        rval = nil
        if registration || last
          rval = { registration: registration } if registration
          if last && last != registration
            rval ||= {}
            rval[:last] = last
          end
        end

        rval
      rescue => e
        Discourse.warn_exception(e, message: "Failed to lookup location info")
        nil
      end

      def self.build_system_prompt(custom_instructions)
        base_prompt = +<<~PROMPT
          You are a spam detection system. Analyze the following post content and context.
          Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.

          - ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API

          Consider the post type carefully:
          - For REPLY posts: Check if the response is relevant and topical to the thread
          - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion

          A post is spam if it matches any of these criteria:
          - Contains unsolicited commercial content or promotions
          - Has suspicious or unrelated external links
          - Shows patterns of automated/bot posting
          - Contains irrelevant content or advertisements
          - For replies: Completely unrelated to the discussion thread
          - Uses excessive keywords or repetitive text patterns
          - Shows suspicious formatting or character usage

          Be especially strict with:
          - Replies that ignore the previous conversation
          - Posts containing multiple unrelated external links
          - Generic responses that could be posted anywhere

          Be fair to:
          - New users making legitimate first contributions
          - Non-native speakers making genuine efforts to participate
          - Topic-relevant product mentions in appropriate contexts
        PROMPT

        base_prompt << "\n\n"
        base_prompt << <<~SITE_SPECIFIC
          Site Specific Information:
          - Site name: #{SiteSetting.title}
          - Site URL: #{Discourse.base_url}
          - Site description: #{SiteSetting.site_description}
          - Site top 10 categories: #{Category.where(read_restricted: false).order(posts_year: :desc).limit(10).pluck(:name).join(", ")}
        SITE_SPECIFIC

        if custom_instructions.present?
          base_prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
        end

        base_prompt
      end

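      # On a spam verdict: flag the post as spam (queued for review), silence
      # the author, then hide the post (and its topic when it is the first
      # post).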
      def self.handle_spam(post, log)
        url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
        reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url)

        flagging_user = self.flagging_user

        result =
          PostActionCreator.new(
            flagging_user,
            post,
            PostActionType.types[:spam],
            reason: reason,
            queue_for_review: true,
          ).perform

        # Currently, core does not support re-flagging something that is
        # already flagged as spam. Long term we may want to support this, but
        # in the meantime we should not be silencing/hiding if the
        # PostActionCreator fails.
        if result.success?
          log.update!(reviewable: result.reviewable)

          reason = I18n.t("discourse_ai.spam_detection.silence_reason", url: url)
          silencer =
            UserSilencer.new(
              post.user,
              flagging_user,
              message: :too_many_spam_flags,
              post_id: post.id,
              reason: reason,
              keep_posts: true,
            )
          silencer.silence

          # the silencer will not hide TL1 posts, so we do that here
          hide_post(post)
        else
          log.update!(
            error:
              "unable to flag post as spam, post action failed for post #{post.id} with error: '#{result.errors.full_messages.join(", ").truncate(3000)}'",
          )
        end
      end

      def self.hide_post(post)
        Post.where(id: post.id).update_all(
          [
            "hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)",
            Post.hidden_reasons[:new_user_spam_threshold_reached],
          ],
        )

        Topic.where(id: post.topic_id).update_all(visible: false) if post.post_number == 1
      end
    end
  end
end