# frozen_string_literal: true

module DiscourseAi
  module AiModeration
    class SpamScanner
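      # Scan thresholds: MINIMUM_EDIT_DIFFERENCE is the Levenshtein distance an
      # edit must reach before a rescan, EDIT_DELAY_MINUTES debounces repeat
      # scans of one post, and MAX_AGE_TO_SCAN / MAX_RAW_SCAN_LENGTH cap how old
      # a post may be and how much raw text is sent to the LLM. POSTS_TO_SCAN
      # presumably bounds how many of a user's first posts are checked.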
      POSTS_TO_SCAN = 3
      MINIMUM_EDIT_DIFFERENCE = 10
      EDIT_DELAY_MINUTES = 10
      MAX_AGE_TO_SCAN = 1.day
      MAX_RAW_SCAN_LENGTH = 5000

      SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"
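
      # Hook for newly created posts: if the post qualifies, mark it so it is
      # scanned once it has been cooked.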
      def self.new_post(post)
        return if !enabled?
        return if !should_scan_post?(post)

        flag_post_for_scanning(post)
      end
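
      # Creates (once) the dedicated bot account used to file spam flags and
      # records its id in ai_spam_detection_user_id. Like other bots it gets a
      # negative id, picked just below the existing ones.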
      def self.ensure_flagging_user!
        if !SiteSetting.ai_spam_detection_user_id.present?
          User.transaction do
            # prefer a "high" id for this bot
            id = User.where("id > -20").minimum(:id) - 1
            id = User.minimum(:id) - 1 if id == -100

            user =
              User.create!(
                id: id,
                username: UserNameSuggester.suggest("discourse_ai_spam"),
                name: "Discourse AI Spam Scanner",
                email: "#{SecureRandom.hex(10)}@invalid.invalid",
                active: true,
                approved: true,
                trust_level: TrustLevel[4],
                admin: true,
              )
            Group.user_trust_level_change!(user.id, user.trust_level)

            SiteSetting.ai_spam_detection_user_id = user.id
          end
        end
      end
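
      # The account used to file flags; falls back to the system user when no
      # dedicated scanner account is configured.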
      def self.flagging_user
        user = nil
        if SiteSetting.ai_spam_detection_user_id.present?
          user = User.find_by(id: SiteSetting.ai_spam_detection_user_id)
        end
        user || Discourse.system_user
      end
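
      # Runs once a post is cooked: enqueues the scan job, delaying it when the
      # post was already scanned within the last EDIT_DELAY_MINUTES so rapid
      # edits do not trigger back-to-back scans.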
      def self.after_cooked_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD]
        return if post.updated_at < MAX_AGE_TO_SCAN.ago

        last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first

        if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago
          delay_minutes =
            ((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60
          Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
        else
          Jobs.enqueue(:ai_spam_scan, post_id: post.id)
        end
      end
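
      # Queues a rescan after an edit, but only when the post has not hit the
      # scan limit and the raw content changed enough to matter.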
      def self.edited_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if scanned_max_times?(post)

        previous_version = post.revisions.last&.modifications&.dig("raw", 0)
        current_version = post.raw

        return if !significant_change?(previous_version, current_version)

        flag_post_for_scanning(post)
      end
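
      # Marks the post via a custom field; after_cooked_post checks this flag
      # before enqueuing a scan.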
      def self.flag_post_for_scanning(post)
        post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD] = "true"
        post.save_custom_fields
      end

      def self.enabled?
        SiteSetting.ai_spam_detection_enabled && SiteSetting.discourse_ai_enabled
      end
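
      # Scope of the scanner: only public (non-PM) posts by TL0/TL1 users who
      # have no more than a handful of posts in regular topics.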
      def self.should_scan_post?(post)
        return false if !post.present?
        return false if post.user.trust_level > TrustLevel[1]
        return false if post.topic.private_message?
        if Post
             .where(user_id: post.user_id)
             .joins(:topic)
             .where(topic: { archetype: Archetype.default })
             .limit(4)
             .count > 3
          return false
        end
        true
      end
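
      # A post is scanned at most three times, however often it is edited.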
      def self.scanned_max_times?(post)
        AiSpamLog.where(post_id: post.id).count >= 3
      end
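
      # Compares the previous and current raw versions; tiny touch-ups do not
      # justify another LLM call.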
      def self.significant_change?(previous_version, current_version)
        return true if previous_version.nil? # First edit should be scanned

        # Use Discourse's built-in levenshtein implementation
        distance =
          ScreenedEmail.levenshtein(previous_version.to_s[0...1000], current_version.to_s[0...1000])

        distance >= MINIMUM_EDIT_DIFFERENCE
      end
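
      # Admin-facing test path: scans a single post, optionally with a
      # different LLM or custom instructions, and returns the verdict plus a
      # detailed log including the model's own reasoning.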
      def self.test_post(post, custom_instructions: nil, llm_id: nil)
        settings = AiModerationSetting.spam
        llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
        llm = llm_model.to_llm
        custom_instructions = custom_instructions || settings.custom_instructions.presence
        context = build_context(post)
        prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)

        result =
          llm.generate(
            prompt,
            temperature: 0.1,
            max_tokens: 5,
            user: Discourse.system_user,
            feature_name: "spam_detection_test",
            feature_context: {
              post_id: post.id,
            },
          )&.strip

        history = nil
        AiSpamLog
          .where(post: post)
          .order(:created_at)
          .limit(100)
          .each do |log|
            history ||= +"Scan History:\n"
            history << "date: #{log.created_at} is_spam: #{log.is_spam}\n"
          end

        log = +"Scanning #{post.url}\n\n"

        if history
          log << history
          log << "\n"
        end

        log << "LLM: #{llm_model.name}\n\n"
        log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n"
        log << "Context: #{context}\n\n"

        is_spam = check_if_spam(result)

        prompt.push(type: :model, content: result)
        prompt.push(type: :user, content: "Explain your reasoning")

        reasoning =
          llm.generate(
            prompt,
            temperature: 0.1,
            max_tokens: 100,
            user: Discourse.system_user,
            feature_name: "spam_detection_test",
            feature_context: {
              post_id: post.id,
            },
          )&.strip

        log << "#{reasoning}"

        { is_spam: is_spam, log: log }
      end
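
      # Builds the prompt: the system prompt plus the post context as the user
      # message, attaching up to three of the post's uploads, presumably so
      # image-based spam is visible to the model.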
      def self.completion_prompt(post, context:, custom_instructions:)
        system_prompt = build_system_prompt(custom_instructions)
        prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
        args = { type: :user, content: context }
        upload_ids = post.upload_ids
        args[:upload_ids] = upload_ids.take(3) if upload_ids.present?
        prompt.push(**args)
        prompt
      end
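
      # The real scan, run from the ai_spam_scan job: asks the LLM for a
      # verdict, records it in AiSpamLog, and acts on spam inside the same
      # transaction.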
      def self.perform_scan(post)
        return if !enabled?
        return if !should_scan_post?(post)

        settings = AiModerationSetting.spam
        return if !settings || !settings.llm_model

        context = build_context(post)
        llm = settings.llm_model.to_llm
        custom_instructions = settings.custom_instructions.presence
        prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)

        begin
          result =
            llm.generate(
              prompt,
              temperature: 0.1,
              max_tokens: 5,
              user: Discourse.system_user,
              feature_name: "spam_detection",
              feature_context: {
                post_id: post.id,
              },
            )&.strip

          is_spam = check_if_spam(result)

          audit_log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first
          AiSpamLog.transaction do
            log =
              AiSpamLog.create!(
                post: post,
                llm_model: settings.llm_model,
                ai_api_audit_log: audit_log,
                is_spam: is_spam,
                payload: context,
              )
            handle_spam(post, log) if is_spam
          end
        rescue StandardError => e
          # re-raise so the job system retries; otherwise failed scans would be dropped silently
          Discourse.warn_exception(
            e,
            message: "Discourse AI: Error in SpamScanner for post #{post.id}",
          )
          raise e
        end
      end

      private
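
      # The model is instructed to lead with SPAM or NOT_SPAM; anything that
      # does not start with "spam" counts as not spam.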
      def self.check_if_spam(result)
        (result.present? && result.strip.downcase.start_with?("spam"))
      end
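
      # Assembles the text sent to the LLM: post type (new topic vs reply),
      # surrounding topic info, author stats, and the truncated raw content.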
      def self.build_context(post)
        context = []

        # Clear distinction between reply and new topic
        if post.is_first_post?
          context << "NEW TOPIC POST ANALYSIS"
          context << "- Topic title: #{post.topic.title}"
          context << "- Category: #{post.topic.category&.name}"
        else
          context << "REPLY POST ANALYSIS"
          context << "- In topic: #{post.topic.title}"
          context << "- Category: #{post.topic.category&.name}"
          context << "- Topic started by: #{post.topic.user.username}"

          # Include parent post context for replies
          if post.reply_to_post.present?
            parent = post.reply_to_post
            context << "\nReplying to #{parent.user.username}'s post:"
            context << "#{parent.raw[0..500]}..." if parent.raw.length > 500
            context << parent.raw if parent.raw.length <= 500
          end
        end

        context << "\nPost Author Information:"
        context << "- Username: #{post.user.username}"
        context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days"
        context << "- Total posts: #{post.user.post_count}"
        context << "- Trust level: #{post.user.trust_level}"

        context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
        context << post.raw[0..MAX_RAW_SCAN_LENGTH]
        context.join("\n")
      end
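
      # System prompt: fixed detection criteria, then site-specific details,
      # with any staff-provided custom instructions appended at the end.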
      def self.build_system_prompt(custom_instructions)
        base_prompt = +<<~PROMPT
          You are a spam detection system. Analyze the following post content and context.
          Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.

          - ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API

          Consider the post type carefully:
          - For REPLY posts: Check if the response is relevant and topical to the thread
          - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion

          A post is spam if it matches any of these criteria:
          - Contains unsolicited commercial content or promotions
          - Has suspicious or unrelated external links
          - Shows patterns of automated/bot posting
          - Contains irrelevant content or advertisements
          - For replies: Completely unrelated to the discussion thread
          - Uses excessive keywords or repetitive text patterns
          - Shows suspicious formatting or character usage

          Be especially strict with:
          - Replies that ignore the previous conversation
          - Posts containing multiple unrelated external links
          - Generic responses that could be posted anywhere

          Be fair to:
          - New users making legitimate first contributions
          - Non-native speakers making genuine efforts to participate
          - Topic-relevant product mentions in appropriate contexts
        PROMPT

        base_prompt << "\n\n"
        base_prompt << <<~SITE_SPECIFIC
          Site Specific Information:
          - Site name: #{SiteSetting.title}
          - Site URL: #{Discourse.base_url}
          - Site description: #{SiteSetting.site_description}
          - Site top 10 categories: #{Category.where(read_restricted: false).order(posts_year: :desc).limit(10).pluck(:name).join(", ")}
        SITE_SPECIFIC

        if custom_instructions.present?
          base_prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
        end

        base_prompt
      end
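
      # A spam verdict flags the post for review, silences the author, and
      # hides their recent posts and topics.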
      def self.handle_spam(post, log)
        url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
        reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url)

        result =
          PostActionCreator.new(
            flagging_user,
            post,
            PostActionType.types[:spam],
            reason: reason,
            queue_for_review: true,
          ).perform

        log.update!(reviewable: result.reviewable)
        SpamRule::AutoSilence.new(post.user, post).silence_user
        # silencing does not auto-hide posts for TL1 users, so hide them
        # explicitly to cover TL1 as well
        hide_posts_and_topics(post.user)
      end
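
      # Hides everything the user posted in the last 24 hours and unlists any
      # topics they started in that window.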
      def self.hide_posts_and_topics(user)
        Post
          .where(user_id: user.id)
          .where("created_at > ?", 24.hours.ago)
          .update_all(
            [
              "hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)",
              Post.hidden_reasons[:new_user_spam_threshold_reached],
            ],
          )
        topic_ids =
          Post
            .where(user_id: user.id, post_number: 1)
            .where("created_at > ?", 24.hours.ago)
            .select(:topic_id)

        Topic.where(id: topic_ids).update_all(visible: false)
      end
    end
  end
end