377 lines
13 KiB
Ruby
377 lines
13 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module DiscourseAi
|
|
module AiModeration
|
|
class SpamScanner
|
|
# NOTE(review): presumably the number of a user's first regular-topic posts
# that are eligible for scanning; confirm against should_scan_post?.
POSTS_TO_SCAN = 3
# Minimum Levenshtein distance between two revisions for an edit to count as
# significant (see significant_change?).
MINIMUM_EDIT_DIFFERENCE = 10
# Minimum number of minutes between two scans of the same post
# (see after_cooked_post).
EDIT_DELAY_MINUTES = 10
# Posts last updated longer ago than this are not queued for scanning.
MAX_AGE_TO_SCAN = 1.day
# Upper bound on how much of a post's raw text is put into the LLM context.
MAX_RAW_SCAN_LENGTH = 5000

# Post custom field that is set to "true" when a post is marked for scanning.
SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"
|
|
|
|
# Hook for newly created posts: marks the post for scanning when spam
# detection is enabled and the post is eligible.
#
# Returns nil when nothing was flagged, otherwise whatever
# flag_post_for_scanning returns.
def self.new_post(post)
  flag_post_for_scanning(post) if enabled? && should_scan_post?(post)
end
|
|
|
|
# Creates the dedicated bot account used to flag spam, unless an id is already
# stored in SiteSetting.ai_spam_detection_user_id.
#
# The account is created as an active, approved, trust level 4 admin with a
# random @invalid.invalid email address, and its id is written back to the
# site setting. Creation and the setting update happen inside one
# User transaction.
#
# Returns nil when a user id is already configured.
def self.ensure_flagging_user!
  return if SiteSetting.ai_spam_detection_user_id.present?

  User.transaction do
    # prefer a "high" id for this bot: one below the lowest id above -20
    id = User.where("id > -20").minimum(:id) - 1
    # The candidate can be as low as -20, which the query above never looks
    # at. If that id is already taken, fall back to one below the global
    # minimum instead of colliding on create!.
    id = User.minimum(:id) - 1 if User.exists?(id: id)

    user =
      User.create!(
        id: id,
        username: UserNameSuggester.suggest("discourse_ai_spam"),
        name: "Discourse AI Spam Scanner",
        email: "#{SecureRandom.hex(10)}@invalid.invalid",
        active: true,
        approved: true,
        trust_level: TrustLevel[4],
        admin: true,
      )
    Group.user_trust_level_change!(user.id, user.trust_level)

    SiteSetting.ai_spam_detection_user_id = user.id
  end
end
|
|
|
|
# Returns the user that spam flags are raised as: the user whose id is stored
# in SiteSetting.ai_spam_detection_user_id when that setting is present and
# the user can be found, otherwise Discourse.system_user.
def self.flagging_user
  configured_id = SiteSetting.ai_spam_detection_user_id
  bot = User.find_by(id: configured_id) if configured_id.present?
  bot || Discourse.system_user
end
|
|
|
|
# Hook that runs after a post has been cooked: enqueues the :ai_spam_scan job
# for posts that were previously marked for scanning.
#
# Nothing is enqueued when spam detection is disabled, the post is not
# eligible, the scan custom field is not set, or the post was last updated
# more than MAX_AGE_TO_SCAN ago.
#
# When the post was already scanned less than EDIT_DELAY_MINUTES ago the job
# is delayed until that window has passed; otherwise it is enqueued right away.
def self.after_cooked_post(post)
  return if !enabled?
  return if !should_scan_post?(post)
  return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD]
  return if post.updated_at < MAX_AGE_TO_SCAN.ago

  last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first
  now = Time.current
  next_allowed_at = last_scan && (last_scan.created_at + EDIT_DELAY_MINUTES.minutes)

  if next_allowed_at && next_allowed_at > now
    # Round up so the rescan never fires before the delay window has closed
    # (rounding down would give a 0 minute delay for anything under 60s).
    delay_minutes = ((next_allowed_at - now) / 60.0).ceil
    Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
  else
    Jobs.enqueue(:ai_spam_scan, post_id: post.id)
  end
end
|
|
|
|
# Hook for edited posts: marks the post for another scan when the edit is
# significant.
#
# Skipped when spam detection is disabled, the post is not eligible, or the
# post has already been scanned the maximum number of times.
def self.edited_post(post)
  return unless enabled? && should_scan_post?(post)
  return if scanned_max_times?(post)

  # First element of the "raw" modification of the latest revision, i.e. the
  # text before that edit; nil when there is no such revision data.
  before_edit = post.revisions.last&.modifications&.dig("raw", 0)

  flag_post_for_scanning(post) if significant_change?(before_edit, post.raw)
end
|
|
|
|
# Marks the post as needing a spam scan by setting the
# SHOULD_SCAN_POST_CUSTOM_FIELD custom field to "true" and saving the
# post's custom fields.
def self.flag_post_for_scanning(post)
  fields = post.custom_fields
  fields[SHOULD_SCAN_POST_CUSTOM_FIELD] = "true"
  post.save_custom_fields
end
|
|
|
|
# True only when both the spam detection setting and the plugin-wide
# discourse_ai_enabled setting are on.
def self.enabled?
  detection_enabled = SiteSetting.ai_spam_detection_enabled
  detection_enabled && SiteSetting.discourse_ai_enabled
end
|
|
|
|
# Decides whether a post is eligible for spam scanning.
#
# A post is eligible when all of the following hold:
# - the post, its author and its topic exist
# - the author's trust level is at most TrustLevel[1]
# - the topic is not a private message
# - the author has at most POSTS_TO_SCAN posts in default-archetype topics
#
# Returns true or false.
def self.should_scan_post?(post)
  return false if !post.present?

  author = post.user
  topic = post.topic
  # Either record can be missing (the `&.` and with_deleted lookups elsewhere
  # in this class allow for that); such a post cannot be scanned.
  return false if author.nil? || topic.nil?
  return false if author.trust_level > TrustLevel[1]
  return false if topic.private_message?

  # Only need to know whether the author is past the threshold, so cap the
  # count at one more than the threshold.
  public_post_count =
    Post
      .where(user_id: post.user_id)
      .joins(:topic)
      .where(topic: { archetype: Archetype.default })
      .limit(POSTS_TO_SCAN + 1)
      .count

  public_post_count <= POSTS_TO_SCAN
end
|
|
|
|
# True when the post already has three or more AiSpamLog entries, i.e. it
# should not be rescanned after further edits.
def self.scanned_max_times?(post)
  scan_count = AiSpamLog.where(post_id: post.id).count
  scan_count >= 3
end
|
|
|
|
# Decides whether an edit changed the text enough to justify a rescan.
#
# Returns true when there is no previous version, otherwise true when the
# Levenshtein distance between the first 1000 characters of both versions is
# at least MINIMUM_EDIT_DIFFERENCE.
def self.significant_change?(previous_version, current_version)
  # First edit should be scanned
  return true if previous_version.nil?

  before, after = [previous_version, current_version].map { |text| text.to_s[0...1000] }

  # Use Discourse's built-in levenshtein implementation
  ScreenedEmail.levenshtein(before, after) >= MINIMUM_EDIT_DIFFERENCE
end
|
|
|
|
# Runs a one-off spam check against a post and returns a human-readable
# trace of it. Does not create an AiSpamLog entry or call handle_spam.
#
# post                - the post to check (its topic is looked up with
#                       deleted topics included when post.topic is nil)
# custom_instructions - overrides the configured custom instructions
# llm_id              - id of the LlmModel to use instead of the configured one
#
# Returns { is_spam:, log: } where log contains the post URL, earlier scan
# history (up to 100 entries), the model name, the system prompt, the context
# and the model's explanation of its verdict.
def self.test_post(post, custom_instructions: nil, llm_id: nil)
  settings = AiModerationSetting.spam
  llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
  llm = llm_model.to_llm
  custom_instructions ||= settings.custom_instructions.presence
  context = build_context(post, post.topic || Topic.with_deleted.find_by(id: post.topic_id))
  prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)

  # Both LLM calls only differ in the token budget.
  ask_llm = ->(max_tokens) do
    llm.generate(
      prompt,
      temperature: 0.1,
      max_tokens: max_tokens,
      user: Discourse.system_user,
      feature_name: "spam_detection_test",
      feature_context: {
        post_id: post.id,
      },
    )&.strip
  end

  verdict = ask_llm.call(5)

  history_lines =
    AiSpamLog
      .where(post: post)
      .order(:created_at)
      .limit(100)
      .map { |entry| "date: #{entry.created_at} is_spam: #{entry.is_spam}\n" }

  log = +"Scanning #{post.url}\n\n"

  if history_lines.any?
    log << "Scan History:\n"
    log << history_lines.join
    log << "\n"
  end

  log << "LLM: #{llm_model.name}\n\n"
  log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n"
  log << "Context: #{context}\n\n"

  is_spam = check_if_spam(verdict)

  # Feed the verdict back and ask the model to justify it.
  prompt.push(type: :model, content: verdict)
  prompt.push(type: :user, content: "Explain your reasoning")

  log << ask_llm.call(100).to_s

  { is_spam: is_spam, log: log }
end
|
|
|
|
# Builds the LLM prompt for a post: the system prompt (including any custom
# instructions) followed by one user message carrying the context text and,
# when the post has uploads, the first three upload ids.
def self.completion_prompt(post, context:, custom_instructions:)
  prompt = DiscourseAi::Completions::Prompt.new(build_system_prompt(custom_instructions))

  message = { type: :user, content: context }
  attached_ids = post.upload_ids
  message[:upload_ids] = attached_ids.take(3) if attached_ids.present?

  prompt.push(**message)
  prompt
end
|
|
|
|
# Scans a post with the configured LLM, records the outcome in AiSpamLog and
# hands the post to handle_spam when the verdict is spam.
#
# Does nothing when spam detection is disabled, the post is not eligible, or
# no spam moderation settings / LLM model are configured.
#
# Any StandardError raised by the LLM call or while logging is reported via
# Discourse.warn_exception and then re-raised.
def self.perform_scan(post)
  return unless enabled? && should_scan_post?(post)

  settings = AiModerationSetting.spam
  return unless settings && settings.llm_model

  llm_model = settings.llm_model
  context = build_context(post)
  llm = llm_model.to_llm
  prompt =
    completion_prompt(
      post,
      context: context,
      custom_instructions: settings.custom_instructions.presence,
    )

  begin
    verdict =
      llm.generate(
        prompt,
        temperature: 0.1,
        max_tokens: 5,
        user: Discourse.system_user,
        feature_name: "spam_detection",
        feature_context: {
          post_id: post.id,
        },
      )&.strip

    is_spam = check_if_spam(verdict)

    # NOTE(review): this takes the newest audit row for the feature without
    # filtering by post, so with concurrent scans it may belong to another
    # post - confirm whether that matters.
    audit_log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first

    AiSpamLog.transaction do
      spam_log =
        AiSpamLog.create!(
          post: post,
          llm_model: llm_model,
          ai_api_audit_log: audit_log,
          is_spam: is_spam,
          payload: context,
        )
      handle_spam(post, spam_log) if is_spam
    end
  rescue StandardError => e
    # we need retries otherwise stuff will not be handled
    Discourse.warn_exception(
      e,
      message: "Discourse AI: Error in SpamScanner for post #{post.id}",
    )
    raise
  end
end
|
|
|
|
private
|
|
|
|
# Interprets the raw LLM reply: true when it is present and, ignoring
# surrounding whitespace and case, starts with "spam"; false otherwise.
#
# Note: this method sits below a bare `private`, which has no effect on
# methods defined with `def self.`, so it is still publicly callable.
def self.check_if_spam(result)
  return false if result.blank?

  result.strip.downcase.start_with?("spam")
end
|
|
|
|
# Builds the plain-text context that is sent to the LLM as the user message.
#
# post  - the post being analysed
# topic - topic to describe; defaults to post.topic (callers can pass a
#         topic explicitly, e.g. one looked up with deleted topics included)
#
# The text contains, in order: a header saying whether this is a new topic or
# a reply (with topic title, category and, for replies, the topic starter and
# up to 500 characters of the post being replied to), information about the
# author when there is one, and at most MAX_RAW_SCAN_LENGTH characters of the
# post's raw text.
#
# Returns a String with the sections joined by newlines.
def self.build_context(post, topic = nil)
  topic ||= post.topic
  context = []

  # Clear distinction between reply and new topic
  if post.is_first_post?
    context << "NEW TOPIC POST ANALYSIS"
    context << "- Topic title: #{topic.title}"
    context << "- Category: #{topic.category&.name}"
  else
    context << "REPLY POST ANALYSIS"
    context << "- In topic: #{topic.title}"
    context << "- Category: #{topic.category&.name}"
    context << "- Topic started by: #{topic.user&.username}"

    if post.reply_to_post_number.present?
      parent =
        Post.with_deleted.find_by(topic_id: topic.id, post_number: post.reply_to_post_number)
      if parent
        context << "\nReplying to #{parent.user&.username}'s post:"
        parent_raw = parent.raw.to_s
        # Exclusive range: exactly 500 characters, not 501.
        context << (parent_raw.length > 500 ? "#{parent_raw[0...500]}..." : parent_raw)
      end
    end
  end

  context << "\nPost Author Information:"
  if post.user # during test we may not have a user
    context << "- Username: #{post.user.username}"
    context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days"
    context << "- Total posts: #{post.user.post_count}"
    context << "- Trust level: #{post.user.trust_level}"
  end

  context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
  # Exclusive range so the excerpt matches the length announced above.
  context << post.raw.to_s[0...MAX_RAW_SCAN_LENGTH]
  context.join("\n")
end
|
|
|
|
# Assembles the system prompt for the spam classifier.
#
# The prompt consists of the fixed classification instructions, a block of
# site information (title, base URL, description and the names of the ten
# non-read-restricted categories with the highest posts_year) and, when
# given, the staff-provided custom instructions.
#
# Returns a mutable String.
def self.build_system_prompt(custom_instructions)
  top_categories =
    Category
      .where(read_restricted: false)
      .order(posts_year: :desc)
      .limit(10)
      .pluck(:name)
      .join(", ")

  prompt = +<<~PROMPT
    You are a spam detection system. Analyze the following post content and context.
    Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.

    - ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API

    Consider the post type carefully:
    - For REPLY posts: Check if the response is relevant and topical to the thread
    - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion

    A post is spam if it matches any of these criteria:
    - Contains unsolicited commercial content or promotions
    - Has suspicious or unrelated external links
    - Shows patterns of automated/bot posting
    - Contains irrelevant content or advertisements
    - For replies: Completely unrelated to the discussion thread
    - Uses excessive keywords or repetitive text patterns
    - Shows suspicious formatting or character usage

    Be especially strict with:
    - Replies that ignore the previous conversation
    - Posts containing multiple unrelated external links
    - Generic responses that could be posted anywhere

    Be fair to:
    - New users making legitimate first contributions
    - Non-native speakers making genuine efforts to participate
    - Topic-relevant product mentions in appropriate contexts
  PROMPT

  prompt << "\n\n"
  prompt << <<~SITE_SPECIFIC
    Site Specific Information:
    - Site name: #{SiteSetting.title}
    - Site URL: #{Discourse.base_url}
    - Site description: #{SiteSetting.site_description}
    - Site top 10 categories: #{top_categories}
  SITE_SPECIFIC

  if custom_instructions.present?
    prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
  end

  prompt
end
|
|
|
|
# Acts on a post the LLM classified as spam.
#
# Flags the post as spam on behalf of flagging_user (queued for review),
# stores the resulting reviewable on the given AiSpamLog entry, runs the
# auto-silence rule for the author and hides the author's recent posts and
# topics.
def self.handle_spam(post, log)
  admin_url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
  flag_reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: admin_url)

  flag_result =
    PostActionCreator.new(
      flagging_user,
      post,
      PostActionType.types[:spam],
      reason: flag_reason,
      queue_for_review: true,
    ).perform

  log.update!(reviewable: flag_result.reviewable)
  SpamRule::AutoSilence.new(post.user, post).silence_user

  # this is required cause tl1 is not auto hidden
  # we want to also handle tl1
  hide_posts_and_topics(post.user)
end
|
|
|
|
# Hides everything the user posted in the last 24 hours.
#
# All of the user's posts created in that window are marked hidden; the
# hidden reason is set to :new_user_spam_threshold_reached unless the post
# already has one. Topics whose first post is among them are made invisible.
# Both updates use update_all.
def self.hide_posts_and_topics(user)
  cutoff = 24.hours.ago
  hidden_reason = Post.hidden_reasons[:new_user_spam_threshold_reached]

  Post
    .where(user_id: user.id)
    .where("created_at > ?", cutoff)
    .update_all(
      ["hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)", hidden_reason],
    )

  started_topic_ids =
    Post
      .where(user_id: user.id, post_number: 1)
      .where("created_at > ?", cutoff)
      .select(:topic_id)

  Topic.where(id: started_topic_ids).update_all(visible: false)
end
|
|
end
|
|
end
|
|
end
|