DEV: refactor bot internals

This introduces a proper object for bot context, which makes it simpler to improve context management as we go, because we have a nice object to work with. It also starts a refactor allowing a single message to have multiple uploads throughout.

* transplant method to message builder
* chipping away at inline uploads
* image support is improved but not fully fixed yet, partially working in Anthropic, still quite a few dialects to go
* OpenAI and Claude are now working
* Gemini is now working as well
* fix Nova
* more dialects...
* fix Ollama
* fix specs
* update artifact fixed
* more tests
* spam scanner
* pass more specs
* bunch of specs improved
* more bug fixes
* all the rest of the tests are working
* improve test coverage and ensure custom tools are aware of new context object
* tests are working, but we need more tests
* resolve merge conflict
* new preamble and expanded specs on ai tool
* remove concept of "standalone tools": this is no longer needed; we can set custom raw, and tool details are injected into tool calls
441 lines
15 KiB
Ruby
# frozen_string_literal: true

module DiscourseAi
  module AiModeration
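    # Scans posts from new, low-trust users for spam with an LLM. New and
    # edited posts are flagged for scanning, scanned via a background job once
    # cooked, and, when the model answers SPAM, flagged for review while the
    # author is silenced and the post hidden.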
    class SpamScanner
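      # Scanning thresholds: number of initial posts to scan per user, the
      # minimum levenshtein distance an edit must introduce to warrant a
      # rescan, the cooldown between scans of the same post, the maximum post
      # age still eligible for scanning, and how much raw content is sent to
      # the LLM.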
      POSTS_TO_SCAN = 3
      MINIMUM_EDIT_DIFFERENCE = 10
      EDIT_DELAY_MINUTES = 10
      MAX_AGE_TO_SCAN = 1.day
      MAX_RAW_SCAN_LENGTH = 5000

      SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"

      def self.new_post(post)
        return if !enabled?
        return if !should_scan_post?(post)

        flag_post_for_scanning(post)
      end

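      # Creates the dedicated "Discourse AI Spam Scanner" bot account on first
      # use. Like other Discourse bot users it is given a negative id, picked
      # just below the existing bot id range.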
      def self.ensure_flagging_user!
        if !SiteSetting.ai_spam_detection_user_id.present?
          User.transaction do
            # prefer a "high" id for this bot
            id = User.where("id > -20").minimum(:id) - 1
            id = User.minimum(:id) - 1 if id == -100

            user =
              User.create!(
                id: id,
                username: UserNameSuggester.suggest("discourse_ai_spam"),
                name: "Discourse AI Spam Scanner",
                email: "#{SecureRandom.hex(10)}@invalid.invalid",
                active: true,
                approved: true,
                trust_level: TrustLevel[4],
                admin: true,
              )
            Group.user_trust_level_change!(user.id, user.trust_level)

            SiteSetting.ai_spam_detection_user_id = user.id
          end
        end
      end

      def self.flagging_user
        user = nil
        if SiteSetting.ai_spam_detection_user_id.present?
          user = User.find_by(id: SiteSetting.ai_spam_detection_user_id)
        end
        user || Discourse.system_user
      end

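      # Runs once a flagged post has been cooked. Scans are debounced: if the
      # post was scanned within the last EDIT_DELAY_MINUTES, the next scan job
      # is delayed until that window has elapsed.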
      def self.after_cooked_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD]
        return if post.updated_at < MAX_AGE_TO_SCAN.ago

        last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first

        if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago
          delay_minutes =
            ((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60
          Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
        else
          Jobs.enqueue(:ai_spam_scan, post_id: post.id)
        end
      end

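      # Edits only trigger a rescan when the raw content changed materially
      # (see significant_change?) and the post has not hit its scan limit.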
      def self.edited_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if scanned_max_times?(post)

        previous_version = post.revisions.last&.modifications&.dig("raw", 0)
        current_version = post.raw

        return if !significant_change?(previous_version, current_version)

        flag_post_for_scanning(post)
      end

      def self.flag_post_for_scanning(post)
        post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD] = "true"
        post.save_custom_fields
      end

      def self.enabled?
        SiteSetting.ai_spam_detection_enabled && SiteSetting.discourse_ai_enabled
      end

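      # Only posts by new, low-trust authors (TL0/TL1) in public, regular
      # topics are scanned, and only their first few posts.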
      def self.should_scan_post?(post)
        return false if !post.present?
        return false if post.user.trust_level > TrustLevel[1]
        return false if post.topic.private_message?
        if Post
             .where(user_id: post.user_id)
             .joins(:topic)
             .where(topic: { archetype: Archetype.default })
             .limit(4)
             .count > 3
          return false
        end
        true
      end

      def self.scanned_max_times?(post)
        AiSpamLog.where(post_id: post.id).count >= 3
      end

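      # For example, fixing a typo ("teh" -> "the") is a levenshtein distance
      # of 2 and is ignored, while pasting in a spam link easily clears
      # MINIMUM_EDIT_DIFFERENCE.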
      def self.significant_change?(previous_version, current_version)
        return true if previous_version.nil? # First edit should be scanned

        # Use Discourse's built-in levenshtein implementation
        distance =
          ScreenedEmail.levenshtein(previous_version.to_s[0...1000], current_version.to_s[0...1000])

        distance >= MINIMUM_EDIT_DIFFERENCE
      end

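      # Admin-facing dry run: scans a post without flagging anything and
      # returns { is_spam:, log: }, where the log captures scan history, the
      # full prompt, and a follow-up request asking the LLM to explain its
      # verdict.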
      def self.test_post(post, custom_instructions: nil, llm_id: nil)
        settings = AiModerationSetting.spam
        llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
        llm = llm_model.to_llm
        custom_instructions ||= settings.custom_instructions.presence
        context = build_context(post, post.topic || Topic.with_deleted.find_by(id: post.topic_id))
        prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)

        result =
          llm.generate(
            prompt,
            temperature: 0.1,
            max_tokens: 5,
            user: Discourse.system_user,
            feature_name: "spam_detection_test",
            feature_context: {
              post_id: post.id,
            },
          )&.strip

        history = nil
        AiSpamLog
          .where(post: post)
          .order(:created_at)
          .limit(100)
          .each do |log|
            history ||= +"Scan History:\n"
            history << "date: #{log.created_at} is_spam: #{log.is_spam}\n"
          end

        log = +"Scanning #{post.url}\n\n"

        if history
          log << history
          log << "\n"
        end

        log << "LLM: #{llm_model.name}\n\n"
        log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n"
        log << "Context: #{context}\n\n"

        is_spam = check_if_spam(result)

        prompt.push(type: :model, content: result)
        prompt.push(type: :user, content: "Explain your reasoning")

        reasoning =
          llm.generate(
            prompt,
            temperature: 0.1,
            max_tokens: 100,
            user: Discourse.system_user,
            feature_name: "spam_detection_test",
            feature_context: {
              post_id: post.id,
            },
          )&.strip

        log << reasoning.to_s

        { is_spam: is_spam, log: log }
      end

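      # Builds the completion prompt from the system prompt and context. Up to
      # three of the post's uploads are attached as additional content so
      # vision-capable models can also scan image spam.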
      def self.completion_prompt(post, context:, custom_instructions:)
        system_prompt = build_system_prompt(custom_instructions)
        prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
        args = { type: :user, content: context }
        upload_ids = post.upload_ids
        if upload_ids.present?
          args[:content] = [args[:content]]
          upload_ids.take(3).each { |upload_id| args[:content] << { upload_id: upload_id } }
        end
        prompt.push(**args)
        prompt
      end

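      # perform_scan re-checks eligibility, since the post may have changed
      # since the job was enqueued; perform_scan! skips that check.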
      def self.perform_scan(post)
        return if !should_scan_post?(post)

        perform_scan!(post)
      end

      def self.perform_scan!(post)
        return if !enabled?
        settings = AiModerationSetting.spam
        return if !settings || !settings.llm_model

        context = build_context(post)
        llm = settings.llm_model.to_llm
        custom_instructions = settings.custom_instructions.presence
        prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)

        begin
          result =
            llm.generate(
              prompt,
              temperature: 0.1,
              max_tokens: 5,
              user: Discourse.system_user,
              feature_name: "spam_detection",
              feature_context: {
                post_id: post.id,
              },
            )&.strip

          is_spam = check_if_spam(result)

          log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first
          AiSpamLog.transaction do
            log =
              AiSpamLog.create!(
                post: post,
                llm_model: settings.llm_model,
                ai_api_audit_log: log,
                is_spam: is_spam,
                payload: context,
              )
            handle_spam(post, log) if is_spam
          end
        rescue StandardError => e
          # re-raise so the job is retried, otherwise failures would go unhandled
          Discourse.warn_exception(
            e,
            message: "Discourse AI: Error in SpamScanner for post #{post.id}",
          )
          raise e
        end
      end

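      # Repair helper that re-grants admin to the flagging bot account if it
      # was revoked.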
      def self.fix_spam_scanner_not_admin
        user = DiscourseAi::AiModeration::SpamScanner.flagging_user

        if user.present?
          user.update!(admin: true)
        else
          raise Discourse::NotFound
        end
      end

      private

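      # NOTE: `private` does not apply to `def self.` methods, so the helpers
      # below are still technically callable from the outside.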
      def self.check_if_spam(result)
        (result.present? && result.strip.downcase.start_with?("spam"))
      end

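      # Assembles the plain-text context sent to the LLM: post type (new topic
      # vs reply), topic and category, the parent post for replies, author
      # stats and IP-derived locations, and the truncated raw content.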
      def self.build_context(post, topic = nil)
        topic ||= post.topic
        context = []

        # Clear distinction between reply and new topic
        if post.is_first_post?
          context << "NEW TOPIC POST ANALYSIS"
          context << "- Topic title: #{topic.title}"
          context << "- Category: #{topic.category&.name}"
        else
          context << "REPLY POST ANALYSIS"
          context << "- In topic: #{topic.title}"
          context << "- Category: #{topic.category&.name}"
          context << "- Topic started by: #{topic.user&.username}"

          if post.reply_to_post_number.present?
            parent =
              Post.with_deleted.find_by(topic_id: topic.id, post_number: post.reply_to_post_number)
            if parent
              context << "\nReplying to #{parent.user&.username}'s post:"
              context << "#{parent.raw[0..500]}..." if parent.raw.length > 500
              context << parent.raw if parent.raw.length <= 500
            end
          end
        end

        context << "\nPost Author Information:"
        if user = post.user # during test we may not have a user
          context << "- Username: #{user.username}\n"
          context << "- Email: #{user.email}\n"
          context << "- Account age: #{(Time.current - user.created_at).to_i / 86_400} days\n"
          context << "- Total posts: #{user.post_count}\n"
          context << "- Trust level: #{user.trust_level}\n"
          if info = location_info(user)
            context << "- Registration Location: #{info[:registration]}\n" if info[:registration]
            context << "- Last Location: #{info[:last]}\n" if info[:last]
          end
        end

        context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
        context << post.raw[0..MAX_RAW_SCAN_LENGTH]
        context.join("\n")
      end

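      # Resolves the user's registration and most recent IPs to
      # "location (organization)" strings. The last location is only included
      # when it differs from the registration location; lookup failures are
      # logged and return nil.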
      def self.location_info(user)
        registration, last = nil
        if user.ip_address.present?
          info = DiscourseIpInfo.get(user.ip_address, resolve_hostname: true)
          last = "#{info[:location]} (#{info[:organization]})" if info && info[:location].present?
        end
        if user.registration_ip_address.present?
          info = DiscourseIpInfo.get(user.registration_ip_address, resolve_hostname: true)
          registration = "#{info[:location]} (#{info[:organization]})" if info &&
            info[:location].present?
        end

        rval = nil
        if registration || last
          rval = { registration: registration } if registration
          if last && last != registration
            rval ||= {}
            rval[:last] = last
          end
        end

        rval
      rescue => e
        Discourse.warn_exception(e, message: "Failed to lookup location info")
        nil
      end

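      # The system prompt pins the model to a bare SPAM/NOT_SPAM verdict
      # (parsed by check_if_spam) and appends site metadata so verdicts are
      # grounded in local context.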
      def self.build_system_prompt(custom_instructions)
        base_prompt = +<<~PROMPT
          You are a spam detection system. Analyze the following post content and context.
          Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.

          - ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API

          Consider the post type carefully:
          - For REPLY posts: Check if the response is relevant and topical to the thread
          - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion

          A post is spam if it matches any of these criteria:
          - Contains unsolicited commercial content or promotions
          - Has suspicious or unrelated external links
          - Shows patterns of automated/bot posting
          - Contains irrelevant content or advertisements
          - For replies: Completely unrelated to the discussion thread
          - Uses excessive keywords or repetitive text patterns
          - Shows suspicious formatting or character usage

          Be especially strict with:
          - Replies that ignore the previous conversation
          - Posts containing multiple unrelated external links
          - Generic responses that could be posted anywhere

          Be fair to:
          - New users making legitimate first contributions
          - Non-native speakers making genuine efforts to participate
          - Topic-relevant product mentions in appropriate contexts
        PROMPT

        base_prompt << "\n\n"
        base_prompt << <<~SITE_SPECIFIC
          Site Specific Information:
          - Site name: #{SiteSetting.title}
          - Site URL: #{Discourse.base_url}
          - Site description: #{SiteSetting.site_description}
          - Site top 10 categories: #{Category.where(read_restricted: false).order(posts_year: :desc).limit(10).pluck(:name).join(", ")}
        SITE_SPECIFIC

        if custom_instructions.present?
          base_prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
        end

        base_prompt
      end

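      # Flags the post as spam on behalf of the scanner bot, queues it for
      # review, silences the author, and hides the post (and its topic for
      # first posts).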
      def self.handle_spam(post, log)
        url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
        reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url)

        flagging_user = self.flagging_user

        result =
          PostActionCreator.new(
            flagging_user,
            post,
            PostActionType.types[:spam],
            reason: reason,
            queue_for_review: true,
          ).perform

        # Currently in core, re-flagging something that is already flagged as spam
        # is not supported. Long term we may want to support this, but in the
        # meantime we should not be silencing/hiding if the PostActionCreator fails.
        if result.success?
          log.update!(reviewable: result.reviewable)

          reason = I18n.t("discourse_ai.spam_detection.silence_reason", url: url)
          silencer =
            UserSilencer.new(
              post.user,
              flagging_user,
              message: :too_many_spam_flags,
              post_id: post.id,
              reason: reason,
              keep_posts: true,
            )
          silencer.silence

          # the silencer will not hide TL1 posts, so we do it here
          hide_post(post)
        else
          log.update!(
            error:
              "unable to flag post as spam, post action failed for post #{post.id} with error: '#{result.errors.full_messages.join(", ").truncate(3000)}'",
          )
        end
      end

      def self.hide_post(post)
        Post.where(id: post.id).update_all(
          [
            "hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)",
            Post.hidden_reasons[:new_user_spam_threshold_reached],
          ],
        )

        Topic.where(id: post.topic_id).update_all(visible: false) if post.post_number == 1
      end
    end
  end
end
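
# A minimal sketch of how this class is driven (assumed wiring; the actual
# event hooks and the Jobs::AiSpamScan handler live elsewhere in the plugin):
#
#   on(:post_created) { |post| DiscourseAi::AiModeration::SpamScanner.new_post(post) }
#   on(:post_edited) { |post| DiscourseAi::AiModeration::SpamScanner.edited_post(post) }
#
# after_cooked_post then enqueues :ai_spam_scan, whose handler is expected to
# call SpamScanner.perform_scan(post).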