# frozen_string_literal: true

module DiscourseAi
  module AiModeration
    class SpamScanner
      POSTS_TO_SCAN = 3
      MINIMUM_EDIT_DIFFERENCE = 10
      EDIT_DELAY_MINUTES = 10
      MAX_AGE_TO_SCAN = 1.day
      MAX_RAW_SCAN_LENGTH = 5000

      SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"

      def self.new_post(post)
        return if !enabled?
        return if !should_scan_post?(post)

        flag_post_for_scanning(post)
      end

      def self.ensure_flagging_user!
        if !SiteSetting.ai_spam_detection_user_id.present?
          User.transaction do
            # prefer a "high" id for this bot
            id = User.where("id > -20").minimum(:id) - 1
            id = User.minimum(:id) - 1 if id == -100
            user =
              User.create!(
                id: id,
                username: UserNameSuggester.suggest("discourse_ai_spam"),
                name: "Discourse AI Spam Scanner",
                email: "#{SecureRandom.hex(10)}@invalid.invalid",
                active: true,
                approved: true,
                trust_level: TrustLevel[4],
                admin: true,
              )
            Group.user_trust_level_change!(user.id, user.trust_level)
            SiteSetting.ai_spam_detection_user_id = user.id
          end
        end
      end

      def self.flagging_user
        user = nil
        if SiteSetting.ai_spam_detection_user_id.present?
          user = User.find_by(id: SiteSetting.ai_spam_detection_user_id)
          ensure_safe_flagging_user!(user)
        end
        user || Discourse.system_user
      end

      def self.ensure_safe_flagging_user!(user)
        # only repair bot users; if this is somehow set to a human, skip repairs
        return if !user.bot?

        user.update!(silenced_till: nil) if user.silenced?
        user.update!(trust_level: TrustLevel[4]) if user.trust_level != TrustLevel[4]
        user.update!(suspended_till: nil, suspended_at: nil) if user.suspended?
        user.update!(active: true) if !user.active?
      end

      def self.after_cooked_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD]
        return if post.updated_at < MAX_AGE_TO_SCAN.ago

        last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first

        if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago
          delay_minutes =
            ((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60
          Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
        else
          Jobs.enqueue(:ai_spam_scan, post_id: post.id)
        end
      end

      def self.edited_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if scanned_max_times?(post)

        previous_version = post.revisions.last&.modifications&.dig("raw", 0)
        current_version = post.raw

        return if !significant_change?(previous_version, current_version)

        flag_post_for_scanning(post)
      end

      def self.flag_post_for_scanning(post)
        post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD] = "true"
        post.save_custom_fields
      end

      def self.enabled?
        SiteSetting.ai_spam_detection_enabled && SiteSetting.discourse_ai_enabled
      end

      def self.should_scan_post?(post)
        return false if !post.present?
        return false if post.user.trust_level > TrustLevel[1]
        return false if post.topic.private_message?
        return false if post.user.bot?
        return false if post.user.staff?

        # only a user's first few public posts are scanned
        if Post
             .where(user_id: post.user_id)
             .joins(:topic)
             .where(topic: { archetype: Archetype.default })
             .limit(POSTS_TO_SCAN + 1)
             .count > POSTS_TO_SCAN
          return false
        end

        true
      end

      def self.scanned_max_times?(post)
        AiSpamLog.where(post_id: post.id).count >= 3
      end

      def self.significant_change?(previous_version, current_version)
        return true if previous_version.nil? # First edit should be scanned
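
        # Illustrative example (the scale here is an assumption, not taken from the
        # code): with ScreenedEmail.levenshtein returning the character edit distance,
        # fixing a typo ("Thansk" -> "Thanks") is a distance of about 2 and is ignored,
        # while replacing a paragraph with a block of spam links easily clears
        # MINIMUM_EDIT_DIFFERENCE (10) and re-flags the post for scanning.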

        # Use Discourse's built-in levenshtein implementation
        distance =
          ScreenedEmail.levenshtein(previous_version.to_s[0...1000], current_version.to_s[0...1000])

        distance >= MINIMUM_EDIT_DIFFERENCE
      end

      def self.test_post(post, custom_instructions: nil, llm_id: nil)
        settings = AiModerationSetting.spam
        custom_instructions = custom_instructions || settings.custom_instructions.presence

        target_msg =
          build_target_content_msg(
            post,
            post.topic || Topic.with_deleted.find_by(id: post.topic_id),
          )

        if custom_instructions.present?
          custom_instructions =
            "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
        end

        ctx =
          build_bot_context(
            feature_name: "spam_detection_test",
            messages: [target_msg],
            custom_instructions: custom_instructions,
          )

        bot = build_scanner_bot(settings: settings, llm_id: llm_id)

        structured_output = nil
        llm_args = { feature_context: { post_id: post.id } }

        bot.reply(ctx, llm_args: llm_args) do |partial, _, type|
          structured_output = partial if type == :structured_output
        end

        history = nil
        AiSpamLog
          .where(post: post)
          .order(:created_at)
          .limit(100)
          .each do |log|
            history ||= +"Scan History:\n"
            history << "date: #{log.created_at} is_spam: #{log.is_spam}\n"
          end

        log = +"Scanning #{post.url}\n\n"

        if history
          log << history
          log << "\n"
        end

        used_llm = bot.model
        log << "LLM: #{used_llm.name}\n\n"

        spam_persona = bot.persona
        used_prompt = spam_persona.craft_prompt(ctx, llm: used_llm).system_message_text
        log << "System Prompt: #{used_prompt}\n\n"

        text_content =
          if target_msg[:content].is_a?(Array)
            target_msg[:content].first
          else
            target_msg[:content]
          end

        log << "Context: #{text_content}\n\n"

        is_spam = is_spam?(structured_output)

        reasoning_insts = {
          type: :user,
          content: "Don't return JSON this time. Explain your reasoning in plain text.",
        }

        ctx.messages = [
          target_msg,
          { type: :model, content: { spam: is_spam }.to_json },
          reasoning_insts,
        ]
        ctx.bypass_response_format = true

        reasoning = +""
        bot.reply(ctx, llm_args: llm_args.merge(max_tokens: 100)) do |partial, _, type|
          reasoning << partial if type.blank?
        end

        log << reasoning.strip

        { is_spam: is_spam, log: log }
      end

      def self.perform_scan(post)
        return if !should_scan_post?(post)

        perform_scan!(post)
      end

      def self.perform_scan!(post)
        return if !enabled?

        settings = AiModerationSetting.spam
        return if !settings || !settings.llm_model || !settings.ai_persona

        target_msg = build_target_content_msg(post)

        custom_instructions = settings.custom_instructions.presence
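
        # Staff-provided guidance, when present, is appended to the persona prompt as
        # an addendum. Illustrative example of the resulting string (the site-specific
        # sentence is hypothetical):
        #
        #   "\n\nAdditional site-specific instructions provided by Staff:\n" \
        #   "Posts that only promote cryptocurrency giveaways are always spam."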
        if custom_instructions.present?
          custom_instructions =
            "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
        end

        ctx =
          build_bot_context(
            messages: [target_msg],
            custom_instructions: custom_instructions,
            user: self.flagging_user,
          )

        bot = build_scanner_bot(settings: settings, user: self.flagging_user)

        structured_output = nil

        begin
          llm_args = { feature_context: { post_id: post.id } }
          bot.reply(ctx, llm_args: llm_args) do |partial, _, type|
            structured_output = partial if type == :structured_output
          end

          is_spam = is_spam?(structured_output)

          # link the most recent spam_detection API audit log entry (if any) to the
          # scan record created below
          log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first

          text_content =
            if target_msg[:content].is_a?(Array)
              target_msg[:content].first
            else
              target_msg[:content]
            end

          AiSpamLog.transaction do
            log =
              AiSpamLog.create!(
                post: post,
                llm_model: settings.llm_model,
                ai_api_audit_log: log,
                is_spam: is_spam,
                payload: text_content,
              )
            handle_spam(post, log) if is_spam
          end
        rescue StandardError => e
          # we need retries, otherwise failures would go unhandled
          Discourse.warn_exception(
            e,
            message: "Discourse AI: Error in SpamScanner for post #{post.id}",
          )
          raise e
        end
      end

      def self.fix_spam_scanner_not_admin
        user = DiscourseAi::AiModeration::SpamScanner.flagging_user

        if user.present?
          user.update!(admin: true)
        else
          raise Discourse::NotFound
        end
      end

      private

      def self.build_bot_context(
        feature_name: "spam_detection",
        messages:,
        custom_instructions: nil,
        bypass_response_format: false,
        user: Discourse.system_user
      )
        DiscourseAi::Personas::BotContext
          .new(
            user: user,
            skip_tool_details: true,
            feature_name: feature_name,
            messages: messages,
            bypass_response_format: bypass_response_format,
          )
          .tap { |ctx| ctx.custom_instructions = custom_instructions if custom_instructions }
      end

      def self.build_scanner_bot(
        settings:,
        use_structured_output: true,
        llm_id: nil,
        user: Discourse.system_user
      )
        persona = settings.ai_persona.class_instance&.new
        llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model

        DiscourseAi::Personas::Bot.as(user, persona: persona, model: llm_model)
      end

      def self.is_spam?(structured_output)
        structured_output.present? && structured_output.read_buffered_property(:spam)
      end

      def self.build_target_content_msg(post, topic = nil)
        topic ||= post.topic
        context = []

        # Clear distinction between reply and new topic
        if post.is_first_post?
          context << "NEW TOPIC POST ANALYSIS"
          context << "- Topic title: #{topic.title}"
          context << "- Category: #{topic.category&.name}"
        else
          context << "REPLY POST ANALYSIS"
          context << "- In topic: #{topic.title}"
          context << "- Category: #{topic.category&.name}"
          context << "- Topic started by: #{topic.user&.username}"

          if post.reply_to_post_number.present?
            parent =
              Post.with_deleted.find_by(topic_id: topic.id, post_number: post.reply_to_post_number)
            if parent
              context << "\nReplying to #{parent.user&.username}'s post:"
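              # Quote at most the first ~500 characters of the parent post; longer
              # parents are truncated with a trailing ellipsis by the two lines below.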
              context << "#{parent.raw[0..500]}..." if parent.raw.length > 500
              context << parent.raw if parent.raw.length <= 500
            end
          end
        end

        context << "\nPost Author Information:"
        if user = post.user # during test we may not have a user
          context << "- Username: #{user.username}\n"
          context << "- Email: #{user.email}\n"
          context << "- Account age: #{(Time.current - user.created_at).to_i / 86_400} days\n"
          context << "- Total posts: #{user.post_count}\n"
          context << "- Trust level: #{user.trust_level}\n"
          if info = location_info(user)
            context << "- Registration Location: #{info[:registration]}\n" if info[:registration]
            context << "- Last Location: #{info[:last]}\n" if info[:last]
          end
        end

        context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
        context << post.raw[0..MAX_RAW_SCAN_LENGTH]

        user_msg = { type: :user, content: context.join("\n") }

        upload_ids = post.upload_ids
        if upload_ids.present?
          # attach up to three of the post's uploads to the message
          user_msg[:content] = [user_msg[:content]]
          upload_ids.take(3).each { |upload_id| user_msg[:content] << { upload_id: upload_id } }
        end

        user_msg
      end

      def self.location_info(user)
        registration = last = nil

        if user.ip_address.present?
          info = DiscourseIpInfo.get(user.ip_address, resolve_hostname: true)
          last = "#{info[:location]} (#{info[:organization]})" if info && info[:location].present?
        end

        if user.registration_ip_address.present?
          info = DiscourseIpInfo.get(user.registration_ip_address, resolve_hostname: true)
          registration = "#{info[:location]} (#{info[:organization]})" if info && info[:location].present?
        end

        rval = nil
        if registration || last
          rval = { registration: registration } if registration
          if last && last != registration
            rval ||= {}
            rval[:last] = last
          end
        end

        rval
      rescue => e
        Discourse.warn_exception(e, message: "Failed to lookup location info")
        nil
      end

      def self.handle_spam(post, log)
        url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
        reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url)

        flagging_user = self.flagging_user

        result =
          PostActionCreator.new(
            flagging_user,
            post,
            PostActionType.types[:spam],
            reason: reason,
            queue_for_review: true,
          ).perform

        # Core currently does not support re-flagging a post that is already flagged
        # as spam. Long term we may want to support this, but in the meantime we
        # should not silence/hide when the PostActionCreator fails.
        if result.success?
          log.update!(reviewable: result.reviewable)

          reason = I18n.t("discourse_ai.spam_detection.silence_reason", url: url)
          silencer =
            UserSilencer.new(
              post.user,
              flagging_user,
              message: :too_many_spam_flags,
              post_id: post.id,
              reason: reason,
              keep_posts: true,
            )
          silencer.silence

          # the silencer will not hide TL1 posts, so we do it here
          hide_post(post)
        else
          log.update!(
            error:
              "unable to flag post as spam, post action failed for post #{post.id} with error: '#{result.errors.full_messages.join(", ").truncate(3000)}'",
          )
        end
      end

      def self.hide_post(post)
        Post.where(id: post.id).update_all(
          [
            "hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)",
            Post.hidden_reasons[:new_user_spam_threshold_reached],
          ],
        )

        Topic.where(id: post.topic_id).update_all(visible: false) if post.post_number == 1
      end
    end
  end
end
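
# Illustrative usage sketch (assumptions about wiring that lives elsewhere in the
# plugin, not part of this class): post lifecycle hooks and the :ai_spam_scan job
# are expected to drive the scanner roughly like this:
#
#   scanner = DiscourseAi::AiModeration::SpamScanner
#   scanner.new_post(post)          # on creation: mark the post for scanning
#   scanner.after_cooked_post(post) # after cooking: enqueue the :ai_spam_scan job
#   scanner.edited_post(post)       # on revision: re-flag only significant edits
#   scanner.perform_scan(post)      # called from the job to run the LLM scan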