discourse-ai/lib/ai_moderation/spam_scanner.rb
Sam 32dc45ba4f
FIX: never block spam scanning user (#1437)
Previously, staff and bots would get scanned if their TL was low.
Additionally, if the spam scanner user was somehow blocked
(deactivated, silenced, banned), the feature would stop working.

This adds an override that unconditionally ensures the user is set up correctly prior to scanning
2025-06-17 14:51:27 +10:00

# frozen_string_literal: true
module DiscourseAi
module AiModeration
class SpamScanner
POSTS_TO_SCAN = 3
MINIMUM_EDIT_DIFFERENCE = 10
EDIT_DELAY_MINUTES = 10
MAX_AGE_TO_SCAN = 1.day
MAX_RAW_SCAN_LENGTH = 5000
SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"
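
# Entry point when a post is created: mark eligible posts so they are scanned once cooked.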
def self.new_post(post)
return if !enabled?
return if !should_scan_post?(post)
flag_post_for_scanning(post)
end
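
# Creates the dedicated spam-scanning bot user (negative id, admin, TL4) on first use and
# stores its id in the ai_spam_detection_user_id site setting.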
def self.ensure_flagging_user!
if !SiteSetting.ai_spam_detection_user_id.present?
User.transaction do
# prefer a "high" id for this bot
id = User.where("id > -20").minimum(:id) - 1
id = User.minimum(:id) - 1 if id == -100
user =
User.create!(
id: id,
username: UserNameSuggester.suggest("discourse_ai_spam"),
name: "Discourse AI Spam Scanner",
email: "#{SecureRandom.hex(10)}@invalid.invalid",
active: true,
approved: true,
trust_level: TrustLevel[4],
admin: true,
)
Group.user_trust_level_change!(user.id, user.trust_level)
SiteSetting.ai_spam_detection_user_id = user.id
end
end
end
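
# The user that flags and silences spammers; repaired if blocked, with the system user as a fallback.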
def self.flagging_user
user = nil
if SiteSetting.ai_spam_detection_user_id.present?
user = User.find_by(id: SiteSetting.ai_spam_detection_user_id)
ensure_safe_flagging_user!(user)
end
user || Discourse.system_user
end
def self.ensure_safe_flagging_user!(user)
# only repair bot users; if this is somehow set to a human account, skip repairs
return if !user.bot?
user.update!(silenced_till: nil) if user.silenced?
user.update!(trust_level: TrustLevel[4]) if user.trust_level != TrustLevel[4]
user.update!(suspended_till: nil, suspended_at: nil) if user.suspended?
user.update!(active: true) if !user.active?
end
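
# Once the post is cooked, enqueue the scan job, delaying rescans of posts scanned within
# the last EDIT_DELAY_MINUTES minutes.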
def self.after_cooked_post(post)
return if !enabled?
return if !should_scan_post?(post)
return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD]
return if post.updated_at < MAX_AGE_TO_SCAN.ago
last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first
if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago
delay_minutes =
((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60
Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
else
Jobs.enqueue(:ai_spam_scan, post_id: post.id)
end
end
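
# Re-flag an edited post only when the edit is significant and the post has not hit the scan limit.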
def self.edited_post(post)
return if !enabled?
return if !should_scan_post?(post)
return if scanned_max_times?(post)
previous_version = post.revisions.last&.modifications&.dig("raw", 0)
current_version = post.raw
return if !significant_change?(previous_version, current_version)
flag_post_for_scanning(post)
end
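
# Scanning happens asynchronously after cooking; this custom field marks a post as pending a scan.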
def self.flag_post_for_scanning(post)
post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD] = "true"
post.save_custom_fields
end
def self.enabled?
SiteSetting.ai_spam_detection_enabled && SiteSetting.discourse_ai_enabled
end
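
# Scan only low trust level, non-staff, non-bot authors with few public posts, and never private messages.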
def self.should_scan_post?(post)
return false if !post.present?
return false if post.user.trust_level > TrustLevel[1]
return false if post.topic.private_message?
return false if post.user.bot?
return false if post.user.staff?
if Post
.where(user_id: post.user_id)
.joins(:topic)
.where(topic: { archetype: Archetype.default })
.limit(POSTS_TO_SCAN + 1)
.count > POSTS_TO_SCAN
return false
end
true
end
def self.scanned_max_times?(post)
AiSpamLog.where(post_id: post.id).count >= 3
end
def self.significant_change?(previous_version, current_version)
return true if previous_version.nil? # First edit should be scanned
# Use Discourse's built-in levenshtein implementation
distance =
ScreenedEmail.levenshtein(previous_version.to_s[0...1000], current_version.to_s[0...1000])
distance >= MINIMUM_EDIT_DIFFERENCE
end
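
# Dry run against a single post: returns the verdict plus a readable log containing scan history,
# the prompt, the context and a follow-up asking the LLM to explain its reasoning.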
def self.test_post(post, custom_instructions: nil, llm_id: nil)
settings = AiModerationSetting.spam
llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
llm = llm_model.to_llm
custom_instructions = custom_instructions || settings.custom_instructions.presence
context = build_context(post, post.topic || Topic.with_deleted.find_by(id: post.topic_id))
prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)
result =
llm.generate(
prompt,
temperature: 0.1,
max_tokens: 5,
user: Discourse.system_user,
feature_name: "spam_detection_test",
feature_context: {
post_id: post.id,
},
)&.strip
history = nil
AiSpamLog
.where(post: post)
.order(:created_at)
.limit(100)
.each do |log|
history ||= +"Scan History:\n"
history << "date: #{log.created_at} is_spam: #{log.is_spam}\n"
end
log = +"Scanning #{post.url}\n\n"
if history
log << history
log << "\n"
end
log << "LLM: #{llm_model.name}\n\n"
log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n"
log << "Context: #{context}\n\n"
is_spam = check_if_spam(result)
prompt.push(type: :model, content: result)
prompt.push(type: :user, content: "Explain your reasoning")
reasoning =
llm.generate(
prompt,
temperature: 0.1,
max_tokens: 100,
user: Discourse.system_user,
feature_name: "spam_detection_test",
feature_context: {
post_id: post.id,
},
)&.strip
log << "#{reasoning}"
{ is_spam: is_spam, log: log }
end
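
# Builds the LLM prompt, attaching up to three of the post's uploads alongside the text context.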
def self.completion_prompt(post, context:, custom_instructions:)
system_prompt = build_system_prompt(custom_instructions)
prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
args = { type: :user, content: context }
upload_ids = post.upload_ids
if upload_ids.present?
args[:content] = [args[:content]]
upload_ids.take(3).each { |upload_id| args[:content] << { upload_id: upload_id } }
end
prompt.push(**args)
prompt
end
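
# Job entry points: perform_scan re-checks eligibility, perform_scan! calls the LLM, records an
# AiSpamLog row and handles spam inside a single transaction.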
def self.perform_scan(post)
return if !should_scan_post?(post)
perform_scan!(post)
end
def self.perform_scan!(post)
return if !enabled?
settings = AiModerationSetting.spam
return if !settings || !settings.llm_model
context = build_context(post)
llm = settings.llm_model.to_llm
custom_instructions = settings.custom_instructions.presence
prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)
begin
result =
llm.generate(
prompt,
temperature: 0.1,
max_tokens: 5,
user: Discourse.system_user,
feature_name: "spam_detection",
feature_context: {
post_id: post.id,
},
)&.strip
is_spam = check_if_spam(result)
log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first
AiSpamLog.transaction do
log =
AiSpamLog.create!(
post: post,
llm_model: settings.llm_model,
ai_api_audit_log: log,
is_spam: is_spam,
payload: context,
)
handle_spam(post, log) if is_spam
end
rescue StandardError => e
# log and re-raise so the job can retry; otherwise failed scans would never be handled
Discourse.warn_exception(
e,
message: "Discourse AI: Error in SpamScanner for post #{post.id}",
)
raise e
end
end
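
# Repair helper for when the flagging user has lost its admin flag.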
def self.fix_spam_scanner_not_admin
user = DiscourseAi::AiModeration::SpamScanner.flagging_user
if user.present?
user.update!(admin: true)
else
raise Discourse::NotFound
end
end
private
def self.check_if_spam(result)
(result.present? && result.strip.downcase.start_with?("spam"))
end
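
# Assembles the context handed to the LLM: topic/reply framing, author details and the truncated raw post.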
def self.build_context(post, topic = nil)
topic ||= post.topic
context = []
# Clear distinction between reply and new topic
if post.is_first_post?
context << "NEW TOPIC POST ANALYSIS"
context << "- Topic title: #{topic.title}"
context << "- Category: #{topic.category&.name}"
else
context << "REPLY POST ANALYSIS"
context << "- In topic: #{topic.title}"
context << "- Category: #{topic.category&.name}"
context << "- Topic started by: #{topic.user&.username}"
if post.reply_to_post_number.present?
parent =
Post.with_deleted.find_by(topic_id: topic.id, post_number: post.reply_to_post_number)
if parent
context << "\nReplying to #{parent.user&.username}'s post:"
context << "#{parent.raw[0..500]}..." if parent.raw.length > 500
context << parent.raw if parent.raw.length <= 500
end
end
end
context << "\nPost Author Information:"
if user = post.user # during test we may not have a user
context << "- Username: #{user.username}\n"
context << "- Email: #{user.email}\n"
context << "- Account age: #{(Time.current - user.created_at).to_i / 86_400} days\n"
context << "- Total posts: #{user.post_count}\n"
context << "- Trust level: #{user.trust_level}\n"
if info = location_info(user)
context << "- Registration Location: #{info[:registration]}\n" if info[:registration]
context << "- Last Location: #{info[:last]}\n" if info[:last]
end
end
context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
context << post.raw[0..MAX_RAW_SCAN_LENGTH]
context.join("\n")
end
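
# Best-effort lookup of the author's last-seen and registration IP locations; returns nil when
# nothing useful is found.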
def self.location_info(user)
registration, last = nil
if user.ip_address.present?
info = DiscourseIpInfo.get(user.ip_address, resolve_hostname: true)
last = "#{info[:location]} (#{info[:organization]})" if info && info[:location].present?
end
if user.registration_ip_address.present?
info = DiscourseIpInfo.get(user.registration_ip_address, resolve_hostname: true)
registration = "#{info[:location]} (#{info[:organization]})" if info &&
info[:location].present?
end
rval = nil
if registration || last
rval = { registration: registration } if registration
if last && last != registration
rval ||= {}
rval[:last] = last
end
end
rval
rescue => e
Discourse.warn_exception(e, message: "Failed to lookup location info")
nil
end
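
# Base spam-detection instructions plus site-specific details and any staff-provided custom instructions.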
def self.build_system_prompt(custom_instructions)
base_prompt = +<<~PROMPT
You are a spam detection system. Analyze the following post content and context.
Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.
- ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API
Consider the post type carefully:
- For REPLY posts: Check if the response is relevant and topical to the thread
- For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion
A post is spam if it matches any of these criteria:
- Contains unsolicited commercial content or promotions
- Has suspicious or unrelated external links
- Shows patterns of automated/bot posting
- Contains irrelevant content or advertisements
- For replies: Completely unrelated to the discussion thread
- Uses excessive keywords or repetitive text patterns
- Shows suspicious formatting or character usage
Be especially strict with:
- Replies that ignore the previous conversation
- Posts containing multiple unrelated external links
- Generic responses that could be posted anywhere
Be fair to:
- New users making legitimate first contributions
- Non-native speakers making genuine efforts to participate
- Topic-relevant product mentions in appropriate contexts
PROMPT
base_prompt << "\n\n"
base_prompt << <<~SITE_SPECIFIC
Site Specific Information:
- Site name: #{SiteSetting.title}
- Site URL: #{Discourse.base_url}
- Site description: #{SiteSetting.site_description}
- Site top 10 categories: #{Category.where(read_restricted: false).order(posts_year: :desc).limit(10).pluck(:name).join(", ")}
SITE_SPECIFIC
if custom_instructions.present?
base_prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
end
base_prompt
end
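
# Flag the post as spam, silence the author (keeping their posts) and hide the post; if core
# refuses the flag (e.g. the post is already flagged as spam) record the error instead.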
def self.handle_spam(post, log)
url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url)
flagging_user = self.flagging_user
result =
PostActionCreator.new(
flagging_user,
post,
PostActionType.types[:spam],
reason: reason,
queue_for_review: true,
).perform
# Currently in core re-flagging something that is already flagged as spam
# is not supported, long term we may want to support this but in the meantime
# we should not be silencing/hiding if the PostActionCreator fails.
if result.success?
log.update!(reviewable: result.reviewable)
reason = I18n.t("discourse_ai.spam_detection.silence_reason", url: url)
silencer =
UserSilencer.new(
post.user,
flagging_user,
message: :too_many_spam_flags,
post_id: post.id,
reason: reason,
keep_posts: true,
)
silencer.silence
# silencer will not hide tl1 posts, so we do this here
hide_post(post)
else
log.update!(
error:
"unable to flag post as spam, post action failed for post #{post.id} with error: '#{result.errors.full_messages.join(", ").truncate(3000)}'",
)
end
end
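
# Hide the post (preserving any existing hidden reason) and unlist the topic when its first post is hidden.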
def self.hide_post(post)
Post.where(id: post.id).update_all(
[
"hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)",
Post.hidden_reasons[:new_user_spam_threshold_reached],
],
)
Topic.where(id: post.topic_id).update_all(visible: false) if post.post_number == 1
end
end
end
end