FEATURE: Use Personas when scanning posts for spam (#1465)
This commit is contained in:
parent cc4e9e030f
commit b35f9bcc7c
@@ -11,6 +11,13 @@ module DiscourseAi
     def update
       initial_settings = AiModerationSetting.spam
-      initial_custom_instructions = initial_settings&.data&.dig("custom_instructions")
-      initial_llm_model_id = initial_settings&.llm_model_id
+
+      initial_data = {
+        custom_instructions: initial_settings&.data&.dig("custom_instructions"),
+        llm_model_id: initial_settings&.llm_model_id,
+        ai_persona_id: initial_settings&.ai_persona_id,
+      }
@@ -29,6 +36,22 @@ module DiscourseAi
           )
         end
       end
+
+      if allowed_params.key?(:ai_persona_id)
+        updated_params[:ai_persona_id] = allowed_params[:ai_persona_id]
+        persona = AiPersona.find_by(id: allowed_params[:ai_persona_id])
+        if persona.nil? ||
+             persona.response_format.to_a.none? { |rf|
+               rf["key"] == "spam" && rf["type"] == "boolean"
+             }
+          return(
+            render_json_error(
+              I18n.t("discourse_ai.llm.configuration.invalid_persona_response_format"),
+              status: 422,
+            )
+          )
+        end
+      end
       updated_params[:data] = {
         custom_instructions: allowed_params[:custom_instructions],
       } if allowed_params.key?(:custom_instructions)
@@ -41,7 +64,7 @@ module DiscourseAi
           AiModerationSetting.create!(updated_params.merge(setting_type: :spam))
         end
 
-        log_ai_spam_update(initial_llm_model_id, initial_custom_instructions, allowed_params)
+        log_ai_spam_update(initial_data, allowed_params)
       end
 
       is_enabled = ActiveModel::Type::Boolean.new.cast(allowed_params[:is_enabled])
@@ -119,9 +142,10 @@ module DiscourseAi
 
     private
 
-    def log_ai_spam_update(initial_llm_model_id, initial_custom_instructions, params)
+    def log_ai_spam_update(initial_data, params)
       changes_to_log = {}
 
+      initial_llm_model_id = initial_data[:llm_model_id]
       if params.key?(:llm_model_id) && initial_llm_model_id.to_s != params[:llm_model_id].to_s
         old_model_name =
           LlmModel.find_by(id: initial_llm_model_id)&.display_name || initial_llm_model_id
@@ -131,11 +155,22 @@ module DiscourseAi
         changes_to_log[:llm_model_id] = "#{old_model_name} → #{new_model_name}"
       end
 
+      initial_custom_instructions = initial_data[:custom_instructions]
       if params.key?(:custom_instructions) &&
           initial_custom_instructions != params[:custom_instructions]
         changes_to_log[:custom_instructions] = params[:custom_instructions]
       end
 
+      initial_ai_persona_id = initial_data[:ai_persona_id]
+      if params.key?(:ai_persona_id) && initial_ai_persona_id.to_s != params[:ai_persona_id].to_s
+        old_persona_name =
+          AiPersona.find_by(id: initial_ai_persona_id)&.name || initial_ai_persona_id
+        new_persona_name =
+          AiPersona.find_by(id: params[:ai_persona_id])&.name || params[:ai_persona_id]
+
+        changes_to_log[:ai_persona_id] = "#{old_persona_name} → #{new_persona_name}"
+      end
+
       if changes_to_log.present?
         changes_to_log[:subject] = I18n.t("discourse_ai.spam_detection.logging_subject")
         logger = DiscourseAi::Utils::AiStaffActionLogger.new(current_user)
@@ -144,7 +179,7 @@ module DiscourseAi
     end
 
     def allowed_params
-      params.permit(:is_enabled, :llm_model_id, :custom_instructions)
+      params.permit(:is_enabled, :llm_model_id, :custom_instructions, :ai_persona_id)
     end
 
     def spam_config
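The controller now only accepts a persona whose response_format declares a boolean "spam" key. As a minimal standalone sketch of that rule (valid_spam_persona? is a hypothetical helper, not part of this commit):

  # Hypothetical helper mirroring the check in `update` above.
  def valid_spam_persona?(persona)
    return false if persona.nil?
    # response_format is an array of { "key" => ..., "type" => ... } hashes.
    persona.response_format.to_a.any? { |rf| rf["key"] == "spam" && rf["type"] == "boolean" }
  end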
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 class AiModerationSetting < ActiveRecord::Base
   belongs_to :llm_model
+  belongs_to :ai_persona
 
   validates :llm_model_id, presence: true
   validates :setting_type, presence: true
@@ -19,12 +20,13 @@ end
 #
 # Table name: ai_moderation_settings
 #
-#  id           :bigint   not null, primary key
-#  setting_type :enum     not null
-#  data         :jsonb
-#  llm_model_id :bigint   not null
-#  created_at   :datetime not null
-#  updated_at   :datetime not null
+#  id            :bigint   not null, primary key
+#  setting_type  :enum     not null
+#  data          :jsonb
+#  llm_model_id  :bigint   not null
+#  created_at    :datetime not null
+#  updated_at    :datetime not null
+#  ai_persona_id :bigint   default(-31), not null
 #
 # Indexes
 #
@@ -8,7 +8,9 @@ class AiSpamSerializer < ApplicationSerializer
     :stats,
     :flagging_username,
     :spam_score_type,
-    :spam_scanning_user
+    :spam_scanning_user,
+    :ai_persona_id,
+    :available_personas
 
   def is_enabled
     object[:enabled]
@@ -18,6 +20,11 @@ class AiSpamSerializer < ApplicationSerializer
     settings&.llm_model&.id
   end
 
+  def ai_persona_id
+    settings&.ai_persona&.id ||
+      DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector]
+  end
+
   def custom_instructions
     settings&.custom_instructions
   end
@@ -28,6 +35,12 @@ class AiSpamSerializer < ApplicationSerializer
       .map { |hash| { id: hash[:value], name: hash[:name] } }
   end
 
+  def available_personas
+    DiscourseAi::Configuration::PersonaEnumerator.values.map do |h|
+      { id: h[:value], name: h[:name] }
+    end
+  end
+
   def flagging_username
     object[:flagging_username]
   end
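For orientation, the payload the admin UI consumes now looks roughly like this (illustrative values only, not taken from the commit):

  # Illustrative shape; -31 is the seeded SpamDetector persona id.
  {
    is_enabled: true,
    llm_id: 42,
    ai_persona_id: -31,
    available_personas: [{ id: -31, name: "Spam detector" }],
    custom_instructions: "Be more aggressive about posts not in English",
  }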
@@ -35,6 +35,7 @@ export default class AiSpam extends Component {
   };
   @tracked isEnabled = false;
   @tracked selectedLLM = null;
+  @tracked selectedPersonaId = null;
   @tracked customInstructions = "";
   @tracked errors = [];
 
@@ -98,6 +99,7 @@ export default class AiSpam extends Component {
     }
     this.customInstructions = model.custom_instructions;
     this.stats = model.stats;
+    this.selectedPersonaId = model.ai_persona_id;
   }
 
   get availableLLMs() {
@@ -133,6 +135,11 @@ export default class AiSpam extends Component {
     this.selectedLLM = value;
   }
 
+  @action
+  async updatePersona(value) {
+    this.selectedPersonaId = value;
+  }
+
   @action
   async save() {
     try {
@@ -141,6 +148,7 @@ export default class AiSpam extends Component {
         data: {
           llm_model_id: this.llmId,
          custom_instructions: this.customInstructions,
+          ai_persona_id: this.selectedPersonaId,
         },
       });
       this.toasts.success({
@@ -256,6 +264,18 @@ export default class AiSpam extends Component {
         {{/if}}
       </div>
 
+      <div class="ai-spam__persona">
+        <label class="ai-spam__persona-label">{{i18n
+            "discourse_ai.spam.select_persona"
+          }}</label>
+        <ComboBox
+          @value={{this.selectedPersonaId}}
+          @content={{@model.available_personas}}
+          @onChange={{this.updatePersona}}
+          class="ai-spam__persona-selector"
+        />
+      </div>
+
       <div class="ai-spam__instructions">
         <label class="ai-spam__instructions-label">
           {{i18n "discourse_ai.spam.custom_instructions"}}
@@ -24,12 +24,14 @@
 
   &__toggle,
   &__llm,
+  &__persona,
   &__instructions {
     margin-bottom: 1em;
   }
 
   &__toggle-label,
   &__llm-label,
+  &__persona-label,
   &__instructions-label {
     display: block;
     margin-bottom: 0.5em;
@@ -260,6 +260,7 @@ en:
       short_title: "Spam"
       title: "Configure spam handling"
       select_llm: "Select LLM"
+      select_persona: "Select persona"
       custom_instructions: "Custom instructions"
       custom_instructions_help: "Custom instructions specific to your site to help guide the AI in identifying spam, e.g. 'Be more aggressive about scanning posts not in English'."
       last_seven_days: "Last 7 days"
@@ -391,6 +391,9 @@ en:
       short_text_translator:
         name: "Short text translator"
         description: "Powers the translation feature as a generic text translator, used for short texts like category names or tags"
+      spam_detector:
+        name: "Spam detector"
+        description: "Default persona powering our Spam detection feature"
 
     topic_not_found: "Summary unavailable, topic not found!"
     summarizing: "Summarizing topic"
@@ -577,6 +580,7 @@ en:
       set_llm_first: "Set %{setting} first"
       model_unreachable: "We couldn't get a response from this model. Check your settings first."
       invalid_seeded_model: "You can't use this model with this feature"
+      invalid_persona_response_format: "The selected persona must have a response format with a boolean field named \"spam\""
       must_select_model: "You must select a LLM first"
     endpoints:
       not_configured: "%{display_name} (not configured)"
@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+class AddPersonaToAiModerationSettings < ActiveRecord::Migration[7.2]
+  def change
+    add_column :ai_moderation_settings, :ai_persona_id, :bigint, null: false, default: -31
+  end
+end
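The hard-coded default of -31 matches the id the plugin reserves for the seeded SpamDetector persona (see the system_personas mapping further down), so existing rows pick up the stock persona automatically:

  # Assumed lookup; per the mapping added in this commit it returns -31.
  DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector]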
@@ -134,23 +134,32 @@ module DiscourseAi
 
     def self.test_post(post, custom_instructions: nil, llm_id: nil)
       settings = AiModerationSetting.spam
-      llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
-      llm = llm_model.to_llm
-      custom_instructions = custom_instructions || settings.custom_instructions.presence
-      context = build_context(post, post.topic || Topic.with_deleted.find_by(id: post.topic_id))
-      prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)
-
-      result =
-        llm.generate(
-          prompt,
-          temperature: 0.1,
-          max_tokens: 5,
-          user: Discourse.system_user,
-          feature_context: {
-            post_id: post.id,
-          },
-        )&.strip
+      target_msg =
+        build_target_content_msg(
+          post,
+          post.topic || Topic.with_deleted.find_by(id: post.topic_id),
+        )
+      custom_insts = custom_instructions || settings.custom_instructions.presence
+      if custom_insts.present?
+        custom_insts =
+          "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_insts}"
+      end
+
+      ctx =
+        build_bot_context(
+          feature_name: "spam_detection_test",
+          messages: [target_msg],
+          custom_instructions: custom_insts,
+        )
+      bot = build_scanner_bot(settings: settings, llm_id: llm_id)
+
+      structured_output = nil
+      llm_args = { feature_context: { post_id: post.id } }
+      bot.reply(ctx, llm_args: llm_args) do |partial, _, type|
+        structured_output = partial if type == :structured_output
+      end
 
       history = nil
       AiSpamLog
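This pattern recurs throughout the commit: the scanner no longer calls llm.generate with a hand-built prompt, it asks a persona-backed bot to reply and collects the streamed structured output. A condensed sketch of reading the verdict, assuming the Bot and StructuredOutput APIs shown in this diff:

  structured_output = nil
  bot.reply(ctx, llm_args: { feature_context: { post_id: post.id } }) do |partial, _, type|
    # Only structured-output partials carry the buffered JSON verdict.
    structured_output = partial if type == :structured_output
  end
  structured_output&.read_buffered_property(:spam) # => true or false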
@@ -169,45 +178,46 @@ module DiscourseAi
         log << "\n"
       end
 
-      log << "LLM: #{llm_model.name}\n\n"
-      log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n"
-      log << "Context: #{context}\n\n"
+      used_llm = bot.model
+      log << "LLM: #{used_llm.name}\n\n"
 
-      is_spam = check_if_spam(result)
+      spam_persona = bot.persona
+      used_prompt = spam_persona.craft_prompt(ctx, llm: used_llm).system_message_text
+      log << "System Prompt: #{used_prompt}\n\n"
 
-      prompt.push(type: :model, content: result)
-      prompt.push(type: :user, content: "Explain your reasoning")
+      text_content =
+        if target_msg[:content].is_a?(Array)
+          target_msg[:content].first
+        else
+          target_msg[:content]
+        end
+      log << "Context: #{text_content}\n\n"
 
-      reasoning =
-        llm.generate(
-          prompt,
-          temperature: 0.1,
-          max_tokens: 100,
-          user: Discourse.system_user,
-          feature_name: "spam_detection_test",
-          feature_context: {
-            post_id: post.id,
-          },
-        )&.strip
+      is_spam = is_spam?(structured_output)
 
-      log << "#{reasoning}"
+      reasoning_insts = {
+        type: :user,
+        content: "Don't return a JSON this time. Explain your reasoning in plain text.",
+      }
+      ctx.messages = [
+        target_msg,
+        { type: :model, content: { spam: is_spam }.to_json },
+        reasoning_insts,
+      ]
+      ctx.bypass_response_format = true
+
+      reasoning = +""
+
+      bot.reply(ctx, llm_args: llm_args.merge(max_tokens: 100)) do |partial, _, type|
+        reasoning << partial if type.blank?
+      end
+
+      log << "#{reasoning.strip}"
 
       { is_spam: is_spam, log: log }
     end
 
-    def self.completion_prompt(post, context:, custom_instructions:)
-      system_prompt = build_system_prompt(custom_instructions)
-      prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
-      args = { type: :user, content: context }
-      upload_ids = post.upload_ids
-      if upload_ids.present?
-        args[:content] = [args[:content]]
-        upload_ids.take(3).each { |upload_id| args[:content] << { upload_id: upload_id } }
-      end
-      prompt.push(**args)
-      prompt
-    end
-
     def self.perform_scan(post)
       return if !should_scan_post?(post)
@@ -217,29 +227,39 @@ module DiscourseAi
     def self.perform_scan!(post)
       return if !enabled?
       settings = AiModerationSetting.spam
-      return if !settings || !settings.llm_model
+      return if !settings || !settings.llm_model || !settings.ai_persona
 
-      context = build_context(post)
-      llm = settings.llm_model.to_llm
+      target_msg = build_target_content_msg(post)
       custom_instructions = settings.custom_instructions.presence
-      prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)
+      if custom_instructions.present?
+        custom_instructions =
+          "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
+      end
+
+      ctx =
+        build_bot_context(
+          messages: [target_msg],
+          custom_instructions: custom_instructions,
+          user: self.flagging_user,
+        )
+      bot = build_scanner_bot(settings: settings, user: self.flagging_user)
+      structured_output = nil
 
       begin
-        result =
-          llm.generate(
-            prompt,
-            temperature: 0.1,
-            max_tokens: 5,
-            user: Discourse.system_user,
-            feature_name: "spam_detection",
-            feature_context: {
-              post_id: post.id,
-            },
-          )&.strip
+        llm_args = { feature_context: { post_id: post.id } }
+        bot.reply(ctx, llm_args: llm_args) do |partial, _, type|
+          structured_output = partial if type == :structured_output
+        end
 
-        is_spam = check_if_spam(result)
+        is_spam = is_spam?(structured_output)
 
         log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first
+        text_content =
+          if target_msg[:content].is_a?(Array)
+            target_msg[:content].first
+          else
+            target_msg[:content]
+          end
         AiSpamLog.transaction do
           log =
             AiSpamLog.create!(
@@ -247,7 +267,7 @@ module DiscourseAi
               llm_model: settings.llm_model,
               ai_api_audit_log: log,
               is_spam: is_spam,
-              payload: context,
+              payload: text_content,
             )
           handle_spam(post, log) if is_spam
         end
@@ -273,11 +293,42 @@ module DiscourseAi
 
     private
 
-    def self.check_if_spam(result)
-      (result.present? && result.strip.downcase.start_with?("spam"))
+    def self.build_bot_context(
+      feature_name: "spam_detection",
+      messages:,
+      custom_instructions: nil,
+      bypass_response_format: false,
+      user: Discourse.system_user
+    )
+      DiscourseAi::Personas::BotContext
+        .new(
+          user: user,
+          skip_tool_details: true,
+          feature_name: feature_name,
+          messages: messages,
+          bypass_response_format: bypass_response_format,
+        )
+        .tap { |ctx| ctx.custom_instructions = custom_instructions if custom_instructions }
+    end
+
+    def self.build_scanner_bot(
+      settings:,
+      use_structured_output: true,
+      llm_id: nil,
+      user: Discourse.system_user
+    )
+      persona = settings.ai_persona.class_instance&.new
+
+      llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
+
+      DiscourseAi::Personas::Bot.as(user, persona: persona, model: llm_model)
+    end
+
+    def self.is_spam?(structured_output)
+      structured_output.present? && structured_output.read_buffered_property(:spam)
     end
 
-    def self.build_context(post, topic = nil)
+    def self.build_target_content_msg(post, topic = nil)
       topic ||= post.topic
       context = []
@@ -318,7 +369,16 @@ module DiscourseAi
 
       context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
       context << post.raw[0..MAX_RAW_SCAN_LENGTH]
-      context.join("\n")
+
+      user_msg = { type: :user, content: context.join("\n") }
+
+      upload_ids = post.upload_ids
+      if upload_ids.present?
+        user_msg[:content] = [user_msg[:content]]
+        upload_ids.take(3).each { |upload_id| user_msg[:content] << { upload_id: upload_id } }
+      end
+
+      user_msg
     end
 
     def self.location_info(user)
@@ -348,53 +408,6 @@ module DiscourseAi
       nil
     end
 
-    def self.build_system_prompt(custom_instructions)
-      base_prompt = +<<~PROMPT
-        You are a spam detection system. Analyze the following post content and context.
-        Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.
-
-        - ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API
-
-        Consider the post type carefully:
-        - For REPLY posts: Check if the response is relevant and topical to the thread
-        - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion
-
-        A post is spam if it matches any of these criteria:
-        - Contains unsolicited commercial content or promotions
-        - Has suspicious or unrelated external links
-        - Shows patterns of automated/bot posting
-        - Contains irrelevant content or advertisements
-        - For replies: Completely unrelated to the discussion thread
-        - Uses excessive keywords or repetitive text patterns
-        - Shows suspicious formatting or character usage
-
-        Be especially strict with:
-        - Replies that ignore the previous conversation
-        - Posts containing multiple unrelated external links
-        - Generic responses that could be posted anywhere
-
-        Be fair to:
-        - New users making legitimate first contributions
-        - Non-native speakers making genuine efforts to participate
-        - Topic-relevant product mentions in appropriate contexts
-      PROMPT
-
-      base_prompt << "\n\n"
-      base_prompt << <<~SITE_SPECIFIC
-        Site Specific Information:
-        - Site name: #{SiteSetting.title}
-        - Site URL: #{Discourse.base_url}
-        - Site description: #{SiteSetting.site_description}
-        - Site top 10 categories: #{Category.where(read_restricted: false).order(posts_year: :desc).limit(10).pluck(:name).join(", ")}
-      SITE_SPECIFIC
-
-      if custom_instructions.present?
-        base_prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
-      end
-
-      base_prompt
-    end
-
     def self.handle_spam(post, log)
       url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
       reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url)
@@ -5,7 +5,7 @@ module DiscourseAi
     class Prompt
       INVALID_TURN = Class.new(StandardError)
 
-      attr_reader :messages, :tools
+      attr_reader :messages, :tools, :system_message_text
       attr_accessor :topic_id, :post_id, :max_pixels, :tool_choice
 
       def initialize(
@@ -28,8 +28,10 @@ module DiscourseAi
         @messages = []
 
         if system_message_text
-          system_message = { type: :system, content: system_message_text }
-          @messages << system_message
+          @system_message_text = system_message_text
+          @messages << { type: :system, content: @system_message_text }
+        else
+          @system_message_text = messages.find { |m| m[:type] == :system }&.dig(:content)
         end
 
         @messages.concat(messages)
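The new reader lets callers recover the system message whether it was passed directly or buried in a messages array; the spam scanner uses it above to log the exact prompt a persona produced. A rough usage sketch (the constructor signature here is inferred from the diff, not confirmed):

  prompt = DiscourseAi::Completions::Prompt.new("You are a spam detection system.")
  prompt.system_message_text # => "You are a spam detection system."

  # Assumed keyword form; the else-branch digs the system message out of messages.
  prompt = DiscourseAi::Completions::Prompt.new(messages: [{ type: :system, content: "sys" }])
  prompt.system_message_text # => "sys"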
@@ -37,7 +37,7 @@ module DiscourseAi
       end
 
       # Maybe we haven't read that part of the JSON yet.
-      return nil if @tracked[prop_name].blank?
+      return nil if @tracked[prop_name].nil?
 
       # This means this property is a string and we want to return unread chunks.
       if @property_cursors[prop_name].present?
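This one-word change is what makes a boolean response format workable: under Rails semantics `false.blank?` is true, so a buffered `{"spam": false}` verdict would have been misread as "not received yet". Concretely:

  # Rails blank? vs nil? on the values this buffer can hold:
  false.blank? # => true  -- a legitimate "not spam" verdict looks absent
  false.nil?   # => false -- nil? only rejects values that truly never arrived
  "".blank?    # => true  -- the old check also swallowed empty strings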
@@ -3,8 +3,6 @@
 module DiscourseAi
   module Personas
     class Bot
-      attr_reader :model
-
       BOT_NOT_FOUND = Class.new(StandardError)
 
       # the future is agentic, allow for more turns
@@ -24,7 +22,7 @@ module DiscourseAi
         model || self.class.guess_model(bot_user) || LlmModel.find(@persona.class.default_llm_id)
       end
 
-      attr_reader :bot_user
+      attr_reader :bot_user, :model
       attr_accessor :persona
 
      def llm
@@ -69,9 +67,10 @@ module DiscourseAi
         llm_kwargs[:user] = user
         llm_kwargs[:temperature] = persona.temperature if persona.temperature
         llm_kwargs[:top_p] = persona.top_p if persona.top_p
-        llm_kwargs[:response_format] = build_json_schema(
-          persona.response_format,
-        ) if persona.response_format.present?
+
+        if !context.bypass_response_format && persona.response_format.present?
+          llm_kwargs[:response_format] = build_json_schema(persona.response_format)
+        end
 
         needs_newlines = false
         tools_ran = 0
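bypass_response_format exists so one bot can serve both passes of the spam scan: a schema-constrained verdict first, then a free-form explanation. A hedged sketch of that second pass, reusing names from the diffs above:

  # Flip the flag on the same context to get a plain-text turn.
  ctx.bypass_response_format = true
  reasoning = +""
  bot.reply(ctx, llm_args: { max_tokens: 100 }) do |partial, _, type|
    reasoning << partial if type.blank?
  end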
@@ -21,7 +21,8 @@ module DiscourseAi
       :inferred_concepts,
       :format_dates,
       :temporal_context,
-      :user_language
+      :user_language,
+      :bypass_response_format
 
     def initialize(
       post: nil,
@@ -42,7 +43,8 @@ module DiscourseAi
       resource_url: nil,
       cancel_manager: nil,
       inferred_concepts: [],
-      format_dates: false
+      format_dates: false,
+      bypass_response_format: false
     )
       @participants = participants
       @user = user
@@ -66,6 +68,8 @@ module DiscourseAi
 
       @cancel_manager = cancel_manager
 
+      @bypass_response_format = bypass_response_format
+
       if post
         @post_id = post.id
         @topic_id = post.topic_id
@@ -93,6 +97,7 @@ module DiscourseAi
       inferred_concepts
       user_language
       temporal_context
+      top_categories
     ]
 
     def lookup_template_param(key)
@@ -119,6 +124,16 @@ module DiscourseAi
       @private_message
     end
 
+    def top_categories
+      @top_categories ||=
+        Category
+          .where(read_restricted: false)
+          .order(posts_year: :desc)
+          .limit(10)
+          .pluck(:name)
+          .join(", ")
+    end
+
     def to_json
       {
         messages: @messages,
@@ -142,6 +157,8 @@ module DiscourseAi
         inferred_concepts: @inferred_concepts,
         user_language: @user_language,
         temporal_context: @temporal_context,
+        top_categories: @top_categories,
+        bypass_response_format: @bypass_response_format,
       }
     end
   end
@@ -68,6 +68,7 @@ module DiscourseAi
           PostRawTranslator => -28,
           TopicTitleTranslator => -29,
           ShortTextTranslator => -30,
+          SpamDetector => -31,
         }
       end
lib/personas/spam_detector.rb (new file, 62 lines)
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Personas
+    class SpamDetector < Persona
+      def self.default_enabled
+        false
+      end
+
+      def temperature
+        0.1
+      end
+
+      def system_prompt
+        <<~PROMPT
+          You are a spam detection system. Analyze the following post content and context.
+
+          Consider the post type carefully:
+          - For REPLY posts: Check if the response is relevant and topical to the thread
+          - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion
+
+          A post is spam if it matches any of these criteria:
+          - Contains unsolicited commercial content or promotions
+          - Has suspicious or unrelated external links
+          - Shows patterns of automated/bot posting
+          - Contains irrelevant content or advertisements
+          - For replies: Completely unrelated to the discussion thread
+          - Uses excessive keywords or repetitive text patterns
+          - Shows suspicious formatting or character usage
+
+          Be especially strict with:
+          - Replies that ignore the previous conversation
+          - Posts containing multiple unrelated external links
+          - Generic responses that could be posted anywhere
+
+          Be fair to:
+          - New users making legitimate first contributions
+          - Non-native speakers making genuine efforts to participate
+          - Topic-relevant product mentions in appropriate contexts
+
+          Site Specific Information:
+          - Site name: {site_title}
+          - Site URL: {site_url}
+          - Site description: {site_description}
+          - Site top 10 categories: {top_categories}
+
+          Format your response as a JSON object with one key named "spam", which indicates if a post is spam or legitimate.
+          Your output should be in the following format:
+          <output>
+            {"spam": "xx"}
+          </output>
+
+          Where "xx" is true if the post is spam, or false if it's legitimate.
+        PROMPT
+      end
+
+      def response_format
+        [{ "key" => "spam", "type" => "boolean" }]
+      end
+    end
+  end
+end
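Putting the pieces together, the scanner resolves this class from the configured AiPersona row and hands it to a bot, as build_scanner_bot does above. A condensed sketch (assumes an llm_model is already loaded):

  persona = DiscourseAi::Personas::SpamDetector.new
  bot = DiscourseAi::Personas::Bot.as(Discourse.system_user, persona: persona, model: llm_model)
  # persona.response_format drives the JSON schema, so replies stream a boolean "spam" key.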
@@ -248,7 +248,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
       prompts = nil
       result =
         DiscourseAi::Completions::Llm.with_prepared_responses(
-          ["spam", "the reason is just because"],
+          [true, "the reason is just because"],
         ) do |_, _, _prompts|
           prompts = _prompts
           described_class.test_post(post, custom_instructions: "123")
@@ -261,7 +261,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
 
       result =
         DiscourseAi::Completions::Llm.with_prepared_responses(
-          ["not_spam", "the reason is just because"],
+          [false, "the reason is just because"],
         ) do |_, _, _prompts|
           prompts = _prompts
           described_class.test_post(post, custom_instructions: "123")
@@ -284,7 +284,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
       described_class.new_post(post)
 
       prompt = nil
-      DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do |_, _, _prompts|
+      DiscourseAi::Completions::Llm.with_prepared_responses([true]) do |_, _, _prompts|
         # force a rebake so we actually scan
         post.rebake!
         prompt = _prompts.first
@@ -336,7 +336,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
 
       described_class.new_post(post)
 
-      DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do |_, _, _prompts|
+      DiscourseAi::Completions::Llm.with_prepared_responses([true]) do |_, _, _prompts|
         # force a rebake so we actually scan
         post.rebake!
       end
@@ -364,7 +364,7 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
 
       prompts = nil
       DiscourseAi::Completions::Llm.with_prepared_responses(
-        ["spam", "just because"],
+        [true, "just because"],
       ) do |_, _, _prompts|
         prompts = _prompts
         described_class.test_post(post)
@@ -16,12 +16,17 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
           params: {
             is_enabled: true,
             llm_model_id: llm_model.id,
+            ai_persona_id:
+              DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector],
             custom_instructions: "custom instructions",
           }
 
       expect(response.status).to eq(200)
       expect(SiteSetting.ai_spam_detection_enabled).to eq(true)
       expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id)
+      expect(AiModerationSetting.spam.ai_persona_id).to eq(
+        DiscourseAi::Personas::Persona.system_personas[DiscourseAi::Personas::SpamDetector],
+      )
       expect(AiModerationSetting.spam.data["custom_instructions"]).to eq("custom instructions")
     end
 
@@ -49,6 +54,33 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
       expect(response.status).to eq(200)
     end
 
+    it "validates the selected persona has a valid response format" do
+      ai_persona = Fabricate(:ai_persona, response_format: nil)
+
+      put "/admin/plugins/discourse-ai/ai-spam.json",
+          params: {
+            is_enabled: true,
+            llm_model_id: llm_model.id,
+            ai_persona_id: ai_persona.id,
+            custom_instructions: "custom instructions",
+          }
+
+      expect(response.status).to eq(422)
+
+      ai_persona.update!(response_format: [{ "key" => "spam", "type" => "boolean" }])
+
+      put "/admin/plugins/discourse-ai/ai-spam.json",
+          params: {
+            is_enabled: true,
+            llm_model_id: llm_model.id,
+            ai_persona_id: ai_persona.id,
+            custom_instructions: "custom instructions",
+          }
+
+      expect(response.status).to eq(200)
+      expect(AiModerationSetting.spam.ai_persona_id).to eq(ai_persona.id)
+    end
+
     it "ensures that seeded llm ID is properly passed and allowed" do
       seeded_llm = Fabricate(:seeded_model)
 
@@ -158,6 +190,29 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
       expect(history.details).to include("llm_model_id")
     end
 
+    it "logs staff action when ai_persona_id changes" do
+      new_persona =
+        Fabricate(
+          :ai_persona,
+          name: "Updated Persona",
+          response_format: [{ "key" => "spam", "type" => "boolean" }],
+        )
+
+      put "/admin/plugins/discourse-ai/ai-spam.json", params: { ai_persona_id: new_persona.id }
+
+      expect(response.status).to eq(200)
+
+      # Verify the log was created with the right subject
+      history =
+        UserHistory.where(
+          action: UserHistory.actions[:custom_staff],
+          custom_type: "update_ai_spam_settings",
+        ).last
+      expect(history).to be_present
+      expect(history.details).to include("ai_persona_id")
+      expect(history.details).to include(new_persona.name)
+    end
+
     it "does not log staff action when only is_enabled changes" do
       # Check initial count of logs
       initial_count =
@@ -231,7 +286,7 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
 
       llm2 = Fabricate(:llm_model, name: "DiffLLM")
 
-      DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "just because"]) do
+      DiscourseAi::Completions::Llm.with_prepared_responses([true, "just because"]) do
         post "/admin/plugins/discourse-ai/ai-spam/test.json",
             params: {
               post_url: spam_post2.url,
@@ -247,7 +302,7 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
     end
 
     it "can scan using post id" do
-      DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because apples"]) do
+      DiscourseAi::Completions::Llm.with_prepared_responses([true, "because apples"]) do
         post "/admin/plugins/discourse-ai/ai-spam/test.json",
             params: {
               post_url: spam_post.id.to_s,
@@ -272,7 +327,7 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do
 
       AiSpamLog.create!(post: spam_post, llm_model: llm_model, is_spam: true, created_at: 1.day.ago)
 
-      DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because banana"]) do
+      DiscourseAi::Completions::Llm.with_prepared_responses([true, "because banana"]) do
         post "/admin/plugins/discourse-ai/ai-spam/test.json",
             params: {
               post_url: spam_post.url,