Mirror of https://github.com/discourse/discourse-ai.git (synced 2025-03-06 17:30:20 +00:00)
FEATURE: Add AI-powered spam detection for new user posts (#1004)
This introduces a comprehensive spam detection system that uses LLMs to automatically identify and flag potential spam posts. The system is designed to be both powerful and configurable while guarding against false positives.

Key Features:
* Automatically scans the first 3 posts from new users (TL0/TL1)
* Creates a dedicated AI flagging user so its flags are distinguishable from system flags
* Tracks false positives/negatives for quality monitoring
* Supports custom instructions to fine-tune detection
* Includes a test interface for trying detection on any post

Technical Implementation:
* New database tables:
  - ai_spam_logs: stores scan history and results
  - ai_moderation_settings: stores LLM config and custom instructions
* Rate limiting and safeguards:
  - Minimum 10-minute delay between rescans
  - Only scans significant edits (more than a 10-character difference)
  - Maximum 3 scans per post
  - 24-hour maximum age for scannable posts
* Admin UI features:
  - Real-time testing capabilities
  - 7-day statistics dashboard
  - Configurable LLM model selection
  - Custom instruction support

Security and Performance:
* Respects trust levels: only scans TL0/TL1 users
* Skips private messages entirely
* Stops scanning users after 3 successful public posts
* Includes comprehensive test coverage
* Maintains an audit log of all scan attempts

---------

Co-authored-by: Keegan George <kgeorge13@gmail.com>
Co-authored-by: Martin Brennan <martin@discourse.org>
parent ae80494448
commit 47f5da7e42
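The commit message above lists several cheap eligibility gates that run before any LLM call. As a reviewer's orientation aid, here is a minimal, self-contained sketch of how those gates compose; the names are hypothetical, and the real checks live in lib/ai_moderation/spam_scanner.rb further down in this diff.

# Illustrative sketch of the eligibility gates described in the commit
# message; names are placeholders, not the plugin's actual API.
ScanCandidate =
  Struct.new(
    :trust_level,
    :private_message,
    :public_post_count,
    :scan_count,
    :age_hours,
    keyword_init: true,
  )

def scannable?(c)
  return false if c.trust_level > 1        # TL0/TL1 only
  return false if c.private_message        # PMs are never scanned
  return false if c.public_post_count > 3  # stop after 3 successful public posts
  return false if c.scan_count >= 3        # at most 3 scans per post
  return false if c.age_hours > 24         # 24-hour age cap
  true
end

candidate =
  ScanCandidate.new(
    trust_level: 0,
    private_message: false,
    public_post_count: 1,
    scan_count: 0,
    age_hours: 2,
  )
puts scannable?(candidate) # => true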
@@ -0,0 +1,11 @@
import { service } from "@ember/service";
import { ajax } from "discourse/lib/ajax";
import DiscourseRoute from "discourse/routes/discourse";

export default class DiscourseAiSpamRoute extends DiscourseRoute {
  @service store;

  model() {
    return ajax("/admin/plugins/discourse-ai/ai-spam.json");
  }
}
@@ -0,0 +1 @@
<AiSpam @model={{this.model}} />
app/controllers/discourse_ai/admin/ai_spam_controller.rb (new file, 112 lines)
@@ -0,0 +1,112 @@
# frozen_string_literal: true

module DiscourseAi
  module Admin
    class AiSpamController < ::Admin::AdminController
      requires_plugin "discourse-ai"

      def show
        render json: AiSpamSerializer.new(spam_config, root: false)
      end

      def update
        updated_params = {}
        if allowed_params.key?(:llm_model_id)
          llm_model_id = updated_params[:llm_model_id] = allowed_params[:llm_model_id]
          if llm_model_id.to_i < 0 &&
               !SiteSetting.ai_spam_detection_model_allowed_seeded_models_map.include?(
                 "custom:#{llm_model_id}",
               )
            return(
              render_json_error(
                I18n.t("discourse_ai.llm.configuration.invalid_seeded_model"),
                status: 422,
              )
            )
          end
        end

        updated_params[:data] = {
          custom_instructions: allowed_params[:custom_instructions],
        } if allowed_params.key?(:custom_instructions)

        if updated_params.present?
          # not using upsert cause we will not get the correct validation errors
          if AiModerationSetting.spam
            AiModerationSetting.spam.update!(updated_params)
          else
            AiModerationSetting.create!(updated_params.merge(setting_type: :spam))
          end
        end

        is_enabled = ActiveModel::Type::Boolean.new.cast(allowed_params[:is_enabled])

        if allowed_params.key?(:is_enabled)
          if is_enabled && !AiModerationSetting.spam&.llm_model_id
            return(
              render_json_error(
                I18n.t("discourse_ai.llm.configuration.must_select_model"),
                status: 422,
              )
            )
          end

          SiteSetting.ai_spam_detection_enabled = is_enabled
        end

        render json: AiSpamSerializer.new(spam_config, root: false)
      end

      def test
        url = params[:post_url].to_s
        post = nil

        if url.match?(/^\d+$/)
          post_id = url.to_i
          post = Post.find_by(id: post_id)
        end

        route = UrlHelper.rails_route_from_url(url) if !post

        if route
          if route[:controller] == "topics"
            post_number = route[:post_number] || 1
            post = Post.with_deleted.find_by(post_number: post_number, topic_id: route[:topic_id])
          end
        end

        raise Discourse::NotFound if !post

        result =
          DiscourseAi::AiModeration::SpamScanner.test_post(
            post,
            custom_instructions: params[:custom_instructions],
            llm_id: params[:llm_id],
          )

        render json: result
      end

      private

      def allowed_params
        params.permit(:is_enabled, :llm_model_id, :custom_instructions)
      end

      def spam_config
        spam_config = {
          enabled: SiteSetting.ai_spam_detection_enabled,
          settings: AiModerationSetting.spam,
        }

        spam_config[:stats] = DiscourseAi::AiModeration::SpamReport.generate(min_date: 1.week.ago)

        if spam_config[:stats].scanned_count > 0
          spam_config[
            :flagging_username
          ] = DiscourseAi::AiModeration::SpamScanner.flagging_user&.username
        end
        spam_config
      end
    end
  end
end
app/jobs/regular/ai_spam_scan.rb (new file, 13 lines)
@@ -0,0 +1,13 @@
# frozen_string_literal: true

module Jobs
  class AiSpamScan < ::Jobs::Base
    def execute(args)
      return if !args[:post_id]
      post = Post.find_by(id: args[:post_id])
      return if !post

      DiscourseAi::AiModeration::SpamScanner.perform_scan(post)
    end
  end
end
app/models/ai_moderation_setting.rb (new file, 32 lines)
@@ -0,0 +1,32 @@
# frozen_string_literal: true
class AiModerationSetting < ActiveRecord::Base
  belongs_to :llm_model

  validates :llm_model_id, presence: true
  validates :setting_type, presence: true
  validates :setting_type, uniqueness: true

  def self.spam
    find_by(setting_type: :spam)
  end

  def custom_instructions
    data["custom_instructions"]
  end
end

# == Schema Information
#
# Table name: ai_moderation_settings
#
#  id            :bigint           not null, primary key
#  setting_type  :enum             not null
#  data          :jsonb
#  llm_model_id  :bigint           not null
#  created_at    :datetime         not null
#  updated_at    :datetime         not null
#
# Indexes
#
#  index_ai_moderation_settings_on_setting_type  (setting_type) UNIQUE
#
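For illustration only, a hypothetical Rails-console session against this model; the llm_model_id value is a placeholder, not one this commit defines:

# Hypothetical console usage; llm_model_id 42 is a placeholder.
AiModerationSetting.create!(
  setting_type: :spam,
  llm_model_id: 42,
  data: { custom_instructions: "Flag crypto giveaway posts aggressively" },
)
AiModerationSetting.spam.custom_instructions
# => "Flag crypto giveaway posts aggressively"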
app/models/ai_spam_log.rb (new file, 26 lines)
@@ -0,0 +1,26 @@
# frozen_string_literal: true
class AiSpamLog < ActiveRecord::Base
  belongs_to :post
  belongs_to :llm_model
  belongs_to :ai_api_audit_log
  belongs_to :reviewable
end

# == Schema Information
#
# Table name: ai_spam_logs
#
#  id                  :bigint           not null, primary key
#  post_id             :bigint           not null
#  llm_model_id        :bigint           not null
#  ai_api_audit_log_id :bigint
#  reviewable_id       :bigint
#  is_spam             :boolean          not null
#  payload             :string(20000)    default(""), not null
#  created_at          :datetime         not null
#  updated_at          :datetime         not null
#
# Indexes
#
#  index_ai_spam_logs_on_post_id  (post_id)
#
@@ -56,7 +56,11 @@ class LlmModel < ActiveRecord::Base
     end
   end
 
   def to_llm
-    DiscourseAi::Completions::Llm.proxy("custom:#{id}")
+    DiscourseAi::Completions::Llm.proxy(identifier)
+  end
+
+  def identifier
+    "custom:#{id}"
   end
 
   def toggle_companion_user
app/serializers/ai_spam_serializer.rb (new file, 40 lines)
@@ -0,0 +1,40 @@
# frozen_string_literal: true

class AiSpamSerializer < ApplicationSerializer
  attributes :is_enabled, :llm_id, :custom_instructions, :available_llms, :stats, :flagging_username

  def is_enabled
    object[:enabled]
  end

  def llm_id
    settings&.llm_model&.id
  end

  def custom_instructions
    settings&.custom_instructions
  end

  def available_llms
    DiscourseAi::Configuration::LlmEnumerator
      .values(allowed_seeded_llms: SiteSetting.ai_spam_detection_model_allowed_seeded_models_map)
      .map { |hash| { id: hash[:value], name: hash[:name] } }
  end

  def flagging_username
    object[:flagging_username]
  end

  def stats
    {
      scanned_count: object[:stats].scanned_count.to_i,
      spam_detected: object[:stats].spam_detected.to_i,
      false_positives: object[:stats].false_positives.to_i,
      false_negatives: object[:stats].false_negatives.to_i,
    }
  end

  def settings
    object[:settings]
  end
end
@@ -18,6 +18,7 @@ export default {
       this.route("new");
       this.route("show", { path: "/:id" });
     });
+    this.route("discourse-ai-spam", { path: "ai-spam" });
     this.route("discourse-ai-usage", { path: "ai-usage" });
   },
 };
assets/javascripts/discourse/components/ai-spam.gjs (new file, 243 lines)
@@ -0,0 +1,243 @@
import Component from "@glimmer/component";
import { tracked } from "@glimmer/tracking";
import { fn } from "@ember/helper";
import { on } from "@ember/modifier";
import { action } from "@ember/object";
import { LinkTo } from "@ember/routing";
import { service } from "@ember/service";
import DButton from "discourse/components/d-button";
import DToggleSwitch from "discourse/components/d-toggle-switch";
import DTooltip from "discourse/components/d-tooltip";
import withEventValue from "discourse/helpers/with-event-value";
import { ajax } from "discourse/lib/ajax";
import { popupAjaxError } from "discourse/lib/ajax-error";
import i18n from "discourse-common/helpers/i18n";
import getURL from "discourse-common/lib/get-url";
import AdminConfigAreaCard from "admin/components/admin-config-area-card";
import AdminPageSubheader from "admin/components/admin-page-subheader";
import ComboBox from "select-kit/components/combo-box";
import SpamTestModal from "./modal/spam-test-modal";

export default class AiSpam extends Component {
  @service siteSettings;
  @service toasts;
  @service modal;

  @tracked
  stats = {
    scanned_count: 0,
    spam_detected: 0,
    false_positives: 0,
    false_negatives: 0,
    daily_data: [],
  };
  @tracked isEnabled = false;
  @tracked selectedLLM = null;
  @tracked customInstructions = "";

  constructor() {
    super(...arguments);
    this.initializeFromModel();
  }

  @action
  initializeFromModel() {
    const model = this.args.model;
    this.isEnabled = model.is_enabled;

    if (model.llm_id) {
      this.selectedLLM = "custom:" + model.llm_id;
    } else {
      if (this.availableLLMs.length) {
        this.selectedLLM = this.availableLLMs[0].id;
        this.autoSelectedLLM = true;
      }
    }
    this.customInstructions = model.custom_instructions;
    this.stats = model.stats;
  }

  get availableLLMs() {
    return this.args.model?.available_llms || [];
  }

  @action
  async toggleEnabled() {
    this.isEnabled = !this.isEnabled;
    const data = { is_enabled: this.isEnabled };
    if (this.autoSelectedLLM) {
      data.llm_model_id = this.llmId;
    }
    try {
      const response = await ajax("/admin/plugins/discourse-ai/ai-spam.json", {
        type: "PUT",
        data,
      });
      this.autoSelectedLLM = false;
      this.isEnabled = response.is_enabled;
    } catch (error) {
      this.isEnabled = !this.isEnabled;
      popupAjaxError(error);
    }
  }

  get llmId() {
    return this.selectedLLM.toString().split(":")[1];
  }

  @action
  async updateLLM(value) {
    this.selectedLLM = value;
  }

  @action
  async save() {
    try {
      await ajax("/admin/plugins/discourse-ai/ai-spam.json", {
        type: "PUT",
        data: {
          llm_model_id: this.llmId,
          custom_instructions: this.customInstructions,
        },
      });
      this.toasts.success({
        data: { message: i18n("discourse_ai.spam.settings_saved") },
        duration: 2000,
      });
    } catch (error) {
      popupAjaxError(error);
    }
  }

  @action
  showTestModal() {
    this.modal.show(SpamTestModal, {
      model: {
        customInstructions: this.customInstructions,
        llmId: this.llmId,
      },
    });
  }

  get metrics() {
    const detected = {
      label: "discourse_ai.spam.spam_detected",
      value: this.stats.spam_detected,
    };
    if (this.args.model.flagging_username) {
      detected.href = getURL(
        "/review?flagged_by=" + this.args.model.flagging_username
      );
    }
    return [
      {
        label: "discourse_ai.spam.scanned_count",
        value: this.stats.scanned_count,
      },
      detected,
      {
        label: "discourse_ai.spam.false_positives",
        value: this.stats.false_positives,
      },
      {
        label: "discourse_ai.spam.false_negatives",
        value: this.stats.false_negatives,
      },
    ];
  }

  <template>
    <div class="ai-spam">
      <section class="ai-spam__settings">
        <AdminPageSubheader
          @titleLabel="discourse_ai.spam.title"
          @descriptionLabel="discourse_ai.spam.spam_description"
        />

        <div class="control-group ai-spam__enabled">
          <DToggleSwitch
            class="ai-spam__toggle"
            @state={{this.isEnabled}}
            @label="discourse_ai.spam.enable"
            {{on "click" this.toggleEnabled}}
          />
          <DTooltip
            @icon="circle-question"
            @content={{i18n "discourse_ai.spam.spam_tip"}}
          />
        </div>

        <div class="ai-spam__llm">
          <label class="ai-spam__llm-label">{{i18n
              "discourse_ai.spam.select_llm"
            }}</label>
          {{#if this.availableLLMs.length}}
            <ComboBox
              @value={{this.selectedLLM}}
              @content={{this.availableLLMs}}
              @onChange={{this.updateLLM}}
              class="ai-spam__llm-selector"
            />
          {{else}}
            <span class="ai-spam__llm-placeholder">
              <LinkTo @route="adminPlugins.show.discourse-ai-llms.index">
                {{i18n "discourse_ai.spam.no_llms"}}
              </LinkTo>
            </span>
          {{/if}}
        </div>

        <div class="ai-spam__instructions">
          <label class="ai-spam__instructions-label">
            {{i18n "discourse_ai.spam.custom_instructions"}}
            <DTooltip
              @icon="circle-question"
              @content={{i18n "discourse_ai.spam.custom_instructions_help"}}
            />
          </label>
          <textarea
            class="ai-spam__instructions-input"
            placeholder={{i18n
              "discourse_ai.spam.custom_instructions_placeholder"
            }}
            {{on "input" (withEventValue (fn (mut this.customInstructions)))}}
          >{{this.customInstructions}}</textarea>
          <DButton
            @action={{this.save}}
            @label="discourse_ai.spam.save_button"
            class="ai-spam__instructions-save btn-primary"
          />
          <DButton
            @action={{this.showTestModal}}
            @label="discourse_ai.spam.test_button"
            class="btn-default"
          />
        </div>
      </section>

      <AdminConfigAreaCard
        @heading="discourse_ai.spam.last_seven_days"
        class="ai-spam__stats"
      >
        <:content>
          <div class="ai-spam__metrics">
            {{#each this.metrics as |metric|}}
              <div class="ai-spam__metrics-item">
                <span class="ai-spam__metrics-label">{{i18n
                    metric.label
                  }}</span>
                {{#if metric.href}}
                  <a href={{metric.href}} class="ai-spam__metrics-value">
                    {{metric.value}}
                  </a>
                {{else}}
                  <span class="ai-spam__metrics-value">{{metric.value}}</span>
                {{/if}}
              </div>
            {{/each}}
          </div>
        </:content>
      </AdminConfigAreaCard>
    </div>
  </template>
}
@@ -0,0 +1,101 @@
import Component from "@glimmer/component";
import { tracked } from "@glimmer/tracking";
import { fn } from "@ember/helper";
import { on } from "@ember/modifier";
import { action } from "@ember/object";
import DButton from "discourse/components/d-button";
import DModal from "discourse/components/d-modal";
import withEventValue from "discourse/helpers/with-event-value";
import { ajax } from "discourse/lib/ajax";
import { popupAjaxError } from "discourse/lib/ajax-error";
import I18n from "discourse-i18n";
import AiIndicatorWave from "../ai-indicator-wave";

export default class SpamTestModal extends Component {
  @tracked testResult;
  @tracked isLoading = false;
  @tracked postUrl = "";
  @tracked scanLog = "";
  @tracked isSpam;

  @action
  async runTest() {
    this.isLoading = true;
    try {
      const response = await ajax(
        `/admin/plugins/discourse-ai/ai-spam/test.json`,
        {
          type: "POST",
          data: {
            post_url: this.postUrl,
            custom_instructions: this.args.model.customInstructions,
            llm_id: this.args.model.llmId,
          },
        }
      );

      this.isSpam = response.is_spam;
      this.testResult = response.is_spam
        ? I18n.t("discourse_ai.spam.test_modal.spam")
        : I18n.t("discourse_ai.spam.test_modal.not_spam");
      this.scanLog = response.log;
    } catch (error) {
      popupAjaxError(error);
    } finally {
      this.isLoading = false;
    }
  }

  <template>
    <DModal
      @title={{I18n.t "discourse_ai.spam.test_modal.title"}}
      @closeModal={{@closeModal}}
      @bodyClass="spam-test-modal__body"
      class="spam-test-modal"
    >
      <:body>
        <div class="control-group">
          <label>{{I18n.t
              "discourse_ai.spam.test_modal.post_url_label"
            }}</label>
          <input
            {{on "input" (withEventValue (fn (mut this.postUrl)))}}
            type="text"
            placeholder={{I18n.t
              "discourse_ai.spam.test_modal.post_url_placeholder"
            }}
          />
        </div>

        {{#if this.testResult}}
          <div class="spam-test-modal__test-result">
            <h3>{{I18n.t "discourse_ai.spam.test_modal.result"}}</h3>
            <div
              class="spam-test-modal__verdict
                {{if this.isSpam 'is-spam' 'not-spam'}}"
            >
              {{this.testResult}}
            </div>
            {{#if this.scanLog}}
              <div class="spam-test-modal__log">
                <h4>{{I18n.t "discourse_ai.spam.test_modal.scan_log"}}</h4>
                <pre>{{this.scanLog}}</pre>
              </div>
            {{/if}}
          </div>
        {{/if}}
      </:body>

      <:footer>
        <DButton
          @action={{this.runTest}}
          @label="discourse_ai.spam.test_modal.run"
          @disabled={{this.isLoading}}
          class="btn-primary spam-test-modal__run-button"
        >
          <AiIndicatorWave @loading={{this.isLoading}} />
        </DButton>
      </:footer>
    </DModal>
  </template>
}
@@ -24,6 +24,10 @@ export default {
       label: "discourse_ai.tools.short_title",
       route: "adminPlugins.show.discourse-ai-tools",
     },
+    {
+      label: "discourse_ai.spam.short_title",
+      route: "adminPlugins.show.discourse-ai-spam",
+    },
     {
       label: "discourse_ai.usage.short_title",
       route: "adminPlugins.show.discourse-ai-usage",
assets/stylesheets/modules/llms/common/spam.scss (new file, 124 lines)
@@ -0,0 +1,124 @@
.ai-spam {
  --chart-scanned-color: var(--success);
  --chart-spam-color: var(--danger);
  padding-top: 15px;

  &__settings {
    margin-bottom: 2em;
  }

  &__enabled {
    display: flex;
    align-items: center;
    gap: 0.4em;
    margin-bottom: 1em;

    .fk-d-tooltip__trigger {
      color: var(--primary-high);
    }
  }

  &__settings-title {
    margin-bottom: 1em;
  }

  &__toggle,
  &__llm,
  &__instructions {
    margin-bottom: 1em;
  }

  &__toggle-label,
  &__llm-label,
  &__instructions-label {
    display: block;
    margin-bottom: 0.5em;
    font-weight: bold;
  }

  &__instructions-input {
    width: 100%;
    min-height: 100px;
    margin-bottom: 0.5em;
  }

  &__stats {
    margin-top: 2em;
  }

  &__stats-title {
    margin-bottom: 1em;
  }

  &__metrics {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    gap: 1em;
    margin-bottom: 2em;
  }

  &__metrics-item {
    display: flex;
    flex-direction: column;
    padding: 1em;
    background: var(--primary-very-low);
    border-radius: 0.25em;
  }

  &__metrics-label {
    color: var(--primary-medium);
    font-size: 0.875em;
    margin-bottom: 0.5em;
  }

  &__metrics-value {
    color: var(--primary);
    font-size: 1.5em;
    font-weight: bold;
  }
}

.spam-test-modal {
  &__body {
    min-width: 500px;
  }

  &__test-result {
    margin-top: 1.5em;
    padding-top: 1.5em;
    border-top: 1px solid var(--primary-low);
  }

  &__verdict {
    font-size: var(--font-up-2);
    font-weight: bold;
    padding: 0.5em;
    border-radius: 0.25em;
    text-align: center;
    margin: 1em 0;

    &.is-spam {
      background: var(--danger-low);
      color: var(--danger);
    }

    &.not-spam {
      background: var(--success-low);
      color: var(--success);
    }
  }

  &__log {
    margin-top: 1em;

    pre {
      max-height: 300px;
      overflow-y: auto;
      background: var(--primary-very-low);
      padding: 1em;
      margin: 0.5em 0;
      font-family: monospace;
      white-space: pre-wrap;
    }
  }
}
@@ -129,6 +129,35 @@ en:
     modals:
       select_option: "Select an option..."
 
+    spam:
+      short_title: "Spam"
+      title: "Configure spam handling"
+      select_llm: "Select LLM"
+      custom_instructions: "Custom instructions"
+      custom_instructions_help: "Custom instructions specific to your site to help guide the AI in identifying spam, e.g. 'Be more aggressive about scanning posts not in English'."
+      last_seven_days: "Last 7 days"
+      scanned_count: "Posts scanned"
+      false_positives: "Incorrectly flagged"
+      false_negatives: "Missed spam"
+      spam_detected: "Spam detected"
+      custom_instructions_placeholder: "Site-specific instructions for the AI to help identify spam more accurately"
+      enable: "Enable"
+      spam_tip: "AI spam detection will scan the first 3 posts by all new users on public topics. It will flag them for review and block users if they are likely spam."
+      settings_saved: "Settings saved"
+      spam_description: "Identifies potential spam using the selected LLM and flags it for site moderators to inspect in the review queue"
+      no_llms: "No LLMs available"
+      test_button: "Test..."
+      save_button: "Save changes"
+      test_modal:
+        title: "Test spam detection"
+        post_url_label: "Post URL or ID"
+        post_url_placeholder: "https://your-forum.com/t/topic/123/4 or post ID"
+        result: "Result"
+        scan_log: "Scan log"
+        run: "Run test"
+        spam: "Spam"
+        not_spam: "Not spam"
+
     usage:
       short_title: "Usage"
       summary: "Summary"

@@ -305,6 +334,7 @@ en:
           ai_persona: "Persona (%{persona})"
           ai_summarization: "Summarize"
           ai_embeddings_semantic_search: "AI search"
+          ai_spam: "Spam"
         in_use_warning:
           one: "This model is currently used by %{settings}. If misconfigured, the feature won't work as expected."
           other: "This model is currently used by the following: %{settings}. If misconfigured, features won't work as expected. "
@@ -251,6 +251,8 @@ en:
       other_content_in_pm: "Personal messages containing posts from other people cannot be shared publicly"
       failed_to_share: "Failed to share the conversation"
       conversation_deleted: "Conversation share deleted successfully"
+    spam_detection:
+      flag_reason: "Flagged as spam by <a href='%{url}'>Discourse AI</a>"
     ai_bot:
       reply_error: "Sorry, it looks like our system encountered an unexpected issue while trying to reply.\n\n[details='Error details']\n%{details}\n[/details]"
       default_pm_prefix: "[Untitled AI bot PM]"
@@ -413,9 +415,10 @@ en:
     llm:
       configuration:
         disable_module_first: "You have to disable %{setting} first."
-        set_llm_first: "Set %{setting} first."
+        set_llm_first: "Set %{setting} first"
         model_unreachable: "We couldn't get a response from this model. Check your settings first."
-        invalid_seeded_model: "You can't use this model with this feature."
+        invalid_seeded_model: "You can't use this model with this feature"
+        must_select_model: "You must select a LLM first"
       endpoints:
         not_configured: "%{display_name} (not configured)"
       configuration_hint:
|
@ -80,6 +80,9 @@ Discourse::Application.routes.draw do
|
|||||||
|
|
||||||
get "/ai-usage", to: "discourse_ai/admin/ai_usage#show"
|
get "/ai-usage", to: "discourse_ai/admin/ai_usage#show"
|
||||||
get "/ai-usage-report", to: "discourse_ai/admin/ai_usage#report"
|
get "/ai-usage-report", to: "discourse_ai/admin/ai_usage#report"
|
||||||
|
get "/ai-spam", to: "discourse_ai/admin/ai_spam#show"
|
||||||
|
put "/ai-spam", to: "discourse_ai/admin/ai_spam#update"
|
||||||
|
post "/ai-spam/test", to: "discourse_ai/admin/ai_spam#test"
|
||||||
|
|
||||||
resources :ai_llms,
|
resources :ai_llms,
|
||||||
only: %i[index create show update destroy],
|
only: %i[index create show update destroy],
|
||||||
|
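As an illustration of the three routes above (not part of the commit), a script could exercise the test endpoint using standard Discourse API-key authentication; the host, key, and payload values here are placeholders:

require "net/http"
require "uri"
require "json"

# Hypothetical client script; forum.example.com and the credentials are
# placeholders. The response keys is_spam and log come from the controller's
# test action above.
uri = URI("https://forum.example.com/admin/plugins/discourse-ai/ai-spam/test.json")
req = Net::HTTP::Post.new(uri)
req["Api-Key"] = "YOUR_ADMIN_API_KEY"
req["Api-Username"] = "system"
req.set_form_data(
  post_url: "https://forum.example.com/t/some-topic/123/4",
  custom_instructions: "Be strict about link-only replies",
)

res = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) { |http| http.request(req) }
verdict = JSON.parse(res.body)
puts verdict["is_spam"]
puts verdict["log"]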
@@ -25,7 +25,7 @@ discourse_ai:
   ai_sentiment_backfill_post_max_age_days:
     default: 60
     hidden: true
 
   ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations"
   ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
@@ -321,3 +321,16 @@ discourse_ai:
     type: list
     list_type: compact
     default: ""
+
+  ai_spam_detection_enabled:
+    default: false
+    hidden: true
+
+  ai_spam_detection_user_id:
+    default: ""
+    hidden: true
+
+  ai_spam_detection_model_allowed_seeded_models:
+    default: ""
+    hidden: true
+    type: list
db/migrate/20241206030229_add_ai_moderation_settings.rb (new file, 15 lines)
@@ -0,0 +1,15 @@
# frozen_string_literal: true
class AddAiModerationSettings < ActiveRecord::Migration[7.2]
  def change
    create_enum :ai_moderation_setting_type, %w[spam nsfw custom]

    create_table :ai_moderation_settings do |t|
      t.enum :setting_type, enum_type: "ai_moderation_setting_type", null: false
      t.jsonb :data, default: {}
      t.bigint :llm_model_id, null: false
      t.timestamps
    end

    add_index :ai_moderation_settings, :setting_type, unique: true
  end
end
db/migrate/20241206051225_add_ai_spam_logs.rb (new file, 16 lines)
@@ -0,0 +1,16 @@
# frozen_string_literal: true
class AddAiSpamLogs < ActiveRecord::Migration[7.2]
  def change
    create_table :ai_spam_logs do |t|
      t.bigint :post_id, null: false
      t.bigint :llm_model_id, null: false
      t.bigint :ai_api_audit_log_id
      t.bigint :reviewable_id
      t.boolean :is_spam, null: false
      t.string :payload, null: false, default: "", limit: 20_000
      t.timestamps
    end

    add_index :ai_spam_logs, :post_id
  end
end
lib/ai_moderation/entry_point.rb (new file, 17 lines)
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module DiscourseAi
  module AiModeration
    class EntryPoint
      def inject_into(plugin)
        plugin.on(:post_created) { |post| SpamScanner.new_post(post) }
        plugin.on(:post_edited) { |post| SpamScanner.edited_post(post) }
        plugin.on(:post_process_cooked) { |_doc, post| SpamScanner.after_cooked_post(post) }

        plugin.on(:site_setting_changed) do |name, _old_value, new_value|
          SpamScanner.ensure_flagging_user! if name == :ai_spam_detection_enabled && new_value
        end
      end
    end
  end
end
lib/ai_moderation/spam_report.rb (new file, 47 lines)
@@ -0,0 +1,47 @@
# frozen_string_literal: true

module DiscourseAi
  module AiModeration
    class SpamReport
      def self.generate(min_date: 1.week.ago)
        spam_status = [Reviewable.statuses[:approved], Reviewable.statuses[:deleted]]
        ham_status = [Reviewable.statuses[:rejected], Reviewable.statuses[:ignored]]

        sql = <<~SQL
          WITH spam_stats AS (
            SELECT
              asl.reviewable_id,
              asl.post_id,
              asl.is_spam,
              r.status as reviewable_status,
              r.target_type,
              r.potential_spam
            FROM ai_spam_logs asl
            LEFT JOIN reviewables r ON r.id = asl.reviewable_id
            WHERE asl.created_at > :min_date
          ),
          post_reviewables AS (
            SELECT
              target_id post_id,
              COUNT(DISTINCT target_id) as false_negative_count
            FROM reviewables
            WHERE target_type = 'Post'
              AND status IN (:spam)
              AND potential_spam
              AND target_id IN (SELECT post_id FROM spam_stats)
            GROUP BY target_id
          )
          SELECT
            COUNT(*) AS scanned_count,
            SUM(CASE WHEN is_spam THEN 1 ELSE 0 END) AS spam_detected,
            COUNT(CASE WHEN reviewable_status IN (:ham) THEN 1 END) AS false_positives,
            COALESCE(SUM(pr.false_negative_count), 0) AS false_negatives
          FROM spam_stats
          LEFT JOIN post_reviewables pr USING (post_id)
        SQL

        DB.query(sql, spam: spam_status, ham: ham_status, min_date: min_date).first
      end
    end
  end
end
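For orientation, a hypothetical console call showing how the generated report row is consumed; the column meanings follow from the SQL above:

# Hypothetical console usage of the report above (min_date value is arbitrary).
stats = DiscourseAi::AiModeration::SpamReport.generate(min_date: 30.days.ago)
stats.scanned_count   # total ai_spam_logs rows in the window
stats.spam_detected   # posts the LLM judged spam
stats.false_positives # LLM-flagged posts that moderators rejected/ignored
stats.false_negatives # moderator-confirmed spam the scanner missed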
lib/ai_moderation/spam_scanner.rb (new file, 371 lines)
@@ -0,0 +1,371 @@
# frozen_string_literal: true

module DiscourseAi
  module AiModeration
    class SpamScanner
      POSTS_TO_SCAN = 3
      MINIMUM_EDIT_DIFFERENCE = 10
      EDIT_DELAY_MINUTES = 10
      MAX_AGE_TO_SCAN = 1.day
      MAX_RAW_SCAN_LENGTH = 5000

      SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"

      def self.new_post(post)
        return if !enabled?
        return if !should_scan_post?(post)

        flag_post_for_scanning(post)
      end

      def self.ensure_flagging_user!
        if !SiteSetting.ai_spam_detection_user_id.present?
          User.transaction do
            # prefer a "high" id for this bot
            id = User.where("id > -20").minimum(:id) - 1
            id = User.minimum(:id) - 1 if id == -100

            user =
              User.create!(
                id: id,
                username: UserNameSuggester.suggest("discourse_ai_spam"),
                name: "Discourse AI Spam Scanner",
                email: "#{SecureRandom.hex(10)}@invalid.invalid",
                active: true,
                approved: true,
                trust_level: TrustLevel[4],
                admin: true,
              )
            Group.user_trust_level_change!(user.id, user.trust_level)

            SiteSetting.ai_spam_detection_user_id = user.id
          end
        end
      end

      def self.flagging_user
        user = nil
        if SiteSetting.ai_spam_detection_user_id.present?
          user = User.find_by(id: SiteSetting.ai_spam_detection_user_id)
        end
        user || Discourse.system_user
      end

      def self.after_cooked_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD]
        return if post.updated_at < MAX_AGE_TO_SCAN.ago

        last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first

        if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago
          delay_minutes =
            ((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60
          Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
        else
          Jobs.enqueue(:ai_spam_scan, post_id: post.id)
        end
      end

      def self.edited_post(post)
        return if !enabled?
        return if !should_scan_post?(post)
        return if scanned_max_times?(post)

        previous_version = post.revisions.last&.modifications&.dig("raw", 0)
        current_version = post.raw

        return if !significant_change?(previous_version, current_version)

        flag_post_for_scanning(post)
      end

      def self.flag_post_for_scanning(post)
        post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD] = "true"
        post.save_custom_fields
      end

      def self.enabled?
        SiteSetting.ai_spam_detection_enabled && SiteSetting.discourse_ai_enabled
      end

      def self.should_scan_post?(post)
        return false if !post.present?
        return false if post.user.trust_level > TrustLevel[1]
        return false if post.topic.private_message?
        if Post
             .where(user_id: post.user_id)
             .joins(:topic)
             .where(topic: { archetype: Archetype.default })
             .limit(4)
             .count > 3
          return false
        end
        true
      end

      def self.scanned_max_times?(post)
        AiSpamLog.where(post_id: post.id).count >= 3
      end

      def self.significant_change?(previous_version, current_version)
        return true if previous_version.nil? # First edit should be scanned

        # Use Discourse's built-in levenshtein implementation
        distance =
          ScreenedEmail.levenshtein(previous_version.to_s[0...1000], current_version.to_s[0...1000])

        distance >= MINIMUM_EDIT_DIFFERENCE
      end

      def self.test_post(post, custom_instructions: nil, llm_id: nil)
        settings = AiModerationSetting.spam
        llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
        llm = llm_model.to_llm
        custom_instructions = custom_instructions || settings.custom_instructions.presence
        context = build_context(post)
        prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)

        result =
          llm.generate(
            prompt,
            temperature: 0.1,
            max_tokens: 5,
            user: Discourse.system_user,
            feature_name: "spam_detection_test",
            feature_context: {
              post_id: post.id,
            },
          )&.strip

        history = nil
        AiSpamLog
          .where(post: post)
          .order(:created_at)
          .limit(100)
          .each do |log|
            history ||= +"Scan History:\n"
            history << "date: #{log.created_at} is_spam: #{log.is_spam}\n"
          end

        log = +"Scanning #{post.url}\n\n"

        if history
          log << history
          log << "\n"
        end

        log << "LLM: #{llm_model.name}\n\n"
        log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n"
        log << "Context: #{context}\n\n"

        is_spam = check_if_spam(result)

        prompt.push(type: :model, content: result)
        prompt.push(type: :user, content: "Explain your reasoning")

        reasoning =
          llm.generate(
            prompt,
            temperature: 0.1,
            max_tokens: 100,
            user: Discourse.system_user,
            feature_name: "spam_detection_test",
            feature_context: {
              post_id: post.id,
            },
          )&.strip

        log << "#{reasoning}"

        { is_spam: is_spam, log: log }
      end

      def self.completion_prompt(post, context:, custom_instructions:)
        system_prompt = build_system_prompt(custom_instructions)
        prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
        args = { type: :user, content: context }
        upload_ids = post.upload_ids
        args[:upload_ids] = upload_ids.take(3) if upload_ids.present?
        prompt.push(**args)
        prompt
      end

      def self.perform_scan(post)
        return if !enabled?
        return if !should_scan_post?(post)

        settings = AiModerationSetting.spam
        return if !settings || !settings.llm_model

        context = build_context(post)
        llm = settings.llm_model.to_llm
        custom_instructions = settings.custom_instructions.presence
        prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)

        begin
          result =
            llm.generate(
              prompt,
              temperature: 0.1,
              max_tokens: 5,
              user: Discourse.system_user,
              feature_name: "spam_detection",
              feature_context: {
                post_id: post.id,
              },
            )&.strip

          is_spam = check_if_spam(result)

          log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first
          AiSpamLog.transaction do
            log =
              AiSpamLog.create!(
                post: post,
                llm_model: settings.llm_model,
                ai_api_audit_log: log,
                is_spam: is_spam,
                payload: context,
              )
            handle_spam(post, log) if is_spam
          end
        rescue StandardError => e
          # we need retries otherwise stuff will not be handled
          Discourse.warn_exception(
            e,
            message: "Discourse AI: Error in SpamScanner for post #{post.id}",
          )
          raise e
        end
      end

      private

      def self.check_if_spam(result)
        (result.present? && result.strip.downcase.start_with?("spam"))
      end

      def self.build_context(post)
        context = []

        # Clear distinction between reply and new topic
        if post.is_first_post?
          context << "NEW TOPIC POST ANALYSIS"
          context << "- Topic title: #{post.topic.title}"
          context << "- Category: #{post.topic.category&.name}"
        else
          context << "REPLY POST ANALYSIS"
          context << "- In topic: #{post.topic.title}"
          context << "- Category: #{post.topic.category&.name}"
          context << "- Topic started by: #{post.topic.user.username}"

          # Include parent post context for replies
          if post.reply_to_post.present?
            parent = post.reply_to_post
            context << "\nReplying to #{parent.user.username}'s post:"
            context << "#{parent.raw[0..500]}..." if parent.raw.length > 500
            context << parent.raw if parent.raw.length <= 500
          end
        end

        context << "\nPost Author Information:"
        context << "- Username: #{post.user.username}"
        context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days"
        context << "- Total posts: #{post.user.post_count}"
        context << "- Trust level: #{post.user.trust_level}"

        context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
        context << post.raw[0..MAX_RAW_SCAN_LENGTH]
        context.join("\n")
      end

      def self.build_system_prompt(custom_instructions)
        base_prompt = +<<~PROMPT
          You are a spam detection system. Analyze the following post content and context.
          Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.

          - ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API

          Consider the post type carefully:
          - For REPLY posts: Check if the response is relevant and topical to the thread
          - For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion

          A post is spam if it matches any of these criteria:
          - Contains unsolicited commercial content or promotions
          - Has suspicious or unrelated external links
          - Shows patterns of automated/bot posting
          - Contains irrelevant content or advertisements
          - For replies: Completely unrelated to the discussion thread
          - Uses excessive keywords or repetitive text patterns
          - Shows suspicious formatting or character usage

          Be especially strict with:
          - Replies that ignore the previous conversation
          - Posts containing multiple unrelated external links
          - Generic responses that could be posted anywhere

          Be fair to:
          - New users making legitimate first contributions
          - Non-native speakers making genuine efforts to participate
          - Topic-relevant product mentions in appropriate contexts
        PROMPT

        base_prompt << "\n\n"
        base_prompt << <<~SITE_SPECIFIC
          Site Specific Information:
          - Site name: #{SiteSetting.title}
          - Site URL: #{Discourse.base_url}
          - Site description: #{SiteSetting.site_description}
          - Site top 10 categories: #{Category.where(read_restricted: false).order(posts_year: :desc).limit(10).pluck(:name).join(", ")}
        SITE_SPECIFIC

        if custom_instructions.present?
          base_prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
        end

        base_prompt
      end

      def self.handle_spam(post, log)
        url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
        reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url)

        result =
          PostActionCreator.new(
            flagging_user,
            post,
            PostActionType.types[:spam],
            reason: reason,
            queue_for_review: true,
          ).perform

        log.update!(reviewable: result.reviewable)
        SpamRule::AutoSilence.new(post.user, post).silence_user
        # this is required cause tl1 is not auto hidden
        # we want to also handle tl1
        hide_posts_and_topics(post.user)
      end

      def self.hide_posts_and_topics(user)
        Post
          .where(user_id: user.id)
          .where("created_at > ?", 24.hours.ago)
          .update_all(
            [
              "hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)",
              Post.hidden_reasons[:new_user_spam_threshold_reached],
            ],
          )
        topic_ids =
          Post
            .where(user_id: user.id, post_number: 1)
            .where("created_at > ?", 24.hours.ago)
            .select(:topic_id)

        Topic.where(id: topic_ids).update_all(visible: false)
      end
    end
  end
end
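A small aside on check_if_spam above: the verdict is a case-insensitive prefix match, so any completion beginning with "spam" counts as spam, while "NOT_SPAM" does not because it begins with "not". A standalone check, in plain Ruby mirroring (but not using) the plugin code:

# Standalone illustration of the prefix-match verdict logic above.
def spam_verdict?(result)
  result.to_s.strip.downcase.start_with?("spam")
end

puts spam_verdict?("SPAM")                     # => true
puts spam_verdict?("Spam: promotional links")  # => true
puts spam_verdict?("NOT_SPAM")                 # => false
puts spam_verdict?(nil)                        # => false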
@@ -38,6 +38,11 @@ module DiscourseAi
           rval[model_id] << { type: :ai_embeddings_semantic_search }
         end
 
+        if SiteSetting.ai_spam_detection_enabled
+          model_id = AiModerationSetting.spam[:llm_model_id]
+          rval[model_id] << { type: :ai_spam }
+        end
+
         rval
       end
 
@@ -45,14 +50,20 @@ module DiscourseAi
         true
       end
 
-      def self.values
+      def self.values(allowed_seeded_llms: nil)
         values = DB.query_hash(<<~SQL).map(&:symbolize_keys)
           SELECT display_name AS name, id AS value
           FROM llm_models
         SQL
 
+        if allowed_seeded_llms.is_a?(Array)
+          values =
+            values.filter do |value_h|
+              value_h[:value] > 0 || allowed_seeded_llms.include?("custom:#{value_h[:value]}")
+            end
+        end
+
         values.each { |value_h| value_h[:value] = "custom:#{value_h[:value]}" }
         values
       end
 
@@ -37,6 +37,7 @@ register_asset "stylesheets/modules/sentiment/common/dashboard.scss"
 register_asset "stylesheets/modules/llms/common/ai-llms-editor.scss"
 
 register_asset "stylesheets/modules/llms/common/usage.scss"
+register_asset "stylesheets/modules/llms/common/spam.scss"
 
 register_asset "stylesheets/modules/ai-bot/common/ai-tools.scss"
 
@@ -71,6 +72,7 @@ after_initialize do
     DiscourseAi::AiHelper::EntryPoint.new,
     DiscourseAi::Summarization::EntryPoint.new,
     DiscourseAi::AiBot::EntryPoint.new,
+    DiscourseAi::AiModeration::EntryPoint.new,
   ].each { |a_module| a_module.inject_into(self) }
 
   register_reviewable_type ReviewableAiChatMessage
222
spec/lib/modules/ai_moderation/spam_scanner_spec.rb
Normal file
222
spec/lib/modules/ai_moderation/spam_scanner_spec.rb
Normal file
@ -0,0 +1,222 @@
# frozen_string_literal: true

require "rails_helper"

RSpec.describe DiscourseAi::AiModeration::SpamScanner do
  fab!(:user) { Fabricate(:user, trust_level: TrustLevel[0]) }
  fab!(:topic)
  fab!(:post) { Fabricate(:post, user: user, topic: topic) }
  fab!(:llm_model)
  fab!(:spam_setting) do
    AiModerationSetting.create!(
      setting_type: :spam,
      llm_model: llm_model,
      data: {
        custom_instructions: "test instructions",
      },
    )
  end

  before do
    SiteSetting.discourse_ai_enabled = true
    SiteSetting.ai_spam_detection_enabled = true
  end

  describe ".enabled?" do
    it "returns true when both settings are enabled" do
      expect(described_class.enabled?).to eq(true)
    end

    it "returns false when discourse_ai is disabled" do
      SiteSetting.discourse_ai_enabled = false
      expect(described_class.enabled?).to eq(false)
    end

    it "returns false when spam detection is disabled" do
      SiteSetting.ai_spam_detection_enabled = false
      expect(described_class.enabled?).to eq(false)
    end
  end

  describe ".should_scan_post?" do
    it "returns true for new users' posts" do
      expect(described_class.should_scan_post?(post)).to eq(true)
    end

    it "returns false for trusted users" do
      post.user.trust_level = TrustLevel[2]
      expect(described_class.should_scan_post?(post)).to eq(false)
    end

    it "returns false for users with many public posts" do
      Fabricate(:post, user: user, topic: topic)
      Fabricate(:post, user: user, topic: topic)
      expect(described_class.should_scan_post?(post)).to eq(true)

      pm = Fabricate(:private_message_topic, user: user)
      Fabricate(:post, user: user, topic: pm)

      expect(described_class.should_scan_post?(post)).to eq(true)

      topic = Fabricate(:topic, user: user)
      Fabricate(:post, user: user, topic: topic)

      expect(described_class.should_scan_post?(post)).to eq(false)
    end

    it "returns false for private messages" do
      pm_topic = Fabricate(:private_message_topic)
      pm_post = Fabricate(:post, topic: pm_topic, user: user)
      expect(described_class.should_scan_post?(pm_post)).to eq(false)
    end

    it "returns false for nil posts" do
      expect(described_class.should_scan_post?(nil)).to eq(false)
    end
  end

  describe ".scanned_max_times?" do
    it "returns true when post has been scanned 3 times" do
      3.times do
        AiSpamLog.create!(post: post, llm_model: llm_model, ai_api_audit_log_id: 1, is_spam: false)
      end

      expect(described_class.scanned_max_times?(post)).to eq(true)
    end

    it "returns false for posts scanned less than 3 times" do
      expect(described_class.scanned_max_times?(post)).to eq(false)
    end
  end

  describe ".significant_change?" do
    it "returns true for first edits" do
      expect(described_class.significant_change?(nil, "new content")).to eq(true)
    end

    it "returns true for significant changes" do
      old_version = "This is a test post"
      new_version = "This is a completely different post with new content"
      expect(described_class.significant_change?(old_version, new_version)).to eq(true)
    end

    it "returns false for minor changes" do
      old_version = "This is a test post"
      new_version = "This is a test Post" # Only capitalization change
      expect(described_class.significant_change?(old_version, new_version)).to eq(false)
    end
  end
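  # A plausible reading of the threshold the examples above pin down (a sketch,
  # not the plugin's actual code): a first edit is always significant, and a
  # subsequent edit is significant when the raw text changes by more than
  # roughly 10 characters.
  #
  #   def significant_change?(old_raw, new_raw)
  #     return true if old_raw.nil?
  #     (new_raw.to_s.length - old_raw.to_s.length).abs > 10
  #   end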

  describe ".new_post" do
    it "enqueues spam scan job for eligible posts" do
      expect {
        described_class.new_post(post)
        described_class.after_cooked_post(post)
      }.to change(Jobs::AiSpamScan.jobs, :size).by(1)
    end

    it "doesn't enqueue jobs when disabled" do
      SiteSetting.ai_spam_detection_enabled = false
      expect { described_class.new_post(post) }.not_to change(Jobs::AiSpamScan.jobs, :size)
    end
  end

  describe ".edited_post" do
    it "enqueues spam scan job for eligible edited posts" do
      PostRevision.create!(
        post: post,
        modifications: {
          raw: ["old content", "completely new content"],
        },
      )

      expect {
        described_class.edited_post(post)
        described_class.after_cooked_post(post)
      }.to change(Jobs::AiSpamScan.jobs, :size).by(1)
    end

    it "schedules delayed job when edited too soon after last scan" do
      AiSpamLog.create!(
        post: post,
        llm_model: llm_model,
        ai_api_audit_log_id: 1,
        is_spam: false,
        created_at: 5.minutes.ago,
      )

      expect {
        described_class.edited_post(post)
        described_class.after_cooked_post(post)
      }.to change(Jobs::AiSpamScan.jobs, :size).by(1)
    end
  end

  describe "integration test" do
    fab!(:llm_model)
    let(:api_audit_log) { Fabricate(:api_audit_log) }
    fab!(:post_with_uploaded_image)

    before { Jobs.run_immediately! }

    it "Can correctly run tests" do
      prompts = nil
      result =
        DiscourseAi::Completions::Llm.with_prepared_responses(
          ["spam", "the reason is just because"],
        ) do |_, _, _prompts|
          prompts = _prompts
          described_class.test_post(post, custom_instructions: "123")
        end

      expect(prompts.length).to eq(2)
      expect(result[:is_spam]).to eq(true)
      expect(result[:log]).to include("123")
      expect(result[:log]).to include("just because")

      result =
        DiscourseAi::Completions::Llm.with_prepared_responses(
          ["not_spam", "the reason is just because"],
        ) do |_, _, _prompts|
          prompts = _prompts
          described_class.test_post(post, custom_instructions: "123")
        end

      expect(result[:is_spam]).to eq(false)
    end

    it "Correctly handles spam scanning" do
      expect(described_class.flagging_user.id).not_to eq(Discourse.system_user.id)

      # flag post for scanning
      post = post_with_uploaded_image

      described_class.new_post(post)

      prompt = nil
      DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do |_, _, _prompts|
        # force a rebake so we actually scan
        post.rebake!
        prompt = _prompts.first
      end

      content = prompt.messages[1][:content]
      expect(content).to include(post.topic.title)
      expect(content).to include(post.raw)

      upload_ids = prompt.messages[1][:upload_ids]
      expect(upload_ids).to be_present
      expect(upload_ids).to eq(post.upload_ids)

      log = AiSpamLog.find_by(post: post)

      expect(log.payload).to eq(content)
      expect(log.is_spam).to eq(true)
      expect(post.user.reload.silenced_till).to be_present
      expect(post.topic.reload.visible).to eq(false)

      expect(log.reviewable).to be_present
      expect(log.reviewable.created_by_id).to eq(described_class.flagging_user.id)
    end
  end
end
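Read together, these specs pin down the scanner's eligibility rules. The module below is a minimal sketch of those rules, not the plugin's implementation: the constant values, the Archetype.default public-post filter, and the where.not(id: post.id) exclusion are assumptions chosen to match the expectations above.

# Sketch only: eligibility guards implied by spam_scanner_spec.rb.
module SpamScannerSketch
  MAX_SCANS = 3       # scanned_max_times? flips once three scan logs exist
  MAX_OTHER_POSTS = 3 # scanning stops once a user has three other public posts

  def self.should_scan_post?(post)
    return false if post.nil?
    return false if post.topic.private_message?
    return false if post.user.trust_level > TrustLevel[1]

    other_public_posts =
      Post
        .joins(:topic)
        .where(user_id: post.user_id, topics: { archetype: Archetype.default })
        .where.not(id: post.id)
        .count
    other_public_posts < MAX_OTHER_POSTS
  end

  def self.scanned_max_times?(post)
    AiSpamLog.where(post: post).count >= MAX_SCANS
  end
end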
285
spec/requests/admin/ai_spam_controller_spec.rb
Normal file
@ -0,0 +1,285 @@
# frozen_string_literal: true

require "rails_helper"

RSpec.describe DiscourseAi::Admin::AiSpamController do
  fab!(:admin)
  fab!(:user)
  fab!(:llm_model)

  describe "#update" do
    context "when logged in as admin" do
      before { sign_in(admin) }

      it "can update settings from scratch" do
        put "/admin/plugins/discourse-ai/ai-spam.json",
            params: {
              is_enabled: true,
              llm_model_id: llm_model.id,
              custom_instructions: "custom instructions",
            }

        expect(response.status).to eq(200)
        expect(SiteSetting.ai_spam_detection_enabled).to eq(true)
        expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id)
        expect(AiModerationSetting.spam.data["custom_instructions"]).to eq("custom instructions")
      end

      it "denies update for disallowed seeded llm" do
        seeded_llm = Fabricate(:llm_model, id: -1)

        put "/admin/plugins/discourse-ai/ai-spam.json",
            params: {
              is_enabled: true,
              llm_model_id: seeded_llm.id,
              custom_instructions: "custom instructions",
            }

        expect(response.status).to eq(422)

        SiteSetting.ai_spam_detection_model_allowed_seeded_models = seeded_llm.identifier

        put "/admin/plugins/discourse-ai/ai-spam.json",
            params: {
              is_enabled: true,
              llm_model_id: seeded_llm.id,
              custom_instructions: "custom instructions",
            }

        expect(response.status).to eq(200)
      end

      it "can not enable spam detection without a model selected" do
        put "/admin/plugins/discourse-ai/ai-spam.json",
            params: {
              custom_instructions: "custom instructions",
            }
        expect(response.status).to eq(422)
      end

      it "can not fiddle with custom instructions without an llm" do
        put "/admin/plugins/discourse-ai/ai-spam.json", params: { is_enabled: true }
        expect(response.status).to eq(422)
      end

      context "when spam detection was already set" do
        fab!(:setting) do
          AiModerationSetting.create(
            {
              setting_type: :spam,
              llm_model_id: llm_model.id,
              data: {
                custom_instructions: "custom instructions",
              },
            },
          )
        end

        it "can partially update settings" do
          put "/admin/plugins/discourse-ai/ai-spam.json", params: { is_enabled: false }

          expect(response.status).to eq(200)
          expect(SiteSetting.ai_spam_detection_enabled).to eq(false)
          expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id)
          expect(AiModerationSetting.spam.data["custom_instructions"]).to eq("custom instructions")
        end

        it "can update pre existing settings" do
          put "/admin/plugins/discourse-ai/ai-spam.json",
              params: {
                is_enabled: true,
                llm_model_id: llm_model.id,
                custom_instructions: "custom instructions new",
              }

          expect(response.status).to eq(200)
          expect(SiteSetting.ai_spam_detection_enabled).to eq(true)
          expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id)
          expect(AiModerationSetting.spam.data["custom_instructions"]).to eq(
            "custom instructions new",
          )
        end
      end
    end
  end

  describe "#test" do
    fab!(:spam_post) { Fabricate(:post) }
    fab!(:spam_post2) { Fabricate(:post, topic: spam_post.topic, raw: "something special 123") }
    fab!(:setting) do
      AiModerationSetting.create(
        {
          setting_type: :spam,
          llm_model_id: llm_model.id,
          data: {
            custom_instructions: "custom instructions",
          },
        },
      )
    end

    before { sign_in(admin) }

    it "can scan using post url" do
      llm2 = Fabricate(:llm_model, name: "DiffLLM")

      DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "just because"]) do
        post "/admin/plugins/discourse-ai/ai-spam/test.json",
             params: {
               post_url: spam_post2.url,
               llm_id: llm2.id,
             }
      end

      expect(response.status).to eq(200)

      parsed = response.parsed_body
      expect(parsed["log"]).to include(spam_post2.raw)
      expect(parsed["log"]).to include("DiffLLM")
    end

    it "can scan using post id" do
      DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because apples"]) do
        post "/admin/plugins/discourse-ai/ai-spam/test.json",
             params: {
               post_url: spam_post.id.to_s,
             }
      end

      expect(response.status).to eq(200)

      parsed = response.parsed_body
      expect(parsed["log"]).to include(spam_post.raw)
    end

    it "returns proper spam test results" do
      freeze_time DateTime.parse("2000-01-01")

      AiSpamLog.create!(
        post: spam_post,
        llm_model: llm_model,
        is_spam: false,
        created_at: 2.days.ago,
      )

      AiSpamLog.create!(post: spam_post, llm_model: llm_model, is_spam: true, created_at: 1.day.ago)

      DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because banana"]) do
        post "/admin/plugins/discourse-ai/ai-spam/test.json",
             params: {
               post_url: spam_post.url,
               custom_instructions: "special custom instructions",
             }
      end

      expect(response.status).to eq(200)

      parsed = response.parsed_body
      expect(parsed["log"]).to include("special custom instructions")
      expect(parsed["log"]).to include(spam_post.raw)
      expect(parsed["is_spam"]).to eq(true)
      expect(parsed["log"]).to include("Scan History:")
      expect(parsed["log"]).to include("banana")
    end
  end

  describe "#show" do
    context "when logged in as admin" do
      before { sign_in(admin) }

      it "correctly filters seeded llms" do
        SiteSetting.ai_spam_detection_enabled = true
        seeded_llm = Fabricate(:llm_model, id: -1, name: "seeded")

        get "/admin/plugins/discourse-ai/ai-spam.json"
        expect(response.status).to eq(200)
        json = response.parsed_body

        # only includes fabricated model
        expect(json["available_llms"].length).to eq(1)

        SiteSetting.ai_spam_detection_model_allowed_seeded_models = seeded_llm.identifier

        get "/admin/plugins/discourse-ai/ai-spam.json"
        expect(response.status).to eq(200)
        json = response.parsed_body

        expect(json["available_llms"].length).to eq(2)
      end

      it "returns the serialized spam settings" do
        SiteSetting.ai_spam_detection_enabled = true

        get "/admin/plugins/discourse-ai/ai-spam.json"

        expect(response.status).to eq(200)
        json = response.parsed_body
        expect(json["is_enabled"]).to eq(true)
        expect(json["selected_llm"]).to eq(nil)
        expect(json["custom_instructions"]).to eq(nil)
        expect(json["available_llms"]).to be_an(Array)
        expect(json["stats"]).to be_present
      end

      it "returns proper settings when spam detection is enabled" do
        SiteSetting.ai_spam_detection_enabled = true

        AiModerationSetting.create(
          {
            setting_type: :spam,
            llm_model_id: llm_model.id,
            data: {
              custom_instructions: "custom instructions",
            },
          },
        )

        flagging_user = DiscourseAi::AiModeration::SpamScanner.flagging_user
        expect(flagging_user.id).not_to eq(Discourse.system_user.id)

        AiSpamLog.create!(post_id: 1, llm_model_id: llm_model.id, is_spam: true, payload: "test")

        get "/admin/plugins/discourse-ai/ai-spam.json"

        json = response.parsed_body
        expect(json["is_enabled"]).to eq(true)
        expect(json["llm_id"]).to eq(llm_model.id)
        expect(json["custom_instructions"]).to eq("custom instructions")

        expect(json["stats"].to_h).to eq(
          "scanned_count" => 1,
          "spam_detected" => 1,
          "false_positives" => 0,
          "false_negatives" => 0,
        )

        expect(json["flagging_username"]).to eq(flagging_user.username)
      end
    end

    context "when not logged in as admin" do
      it "returns 404 for anonymous users" do
        get "/admin/plugins/discourse-ai/ai-spam.json"
        expect(response.status).to eq(404)
      end

      it "returns 404 for regular users" do
        sign_in(user)
        get "/admin/plugins/discourse-ai/ai-spam.json"
        expect(response.status).to eq(404)
      end
    end

    context "when plugin is disabled" do
      before do
        sign_in(admin)
        SiteSetting.discourse_ai_enabled = false
      end

      it "returns 404" do
        get "/admin/plugins/discourse-ai/ai-spam.json"
        expect(response.status).to eq(404)
      end
    end
  end
end
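The request specs above double as documentation for the admin API. Below is a rough usage sketch with Ruby's stdlib; the host, model id, and post URL are placeholders, and the Api-Key/Api-Username headers follow the standard Discourse admin API convention.

require "json"
require "net/http"
require "uri"

base = URI("https://forum.example.com") # placeholder host
headers = {
  "Api-Key" => ENV.fetch("DISCOURSE_API_KEY"), # admin-scoped key assumed
  "Api-Username" => "system",
  "Content-Type" => "application/json",
}

http = Net::HTTP.new(base.host, base.port)
http.use_ssl = true

# Enable detection with a chosen model; a 422 means the model is not allowed.
update = Net::HTTP::Put.new("/admin/plugins/discourse-ai/ai-spam.json", headers)
update.body = { is_enabled: true, llm_model_id: 42, custom_instructions: "be strict" }.to_json
puts http.request(update).code

# Dry-run detection on an existing post; the endpoint accepts a post URL or a bare post id.
test = Net::HTTP::Post.new("/admin/plugins/discourse-ai/ai-spam/test.json", headers)
test.body = { post_url: "https://forum.example.com/t/topic/123/1" }.to_json
result = JSON.parse(http.request(test).body)
puts result["is_spam"] # detailed reasoning comes back in result["log"]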
48
spec/system/ai_moderation/ai_spam_spec.rb
Normal file
@ -0,0 +1,48 @@
# frozen_string_literal: true

RSpec.describe "AI Spam Configuration", type: :system, js: true do
  fab!(:admin)
  let(:llm_model) { Fabricate(:llm_model) }

  before do
    SiteSetting.discourse_ai_enabled = true
    sign_in(admin)
  end

  it "can properly configure spam settings" do
    visit "/admin/plugins/discourse-ai/ai-spam"

    expect(page).to have_css(".ai-spam__llm-placeholder")

    toggle = PageObjects::Components::DToggleSwitch.new(".ai-spam__toggle")

    toggle.toggle
    dialog = PageObjects::Components::Dialog.new
    expect(dialog).to have_content(I18n.t("discourse_ai.llm.configuration.must_select_model"))
    dialog.click_ok

    expect(toggle.unchecked?).to eq(true)

    llm_model
    visit "/admin/plugins/discourse-ai/ai-spam"

    toggle = PageObjects::Components::DToggleSwitch.new(".ai-spam__toggle")
    toggle.toggle

    try_until_success { expect(AiModerationSetting.spam&.llm_model_id).to eq(llm_model.id) }

    find(".ai-spam__instructions-input").fill_in(with: "Test spam detection instructions")
    find(".ai-spam__instructions-save").click

    toasts = PageObjects::Components::Toasts.new
    expect(toasts).to have_content(I18n.t("js.discourse_ai.spam.settings_saved"))

    expect(AiModerationSetting.spam.custom_instructions).to eq("Test spam detection instructions")

    visit "/admin/plugins/discourse-ai/ai-llms"

    expect(find(".ai-llm-list-editor__usages")).to have_content(
      I18n.t("js.discourse_ai.llms.usage.ai_spam"),
    )
  end
end
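For completeness, the state this UI flow persists can be checked from a Rails console. A minimal sketch using only names that appear in the specs above:

setting = AiModerationSetting.spam
setting.llm_model_id                  # the model picked when the toggle was enabled
setting.custom_instructions           # => "Test spam detection instructions"
SiteSetting.ai_spam_detection_enabled # => true once a model is selected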