FEATURE: Add AI-powered spam detection for new user posts (#1004)

This introduces a comprehensive spam detection system that uses an LLM to
automatically identify and flag likely spam posts. The system is designed to be
both powerful and configurable while keeping false positives to a minimum.
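
At its core, the scanner assembles a context blob for the post (topic, author
stats, raw excerpt), sends it to the configured LLM with a strict system
prompt, and treats a reply that leads with "SPAM" as a positive verdict. A
condensed, illustrative sketch of that verdict step follows; it paraphrases the
SpamScanner module added in this diff (the `llm` argument is an
`LlmModel#to_llm` proxy) and is not the literal implementation:

# Illustrative condensation of the verdict logic; the authoritative code is in
# DiscourseAi::AiModeration::SpamScanner further down in this diff.
def spam_verdict(llm, system_prompt, post_context)
  prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
  prompt.push(type: :user, content: post_context) # topic, author and raw excerpt

  # The system prompt instructs the model to lead with SPAM or NOT_SPAM,
  # so a handful of tokens at low temperature is enough for a verdict.
  reply =
    llm.generate(
      prompt,
      temperature: 0.1,
      max_tokens: 5,
      user: Discourse.system_user,
      feature_name: "spam_detection",
    )&.strip

  reply.to_s.downcase.start_with?("spam")
end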

Key Features:
* Automatically scans the first 3 posts from new users (TL0/TL1)
* Creates dedicated AI flagging user to distinguish from system flags
* Tracks false positives/negatives for quality monitoring
* Supports custom instructions to fine-tune detection
* Includes test interface for trying detection on any post

Technical Implementation:
* New database tables:
  - ai_spam_logs: Stores scan history and results
  - ai_moderation_settings: Stores LLM config and custom instructions
* Rate limiting and safeguards:
  - Minimum 10-minute delay between rescans
  - Only scans significant edits (>10 char difference)
  - Maximum 3 scans per post
  - 24-hour maximum age for scannable posts
* Admin UI features (backed by the JSON endpoints sketched after this list):
  - Real-time testing capabilities
  - 7-day statistics dashboard
  - Configurable LLM model selection
  - Custom instruction support
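
For reference, the admin UI is driven by a small JSON API (see the
config/routes changes below). A hedged example of calling it directly,
assuming a standard admin API key; the site URL, API key, and llm_model_id
are placeholders, and the payload keys mirror the controller and request
specs in this diff:

require "json"
require "net/http"
require "uri"

BASE = "https://forum.example.com" # placeholder site
HEADERS = { "Api-Key" => ENV["DISCOURSE_API_KEY"].to_s, "Api-Username" => "system" }

# Enable detection, pick an LLM, and set custom instructions
# (PUT /admin/plugins/discourse-ai/ai-spam.json)
uri = URI("#{BASE}/admin/plugins/discourse-ai/ai-spam.json")
req = Net::HTTP::Put.new(uri, HEADERS)
req.set_form_data(
  "is_enabled" => "true",
  "llm_model_id" => "42", # placeholder LlmModel id
  "custom_instructions" => "Be more aggressive about scanning posts not in English",
)
Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) { |http| http.request(req) }

# Dry-run detection against a single post without flagging it
# (POST /admin/plugins/discourse-ai/ai-spam/test.json)
uri = URI("#{BASE}/admin/plugins/discourse-ai/ai-spam/test.json")
req = Net::HTTP::Post.new(uri, HEADERS)
req.set_form_data("post_url" => "#{BASE}/t/some-topic/123/4") # post URL or raw post id
res = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) { |http| http.request(req) }
verdict = JSON.parse(res.body)
puts verdict["is_spam"] # true / false
puts verdict["log"]     # scan log, including prompt context and the model's reasoning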

Security and Performance:
* Respects trust levels - only scans TL0/TL1 users (rules condensed in the sketch after this list)
* Skips private messages entirely
* Stops scanning users after 3 successful public posts
* Includes comprehensive test coverage
* Maintains audit log of all scan attempts
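
Roughly, the safeguards above condense to the following checks. This is an
illustrative paraphrase of SpamScanner.should_scan_post? plus the rescan
safeguards elsewhere in the module; public_post_count is a stand-in helper for
this sketch, not a method in the diff:

# Illustrative condensation of the scan-eligibility rules; the authoritative
# logic lives in DiscourseAi::AiModeration::SpamScanner below.
def scannable?(post)
  return false if post.blank?
  return false if post.user.trust_level > TrustLevel[1]        # TL0/TL1 only
  return false if post.topic.private_message?                  # never scan PMs
  return false if public_post_count(post.user) > 3             # stop after the first few public posts
  return false if post.updated_at < 1.day.ago                  # 24-hour age cap for rescans
  return false if AiSpamLog.where(post_id: post.id).count >= 3 # at most 3 scans per post
  true
end

def public_post_count(user)
  Post.where(user_id: user.id).joins(:topic).where(topic: { archetype: Archetype.default }).count
end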


---------

Co-authored-by: Keegan George <kgeorge13@gmail.com>
Co-authored-by: Martin Brennan <martin@discourse.org>
Sam 2024-12-12 09:17:25 +11:00 committed by GitHub
parent ae80494448
commit 47f5da7e42
27 changed files with 1801 additions and 6 deletions

View File

@ -0,0 +1,11 @@
import { service } from "@ember/service";
import { ajax } from "discourse/lib/ajax";
import DiscourseRoute from "discourse/routes/discourse";
export default class DiscourseAiSpamRoute extends DiscourseRoute {
@service store;
model() {
return ajax("/admin/plugins/discourse-ai/ai-spam.json");
}
}

View File

@ -0,0 +1 @@
<AiSpam @model={{this.model}} />

View File

@ -0,0 +1,112 @@
# frozen_string_literal: true
module DiscourseAi
module Admin
class AiSpamController < ::Admin::AdminController
requires_plugin "discourse-ai"
def show
render json: AiSpamSerializer.new(spam_config, root: false)
end
def update
updated_params = {}
if allowed_params.key?(:llm_model_id)
llm_model_id = updated_params[:llm_model_id] = allowed_params[:llm_model_id]
if llm_model_id.to_i < 0 &&
!SiteSetting.ai_spam_detection_model_allowed_seeded_models_map.include?(
"custom:#{llm_model_id}",
)
return(
render_json_error(
I18n.t("discourse_ai.llm.configuration.invalid_seeded_model"),
status: 422,
)
)
end
end
updated_params[:data] = {
custom_instructions: allowed_params[:custom_instructions],
} if allowed_params.key?(:custom_instructions)
if updated_params.present?
# not using upsert cause we will not get the correct validation errors
if AiModerationSetting.spam
AiModerationSetting.spam.update!(updated_params)
else
AiModerationSetting.create!(updated_params.merge(setting_type: :spam))
end
end
is_enabled = ActiveModel::Type::Boolean.new.cast(allowed_params[:is_enabled])
if allowed_params.key?(:is_enabled)
if is_enabled && !AiModerationSetting.spam&.llm_model_id
return(
render_json_error(
I18n.t("discourse_ai.llm.configuration.must_select_model"),
status: 422,
)
)
end
SiteSetting.ai_spam_detection_enabled = is_enabled
end
render json: AiSpamSerializer.new(spam_config, root: false)
end
def test
url = params[:post_url].to_s
post = nil
if url.match?(/^\d+$/)
post_id = url.to_i
post = Post.find_by(id: post_id)
end
route = UrlHelper.rails_route_from_url(url) if !post
if route
if route[:controller] == "topics"
post_number = route[:post_number] || 1
post = Post.with_deleted.find_by(post_number: post_number, topic_id: route[:topic_id])
end
end
raise Discourse::NotFound if !post
result =
DiscourseAi::AiModeration::SpamScanner.test_post(
post,
custom_instructions: params[:custom_instructions],
llm_id: params[:llm_id],
)
render json: result
end
private
def allowed_params
params.permit(:is_enabled, :llm_model_id, :custom_instructions)
end
def spam_config
spam_config = {
enabled: SiteSetting.ai_spam_detection_enabled,
settings: AiModerationSetting.spam,
}
spam_config[:stats] = DiscourseAi::AiModeration::SpamReport.generate(min_date: 1.week.ago)
if spam_config[:stats].scanned_count > 0
spam_config[
:flagging_username
] = DiscourseAi::AiModeration::SpamScanner.flagging_user&.username
end
spam_config
end
end
end
end

View File

@ -0,0 +1,13 @@
# frozen_string_literal: true
module Jobs
class AiSpamScan < ::Jobs::Base
def execute(args)
return if !args[:post_id]
post = Post.find_by(id: args[:post_id])
return if !post
DiscourseAi::AiModeration::SpamScanner.perform_scan(post)
end
end
end

View File

@ -0,0 +1,32 @@
# frozen_string_literal: true
class AiModerationSetting < ActiveRecord::Base
belongs_to :llm_model
validates :llm_model_id, presence: true
validates :setting_type, presence: true
validates :setting_type, uniqueness: true
def self.spam
find_by(setting_type: :spam)
end
def custom_instructions
data["custom_instructions"]
end
end
# == Schema Information
#
# Table name: ai_moderation_settings
#
# id :bigint not null, primary key
# setting_type :enum not null
# data :jsonb
# llm_model_id :bigint not null
# created_at :datetime not null
# updated_at :datetime not null
#
# Indexes
#
# index_ai_moderation_settings_on_setting_type (setting_type) UNIQUE
#

app/models/ai_spam_log.rb (new file, 26 lines)
View File

@ -0,0 +1,26 @@
# frozen_string_literal: true
class AiSpamLog < ActiveRecord::Base
belongs_to :post
belongs_to :llm_model
belongs_to :ai_api_audit_log
belongs_to :reviewable
end
# == Schema Information
#
# Table name: ai_spam_logs
#
# id :bigint not null, primary key
# post_id :bigint not null
# llm_model_id :bigint not null
# ai_api_audit_log_id :bigint
# reviewable_id :bigint
# is_spam :boolean not null
# payload :string(20000) default(""), not null
# created_at :datetime not null
# updated_at :datetime not null
#
# Indexes
#
# index_ai_spam_logs_on_post_id (post_id)
#

View File

@ -56,7 +56,11 @@ class LlmModel < ActiveRecord::Base
   end

   def to_llm
-    DiscourseAi::Completions::Llm.proxy("custom:#{id}")
+    DiscourseAi::Completions::Llm.proxy(identifier)
   end
+
+  def identifier
+    "custom:#{id}"
+  end

   def toggle_companion_user
View File

@ -0,0 +1,40 @@
# frozen_string_literal: true
class AiSpamSerializer < ApplicationSerializer
attributes :is_enabled, :llm_id, :custom_instructions, :available_llms, :stats, :flagging_username
def is_enabled
object[:enabled]
end
def llm_id
settings&.llm_model&.id
end
def custom_instructions
settings&.custom_instructions
end
def available_llms
DiscourseAi::Configuration::LlmEnumerator
.values(allowed_seeded_llms: SiteSetting.ai_spam_detection_model_allowed_seeded_models_map)
.map { |hash| { id: hash[:value], name: hash[:name] } }
end
def flagging_username
object[:flagging_username]
end
def stats
{
scanned_count: object[:stats].scanned_count.to_i,
spam_detected: object[:stats].spam_detected.to_i,
false_positives: object[:stats].false_positives.to_i,
false_negatives: object[:stats].false_negatives.to_i,
}
end
def settings
object[:settings]
end
end

View File

@ -18,6 +18,7 @@ export default {
       this.route("new");
       this.route("show", { path: "/:id" });
     });
+    this.route("discourse-ai-spam", { path: "ai-spam" });
     this.route("discourse-ai-usage", { path: "ai-usage" });
   },
 };

View File

@ -0,0 +1,243 @@
import Component from "@glimmer/component";
import { tracked } from "@glimmer/tracking";
import { fn } from "@ember/helper";
import { on } from "@ember/modifier";
import { action } from "@ember/object";
import { LinkTo } from "@ember/routing";
import { service } from "@ember/service";
import DButton from "discourse/components/d-button";
import DToggleSwitch from "discourse/components/d-toggle-switch";
import DTooltip from "discourse/components/d-tooltip";
import withEventValue from "discourse/helpers/with-event-value";
import { ajax } from "discourse/lib/ajax";
import { popupAjaxError } from "discourse/lib/ajax-error";
import i18n from "discourse-common/helpers/i18n";
import getURL from "discourse-common/lib/get-url";
import AdminConfigAreaCard from "admin/components/admin-config-area-card";
import AdminPageSubheader from "admin/components/admin-page-subheader";
import ComboBox from "select-kit/components/combo-box";
import SpamTestModal from "./modal/spam-test-modal";
export default class AiSpam extends Component {
@service siteSettings;
@service toasts;
@service modal;
@tracked
stats = {
scanned_count: 0,
spam_detected: 0,
false_positives: 0,
false_negatives: 0,
daily_data: [],
};
@tracked isEnabled = false;
@tracked selectedLLM = null;
@tracked customInstructions = "";
constructor() {
super(...arguments);
this.initializeFromModel();
}
@action
initializeFromModel() {
const model = this.args.model;
this.isEnabled = model.is_enabled;
if (model.llm_id) {
this.selectedLLM = "custom:" + model.llm_id;
} else {
if (this.availableLLMs.length) {
this.selectedLLM = this.availableLLMs[0].id;
this.autoSelectedLLM = true;
}
}
this.customInstructions = model.custom_instructions;
this.stats = model.stats;
}
get availableLLMs() {
return this.args.model?.available_llms || [];
}
@action
async toggleEnabled() {
this.isEnabled = !this.isEnabled;
const data = { is_enabled: this.isEnabled };
if (this.autoSelectedLLM) {
data.llm_model_id = this.llmId;
}
try {
const response = await ajax("/admin/plugins/discourse-ai/ai-spam.json", {
type: "PUT",
data,
});
this.autoSelectedLLM = false;
this.isEnabled = response.is_enabled;
} catch (error) {
this.isEnabled = !this.isEnabled;
popupAjaxError(error);
}
}
get llmId() {
return this.selectedLLM.toString().split(":")[1];
}
@action
async updateLLM(value) {
this.selectedLLM = value;
}
@action
async save() {
try {
await ajax("/admin/plugins/discourse-ai/ai-spam.json", {
type: "PUT",
data: {
llm_model_id: this.llmId,
custom_instructions: this.customInstructions,
},
});
this.toasts.success({
data: { message: i18n("discourse_ai.spam.settings_saved") },
duration: 2000,
});
} catch (error) {
popupAjaxError(error);
}
}
@action
showTestModal() {
this.modal.show(SpamTestModal, {
model: {
customInstructions: this.customInstructions,
llmId: this.llmId,
},
});
}
get metrics() {
const detected = {
label: "discourse_ai.spam.spam_detected",
value: this.stats.spam_detected,
};
if (this.args.model.flagging_username) {
detected.href = getURL(
"/review?flagged_by=" + this.args.model.flagging_username
);
}
return [
{
label: "discourse_ai.spam.scanned_count",
value: this.stats.scanned_count,
},
detected,
{
label: "discourse_ai.spam.false_positives",
value: this.stats.false_positives,
},
{
label: "discourse_ai.spam.false_negatives",
value: this.stats.false_negatives,
},
];
}
<template>
<div class="ai-spam">
<section class="ai-spam__settings">
<AdminPageSubheader
@titleLabel="discourse_ai.spam.title"
@descriptionLabel="discourse_ai.spam.spam_description"
/>
<div class="control-group ai-spam__enabled">
<DToggleSwitch
class="ai-spam__toggle"
@state={{this.isEnabled}}
@label="discourse_ai.spam.enable"
{{on "click" this.toggleEnabled}}
/>
<DTooltip
@icon="circle-question"
@content={{i18n "discourse_ai.spam.spam_tip"}}
/>
</div>
<div class="ai-spam__llm">
<label class="ai-spam__llm-label">{{i18n
"discourse_ai.spam.select_llm"
}}</label>
{{#if this.availableLLMs.length}}
<ComboBox
@value={{this.selectedLLM}}
@content={{this.availableLLMs}}
@onChange={{this.updateLLM}}
class="ai-spam__llm-selector"
/>
{{else}}
<span class="ai-spam__llm-placeholder">
<LinkTo @route="adminPlugins.show.discourse-ai-llms.index">
{{i18n "discourse_ai.spam.no_llms"}}
</LinkTo>
</span>
{{/if}}
</div>
<div class="ai-spam__instructions">
<label class="ai-spam__instructions-label">
{{i18n "discourse_ai.spam.custom_instructions"}}
<DTooltip
@icon="circle-question"
@content={{i18n "discourse_ai.spam.custom_instructions_help"}}
/>
</label>
<textarea
class="ai-spam__instructions-input"
placeholder={{i18n
"discourse_ai.spam.custom_instructions_placeholder"
}}
{{on "input" (withEventValue (fn (mut this.customInstructions)))}}
>{{this.customInstructions}}</textarea>
<DButton
@action={{this.save}}
@label="discourse_ai.spam.save_button"
class="ai-spam__instructions-save btn-primary"
/>
<DButton
@action={{this.showTestModal}}
@label="discourse_ai.spam.test_button"
class="btn-default"
/>
</div>
</section>
<AdminConfigAreaCard
@heading="discourse_ai.spam.last_seven_days"
class="ai-spam__stats"
>
<:content>
<div class="ai-spam__metrics">
{{#each this.metrics as |metric|}}
<div class="ai-spam__metrics-item">
<span class="ai-spam__metrics-label">{{i18n
metric.label
}}</span>
{{#if metric.href}}
<a href={{metric.href}} class="ai-spam__metrics-value">
{{metric.value}}
</a>
{{else}}
<span class="ai-spam__metrics-value">{{metric.value}}</span>
{{/if}}
</div>
{{/each}}
</div>
</:content>
</AdminConfigAreaCard>
</div>
</template>
}

View File

@ -0,0 +1,101 @@
import Component from "@glimmer/component";
import { tracked } from "@glimmer/tracking";
import { fn } from "@ember/helper";
import { on } from "@ember/modifier";
import { action } from "@ember/object";
import DButton from "discourse/components/d-button";
import DModal from "discourse/components/d-modal";
import withEventValue from "discourse/helpers/with-event-value";
import { ajax } from "discourse/lib/ajax";
import { popupAjaxError } from "discourse/lib/ajax-error";
import I18n from "discourse-i18n";
import AiIndicatorWave from "../ai-indicator-wave";
export default class SpamTestModal extends Component {
@tracked testResult;
@tracked isLoading = false;
@tracked postUrl = "";
@tracked scanLog = "";
@tracked isSpam;
@action
async runTest() {
this.isLoading = true;
try {
const response = await ajax(
`/admin/plugins/discourse-ai/ai-spam/test.json`,
{
type: "POST",
data: {
post_url: this.postUrl,
custom_instructions: this.args.model.customInstructions,
llm_id: this.args.model.llmId,
},
}
);
this.isSpam = response.is_spam;
this.testResult = response.is_spam
? I18n.t("discourse_ai.spam.test_modal.spam")
: I18n.t("discourse_ai.spam.test_modal.not_spam");
this.scanLog = response.log;
} catch (error) {
popupAjaxError(error);
} finally {
this.isLoading = false;
}
}
<template>
<DModal
@title={{I18n.t "discourse_ai.spam.test_modal.title"}}
@closeModal={{@closeModal}}
@bodyClass="spam-test-modal__body"
class="spam-test-modal"
>
<:body>
<div class="control-group">
<label>{{I18n.t
"discourse_ai.spam.test_modal.post_url_label"
}}</label>
<input
{{on "input" (withEventValue (fn (mut this.postUrl)))}}
type="text"
placeholder={{I18n.t
"discourse_ai.spam.test_modal.post_url_placeholder"
}}
/>
</div>
{{#if this.testResult}}
<div class="spam-test-modal__test-result">
<h3>{{I18n.t "discourse_ai.spam.test_modal.result"}}</h3>
<div
class="spam-test-modal__verdict
{{if this.isSpam 'is-spam' 'not-spam'}}"
>
{{this.testResult}}
</div>
{{#if this.scanLog}}
<div class="spam-test-modal__log">
<h4>{{I18n.t "discourse_ai.spam.test_modal.scan_log"}}</h4>
<pre>{{this.scanLog}}</pre>
</div>
{{/if}}
</div>
{{/if}}
</:body>
<:footer>
<DButton
@action={{this.runTest}}
@label="discourse_ai.spam.test_modal.run"
@disabled={{this.isLoading}}
class="btn-primary spam-test-modal__run-button"
>
<AiIndicatorWave @loading={{this.isLoading}} />
</DButton>
</:footer>
</DModal>
</template>
}

View File

@ -24,6 +24,10 @@ export default {
       label: "discourse_ai.tools.short_title",
       route: "adminPlugins.show.discourse-ai-tools",
     },
+    {
+      label: "discourse_ai.spam.short_title",
+      route: "adminPlugins.show.discourse-ai-spam",
+    },
     {
       label: "discourse_ai.usage.short_title",
       route: "adminPlugins.show.discourse-ai-usage",

View File

@ -0,0 +1,124 @@
.ai-spam {
--chart-scanned-color: var(--success);
--chart-spam-color: var(--danger);
padding-top: 15px;
&__settings {
margin-bottom: 2em;
}
&__enabled {
display: flex;
align-items: center;
gap: 0.4em;
margin-bottom: 1em;
.fk-d-tooltip__trigger {
color: var(--primary-high);
}
}
&__settings-title {
margin-bottom: 1em;
}
&__toggle,
&__llm,
&__instructions {
margin-bottom: 1em;
}
&__toggle-label,
&__llm-label,
&__instructions-label {
display: block;
margin-bottom: 0.5em;
font-weight: bold;
}
&__instructions-input {
width: 100%;
min-height: 100px;
margin-bottom: 0.5em;
}
&__stats {
margin-top: 2em;
}
&__stats-title {
margin-bottom: 1em;
}
&__metrics {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 1em;
margin-bottom: 2em;
}
&__metrics-item {
display: flex;
flex-direction: column;
padding: 1em;
background: var(--primary-very-low);
border-radius: 0.25em;
}
&__metrics-label {
color: var(--primary-medium);
font-size: 0.875em;
margin-bottom: 0.5em;
}
&__metrics-value {
color: var(--primary);
font-size: 1.5em;
font-weight: bold;
}
}
.spam-test-modal {
&__body {
min-width: 500px;
}
&__test-result {
margin-top: 1.5em;
padding-top: 1.5em;
border-top: 1px solid var(--primary-low);
}
&__verdict {
font-size: var(--font-up-2);
font-weight: bold;
padding: 0.5em;
border-radius: 0.25em;
text-align: center;
margin: 1em 0;
&.is-spam {
background: var(--danger-low);
color: var(--danger);
}
&.not-spam {
background: var(--success-low);
color: var(--success);
}
}
&__log {
margin-top: 1em;
pre {
max-height: 300px;
overflow-y: auto;
background: var(--primary-very-low);
padding: 1em;
margin: 0.5em 0;
font-family: monospace;
white-space: pre-wrap;
}
}
}

View File

@ -129,6 +129,35 @@ en:
       modals:
         select_option: "Select an option..."
+      spam:
+        short_title: "Spam"
+        title: "Configure spam handling"
+        select_llm: "Select LLM"
+        custom_instructions: "Custom instructions"
+        custom_instructions_help: "Custom instructions specific to your site to help guide the AI in identifying spam, e.g. 'Be more aggressive about scanning posts not in English'."
+        last_seven_days: "Last 7 days"
+        scanned_count: "Posts scanned"
+        false_positives: "Incorrectly flagged"
+        false_negatives: "Missed spam"
+        spam_detected: "Spam detected"
+        custom_instructions_placeholder: "Site-specific instructions for the AI to help identify spam more accurately"
+        enable: "Enable"
+        spam_tip: "AI spam detection will scan the first 3 posts by all new users on public topics. It will flag them for review and block users if they are likely spam."
+        settings_saved: "Settings saved"
+        spam_description: "Identifies potential spam using the selected LLM and flags it for site moderators to inspect in the review queue"
+        no_llms: "No LLMs available"
+        test_button: "Test..."
+        save_button: "Save changes"
+        test_modal:
+          title: "Test spam detection"
+          post_url_label: "Post URL or ID"
+          post_url_placeholder: "https://your-forum.com/t/topic/123/4 or post ID"
+          result: "Result"
+          scan_log: "Scan log"
+          run: "Run test"
+          spam: "Spam"
+          not_spam: "Not spam"
       usage:
         short_title: "Usage"
         summary: "Summary"
@ -305,6 +334,7 @@ en:
         ai_persona: "Persona (%{persona})"
         ai_summarization: "Summarize"
         ai_embeddings_semantic_search: "AI search"
+        ai_spam: "Spam"
       in_use_warning:
         one: "This model is currently used by %{settings}. If misconfigured, the feature won't work as expected."
         other: "This model is currently used by the following: %{settings}. If misconfigured, features won't work as expected. "

View File

@ -251,6 +251,8 @@ en:
       other_content_in_pm: "Personal messages containing posts from other people cannot be shared publicly"
       failed_to_share: "Failed to share the conversation"
       conversation_deleted: "Conversation share deleted successfully"
+    spam_detection:
+      flag_reason: "Flagged as spam by <a href='%{url}'>Discourse AI</a>"
     ai_bot:
       reply_error: "Sorry, it looks like our system encountered an unexpected issue while trying to reply.\n\n[details='Error details']\n%{details}\n[/details]"
       default_pm_prefix: "[Untitled AI bot PM]"

@ -413,9 +415,10 @@ en:
     llm:
       configuration:
         disable_module_first: "You have to disable %{setting} first."
-        set_llm_first: "Set %{setting} first."
+        set_llm_first: "Set %{setting} first"
         model_unreachable: "We couldn't get a response from this model. Check your settings first."
-        invalid_seeded_model: "You can't use this model with this feature."
+        invalid_seeded_model: "You can't use this model with this feature"
+        must_select_model: "You must select a LLM first"
         endpoints:
           not_configured: "%{display_name} (not configured)"
         configuration_hint:

View File

@ -80,6 +80,9 @@ Discourse::Application.routes.draw do
         get "/ai-usage", to: "discourse_ai/admin/ai_usage#show"
         get "/ai-usage-report", to: "discourse_ai/admin/ai_usage#report"
+        get "/ai-spam", to: "discourse_ai/admin/ai_spam#show"
+        put "/ai-spam", to: "discourse_ai/admin/ai_spam#update"
+        post "/ai-spam/test", to: "discourse_ai/admin/ai_spam#test"
         resources :ai_llms,
                   only: %i[index create show update destroy],

View File

@ -321,3 +321,16 @@ discourse_ai:
     type: list
     list_type: compact
     default: ""
+  ai_spam_detection_enabled:
+    default: false
+    hidden: true
+  ai_spam_detection_user_id:
+    default: ""
+    hidden: true
+  ai_spam_detection_model_allowed_seeded_models:
+    default: ""
+    hidden: true
+    type: list

View File

@ -0,0 +1,15 @@
# frozen_string_literal: true
class AddAiModerationSettings < ActiveRecord::Migration[7.2]
def change
create_enum :ai_moderation_setting_type, %w[spam nsfw custom]
create_table :ai_moderation_settings do |t|
t.enum :setting_type, enum_type: "ai_moderation_setting_type", null: false
t.jsonb :data, default: {}
t.bigint :llm_model_id, null: false
t.timestamps
end
add_index :ai_moderation_settings, :setting_type, unique: true
end
end

View File

@ -0,0 +1,16 @@
# frozen_string_literal: true
class AddAiSpamLogs < ActiveRecord::Migration[7.2]
def change
create_table :ai_spam_logs do |t|
t.bigint :post_id, null: false
t.bigint :llm_model_id, null: false
t.bigint :ai_api_audit_log_id
t.bigint :reviewable_id
t.boolean :is_spam, null: false
t.string :payload, null: false, default: "", limit: 20_000
t.timestamps
end
add_index :ai_spam_logs, :post_id
end
end

View File

@ -0,0 +1,17 @@
# frozen_string_literal: true
module DiscourseAi
module AiModeration
class EntryPoint
def inject_into(plugin)
plugin.on(:post_created) { |post| SpamScanner.new_post(post) }
plugin.on(:post_edited) { |post| SpamScanner.edited_post(post) }
plugin.on(:post_process_cooked) { |_doc, post| SpamScanner.after_cooked_post(post) }
plugin.on(:site_setting_changed) do |name, _old_value, new_value|
SpamScanner.ensure_flagging_user! if name == :ai_spam_detection_enabled && new_value
end
end
end
end
end

View File

@ -0,0 +1,47 @@
# frozen_string_literal: true
module DiscourseAi
module AiModeration
class SpamReport
def self.generate(min_date: 1.week.ago)
spam_status = [Reviewable.statuses[:approved], Reviewable.statuses[:deleted]]
ham_status = [Reviewable.statuses[:rejected], Reviewable.statuses[:ignored]]
sql = <<~SQL
WITH spam_stats AS (
SELECT
asl.reviewable_id,
asl.post_id,
asl.is_spam,
r.status as reviewable_status,
r.target_type,
r.potential_spam
FROM ai_spam_logs asl
LEFT JOIN reviewables r ON r.id = asl.reviewable_id
WHERE asl.created_at > :min_date
),
post_reviewables AS (
SELECT
target_id post_id,
COUNT(DISTINCT target_id) as false_negative_count
FROM reviewables
WHERE target_type = 'Post'
AND status IN (:spam)
AND potential_spam
AND target_id IN (SELECT post_id FROM spam_stats)
GROUP BY target_id
)
SELECT
COUNT(*) AS scanned_count,
SUM(CASE WHEN is_spam THEN 1 ELSE 0 END) AS spam_detected,
COUNT(CASE WHEN reviewable_status IN (:ham) THEN 1 END) AS false_positives,
COALESCE(SUM(pr.false_negative_count), 0) AS false_negatives
FROM spam_stats
LEFT JOIN post_reviewables pr USING (post_id)
SQL
DB.query(sql, spam: spam_status, ham: ham_status, min_date: min_date).first
end
end
end
end

View File

@ -0,0 +1,371 @@
# frozen_string_literal: true
module DiscourseAi
module AiModeration
class SpamScanner
POSTS_TO_SCAN = 3
MINIMUM_EDIT_DIFFERENCE = 10
EDIT_DELAY_MINUTES = 10
MAX_AGE_TO_SCAN = 1.day
MAX_RAW_SCAN_LENGTH = 5000
SHOULD_SCAN_POST_CUSTOM_FIELD = "discourse_ai_should_scan_post"
def self.new_post(post)
return if !enabled?
return if !should_scan_post?(post)
flag_post_for_scanning(post)
end
def self.ensure_flagging_user!
if !SiteSetting.ai_spam_detection_user_id.present?
User.transaction do
# prefer a "high" id for this bot
id = User.where("id > -20").minimum(:id) - 1
id = User.minimum(:id) - 1 if id == -100
user =
User.create!(
id: id,
username: UserNameSuggester.suggest("discourse_ai_spam"),
name: "Discourse AI Spam Scanner",
email: "#{SecureRandom.hex(10)}@invalid.invalid",
active: true,
approved: true,
trust_level: TrustLevel[4],
admin: true,
)
Group.user_trust_level_change!(user.id, user.trust_level)
SiteSetting.ai_spam_detection_user_id = user.id
end
end
end
def self.flagging_user
user = nil
if SiteSetting.ai_spam_detection_user_id.present?
user = User.find_by(id: SiteSetting.ai_spam_detection_user_id)
end
user || Discourse.system_user
end
def self.after_cooked_post(post)
return if !enabled?
return if !should_scan_post?(post)
return if !post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD]
return if post.updated_at < MAX_AGE_TO_SCAN.ago
last_scan = AiSpamLog.where(post_id: post.id).order(created_at: :desc).first
if last_scan && last_scan.created_at > EDIT_DELAY_MINUTES.minutes.ago
delay_minutes =
((last_scan.created_at + EDIT_DELAY_MINUTES.minutes) - Time.current).to_i / 60
Jobs.enqueue_in(delay_minutes.minutes, :ai_spam_scan, post_id: post.id)
else
Jobs.enqueue(:ai_spam_scan, post_id: post.id)
end
end
def self.edited_post(post)
return if !enabled?
return if !should_scan_post?(post)
return if scanned_max_times?(post)
previous_version = post.revisions.last&.modifications&.dig("raw", 0)
current_version = post.raw
return if !significant_change?(previous_version, current_version)
flag_post_for_scanning(post)
end
def self.flag_post_for_scanning(post)
post.custom_fields[SHOULD_SCAN_POST_CUSTOM_FIELD] = "true"
post.save_custom_fields
end
def self.enabled?
SiteSetting.ai_spam_detection_enabled && SiteSetting.discourse_ai_enabled
end
def self.should_scan_post?(post)
return false if !post.present?
return false if post.user.trust_level > TrustLevel[1]
return false if post.topic.private_message?
if Post
.where(user_id: post.user_id)
.joins(:topic)
.where(topic: { archetype: Archetype.default })
.limit(4)
.count > 3
return false
end
true
end
def self.scanned_max_times?(post)
AiSpamLog.where(post_id: post.id).count >= 3
end
def self.significant_change?(previous_version, current_version)
return true if previous_version.nil? # First edit should be scanned
# Use Discourse's built-in levenshtein implementation
distance =
ScreenedEmail.levenshtein(previous_version.to_s[0...1000], current_version.to_s[0...1000])
distance >= MINIMUM_EDIT_DIFFERENCE
end
def self.test_post(post, custom_instructions: nil, llm_id: nil)
settings = AiModerationSetting.spam
llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model
llm = llm_model.to_llm
custom_instructions = custom_instructions || settings.custom_instructions.presence
context = build_context(post)
prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)
result =
llm.generate(
prompt,
temperature: 0.1,
max_tokens: 5,
user: Discourse.system_user,
feature_name: "spam_detection_test",
feature_context: {
post_id: post.id,
},
)&.strip
history = nil
AiSpamLog
.where(post: post)
.order(:created_at)
.limit(100)
.each do |log|
history ||= +"Scan History:\n"
history << "date: #{log.created_at} is_spam: #{log.is_spam}\n"
end
log = +"Scanning #{post.url}\n\n"
if history
log << history
log << "\n"
end
log << "LLM: #{llm_model.name}\n\n"
log << "System Prompt: #{build_system_prompt(custom_instructions)}\n\n"
log << "Context: #{context}\n\n"
is_spam = check_if_spam(result)
prompt.push(type: :model, content: result)
prompt.push(type: :user, content: "Explain your reasoning")
reasoning =
llm.generate(
prompt,
temperature: 0.1,
max_tokens: 100,
user: Discourse.system_user,
feature_name: "spam_detection_test",
feature_context: {
post_id: post.id,
},
)&.strip
log << "#{reasoning}"
{ is_spam: is_spam, log: log }
end
def self.completion_prompt(post, context:, custom_instructions:)
system_prompt = build_system_prompt(custom_instructions)
prompt = DiscourseAi::Completions::Prompt.new(system_prompt)
args = { type: :user, content: context }
upload_ids = post.upload_ids
args[:upload_ids] = upload_ids.take(3) if upload_ids.present?
prompt.push(**args)
prompt
end
def self.perform_scan(post)
return if !enabled?
return if !should_scan_post?(post)
settings = AiModerationSetting.spam
return if !settings || !settings.llm_model
context = build_context(post)
llm = settings.llm_model.to_llm
custom_instructions = settings.custom_instructions.presence
prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions)
begin
result =
llm.generate(
prompt,
temperature: 0.1,
max_tokens: 5,
user: Discourse.system_user,
feature_name: "spam_detection",
feature_context: {
post_id: post.id,
},
)&.strip
is_spam = check_if_spam(result)
log = AiApiAuditLog.order(id: :desc).where(feature_name: "spam_detection").first
AiSpamLog.transaction do
log =
AiSpamLog.create!(
post: post,
llm_model: settings.llm_model,
ai_api_audit_log: log,
is_spam: is_spam,
payload: context,
)
handle_spam(post, log) if is_spam
end
rescue StandardError => e
# we need retries otherwise stuff will not be handled
Discourse.warn_exception(
e,
message: "Discourse AI: Error in SpamScanner for post #{post.id}",
)
raise e
end
end
private
def self.check_if_spam(result)
(result.present? && result.strip.downcase.start_with?("spam"))
end
def self.build_context(post)
context = []
# Clear distinction between reply and new topic
if post.is_first_post?
context << "NEW TOPIC POST ANALYSIS"
context << "- Topic title: #{post.topic.title}"
context << "- Category: #{post.topic.category&.name}"
else
context << "REPLY POST ANALYSIS"
context << "- In topic: #{post.topic.title}"
context << "- Category: #{post.topic.category&.name}"
context << "- Topic started by: #{post.topic.user.username}"
# Include parent post context for replies
if post.reply_to_post.present?
parent = post.reply_to_post
context << "\nReplying to #{parent.user.username}'s post:"
context << "#{parent.raw[0..500]}..." if parent.raw.length > 500
context << parent.raw if parent.raw.length <= 500
end
end
context << "\nPost Author Information:"
context << "- Username: #{post.user.username}"
context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days"
context << "- Total posts: #{post.user.post_count}"
context << "- Trust level: #{post.user.trust_level}"
context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
context << post.raw[0..MAX_RAW_SCAN_LENGTH]
context.join("\n")
end
def self.build_system_prompt(custom_instructions)
base_prompt = +<<~PROMPT
You are a spam detection system. Analyze the following post content and context.
Respond with "SPAM" if the post is spam, or "NOT_SPAM" if it's legitimate.
- ALWAYS lead your reply with the word SPAM or NOT_SPAM - you are consumed via an API
Consider the post type carefully:
- For REPLY posts: Check if the response is relevant and topical to the thread
- For NEW TOPIC posts: Check if it's a legitimate topic or spam promotion
A post is spam if it matches any of these criteria:
- Contains unsolicited commercial content or promotions
- Has suspicious or unrelated external links
- Shows patterns of automated/bot posting
- Contains irrelevant content or advertisements
- For replies: Completely unrelated to the discussion thread
- Uses excessive keywords or repetitive text patterns
- Shows suspicious formatting or character usage
Be especially strict with:
- Replies that ignore the previous conversation
- Posts containing multiple unrelated external links
- Generic responses that could be posted anywhere
Be fair to:
- New users making legitimate first contributions
- Non-native speakers making genuine efforts to participate
- Topic-relevant product mentions in appropriate contexts
PROMPT
base_prompt << "\n\n"
base_prompt << <<~SITE_SPECIFIC
Site Specific Information:
- Site name: #{SiteSetting.title}
- Site URL: #{Discourse.base_url}
- Site description: #{SiteSetting.site_description}
- Site top 10 categories: #{Category.where(read_restricted: false).order(posts_year: :desc).limit(10).pluck(:name).join(", ")}
SITE_SPECIFIC
if custom_instructions.present?
base_prompt << "\n\nAdditional site-specific instructions provided by Staff:\n#{custom_instructions}"
end
base_prompt
end
def self.handle_spam(post, log)
url = "#{Discourse.base_url}/admin/plugins/discourse-ai/ai-spam"
reason = I18n.t("discourse_ai.spam_detection.flag_reason", url: url)
result =
PostActionCreator.new(
flagging_user,
post,
PostActionType.types[:spam],
reason: reason,
queue_for_review: true,
).perform
log.update!(reviewable: result.reviewable)
SpamRule::AutoSilence.new(post.user, post).silence_user
# this is required cause tl1 is not auto hidden
# we want to also handle tl1
hide_posts_and_topics(post.user)
end
def self.hide_posts_and_topics(user)
Post
.where(user_id: user.id)
.where("created_at > ?", 24.hours.ago)
.update_all(
[
"hidden = true, hidden_reason_id = COALESCE(hidden_reason_id, ?)",
Post.hidden_reasons[:new_user_spam_threshold_reached],
],
)
topic_ids =
Post
.where(user_id: user.id, post_number: 1)
.where("created_at > ?", 24.hours.ago)
.select(:topic_id)
Topic.where(id: topic_ids).update_all(visible: false)
end
end
end
end

View File

@ -38,6 +38,11 @@ module DiscourseAi
           rval[model_id] << { type: :ai_embeddings_semantic_search }
         end

+        if SiteSetting.ai_spam_detection_enabled
+          model_id = AiModerationSetting.spam[:llm_model_id]
+          rval[model_id] << { type: :ai_spam }
+        end
+
         rval
       end

@ -45,14 +50,20 @@ module DiscourseAi
         true
       end

-      def self.values
+      def self.values(allowed_seeded_llms: nil)
         values = DB.query_hash(<<~SQL).map(&:symbolize_keys)
           SELECT display_name AS name, id AS value
           FROM llm_models
         SQL

+        if allowed_seeded_llms.is_a?(Array)
+          values =
+            values.filter do |value_h|
+              value_h[:value] > 0 || allowed_seeded_llms.include?("custom:#{value_h[:value]}")
+            end
+        end
+
         values.each { |value_h| value_h[:value] = "custom:#{value_h[:value]}" }
         values
       end

View File

@ -37,6 +37,7 @@ register_asset "stylesheets/modules/sentiment/common/dashboard.scss"
 register_asset "stylesheets/modules/llms/common/ai-llms-editor.scss"
 register_asset "stylesheets/modules/llms/common/usage.scss"
+register_asset "stylesheets/modules/llms/common/spam.scss"
 register_asset "stylesheets/modules/ai-bot/common/ai-tools.scss"

@ -71,6 +72,7 @@ after_initialize do
     DiscourseAi::AiHelper::EntryPoint.new,
     DiscourseAi::Summarization::EntryPoint.new,
     DiscourseAi::AiBot::EntryPoint.new,
+    DiscourseAi::AiModeration::EntryPoint.new,
   ].each { |a_module| a_module.inject_into(self) }

   register_reviewable_type ReviewableAiChatMessage

View File

@ -0,0 +1,222 @@
# frozen_string_literal: true
require "rails_helper"
RSpec.describe DiscourseAi::AiModeration::SpamScanner do
fab!(:user) { Fabricate(:user, trust_level: TrustLevel[0]) }
fab!(:topic)
fab!(:post) { Fabricate(:post, user: user, topic: topic) }
fab!(:llm_model)
fab!(:spam_setting) do
AiModerationSetting.create!(
setting_type: :spam,
llm_model: llm_model,
data: {
custom_instructions: "test instructions",
},
)
end
before do
SiteSetting.discourse_ai_enabled = true
SiteSetting.ai_spam_detection_enabled = true
end
describe ".enabled?" do
it "returns true when both settings are enabled" do
expect(described_class.enabled?).to eq(true)
end
it "returns false when discourse_ai is disabled" do
SiteSetting.discourse_ai_enabled = false
expect(described_class.enabled?).to eq(false)
end
it "returns false when spam detection is disabled" do
SiteSetting.ai_spam_detection_enabled = false
expect(described_class.enabled?).to eq(false)
end
end
describe ".should_scan_post?" do
it "returns true for new users' posts" do
expect(described_class.should_scan_post?(post)).to eq(true)
end
it "returns false for trusted users" do
post.user.trust_level = TrustLevel[2]
expect(described_class.should_scan_post?(post)).to eq(false)
end
it "returns false for users with many public posts" do
Fabricate(:post, user: user, topic: topic)
Fabricate(:post, user: user, topic: topic)
expect(described_class.should_scan_post?(post)).to eq(true)
pm = Fabricate(:private_message_topic, user: user)
Fabricate(:post, user: user, topic: pm)
expect(described_class.should_scan_post?(post)).to eq(true)
topic = Fabricate(:topic, user: user)
Fabricate(:post, user: user, topic: topic)
expect(described_class.should_scan_post?(post)).to eq(false)
end
it "returns false for private messages" do
pm_topic = Fabricate(:private_message_topic)
pm_post = Fabricate(:post, topic: pm_topic, user: user)
expect(described_class.should_scan_post?(pm_post)).to eq(false)
end
it "returns false for nil posts" do
expect(described_class.should_scan_post?(nil)).to eq(false)
end
end
describe ".scanned_max_times?" do
it "returns true when post has been scanned 3 times" do
3.times do
AiSpamLog.create!(post: post, llm_model: llm_model, ai_api_audit_log_id: 1, is_spam: false)
end
expect(described_class.scanned_max_times?(post)).to eq(true)
end
it "returns false for posts scanned less than 3 times" do
expect(described_class.scanned_max_times?(post)).to eq(false)
end
end
describe ".significant_change?" do
it "returns true for first edits" do
expect(described_class.significant_change?(nil, "new content")).to eq(true)
end
it "returns true for significant changes" do
old_version = "This is a test post"
new_version = "This is a completely different post with new content"
expect(described_class.significant_change?(old_version, new_version)).to eq(true)
end
it "returns false for minor changes" do
old_version = "This is a test post"
new_version = "This is a test Post" # Only capitalization change
expect(described_class.significant_change?(old_version, new_version)).to eq(false)
end
end
describe ".new_post" do
it "enqueues spam scan job for eligible posts" do
expect {
described_class.new_post(post)
described_class.after_cooked_post(post)
}.to change(Jobs::AiSpamScan.jobs, :size).by(1)
end
it "doesn't enqueue jobs when disabled" do
SiteSetting.ai_spam_detection_enabled = false
expect { described_class.new_post(post) }.not_to change(Jobs::AiSpamScan.jobs, :size)
end
end
describe ".edited_post" do
it "enqueues spam scan job for eligible edited posts" do
PostRevision.create!(
post: post,
modifications: {
raw: ["old content", "completely new content"],
},
)
expect {
described_class.edited_post(post)
described_class.after_cooked_post(post)
}.to change(Jobs::AiSpamScan.jobs, :size).by(1)
end
it "schedules delayed job when edited too soon after last scan" do
AiSpamLog.create!(
post: post,
llm_model: llm_model,
ai_api_audit_log_id: 1,
is_spam: false,
created_at: 5.minutes.ago,
)
expect {
described_class.edited_post(post)
described_class.after_cooked_post(post)
}.to change(Jobs::AiSpamScan.jobs, :size).by(1)
end
end
describe "integration test" do
fab!(:llm_model)
let(:api_audit_log) { Fabricate(:api_audit_log) }
fab!(:post_with_uploaded_image)
before { Jobs.run_immediately! }
it "Can correctly run tests" do
prompts = nil
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
["spam", "the reason is just because"],
) do |_, _, _prompts|
prompts = _prompts
described_class.test_post(post, custom_instructions: "123")
end
expect(prompts.length).to eq(2)
expect(result[:is_spam]).to eq(true)
expect(result[:log]).to include("123")
expect(result[:log]).to include("just because")
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
["not_spam", "the reason is just because"],
) do |_, _, _prompts|
prompts = _prompts
described_class.test_post(post, custom_instructions: "123")
end
expect(result[:is_spam]).to eq(false)
end
it "Correctly handles spam scanning" do
expect(described_class.flagging_user.id).not_to eq(Discourse.system_user.id)
# flag post for scanning
post = post_with_uploaded_image
described_class.new_post(post)
prompt = nil
DiscourseAi::Completions::Llm.with_prepared_responses(["spam"]) do |_, _, _prompts|
# force a rebake so we actually scan
post.rebake!
prompt = _prompts.first
end
content = prompt.messages[1][:content]
expect(content).to include(post.topic.title)
expect(content).to include(post.raw)
upload_ids = prompt.messages[1][:upload_ids]
expect(upload_ids).to be_present
expect(upload_ids).to eq(post.upload_ids)
log = AiSpamLog.find_by(post: post)
expect(log.payload).to eq(content)
expect(log.is_spam).to eq(true)
expect(post.user.reload.silenced_till).to be_present
expect(post.topic.reload.visible).to eq(false)
expect(log.reviewable).to be_present
expect(log.reviewable.created_by_id).to eq(described_class.flagging_user.id)
end
end
end

View File

@ -0,0 +1,285 @@
# frozen_string_literal: true
require "rails_helper"
RSpec.describe DiscourseAi::Admin::AiSpamController do
fab!(:admin)
fab!(:user)
fab!(:llm_model)
describe "#update" do
context "when logged in as admin" do
before { sign_in(admin) }
it "can update settings from scratch" do
put "/admin/plugins/discourse-ai/ai-spam.json",
params: {
is_enabled: true,
llm_model_id: llm_model.id,
custom_instructions: "custom instructions",
}
expect(response.status).to eq(200)
expect(SiteSetting.ai_spam_detection_enabled).to eq(true)
expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id)
expect(AiModerationSetting.spam.data["custom_instructions"]).to eq("custom instructions")
end
it "denies update for disallowed seeded llm" do
seeded_llm = Fabricate(:llm_model, id: -1)
put "/admin/plugins/discourse-ai/ai-spam.json",
params: {
is_enabled: true,
llm_model_id: seeded_llm.id,
custom_instructions: "custom instructions",
}
expect(response.status).to eq(422)
SiteSetting.ai_spam_detection_model_allowed_seeded_models = seeded_llm.identifier
put "/admin/plugins/discourse-ai/ai-spam.json",
params: {
is_enabled: true,
llm_model_id: seeded_llm.id,
custom_instructions: "custom instructions",
}
expect(response.status).to eq(200)
end
it "can not enable spam detection without a model selected" do
put "/admin/plugins/discourse-ai/ai-spam.json",
params: {
custom_instructions: "custom instructions",
}
expect(response.status).to eq(422)
end
it "can not fiddle with custom instructions without an llm" do
put "/admin/plugins/discourse-ai/ai-spam.json", params: { is_enabled: true }
expect(response.status).to eq(422)
end
context "when spam detection was already set" do
fab!(:setting) do
AiModerationSetting.create(
{
setting_type: :spam,
llm_model_id: llm_model.id,
data: {
custom_instructions: "custom instructions",
},
},
)
end
it "can partially update settings" do
put "/admin/plugins/discourse-ai/ai-spam.json", params: { is_enabled: false }
expect(response.status).to eq(200)
expect(SiteSetting.ai_spam_detection_enabled).to eq(false)
expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id)
expect(AiModerationSetting.spam.data["custom_instructions"]).to eq("custom instructions")
end
it "can update pre existing settings" do
put "/admin/plugins/discourse-ai/ai-spam.json",
params: {
is_enabled: true,
llm_model_id: llm_model.id,
custom_instructions: "custom instructions new",
}
expect(response.status).to eq(200)
expect(SiteSetting.ai_spam_detection_enabled).to eq(true)
expect(AiModerationSetting.spam.llm_model_id).to eq(llm_model.id)
expect(AiModerationSetting.spam.data["custom_instructions"]).to eq(
"custom instructions new",
)
end
end
end
end
describe "#test" do
fab!(:spam_post) { Fabricate(:post) }
fab!(:spam_post2) { Fabricate(:post, topic: spam_post.topic, raw: "something special 123") }
fab!(:setting) do
AiModerationSetting.create(
{
setting_type: :spam,
llm_model_id: llm_model.id,
data: {
custom_instructions: "custom instructions",
},
},
)
end
before { sign_in(admin) }
it "can scan using post url" do
llm2 = Fabricate(:llm_model, name: "DiffLLM")
DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "just because"]) do
post "/admin/plugins/discourse-ai/ai-spam/test.json",
params: {
post_url: spam_post2.url,
llm_id: llm2.id,
}
end
expect(response.status).to eq(200)
parsed = response.parsed_body
expect(parsed["log"]).to include(spam_post2.raw)
expect(parsed["log"]).to include("DiffLLM")
end
it "can scan using post id" do
DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because apples"]) do
post "/admin/plugins/discourse-ai/ai-spam/test.json",
params: {
post_url: spam_post.id.to_s,
}
end
expect(response.status).to eq(200)
parsed = response.parsed_body
expect(parsed["log"]).to include(spam_post.raw)
end
it "returns proper spam test results" do
freeze_time DateTime.parse("2000-01-01")
AiSpamLog.create!(
post: spam_post,
llm_model: llm_model,
is_spam: false,
created_at: 2.days.ago,
)
AiSpamLog.create!(post: spam_post, llm_model: llm_model, is_spam: true, created_at: 1.day.ago)
DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "because banana"]) do
post "/admin/plugins/discourse-ai/ai-spam/test.json",
params: {
post_url: spam_post.url,
custom_instructions: "special custom instructions",
}
end
expect(response.status).to eq(200)
parsed = response.parsed_body
expect(parsed["log"]).to include("special custom instructions")
expect(parsed["log"]).to include(spam_post.raw)
expect(parsed["is_spam"]).to eq(true)
expect(parsed["log"]).to include("Scan History:")
expect(parsed["log"]).to include("banana")
end
end
describe "#show" do
context "when logged in as admin" do
before { sign_in(admin) }
it "correctly filters seeded llms" do
SiteSetting.ai_spam_detection_enabled = true
seeded_llm = Fabricate(:llm_model, id: -1, name: "seeded")
get "/admin/plugins/discourse-ai/ai-spam.json"
expect(response.status).to eq(200)
json = response.parsed_body
# only includes fabricated model
expect(json["available_llms"].length).to eq(1)
SiteSetting.ai_spam_detection_model_allowed_seeded_models = seeded_llm.identifier
get "/admin/plugins/discourse-ai/ai-spam.json"
expect(response.status).to eq(200)
json = response.parsed_body
expect(json["available_llms"].length).to eq(2)
end
it "returns the serialized spam settings" do
SiteSetting.ai_spam_detection_enabled = true
get "/admin/plugins/discourse-ai/ai-spam.json"
expect(response.status).to eq(200)
json = response.parsed_body
expect(json["is_enabled"]).to eq(true)
expect(json["selected_llm"]).to eq(nil)
expect(json["custom_instructions"]).to eq(nil)
expect(json["available_llms"]).to be_an(Array)
expect(json["stats"]).to be_present
end
it "return proper settings when spam detection is enabled" do
SiteSetting.ai_spam_detection_enabled = true
AiModerationSetting.create(
{
setting_type: :spam,
llm_model_id: llm_model.id,
data: {
custom_instructions: "custom instructions",
},
},
)
flagging_user = DiscourseAi::AiModeration::SpamScanner.flagging_user
expect(flagging_user.id).not_to eq(Discourse.system_user.id)
AiSpamLog.create!(post_id: 1, llm_model_id: llm_model.id, is_spam: true, payload: "test")
get "/admin/plugins/discourse-ai/ai-spam.json"
json = response.parsed_body
expect(json["is_enabled"]).to eq(true)
expect(json["llm_id"]).to eq(llm_model.id)
expect(json["custom_instructions"]).to eq("custom instructions")
expect(json["stats"].to_h).to eq(
"scanned_count" => 1,
"spam_detected" => 1,
"false_positives" => 0,
"false_negatives" => 0,
)
expect(json["flagging_username"]).to eq(flagging_user.username)
end
end
context "when not logged in as admin" do
it "returns 404 for anonymous users" do
get "/admin/plugins/discourse-ai/ai-spam.json"
expect(response.status).to eq(404)
end
it "returns 404 for regular users" do
sign_in(user)
get "/admin/plugins/discourse-ai/ai-spam.json"
expect(response.status).to eq(404)
end
end
context "when plugin is disabled" do
before do
sign_in(admin)
SiteSetting.discourse_ai_enabled = false
end
it "returns 404" do
get "/admin/plugins/discourse-ai/ai-spam.json"
expect(response.status).to eq(404)
end
end
end
end

View File

@ -0,0 +1,48 @@
# frozen_string_literal: true
RSpec.describe "AI Spam Configuration", type: :system, js: true do
fab!(:admin)
let(:llm_model) { Fabricate(:llm_model) }
before do
SiteSetting.discourse_ai_enabled = true
sign_in(admin)
end
it "can properly configure spam settings" do
visit "/admin/plugins/discourse-ai/ai-spam"
expect(page).to have_css(".ai-spam__llm-placeholder")
toggle = PageObjects::Components::DToggleSwitch.new(".ai-spam__toggle")
toggle.toggle
dialog = PageObjects::Components::Dialog.new
expect(dialog).to have_content(I18n.t("discourse_ai.llm.configuration.must_select_model"))
dialog.click_ok
expect(toggle.unchecked?).to eq(true)
llm_model
visit "/admin/plugins/discourse-ai/ai-spam"
toggle = PageObjects::Components::DToggleSwitch.new(".ai-spam__toggle")
toggle.toggle
try_until_success { expect(AiModerationSetting.spam&.llm_model_id).to eq(llm_model.id) }
find(".ai-spam__instructions-input").fill_in(with: "Test spam detection instructions")
find(".ai-spam__instructions-save").click
toasts = PageObjects::Components::Toasts.new
expect(toasts).to have_content(I18n.t("js.discourse_ai.spam.settings_saved"))
expect(AiModerationSetting.spam.custom_instructions).to eq("Test spam detection instructions")
visit "/admin/plugins/discourse-ai/ai-llms"
expect(find(".ai-llm-list-editor__usages")).to have_content(
I18n.t("js.discourse_ai.llms.usage.ai_spam"),
)
end
end