discourse-ai/config/settings.yml
Sam 47f5da7e42
FEATURE: Add AI-powered spam detection for new user posts (#1004)
This introduces a comprehensive spam detection system that uses LLMs
to automatically identify and flag potential spam posts. The system is
designed to be both powerful and configurable while preventing false positives.

Key Features:
* Automatically scans the first 3 posts from new users (TL0/TL1)
* Creates a dedicated AI flagging user to distinguish AI flags from system flags
* Tracks false positives/negatives for quality monitoring
* Supports custom instructions to fine-tune detection
* Includes a test interface for trying detection on any post

Technical Implementation:
* New database tables:
  - ai_spam_logs: Stores scan history and results
  - ai_moderation_settings: Stores LLM config and custom instructions
* Rate limiting and safeguards:
  - Minimum 10-minute delay between rescans
  - Only scans significant edits (>10 char difference)
  - Maximum 3 scans per post
  - 24-hour maximum age for scannable posts
* Admin UI features:
  - Real-time testing capabilities
  - 7-day statistics dashboard
  - Configurable LLM model selection
  - Custom instruction support

Security and Performance:
* Respects trust levels - only scans TL0/TL1 users
* Skips private messages entirely
* Stops scanning users after 3 successful public posts
* Includes comprehensive test coverage
* Maintains audit log of all scan attempts


---------

Co-authored-by: Keegan George <kgeorge13@gmail.com>
Co-authored-by: Martin Brennan <martin@discourse.org>
2024-12-12 09:17:25 +11:00

337 lines
8.3 KiB
YAML

# Site settings grouped under the `discourse_ai` key.
#
# Each entry is either the shorthand `setting_name: <default value>` or a
# mapping of attributes (default, type, choices, enum, client, hidden, secret,
# validator, min, max, ...). Nesting is significant: attributes belong to the
# setting they are indented under, and list items belong to `choices`.
discourse_ai:
  # General
  discourse_ai_enabled:
    default: true
    client: true
  ai_artifact_security:
    client: true
    type: enum
    default: "strict"
    choices:
      - "disabled"
      - "lax"
      - "strict"

  # Sentiment
  ai_sentiment_enabled:
    default: false
    client: true
  ai_sentiment_model_configs:
    default: ""
    json_schema: "DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema"
  ai_sentiment_backfill_maximum_posts_per_hour:
    default: 250
    min: 0
    max: 10000
    hidden: true
  ai_sentiment_backfill_post_max_age_days:
    default: 60
    hidden: true

  # Provider endpoints and credentials
  ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations"
  ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
  ai_openai_organization:
    default: ""
    hidden: true
  ai_openai_api_key:
    default: ""
    secret: true
  ai_stability_api_key:
    default: ""
    secret: true
  ai_stability_api_url:
    default: "https://api.stability.ai"
  ai_stability_engine:
    default: "stable-diffusion-xl-1024-v1-0"
    type: enum
    choices:
      - "sd3"
      - "sd3-turbo"
      - "stable-diffusion-xl-1024-v1-0"
      - "stable-diffusion-768-v2-1"
      - "stable-diffusion-v1-5"
  ai_hugging_face_tei_endpoint:
    default: ""
  ai_hugging_face_tei_endpoint_srv:
    default: ""
    hidden: true
  # Marked secret for consistency with the other API-key settings in this file.
  ai_hugging_face_tei_api_key:
    default: ""
    secret: true
  ai_hugging_face_tei_reranker_endpoint:
    default: ""
  ai_hugging_face_tei_reranker_endpoint_srv:
    default: ""
    hidden: true
  # Marked secret for consistency with the other API-key settings in this file.
  ai_hugging_face_tei_reranker_api_key:
    default: ""
    secret: true
  ai_google_custom_search_api_key:
    default: ""
    secret: true
  ai_google_custom_search_cx:
    default: ""
  ai_cloudflare_workers_account_id:
    default: ""
    secret: true
  ai_cloudflare_workers_api_token:
    default: ""
    secret: true
  # Marked secret for consistency with the other API-key settings in this file.
  ai_gemini_api_key:
    default: ""
    hidden: false
    secret: true
  ai_strict_token_counting:
    default: false
    hidden: true

  # AI helper
  ai_helper_enabled:
    default: false
    client: true
    validator: "DiscourseAi::Configuration::LlmDependencyValidator"
  composer_ai_helper_allowed_groups:
    type: group_list
    list_type: compact
    default: "3|14"  # 3: @staff, 14: @trust_level_4
    allow_any: false
    refresh: true
  ai_helper_allowed_in_pm:
    default: false
    client: true
  ai_helper_model:
    default: ""
    allow_any: false
    type: enum
    enum: "DiscourseAi::Configuration::LlmEnumerator"
    validator: "DiscourseAi::Configuration::LlmValidator"
  ai_helper_custom_prompts_allowed_groups:
    type: group_list
    list_type: compact
    default: "3"  # 3: @staff
    allow_any: false
    refresh: true
  post_ai_helper_allowed_groups:
    type: group_list
    list_type: compact
    default: "3|14"  # 3: @staff, 14: @trust_level_4
    allow_any: false
    refresh: true
  ai_helper_automatic_chat_thread_title:
    default: false
  ai_helper_automatic_chat_thread_title_delay:
    default: 5
  ai_helper_illustrate_post_model:
    default: disabled
    type: enum
    choices:
      - stable_diffusion_xl
      - dall_e_3
      - disabled
  ai_helper_enabled_features:
    client: true
    default: "suggestions|context_menu"
    type: list
    list_type: compact
    allow_any: false
    refresh: true
    choices:
      - "suggestions"
      - "context_menu"
      - "image_caption"
  ai_helper_image_caption_model:
    default: ""
    type: enum
    enum: "DiscourseAi::Configuration::LlmVisionEnumerator"
  ai_auto_image_caption_allowed_groups:
    client: true
    type: group_list
    list_type: compact
    default: "10"  # 10: @trust_level_0
    allow_any: false
    refresh: true
  ai_helper_model_allowed_seeded_models:
    default: ""
    hidden: true
    type: list
    list_type: compact
  ai_helper_image_caption_model_allowed_seeded_models:
    default: ""
    hidden: true
    type: list
    list_type: compact

  # Embeddings and semantic search
  ai_embeddings_enabled:
    default: false
    client: true
    validator: "DiscourseAi::Configuration::EmbeddingsModuleValidator"
  ai_embeddings_discourse_service_api_endpoint: ""
  ai_embeddings_discourse_service_api_endpoint_srv:
    default: ""
    hidden: true
  ai_embeddings_discourse_service_api_key:
    default: ""
    secret: true
  ai_embeddings_model:
    type: enum
    default: "bge-large-en"
    allow_any: false
    choices:
      - all-mpnet-base-v2
      - text-embedding-ada-002
      - text-embedding-3-small
      - text-embedding-3-large
      - multilingual-e5-large
      - bge-large-en
      - gemini
      - bge-m3
    validator: "DiscourseAi::Configuration::EmbeddingsModelValidator"
  ai_embeddings_per_post_enabled:
    default: false
    hidden: true
  ai_embeddings_generate_for_pms: false
  ai_embeddings_semantic_related_topics_enabled:
    default: false
    client: true
  ai_embeddings_semantic_related_topics: 5
  ai_embeddings_semantic_related_include_closed_topics: true
  ai_embeddings_backfill_batch_size:
    default: 250
    hidden: true
  ai_embeddings_pg_connection_string:
    default: ""
    hidden: true
  ai_embeddings_semantic_search_enabled:
    default: false
    client: true
    validator: "DiscourseAi::Configuration::LlmDependencyValidator"
  ai_embeddings_semantic_search_hyde_model:
    default: ""
    type: enum
    allow_any: false
    enum: "DiscourseAi::Configuration::LlmEnumerator"
    validator: "DiscourseAi::Configuration::LlmValidator"
  ai_embeddings_semantic_search_hyde_model_allowed_seeded_models:
    default: ""
    hidden: true
    type: list
    list_type: compact
  ai_embeddings_semantic_quick_search_enabled:
    default: false
    client: true
    hidden: true

  # Summarization
  ai_summarization_enabled:
    default: false
    client: true
    validator: "DiscourseAi::Configuration::LlmDependencyValidator"
  ai_summarization_model:
    default: ""
    allow_any: false
    type: enum
    enum: "DiscourseAi::Configuration::LlmEnumerator"
    validator: "DiscourseAi::Configuration::LlmValidator"
  ai_pm_summarization_allowed_groups:
    type: group_list
    list_type: compact
    default: ""
  ai_custom_summarization_allowed_groups:
    type: group_list
    list_type: compact
    default: "3|13"  # 3: @staff, 13: @trust_level_3
  ai_summary_gists_enabled:
    default: false
    hidden: true
  ai_summary_gists_allowed_groups:
    type: group_list
    list_type: compact
    default: "0"  # 0: everyone
    hidden: true
  ai_summarization_model_allowed_seeded_models:
    default: ""
    hidden: true
    type: list
    list_type: compact
  ai_summary_backfill_topic_max_age_days:
    default: 30
    min: 1
    max: 10000
  ai_summary_backfill_maximum_topics_per_hour:
    default: 0
    min: 0
    max: 10000
  ai_summary_backfill_minimum_word_count:
    default: 200
    hidden: true

  # AI bot
  ai_bot_enabled:
    default: false
    client: true
  ai_bot_enable_chat_warning:
    default: false
    client: true
  ai_bot_debugging_allowed_groups:
    type: group_list
    list_type: compact
    default: ""
    allow_any: false
  ai_bot_allowed_groups:
    type: group_list
    list_type: compact
    default: "3|14"  # 3: @staff, 14: @trust_level_4
  ai_bot_public_sharing_allowed_groups:
    client: false
    type: group_list
    list_type: compact
    default: "1|2"  # 1: admins, 2: moderators
    allow_any: false
    refresh: true
  # NOTE(review): the removal date in the TODO below has already passed;
  # confirm nothing still reads this setting before deleting it.
  ai_bot_enabled_chat_bots:  # TODO(roman): Deprecated. Remove by Sept 2024
    type: list
    default: "gpt-3.5-turbo"
    hidden: true
    choices: "DiscourseAi::Configuration::LlmEnumerator.available_ai_bots"
  ai_bot_add_to_header:
    default: true
    client: true
  ai_bot_github_access_token:
    default: ""
    secret: true

  # Automation
  ai_automation_max_triage_per_minute:
    default: 60
    hidden: true
  ai_automation_max_triage_per_post_per_minute:
    default: 2
    hidden: true
  ai_automation_allowed_seeded_models:
    default: ""
    hidden: true
    type: list
    list_type: compact

  # Discord integration
  ai_discord_app_id:
    default: ""
    client: false
  ai_discord_app_public_key:
    default: ""
    client: false
  ai_discord_search_mode:
    default: "search"
    type: enum
    choices:
      - search
      - persona
  ai_discord_search_persona:
    default: ""
    type: enum
    enum: "DiscourseAi::Configuration::PersonaEnumerator"
  ai_discord_allowed_guilds:
    type: list
    list_type: compact
    default: ""

  # Spam detection
  ai_spam_detection_enabled:
    default: false
    hidden: true
  ai_spam_detection_user_id:
    default: ""
    hidden: true
  # NOTE(review): the other *_allowed_seeded_models settings also declare
  # `list_type: compact`; this entry may be cut off in the visible excerpt.
  # Confirm against the full file.
  ai_spam_detection_model_allowed_seeded_models:
    default: ""
    hidden: true
    type: list