discourse-ai/config/settings.yml
Rafael dos Santos Silva 5e3f4e1b78
FEATURE: Embeddings to main db (#99)
* FEATURE: Embeddings to main db

This commit moves our embeddings store from an external configurable PostgreSQL
instance back into the main database. This is done to simplify the setup.

There is a migration that will try to import the external embeddings into
the main DB if it is configured and there are rows.

It removes support for embedding models other than all_mpnet_base_v2 and OpenAI
text_embedding_ada_002. However, it will now be easier to add new models.

It also now takes into account:
  - topic title
  - topic category
  - topic tags
  - replies (as much as the model allows)

We introduce an interface so we can eventually support multiple strategies
for handling long topics.

This PR severely degrades semantic search performance, but this is
temporary until we can adapt HyDE to make semantic search use the same
embeddings we have for semantic related topics with good performance.

Here we also have some groundwork to add post-level embeddings, but this
will be added in a future PR.

Please note that this PR will also block Discourse from booting / updating if 
this plugin is installed and the pgvector extension isn't available on the 
PostgreSQL instance Discourse uses.
2023-07-13 12:41:36 -03:00

200 lines
4.8 KiB
YAML

# Site settings for the discourse-ai plugin.
#
# Layout: category (`plugins`) -> setting name -> attributes. A setting
# written as a bare `name: value` pair carries only its default value.
# Attributes used below: `default`, `client`, `secret`, `type`, `list_type`,
# `choices`, `allow_any`, `refresh`.
plugins:
  # Master switch for the plugin.
  discourse_ai_enabled:
    default: true
    client: true

  # --- Toxicity classification ---------------------------------------------
  ai_toxicity_enabled:
    default: false
    client: true
  ai_toxicity_inference_service_api_endpoint:
    default: "https://disorder-testing.demo-by-discourse.com"
  ai_toxicity_inference_service_api_key:
    default: ""
    secret: true
  ai_toxicity_inference_service_api_model:
    type: enum
    default: unbiased
    choices:
      - unbiased
      - multilingual
      - original
  ai_toxicity_flag_automatically:
    default: false
    client: false
  # Per-label flagging thresholds.
  # NOTE(review): presumably on a 0-100 scale - confirm against the classifier.
  ai_toxicity_flag_threshold_toxicity:
    default: 80
    client: false
  ai_toxicity_flag_threshold_severe_toxicity:
    default: 30
    client: false
  ai_toxicity_flag_threshold_obscene:
    default: 80
    client: false
  ai_toxicity_flag_threshold_identity_attack:
    default: 60
    client: false
  ai_toxicity_flag_threshold_insult:
    default: 60
    client: false
  ai_toxicity_flag_threshold_threat:
    default: 60
    client: false
  ai_toxicity_flag_threshold_sexual_explicit:
    default: 60
    client: false
  ai_toxicity_groups_bypass:
    client: true
    type: group_list
    list_type: compact
    default: "3"  # 3: @staff
    allow_any: false
    refresh: true

  # --- Sentiment classification --------------------------------------------
  ai_sentiment_enabled:
    default: false
    client: true
  ai_sentiment_inference_service_api_endpoint:
    default: "https://sentiment-testing.demo-by-discourse.com"
  ai_sentiment_inference_service_api_key:
    default: ""
    secret: true
  ai_sentiment_models:
    type: list
    list_type: compact
    default: "emotion"
    allow_any: false
    choices:
      - sentiment
      - emotion

  # --- NSFW image detection ------------------------------------------------
  ai_nsfw_detection_enabled: false
  ai_nsfw_inference_service_api_endpoint:
    default: "https://nsfw-testing.demo-by-discourse.com"
  ai_nsfw_inference_service_api_key:
    default: ""
    secret: true
  ai_nsfw_flag_automatically: true
  # Per-label flagging thresholds.
  # NOTE(review): presumably on a 0-100 scale - confirm against the classifier.
  ai_nsfw_flag_threshold_general: 60
  ai_nsfw_flag_threshold_drawings: 60
  ai_nsfw_flag_threshold_hentai: 60
  ai_nsfw_flag_threshold_porn: 60
  ai_nsfw_flag_threshold_sexy: 70
  ai_nsfw_models:
    type: list
    list_type: compact
    default: "opennsfw2"
    allow_any: false
    choices:
      - opennsfw2
      - nsfw_detector

  # --- Third-party provider endpoints and credentials ----------------------
  ai_openai_gpt35_url: "https://api.openai.com/v1/chat/completions"
  ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
  ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
  ai_openai_api_key:
    default: ""
    secret: true
  ai_anthropic_api_key:
    default: ""
    secret: true
  ai_stability_api_key:
    default: ""
    secret: true
  ai_stability_api_url:
    default: "https://api.stability.ai"
  ai_stability_engine:
    default: "stable-diffusion-xl-beta-v2-2-2"
    type: enum
    choices:
      - "stable-diffusion-xl-beta-v2-2-2"
      - "stable-diffusion-v1-5"
  ai_google_custom_search_api_key:
    default: ""
    secret: true
  ai_google_custom_search_cx:
    default: ""

  # --- Composer AI helper --------------------------------------------------
  composer_ai_helper_enabled:
    default: false
    client: true
  ai_helper_allowed_groups:
    client: true
    type: group_list
    list_type: compact
    default: "3|14"  # 3: @staff, 14: @trust_level_4
    allow_any: false
    refresh: true
  ai_helper_allowed_in_pm:
    default: false
    client: true
  ai_helper_model:
    default: gpt-3.5-turbo
    type: enum
    choices:
      - gpt-3.5-turbo
      - gpt-4
      - claude-2

  # --- Embeddings ----------------------------------------------------------
  ai_embeddings_enabled:
    default: false
    client: true
  ai_embeddings_discourse_service_api_endpoint: ""
  ai_embeddings_discourse_service_api_key:
    default: ""
    secret: true
  # NOTE(review): `list_type` and `allow_any` are otherwise only used on
  # list-typed settings in this file; they look like leftovers on this enum.
  # Kept unchanged - confirm before removing.
  ai_embeddings_model:
    type: enum
    list_type: compact
    default: "all-mpnet-base-v2"
    allow_any: false
    choices:
      - all-mpnet-base-v2
      - text-embedding-ada-002
  ai_embeddings_generate_for_pms: false
  ai_embeddings_semantic_related_topics_enabled: false
  ai_embeddings_semantic_related_topics: 5
  ai_embeddings_semantic_related_include_closed_topics: true
  # NOTE(review): a connection string may embed credentials, yet unlike the
  # API keys in this file it is not marked `secret: true` - consider adding it.
  ai_embeddings_pg_connection_string: ""
  ai_embeddings_semantic_search_enabled:
    default: false
    client: true

  # --- Summarization -------------------------------------------------------
  ai_summarization_discourse_service_api_endpoint: ""
  ai_summarization_discourse_service_api_key:
    default: ""
    secret: true

  # --- AI bot --------------------------------------------------------------
  ai_bot_enabled:
    default: false
    client: true
  # NOTE(review): unlike the other group_list settings in this file, this one
  # sets neither `allow_any: false` nor `refresh: true` - confirm intentional.
  ai_bot_allowed_groups:
    client: true
    type: group_list
    list_type: compact
    default: "3|14"  # 3: @staff, 14: @trust_level_4
  # Adding a new bot? Make sure to create a user for it on the seed file and update translations.
  ai_bot_enabled_chat_bots:
    type: list
    default: "gpt-3.5-turbo"
    client: true
    choices:
      - gpt-3.5-turbo
      - gpt-4
      - claude-v1
  # `summarize` is an available choice but is not part of the default set.
  ai_bot_enabled_chat_commands:
    type: list
    default: "categories|google|image|search|tags|time"
    client: true
    choices:
      - categories
      - google
      - image
      - search
      - summarize
      - tags
      - time
  ai_helper_add_ai_pm_to_header:
    default: true
    client: true