diff --git a/app/jobs/regular/modules/sentiment/sentiment_classify_post.rb b/app/jobs/regular/modules/sentiment/sentiment_classify_post.rb
new file mode 100644
index 00000000..2a9272ad
--- /dev/null
+++ b/app/jobs/regular/modules/sentiment/sentiment_classify_post.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module ::Jobs
+  class SentimentClassifyPost < ::Jobs::Base
+    def execute(args)
+      return unless SiteSetting.ai_sentiment_enabled
+
+      post_id = args[:post_id]
+      return if post_id.blank?
+
+      post = Post.find_by(id: post_id, post_type: Post.types[:regular])
+      return if post&.raw.blank?
+
+      ::DiscourseAI::Sentiment::PostClassifier.new(post).classify!
+    end
+  end
+end
diff --git a/app/jobs/regular/modules/toxicity/toxicity_classify_chat_message.rb b/app/jobs/regular/modules/toxicity/toxicity_classify_chat_message.rb
new file mode 100644
index 00000000..ecd6b11a
--- /dev/null
+++ b/app/jobs/regular/modules/toxicity/toxicity_classify_chat_message.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module ::Jobs
+  class ToxicityClassifyChatMessage < ::Jobs::Base
+    def execute(args)
+      return unless SiteSetting.ai_toxicity_enabled
+
+      chat_message_id = args[:chat_message_id]
+      return if chat_message_id.blank?
+
+      chat_message = ChatMessage.find_by(id: chat_message_id)
+      return if chat_message&.message.blank?
+
+      ::DiscourseAI::Toxicity::ChatMessageClassifier.new(chat_message).classify!
+    end
+  end
+end
diff --git a/app/jobs/regular/modules/toxicity/toxicity_classify_post.rb b/app/jobs/regular/modules/toxicity/toxicity_classify_post.rb
new file mode 100644
index 00000000..bbb90447
--- /dev/null
+++ b/app/jobs/regular/modules/toxicity/toxicity_classify_post.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module ::Jobs
+  class ToxicityClassifyPost < ::Jobs::Base
+    def execute(args)
+      return unless SiteSetting.ai_toxicity_enabled
+
+      post_id = args[:post_id]
+      return if post_id.blank?
+
+      post = Post.find_by(id: post_id, post_type: Post.types[:regular])
+      return if post&.raw.blank?
+
+      ::DiscourseAI::Toxicity::PostClassifier.new(post).classify!
+    end
+  end
+end
diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index f9432c28..319db44f 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -1,4 +1,3 @@
 en:
   js:
-    discourse_plugin_name:
-      placeholder: placeholder
+    discourse-ai:
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index 63f1c3e9..486401eb 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -1 +1,21 @@
 en:
+  site_settings:
+    ai_enabled: "Enable the Discourse AI plugin."
+    ai_toxicity_enabled: "Enable the toxicity module."
+    ai_toxicity_inference_service_api_endpoint: "URL where the API for the toxicity module is running"
+    ai_toxicity_inference_service_api_key: "API key for the toxicity API"
+    ai_toxicity_inference_service_api_model: "Model to use for inference. The multilingual model works with Italian, French, Russian, Portuguese, Spanish, and Turkish."
+    ai_toxicity_flag_automatically: "Automatically flag posts / chat messages that are above the configured thresholds."
+    ai_toxicity_flag_threshold_toxicity: "Toxicity: a rude, disrespectful, or unreasonable comment that is somewhat likely to make you leave a discussion or give up on sharing your perspective"
+    ai_toxicity_flag_threshold_severe_toxicity: "Severe Toxicity: a very hateful, aggressive, or disrespectful comment that is very likely to make you leave a discussion or give up on sharing your perspective"
+    ai_toxicity_flag_threshold_obscene: "Obscene"
+    ai_toxicity_flag_threshold_identity_attack: "Identity Attack"
+    ai_toxicity_flag_threshold_insult: "Insult"
+    ai_toxicity_flag_threshold_threat: "Threat"
+    ai_toxicity_flag_threshold_sexual_explicit: "Sexually Explicit"
+    ai_toxicity_groups_bypass: "Users in these groups will not have their posts classified by the toxicity module."
+
+    ai_sentiment_enabled: "Enable the sentiment module."
+    ai_sentiment_inference_service_api_endpoint: "URL where the API for the sentiment module is running"
+    ai_sentiment_inference_service_api_key: "API key for the sentiment API"
+    ai_sentiment_models: "Models to use for inference. Sentiment classifies posts on the positive/neutral/negative space. Emotion classifies them on the anger/disgust/fear/joy/neutral/sadness/surprise space."
diff --git a/config/settings.yml b/config/settings.yml
index 332b90f9..8ab7cc4c 100644
--- a/config/settings.yml
+++ b/config/settings.yml
@@ -1,4 +1,66 @@
 plugins:
-  plugin_name_enabled:
+  ai_enabled:
     default: true
     client: true
+
+  ai_toxicity_enabled:
+    default: false
+    client: true
+  ai_toxicity_inference_service_api_endpoint:
+    default: "https://disorder-testing.demo-by-discourse.com"
+  ai_toxicity_inference_service_api_key:
+    default: ''
+  ai_toxicity_inference_service_api_model:
+    type: enum
+    default: unbiased
+    choices:
+      - unbiased
+      - multilingual
+      - original
+  ai_toxicity_flag_automatically:
+    default: true
+    client: false
+  ai_toxicity_flag_threshold_toxicity:
+    default: 70
+    client: false
+  ai_toxicity_flag_threshold_severe_toxicity:
+    default: 60
+    client: false
+  ai_toxicity_flag_threshold_obscene:
+    default: 60
+    client: false
+  ai_toxicity_flag_threshold_identity_attack:
+    default: 60
+    client: false
+  ai_toxicity_flag_threshold_insult:
+    default: 60
+    client: false
+  ai_toxicity_flag_threshold_threat:
+    default: 60
+    client: false
+  ai_toxicity_flag_threshold_sexual_explicit:
+    default: 60
+    client: false
+  ai_toxicity_groups_bypass:
+    client: true
+    type: group_list
+    list_type: compact
+    default: "3" # 3: @staff
+    allow_any: false
+    refresh: true
+
+  ai_sentiment_enabled:
+    default: false
+    client: true
+  ai_sentiment_inference_service_api_endpoint:
+    default: ''
+  ai_sentiment_inference_service_api_key:
+    default: ''
+  ai_sentiment_models:
+    type: list
+    list_type: simple
+    default: "emotion"
+    allow_any: false
+    choices:
+      - sentiment
+      - emotion
diff --git a/lib/modules/sentiment/event_handler.rb b/lib/modules/sentiment/event_handler.rb
new file mode 100644
index 00000000..7fe5958b
--- /dev/null
+++ b/lib/modules/sentiment/event_handler.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+
+module ::DiscourseAI
+  module Sentiment
+    class EventHandler
+      class << self
+        def handle_post_async(post)
+          return unless SiteSetting.ai_sentiment_enabled
+          Jobs.enqueue(:sentiment_classify_post, post_id: post.id)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/modules/sentiment/post_classifier.rb b/lib/modules/sentiment/post_classifier.rb
new file mode 100644
index 00000000..0b2638f0
--- /dev/null
+++ b/lib/modules/sentiment/post_classifier.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+module ::DiscourseAI
+  module Sentiment
+    class PostClassifier
+      EMOTION_LABELS = %w[anger disgust fear joy neutral sadness surprise]
+
+      SENTIMENT_LABELS = %w[negative neutral positive]
+
+      def initialize(object)
+        @object = object
+      end
+
+      def content
+        @object.post_number == 1 ? "#{@object.topic.title}\n#{@object.raw}" : @object.raw
+      end
+
+      def classify!
+        SiteSetting
+          .ai_sentiment_models
+          .split("|")
+          .each do |model|
+            classification =
+              ::DiscourseAI::InferenceManager.perform!(
+                "#{SiteSetting.ai_sentiment_inference_service_api_endpoint}/api/v1/classify",
+                model,
+                content,
+                SiteSetting.ai_sentiment_inference_service_api_key
+              )
+
+            store_classification(model, classification)
+          end
+      end
+
+      def store_classification(model, classification)
+        PostCustomField.create!(
+          post_id: @object.id,
+          name: "ai-sentiment-#{model}",
+          value: { classification: classification }.to_json,
+        )
+      end
+    end
+  end
+end
diff --git a/lib/modules/toxicity/chat_message_classifier.rb b/lib/modules/toxicity/chat_message_classifier.rb
new file mode 100644
index 00000000..cb6b0898
--- /dev/null
+++ b/lib/modules/toxicity/chat_message_classifier.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module ::DiscourseAI
+  module Toxicity
+    class ChatMessageClassifier < Classifier
+      def content
+        @object.message
+      end
+
+      def store_classification
+        PluginStore.set(
+          "toxicity",
+          "chat_message_#{@object.id}",
+          {
+            classification: @classification,
+            model: SiteSetting.ai_toxicity_inference_service_api_model,
+            date: Time.now.utc,
+          },
+        )
+      end
+
+      def flag!
+        Chat::ChatReviewQueue.new.flag_message(
+          @object,
+          Guardian.new(flagger),
+          ReviewableScore.types[:inappropriate],
+        )
+      end
+    end
+  end
+end
diff --git a/lib/modules/toxicity/classifier.rb b/lib/modules/toxicity/classifier.rb
new file mode 100644
index 00000000..ee7a951e
--- /dev/null
+++ b/lib/modules/toxicity/classifier.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+module ::DiscourseAI
+  module Toxicity
+    class Classifier
+      CLASSIFICATION_LABELS = %w[
+        toxicity
+        severe_toxicity
+        obscene
+        identity_attack
+        insult
+        threat
+        sexual_explicit
+      ]
+
+      def initialize(object)
+        @object = object
+      end
+
+      def content
+      end
+
+      def classify!
+        @classification =
+          ::DiscourseAI::InferenceManager.perform!(
+            "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
+            SiteSetting.ai_toxicity_inference_service_api_model,
+            content,
+            SiteSetting.ai_toxicity_inference_service_api_key
+          )
+        store_classification
+        consider_flagging
+      end
+
+      def store_classification
+      end
+
+      def automatic_flag_enabled?
+        SiteSetting.ai_toxicity_flag_automatically
+      end
+
+      def consider_flagging
+        return unless automatic_flag_enabled?
+        @reasons =
+          CLASSIFICATION_LABELS.filter do |label|
+            @classification[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")
+          end
+
+        flag! unless @reasons.empty?
+      end
+
+      def flagger
+        User.find_by(id: -1)
+      end
+
+      def flag!
+      end
+    end
+  end
+end
diff --git a/lib/modules/toxicity/event_handler.rb b/lib/modules/toxicity/event_handler.rb
new file mode 100644
index 00000000..fd78c024
--- /dev/null
+++ b/lib/modules/toxicity/event_handler.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module ::DiscourseAI
+  module Toxicity
+    class EventHandler
+      class << self
+        def handle_post_async(post)
+          return if bypass?(post)
+          Jobs.enqueue(:toxicity_classify_post, post_id: post.id)
+        end
+
+        def handle_chat_async(chat_message)
+          return if bypass?(chat_message)
+          Jobs.enqueue(:toxicity_classify_chat_message, chat_message_id: chat_message.id)
+        end
+
+        def bypass?(content)
+          !SiteSetting.ai_toxicity_enabled || group_bypass?(content.user)
+        end
+
+        def group_bypass?(user)
+          user.groups.pluck(:id).intersection(SiteSetting.ai_toxicity_groups_bypass_map).present?
+        end
+      end
+    end
+  end
+end
diff --git a/lib/modules/toxicity/post_classifier.rb b/lib/modules/toxicity/post_classifier.rb
new file mode 100644
index 00000000..1cd3ffdf
--- /dev/null
+++ b/lib/modules/toxicity/post_classifier.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module ::DiscourseAI
+  module Toxicity
+    class PostClassifier < Classifier
+      def content
+        @object.post_number == 1 ? "#{@object.topic.title}\n#{@object.raw}" : @object.raw
+      end
+
+      def store_classification
+        PostCustomField.create!(
+          post_id: @object.id,
+          name: "toxicity",
+          value: {
+            classification: @classification,
+            model: SiteSetting.ai_toxicity_inference_service_api_model,
+          }.to_json,
+        )
+      end
+
+      def flag!
+        PostActionCreator.create(flagger, @object, :inappropriate, reason: @reasons.join("/"))
+        @object.publish_change_to_clients! :acted
+      end
+    end
+  end
+end
diff --git a/lib/shared/inference_manager.rb b/lib/shared/inference_manager.rb
new file mode 100644
index 00000000..03f060f8
--- /dev/null
+++ b/lib/shared/inference_manager.rb
@@ -0,0 +1,28 @@
+# frozen_string_literal: true
+
+module ::DiscourseAI
+  class InferenceManager
+    def self.perform!(endpoint, model, content, api_key)
+
+      headers = {
+        "Referer" => Discourse.base_url,
+        "Content-Type" => "application/json",
+      }
+
+      if api_key.present?
+ headers["X-API-KEY"] = api_key + end + + response = + Faraday.post( + endpoint, + { model: model, content: content }.to_json, + headers, + ) + + raise Net::HTTPBadResponse unless response.status == 200 + + JSON.parse(response.body) + end + end +end diff --git a/lib/tasks/modules/toxicity/calibration.rake b/lib/tasks/modules/toxicity/calibration.rake new file mode 100644 index 00000000..b9fe6bf3 --- /dev/null +++ b/lib/tasks/modules/toxicity/calibration.rake @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +def classify(content) + ::DiscourseAI::InferenceManager.perform!( + "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify", + SiteSetting.ai_toxicity_inference_service_api_model, + content, + SiteSetting.ai_toxicity_inference_service_api_key, + ) +end + +desc "Uses existing flagged posts to suggest a configuration threshold" +task "ai:toxicity:calibration_stats", [:set_size] => [:environment] do |_, args| + flag_agreed = + PostAction + .where(post_action_type_id: 4, disagreed_at: nil, deferred_at: nil) + .where("post_actions.user_id > 0") + .includes(:post, :user) + .where(user: { admin: false, moderator: false }) + .where("posts.raw IS NOT NULL") + .order(created_at: :desc) + .limit(args[:set_size]) + .pluck(:raw) + + flag_not_agreed = + PostAction + .where(post_action_type_id: 4) + .where("(disagreed_at IS NOT NULL OR deferred_at IS NOT NULL)") + .where("post_actions.user_id > 0") + .includes(:post, :user) + .where(user: { admin: false, moderator: false }) + .where("posts.raw IS NOT NULL") + .order(created_at: :desc) + .limit(args[:set_size]) + .pluck(:raw) + + flag_agreed_scores = flag_agreed.map { classify(_1) } + flag_not_agreed_scores = flag_not_agreed.map { classify(_1) } + + DiscourseAI::Toxicity::Classifier::CLASSIFICATION_LABELS.each do |label| + puts "Label: #{label}" + + label_agreed_scores = flag_agreed_scores.map { _1[label] } + label_not_agreed_scores = flag_not_agreed_scores.map { _1[label] } + + puts "Flagged posts score:" + puts "Max: #{label_agreed_scores.max}" + puts "Min: #{label_agreed_scores.min}" + puts "Avg: #{label_agreed_scores.sum(0.0) / label_agreed_scores.size}" + puts "Median: #{label_agreed_scores.sort[label_agreed_scores.size / 2]}" + puts "Stddev: #{Math.sqrt(label_agreed_scores.map { (_1 - label_agreed_scores.sum(0.0) / label_agreed_scores.size)**2 }.sum(0.0) / label_agreed_scores.size)}" + + puts "Flagged posts score:" + puts "Max: #{label_not_agreed_scores.max}" + puts "Min: #{label_not_agreed_scores.min}" + puts "Avg: #{label_not_agreed_scores.sum(0.0) / label_not_agreed_scores.size}" + puts "Median: #{label_not_agreed_scores.sort[label_not_agreed_scores.size / 2]}" + puts "Stddev: #{Math.sqrt(label_not_agreed_scores.map { (_1 - label_not_agreed_scores.sum(0.0) / label_not_agreed_scores.size)**2 }.sum(0.0) / label_not_agreed_scores.size)}" + + best_cutoff = 0 + best_cutoff_score = 0 + + (0..100) + .step(1) + .each do |cutoff| + score = + label_agreed_scores.count { _1 > cutoff } + label_not_agreed_scores.count { _1 <= cutoff } + + if score > best_cutoff_score + best_cutoff_score = score + best_cutoff = cutoff + end + end + + puts "Recommended ai_toxicity_flag_threshold_#{label} value: #{best_cutoff}" + end +end diff --git a/plugin.rb b/plugin.rb index f0bae53c..755f5a49 100644 --- a/plugin.rb +++ b/plugin.rb @@ -1,12 +1,43 @@ # frozen_string_literal: true -# name: discourse-plugin-name +# name: discourse-ai # about: TODO # version: 0.0.1 # authors: Discourse # url: TODO # required_version: 2.7.0 -enabled_site_setting 
+enabled_site_setting :ai_enabled
 
-after_initialize {}
+after_initialize do
+  module ::DiscourseAI
+    PLUGIN_NAME = "discourse-ai"
+  end
+
+  require_relative "lib/shared/inference_manager.rb"
+  require_relative "lib/modules/toxicity/event_handler.rb"
+  require_relative "lib/modules/toxicity/classifier.rb"
+  require_relative "lib/modules/toxicity/post_classifier.rb"
+  require_relative "lib/modules/toxicity/chat_message_classifier.rb"
+  require_relative "app/jobs/regular/modules/toxicity/toxicity_classify_post.rb"
+  require_relative "app/jobs/regular/modules/toxicity/toxicity_classify_chat_message.rb"
+
+  require_relative "lib/modules/sentiment/event_handler.rb"
+  require_relative "lib/modules/sentiment/post_classifier.rb"
+  require_relative "app/jobs/regular/modules/sentiment/sentiment_classify_post.rb"
+
+  on(:post_created) do |post|
+    DiscourseAI::Toxicity::EventHandler.handle_post_async(post)
+    DiscourseAI::Sentiment::EventHandler.handle_post_async(post)
+  end
+  on(:post_edited) do |post|
+    DiscourseAI::Toxicity::EventHandler.handle_post_async(post)
+    DiscourseAI::Sentiment::EventHandler.handle_post_async(post)
+  end
+  on(:chat_message_created) do |chat_message|
+    DiscourseAI::Toxicity::EventHandler.handle_chat_async(chat_message)
+  end
+  on(:chat_message_edited) do |chat_message|
+    DiscourseAI::Toxicity::EventHandler.handle_chat_async(chat_message)
+  end
+end
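
A minimal usage sketch of the toxicity flow above, assuming the inference service behind /api/v1/classify returns a flat JSON object mapping each classification label to a 0-100 score (this shape is inferred from consider_flagging comparing @classification[label] against the integer threshold settings; it is not documented in the patch itself). The post id is hypothetical.

  # Classify a single post synchronously, bypassing the Jobs queue.
  post = Post.find(123) # hypothetical post id

  classifier = ::DiscourseAI::Toxicity::PostClassifier.new(post)
  classifier.classify!

  # The result is persisted as a post custom field named "toxicity";
  # parsing it back should yield the classification hash and the model name.
  JSON.parse(post.reload.custom_fields["toxicity"])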