add toxicity and sentiment modules

Rafael dos Santos Silva 2023-02-22 20:46:53 -03:00
parent 2b319d9077
commit 6cf411ec90
No known key found for this signature in database
GPG Key ID: 5E50360227B34938
15 changed files with 477 additions and 6 deletions

View File

@ -0,0 +1,17 @@
# frozen_string_literal: true

module ::Jobs
  class SentimentClassifyPost < ::Jobs::Base
    def execute(args)
      return unless SiteSetting.ai_sentiment_enabled

      post_id = args[:post_id]
      return if post_id.blank?

      post = Post.find_by(id: post_id, post_type: Post.types[:regular])
      return if post&.raw.blank?

      ::DiscourseAI::Sentiment::PostClassifier.new(post).classify!
    end
  end
end

View File

@ -0,0 +1,17 @@
# frozen_string_literal: true

module ::Jobs
  class ToxicityClassifyChatMessage < ::Jobs::Base
    def execute(args)
      return unless SiteSetting.ai_toxicity_enabled

      chat_message_id = args[:chat_message_id]
      return if chat_message_id.blank?

      chat_message = ChatMessage.find_by(id: chat_message_id)
      return if chat_message&.message.blank?

      ::DiscourseAI::Toxicity::ChatMessageClassifier.new(chat_message).classify!
    end
  end
end

View File

@ -0,0 +1,17 @@
# frozen_string_literal: true

module ::Jobs
  class ToxicityClassifyPost < ::Jobs::Base
    def execute(args)
      return unless SiteSetting.ai_toxicity_enabled

      post_id = args[:post_id]
      return if post_id.blank?

      post = Post.find_by(id: post_id, post_type: Post.types[:regular])
      return if post&.raw.blank?

      ::DiscourseAI::Toxicity::PostClassifier.new(post).classify!
    end
  end
end

View File

@ -1,4 +1,3 @@
en:
  js:
    discourse_plugin_name:
      placeholder: placeholder
    discourse-ai:

View File

@ -1 +1,21 @@
en:
  site_settings:
    ai_enabled: "Enable the Discourse AI plugin."
    ai_toxicity_enabled: "Enable the toxicity module."
    ai_toxicity_inference_service_api_endpoint: "URL where the API is running for the toxicity module"
    ai_toxicity_inference_service_api_key: "API key for the toxicity API"
    ai_toxicity_inference_service_api_model: "Model to use for inference. The multilingual model works with Italian, French, Russian, Portuguese, Spanish and Turkish."
    ai_toxicity_flag_automatically: "Automatically flag posts / chat messages that are above the configured thresholds."
    ai_toxicity_flag_threshold_toxicity: "Toxicity: a rude, disrespectful, or unreasonable comment that is somewhat likely to make you leave a discussion or give up on sharing your perspective"
    ai_toxicity_flag_threshold_severe_toxicity: "Severe Toxicity: a very hateful, aggressive, or disrespectful comment that is very likely to make you leave a discussion or give up on sharing your perspective"
    ai_toxicity_flag_threshold_obscene: "Obscene"
    ai_toxicity_flag_threshold_identity_attack: "Identity Attack"
    ai_toxicity_flag_threshold_insult: "Insult"
    ai_toxicity_flag_threshold_threat: "Threat"
    ai_toxicity_flag_threshold_sexual_explicit: "Sexually Explicit"
    ai_toxicity_groups_bypass: "Users in these groups will not have their posts classified by the toxicity module."
    ai_sentiment_enabled: "Enable the sentiment module."
    ai_sentiment_inference_service_api_endpoint: "URL where the API is running for the sentiment module"
    ai_sentiment_inference_service_api_key: "API key for the sentiment API"
    ai_sentiment_models: "Models to use for inference. Sentiment classifies posts on the positive/neutral/negative space. Emotion classifies on the anger/disgust/fear/joy/neutral/sadness/surprise space."

View File

@ -1,4 +1,66 @@
plugins:
  plugin_name_enabled:
  ai_enabled:
    default: true
    client: true
  ai_toxicity_enabled:
    default: false
    client: true
  ai_toxicity_inference_service_api_endpoint:
    default: "https://disorder-testing.demo-by-discourse.com"
  ai_toxicity_inference_service_api_key:
    default: ''
  ai_toxicity_inference_service_api_model:
    type: enum
    default: unbiased
    choices:
      - unbiased
      - multilingual
      - original
  ai_toxicity_flag_automatically:
    default: true
    client: false
  ai_toxicity_flag_threshold_toxicity:
    default: 70
    client: false
  ai_toxicity_flag_threshold_severe_toxicity:
    default: 60
    client: false
  ai_toxicity_flag_threshold_obscene:
    default: 60
    client: false
  ai_toxicity_flag_threshold_identity_attack:
    default: 60
    client: false
  ai_toxicity_flag_threshold_insult:
    default: 60
    client: false
  ai_toxicity_flag_threshold_threat:
    default: 60
    client: false
  ai_toxicity_flag_threshold_sexual_explicit:
    default: 60
    client: false
  ai_toxicity_groups_bypass:
    client: true
    type: group_list
    list_type: compact
    default: "3" # 3: @staff
    allow_any: false
    refresh: true
  ai_sentiment_enabled:
    default: false
    client: true
  ai_sentiment_inference_service_api_endpoint:
    default: ''
  ai_sentiment_inference_service_api_key:
    default: ''
  ai_sentiment_models:
    type: list
    list_type: simple
    default: "emotion"
    allow_any: false
    choices:
      - sentiment
      - emotion
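
For orientation, the list settings above are stored as pipe-delimited strings, and Discourse exposes a `_map` accessor for group list settings, which is what the toxicity event handler below relies on. A minimal console sketch, assuming the defaults defined above:

# Rails console sketch; values assume the defaults above.
SiteSetting.ai_sentiment_models                 # => "emotion"
SiteSetting.ai_sentiment_models.split("|")      # => ["emotion"]

SiteSetting.ai_toxicity_groups_bypass           # => "3" (pipe-delimited group ids)
SiteSetting.ai_toxicity_groups_bypass_map       # => [3] (integer ids used by the bypass check)

SiteSetting.ai_toxicity_flag_threshold_toxicity # => 70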

View File

@ -0,0 +1,14 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Sentiment
    class EventHandler
      class << self
        def handle_post_async(post)
          return unless SiteSetting.ai_sentiment_enabled

          Jobs.enqueue(:sentiment_classify_post, post_id: post.id)
        end
      end
    end
  end
end

View File

@ -0,0 +1,44 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Sentiment
    class PostClassifier
      # Possible labels for the "emotion" and "sentiment" models, respectively.
      EMOTION_LABELS = %w[anger disgust fear joy neutral sadness surprise]
      SENTIMENT_LABELS = %w[negative neutral positive]

      def initialize(object)
        @object = object
      end

      def content
        @object.post_number == 1 ? "#{@object.topic.title}\n#{@object.raw}" : @object.raw
      end

      def classify!
        SiteSetting
          .ai_sentiment_models
          .split("|")
          .each do |model|
            classification =
              ::DiscourseAI::InferenceManager.perform!(
                "#{SiteSetting.ai_sentiment_inference_service_api_endpoint}/api/v1/classify",
                model,
                content,
                SiteSetting.ai_sentiment_inference_service_api_key
              )

            store_classification(model, classification)
          end
      end

      def store_classification(model, classification)
        PostCustomField.create!(
          post_id: @object.id,
          name: "ai-sentiment-#{model}",
          value: { classification: classification }.to_json,
        )
      end
    end
  end
end
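
Each model's result lands in a post custom field named `ai-sentiment-<model>`. A hypothetical read-back sketch, given a `post` that has already been classified with the `emotion` model (the hash shape inside `classification` comes from the inference service and is not fixed here):

# Hypothetical consumer sketch: read a post's stored emotion classification back.
field = PostCustomField.find_by(post_id: post.id, name: "ai-sentiment-emotion")
JSON.parse(field.value)["classification"] if field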

View File

@ -0,0 +1,31 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class ChatMessageClassifier < Classifier
      def content
        @object.message
      end

      def store_classification
        PluginStore.set(
          "toxicity",
          "chat_message_#{@object.id}",
          {
            classification: @classification,
            model: SiteSetting.ai_toxicity_inference_service_api_model,
            date: Time.now.utc,
          },
        )
      end

      def flag!
        Chat::ChatReviewQueue.new.flag_message(
          @object,
          Guardian.new(flagger),
          ReviewableScore.types[:inappropriate],
        )
      end
    end
  end
end
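
Chat message results go to the plugin store rather than a custom field; the counterpart lookup, given a classified `chat_message`, would be a sketch like:

# Counterpart lookup sketch for a classified chat_message.
PluginStore.get("toxicity", "chat_message_#{chat_message.id}")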

View File

@ -0,0 +1,60 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class Classifier
      CLASSIFICATION_LABELS = %w[
        toxicity
        severe_toxicity
        obscene
        identity_attack
        insult
        threat
        sexual_explicit
      ]

      def initialize(object)
        @object = object
      end

      # Implemented by subclasses: the text to send to the inference service.
      def content
      end

      def classify!
        @classification =
          ::DiscourseAI::InferenceManager.perform!(
            "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
            SiteSetting.ai_toxicity_inference_service_api_model,
            content,
            SiteSetting.ai_toxicity_inference_service_api_key
          )

        store_classification
        consider_flagging
      end

      # Implemented by subclasses: persist @classification.
      def store_classification
      end

      def automatic_flag_enabled?
        SiteSetting.ai_toxicity_flag_automatically
      end

      def consider_flagging
        return unless automatic_flag_enabled?

        @reasons =
          CLASSIFICATION_LABELS.filter do |label|
            @classification[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")
          end

        flag! unless @reasons.empty?
      end

      def flagger
        # The Discourse system user (id -1) acts as the flag author.
        User.find_by(id: -1)
      end

      # Implemented by subclasses: create the actual flag / review item.
      def flag!
      end
    end
  end
end
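
To make the threshold comparison concrete, here is a worked example with a made-up score hash, assuming the service reports integer scores on the same 0-100 scale as the thresholds (that scale is an assumption, not something shown in this commit):

# Illustrative only: invented scores on an assumed 0-100 scale.
classification = {
  "toxicity" => 83,
  "severe_toxicity" => 12,
  "obscene" => 5,
  "identity_attack" => 2,
  "insult" => 64,
  "threat" => 1,
  "sexual_explicit" => 0,
}

# Default thresholds from settings.yml: toxicity 70, everything else 60.
thresholds = {
  "toxicity" => 70, "severe_toxicity" => 60, "obscene" => 60, "identity_attack" => 60,
  "insult" => 60, "threat" => 60, "sexual_explicit" => 60,
}

reasons = classification.keys.filter { |label| classification[label] >= thresholds[label] }
# => ["toxicity", "insult"]; flag! runs, and the post variant uses the reason "toxicity/insult".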

View File

@ -0,0 +1,27 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class EventHandler
      class << self
        def handle_post_async(post)
          return if bypass?(post)

          Jobs.enqueue(:toxicity_classify_post, post_id: post.id)
        end

        def handle_chat_async(chat_message)
          return if bypass?(chat_message)

          Jobs.enqueue(:toxicity_classify_chat_message, chat_message_id: chat_message.id)
        end

        def bypass?(content)
          !SiteSetting.ai_toxicity_enabled || group_bypass?(content.user)
        end

        def group_bypass?(user)
          user.groups.pluck(:id).intersection(SiteSetting.ai_toxicity_groups_bypass_map).present?
        end
      end
    end
  end
end

View File

@ -0,0 +1,27 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class PostClassifier < Classifier
      def content
        @object.post_number == 1 ? "#{@object.topic.title}\n#{@object.raw}" : @object.raw
      end

      def store_classification
        PostCustomField.create!(
          post_id: @object.id,
          name: "toxicity",
          value: {
            classification: @classification,
            model: SiteSetting.ai_toxicity_inference_service_api_model,
          }.to_json,
        )
      end

      def flag!
        PostActionCreator.create(flagger, @object, :inappropriate, reason: @reasons.join("/"))
        @object.publish_change_to_clients! :acted
      end
    end
  end
end

View File

@ -0,0 +1,28 @@
# frozen_string_literal: true

module ::DiscourseAI
  class InferenceManager
    def self.perform!(endpoint, model, content, api_key)
      headers = {
        "Referer" => Discourse.base_url,
        "Content-Type" => "application/json",
      }

      if api_key.present?
        headers["X-API-KEY"] = api_key
      end

      response =
        Faraday.post(
          endpoint,
          { model: model, content: content }.to_json,
          headers,
        )

      raise Net::HTTPBadResponse unless response.status == 200

      JSON.parse(response.body)
    end
  end
end
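
For reference, calling it by hand from a console looks like the sketch below; the response shape is an assumption, since the companion inference service is not part of this commit:

# Hypothetical console call; the response shape shown is illustrative.
::DiscourseAI::InferenceManager.perform!(
  "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
  SiteSetting.ai_toxicity_inference_service_api_model, # e.g. "unbiased"
  "some text to classify",
  SiteSetting.ai_toxicity_inference_service_api_key,
)
# => e.g. { "toxicity" => 3, "severe_toxicity" => 0, ... }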

View File

@ -0,0 +1,77 @@
# frozen_string_literal: true

def classify(content)
  ::DiscourseAI::InferenceManager.perform!(
    "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
    SiteSetting.ai_toxicity_inference_service_api_model,
    content,
    SiteSetting.ai_toxicity_inference_service_api_key,
  )
end

desc "Uses existing flagged posts to suggest a configuration threshold"
task "ai:toxicity:calibration_stats", [:set_size] => [:environment] do |_, args|
  flag_agreed =
    PostAction
      .where(post_action_type_id: 4, disagreed_at: nil, deferred_at: nil)
      .where("post_actions.user_id > 0")
      .includes(:post, :user)
      .where(user: { admin: false, moderator: false })
      .where("posts.raw IS NOT NULL")
      .order(created_at: :desc)
      .limit(args[:set_size])
      .pluck(:raw)

  flag_not_agreed =
    PostAction
      .where(post_action_type_id: 4)
      .where("(disagreed_at IS NOT NULL OR deferred_at IS NOT NULL)")
      .where("post_actions.user_id > 0")
      .includes(:post, :user)
      .where(user: { admin: false, moderator: false })
      .where("posts.raw IS NOT NULL")
      .order(created_at: :desc)
      .limit(args[:set_size])
      .pluck(:raw)

  flag_agreed_scores = flag_agreed.map { classify(_1) }
  flag_not_agreed_scores = flag_not_agreed.map { classify(_1) }

  DiscourseAI::Toxicity::Classifier::CLASSIFICATION_LABELS.each do |label|
    puts "Label: #{label}"

    label_agreed_scores = flag_agreed_scores.map { _1[label] }
    label_not_agreed_scores = flag_not_agreed_scores.map { _1[label] }

    puts "Flagged posts (flag agreed) score:"
    puts "Max: #{label_agreed_scores.max}"
    puts "Min: #{label_agreed_scores.min}"
    puts "Avg: #{label_agreed_scores.sum(0.0) / label_agreed_scores.size}"
    puts "Median: #{label_agreed_scores.sort[label_agreed_scores.size / 2]}"
    puts "Stddev: #{Math.sqrt(label_agreed_scores.map { (_1 - label_agreed_scores.sum(0.0) / label_agreed_scores.size)**2 }.sum(0.0) / label_agreed_scores.size)}"

    puts "Flagged posts (flag disagreed or deferred) score:"
    puts "Max: #{label_not_agreed_scores.max}"
    puts "Min: #{label_not_agreed_scores.min}"
    puts "Avg: #{label_not_agreed_scores.sum(0.0) / label_not_agreed_scores.size}"
    puts "Median: #{label_not_agreed_scores.sort[label_not_agreed_scores.size / 2]}"
    puts "Stddev: #{Math.sqrt(label_not_agreed_scores.map { (_1 - label_not_agreed_scores.sum(0.0) / label_not_agreed_scores.size)**2 }.sum(0.0) / label_not_agreed_scores.size)}"

    best_cutoff = 0
    best_cutoff_score = 0

    (0..100)
      .step(1)
      .each do |cutoff|
        score =
          label_agreed_scores.count { _1 > cutoff } + label_not_agreed_scores.count { _1 <= cutoff }

        if score > best_cutoff_score
          best_cutoff_score = score
          best_cutoff = cutoff
        end
      end

    puts "Recommended ai_toxicity_flag_threshold_#{label} value: #{best_cutoff}"
  end
end
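
The task takes the sample size as an argument; to run it against the 100 most recent flags (100 is just an example), invoke it from the shell as `bin/rake "ai:toxicity:calibration_stats[100]"`, or from a console with a sketch like:

# Console sketch; 100 is an example set size.
Rails.application.load_tasks
Rake::Task["ai:toxicity:calibration_stats"].invoke(100)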

View File

@ -1,12 +1,43 @@
# frozen_string_literal: true

# name: discourse-plugin-name
# name: discourse-ai
# about: TODO
# version: 0.0.1
# authors: Discourse
# url: TODO
# required_version: 2.7.0

enabled_site_setting :plugin_name_enabled
enabled_site_setting :ai_enabled

after_initialize {}
after_initialize do
  module ::DiscourseAI
    PLUGIN_NAME = "discourse-ai"
  end

  require_relative "lib/shared/inference_manager.rb"
  require_relative "lib/modules/toxicity/event_handler.rb"
  require_relative "lib/modules/toxicity/classifier.rb"
  require_relative "lib/modules/toxicity/post_classifier.rb"
  require_relative "lib/modules/toxicity/chat_message_classifier.rb"
  require_relative "app/jobs/regular/modules/toxicity/toxicity_classify_post.rb"
  require_relative "app/jobs/regular/modules/toxicity/toxicity_classify_chat_message.rb"
  require_relative "lib/modules/sentiment/event_handler.rb"
  require_relative "lib/modules/sentiment/post_classifier.rb"
  require_relative "app/jobs/regular/modules/sentiment/sentiment_classify_post.rb"

  on(:post_created) do |post|
    DiscourseAI::Toxicity::EventHandler.handle_post_async(post)
    DiscourseAI::Sentiment::EventHandler.handle_post_async(post)
  end

  on(:post_edited) do |post|
    DiscourseAI::Toxicity::EventHandler.handle_post_async(post)
    DiscourseAI::Sentiment::EventHandler.handle_post_async(post)
  end

  on(:chat_message_created) do |chat_message|
    DiscourseAI::Toxicity::EventHandler.handle_chat_async(chat_message)
  end

  on(:chat_message_edited) do |chat_message|
    DiscourseAI::Toxicity::EventHandler.handle_chat_async(chat_message)
  end
end
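
With `ai_enabled` plus the relevant module settings switched on, the classifiers can also be exercised by hand from a Rails console; the post id below is a placeholder:

# Manual run sketch; 123 is a placeholder post id.
post = Post.find(123)
DiscourseAI::Toxicity::PostClassifier.new(post).classify!
DiscourseAI::Sentiment::PostClassifier.new(post).classify!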