add toxicity and sentiment modules

Rafael dos Santos Silva 2023-02-22 20:46:53 -03:00
parent 2b319d9077
commit 6cf411ec90
No known key found for this signature in database
GPG Key ID: 5E50360227B34938
15 changed files with 477 additions and 6 deletions

View File

@ -0,0 +1,17 @@
# frozen_string_literal: true

module ::Jobs
  class SentimentClassifyPost < ::Jobs::Base
    def execute(args)
      return unless SiteSetting.ai_sentiment_enabled

      post_id = args[:post_id]
      return if post_id.blank?

      post = Post.find_by(id: post_id, post_type: Post.types[:regular])
      return if post&.raw.blank?

      ::DiscourseAI::Sentiment::PostClassifier.new(post).classify!
    end
  end
end

View File

@ -0,0 +1,17 @@
# frozen_string_literal: true

module ::Jobs
  class ToxicityClassifyChatMessage < ::Jobs::Base
    def execute(args)
      return unless SiteSetting.ai_toxicity_enabled

      chat_message_id = args[:chat_message_id]
      return if chat_message_id.blank?

      chat_message = ChatMessage.find_by(id: chat_message_id)
      return if chat_message&.message.blank?

      ::DiscourseAI::Toxicity::ChatMessageClassifier.new(chat_message).classify!
    end
  end
end

View File

@ -0,0 +1,17 @@
# frozen_string_literal: true

module ::Jobs
  class ToxicityClassifyPost < ::Jobs::Base
    def execute(args)
      return unless SiteSetting.ai_toxicity_enabled

      post_id = args[:post_id]
      return if post_id.blank?

      post = Post.find_by(id: post_id, post_type: Post.types[:regular])
      return if post&.raw.blank?

      ::DiscourseAI::Toxicity::PostClassifier.new(post).classify!
    end
  end
end

View File

@ -1,4 +1,3 @@
en:
  js:
    discourse_plugin_name:
      placeholder: placeholder
    discourse-ai:

View File

@ -1 +1,21 @@
en:
  site_settings:
    ai_enabled: "Enable the Discourse AI plugin."
    ai_toxicity_enabled: "Enable the toxicity module."
    ai_toxicity_inference_service_api_endpoint: "URL where the API is running for the toxicity module"
    ai_toxicity_inference_service_api_key: "API key for the toxicity API"
    ai_toxicity_inference_service_api_model: "Model to use for inference. The multilingual model works with Italian, French, Russian, Portuguese, Spanish and Turkish."
    ai_toxicity_flag_automatically: "Automatically flag posts / chat messages that are above the configured thresholds."
    ai_toxicity_flag_threshold_toxicity: "Toxicity: a rude, disrespectful, or unreasonable comment that is somewhat likely to make you leave a discussion or give up on sharing your perspective"
    ai_toxicity_flag_threshold_severe_toxicity: "Severe Toxicity: a very hateful, aggressive, or disrespectful comment that is very likely to make you leave a discussion or give up on sharing your perspective"
    ai_toxicity_flag_threshold_obscene: "Obscene"
    ai_toxicity_flag_threshold_identity_attack: "Identity Attack"
    ai_toxicity_flag_threshold_insult: "Insult"
    ai_toxicity_flag_threshold_threat: "Threat"
    ai_toxicity_flag_threshold_sexual_explicit: "Sexually Explicit"
    ai_toxicity_groups_bypass: "Users in these groups will not have their posts classified by the toxicity module."
    ai_sentiment_enabled: "Enable the sentiment module."
    ai_sentiment_inference_service_api_endpoint: "URL where the API is running for the sentiment module"
    ai_sentiment_inference_service_api_key: "API key for the sentiment API"
    ai_sentiment_models: "Models to use for inference. Sentiment classifies posts on the positive/neutral/negative space. Emotion classifies on the anger/disgust/fear/joy/neutral/sadness/surprise space."

View File

@ -1,4 +1,66 @@
plugins:
  plugin_name_enabled:
  ai_enabled:
    default: true
    client: true
  ai_toxicity_enabled:
    default: false
    client: true
  ai_toxicity_inference_service_api_endpoint:
    default: "https://disorder-testing.demo-by-discourse.com"
  ai_toxicity_inference_service_api_key:
    default: ''
  ai_toxicity_inference_service_api_model:
    type: enum
    default: unbiased
    choices:
      - unbiased
      - multilingual
      - original
  ai_toxicity_flag_automatically:
    default: true
    client: false
  ai_toxicity_flag_threshold_toxicity:
    default: 70
    client: false
  ai_toxicity_flag_threshold_severe_toxicity:
    default: 60
    client: false
  ai_toxicity_flag_threshold_obscene:
    default: 60
    client: false
  ai_toxicity_flag_threshold_identity_attack:
    default: 60
    client: false
  ai_toxicity_flag_threshold_insult:
    default: 60
    client: false
  ai_toxicity_flag_threshold_threat:
    default: 60
    client: false
  ai_toxicity_flag_threshold_sexual_explicit:
    default: 60
    client: false
  ai_toxicity_groups_bypass:
    client: true
    type: group_list
    list_type: compact
    default: "3" # 3: @staff
    allow_any: false
    refresh: true
  ai_sentiment_enabled:
    default: false
    client: true
  ai_sentiment_inference_service_api_endpoint:
    default: ''
  ai_sentiment_inference_service_api_key:
    default: ''
  ai_sentiment_models:
    type: list
    list_type: simple
    default: "emotion"
    allow_any: false
    choices:
      - sentiment
      - emotion
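
For orientation, the list settings above are stored as pipe-delimited strings, and Discourse exposes a `_map` accessor for group list settings, which is what the toxicity event handler below relies on. A minimal console sketch, assuming the defaults defined above:

# Rails console sketch; values assume the defaults above.
SiteSetting.ai_sentiment_models                 # => "emotion"
SiteSetting.ai_sentiment_models.split("|")      # => ["emotion"]

SiteSetting.ai_toxicity_groups_bypass           # => "3" (pipe-delimited group ids)
SiteSetting.ai_toxicity_groups_bypass_map       # => [3] (integer ids used by the bypass check)

SiteSetting.ai_toxicity_flag_threshold_toxicity # => 70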

View File

@ -0,0 +1,14 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Sentiment
    class EventHandler
      class << self
        def handle_post_async(post)
          return unless SiteSetting.ai_sentiment_enabled

          Jobs.enqueue(:sentiment_classify_post, post_id: post.id)
        end
      end
    end
  end
end

View File

@ -0,0 +1,44 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Sentiment
    class PostClassifier
      # Possible labels for the "emotion" and "sentiment" models, respectively.
      EMOTION_LABELS = %w[anger disgust fear joy neutral sadness surprise]
      SENTIMENT_LABELS = %w[negative neutral positive]

      def initialize(object)
        @object = object
      end

      def content
        @object.post_number == 1 ? "#{@object.topic.title}\n#{@object.raw}" : @object.raw
      end

      def classify!
        SiteSetting
          .ai_sentiment_models
          .split("|")
          .each do |model|
            classification =
              ::DiscourseAI::InferenceManager.perform!(
                "#{SiteSetting.ai_sentiment_inference_service_api_endpoint}/api/v1/classify",
                model,
                content,
                SiteSetting.ai_sentiment_inference_service_api_key
              )

            store_classification(model, classification)
          end
      end

      def store_classification(model, classification)
        PostCustomField.create!(
          post_id: @object.id,
          name: "ai-sentiment-#{model}",
          value: { classification: classification }.to_json,
        )
      end
    end
  end
end
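
Each model's result lands in a post custom field named `ai-sentiment-<model>`. A hypothetical read-back sketch, given a `post` that has already been classified with the `emotion` model (the hash shape inside `classification` comes from the inference service and is not fixed here):

# Hypothetical consumer sketch: read a post's stored emotion classification back.
field = PostCustomField.find_by(post_id: post.id, name: "ai-sentiment-emotion")
JSON.parse(field.value)["classification"] if field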

View File

@ -0,0 +1,31 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class ChatMessageClassifier < Classifier
      def content
        @object.message
      end

      def store_classification
        PluginStore.set(
          "toxicity",
          "chat_message_#{@object.id}",
          {
            classification: @classification,
            model: SiteSetting.ai_toxicity_inference_service_api_model,
            date: Time.now.utc,
          },
        )
      end

      def flag!
        Chat::ChatReviewQueue.new.flag_message(
          @object,
          Guardian.new(flagger),
          ReviewableScore.types[:inappropriate],
        )
      end
    end
  end
end
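
Chat message results go to the plugin store rather than a custom field; the counterpart lookup, given a classified `chat_message`, would be a sketch like:

# Counterpart lookup sketch for a classified chat_message.
PluginStore.get("toxicity", "chat_message_#{chat_message.id}")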

View File

@ -0,0 +1,60 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class Classifier
      CLASSIFICATION_LABELS = %w[
        toxicity
        severe_toxicity
        obscene
        identity_attack
        insult
        threat
        sexual_explicit
      ]

      def initialize(object)
        @object = object
      end

      # Implemented by subclasses: the text to send to the inference service.
      def content
      end

      def classify!
        @classification =
          ::DiscourseAI::InferenceManager.perform!(
            "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
            SiteSetting.ai_toxicity_inference_service_api_model,
            content,
            SiteSetting.ai_toxicity_inference_service_api_key
          )

        store_classification
        consider_flagging
      end

      # Implemented by subclasses: persist @classification.
      def store_classification
      end

      def automatic_flag_enabled?
        SiteSetting.ai_toxicity_flag_automatically
      end

      def consider_flagging
        return unless automatic_flag_enabled?

        @reasons =
          CLASSIFICATION_LABELS.filter do |label|
            @classification[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")
          end

        flag! unless @reasons.empty?
      end

      def flagger
        # The Discourse system user (id -1) acts as the flag author.
        User.find_by(id: -1)
      end

      # Implemented by subclasses: create the actual flag / review item.
      def flag!
      end
    end
  end
end
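
To make the threshold comparison concrete, here is a worked example with a made-up score hash, assuming the service reports integer scores on the same 0-100 scale as the thresholds (that scale is an assumption, not something shown in this commit):

# Illustrative only: invented scores on an assumed 0-100 scale.
classification = {
  "toxicity" => 83,
  "severe_toxicity" => 12,
  "obscene" => 5,
  "identity_attack" => 2,
  "insult" => 64,
  "threat" => 1,
  "sexual_explicit" => 0,
}

# Default thresholds from settings.yml: toxicity 70, everything else 60.
thresholds = {
  "toxicity" => 70, "severe_toxicity" => 60, "obscene" => 60, "identity_attack" => 60,
  "insult" => 60, "threat" => 60, "sexual_explicit" => 60,
}

reasons = classification.keys.filter { |label| classification[label] >= thresholds[label] }
# => ["toxicity", "insult"]; flag! runs, and the post variant uses the reason "toxicity/insult".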

View File

@ -0,0 +1,27 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class EventHandler
      class << self
        def handle_post_async(post)
          return if bypass?(post)

          Jobs.enqueue(:toxicity_classify_post, post_id: post.id)
        end

        def handle_chat_async(chat_message)
          return if bypass?(chat_message)

          Jobs.enqueue(:toxicity_classify_chat_message, chat_message_id: chat_message.id)
        end

        def bypass?(content)
          !SiteSetting.ai_toxicity_enabled || group_bypass?(content.user)
        end

        def group_bypass?(user)
          user.groups.pluck(:id).intersection(SiteSetting.ai_toxicity_groups_bypass_map).present?
        end
      end
    end
  end
end

View File

@ -0,0 +1,27 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class PostClassifier < Classifier
      def content
        @object.post_number == 1 ? "#{@object.topic.title}\n#{@object.raw}" : @object.raw
      end

      def store_classification
        PostCustomField.create!(
          post_id: @object.id,
          name: "toxicity",
          value: {
            classification: @classification,
            model: SiteSetting.ai_toxicity_inference_service_api_model,
          }.to_json,
        )
      end

      def flag!
        PostActionCreator.create(flagger, @object, :inappropriate, reason: @reasons.join("/"))
        @object.publish_change_to_clients! :acted
      end
    end
  end
end

View File

@ -0,0 +1,28 @@
# frozen_string_literal: true

module ::DiscourseAI
  class InferenceManager
    def self.perform!(endpoint, model, content, api_key)
      headers = {
        "Referer" => Discourse.base_url,
        "Content-Type" => "application/json",
      }

      if api_key.present?
        headers["X-API-KEY"] = api_key
      end

      response =
        Faraday.post(
          endpoint,
          { model: model, content: content }.to_json,
          headers,
        )

      raise Net::HTTPBadResponse unless response.status == 200

      JSON.parse(response.body)
    end
  end
end
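
For reference, calling it by hand from a console looks like the sketch below; the response shape is an assumption, since the companion inference service is not part of this commit:

# Hypothetical console call; the response shape shown is illustrative.
::DiscourseAI::InferenceManager.perform!(
  "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
  SiteSetting.ai_toxicity_inference_service_api_model, # e.g. "unbiased"
  "some text to classify",
  SiteSetting.ai_toxicity_inference_service_api_key,
)
# => e.g. { "toxicity" => 3, "severe_toxicity" => 0, ... }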

View File

@ -0,0 +1,77 @@
# frozen_string_literal: true

def classify(content)
  ::DiscourseAI::InferenceManager.perform!(
    "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
    SiteSetting.ai_toxicity_inference_service_api_model,
    content,
    SiteSetting.ai_toxicity_inference_service_api_key,
  )
end

desc "Uses existing flagged posts to suggest a configuration threshold"
task "ai:toxicity:calibration_stats", [:set_size] => [:environment] do |_, args|
  flag_agreed =
    PostAction
      .where(post_action_type_id: 4, disagreed_at: nil, deferred_at: nil)
      .where("post_actions.user_id > 0")
      .includes(:post, :user)
      .where(user: { admin: false, moderator: false })
      .where("posts.raw IS NOT NULL")
      .order(created_at: :desc)
      .limit(args[:set_size])
      .pluck(:raw)

  flag_not_agreed =
    PostAction
      .where(post_action_type_id: 4)
      .where("(disagreed_at IS NOT NULL OR deferred_at IS NOT NULL)")
      .where("post_actions.user_id > 0")
      .includes(:post, :user)
      .where(user: { admin: false, moderator: false })
      .where("posts.raw IS NOT NULL")
      .order(created_at: :desc)
      .limit(args[:set_size])
      .pluck(:raw)

  flag_agreed_scores = flag_agreed.map { classify(_1) }
  flag_not_agreed_scores = flag_not_agreed.map { classify(_1) }

  DiscourseAI::Toxicity::Classifier::CLASSIFICATION_LABELS.each do |label|
    puts "Label: #{label}"

    label_agreed_scores = flag_agreed_scores.map { _1[label] }
    label_not_agreed_scores = flag_not_agreed_scores.map { _1[label] }

    puts "Flagged posts (flag agreed) score:"
    puts "Max: #{label_agreed_scores.max}"
    puts "Min: #{label_agreed_scores.min}"
    puts "Avg: #{label_agreed_scores.sum(0.0) / label_agreed_scores.size}"
    puts "Median: #{label_agreed_scores.sort[label_agreed_scores.size / 2]}"
    puts "Stddev: #{Math.sqrt(label_agreed_scores.map { (_1 - label_agreed_scores.sum(0.0) / label_agreed_scores.size)**2 }.sum(0.0) / label_agreed_scores.size)}"

    puts "Flagged posts (flag disagreed or deferred) score:"
    puts "Max: #{label_not_agreed_scores.max}"
    puts "Min: #{label_not_agreed_scores.min}"
    puts "Avg: #{label_not_agreed_scores.sum(0.0) / label_not_agreed_scores.size}"
    puts "Median: #{label_not_agreed_scores.sort[label_not_agreed_scores.size / 2]}"
    puts "Stddev: #{Math.sqrt(label_not_agreed_scores.map { (_1 - label_not_agreed_scores.sum(0.0) / label_not_agreed_scores.size)**2 }.sum(0.0) / label_not_agreed_scores.size)}"

    best_cutoff = 0
    best_cutoff_score = 0

    (0..100)
      .step(1)
      .each do |cutoff|
        score =
          label_agreed_scores.count { _1 > cutoff } + label_not_agreed_scores.count { _1 <= cutoff }

        if score > best_cutoff_score
          best_cutoff_score = score
          best_cutoff = cutoff
        end
      end

    puts "Recommended ai_toxicity_flag_threshold_#{label} value: #{best_cutoff}"
  end
end
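
The task takes the sample size as an argument; to run it against the 100 most recent flags (100 is just an example), invoke it from the shell as `bin/rake "ai:toxicity:calibration_stats[100]"`, or from a console with a sketch like:

# Console sketch; 100 is an example set size.
Rails.application.load_tasks
Rake::Task["ai:toxicity:calibration_stats"].invoke(100)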

View File

@ -1,12 +1,43 @@
# frozen_string_literal: true

# name: discourse-plugin-name
# name: discourse-ai
# about: TODO
# version: 0.0.1
# authors: Discourse
# url: TODO
# required_version: 2.7.0

enabled_site_setting :plugin_name_enabled
enabled_site_setting :ai_enabled

after_initialize {}
after_initialize do
  module ::DiscourseAI
    PLUGIN_NAME = "discourse-ai"
  end

  require_relative "lib/shared/inference_manager.rb"
  require_relative "lib/modules/toxicity/event_handler.rb"
  require_relative "lib/modules/toxicity/classifier.rb"
  require_relative "lib/modules/toxicity/post_classifier.rb"
  require_relative "lib/modules/toxicity/chat_message_classifier.rb"
  require_relative "app/jobs/regular/modules/toxicity/toxicity_classify_post.rb"
  require_relative "app/jobs/regular/modules/toxicity/toxicity_classify_chat_message.rb"
  require_relative "lib/modules/sentiment/event_handler.rb"
  require_relative "lib/modules/sentiment/post_classifier.rb"
  require_relative "app/jobs/regular/modules/sentiment/sentiment_classify_post.rb"

  on(:post_created) do |post|
    DiscourseAI::Toxicity::EventHandler.handle_post_async(post)
    DiscourseAI::Sentiment::EventHandler.handle_post_async(post)
  end

  on(:post_edited) do |post|
    DiscourseAI::Toxicity::EventHandler.handle_post_async(post)
    DiscourseAI::Sentiment::EventHandler.handle_post_async(post)
  end

  on(:chat_message_created) do |chat_message|
    DiscourseAI::Toxicity::EventHandler.handle_chat_async(chat_message)
  end

  on(:chat_message_edited) do |chat_message|
    DiscourseAI::Toxicity::EventHandler.handle_chat_async(chat_message)
  end
end
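
With `ai_enabled` plus the relevant module settings switched on, the classifiers can also be exercised by hand from a Rails console; the post id below is a placeholder:

# Manual run sketch; 123 is a placeholder post id.
post = Post.find(123)
DiscourseAI::Toxicity::PostClassifier.new(post).classify!
DiscourseAI::Sentiment::PostClassifier.new(post).classify!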