add toxicity and sentiment modules
parent 2b319d9077
commit 6cf411ec90

@@ -0,0 +1,17 @@
# frozen_string_literal: true

module ::Jobs
  class SentimentClassifyPost < ::Jobs::Base
    def execute(args)
      return unless SiteSetting.ai_sentiment_enabled

      post_id = args[:post_id]
      return if post_id.blank?

      post = Post.find_by(id: post_id, post_type: Post.types[:regular])
      return if post&.raw.blank?

      ::DiscourseAI::Sentiment::PostClassifier.new(post).classify!
    end
  end
end

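For a quick manual check, the job above can also be run synchronously from a Rails console. This is an illustrative sketch, not part of the diff; it assumes `post` is any existing regular post.

# Illustrative only: run the job inline instead of enqueueing it.
post = Post.where(post_type: Post.types[:regular]).last
Jobs::SentimentClassifyPost.new.execute(post_id: post.id)
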
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module ::Jobs
  class ToxicityClassifyChatMessage < ::Jobs::Base
    def execute(args)
      return unless SiteSetting.ai_toxicity_enabled

      chat_message_id = args[:chat_message_id]
      return if chat_message_id.blank?

      chat_message = ChatMessage.find_by(id: chat_message_id)
      return if chat_message&.message.blank?

      ::DiscourseAI::Toxicity::ChatMessageClassifier.new(chat_message).classify!
    end
  end
end

@@ -0,0 +1,17 @@
# frozen_string_literal: true

module ::Jobs
  class ToxicityClassifyPost < ::Jobs::Base
    def execute(args)
      return unless SiteSetting.ai_toxicity_enabled

      post_id = args[:post_id]
      return if post_id.blank?

      post = Post.find_by(id: post_id, post_type: Post.types[:regular])
      return if post&.raw.blank?

      ::DiscourseAI::Toxicity::PostClassifier.new(post).classify!
    end
  end
end

@@ -1,4 +1,3 @@
 en:
   js:
-    discourse_plugin_name:
-      placeholder: placeholder
+    discourse-ai:

@@ -1 +1,21 @@
en:
  site_settings:
    ai_enabled: "Enable the Discourse AI plugin."
    ai_toxicity_enabled: "Enable the toxicity module."
    ai_toxicity_inference_service_api_endpoint: "URL where the API for the toxicity module is running"
    ai_toxicity_inference_service_api_key: "API key for the toxicity API"
    ai_toxicity_inference_service_api_model: "Model to use for inference. The multilingual model works with Italian, French, Russian, Portuguese, Spanish, and Turkish."
    ai_toxicity_flag_automatically: "Automatically flag posts / chat messages that are above the configured thresholds."
    ai_toxicity_flag_threshold_toxicity: "Toxicity: a rude, disrespectful, or unreasonable comment that is somewhat likely to make you leave a discussion or give up on sharing your perspective"
    ai_toxicity_flag_threshold_severe_toxicity: "Severe Toxicity: a very hateful, aggressive, or disrespectful comment that is very likely to make you leave a discussion or give up on sharing your perspective"
    ai_toxicity_flag_threshold_obscene: "Obscene"
    ai_toxicity_flag_threshold_identity_attack: "Identity Attack"
    ai_toxicity_flag_threshold_insult: "Insult"
    ai_toxicity_flag_threshold_threat: "Threat"
    ai_toxicity_flag_threshold_sexual_explicit: "Sexually Explicit"
    ai_toxicity_groups_bypass: "Users in these groups will not have their posts classified by the toxicity module."

    ai_sentiment_enabled: "Enable the sentiment module."
    ai_sentiment_inference_service_api_endpoint: "URL where the API for the sentiment module is running"
    ai_sentiment_inference_service_api_key: "API key for the sentiment API"
    ai_sentiment_models: "Models to use for inference. Sentiment classifies posts on the positive/neutral/negative scale. Emotion classifies on the anger/disgust/fear/joy/neutral/sadness/surprise scale."

@@ -1,4 +1,66 @@
 plugins:
-  plugin_name_enabled:
+  ai_enabled:
     default: true
     client: true
+
+  ai_toxicity_enabled:
+    default: false
+    client: true
+  ai_toxicity_inference_service_api_endpoint:
+    default: "https://disorder-testing.demo-by-discourse.com"
+  ai_toxicity_inference_service_api_key:
+    default: ''
+  ai_toxicity_inference_service_api_model:
+    type: enum
+    default: unbiased
+    choices:
+      - unbiased
+      - multilingual
+      - original
+  ai_toxicity_flag_automatically:
+    default: true
+    client: false
+  ai_toxicity_flag_threshold_toxicity:
+    default: 70
+    client: false
+  ai_toxicity_flag_threshold_severe_toxicity:
+    default: 60
+    client: false
+  ai_toxicity_flag_threshold_obscene:
+    default: 60
+    client: false
+  ai_toxicity_flag_threshold_identity_attack:
+    default: 60
+    client: false
+  ai_toxicity_flag_threshold_insult:
+    default: 60
+    client: false
+  ai_toxicity_flag_threshold_threat:
+    default: 60
+    client: false
+  ai_toxicity_flag_threshold_sexual_explicit:
+    default: 60
+    client: false
+  ai_toxicity_groups_bypass:
+    client: true
+    type: group_list
+    list_type: compact
+    default: "3" # 3: @staff
+    allow_any: false
+    refresh: true
+
+  ai_sentiment_enabled:
+    default: false
+    client: true
+  ai_sentiment_inference_service_api_endpoint:
+    default: ''
+  ai_sentiment_inference_service_api_key:
+    default: ''
+  ai_sentiment_models:
+    type: list
+    list_type: simple
+    default: "emotion"
+    allow_any: false
+    choices:
+      - sentiment
+      - emotion

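As a side note on the group_list setting above (illustrative, not in the diff): Discourse exposes list-type site settings through a parsed *_map accessor, which is what the toxicity event handler later uses for its bypass check.

# Illustrative only: with the default value of "3" (the staff group id).
SiteSetting.ai_toxicity_groups_bypass       # => "3"
SiteSetting.ai_toxicity_groups_bypass_map   # => [3]
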
@@ -0,0 +1,14 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Sentiment
    class EventHandler
      class << self
        def handle_post_async(post)
          return unless SiteSetting.ai_sentiment_enabled
          Jobs.enqueue(:sentiment_classify_post, post_id: post.id)
        end
      end
    end
  end
end

@@ -0,0 +1,44 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Sentiment
    class PostClassifier
      EMOTION_LABELS = %w[anger disgust fear joy neutral sadness surprise]

      SENTIMENT_LABELS = %w[negative neutral positive]

      def initialize(object)
        @object = object
      end

      def content
        @object.post_number == 1 ? "#{@object.topic.title}\n#{@object.raw}" : @object.raw
      end

      def classify!
        SiteSetting
          .ai_sentiment_models
          .split("|")
          .each do |model|
            classification =
              ::DiscourseAI::InferenceManager.perform!(
                "#{SiteSetting.ai_sentiment_inference_service_api_endpoint}/api/v1/classify",
                model,
                content,
                SiteSetting.ai_sentiment_inference_service_api_key
              )

            store_classification(model, classification)
          end
      end

      def store_classification(model, classification)
        PostCustomField.create!(
          post_id: @object.id,
          name: "ai-sentiment-#{model}",
          value: { classification: classification }.to_json,
        )
      end
    end
  end
end

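The classifier stores one custom field per model. A read-back sketch (not in the diff), assuming the default ai_sentiment_models value of "emotion":

# Illustrative only: fetch and parse the stored emotion classification for a post.
field = PostCustomField.find_by(post_id: post.id, name: "ai-sentiment-emotion")
JSON.parse(field.value)["classification"] if field
# The exact score format inside "classification" depends on the inference service.
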
@@ -0,0 +1,31 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class ChatMessageClassifier < Classifier
      def content
        @object.message
      end

      def store_classification
        PluginStore.set(
          "toxicity",
          "chat_message_#{@object.id}",
          {
            classification: @classification,
            model: SiteSetting.ai_toxicity_inference_service_api_model,
            date: Time.now.utc,
          },
        )
      end

      def flag!
        Chat::ChatReviewQueue.new.flag_message(
          @object,
          Guardian.new(flagger),
          ReviewableScore.types[:inappropriate],
        )
      end
    end
  end
end

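Chat classifications go to the PluginStore rather than a post custom field. A read-back sketch (illustrative, not in the diff):

# Illustrative only: PluginStore.get mirrors the PluginStore.set call above.
PluginStore.get("toxicity", "chat_message_#{chat_message.id}")
# => { "classification" => { ... }, "model" => "unbiased", "date" => "..." }
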
@@ -0,0 +1,60 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class Classifier
      CLASSIFICATION_LABELS = %w[
        toxicity
        severe_toxicity
        obscene
        identity_attack
        insult
        threat
        sexual_explicit
      ]

      def initialize(object)
        @object = object
      end

      def content
      end

      def classify!
        @classification =
          ::DiscourseAI::InferenceManager.perform!(
            "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
            SiteSetting.ai_toxicity_inference_service_api_model,
            content,
            SiteSetting.ai_toxicity_inference_service_api_key
          )
        store_classification
        consider_flagging
      end

      def store_classification
      end

      def automatic_flag_enabled?
        SiteSetting.ai_toxicity_flag_automatically
      end

      def consider_flagging
        return unless automatic_flag_enabled?
        @reasons =
          CLASSIFICATION_LABELS.filter do |label|
            @classification[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")
          end

        flag! unless @reasons.empty?
      end

      def flagger
        User.find_by(id: -1)
      end

      def flag!
      end
    end
  end
end

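To make the flagging condition concrete (illustrative, not in the diff), assume the inference endpoint returns integer scores from 0 to 100 keyed by label, which is what consider_flagging above expects:

# Illustrative only: with the default thresholds (toxicity 70, the others 60).
classification = { "toxicity" => 83, "severe_toxicity" => 12, "insult" => 75 }
classification.keys.filter do |label|
  classification[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")
end
# => ["toxicity", "insult"], so the post would be flagged with reason "toxicity/insult"
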
@@ -0,0 +1,27 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class EventHandler
      class << self
        def handle_post_async(post)
          return if bypass?(post)
          Jobs.enqueue(:toxicity_classify_post, post_id: post.id)
        end

        def handle_chat_async(chat_message)
          return if bypass?(chat_message)
          Jobs.enqueue(:toxicity_classify_chat_message, chat_message_id: chat_message.id)
        end

        def bypass?(content)
          !SiteSetting.ai_toxicity_enabled || group_bypass?(content.user)
        end

        def group_bypass?(user)
          user.groups.pluck(:id).intersection(SiteSetting.ai_toxicity_groups_bypass_map).present?
        end
      end
    end
  end
end

@@ -0,0 +1,27 @@
# frozen_string_literal: true

module ::DiscourseAI
  module Toxicity
    class PostClassifier < Classifier
      def content
        @object.post_number == 1 ? "#{@object.topic.title}\n#{@object.raw}" : @object.raw
      end

      def store_classification
        PostCustomField.create!(
          post_id: @object.id,
          name: "toxicity",
          value: {
            classification: @classification,
            model: SiteSetting.ai_toxicity_inference_service_api_model,
          }.to_json,
        )
      end

      def flag!
        PostActionCreator.create(flagger, @object, :inappropriate, reason: @reasons.join("/"))
        @object.publish_change_to_clients! :acted
      end
    end
  end
end

@@ -0,0 +1,28 @@
# frozen_string_literal: true

module ::DiscourseAI
  class InferenceManager
    def self.perform!(endpoint, model, content, api_key)

      headers = {
        "Referer" => Discourse.base_url,
        "Content-Type" => "application/json",
      }

      if api_key.present?
        headers["X-API-KEY"] = api_key
      end

      response =
        Faraday.post(
          endpoint,
          { model: model, content: content }.to_json,
          headers,
        )

      raise Net::HTTPBadResponse unless response.status == 200

      JSON.parse(response.body)
    end
  end
end

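A minimal smoke test of the shared client from a console (illustrative, not in the diff); the text passed in is arbitrary:

# Illustrative only: POSTs { model:, content: } as JSON and returns the parsed body,
# raising Net::HTTPBadResponse on any non-200 status.
DiscourseAI::InferenceManager.perform!(
  "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
  SiteSetting.ai_toxicity_inference_service_api_model,
  "sample text to classify",
  SiteSetting.ai_toxicity_inference_service_api_key
)
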
@@ -0,0 +1,77 @@
# frozen_string_literal: true

def classify(content)
  ::DiscourseAI::InferenceManager.perform!(
    "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
    SiteSetting.ai_toxicity_inference_service_api_model,
    content,
    SiteSetting.ai_toxicity_inference_service_api_key,
  )
end

desc "Uses existing flagged posts to suggest a configuration threshold"
task "ai:toxicity:calibration_stats", [:set_size] => [:environment] do |_, args|
  flag_agreed =
    PostAction
      .where(post_action_type_id: 4, disagreed_at: nil, deferred_at: nil)
      .where("post_actions.user_id > 0")
      .includes(:post, :user)
      .where(user: { admin: false, moderator: false })
      .where("posts.raw IS NOT NULL")
      .order(created_at: :desc)
      .limit(args[:set_size])
      .pluck(:raw)

  flag_not_agreed =
    PostAction
      .where(post_action_type_id: 4)
      .where("(disagreed_at IS NOT NULL OR deferred_at IS NOT NULL)")
      .where("post_actions.user_id > 0")
      .includes(:post, :user)
      .where(user: { admin: false, moderator: false })
      .where("posts.raw IS NOT NULL")
      .order(created_at: :desc)
      .limit(args[:set_size])
      .pluck(:raw)

  flag_agreed_scores = flag_agreed.map { classify(_1) }
  flag_not_agreed_scores = flag_not_agreed.map { classify(_1) }

  DiscourseAI::Toxicity::Classifier::CLASSIFICATION_LABELS.each do |label|
    puts "Label: #{label}"

    label_agreed_scores = flag_agreed_scores.map { _1[label] }
    label_not_agreed_scores = flag_not_agreed_scores.map { _1[label] }

    puts "Agreed flagged posts score:"
    puts "Max: #{label_agreed_scores.max}"
    puts "Min: #{label_agreed_scores.min}"
    puts "Avg: #{label_agreed_scores.sum(0.0) / label_agreed_scores.size}"
    puts "Median: #{label_agreed_scores.sort[label_agreed_scores.size / 2]}"
    puts "Stddev: #{Math.sqrt(label_agreed_scores.map { (_1 - label_agreed_scores.sum(0.0) / label_agreed_scores.size)**2 }.sum(0.0) / label_agreed_scores.size)}"

    puts "Not agreed flagged posts score:"
    puts "Max: #{label_not_agreed_scores.max}"
    puts "Min: #{label_not_agreed_scores.min}"
    puts "Avg: #{label_not_agreed_scores.sum(0.0) / label_not_agreed_scores.size}"
    puts "Median: #{label_not_agreed_scores.sort[label_not_agreed_scores.size / 2]}"
    puts "Stddev: #{Math.sqrt(label_not_agreed_scores.map { (_1 - label_not_agreed_scores.sum(0.0) / label_not_agreed_scores.size)**2 }.sum(0.0) / label_not_agreed_scores.size)}"

    best_cutoff = 0
    best_cutoff_score = 0

    (0..100)
      .step(1)
      .each do |cutoff|
        score =
          label_agreed_scores.count { _1 > cutoff } + label_not_agreed_scores.count { _1 <= cutoff }

        if score > best_cutoff_score
          best_cutoff_score = score
          best_cutoff = cutoff
        end
      end

    puts "Recommended ai_toxicity_flag_threshold_#{label} value: #{best_cutoff}"
  end
end

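Usage note (not in the diff): the task takes the sample size as a rake argument, so a calibration run over the 100 most recent flags of each kind would be invoked as:

bin/rake "ai:toxicity:calibration_stats[100]"
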
plugin.rb

@@ -1,12 +1,43 @@
 # frozen_string_literal: true

-# name: discourse-plugin-name
+# name: discourse-ai
 # about: TODO
 # version: 0.0.1
 # authors: Discourse
 # url: TODO
 # required_version: 2.7.0

-enabled_site_setting :plugin_name_enabled
+enabled_site_setting :ai_enabled

-after_initialize {}
+after_initialize do
+  module ::DiscourseAI
+    PLUGIN_NAME = "discourse-ai"
+  end
+
+  require_relative "lib/shared/inference_manager.rb"
+  require_relative "lib/modules/toxicity/event_handler.rb"
+  require_relative "lib/modules/toxicity/classifier.rb"
+  require_relative "lib/modules/toxicity/post_classifier.rb"
+  require_relative "lib/modules/toxicity/chat_message_classifier.rb"
+  require_relative "app/jobs/regular/modules/toxicity/toxicity_classify_post.rb"
+  require_relative "app/jobs/regular/modules/toxicity/toxicity_classify_chat_message.rb"
+
+  require_relative "lib/modules/sentiment/event_handler.rb"
+  require_relative "lib/modules/sentiment/post_classifier.rb"
+  require_relative "app/jobs/regular/modules/sentiment/sentiment_classify_post.rb"
+
+  on(:post_created) do |post|
+    DiscourseAI::Toxicity::EventHandler.handle_post_async(post)
+    DiscourseAI::Sentiment::EventHandler.handle_post_async(post)
+  end
+  on(:post_edited) do |post|
+    DiscourseAI::Toxicity::EventHandler.handle_post_async(post)
+    DiscourseAI::Sentiment::EventHandler.handle_post_async(post)
+  end
+  on(:chat_message_created) do |chat_message|
+    DiscourseAI::Toxicity::EventHandler.handle_chat_async(chat_message)
+  end
+  on(:chat_message_edited) do |chat_message|
+    DiscourseAI::Toxicity::EventHandler.handle_chat_async(chat_message)
+  end
+end

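A hypothetical spec sketch (not part of this commit) for the event wiring above; it assumes the standard Discourse test helpers (Fabricate, expect_enqueued_with) and that the posting user is not in a bypass group:

# Hypothetical spec, illustrative only.
require "rails_helper"

describe "toxicity event wiring" do
  before { SiteSetting.ai_toxicity_enabled = true }

  it "enqueues a classification job when a post is created" do
    user = Fabricate(:user) # not staff, so not bypassed by the default group setting
    expect_enqueued_with(job: :toxicity_classify_post) do
      PostCreator.create!(user, title: "a long enough test topic title", raw: "a long enough test post body")
    end
  end
end
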