FEATURE: Use dedicated reviewables for AI flags. (#4)

This change adds two new reviewable types: ReviewableAIPost and ReviewableAIChatMessage. They have the same actions as their existing counterparts, ReviewableFlaggedPost and ReviewableChatMessage. When we show these flags in the review queue we display the model used and its accuracy, and we adjust that accuracy after staff performs an action, tracking a global accuracy per model in a separate table.

* FEATURE: Dedicated reviewables for AI flags
* Store and adjust model accuracy
* Display accuracy in reviewable templates
parent 676d3ce6b2
commit a838116cd5
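For readers skimming the diff, the sketch below illustrates how the pieces introduced in this commit are meant to fit together. It is illustrative only, not part of the commit: `post`, `admin`, and the "opennsfw2" model name are placeholders, and the calls assume a Rails console in a Discourse install with this plugin loaded.

# Illustrative sketch only; classes and methods come from the diff below.
post = Post.last
reviewable = ReviewableAIPost.find_by(target: post) # created by PostClassificator#flag!
reviewable.payload["verdicts"]                      # => e.g. { "opennsfw2" => true }

# Staff acting on the flag fires :reviewable_transitioned_to, which plugin.rb wires to
# ModelAccuracy.adjust_model_accuracy. Approving a reviewable whose verdict was true
# increments flags_agreed; rejecting it increments flags_disagreed (and the reverse for
# false verdicts).
admin = User.find_by(admin: true)
reviewable.perform(admin, :agree_and_keep)

ModelAccuracy.find_by(model: "opennsfw2").calculate_accuracy
# => integer percentage: (flags_agreed * 100) / (flags_agreed + flags_disagreed)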
@@ -0,0 +1,35 @@
# frozen_string_literal: true

class ModelAccuracy < ActiveRecord::Base
  def self.adjust_model_accuracy(new_status, reviewable)
    return unless %i[approved rejected].include?(new_status)
    return unless [ReviewableAIPost, ReviewableAIChatMessage].include?(reviewable.class)

    verdicts = reviewable.payload.to_h["verdicts"] || {}

    verdicts.each do |model_name, verdict|
      accuracy_model = find_by(model: model_name)

      attribute =
        if verdict
          new_status == :approved ? :flags_agreed : :flags_disagreed
        else
          new_status == :rejected ? :flags_agreed : :flags_disagreed
        end

      accuracy_model.increment!(attribute)
    end
  end

  def calculate_accuracy
    return 0 if total_flags.zero?

    (flags_agreed * 100) / total_flags
  end

  private

  def total_flags
    flags_agreed + flags_disagreed
  end
end
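A quick worked example of the arithmetic in calculate_accuracy above; the model name and counts are made up, and the division is Ruby integer division, so the percentage is truncated:

accuracy = ModelAccuracy.new(model: "example_model", classification_type: "toxicity")
accuracy.flags_agreed = 1
accuracy.flags_disagreed = 2
accuracy.calculate_accuracy # => (1 * 100) / 3 => 33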
@@ -0,0 +1,4 @@
# frozen_string_literal: true

class ReviewableAIChatMessage < ReviewableChatMessage
end
@@ -0,0 +1,205 @@
# frozen_string_literal: true

class ReviewableAIPost < Reviewable
  # Penalties are handled by the modal after the action is performed
  def self.action_aliases
    {
      agree_and_keep_hidden: :agree_and_keep,
      agree_and_silence: :agree_and_keep,
      agree_and_suspend: :agree_and_keep,
      disagree_and_restore: :disagree,
    }
  end

  def build_actions(actions, guardian, args)
    return actions if !pending? || post.blank?

    agree =
      actions.add_bundle("#{id}-agree", icon: "thumbs-up", label: "reviewables.actions.agree.title")

    if !post.user_deleted? && !post.hidden?
      build_action(actions, :agree_and_hide, icon: "far-eye-slash", bundle: agree)
    end

    if post.hidden?
      build_action(actions, :agree_and_keep_hidden, icon: "thumbs-up", bundle: agree)
    else
      build_action(actions, :agree_and_keep, icon: "thumbs-up", bundle: agree)
    end

    if guardian.can_suspend?(target_created_by)
      build_action(
        actions,
        :agree_and_suspend,
        icon: "ban",
        bundle: agree,
        client_action: "suspend",
      )
      build_action(
        actions,
        :agree_and_silence,
        icon: "microphone-slash",
        bundle: agree,
        client_action: "silence",
      )
    end

    build_action(actions, :agree_and_restore, icon: "far-eye", bundle: agree) if post.user_deleted?

    if post.hidden?
      build_action(actions, :disagree_and_restore, icon: "thumbs-down")
    else
      build_action(actions, :disagree, icon: "thumbs-down")
    end

    if guardian.can_delete_post_or_topic?(post)
      delete =
        actions.add_bundle(
          "#{id}-delete",
          icon: "far-trash-alt",
          label: "reviewables.actions.delete.title",
        )
      build_action(actions, :delete_and_ignore, icon: "external-link-alt", bundle: delete)
      if post.reply_count > 0
        build_action(
          actions,
          :delete_and_ignore_replies,
          icon: "external-link-alt",
          confirm: true,
          bundle: delete,
        )
      end
      build_action(actions, :delete_and_agree, icon: "thumbs-up", bundle: delete)
      if post.reply_count > 0
        build_action(
          actions,
          :delete_and_agree_replies,
          icon: "external-link-alt",
          bundle: delete,
          confirm: true,
        )
      end
    end

    delete_user_actions(actions) if guardian.can_delete_user?(target_created_by)

    build_action(actions, :ignore, icon: "external-link-alt")
  end

  def perform_agree_and_hide(performed_by, args)
    post.hide!(reviewable_scores.first.reviewable_score_type)

    agree
  end

  def perform_agree_and_keep(_performed_by, _args)
    agree
  end

  def perform_agree_and_restore(performed_by, args)
    destroyer(performed_by).recover
    agree
  end

  def perform_disagree(performed_by, args)
    # Undo hide/silence if applicable
    post.unhide! if post.hidden?

    create_result(:success, :rejected) do |result|
      result.update_flag_stats = { status: :disagreed, user_ids: [created_by_id] }
    end
  end

  def perform_ignore(performed_by, args)
    create_result(:success, :ignored) do |result|
      result.update_flag_stats = { status: :ignored, user_ids: [created_by_id] }
    end
  end

  def perform_delete_and_ignore(performed_by, args)
    destroyer(performed_by).destroy

    perform_ignore(performed_by, args)
  end

  def perform_delete_and_agree(performed_by, args)
    destroyer(performed_by).destroy

    agree
  end

  def perform_delete_and_ignore_replies(performed_by, args)
    PostDestroyer.delete_with_replies(performed_by, post, self)

    perform_ignore(performed_by, args)
  end

  def perform_delete_and_agree_replies(performed_by, args)
    PostDestroyer.delete_with_replies(performed_by, post, self)

    agree
  end

  def perform_delete_user(performed_by, args)
    UserDestroyer.new(performed_by).destroy(post.user, delete_opts)

    agree
  end

  def perform_delete_user_block(performed_by, args)
    delete_options = delete_opts

    delete_options.merge!(block_email: true, block_ip: true) if Rails.env.production?

    UserDestroyer.new(performed_by).destroy(post.user, delete_options)

    agree
  end

  private

  def post
    @post ||= (target || Post.with_deleted.find_by(id: target_id))
  end

  def destroyer(performed_by)
    PostDestroyer.new(performed_by, post, reviewable: self)
  end

  def agree
    create_result(:success, :approved) do |result|
      result.update_flag_stats = { status: :agreed, user_ids: [created_by_id] }
      result.recalculate_score = true
    end
  end

  def delete_opts
    {
      delete_posts: true,
      prepare_for_destroy: true,
      block_urls: true,
      delete_as_spammer: true,
      context: "review",
    }
  end

  def build_action(
    actions,
    id,
    icon:,
    button_class: nil,
    bundle: nil,
    client_action: nil,
    confirm: false
  )
    actions.add(id, bundle: bundle) do |action|
      prefix = "reviewables.actions.#{id}"
      action.icon = icon
      action.button_class = button_class
      action.label = "#{prefix}.title"
      action.description = "#{prefix}.description"
      action.client_action = client_action
      action.confirm_message = "#{prefix}.confirm" if confirm
    end
  end
end
@@ -0,0 +1,5 @@
# frozen_string_literal: true

class ReviewableAIChatMessageSerializer < ReviewableChatMessageSerializer
  payload_attributes :accuracies
end
@@ -0,0 +1,5 @@
# frozen_string_literal: true

class ReviewableAIPostSerializer < ReviewableFlaggedPostSerializer
  payload_attributes :accuracies
end
@@ -0,0 +1,14 @@
{{#if @accuracies}}
  <table class="reviewable-scores">
    <tbody>
      {{#each-in @accuracies as |model acc|}}
        <tr>
          <td colspan="4">{{i18n "discourse-ai.reviewables.model_used"}}</td>
          <td colspan="3">{{model}}</td>
          <td colspan="4">{{i18n "discourse-ai.reviewables.accuracy"}}</td>
          <td colspan="3">{{acc}}%</td>
        </tr>
      {{/each-in}}
    </tbody>
  </table>
{{/if}}
@@ -0,0 +1,3 @@
import Component from "@glimmer/component";

export default class ReviewableAIPost extends Component {}
@@ -0,0 +1,31 @@
<div class="flagged-post-header">
  <LinkTo
    @route="chat.channel.near-message"
    @models={{array
      this.chatChannel.slugifiedTitle
      this.chatChannel.id
      @reviewable.target_id
    }}
  >
    <ChatChannelTitle @channel={{this.chatChannel}} />
  </LinkTo>
</div>

<div class="post-contents-wrapper">
  <ReviewableCreatedBy @user={{@reviewable.target_created_by}} @tagName="" />
  <div class="post-contents">
    <ReviewablePostHeader
      @reviewable={{@reviewable}}
      @createdBy={{@reviewable.target_created_by}}
      @tagName=""
    />

    <div class="post-body">
      {{html-safe (or @reviewable.payload.message_cooked @reviewable.cooked)}}
    </div>

    {{yield}}

    <ModelAccuracies @accuracies={{@reviewable.payload.accuracies}} />
  </div>
</div>
@@ -0,0 +1,3 @@
import Component from "@glimmer/component";

export default class ReviewableAIChatMessage extends Component {}
@@ -0,0 +1,26 @@
<div class="flagged-post-header">
  <ReviewableTopicLink @reviewable={{@reviewable}} @tagName="" />
  <ReviewablePostEdits @reviewable={{@reviewable}} @tagName="" />
</div>

<div class="post-contents-wrapper">
  <ReviewableCreatedBy @user={{@reviewable.target_created_by}} @tagName="" />
  <div class="post-contents">
    <ReviewablePostHeader
      @reviewable={{@reviewable}}
      @createdBy={{@reviewable.target_created_by}}
      @tagName=""
    />
    <div class="post-body">
      {{#if @reviewable.blank_post}}
        <p>{{i18n "review.deleted_post"}}</p>
      {{else}}
        {{html-safe @reviewable.cooked}}
      {{/if}}
    </div>

    {{yield}}

    <ModelAccuracies @accuracies={{@reviewable.payload.accuracies}} />
  </div>
</div>
@@ -0,0 +1,3 @@
import Component from "@glimmer/component";

export default class ReviewableAIPost extends Component {}
@@ -1,3 +1,12 @@
en:
  js:
    discourse-ai:
      reviewables:
        model_used: "Model used:"
        accuracy: "Accuracy:"
    review:
      types:
        reviewable_aipost:
          title: "AI-Flagged post"
        reviewable_aichat_message:
          title: "AI-Flagged chat message"
@@ -19,3 +19,7 @@ en:
    ai_sentiment_inference_service_api_endpoint: "URL where the API is running for the sentiment module"
    ai_sentiment_inference_service_api_key: "API key for the sentiment API"
    ai_sentiment_models: "Models to use for inference. Sentiment classifies post on the positive/neutral/negative space. Emotion classifies on the anger/disgust/fear/joy/neutral/sadness/surprise space."
  reviewables:
    reasons:
      flagged_by_toxicity: The AI plugin flagged this after classifying it as toxic.
      flagged_by_nsfw: The AI plugin flagged this after classifying at least one of the attached images as NSFW.
@@ -0,0 +1,16 @@
# frozen_string_literal: true

class CreatedModelAccuracyTable < ActiveRecord::Migration[7.0]
  def change
    create_table :model_accuracies do |t|
      t.string :model, null: false
      t.string :classification_type, null: false
      t.integer :flags_agreed, null: false, default: 0
      t.integer :flags_disagreed, null: false, default: 0

      t.timestamps
    end

    add_index :model_accuracies, %i[model], unique: true
  end
end
@@ -0,0 +1,7 @@
# frozen_string_literal: true

module DiscourseAI
  class Engine < ::Rails::Engine
    isolate_namespace DiscourseAI
  end
end
@@ -11,14 +11,23 @@ module DiscourseAI
        content_of(target).present?
      end

      def should_flag_based_on?(classification_data)
      def get_verdicts(classification_data)
        classification_data
          .map do |model_name, classifications|
            verdict =
              classifications.values.any? do |data|
                send("#{model_name}_verdict?", data.except(:neutral, :target_classified_type))
              end

            [model_name, verdict]
          end
          .to_h
      end

      def should_flag_based_on?(verdicts)
        return false if !SiteSetting.ai_nsfw_flag_automatically

        classification_data.any? do |model_name, classifications|
          classifications.values.any? do |data|
            send("#{model_name}_verdict?", data.except(:neutral, :target_classified_type))
          end
        end
        verdicts.values.any?
      end

      def request(target_to_classify)
@@ -15,7 +15,14 @@ module DiscourseAI
        content_of(target).present?
      end

      def should_flag_based_on?(classification_data)
      def get_verdicts(_)
        available_models.reduce({}) do |memo, model|
          memo[model] = false
          memo
        end
      end

      def should_flag_based_on?(_verdicts)
        # We don't flag based on sentiment classification.
        false
      end
@@ -21,16 +21,23 @@ module DiscourseAI
        content_of(target).present?
      end

      def should_flag_based_on?(classification_data)
        return false if !SiteSetting.ai_toxicity_flag_automatically

      def get_verdicts(classification_data)
        # We only use one model for this classification.
        # classification_data looks like { model_name => classification }
        _model_used, data = classification_data.to_a.first

        CLASSIFICATION_LABELS.any? do |label|
          data[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")
        end
        verdict =
          CLASSIFICATION_LABELS.any? do |label|
            data[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")
          end

        { available_model => verdict }
      end

      def should_flag_based_on?(verdicts)
        return false if !SiteSetting.ai_toxicity_flag_automatically

        verdicts.values.any?
      end

      def request(target_to_classify)
@@ -4,13 +4,22 @@ module ::DiscourseAI
  class ChatMessageClassificator < Classificator
    private

    def flag!(chat_message, _toxic_labels)
      Chat::ChatReviewQueue.new.flag_message(
        chat_message,
        Guardian.new(flagger),
        ReviewableScore.types[:inappropriate],
        queue_for_review: true,
      )
    def flag!(chat_message, classification, verdicts, accuracies)
      reviewable =
        ReviewableAIChatMessage.needs_review!(
          created_by: Discourse.system_user,
          target: chat_message,
          reviewable_by_moderator: true,
          potential_spam: false,
          payload: {
            classification: classification,
            accuracies: accuracies,
            verdicts: verdicts,
          },
        )
      reviewable.update(target_created_by: chat_message.user)

      add_score(reviewable)
    end
  end
end
@@ -14,8 +14,11 @@ module ::DiscourseAI
        .tap do |classification|
          store_classification(target, classification)

          if classification_model.should_flag_based_on?(classification)
            flag!(target, classification)
          verdicts = classification_model.get_verdicts(classification)

          if classification_model.should_flag_based_on?(verdicts)
            accuracies = get_model_accuracies(verdicts.keys)
            flag!(target, classification, verdicts, accuracies)
          end
        end
      end

@@ -24,10 +27,32 @@

    attr_reader :classification_model

    def flag!(_target, _classification)
    def flag!(_target, _classification, _verdicts, _accuracies)
      raise NotImplemented
    end

    def get_model_accuracies(models)
      models
        .map do |name|
          accuracy =
            ModelAccuracy.find_or_create_by(
              model: name,
              classification_type: classification_model.type,
            )
          [name, accuracy.calculate_accuracy]
        end
        .to_h
    end

    def add_score(reviewable)
      reviewable.add_score(
        Discourse.system_user,
        ReviewableScore.types[:inappropriate],
        reason: "flagged_by_#{classification_model.type}",
        force_review: true,
      )
    end

    def store_classification(target, classification)
      attrs =
        classification.map do |model_name, classifications|
@@ -4,16 +4,23 @@ module ::DiscourseAI
  class PostClassificator < Classificator
    private

    def flag!(post, classification_type)
      PostActionCreator.new(
        flagger,
        post,
        PostActionType.types[:inappropriate],
        reason: classification_type,
        queue_for_review: true,
      ).perform
    def flag!(post, classification, verdicts, accuracies)
      post.hide!(ReviewableScore.types[:inappropriate])

      post.publish_change_to_clients! :acted
      reviewable =
        ReviewableAIPost.needs_review!(
          created_by: Discourse.system_user,
          target: post,
          reviewable_by_moderator: true,
          potential_spam: false,
          payload: {
            classification: classification,
            accuracies: accuracies,
            verdicts: verdicts,
          },
        )

      add_score(reviewable)
    end
  end
end
plugin.rb
@@ -9,13 +9,13 @@

enabled_site_setting :discourse_ai_enabled

require_relative "lib/discourse_ai/engine"

after_initialize do
  module ::DiscourseAI
    PLUGIN_NAME = "discourse-ai"
  end

  require_relative "app/models/classification_result"

  require_relative "lib/shared/inference_manager"
  require_relative "lib/shared/classificator"
  require_relative "lib/shared/post_classificator"

@@ -25,14 +25,19 @@ after_initialize do
  require_relative "lib/modules/toxicity/entry_point"
  require_relative "lib/modules/sentiment/entry_point"

  modules = [
  [
    DiscourseAI::NSFW::EntryPoint.new,
    DiscourseAI::Toxicity::EntryPoint.new,
    DiscourseAI::Sentiment::EntryPoint.new,
  ]

  modules.each do |a_module|
  ].each do |a_module|
    a_module.load_files
    a_module.inject_into(self)
  end

  register_reviewable_type ReviewableAIChatMessage
  register_reviewable_type ReviewableAIPost

  on(:reviewable_transitioned_to) do |new_status, reviewable|
    ModelAccuracy.adjust_model_accuracy(new_status, reviewable)
  end
end
@@ -61,7 +61,7 @@ describe Jobs::EvaluatePostUploads do
    it "flags and hides the post" do
      subject.execute({ post_id: post.id })

      expect(ReviewableFlaggedPost.where(target: post).count).to eq(1)
      expect(ReviewableAIPost.where(target: post).count).to eq(1)
      expect(post.reload.hidden?).to eq(true)
    end
  end

@@ -72,7 +72,7 @@ describe Jobs::EvaluatePostUploads do
      it "does nothing" do
        subject.execute({ post_id: post.id })

        expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
        expect(ReviewableAIPost.where(target: post).count).to be_zero
      end
    end
  end
@@ -66,44 +66,26 @@ describe DiscourseAI::NSFW::NSFWClassification do
  describe "#should_flag_based_on?" do
    before { SiteSetting.ai_nsfw_flag_automatically = true }

    let(:positive_classification) do
      {
        "opennsfw2" => {
          1 => NSFWInferenceStubs.negative_result("opennsfw2"),
          2 => NSFWInferenceStubs.positive_result("opennsfw2"),
        },
        "nsfw_detector" => {
          1 => NSFWInferenceStubs.negative_result("nsfw_detector"),
          2 => NSFWInferenceStubs.positive_result("nsfw_detector"),
        },
      }
    end
    let(:positive_verdict) { { "opennsfw2" => true, "nsfw_detector" => true } }

    let(:negative_classification) do
      {
        "opennsfw2" => {
          1 => NSFWInferenceStubs.negative_result("opennsfw2"),
          2 => NSFWInferenceStubs.negative_result("opennsfw2"),
        },
      }
    end
    let(:negative_verdict) { { "opennsfw2" => false } }

    it "returns false when NSFW flagging is disabled" do
      SiteSetting.ai_nsfw_flag_automatically = false

      should_flag = subject.should_flag_based_on?(positive_classification)
      should_flag = subject.should_flag_based_on?(positive_verdict)

      expect(should_flag).to eq(false)
    end

    it "returns true if the response is NSFW based on our thresholds" do
      should_flag = subject.should_flag_based_on?(positive_classification)
      should_flag = subject.should_flag_based_on?(positive_verdict)

      expect(should_flag).to eq(true)
    end

    it "returns false if the response is safe based on our thresholds" do
      should_flag = subject.should_flag_based_on?(negative_classification)
      should_flag = subject.should_flag_based_on?(negative_verdict)

      expect(should_flag).to eq(false)
    end
@@ -18,19 +18,19 @@ describe Jobs::ToxicityClassifyPost do

      subject.execute({ post_id: post.id })

      expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
      expect(ReviewableAIPost.where(target: post).count).to be_zero
    end

    it "does nothing if there's no arg called post_id" do
      subject.execute({})

      expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
      expect(ReviewableAIPost.where(target: post).count).to be_zero
    end

    it "does nothing if no post matches the given id" do
      subject.execute({ post_id: nil })

      expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
      expect(ReviewableAIPost.where(target: post).count).to be_zero
    end

    it "does nothing if the post content is blank" do

@@ -38,7 +38,7 @@ describe Jobs::ToxicityClassifyPost do

      subject.execute({ post_id: post.id })

      expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
      expect(ReviewableAIPost.where(target: post).count).to be_zero
    end
  end

@@ -47,7 +47,7 @@ describe Jobs::ToxicityClassifyPost do

      subject.execute({ post_id: post.id })

      expect(ReviewableFlaggedPost.where(target: post).count).to eq(1)
      expect(ReviewableAIPost.where(target: post).count).to eq(1)
    end
  end
end
@@ -21,34 +21,26 @@ describe DiscourseAI::Toxicity::ToxicityClassification do
  describe "#should_flag_based_on?" do
    before { SiteSetting.ai_toxicity_flag_automatically = true }

    let(:toxic_response) do
      {
        SiteSetting.ai_toxicity_inference_service_api_model =>
          ToxicityInferenceStubs.toxic_response,
      }
    end
    let(:toxic_verdict) { { SiteSetting.ai_toxicity_inference_service_api_model => true } }

    it "returns false when toxicity flagging is disabled" do
      SiteSetting.ai_toxicity_flag_automatically = false

      should_flag = subject.should_flag_based_on?(toxic_response)
      should_flag = subject.should_flag_based_on?(toxic_verdict)

      expect(should_flag).to eq(false)
    end

    it "returns true if the response is toxic based on our thresholds" do
      should_flag = subject.should_flag_based_on?(toxic_response)
      should_flag = subject.should_flag_based_on?(toxic_verdict)

      expect(should_flag).to eq(true)
    end

    it "returns false if the response is civilized based on our thresholds" do
      civilized_response = {
        SiteSetting.ai_toxicity_inference_service_api_model =>
          ToxicityInferenceStubs.civilized_response,
      }
      civilized_verdict = { SiteSetting.ai_toxicity_inference_service_api_model => false }

      should_flag = subject.should_flag_based_on?(civilized_response)
      should_flag = subject.should_flag_based_on?(civilized_verdict)

      expect(should_flag).to eq(false)
    end
@@ -0,0 +1,80 @@
# frozen_string_literal: true

require "rails_helper"

describe ModelAccuracy do
  describe "#calculate_accuracy" do
    let(:accuracy) { ModelAccuracy.new(model: "test_model", classification_type: "test") }

    it "returns 0 if we had no feedback" do
      expect(accuracy.calculate_accuracy).to eq(0.0)
    end

    it "returns 50 if we had mixed feedback" do
      accuracy.flags_agreed = 1
      accuracy.flags_disagreed = 1

      expect(accuracy.calculate_accuracy).to eq(50)
    end

    it "always rounds the number" do
      accuracy.flags_agreed = 1
      accuracy.flags_disagreed = 2

      expect(accuracy.calculate_accuracy).to eq(33)
    end
  end

  describe ".adjust_model_accuracy" do
    let!(:accuracy) { ModelAccuracy.create!(model: "test_model", classification_type: "test") }

    def build_reviewable(klass, test_model_verdict)
      klass.new(payload: { "verdicts" => { "test_model" => test_model_verdict } })
    end

    it "does nothing if the reviewable is not generated by this plugin" do
      reviewable = build_reviewable(ReviewableFlaggedPost, true)

      described_class.adjust_model_accuracy(:approved, reviewable)

      expect(accuracy.reload.flags_agreed).to be_zero
      expect(accuracy.flags_disagreed).to be_zero
    end

    it "updates the agreed flag if the reviewable was approved and the verdict is true" do
      reviewable = build_reviewable(ReviewableAIPost, true)

      described_class.adjust_model_accuracy(:approved, reviewable)

      expect(accuracy.reload.flags_agreed).to eq(1)
      expect(accuracy.flags_disagreed).to be_zero
    end

    it "updates the disagreed flag if the reviewable was approved and the verdict is false" do
      reviewable = build_reviewable(ReviewableAIPost, false)

      described_class.adjust_model_accuracy(:approved, reviewable)

      expect(accuracy.reload.flags_agreed).to be_zero
      expect(accuracy.flags_disagreed).to eq(1)
    end

    it "updates the disagreed flag if the reviewable was rejected and the verdict is true" do
      reviewable = build_reviewable(ReviewableAIPost, true)

      described_class.adjust_model_accuracy(:rejected, reviewable)

      expect(accuracy.reload.flags_agreed).to be_zero
      expect(accuracy.flags_disagreed).to eq(1)
    end

    it "updates the agreed flag if the reviewable was rejected and the verdict is false" do
      reviewable = build_reviewable(ReviewableAIPost, false)

      described_class.adjust_model_accuracy(:rejected, reviewable)

      expect(accuracy.reload.flags_agreed).to eq(1)
      expect(accuracy.flags_disagreed).to be_zero
    end
  end
end
@@ -0,0 +1,243 @@
# frozen_string_literal: true

require "rails_helper"

describe ReviewableAIPost do
  fab!(:target) { Fabricate(:post) }

  describe "#build_actions" do
    let(:guardian) { Guardian.new }

    let(:reviewable) do
      subject.tap do |r|
        r.target = target
        r.target_created_by = target.user
        r.created_by = Discourse.system_user
      end
    end

    def reviewable_actions(a_guardian)
      actions = Reviewable::Actions.new(reviewable, a_guardian, {})
      reviewable.build_actions(actions, a_guardian, {})

      actions
    end

    context "when the reviewable isn't pending" do
      before { reviewable.status = Reviewable.statuses[:rejected] }

      it "returns no actions" do
        expect(reviewable_actions(guardian)).to be_blank
      end
    end

    describe "actions that don't require special permissions" do
      it "has the disagree action" do
        expect(reviewable_actions(guardian).has?(:disagree)).to eq(true)
      end

      it "has the ignore action" do
        expect(reviewable_actions(guardian).has?(:ignore)).to eq(true)
      end

      it "has the agree and hide or agree and keep actions" do
        actions = reviewable_actions(guardian)

        expect(actions.has?(:agree_and_hide)).to eq(true)
        expect(actions.has?(:agree_and_keep)).to eq(true)
        expect(actions.has?(:agree_and_keep_hidden)).to eq(false)
      end

      it "doesn't have the penalize actions" do
        actions = reviewable_actions(guardian)

        expect(actions.has?(:agree_and_suspend)).to eq(false)
        expect(actions.has?(:agree_and_silence)).to eq(false)
      end

      it "doesn't have the delete + replies actions" do
        actions = reviewable_actions(guardian)

        expect(actions.has?(:delete_and_ignore_replies)).to eq(false)
        expect(actions.has?(:delete_and_agree_replies)).to eq(false)
      end

      context "when the post is hidden" do
        before { target.hide!(PostActionType.types[:inappropriate]) }

        it "can agree and keep hidden" do
          actions = reviewable_actions(guardian)

          expect(actions.has?(:agree_and_hide)).to eq(false)
          expect(actions.has?(:agree_and_keep)).to eq(false)
          expect(actions.has?(:agree_and_keep_hidden)).to eq(true)
        end

        it "has the disagree and restore action" do
          actions = reviewable_actions(guardian)

          expect(actions.has?(:disagree)).to eq(false)
          expect(actions.has?(:disagree_and_restore)).to eq(true)
        end
      end

      context "when the post was deleted by the user" do
        before { target.user_deleted = true }

        it "lets you restore it but not hide it" do
          actions = reviewable_actions(guardian)

          expect(actions.has?(:agree_and_restore)).to eq(true)
          expect(actions.has?(:agree_and_keep)).to eq(true)
          expect(actions.has?(:agree_and_keep_hidden)).to eq(false)
          expect(actions.has?(:agree_and_hide)).to eq(false)
        end
      end
    end

    context "when the reviewer can suspend the poster" do
      let(:mod_guardian) { Guardian.new(Fabricate(:moderator)) }

      it "has the penalization actions" do
        actions = reviewable_actions(mod_guardian)

        expect(actions.has?(:agree_and_suspend)).to eq(true)
        expect(actions.has?(:agree_and_silence)).to eq(true)
      end
    end

    context "when the reviewer can delete the post and topic" do
      let(:mod_guardian) { Guardian.new(Fabricate(:moderator)) }

      it "has the delete + replies actions" do
        target.reply_count = 3
        actions = reviewable_actions(mod_guardian)

        expect(actions.has?(:delete_and_ignore_replies)).to eq(true)
        expect(actions.has?(:delete_and_agree_replies)).to eq(true)
      end
    end
  end

  describe "#perform" do
    let(:reviewable) do
      described_class.needs_review!(target: target, created_by: Discourse.system_user)
    end
    fab!(:admin) { Fabricate(:admin) }

    before do
      reviewable.add_score(
        Discourse.system_user,
        ReviewableScore.types[:inappropriate],
        created_at: reviewable.created_at,
      )
    end

    describe "agree variations" do
      it "hides the post when performing the agree_and_hide action" do
        result = reviewable.perform(admin, :agree_and_hide)

        expect(result.transition_to).to eq :approved
        expect(target.reload.hidden?).to eq(true)
      end

      it "doesn't unhide the post when performing the agree_and_keep_hidden action" do
        target.hide!(ReviewableScore.types[:inappropriate])

        result = reviewable.perform(admin, :agree_and_keep_hidden)

        expect(result.transition_to).to eq :approved
        expect(target.reload.hidden?).to eq(true)
      end

      it "un-deletes the post when performing the agree_and_restore action" do
        target.update!(deleted_at: 1.minute.ago, deleted_by: target.user, user_deleted: true)

        result = reviewable.perform(admin, :agree_and_restore)

        expect(result.transition_to).to eq :approved
        expect(target.reload.deleted_at).to be_nil
        expect(target.user_deleted).to eq(false)
      end
    end

    describe "disagree variations" do
      it "disagree_and_restore disagrees with the flag and unhides the post" do
        target.hide!(ReviewableScore.types[:inappropriate])

        result = reviewable.perform(admin, :disagree_and_restore)

        expect(result.transition_to).to eq :rejected
        expect(target.reload.hidden?).to eq(false)
      end

      it "disagree disagrees with the flag" do
        result = reviewable.perform(admin, :disagree)

        expect(result.transition_to).to eq :rejected
      end
    end

    describe "delete post variations" do
      def create_reply(post)
        PostCreator.create(
          Fabricate(:user),
          raw: "this is the reply text",
          reply_to_post_number: post.post_number,
          topic_id: post.topic_id,
        )
      end

      before { target.update!(reply_count: 1) }

      it "ignores the reviewable with delete_and_ignore" do
        result = reviewable.perform(admin, :delete_and_ignore)

        expect(result.transition_to).to eq :ignored
        expect(target.reload.deleted_at).to be_present
      end

      it "ignores the reviewable and replies with delete_and_ignore_replies" do
        reply = create_reply(target)

        result = reviewable.perform(admin, :delete_and_ignore_replies)

        expect(result.transition_to).to eq :ignored
        expect(target.reload.deleted_at).to be_present
        expect(reply.reload.deleted_at).to be_present
      end

      it "agrees with the reviewable with delete_and_agree" do
        result = reviewable.perform(admin, :delete_and_agree)

        expect(result.transition_to).to eq :approved
        expect(target.reload.deleted_at).to be_present
      end

      it "agrees with the reviewable and its replies with delete_and_agree_replies" do
        reply = create_reply(target)

        result = reviewable.perform(admin, :delete_and_agree_replies)

        expect(result.transition_to).to eq :approved
        expect(target.reload.deleted_at).to be_present
        expect(reply.reload.deleted_at).to be_present
      end
    end

    describe "delete user variations" do
      it "deletes the user and agrees with the reviewable" do
        result = reviewable.perform(admin, :delete_user)

        expect(result.transition_to).to eq :approved
        expect { target.user.reload }.to raise_error(ActiveRecord::RecordNotFound)
      end
    end

    it "ignores the reviewable" do
      result = reviewable.perform(admin, :ignore)

      expect(result.transition_to).to eq :ignored
    end
  end
end
@@ -0,0 +1,27 @@
# frozen_string_literal: true

require "rails_helper"
require_relative "support/toxicity_inference_stubs"

describe Plugin::Instance do
  before { SiteSetting.discourse_ai_enabled = true }

  describe "on reviewable_transitioned_to event" do
    fab!(:post) { Fabricate(:post) }
    fab!(:admin) { Fabricate(:admin) }

    it "adjusts model accuracy" do
      ToxicityInferenceStubs.stub_post_classification(post, toxic: true)
      SiteSetting.ai_toxicity_flag_automatically = true
      classification = DiscourseAI::Toxicity::ToxicityClassification.new
      classificator = DiscourseAI::PostClassificator.new(classification)
      classificator.classify!(post)
      reviewable = ReviewableAIPost.find_by(target: post)

      reviewable.perform admin, :agree_and_keep
      accuracy = ModelAccuracy.find_by(classification_type: classification.type)

      expect(accuracy.flags_agreed).to eq(1)
    end
  end
end
@@ -27,7 +27,7 @@ describe DiscourseAI::ChatMessageClassificator do

      classification.classify!(chat_message)

      expect(ReviewableChatMessage.where(target: chat_message).count).to eq(1)
      expect(ReviewableAIChatMessage.where(target: chat_message).count).to eq(1)
    end

    it "doesn't flag the message if the model decides we shouldn't" do

@@ -35,7 +35,18 @@ describe DiscourseAI::ChatMessageClassificator do

      classification.classify!(chat_message)

      expect(ReviewableChatMessage.where(target: chat_message).count).to be_zero
      expect(ReviewableAIChatMessage.where(target: chat_message).count).to be_zero
    end

    it "includes the model accuracy in the payload" do
      SiteSetting.ai_toxicity_flag_automatically = true
      classification.classify!(chat_message)

      reviewable = ReviewableAIChatMessage.find_by(target: chat_message)

      expect(
        reviewable.payload.dig("accuracies", SiteSetting.ai_toxicity_inference_service_api_model),
      ).to be_zero
    end
  end
end
@@ -26,7 +26,7 @@ describe DiscourseAI::PostClassificator do

      classification.classify!(post)

      expect(ReviewableFlaggedPost.where(target: post).count).to eq(1)
      expect(ReviewableAIPost.where(target: post).count).to eq(1)
      expect(post.reload.hidden?).to eq(true)
    end

@@ -35,7 +35,18 @@ describe DiscourseAI::PostClassificator do

      classification.classify!(post)

      expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
      expect(ReviewableAIPost.where(target: post).count).to be_zero
    end

    it "includes the model accuracy in the payload" do
      SiteSetting.ai_toxicity_flag_automatically = true
      classification.classify!(post)

      reviewable = ReviewableAIPost.find_by(target: post)

      expect(
        reviewable.payload.dig("accuracies", SiteSetting.ai_toxicity_inference_service_api_model),
      ).to be_zero
    end
  end
end