diff --git a/app/models/model_accuracy.rb b/app/models/model_accuracy.rb new file mode 100644 index 00000000..519429c0 --- /dev/null +++ b/app/models/model_accuracy.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +class ModelAccuracy < ActiveRecord::Base + def self.adjust_model_accuracy(new_status, reviewable) + return unless %i[approved rejected].include?(new_status) + return unless [ReviewableAIPost, ReviewableAIChatMessage].include?(reviewable.class) + + verdicts = reviewable.payload.to_h["verdicts"] || {} + + verdicts.each do |model_name, verdict| + accuracy_model = find_by(model: model_name) + + attribute = + if verdict + new_status == :approved ? :flags_agreed : :flags_disagreed + else + new_status == :rejected ? :flags_agreed : :flags_disagreed + end + + accuracy_model.increment!(attribute) + end + end + + def calculate_accuracy + return 0 if total_flags.zero? + + (flags_agreed * 100) / total_flags + end + + private + + def total_flags + flags_agreed + flags_disagreed + end +end diff --git a/app/models/reviewable_a_i_chat_message.rb b/app/models/reviewable_a_i_chat_message.rb new file mode 100644 index 00000000..ef92dda0 --- /dev/null +++ b/app/models/reviewable_a_i_chat_message.rb @@ -0,0 +1,4 @@ +# frozen_string_literal: true + +class ReviewableAIChatMessage < ReviewableChatMessage +end diff --git a/app/models/reviewable_a_i_post.rb b/app/models/reviewable_a_i_post.rb new file mode 100644 index 00000000..3ecd3474 --- /dev/null +++ b/app/models/reviewable_a_i_post.rb @@ -0,0 +1,205 @@ +# frozen_string_literal:true + +class ReviewableAIPost < Reviewable + # Penalties are handled by the modal after the action is performed + def self.action_aliases + { + agree_and_keep_hidden: :agree_and_keep, + agree_and_silence: :agree_and_keep, + agree_and_suspend: :agree_and_keep, + disagree_and_restore: :disagree, + } + end + + def build_actions(actions, guardian, args) + return actions if !pending? || post.blank? + + agree = + actions.add_bundle("#{id}-agree", icon: "thumbs-up", label: "reviewables.actions.agree.title") + + if !post.user_deleted? && !post.hidden? + build_action(actions, :agree_and_hide, icon: "far-eye-slash", bundle: agree) + end + + if post.hidden? + build_action(actions, :agree_and_keep_hidden, icon: "thumbs-up", bundle: agree) + else + build_action(actions, :agree_and_keep, icon: "thumbs-up", bundle: agree) + end + + if guardian.can_suspend?(target_created_by) + build_action( + actions, + :agree_and_suspend, + icon: "ban", + bundle: agree, + client_action: "suspend", + ) + build_action( + actions, + :agree_and_silence, + icon: "microphone-slash", + bundle: agree, + client_action: "silence", + ) + end + + build_action(actions, :agree_and_restore, icon: "far-eye", bundle: agree) if post.user_deleted? + + if post.hidden? 
+ build_action(actions, :disagree_and_restore, icon: "thumbs-down") + else + build_action(actions, :disagree, icon: "thumbs-down") + end + + if guardian.can_delete_post_or_topic?(post) + delete = + actions.add_bundle( + "#{id}-delete", + icon: "far-trash-alt", + label: "reviewables.actions.delete.title", + ) + build_action(actions, :delete_and_ignore, icon: "external-link-alt", bundle: delete) + if post.reply_count > 0 + build_action( + actions, + :delete_and_ignore_replies, + icon: "external-link-alt", + confirm: true, + bundle: delete, + ) + end + build_action(actions, :delete_and_agree, icon: "thumbs-up", bundle: delete) + if post.reply_count > 0 + build_action( + actions, + :delete_and_agree_replies, + icon: "external-link-alt", + bundle: delete, + confirm: true, + ) + end + end + + delete_user_actions(actions) if guardian.can_delete_user?(target_created_by) + + build_action(actions, :ignore, icon: "external-link-alt") + end + + def perform_agree_and_hide(performed_by, args) + post.hide!(reviewable_scores.first.reviewable_score_type) + + agree + end + + def perform_agree_and_keep(_performed_by, _args) + agree + end + + def perform_agree_and_restore(performed_by, args) + destroyer(performed_by).recover + agree + end + + def perform_disagree(performed_by, args) + # Undo hide/silence if applicable + post.unhide! if post.hidden? + + create_result(:success, :rejected) do |result| + result.update_flag_stats = { status: :disagreed, user_ids: [created_by_id] } + end + end + + def perform_ignore(performed_by, args) + create_result(:success, :ignored) do |result| + result.update_flag_stats = { status: :ignored, user_ids: [created_by_id] } + end + end + + def perform_delete_and_ignore(performed_by, args) + destroyer(performed_by).destroy + + perform_ignore(performed_by, args) + end + + def perform_delete_and_agree(performed_by, args) + destroyer(performed_by).destroy + + agree + end + + def perform_delete_and_ignore_replies(performed_by, args) + PostDestroyer.delete_with_replies(performed_by, post, self) + + perform_ignore(performed_by, args) + end + + def perform_delete_and_agree_replies(performed_by, args) + PostDestroyer.delete_with_replies(performed_by, post, self) + + agree + end + + def perform_delete_user(performed_by, args) + UserDestroyer.new(performed_by).destroy(post.user, delete_opts) + + agree + end + + def perform_delete_user_block(performed_by, args) + delete_options = delete_opts + + delete_options.merge!(block_email: true, block_ip: true) if Rails.env.production? 
+ + UserDestroyer.new(performed_by).destroy(post.user, delete_options) + + agree + end + + private + + def post + @post ||= (target || Post.with_deleted.find_by(id: target_id)) + end + + def destroyer(performed_by) + PostDestroyer.new(performed_by, post, reviewable: self) + end + + def agree + create_result(:success, :approved) do |result| + result.update_flag_stats = { status: :agreed, user_ids: [created_by_id] } + result.recalculate_score = true + end + end + + def delete_opts + { + delete_posts: true, + prepare_for_destroy: true, + block_urls: true, + delete_as_spammer: true, + context: "review", + } + end + + def build_action( + actions, + id, + icon:, + button_class: nil, + bundle: nil, + client_action: nil, + confirm: false + ) + actions.add(id, bundle: bundle) do |action| + prefix = "reviewables.actions.#{id}" + action.icon = icon + action.button_class = button_class + action.label = "#{prefix}.title" + action.description = "#{prefix}.description" + action.client_action = client_action + action.confirm_message = "#{prefix}.confirm" if confirm + end + end +end diff --git a/app/serializers/reviewable_a_i_chat_message_serializer.rb b/app/serializers/reviewable_a_i_chat_message_serializer.rb new file mode 100644 index 00000000..8033a112 --- /dev/null +++ b/app/serializers/reviewable_a_i_chat_message_serializer.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +class ReviewableAIChatMessageSerializer < ReviewableChatMessageSerializer + payload_attributes :accuracies +end diff --git a/app/serializers/reviewable_a_i_post_serializer.rb b/app/serializers/reviewable_a_i_post_serializer.rb new file mode 100644 index 00000000..255cb94f --- /dev/null +++ b/app/serializers/reviewable_a_i_post_serializer.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +class ReviewableAIPostSerializer < ReviewableFlaggedPostSerializer + payload_attributes :accuracies +end diff --git a/assets/javascripts/.gitkeep b/assets/javascripts/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/assets/javascripts/discourse/components/model-accuracies.hbs b/assets/javascripts/discourse/components/model-accuracies.hbs new file mode 100644 index 00000000..41e5e637 --- /dev/null +++ b/assets/javascripts/discourse/components/model-accuracies.hbs @@ -0,0 +1,14 @@ +{{#if @accuracies}} + + + {{#each-in @accuracies as |model acc|}} + + + + + + + {{/each-in}} + +
{{i18n "discourse-ai.reviewables.model_used"}}{{model}}{{i18n "discourse-ai.reviewables.accuracy"}}{{acc}}%
+{{/if}} \ No newline at end of file diff --git a/assets/javascripts/discourse/components/model-accuracies.js b/assets/javascripts/discourse/components/model-accuracies.js new file mode 100644 index 00000000..57dda6ed --- /dev/null +++ b/assets/javascripts/discourse/components/model-accuracies.js @@ -0,0 +1,3 @@ +import Component from "@glimmer/component"; + +export default class ReviewableAIPost extends Component {} diff --git a/assets/javascripts/discourse/components/reviewable-aichat-message.hbs b/assets/javascripts/discourse/components/reviewable-aichat-message.hbs new file mode 100644 index 00000000..0f0f6003 --- /dev/null +++ b/assets/javascripts/discourse/components/reviewable-aichat-message.hbs @@ -0,0 +1,31 @@ +
+ + + +
+ +
+ +
+ + +
+ {{html-safe (or @reviewable.payload.message_cooked @reviewable.cooked)}} +
+ + {{yield}} + + +
+
\ No newline at end of file diff --git a/assets/javascripts/discourse/components/reviewable-aichat-message.js b/assets/javascripts/discourse/components/reviewable-aichat-message.js new file mode 100644 index 00000000..140ca4f4 --- /dev/null +++ b/assets/javascripts/discourse/components/reviewable-aichat-message.js @@ -0,0 +1,3 @@ +import Component from "@glimmer/component"; + +export default class ReviewableAIChatMessage extends Component {} diff --git a/assets/javascripts/discourse/components/reviewable-aipost.hbs b/assets/javascripts/discourse/components/reviewable-aipost.hbs new file mode 100644 index 00000000..b6bf2248 --- /dev/null +++ b/assets/javascripts/discourse/components/reviewable-aipost.hbs @@ -0,0 +1,26 @@ +
+ + +
+ +
+ +
+ +
+ {{#if @reviewable.blank_post}} +

{{i18n "review.deleted_post"}}

+ {{else}} + {{html-safe @reviewable.cooked}} + {{/if}} +
+ + {{yield}} + + +
+
\ No newline at end of file diff --git a/assets/javascripts/discourse/components/reviewable-aipost.js b/assets/javascripts/discourse/components/reviewable-aipost.js new file mode 100644 index 00000000..57dda6ed --- /dev/null +++ b/assets/javascripts/discourse/components/reviewable-aipost.js @@ -0,0 +1,3 @@ +import Component from "@glimmer/component"; + +export default class ReviewableAIPost extends Component {} diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml index 319db44f..3961751d 100644 --- a/config/locales/client.en.yml +++ b/config/locales/client.en.yml @@ -1,3 +1,12 @@ en: js: discourse-ai: + reviewables: + model_used: "Model used:" + accuracy: "Accuracy:" + review: + types: + reviewable_aipost: + title: "AI-Flagged post" + reviewable_aichat_message: + title: "AI-Flagged chat message" diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 486401eb..7435126e 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -19,3 +19,7 @@ en: ai_sentiment_inference_service_api_endpoint: "URL where the API is running for the sentiment module" ai_sentiment_inference_service_api_key: "API key for the sentiment API" ai_sentiment_models: "Models to use for inference. Sentiment classifies post on the positive/neutral/negative space. Emotion classifies on the anger/disgust/fear/joy/neutral/sadness/surprise space." + reviewables: + reasons: + flagged_by_toxicity: The AI plugin flagged this after classifying it as toxic. + flagged_by_nsfw: The AI plugin flagged this after classifying at least one of the attached images as NSFW. diff --git a/db/migrate/20230307125342_created_model_accuracy_table.rb b/db/migrate/20230307125342_created_model_accuracy_table.rb new file mode 100644 index 00000000..3222bca5 --- /dev/null +++ b/db/migrate/20230307125342_created_model_accuracy_table.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class CreatedModelAccuracyTable < ActiveRecord::Migration[7.0] + def change + create_table :model_accuracies do |t| + t.string :model, null: false + t.string :classification_type, null: false + t.integer :flags_agreed, null: false, default: 0 + t.integer :flags_disagreed, null: false, default: 0 + + t.timestamps + end + + add_index :model_accuracies, %i[model], unique: true + end +end diff --git a/lib/discourse_ai/engine.rb b/lib/discourse_ai/engine.rb new file mode 100644 index 00000000..11a00738 --- /dev/null +++ b/lib/discourse_ai/engine.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +module DiscourseAI + class Engine < ::Rails::Engine + isolate_namespace DiscourseAI + end +end diff --git a/lib/modules/nsfw/nsfw_classification.rb b/lib/modules/nsfw/nsfw_classification.rb index 97f59e41..a63dd992 100644 --- a/lib/modules/nsfw/nsfw_classification.rb +++ b/lib/modules/nsfw/nsfw_classification.rb @@ -11,14 +11,23 @@ module DiscourseAI content_of(target).present? end - def should_flag_based_on?(classification_data) + def get_verdicts(classification_data) + classification_data + .map do |model_name, classifications| + verdict = + classifications.values.any? do |data| + send("#{model_name}_verdict?", data.except(:neutral, :target_classified_type)) + end + + [model_name, verdict] + end + .to_h + end + + def should_flag_based_on?(verdicts) return false if !SiteSetting.ai_nsfw_flag_automatically - classification_data.any? do |model_name, classifications| - classifications.values.any? do |data| - send("#{model_name}_verdict?", data.except(:neutral, :target_classified_type)) - end - end + verdicts.values.any? 
end def request(target_to_classify) diff --git a/lib/modules/sentiment/sentiment_classification.rb b/lib/modules/sentiment/sentiment_classification.rb index ebd40b73..76a2ba3e 100644 --- a/lib/modules/sentiment/sentiment_classification.rb +++ b/lib/modules/sentiment/sentiment_classification.rb @@ -15,7 +15,14 @@ module DiscourseAI content_of(target).present? end - def should_flag_based_on?(classification_data) + def get_verdicts(_) + available_models.reduce({}) do |memo, model| + memo[model] = false + memo + end + end + + def should_flag_based_on?(_verdicts) # We don't flag based on sentiment classification. false end diff --git a/lib/modules/toxicity/toxicity_classification.rb b/lib/modules/toxicity/toxicity_classification.rb index 66702bd6..bf4e3679 100644 --- a/lib/modules/toxicity/toxicity_classification.rb +++ b/lib/modules/toxicity/toxicity_classification.rb @@ -21,16 +21,23 @@ module DiscourseAI content_of(target).present? end - def should_flag_based_on?(classification_data) - return false if !SiteSetting.ai_toxicity_flag_automatically - + def get_verdicts(classification_data) # We only use one model for this classification. # Classification_data looks like { model_name => classification } _model_used, data = classification_data.to_a.first - CLASSIFICATION_LABELS.any? do |label| - data[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}") - end + verdict = + CLASSIFICATION_LABELS.any? do |label| + data[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}") + end + + { available_model => verdict } + end + + def should_flag_based_on?(verdicts) + return false if !SiteSetting.ai_toxicity_flag_automatically + + verdicts.values.any? end def request(target_to_classify) diff --git a/lib/shared/chat_message_classificator.rb b/lib/shared/chat_message_classificator.rb index 9255fbe8..3f49a01e 100644 --- a/lib/shared/chat_message_classificator.rb +++ b/lib/shared/chat_message_classificator.rb @@ -4,13 +4,22 @@ module ::DiscourseAI class ChatMessageClassificator < Classificator private - def flag!(chat_message, _toxic_labels) - Chat::ChatReviewQueue.new.flag_message( - chat_message, - Guardian.new(flagger), - ReviewableScore.types[:inappropriate], - queue_for_review: true, - ) + def flag!(chat_message, classification, verdicts, accuracies) + reviewable = + ReviewableAIChatMessage.needs_review!( + created_by: Discourse.system_user, + target: chat_message, + reviewable_by_moderator: true, + potential_spam: false, + payload: { + classification: classification, + accuracies: accuracies, + verdicts: verdicts, + }, + ) + reviewable.update(target_created_by: chat_message.user) + + add_score(reviewable) end end end diff --git a/lib/shared/classificator.rb b/lib/shared/classificator.rb index eecd6328..d9ae948f 100644 --- a/lib/shared/classificator.rb +++ b/lib/shared/classificator.rb @@ -14,8 +14,11 @@ module ::DiscourseAI .tap do |classification| store_classification(target, classification) - if classification_model.should_flag_based_on?(classification) - flag!(target, classification) + verdicts = classification_model.get_verdicts(classification) + + if classification_model.should_flag_based_on?(verdicts) + accuracies = get_model_accuracies(verdicts.keys) + flag!(target, classification, verdicts, accuracies) end end end @@ -24,10 +27,32 @@ module ::DiscourseAI attr_reader :classification_model - def flag!(_target, _classification) + def flag!(_target, _classification, _verdicts, _accuracies) raise NotImplemented end + def get_model_accuracies(models) + models + .map do |name| + 
accuracy = + ModelAccuracy.find_or_create_by( + model: name, + classification_type: classification_model.type, + ) + [name, accuracy.calculate_accuracy] + end + .to_h + end + + def add_score(reviewable) + reviewable.add_score( + Discourse.system_user, + ReviewableScore.types[:inappropriate], + reason: "flagged_by_#{classification_model.type}", + force_review: true, + ) + end + def store_classification(target, classification) attrs = classification.map do |model_name, classifications| diff --git a/lib/shared/post_classificator.rb b/lib/shared/post_classificator.rb index 0e0f86f8..974a4685 100644 --- a/lib/shared/post_classificator.rb +++ b/lib/shared/post_classificator.rb @@ -4,16 +4,23 @@ module ::DiscourseAI class PostClassificator < Classificator private - def flag!(post, classification_type) - PostActionCreator.new( - flagger, - post, - PostActionType.types[:inappropriate], - reason: classification_type, - queue_for_review: true, - ).perform + def flag!(post, classification, verdicts, accuracies) + post.hide!(ReviewableScore.types[:inappropriate]) - post.publish_change_to_clients! :acted + reviewable = + ReviewableAIPost.needs_review!( + created_by: Discourse.system_user, + target: post, + reviewable_by_moderator: true, + potential_spam: false, + payload: { + classification: classification, + accuracies: accuracies, + verdicts: verdicts, + }, + ) + + add_score(reviewable) end end end diff --git a/plugin.rb b/plugin.rb index d3d9e9fa..6f0e3549 100644 --- a/plugin.rb +++ b/plugin.rb @@ -9,13 +9,13 @@ enabled_site_setting :discourse_ai_enabled +require_relative "lib/discourse_ai/engine" + after_initialize do module ::DiscourseAI PLUGIN_NAME = "discourse-ai" end - require_relative "app/models/classification_result" - require_relative "lib/shared/inference_manager" require_relative "lib/shared/classificator" require_relative "lib/shared/post_classificator" @@ -25,14 +25,19 @@ after_initialize do require_relative "lib/modules/toxicity/entry_point" require_relative "lib/modules/sentiment/entry_point" - modules = [ + [ DiscourseAI::NSFW::EntryPoint.new, DiscourseAI::Toxicity::EntryPoint.new, DiscourseAI::Sentiment::EntryPoint.new, - ] - - modules.each do |a_module| + ].each do |a_module| a_module.load_files a_module.inject_into(self) end + + register_reviewable_type ReviewableAIChatMessage + register_reviewable_type ReviewableAIPost + + on(:reviewable_transitioned_to) do |new_status, reviewable| + ModelAccuracy.adjust_model_accuracy(new_status, reviewable) + end end diff --git a/spec/lib/modules/nsfw/jobs/regular/evaluate_post_uploads_spec.rb b/spec/lib/modules/nsfw/jobs/regular/evaluate_post_uploads_spec.rb index 718da1fe..7acd60da 100644 --- a/spec/lib/modules/nsfw/jobs/regular/evaluate_post_uploads_spec.rb +++ b/spec/lib/modules/nsfw/jobs/regular/evaluate_post_uploads_spec.rb @@ -61,7 +61,7 @@ describe Jobs::EvaluatePostUploads do it "flags and hides the post" do subject.execute({ post_id: post.id }) - expect(ReviewableFlaggedPost.where(target: post).count).to eq(1) + expect(ReviewableAIPost.where(target: post).count).to eq(1) expect(post.reload.hidden?).to eq(true) end end @@ -72,7 +72,7 @@ describe Jobs::EvaluatePostUploads do it "does nothing" do subject.execute({ post_id: post.id }) - expect(ReviewableFlaggedPost.where(target: post).count).to be_zero + expect(ReviewableAIPost.where(target: post).count).to be_zero end end end diff --git a/spec/lib/modules/nsfw/nsfw_classification_spec.rb b/spec/lib/modules/nsfw/nsfw_classification_spec.rb index 727dbc22..80b4144a 100644 --- 
a/spec/lib/modules/nsfw/nsfw_classification_spec.rb +++ b/spec/lib/modules/nsfw/nsfw_classification_spec.rb @@ -66,44 +66,26 @@ describe DiscourseAI::NSFW::NSFWClassification do describe "#should_flag_based_on?" do before { SiteSetting.ai_nsfw_flag_automatically = true } - let(:positive_classification) do - { - "opennsfw2" => { - 1 => NSFWInferenceStubs.negative_result("opennsfw2"), - 2 => NSFWInferenceStubs.positive_result("opennsfw2"), - }, - "nsfw_detector" => { - 1 => NSFWInferenceStubs.negative_result("nsfw_detector"), - 2 => NSFWInferenceStubs.positive_result("nsfw_detector"), - }, - } - end + let(:positive_verdict) { { "opennsfw2" => true, "nsfw_detector" => true } } - let(:negative_classification) do - { - "opennsfw2" => { - 1 => NSFWInferenceStubs.negative_result("opennsfw2"), - 2 => NSFWInferenceStubs.negative_result("opennsfw2"), - }, - } - end + let(:negative_verdict) { { "opennsfw2" => false } } it "returns false when NSFW flaggin is disabled" do SiteSetting.ai_nsfw_flag_automatically = false - should_flag = subject.should_flag_based_on?(positive_classification) + should_flag = subject.should_flag_based_on?(positive_verdict) expect(should_flag).to eq(false) end it "returns true if the response is NSFW based on our thresholds" do - should_flag = subject.should_flag_based_on?(positive_classification) + should_flag = subject.should_flag_based_on?(positive_verdict) expect(should_flag).to eq(true) end it "returns false if the response is safe based on our thresholds" do - should_flag = subject.should_flag_based_on?(negative_classification) + should_flag = subject.should_flag_based_on?(negative_verdict) expect(should_flag).to eq(false) end diff --git a/spec/lib/modules/toxicity/jobs/regular/toxicity_classify_post_spec.rb b/spec/lib/modules/toxicity/jobs/regular/toxicity_classify_post_spec.rb index f5bdd160..1d6ec03b 100644 --- a/spec/lib/modules/toxicity/jobs/regular/toxicity_classify_post_spec.rb +++ b/spec/lib/modules/toxicity/jobs/regular/toxicity_classify_post_spec.rb @@ -18,19 +18,19 @@ describe Jobs::ToxicityClassifyPost do subject.execute({ post_id: post.id }) - expect(ReviewableFlaggedPost.where(target: post).count).to be_zero + expect(ReviewableAIPost.where(target: post).count).to be_zero end it "does nothing if there's no arg called post_id" do subject.execute({}) - expect(ReviewableFlaggedPost.where(target: post).count).to be_zero + expect(ReviewableAIPost.where(target: post).count).to be_zero end it "does nothing if no post match the given id" do subject.execute({ post_id: nil }) - expect(ReviewableFlaggedPost.where(target: post).count).to be_zero + expect(ReviewableAIPost.where(target: post).count).to be_zero end it "does nothing if the post content is blank" do @@ -38,7 +38,7 @@ describe Jobs::ToxicityClassifyPost do subject.execute({ post_id: post.id }) - expect(ReviewableFlaggedPost.where(target: post).count).to be_zero + expect(ReviewableAIPost.where(target: post).count).to be_zero end end @@ -47,7 +47,7 @@ describe Jobs::ToxicityClassifyPost do subject.execute({ post_id: post.id }) - expect(ReviewableFlaggedPost.where(target: post).count).to eq(1) + expect(ReviewableAIPost.where(target: post).count).to eq(1) end end end diff --git a/spec/lib/modules/toxicity/toxicity_classification_spec.rb b/spec/lib/modules/toxicity/toxicity_classification_spec.rb index 5ad0fa36..ee379bd1 100644 --- a/spec/lib/modules/toxicity/toxicity_classification_spec.rb +++ b/spec/lib/modules/toxicity/toxicity_classification_spec.rb @@ -21,34 +21,26 @@ describe 
DiscourseAI::Toxicity::ToxicityClassification do describe "#should_flag_based_on?" do before { SiteSetting.ai_toxicity_flag_automatically = true } - let(:toxic_response) do - { - SiteSetting.ai_toxicity_inference_service_api_model => - ToxicityInferenceStubs.toxic_response, - } - end + let(:toxic_verdict) { { SiteSetting.ai_toxicity_inference_service_api_model => true } } it "returns false when toxicity flaggin is disabled" do SiteSetting.ai_toxicity_flag_automatically = false - should_flag = subject.should_flag_based_on?(toxic_response) + should_flag = subject.should_flag_based_on?(toxic_verdict) expect(should_flag).to eq(false) end it "returns true if the response is toxic based on our thresholds" do - should_flag = subject.should_flag_based_on?(toxic_response) + should_flag = subject.should_flag_based_on?(toxic_verdict) expect(should_flag).to eq(true) end it "returns false if the response is civilized based on our thresholds" do - civilized_response = { - SiteSetting.ai_toxicity_inference_service_api_model => - ToxicityInferenceStubs.civilized_response, - } + civilized_verdict = { SiteSetting.ai_toxicity_inference_service_api_model => false } - should_flag = subject.should_flag_based_on?(civilized_response) + should_flag = subject.should_flag_based_on?(civilized_verdict) expect(should_flag).to eq(false) end diff --git a/spec/models/model_accuracy_spec.rb b/spec/models/model_accuracy_spec.rb new file mode 100644 index 00000000..6f5ab04c --- /dev/null +++ b/spec/models/model_accuracy_spec.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require "rails_helper" + +describe ModelAccuracy do + describe "#calculate_accuracy" do + let(:accuracy) { ModelAccuracy.new(model: "test_model", classification_type: "test") } + + it "returns 0 if we had no feedback" do + expect(accuracy.calculate_accuracy).to eq(0.0) + end + + it "returns 50 if we had mixed feedback" do + accuracy.flags_agreed = 1 + accuracy.flags_disagreed = 1 + + expect(accuracy.calculate_accuracy).to eq(50) + end + + it "always round the number" do + accuracy.flags_agreed = 1 + accuracy.flags_disagreed = 2 + + expect(accuracy.calculate_accuracy).to eq(33) + end + end + + describe ".adjust_model_accuracy" do + let!(:accuracy) { ModelAccuracy.create!(model: "test_model", classification_type: "test") } + + def build_reviewable(klass, test_model_verdict) + klass.new(payload: { "verdicts" => { "test_model" => test_model_verdict } }) + end + + it "does nothing if the reviewable is not generated by this plugin" do + reviewable = build_reviewable(ReviewableFlaggedPost, true) + + described_class.adjust_model_accuracy(:approved, reviewable) + + expect(accuracy.reload.flags_agreed).to be_zero + expect(accuracy.flags_disagreed).to be_zero + end + + it "updates the agreed flag if reviewable was approved and verdict is true" do + reviewable = build_reviewable(ReviewableAIPost, true) + + described_class.adjust_model_accuracy(:approved, reviewable) + + expect(accuracy.reload.flags_agreed).to eq(1) + expect(accuracy.flags_disagreed).to be_zero + end + + it "updates the disagreed flag if the reviewable was approved and verdict is false" do + reviewable = build_reviewable(ReviewableAIPost, false) + + described_class.adjust_model_accuracy(:approved, reviewable) + + expect(accuracy.reload.flags_agreed).to be_zero + expect(accuracy.flags_disagreed).to eq(1) + end + + it "updates the disagreed flag if reviewable was rejected and verdict is true" do + reviewable = build_reviewable(ReviewableAIPost, true) + + 
described_class.adjust_model_accuracy(:rejected, reviewable) + + expect(accuracy.reload.flags_agreed).to be_zero + expect(accuracy.flags_disagreed).to eq(1) + end + + it "updates the agreed flag if the reviewable was rejected and verdict is false" do + reviewable = build_reviewable(ReviewableAIPost, false) + + described_class.adjust_model_accuracy(:rejected, reviewable) + + expect(accuracy.reload.flags_agreed).to eq(1) + expect(accuracy.flags_disagreed).to be_zero + end + end +end diff --git a/spec/models/reviewable_a_i_post_spec.rb b/spec/models/reviewable_a_i_post_spec.rb new file mode 100644 index 00000000..4f1e1766 --- /dev/null +++ b/spec/models/reviewable_a_i_post_spec.rb @@ -0,0 +1,243 @@ +# frozen_string_literal: true + +require "rails_helper" + +describe ReviewableAIPost do + fab!(:target) { Fabricate(:post) } + + describe "#build_actions" do + let(:guardian) { Guardian.new } + + let(:reviewable) do + subject.tap do |r| + r.target = target + r.target_created_by = target.user + r.created_by = Discourse.system_user + end + end + + def reviewable_actions(a_guardian) + actions = Reviewable::Actions.new(reviewable, a_guardian, {}) + reviewable.build_actions(actions, a_guardian, {}) + + actions + end + + context "when the reviewable isn't pending" do + before { reviewable.status = Reviewable.statuses[:rejected] } + + it "returns no actions" do + expect(reviewable_actions(guardian)).to be_blank + end + end + + describe "actions that don't require special permissions" do + it "has the disagree action" do + expect(reviewable_actions(guardian).has?(:disagree)).to eq(true) + end + + it "has the ignore action" do + expect(reviewable_actions(guardian).has?(:ignore)).to eq(true) + end + + it "has the agree and hide or agree and keep actions" do + actions = reviewable_actions(guardian) + + expect(actions.has?(:agree_and_hide)).to eq(true) + expect(actions.has?(:agree_and_keep)).to eq(true) + expect(actions.has?(:agree_and_keep_hidden)).to eq(false) + end + + it "doesn't have the penalize actions" do + actions = reviewable_actions(guardian) + + expect(actions.has?(:agree_and_suspend)).to eq(false) + expect(actions.has?(:agree_and_silence)).to eq(false) + end + + it "doesn't has the delete + replies actions" do + actions = reviewable_actions(guardian) + + expect(actions.has?(:delete_and_ignore_replies)).to eq(false) + expect(actions.has?(:delete_and_agree_replies)).to eq(false) + end + + context "when the post is hidden" do + before { target.hide!(PostActionType.types[:inappropriate]) } + + it "can agree and keep hidden" do + actions = reviewable_actions(guardian) + + expect(actions.has?(:agree_and_hide)).to eq(false) + expect(actions.has?(:agree_and_keep)).to eq(false) + expect(actions.has?(:agree_and_keep_hidden)).to eq(true) + end + + it "has the disagree and restore action" do + actions = reviewable_actions(guardian) + + expect(actions.has?(:disagree)).to eq(false) + expect(actions.has?(:disagree_and_restore)).to eq(true) + end + end + + context "when the post was deleted by the user" do + before { target.user_deleted = true } + + it "lets you restore it but not hiding it" do + actions = reviewable_actions(guardian) + + expect(actions.has?(:agree_and_restore)).to eq(true) + expect(actions.has?(:agree_and_keep)).to eq(true) + expect(actions.has?(:agree_and_keep_hidden)).to eq(false) + expect(actions.has?(:agree_and_hide)).to eq(false) + end + end + end + + context "when the reviewer can suspend the poster" do + let(:mod_guardian) { Guardian.new(Fabricate(:moderator)) } + + it "has the 
penalization actions" do + actions = reviewable_actions(mod_guardian) + + expect(actions.has?(:agree_and_suspend)).to eq(true) + expect(actions.has?(:agree_and_silence)).to eq(true) + end + end + + context "when the reviewer can delete the post and topic" do + let(:mod_guardian) { Guardian.new(Fabricate(:moderator)) } + + it "has the delete + replies actions" do + target.reply_count = 3 + actions = reviewable_actions(mod_guardian) + + expect(actions.has?(:delete_and_ignore_replies)).to eq(true) + expect(actions.has?(:delete_and_agree_replies)).to eq(true) + end + end + end + + describe "#perform" do + let(:reviewable) do + described_class.needs_review!(target: target, created_by: Discourse.system_user) + end + fab!(:admin) { Fabricate(:admin) } + + before do + reviewable.add_score( + Discourse.system_user, + ReviewableScore.types[:inappropriate], + created_at: reviewable.created_at, + ) + end + + describe "agree variations" do + it "hides the topic when performing the agree_and_hide action" do + result = reviewable.perform(admin, :agree_and_hide) + + expect(result.transition_to).to eq :approved + expect(target.reload.hidden?).to eq(true) + end + + it "doesn't unhide the topic when performing the agree_and_keep_hidden action" do + target.hide!(ReviewableScore.types[:inappropriate]) + + result = reviewable.perform(admin, :agree_and_keep_hidden) + + expect(result.transition_to).to eq :approved + expect(target.reload.hidden?).to eq(true) + end + + it "un-deletes the post when performing the agree_and_restore action" do + target.update!(deleted_at: 1.minute.ago, deleted_by: target.user, user_deleted: true) + + result = reviewable.perform(admin, :agree_and_restore) + + expect(result.transition_to).to eq :approved + expect(target.reload.deleted_at).to be_nil + expect(target.user_deleted).to eq(false) + end + end + + describe "disagree variations" do + it "disagree_and_restore disagrees with the flag and unhides the post" do + target.hide!(ReviewableScore.types[:inappropriate]) + + result = reviewable.perform(admin, :disagree_and_restore) + + expect(result.transition_to).to eq :rejected + expect(target.reload.hidden?).to eq(false) + end + + it "disagree disagrees with the flag" do + result = reviewable.perform(admin, :disagree) + + expect(result.transition_to).to eq :rejected + end + end + + describe "delete post variations" do + def create_reply(post) + PostCreator.create( + Fabricate(:user), + raw: "this is the reply text", + reply_to_post_number: post.post_number, + topic_id: post.topic, + ) + end + + before { target.update!(reply_count: 1) } + + it "ignores the reviewable with delete_and_ignore" do + result = reviewable.perform(admin, :delete_and_ignore) + + expect(result.transition_to).to eq :ignored + expect(target.reload.deleted_at).to be_present + end + + it "ignores the reviewable and replies with delete_and_ignore_replies" do + reply = create_reply(target) + + result = reviewable.perform(admin, :delete_and_ignore_replies) + + expect(result.transition_to).to eq :ignored + expect(target.reload.deleted_at).to be_present + expect(reply.reload.deleted_at).to be_present + end + + it "agrees with the reviewable with delete_and_agree" do + result = reviewable.perform(admin, :delete_and_agree) + + expect(result.transition_to).to eq :approved + expect(target.reload.deleted_at).to be_present + end + + it "agrees with the reviewables and its replies with delete_and_agree_replies" do + reply = create_reply(target) + + result = reviewable.perform(admin, :delete_and_agree_replies) + + 
expect(result.transition_to).to eq :approved + expect(target.reload.deleted_at).to be_present + expect(reply.reload.deleted_at).to be_present + end + end + + describe "delete user variations" do + it "deletes the user and agrees with the reviewable" do + result = reviewable.perform(admin, :delete_user) + + expect(result.transition_to).to eq :approved + expect { target.user.reload }.to raise_error(ActiveRecord::RecordNotFound) + end + end + + it "ignores the reviewable" do + result = reviewable.perform(admin, :ignore) + + expect(result.transition_to).to eq :ignored + end + end +end diff --git a/spec/plugin_spec.rb b/spec/plugin_spec.rb new file mode 100644 index 00000000..801b81d8 --- /dev/null +++ b/spec/plugin_spec.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require "rails_helper" +require_relative "support/toxicity_inference_stubs" + +describe Plugin::Instance do + before { SiteSetting.discourse_ai_enabled = true } + + describe "on reviewable_transitioned_to event" do + fab!(:post) { Fabricate(:post) } + fab!(:admin) { Fabricate(:admin) } + + it "adjusts model accuracy" do + ToxicityInferenceStubs.stub_post_classification(post, toxic: true) + SiteSetting.ai_toxicity_flag_automatically = true + classification = DiscourseAI::Toxicity::ToxicityClassification.new + classificator = DiscourseAI::PostClassificator.new(classification) + classificator.classify!(post) + reviewable = ReviewableAIPost.find_by(target: post) + + reviewable.perform admin, :agree_and_keep + accuracy = ModelAccuracy.find_by(classification_type: classification.type) + + expect(accuracy.flags_agreed).to eq(1) + end + end +end diff --git a/spec/shared/chat_message_classificator_spec.rb b/spec/shared/chat_message_classificator_spec.rb index b452dd2e..ef853088 100644 --- a/spec/shared/chat_message_classificator_spec.rb +++ b/spec/shared/chat_message_classificator_spec.rb @@ -27,7 +27,7 @@ describe DiscourseAI::ChatMessageClassificator do classification.classify!(chat_message) - expect(ReviewableChatMessage.where(target: chat_message).count).to eq(1) + expect(ReviewableAIChatMessage.where(target: chat_message).count).to eq(1) end it "doesn't flags the message if the model decides we shouldn't" do @@ -35,7 +35,18 @@ describe DiscourseAI::ChatMessageClassificator do classification.classify!(chat_message) - expect(ReviewableChatMessage.where(target: chat_message).count).to be_zero + expect(ReviewableAIChatMessage.where(target: chat_message).count).to be_zero + end + + it "includes the model accuracy in the payload" do + SiteSetting.ai_toxicity_flag_automatically = true + classification.classify!(chat_message) + + reviewable = ReviewableAIChatMessage.find_by(target: chat_message) + + expect( + reviewable.payload.dig("accuracies", SiteSetting.ai_toxicity_inference_service_api_model), + ).to be_zero end end end diff --git a/spec/shared/post_classificator_spec.rb b/spec/shared/post_classificator_spec.rb index 5ba69514..e04ee4df 100644 --- a/spec/shared/post_classificator_spec.rb +++ b/spec/shared/post_classificator_spec.rb @@ -26,7 +26,7 @@ describe DiscourseAI::PostClassificator do classification.classify!(post) - expect(ReviewableFlaggedPost.where(target: post).count).to eq(1) + expect(ReviewableAIPost.where(target: post).count).to eq(1) expect(post.reload.hidden?).to eq(true) end @@ -35,7 +35,18 @@ describe DiscourseAI::PostClassificator do classification.classify!(post) - expect(ReviewableFlaggedPost.where(target: post).count).to be_zero + expect(ReviewableAIPost.where(target: post).count).to be_zero + end + + it 
"includes the model accuracy in the payload" do + SiteSetting.ai_toxicity_flag_automatically = true + classification.classify!(post) + + reviewable = ReviewableAIPost.find_by(target: post) + + expect( + reviewable.payload.dig("accuracies", SiteSetting.ai_toxicity_inference_service_api_model), + ).to be_zero end end end