FEATURE: Use dedicated reviewables for AI flags. (#4)

This change adds two new reviewable types: ReviewableAIPost and ReviewableAIChatMessage. They have the same actions as their existing counterparts, ReviewableFlaggedPost and ReviewableChatMessage. When we show these flags in the review queue we display the model used and its accuracy, and we adjust that accuracy after staff performs an action, tracking a global accuracy per model in a separate table.

* FEATURE: Dedicated reviewables for AI flags
* Store and adjust model accuracy
* Display accuracy in reviewable templates
parent 676d3ce6b2
commit a838116cd5
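For readers skimming the diff, the sketch below illustrates how the pieces introduced in this commit are meant to fit together. It is illustrative only, not part of the commit: `post`, `admin`, and the "opennsfw2" model name are placeholders, and the calls assume a Rails console in a Discourse install with this plugin loaded.

# Illustrative sketch only; classes and methods come from the diff below.
post = Post.last
reviewable = ReviewableAIPost.find_by(target: post) # created by PostClassificator#flag!
reviewable.payload["verdicts"]                      # => e.g. { "opennsfw2" => true }

# Staff acting on the flag fires :reviewable_transitioned_to, which plugin.rb wires to
# ModelAccuracy.adjust_model_accuracy. Approving a reviewable whose verdict was true
# increments flags_agreed; rejecting it increments flags_disagreed (and the reverse for
# false verdicts).
admin = User.find_by(admin: true)
reviewable.perform(admin, :agree_and_keep)

ModelAccuracy.find_by(model: "opennsfw2").calculate_accuracy
# => integer percentage: (flags_agreed * 100) / (flags_agreed + flags_disagreed)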
@@ -0,0 +1,35 @@
# frozen_string_literal: true

class ModelAccuracy < ActiveRecord::Base
  def self.adjust_model_accuracy(new_status, reviewable)
    return unless %i[approved rejected].include?(new_status)
    return unless [ReviewableAIPost, ReviewableAIChatMessage].include?(reviewable.class)

    verdicts = reviewable.payload.to_h["verdicts"] || {}

    verdicts.each do |model_name, verdict|
      accuracy_model = find_by(model: model_name)

      attribute =
        if verdict
          new_status == :approved ? :flags_agreed : :flags_disagreed
        else
          new_status == :rejected ? :flags_agreed : :flags_disagreed
        end

      accuracy_model.increment!(attribute)
    end
  end

  def calculate_accuracy
    return 0 if total_flags.zero?

    (flags_agreed * 100) / total_flags
  end

  private

  def total_flags
    flags_agreed + flags_disagreed
  end
end
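A quick worked example of the arithmetic in calculate_accuracy above; the model name and counts are made up, and the division is Ruby integer division, so the percentage is truncated:

accuracy = ModelAccuracy.new(model: "example_model", classification_type: "toxicity")
accuracy.flags_agreed = 1
accuracy.flags_disagreed = 2
accuracy.calculate_accuracy # => (1 * 100) / 3 => 33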
@@ -0,0 +1,4 @@
# frozen_string_literal: true

class ReviewableAIChatMessage < ReviewableChatMessage
end
@@ -0,0 +1,205 @@
# frozen_string_literal: true

class ReviewableAIPost < Reviewable
  # Penalties are handled by the modal after the action is performed
  def self.action_aliases
    {
      agree_and_keep_hidden: :agree_and_keep,
      agree_and_silence: :agree_and_keep,
      agree_and_suspend: :agree_and_keep,
      disagree_and_restore: :disagree,
    }
  end

  def build_actions(actions, guardian, args)
    return actions if !pending? || post.blank?

    agree =
      actions.add_bundle("#{id}-agree", icon: "thumbs-up", label: "reviewables.actions.agree.title")

    if !post.user_deleted? && !post.hidden?
      build_action(actions, :agree_and_hide, icon: "far-eye-slash", bundle: agree)
    end

    if post.hidden?
      build_action(actions, :agree_and_keep_hidden, icon: "thumbs-up", bundle: agree)
    else
      build_action(actions, :agree_and_keep, icon: "thumbs-up", bundle: agree)
    end

    if guardian.can_suspend?(target_created_by)
      build_action(
        actions,
        :agree_and_suspend,
        icon: "ban",
        bundle: agree,
        client_action: "suspend",
      )
      build_action(
        actions,
        :agree_and_silence,
        icon: "microphone-slash",
        bundle: agree,
        client_action: "silence",
      )
    end

    build_action(actions, :agree_and_restore, icon: "far-eye", bundle: agree) if post.user_deleted?

    if post.hidden?
      build_action(actions, :disagree_and_restore, icon: "thumbs-down")
    else
      build_action(actions, :disagree, icon: "thumbs-down")
    end

    if guardian.can_delete_post_or_topic?(post)
      delete =
        actions.add_bundle(
          "#{id}-delete",
          icon: "far-trash-alt",
          label: "reviewables.actions.delete.title",
        )
      build_action(actions, :delete_and_ignore, icon: "external-link-alt", bundle: delete)
      if post.reply_count > 0
        build_action(
          actions,
          :delete_and_ignore_replies,
          icon: "external-link-alt",
          confirm: true,
          bundle: delete,
        )
      end
      build_action(actions, :delete_and_agree, icon: "thumbs-up", bundle: delete)
      if post.reply_count > 0
        build_action(
          actions,
          :delete_and_agree_replies,
          icon: "external-link-alt",
          bundle: delete,
          confirm: true,
        )
      end
    end

    delete_user_actions(actions) if guardian.can_delete_user?(target_created_by)

    build_action(actions, :ignore, icon: "external-link-alt")
  end

  def perform_agree_and_hide(performed_by, args)
    post.hide!(reviewable_scores.first.reviewable_score_type)

    agree
  end

  def perform_agree_and_keep(_performed_by, _args)
    agree
  end

  def perform_agree_and_restore(performed_by, args)
    destroyer(performed_by).recover
    agree
  end

  def perform_disagree(performed_by, args)
    # Undo hide/silence if applicable
    post.unhide! if post.hidden?

    create_result(:success, :rejected) do |result|
      result.update_flag_stats = { status: :disagreed, user_ids: [created_by_id] }
    end
  end

  def perform_ignore(performed_by, args)
    create_result(:success, :ignored) do |result|
      result.update_flag_stats = { status: :ignored, user_ids: [created_by_id] }
    end
  end

  def perform_delete_and_ignore(performed_by, args)
    destroyer(performed_by).destroy

    perform_ignore(performed_by, args)
  end

  def perform_delete_and_agree(performed_by, args)
    destroyer(performed_by).destroy

    agree
  end

  def perform_delete_and_ignore_replies(performed_by, args)
    PostDestroyer.delete_with_replies(performed_by, post, self)

    perform_ignore(performed_by, args)
  end

  def perform_delete_and_agree_replies(performed_by, args)
    PostDestroyer.delete_with_replies(performed_by, post, self)

    agree
  end

  def perform_delete_user(performed_by, args)
    UserDestroyer.new(performed_by).destroy(post.user, delete_opts)

    agree
  end

  def perform_delete_user_block(performed_by, args)
    delete_options = delete_opts

    delete_options.merge!(block_email: true, block_ip: true) if Rails.env.production?

    UserDestroyer.new(performed_by).destroy(post.user, delete_options)

    agree
  end

  private

  def post
    @post ||= (target || Post.with_deleted.find_by(id: target_id))
  end

  def destroyer(performed_by)
    PostDestroyer.new(performed_by, post, reviewable: self)
  end

  def agree
    create_result(:success, :approved) do |result|
      result.update_flag_stats = { status: :agreed, user_ids: [created_by_id] }
      result.recalculate_score = true
    end
  end

  def delete_opts
    {
      delete_posts: true,
      prepare_for_destroy: true,
      block_urls: true,
      delete_as_spammer: true,
      context: "review",
    }
  end

  def build_action(
    actions,
    id,
    icon:,
    button_class: nil,
    bundle: nil,
    client_action: nil,
    confirm: false
  )
    actions.add(id, bundle: bundle) do |action|
      prefix = "reviewables.actions.#{id}"
      action.icon = icon
      action.button_class = button_class
      action.label = "#{prefix}.title"
      action.description = "#{prefix}.description"
      action.client_action = client_action
      action.confirm_message = "#{prefix}.confirm" if confirm
    end
  end
end
@@ -0,0 +1,5 @@
# frozen_string_literal: true

class ReviewableAIChatMessageSerializer < ReviewableChatMessageSerializer
  payload_attributes :accuracies
end
@@ -0,0 +1,5 @@
# frozen_string_literal: true

class ReviewableAIPostSerializer < ReviewableFlaggedPostSerializer
  payload_attributes :accuracies
end
@@ -0,0 +1,14 @@
{{#if @accuracies}}
  <table class="reviewable-scores">
    <tbody>
      {{#each-in @accuracies as |model acc|}}
        <tr>
          <td colspan="4">{{i18n "discourse-ai.reviewables.model_used"}}</td>
          <td colspan="3">{{model}}</td>
          <td colspan="4">{{i18n "discourse-ai.reviewables.accuracy"}}</td>
          <td colspan="3">{{acc}}%</td>
        </tr>
      {{/each-in}}
    </tbody>
  </table>
{{/if}}
@@ -0,0 +1,3 @@
import Component from "@glimmer/component";

export default class ReviewableAIPost extends Component {}
@@ -0,0 +1,31 @@
<div class="flagged-post-header">
  <LinkTo
    @route="chat.channel.near-message"
    @models={{array
      this.chatChannel.slugifiedTitle
      this.chatChannel.id
      @reviewable.target_id
    }}
  >
    <ChatChannelTitle @channel={{this.chatChannel}} />
  </LinkTo>
</div>

<div class="post-contents-wrapper">
  <ReviewableCreatedBy @user={{@reviewable.target_created_by}} @tagName="" />
  <div class="post-contents">
    <ReviewablePostHeader
      @reviewable={{@reviewable}}
      @createdBy={{@reviewable.target_created_by}}
      @tagName=""
    />

    <div class="post-body">
      {{html-safe (or @reviewable.payload.message_cooked @reviewable.cooked)}}
    </div>

    {{yield}}

    <ModelAccuracies @accuracies={{@reviewable.payload.accuracies}} />
  </div>
</div>
@@ -0,0 +1,3 @@
import Component from "@glimmer/component";

export default class ReviewableAIChatMessage extends Component {}
@@ -0,0 +1,26 @@
<div class="flagged-post-header">
  <ReviewableTopicLink @reviewable={{@reviewable}} @tagName="" />
  <ReviewablePostEdits @reviewable={{@reviewable}} @tagName="" />
</div>

<div class="post-contents-wrapper">
  <ReviewableCreatedBy @user={{@reviewable.target_created_by}} @tagName="" />
  <div class="post-contents">
    <ReviewablePostHeader
      @reviewable={{@reviewable}}
      @createdBy={{@reviewable.target_created_by}}
      @tagName=""
    />
    <div class="post-body">
      {{#if @reviewable.blank_post}}
        <p>{{i18n "review.deleted_post"}}</p>
      {{else}}
        {{html-safe @reviewable.cooked}}
      {{/if}}
    </div>

    {{yield}}

    <ModelAccuracies @accuracies={{@reviewable.payload.accuracies}} />
  </div>
</div>
@@ -0,0 +1,3 @@
import Component from "@glimmer/component";

export default class ReviewableAIPost extends Component {}
@@ -1,3 +1,12 @@
en:
  js:
    discourse-ai:
      reviewables:
        model_used: "Model used:"
        accuracy: "Accuracy:"
    review:
      types:
        reviewable_aipost:
          title: "AI-Flagged post"
        reviewable_aichat_message:
          title: "AI-Flagged chat message"
@@ -19,3 +19,7 @@ en:
    ai_sentiment_inference_service_api_endpoint: "URL where the API is running for the sentiment module"
    ai_sentiment_inference_service_api_key: "API key for the sentiment API"
    ai_sentiment_models: "Models to use for inference. Sentiment classifies post on the positive/neutral/negative space. Emotion classifies on the anger/disgust/fear/joy/neutral/sadness/surprise space."
  reviewables:
    reasons:
      flagged_by_toxicity: The AI plugin flagged this after classifying it as toxic.
      flagged_by_nsfw: The AI plugin flagged this after classifying at least one of the attached images as NSFW.
@@ -0,0 +1,16 @@
# frozen_string_literal: true

class CreatedModelAccuracyTable < ActiveRecord::Migration[7.0]
  def change
    create_table :model_accuracies do |t|
      t.string :model, null: false
      t.string :classification_type, null: false
      t.integer :flags_agreed, null: false, default: 0
      t.integer :flags_disagreed, null: false, default: 0

      t.timestamps
    end

    add_index :model_accuracies, %i[model], unique: true
  end
end
@@ -0,0 +1,7 @@
# frozen_string_literal: true

module DiscourseAI
  class Engine < ::Rails::Engine
    isolate_namespace DiscourseAI
  end
end
@@ -11,14 +11,23 @@ module DiscourseAI
        content_of(target).present?
      end

      def should_flag_based_on?(classification_data)
      def get_verdicts(classification_data)
        classification_data
          .map do |model_name, classifications|
            verdict =
              classifications.values.any? do |data|
                send("#{model_name}_verdict?", data.except(:neutral, :target_classified_type))
              end

            [model_name, verdict]
          end
          .to_h
      end

      def should_flag_based_on?(verdicts)
        return false if !SiteSetting.ai_nsfw_flag_automatically

        classification_data.any? do |model_name, classifications|
          classifications.values.any? do |data|
            send("#{model_name}_verdict?", data.except(:neutral, :target_classified_type))
          end
        end
        verdicts.values.any?
      end

      def request(target_to_classify)
@@ -15,7 +15,14 @@ module DiscourseAI
        content_of(target).present?
      end

      def should_flag_based_on?(classification_data)
      def get_verdicts(_)
        available_models.reduce({}) do |memo, model|
          memo[model] = false
          memo
        end
      end

      def should_flag_based_on?(_verdicts)
        # We don't flag based on sentiment classification.
        false
      end
@@ -21,16 +21,23 @@ module DiscourseAI
        content_of(target).present?
      end

      def should_flag_based_on?(classification_data)
        return false if !SiteSetting.ai_toxicity_flag_automatically

      def get_verdicts(classification_data)
        # We only use one model for this classification.
        # classification_data looks like { model_name => classification }
        _model_used, data = classification_data.to_a.first

        CLASSIFICATION_LABELS.any? do |label|
          data[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")
        end
        verdict =
          CLASSIFICATION_LABELS.any? do |label|
            data[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")
          end

        { available_model => verdict }
      end

      def should_flag_based_on?(verdicts)
        return false if !SiteSetting.ai_toxicity_flag_automatically

        verdicts.values.any?
      end

      def request(target_to_classify)
@@ -4,13 +4,22 @@ module ::DiscourseAI
  class ChatMessageClassificator < Classificator
    private

    def flag!(chat_message, _toxic_labels)
      Chat::ChatReviewQueue.new.flag_message(
        chat_message,
        Guardian.new(flagger),
        ReviewableScore.types[:inappropriate],
        queue_for_review: true,
      )
    def flag!(chat_message, classification, verdicts, accuracies)
      reviewable =
        ReviewableAIChatMessage.needs_review!(
          created_by: Discourse.system_user,
          target: chat_message,
          reviewable_by_moderator: true,
          potential_spam: false,
          payload: {
            classification: classification,
            accuracies: accuracies,
            verdicts: verdicts,
          },
        )
      reviewable.update(target_created_by: chat_message.user)

      add_score(reviewable)
    end
  end
end
@@ -14,8 +14,11 @@ module ::DiscourseAI
        .tap do |classification|
          store_classification(target, classification)

          if classification_model.should_flag_based_on?(classification)
            flag!(target, classification)
          verdicts = classification_model.get_verdicts(classification)

          if classification_model.should_flag_based_on?(verdicts)
            accuracies = get_model_accuracies(verdicts.keys)
            flag!(target, classification, verdicts, accuracies)
          end
        end
      end

@@ -24,10 +27,32 @@

    attr_reader :classification_model

    def flag!(_target, _classification)
    def flag!(_target, _classification, _verdicts, _accuracies)
      raise NotImplemented
    end

    def get_model_accuracies(models)
      models
        .map do |name|
          accuracy =
            ModelAccuracy.find_or_create_by(
              model: name,
              classification_type: classification_model.type,
            )
          [name, accuracy.calculate_accuracy]
        end
        .to_h
    end

    def add_score(reviewable)
      reviewable.add_score(
        Discourse.system_user,
        ReviewableScore.types[:inappropriate],
        reason: "flagged_by_#{classification_model.type}",
        force_review: true,
      )
    end

    def store_classification(target, classification)
      attrs =
        classification.map do |model_name, classifications|
@@ -4,16 +4,23 @@ module ::DiscourseAI
  class PostClassificator < Classificator
    private

    def flag!(post, classification_type)
      PostActionCreator.new(
        flagger,
        post,
        PostActionType.types[:inappropriate],
        reason: classification_type,
        queue_for_review: true,
      ).perform
    def flag!(post, classification, verdicts, accuracies)
      post.hide!(ReviewableScore.types[:inappropriate])

      post.publish_change_to_clients! :acted
      reviewable =
        ReviewableAIPost.needs_review!(
          created_by: Discourse.system_user,
          target: post,
          reviewable_by_moderator: true,
          potential_spam: false,
          payload: {
            classification: classification,
            accuracies: accuracies,
            verdicts: verdicts,
          },
        )

      add_score(reviewable)
    end
  end
end
plugin.rb
@@ -9,13 +9,13 @@

enabled_site_setting :discourse_ai_enabled

require_relative "lib/discourse_ai/engine"

after_initialize do
  module ::DiscourseAI
    PLUGIN_NAME = "discourse-ai"
  end

  require_relative "app/models/classification_result"

  require_relative "lib/shared/inference_manager"
  require_relative "lib/shared/classificator"
  require_relative "lib/shared/post_classificator"

@@ -25,14 +25,19 @@ after_initialize do
  require_relative "lib/modules/toxicity/entry_point"
  require_relative "lib/modules/sentiment/entry_point"

  modules = [
  [
    DiscourseAI::NSFW::EntryPoint.new,
    DiscourseAI::Toxicity::EntryPoint.new,
    DiscourseAI::Sentiment::EntryPoint.new,
  ]

  modules.each do |a_module|
  ].each do |a_module|
    a_module.load_files
    a_module.inject_into(self)
  end

  register_reviewable_type ReviewableAIChatMessage
  register_reviewable_type ReviewableAIPost

  on(:reviewable_transitioned_to) do |new_status, reviewable|
    ModelAccuracy.adjust_model_accuracy(new_status, reviewable)
  end
end
@@ -61,7 +61,7 @@ describe Jobs::EvaluatePostUploads do
    it "flags and hides the post" do
      subject.execute({ post_id: post.id })

      expect(ReviewableFlaggedPost.where(target: post).count).to eq(1)
      expect(ReviewableAIPost.where(target: post).count).to eq(1)
      expect(post.reload.hidden?).to eq(true)
    end
  end

@@ -72,7 +72,7 @@ describe Jobs::EvaluatePostUploads do
      it "does nothing" do
        subject.execute({ post_id: post.id })

        expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
        expect(ReviewableAIPost.where(target: post).count).to be_zero
      end
    end
  end
@@ -66,44 +66,26 @@ describe DiscourseAI::NSFW::NSFWClassification do
  describe "#should_flag_based_on?" do
    before { SiteSetting.ai_nsfw_flag_automatically = true }

    let(:positive_classification) do
      {
        "opennsfw2" => {
          1 => NSFWInferenceStubs.negative_result("opennsfw2"),
          2 => NSFWInferenceStubs.positive_result("opennsfw2"),
        },
        "nsfw_detector" => {
          1 => NSFWInferenceStubs.negative_result("nsfw_detector"),
          2 => NSFWInferenceStubs.positive_result("nsfw_detector"),
        },
      }
    end
    let(:positive_verdict) { { "opennsfw2" => true, "nsfw_detector" => true } }

    let(:negative_classification) do
      {
        "opennsfw2" => {
          1 => NSFWInferenceStubs.negative_result("opennsfw2"),
          2 => NSFWInferenceStubs.negative_result("opennsfw2"),
        },
      }
    end
    let(:negative_verdict) { { "opennsfw2" => false } }

    it "returns false when NSFW flagging is disabled" do
      SiteSetting.ai_nsfw_flag_automatically = false

      should_flag = subject.should_flag_based_on?(positive_classification)
      should_flag = subject.should_flag_based_on?(positive_verdict)

      expect(should_flag).to eq(false)
    end

    it "returns true if the response is NSFW based on our thresholds" do
      should_flag = subject.should_flag_based_on?(positive_classification)
      should_flag = subject.should_flag_based_on?(positive_verdict)

      expect(should_flag).to eq(true)
    end

    it "returns false if the response is safe based on our thresholds" do
      should_flag = subject.should_flag_based_on?(negative_classification)
      should_flag = subject.should_flag_based_on?(negative_verdict)

      expect(should_flag).to eq(false)
    end
@@ -18,19 +18,19 @@ describe Jobs::ToxicityClassifyPost do

      subject.execute({ post_id: post.id })

      expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
      expect(ReviewableAIPost.where(target: post).count).to be_zero
    end

    it "does nothing if there's no arg called post_id" do
      subject.execute({})

      expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
      expect(ReviewableAIPost.where(target: post).count).to be_zero
    end

    it "does nothing if no post matches the given id" do
      subject.execute({ post_id: nil })

      expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
      expect(ReviewableAIPost.where(target: post).count).to be_zero
    end

    it "does nothing if the post content is blank" do

@@ -38,7 +38,7 @@ describe Jobs::ToxicityClassifyPost do

      subject.execute({ post_id: post.id })

      expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
      expect(ReviewableAIPost.where(target: post).count).to be_zero
    end
  end

@@ -47,7 +47,7 @@ describe Jobs::ToxicityClassifyPost do

      subject.execute({ post_id: post.id })

      expect(ReviewableFlaggedPost.where(target: post).count).to eq(1)
      expect(ReviewableAIPost.where(target: post).count).to eq(1)
    end
  end
end
@@ -21,34 +21,26 @@ describe DiscourseAI::Toxicity::ToxicityClassification do
  describe "#should_flag_based_on?" do
    before { SiteSetting.ai_toxicity_flag_automatically = true }

    let(:toxic_response) do
      {
        SiteSetting.ai_toxicity_inference_service_api_model =>
          ToxicityInferenceStubs.toxic_response,
      }
    end
    let(:toxic_verdict) { { SiteSetting.ai_toxicity_inference_service_api_model => true } }

    it "returns false when toxicity flagging is disabled" do
      SiteSetting.ai_toxicity_flag_automatically = false

      should_flag = subject.should_flag_based_on?(toxic_response)
      should_flag = subject.should_flag_based_on?(toxic_verdict)

      expect(should_flag).to eq(false)
    end

    it "returns true if the response is toxic based on our thresholds" do
      should_flag = subject.should_flag_based_on?(toxic_response)
      should_flag = subject.should_flag_based_on?(toxic_verdict)

      expect(should_flag).to eq(true)
    end

    it "returns false if the response is civilized based on our thresholds" do
      civilized_response = {
        SiteSetting.ai_toxicity_inference_service_api_model =>
          ToxicityInferenceStubs.civilized_response,
      }
      civilized_verdict = { SiteSetting.ai_toxicity_inference_service_api_model => false }

      should_flag = subject.should_flag_based_on?(civilized_response)
      should_flag = subject.should_flag_based_on?(civilized_verdict)

      expect(should_flag).to eq(false)
    end
@@ -0,0 +1,80 @@
# frozen_string_literal: true

require "rails_helper"

describe ModelAccuracy do
  describe "#calculate_accuracy" do
    let(:accuracy) { ModelAccuracy.new(model: "test_model", classification_type: "test") }

    it "returns 0 if we had no feedback" do
      expect(accuracy.calculate_accuracy).to eq(0.0)
    end

    it "returns 50 if we had mixed feedback" do
      accuracy.flags_agreed = 1
      accuracy.flags_disagreed = 1

      expect(accuracy.calculate_accuracy).to eq(50)
    end

    it "always rounds the number" do
      accuracy.flags_agreed = 1
      accuracy.flags_disagreed = 2

      expect(accuracy.calculate_accuracy).to eq(33)
    end
  end

  describe ".adjust_model_accuracy" do
    let!(:accuracy) { ModelAccuracy.create!(model: "test_model", classification_type: "test") }

    def build_reviewable(klass, test_model_verdict)
      klass.new(payload: { "verdicts" => { "test_model" => test_model_verdict } })
    end

    it "does nothing if the reviewable is not generated by this plugin" do
      reviewable = build_reviewable(ReviewableFlaggedPost, true)

      described_class.adjust_model_accuracy(:approved, reviewable)

      expect(accuracy.reload.flags_agreed).to be_zero
      expect(accuracy.flags_disagreed).to be_zero
    end

    it "updates the agreed flag if the reviewable was approved and the verdict is true" do
      reviewable = build_reviewable(ReviewableAIPost, true)

      described_class.adjust_model_accuracy(:approved, reviewable)

      expect(accuracy.reload.flags_agreed).to eq(1)
      expect(accuracy.flags_disagreed).to be_zero
    end

    it "updates the disagreed flag if the reviewable was approved and the verdict is false" do
      reviewable = build_reviewable(ReviewableAIPost, false)

      described_class.adjust_model_accuracy(:approved, reviewable)

      expect(accuracy.reload.flags_agreed).to be_zero
      expect(accuracy.flags_disagreed).to eq(1)
    end

    it "updates the disagreed flag if the reviewable was rejected and the verdict is true" do
      reviewable = build_reviewable(ReviewableAIPost, true)

      described_class.adjust_model_accuracy(:rejected, reviewable)

      expect(accuracy.reload.flags_agreed).to be_zero
      expect(accuracy.flags_disagreed).to eq(1)
    end

    it "updates the agreed flag if the reviewable was rejected and the verdict is false" do
      reviewable = build_reviewable(ReviewableAIPost, false)

      described_class.adjust_model_accuracy(:rejected, reviewable)

      expect(accuracy.reload.flags_agreed).to eq(1)
      expect(accuracy.flags_disagreed).to be_zero
    end
  end
end
@@ -0,0 +1,243 @@
# frozen_string_literal: true

require "rails_helper"

describe ReviewableAIPost do
  fab!(:target) { Fabricate(:post) }

  describe "#build_actions" do
    let(:guardian) { Guardian.new }

    let(:reviewable) do
      subject.tap do |r|
        r.target = target
        r.target_created_by = target.user
        r.created_by = Discourse.system_user
      end
    end

    def reviewable_actions(a_guardian)
      actions = Reviewable::Actions.new(reviewable, a_guardian, {})
      reviewable.build_actions(actions, a_guardian, {})

      actions
    end

    context "when the reviewable isn't pending" do
      before { reviewable.status = Reviewable.statuses[:rejected] }

      it "returns no actions" do
        expect(reviewable_actions(guardian)).to be_blank
      end
    end

    describe "actions that don't require special permissions" do
      it "has the disagree action" do
        expect(reviewable_actions(guardian).has?(:disagree)).to eq(true)
      end

      it "has the ignore action" do
        expect(reviewable_actions(guardian).has?(:ignore)).to eq(true)
      end

      it "has the agree and hide or agree and keep actions" do
        actions = reviewable_actions(guardian)

        expect(actions.has?(:agree_and_hide)).to eq(true)
        expect(actions.has?(:agree_and_keep)).to eq(true)
        expect(actions.has?(:agree_and_keep_hidden)).to eq(false)
      end

      it "doesn't have the penalize actions" do
        actions = reviewable_actions(guardian)

        expect(actions.has?(:agree_and_suspend)).to eq(false)
        expect(actions.has?(:agree_and_silence)).to eq(false)
      end

      it "doesn't have the delete + replies actions" do
        actions = reviewable_actions(guardian)

        expect(actions.has?(:delete_and_ignore_replies)).to eq(false)
        expect(actions.has?(:delete_and_agree_replies)).to eq(false)
      end

      context "when the post is hidden" do
        before { target.hide!(PostActionType.types[:inappropriate]) }

        it "can agree and keep hidden" do
          actions = reviewable_actions(guardian)

          expect(actions.has?(:agree_and_hide)).to eq(false)
          expect(actions.has?(:agree_and_keep)).to eq(false)
          expect(actions.has?(:agree_and_keep_hidden)).to eq(true)
        end

        it "has the disagree and restore action" do
          actions = reviewable_actions(guardian)

          expect(actions.has?(:disagree)).to eq(false)
          expect(actions.has?(:disagree_and_restore)).to eq(true)
        end
      end

      context "when the post was deleted by the user" do
        before { target.user_deleted = true }

        it "lets you restore it but not hide it" do
          actions = reviewable_actions(guardian)

          expect(actions.has?(:agree_and_restore)).to eq(true)
          expect(actions.has?(:agree_and_keep)).to eq(true)
          expect(actions.has?(:agree_and_keep_hidden)).to eq(false)
          expect(actions.has?(:agree_and_hide)).to eq(false)
        end
      end
    end

    context "when the reviewer can suspend the poster" do
      let(:mod_guardian) { Guardian.new(Fabricate(:moderator)) }

      it "has the penalization actions" do
        actions = reviewable_actions(mod_guardian)

        expect(actions.has?(:agree_and_suspend)).to eq(true)
        expect(actions.has?(:agree_and_silence)).to eq(true)
      end
    end

    context "when the reviewer can delete the post and topic" do
      let(:mod_guardian) { Guardian.new(Fabricate(:moderator)) }

      it "has the delete + replies actions" do
        target.reply_count = 3
        actions = reviewable_actions(mod_guardian)

        expect(actions.has?(:delete_and_ignore_replies)).to eq(true)
        expect(actions.has?(:delete_and_agree_replies)).to eq(true)
      end
    end
  end

  describe "#perform" do
    let(:reviewable) do
      described_class.needs_review!(target: target, created_by: Discourse.system_user)
    end
    fab!(:admin) { Fabricate(:admin) }

    before do
      reviewable.add_score(
        Discourse.system_user,
        ReviewableScore.types[:inappropriate],
        created_at: reviewable.created_at,
      )
    end

    describe "agree variations" do
      it "hides the post when performing the agree_and_hide action" do
        result = reviewable.perform(admin, :agree_and_hide)

        expect(result.transition_to).to eq :approved
        expect(target.reload.hidden?).to eq(true)
      end

      it "doesn't unhide the post when performing the agree_and_keep_hidden action" do
        target.hide!(ReviewableScore.types[:inappropriate])

        result = reviewable.perform(admin, :agree_and_keep_hidden)

        expect(result.transition_to).to eq :approved
        expect(target.reload.hidden?).to eq(true)
      end

      it "un-deletes the post when performing the agree_and_restore action" do
        target.update!(deleted_at: 1.minute.ago, deleted_by: target.user, user_deleted: true)

        result = reviewable.perform(admin, :agree_and_restore)

        expect(result.transition_to).to eq :approved
        expect(target.reload.deleted_at).to be_nil
        expect(target.user_deleted).to eq(false)
      end
    end

    describe "disagree variations" do
      it "disagree_and_restore disagrees with the flag and unhides the post" do
        target.hide!(ReviewableScore.types[:inappropriate])

        result = reviewable.perform(admin, :disagree_and_restore)

        expect(result.transition_to).to eq :rejected
        expect(target.reload.hidden?).to eq(false)
      end

      it "disagree disagrees with the flag" do
        result = reviewable.perform(admin, :disagree)

        expect(result.transition_to).to eq :rejected
      end
    end

    describe "delete post variations" do
      def create_reply(post)
        PostCreator.create(
          Fabricate(:user),
          raw: "this is the reply text",
          reply_to_post_number: post.post_number,
          topic_id: post.topic_id,
        )
      end

      before { target.update!(reply_count: 1) }

      it "ignores the reviewable with delete_and_ignore" do
        result = reviewable.perform(admin, :delete_and_ignore)

        expect(result.transition_to).to eq :ignored
        expect(target.reload.deleted_at).to be_present
      end

      it "ignores the reviewable and replies with delete_and_ignore_replies" do
        reply = create_reply(target)

        result = reviewable.perform(admin, :delete_and_ignore_replies)

        expect(result.transition_to).to eq :ignored
        expect(target.reload.deleted_at).to be_present
        expect(reply.reload.deleted_at).to be_present
      end

      it "agrees with the reviewable with delete_and_agree" do
        result = reviewable.perform(admin, :delete_and_agree)

        expect(result.transition_to).to eq :approved
        expect(target.reload.deleted_at).to be_present
      end

      it "agrees with the reviewable and its replies with delete_and_agree_replies" do
        reply = create_reply(target)

        result = reviewable.perform(admin, :delete_and_agree_replies)

        expect(result.transition_to).to eq :approved
        expect(target.reload.deleted_at).to be_present
        expect(reply.reload.deleted_at).to be_present
      end
    end

    describe "delete user variations" do
      it "deletes the user and agrees with the reviewable" do
        result = reviewable.perform(admin, :delete_user)

        expect(result.transition_to).to eq :approved
        expect { target.user.reload }.to raise_error(ActiveRecord::RecordNotFound)
      end
    end

    it "ignores the reviewable" do
      result = reviewable.perform(admin, :ignore)

      expect(result.transition_to).to eq :ignored
    end
  end
end
@@ -0,0 +1,27 @@
# frozen_string_literal: true

require "rails_helper"
require_relative "support/toxicity_inference_stubs"

describe Plugin::Instance do
  before { SiteSetting.discourse_ai_enabled = true }

  describe "on reviewable_transitioned_to event" do
    fab!(:post) { Fabricate(:post) }
    fab!(:admin) { Fabricate(:admin) }

    it "adjusts model accuracy" do
      ToxicityInferenceStubs.stub_post_classification(post, toxic: true)
      SiteSetting.ai_toxicity_flag_automatically = true
      classification = DiscourseAI::Toxicity::ToxicityClassification.new
      classificator = DiscourseAI::PostClassificator.new(classification)
      classificator.classify!(post)
      reviewable = ReviewableAIPost.find_by(target: post)

      reviewable.perform admin, :agree_and_keep
      accuracy = ModelAccuracy.find_by(classification_type: classification.type)

      expect(accuracy.flags_agreed).to eq(1)
    end
  end
end
@@ -27,7 +27,7 @@ describe DiscourseAI::ChatMessageClassificator do

      classification.classify!(chat_message)

      expect(ReviewableChatMessage.where(target: chat_message).count).to eq(1)
      expect(ReviewableAIChatMessage.where(target: chat_message).count).to eq(1)
    end

    it "doesn't flag the message if the model decides we shouldn't" do

@@ -35,7 +35,18 @@ describe DiscourseAI::ChatMessageClassificator do

      classification.classify!(chat_message)

      expect(ReviewableChatMessage.where(target: chat_message).count).to be_zero
      expect(ReviewableAIChatMessage.where(target: chat_message).count).to be_zero
    end

    it "includes the model accuracy in the payload" do
      SiteSetting.ai_toxicity_flag_automatically = true
      classification.classify!(chat_message)

      reviewable = ReviewableAIChatMessage.find_by(target: chat_message)

      expect(
        reviewable.payload.dig("accuracies", SiteSetting.ai_toxicity_inference_service_api_model),
      ).to be_zero
    end
  end
end
@@ -26,7 +26,7 @@ describe DiscourseAI::PostClassificator do

      classification.classify!(post)

      expect(ReviewableFlaggedPost.where(target: post).count).to eq(1)
      expect(ReviewableAIPost.where(target: post).count).to eq(1)
      expect(post.reload.hidden?).to eq(true)
    end

@@ -35,7 +35,18 @@ describe DiscourseAI::PostClassificator do

      classification.classify!(post)

      expect(ReviewableFlaggedPost.where(target: post).count).to be_zero
      expect(ReviewableAIPost.where(target: post).count).to be_zero
    end

    it "includes the model accuracy in the payload" do
      SiteSetting.ai_toxicity_flag_automatically = true
      classification.classify!(post)

      reviewable = ReviewableAIPost.find_by(target: post)

      expect(
        reviewable.payload.dig("accuracies", SiteSetting.ai_toxicity_inference_service_api_model),
      ).to be_zero
    end
  end
end