discourse-ai/lib/modules/toxicity/toxicity_classification.rb

# frozen_string_literal: true

module DiscourseAi
  module Toxicity
    # Toxicity classification strategy: sends the text of a post or chat
    # message to the configured inference service and converts the returned
    # per-label values into a flagging verdict.
    class ToxicityClassification
      # Labels checked in the classification data. Each label is compared
      # against the site setting named `ai_toxicity_flag_threshold_<label>`.
      CLASSIFICATION_LABELS = %i[
        toxicity
        severe_toxicity
        obscene
        identity_attack
        insult
        threat
        sexual_explicit
      ].freeze

      # @return [Symbol] the identifier of this classification type.
      def type
        :toxicity
      end

      # @param target [Object] a Chat::Message, or an object responding to
      #   `post_number`, `raw` and `topic`.
      # @return [Boolean] true when the target has text to classify.
      def can_classify?(target)
        content_of(target).present?
      end

      # Decides whether any label reaches its configured flag threshold.
      #
      # A missing classification (nil or empty input) or a label without a
      # value counts as "below threshold" instead of raising NoMethodError.
      # Labels are looked up by symbol first and by string second, so data
      # with either key style is accepted.
      #
      # @param classification_data [Hash, nil] { model_name => classification }
      # @return [Hash] { configured model name => true/false }
      def get_verdicts(classification_data)
        # We only use one model for this classification, so only the first
        # entry of the hash is inspected.
        _model_used, scores = classification_data.to_a.first

        verdict =
          CLASSIFICATION_LABELS.any? do |label|
            score = score_for(scores, label)
            next false if score.nil?

            # public_send: threshold settings are part of SiteSetting's public
            # interface; never reach into private methods with a built name.
            score >= SiteSetting.public_send("ai_toxicity_flag_threshold_#{label}")
          end

        { available_model => verdict }
      end

      # @param verdicts [Hash] { model_name => true/false }, as returned by
      #   #get_verdicts.
      # @return [Boolean] true when automatic flagging is enabled and at least
      #   one verdict is truthy.
      def should_flag_based_on?(verdicts)
        return false unless SiteSetting.ai_toxicity_flag_automatically

        verdicts.values.any?
      end

      # Asks the inference service to classify the target's text.
      #
      # @param target_to_classify [Object] see #can_classify?
      # @return [Hash] { configured model name => classifier response }
      def request(target_to_classify)
        data =
          ::DiscourseAi::Inference::DiscourseClassifier.perform!(
            "#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",
            available_model,
            content_of(target_to_classify),
            SiteSetting.ai_toxicity_inference_service_api_key,
          )

        { available_model => data }
      end

      private

      # Name of the model configured for toxicity inference.
      def available_model
        SiteSetting.ai_toxicity_inference_service_api_model
      end

      # Reads the value stored for +label+, accepting symbol or string keys.
      # Returns nil when there is no classification or no value for the label.
      def score_for(scores, label)
        return nil if scores.nil?

        value = scores[label]
        value.nil? ? scores[label.to_s] : value
      end

      # Text submitted for classification: the message body for chat messages,
      # "title\nraw" for the first post of a topic, and the raw body otherwise.
      def content_of(target_to_classify)
        return target_to_classify.message if target_to_classify.is_a?(Chat::Message)

        if target_to_classify.post_number == 1
          "#{target_to_classify.topic.title}\n#{target_to_classify.raw}"
        else
          target_to_classify.raw
        end
      end
    end
  end
end
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`# frozen_string_literal: true`

DEV: DiscourseAI -> DiscourseAi rename to have consistent folders and files (#9) 2023-03-14 15:03:50 -04:00			`module DiscourseAi`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`module Toxicity`
			`class ToxicityClassification`
			`CLASSIFICATION_LABELS = %i[`
			`toxicity`
			`severe_toxicity`
			`obscene`
			`identity_attack`
			`insult`
			`threat`
			`sexual_explicit`
			`]`

			`def type`
			`:toxicity`
			`end`

			`def can_classify?(target)`
			`content_of(target).present?`
			`end`

FEATURE: Use dedicated reviewables for AI flags. (#4) This change adds two new reviewable types: ReviewableAIPost and ReviewableAIChatMessage. They have the same actions as their existing counterparts: ReviewableFlaggedPost and ReviewableChatMessage. We'll display the model used and their accuracy when showing these flags in the review queue and adjust the latter after staff performs an action, tracking a global accuracy per existing model in a separate table. * FEATURE: Dedicated reviewables for AI flags * Store and adjust model accuracy * Display accuracy in reviewable templates 2023-03-07 13:39:28 -05:00			`def get_verdicts(classification_data)`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`# We only use one model for this classification.`
			`# Classification_data looks like { model_name => classification }`
			`_model_used, data = classification_data.to_a.first`

FEATURE: Use dedicated reviewables for AI flags. (#4) This change adds two new reviewable types: ReviewableAIPost and ReviewableAIChatMessage. They have the same actions as their existing counterparts: ReviewableFlaggedPost and ReviewableChatMessage. We'll display the model used and their accuracy when showing these flags in the review queue and adjust the latter after staff performs an action, tracking a global accuracy per existing model in a separate table. * FEATURE: Dedicated reviewables for AI flags * Store and adjust model accuracy * Display accuracy in reviewable templates 2023-03-07 13:39:28 -05:00			`verdict =`
			`CLASSIFICATION_LABELS.any? do \|label\|`
			`data[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")`
			`end`

			`{ available_model => verdict }`
			`end`

			`def should_flag_based_on?(verdicts)`
			`return false if !SiteSetting.ai_toxicity_flag_automatically`

			`verdicts.values.any?`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`end`

			`def request(target_to_classify)`
			`data =`
DEV: DiscourseAI -> DiscourseAi rename to have consistent folders and files (#9) 2023-03-14 15:03:50 -04:00			`::DiscourseAi::Inference::DiscourseClassifier.perform!(`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`"#{SiteSetting.ai_toxicity_inference_service_api_endpoint}/api/v1/classify",`
			`SiteSetting.ai_toxicity_inference_service_api_model,`
			`content_of(target_to_classify),`
			`SiteSetting.ai_toxicity_inference_service_api_key,`
			`)`

DEV: Dedicated table for saving classification results (#1) 2023-02-27 14:21:40 -05:00			`{ available_model => data }`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`end`

			`private`

DEV: Dedicated table for saving classification results (#1) 2023-02-27 14:21:40 -05:00			`def available_model`
			`SiteSetting.ai_toxicity_inference_service_api_model`
			`end`

REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`def content_of(target_to_classify)`
DEV: applies chat namespacing (#12) 2023-03-17 10:15:38 -04:00			`return target_to_classify.message if target_to_classify.is_a?(Chat::Message)`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00
			`if target_to_classify.post_number == 1`
			`"#{target_to_classify.topic.title}\n#{target_to_classify.raw}"`
			`else`
			`target_to_classify.raw`
			`end`
			`end`
			`end`
			`end`
			`end`