discourse-ai/lib/toxicity/toxicity_classification.rb

# frozen_string_literal: true

module DiscourseAi
  module Toxicity
    # Classification strategy that scores a post or chat message for toxicity
    # by calling the classifier inference service configured through the
    # ai_toxicity_* site settings, and turns the scores into a flag verdict.
    class ToxicityClassification
      # Labels whose scores are compared against the matching
      # ai_toxicity_flag_threshold_<label> site setting.
      CLASSIFICATION_LABELS = %i[
        toxicity
        severe_toxicity
        obscene
        identity_attack
        insult
        threat
        sexual_explicit
      ].freeze

      # @return [Symbol] identifier of this classification type.
      def type
        :toxicity
      end

      # @param target [Chat::Message, #post_number] a chat message, or a
      #   post-like object responding to post_number, raw and topic.
      # @return [Boolean] true when the target has non-blank content to send.
      def can_classify?(target)
        content_of(target).present?
      end

      # Builds the verdict for the (single) model used by this classification.
      #
      # @param classification_data [Hash] { model_name => { label => score } }
      # @return [Hash] { model_name => Boolean }, true when at least one label's
      #   score is greater than or equal to its configured threshold.
      def get_verdicts(classification_data)
        # We only use one model for this classification.
        # classification_data looks like { model_name => classification }
        _model_used, data = classification_data.to_a.first
        scores = data || {}

        verdict =
          CLASSIFICATION_LABELS.any? do |label|
            score = scores[label]
            # A label without a score cannot exceed its threshold. Skipping it
            # avoids a NoMethodError (nil >= threshold) whose occurrence would
            # otherwise depend on label order, since any? short-circuits.
            # NOTE(review): presumably not every model emits every label - confirm.
            next false if score.nil?

            score >= SiteSetting.public_send("ai_toxicity_flag_threshold_#{label}")
          end

        { available_model => verdict }
      end

      # @param verdicts [Hash] { model_name => Boolean } as built by get_verdicts.
      # @return [Boolean] true when automatic flagging is enabled and at least
      #   one verdict is truthy.
      def should_flag_based_on?(verdicts)
        return false if !SiteSetting.ai_toxicity_flag_automatically

        verdicts.values.any?
      end

      # Sends the target's content to "<endpoint>/api/v1/classify".
      #
      # @param target_to_classify [Chat::Message, #post_number] see can_classify?
      # @return [Hash] { model_name => classifier response }
      def request(target_to_classify)
        data =
          ::DiscourseAi::Inference::DiscourseClassifier.perform!(
            "#{endpoint}/api/v1/classify",
            available_model,
            content_of(target_to_classify),
            SiteSetting.ai_toxicity_inference_service_api_key,
          )

        { available_model => data }
      end

      private

      # Name of the configured model; used both in the request and as the key
      # of the result/verdict hashes.
      def available_model
        SiteSetting.ai_toxicity_inference_service_api_model
      end

      # Text to classify: the message of a chat message, the raw of a post, or
      # "title\nraw" for the first post of a topic so the title is scored too.
      def content_of(target_to_classify)
        return target_to_classify.message if target_to_classify.is_a?(Chat::Message)

        if target_to_classify.post_number == 1
          "#{target_to_classify.topic.title}\n#{target_to_classify.raw}"
        else
          target_to_classify.raw
        end
      end

      # Base URL of the inference service. When the SRV setting is present it
      # is resolved through DnsSrv.lookup and an https URL is built from the
      # returned target and port; otherwise the plain endpoint setting is used.
      def endpoint
        if SiteSetting.ai_toxicity_inference_service_api_endpoint_srv.present?
          service =
            DiscourseAi::Utils::DnsSrv.lookup(
              SiteSetting.ai_toxicity_inference_service_api_endpoint_srv,
            )
          "https://#{service.target}:#{service.port}"
        else
          SiteSetting.ai_toxicity_inference_service_api_endpoint
        end
      end
    end
  end
end
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`# frozen_string_literal: true`

DEV: DiscourseAI -> DiscourseAi rename to have consistent folders and files (#9) 2023-03-14 15:03:50 -04:00			`module DiscourseAi`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`module Toxicity`
			`class ToxicityClassification`
			`CLASSIFICATION_LABELS = %i[`
			`toxicity`
			`severe_toxicity`
			`obscene`
			`identity_attack`
			`insult`
			`threat`
			`sexual_explicit`
			`]`

			`def type`
			`:toxicity`
			`end`

			`def can_classify?(target)`
			`content_of(target).present?`
			`end`

FEATURE: Use dedicated reviewables for AI flags. (#4) This change adds two new reviewable types: ReviewableAIPost and ReviewableAIChatMessage. They have the same actions as their existing counterparts: ReviewableFlaggedPost and ReviewableChatMessage. We'll display the model used and their accuracy when showing these flags in the review queue and adjust the latter after staff performs an action, tracking a global accuracy per existing model in a separate table. * FEATURE: Dedicated reviewables for AI flags * Store and adjust model accuracy * Display accuracy in reviewable templates 2023-03-07 13:39:28 -05:00			`def get_verdicts(classification_data)`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`# We only use one model for this classification.`
			`# Classification_data looks like { model_name => classification }`
			`_model_used, data = classification_data.to_a.first`

FEATURE: Use dedicated reviewables for AI flags. (#4) This change adds two new reviewable types: ReviewableAIPost and ReviewableAIChatMessage. They have the same actions as their existing counterparts: ReviewableFlaggedPost and ReviewableChatMessage. We'll display the model used and their accuracy when showing these flags in the review queue and adjust the latter after staff performs an action, tracking a global accuracy per existing model in a separate table. * FEATURE: Dedicated reviewables for AI flags * Store and adjust model accuracy * Display accuracy in reviewable templates 2023-03-07 13:39:28 -05:00			`verdict =`
			`CLASSIFICATION_LABELS.any? do \|label\|`
			`data[label] >= SiteSetting.send("ai_toxicity_flag_threshold_#{label}")`
			`end`

			`{ available_model => verdict }`
			`end`

			`def should_flag_based_on?(verdicts)`
			`return false if !SiteSetting.ai_toxicity_flag_automatically`

			`verdicts.values.any?`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`end`

			`def request(target_to_classify)`
			`data =`
DEV: DiscourseAI -> DiscourseAi rename to have consistent folders and files (#9) 2023-03-14 15:03:50 -04:00			`::DiscourseAi::Inference::DiscourseClassifier.perform!(`
FEATURE: Support for SRV records for Discourse services (#414) This allows admins to configure services with multiple backends using DNS SRV records. This PR also adds support for shared secret auth via headers for TEI and vLLM endpoints, so they are inline with the other ones. 2024-01-10 17:23:07 -05:00			`"#{endpoint}/api/v1/classify",`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`SiteSetting.ai_toxicity_inference_service_api_model,`
			`content_of(target_to_classify),`
			`SiteSetting.ai_toxicity_inference_service_api_key,`
			`)`

DEV: Dedicated table for saving classification results (#1) 2023-02-27 14:21:40 -05:00			`{ available_model => data }`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`end`

			`private`

DEV: Dedicated table for saving classification results (#1) 2023-02-27 14:21:40 -05:00			`def available_model`
			`SiteSetting.ai_toxicity_inference_service_api_model`
			`end`

REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`def content_of(target_to_classify)`
DEV: applies chat namespacing (#12) 2023-03-17 10:15:38 -04:00			`return target_to_classify.message if target_to_classify.is_a?(Chat::Message)`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00
			`if target_to_classify.post_number == 1`
			`"#{target_to_classify.topic.title}\n#{target_to_classify.raw}"`
			`else`
			`target_to_classify.raw`
			`end`
			`end`
FEATURE: Support for SRV records for Discourse services (#414) This allows admins to configure services with multiple backends using DNS SRV records. This PR also adds support for shared secret auth via headers for TEI and vLLM endpoints, so they are inline with the other ones. 2024-01-10 17:23:07 -05:00
			`def endpoint`
			`if SiteSetting.ai_toxicity_inference_service_api_endpoint_srv.present?`
			`service =`
			`DiscourseAi::Utils::DnsSrv.lookup(`
			`SiteSetting.ai_toxicity_inference_service_api_endpoint_srv,`
			`)`
			`"https://#{service.target}:#{service.port}"`
			`else`
			`SiteSetting.ai_toxicity_inference_service_api_endpoint`
			`end`
			`end`
REFACTOR: Streamline flag and classification process 2023-02-24 11:25:02 -05:00			`end`
			`end`
			`end`