# Patch overview: wires "false positive" / "false negative" spam stats to the review queue.
#
# NOTE(review): line breaks inside the hunks below appear to have been collapsed into
# spaces in this copy of the patch — confirm against the original commit before applying.
#
# Files touched, as shown in the hunks below:
#   * app/serializers/ai_spam_serializer.rb
#       Adds a `spam_score_type` attribute whose value is `ReviewableScore.types[:spam]`.
#   * assets/javascripts/discourse/components/ai-spam.gjs
#       Builds the `falsePositives` / `falseNegatives` stat objects before the returned
#       array so that, when `flagging_username` is present, each can be given a `/review`
#       link (as `detected` already was). The false-negative link passes
#       `additional_filters={"ai_spam_false_negative":true}` and
#       `score_type=<spam_score_type>`.
#   * lib/ai_moderation/entry_point.rb
#       Registers an `ai_spam_false_negative` filter through `Reviewable.add_custom_filter`.
#       When the filter value is truthy, results are limited to reviewables whose target
#       Post has an `ai_spam_logs` row with `NOT is_spam`; otherwise results are returned
#       unchanged.
#   * lib/ai_moderation/spam_report.rb
#       Drops the `post_reviewables` CTE. Each `ai_spam_logs` row now carries a
#       `missed_spam` flag computed with an EXISTS subquery (a reviewable score of the
#       spam type on the same post, log row not spam, reviewable status in `:spam`), and
#       `false_negatives` counts the rows where that flag is true. A new
#       `spam_score_type` bind parameter is passed to `DB.query`.
#   * spec/requests/admin/reviewable_controller_spec.rb (new file)
#       Request spec: with the custom filter `/review.json` is expected to return 1
#       reviewable, and without it 2.
diff --git a/app/serializers/ai_spam_serializer.rb b/app/serializers/ai_spam_serializer.rb index 179d828c..14f1c474 100644 --- a/app/serializers/ai_spam_serializer.rb +++ b/app/serializers/ai_spam_serializer.rb @@ -1,7 +1,13 @@ # frozen_string_literal: true class AiSpamSerializer < ApplicationSerializer - attributes :is_enabled, :llm_id, :custom_instructions, :available_llms, :stats, :flagging_username + attributes :is_enabled, + :llm_id, + :custom_instructions, + :available_llms, + :stats, + :flagging_username, + :spam_score_type def is_enabled object[:enabled] @@ -25,6 +31,10 @@ class AiSpamSerializer < ApplicationSerializer object[:flagging_username] end + def spam_score_type + ReviewableScore.types[:spam] + end + def stats { scanned_count: object[:stats].scanned_count.to_i, diff --git a/assets/javascripts/discourse/components/ai-spam.gjs b/assets/javascripts/discourse/components/ai-spam.gjs index 6c006bf0..b6646a37 100644 --- a/assets/javascripts/discourse/components/ai-spam.gjs +++ b/assets/javascripts/discourse/components/ai-spam.gjs @@ -125,9 +125,30 @@ export default class AiSpam extends Component { label: i18n("discourse_ai.spam.spam_detected"), value: this.stats.spam_detected, }; + + const falsePositives = { + label: i18n("discourse_ai.spam.false_positives"), + value: this.stats.false_positives, + tooltip: i18n("discourse_ai.spam.stat_tooltips.incorrectly_flagged"), + }; + + const falseNegatives = { + label: i18n("discourse_ai.spam.false_negatives"), + value: this.stats.false_negatives, + tooltip: i18n("discourse_ai.spam.stat_tooltips.missed_spam"), + }; + if (this.args.model.flagging_username) { detected.href = getURL( - "/review?flagged_by=" + this.args.model.flagging_username + `/review?flagged_by=${this.args.model.flagging_username}&status=all&sort_order=created_at` + ); + + falsePositives.href = getURL( + `/review?flagged_by=${this.args.model.flagging_username}&status=rejected&sort_order=created_at` + ); + + falseNegatives.href = getURL( 
`/review?status=approved&sort_order=created_at&additional_filters={"ai_spam_false_negative":true}&order=created&score_type=${this.args.model.spam_score_type}` ); } return [ @@ -136,16 +157,8 @@ export default class AiSpam extends Component { value: this.stats.scanned_count, }, detected, - { - label: i18n("discourse_ai.spam.false_positives"), - value: this.stats.false_positives, - tooltip: i18n("discourse_ai.spam.stat_tooltips.incorrectly_flagged"), - }, - { - label: i18n("discourse_ai.spam.false_negatives"), - value: this.stats.false_negatives, - tooltip: i18n("discourse_ai.spam.stat_tooltips.missed_spam"), - }, + falsePositives, + falseNegatives, ]; } diff --git a/lib/ai_moderation/entry_point.rb b/lib/ai_moderation/entry_point.rb index a9655fd8..68b7f407 100644 --- a/lib/ai_moderation/entry_point.rb +++ b/lib/ai_moderation/entry_point.rb @@ -11,6 +11,25 @@ module DiscourseAi plugin.on(:site_setting_changed) do |name, _old_value, new_value| SpamScanner.ensure_flagging_user! if name == :ai_spam_detection_enabled && new_value end + + custom_filter = [ + :ai_spam_false_negative, + Proc.new do |results, value| + if value + results.where(<<~SQL) + EXISTS ( + SELECT 1 FROM ai_spam_logs + WHERE NOT is_spam + AND post_id = target_id AND target_type = 'Post' + ) + SQL + else + results + end + end, + ] + + Reviewable.add_custom_filter(custom_filter) end end end diff --git a/lib/ai_moderation/spam_report.rb b/lib/ai_moderation/spam_report.rb index 5bb3b395..f4574c19 100644 --- a/lib/ai_moderation/spam_report.rb +++ b/lib/ai_moderation/spam_report.rb @@ -14,33 +14,34 @@ module DiscourseAi asl.post_id, asl.is_spam, r.status as reviewable_status, - r.target_type, - r.potential_spam + CASE WHEN EXISTS ( + SELECT 1 FROM reviewable_scores rs + JOIN reviewables r1 ON r1.id = rs.reviewable_id + WHERE r1.target_id = asl.post_id + AND r1.target_type = 'Post' + AND rs.reviewable_score_type = :spam_score_type + AND NOT is_spam + AND r1.status IN (:spam) + ) THEN true ELSE false END AS 
missed_spam FROM ai_spam_logs asl LEFT JOIN reviewables r ON r.id = asl.reviewable_id WHERE asl.created_at > :min_date - ), - post_reviewables AS ( - SELECT - target_id post_id, - COUNT(DISTINCT target_id) as false_negative_count - FROM reviewables - WHERE target_type = 'Post' - AND status IN (:spam) - AND potential_spam - AND target_id IN (SELECT post_id FROM spam_stats) - GROUP BY target_id ) SELECT COUNT(*) AS scanned_count, SUM(CASE WHEN is_spam THEN 1 ELSE 0 END) AS spam_detected, COUNT(CASE WHEN reviewable_status IN (:ham) THEN 1 END) AS false_positives, - COALESCE(SUM(pr.false_negative_count), 0) AS false_negatives + COUNT(CASE WHEN missed_spam THEN 1 END) AS false_negatives FROM spam_stats - LEFT JOIN post_reviewables pr USING (post_id) SQL - DB.query(sql, spam: spam_status, ham: ham_status, min_date: min_date).first + DB.query( + sql, + spam: spam_status, + ham: ham_status, + min_date: min_date, + spam_score_type: ReviewableScore.types[:spam], + ).first end end end diff --git a/spec/requests/admin/reviewable_controller_spec.rb b/spec/requests/admin/reviewable_controller_spec.rb new file mode 100644 index 00000000..5f620eb5 --- /dev/null +++ b/spec/requests/admin/reviewable_controller_spec.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +RSpec.describe ReviewablesController do + fab!(:post1) { Fabricate(:post) } + fab!(:post2) { Fabricate(:post) } + fab!(:admin) + fab!(:llm_model) + + fab!(:reviewable) do + Reviewable.create!( + target: post1, + topic: post2.topic, + type: ReviewablePost, + created_by: admin, + status: Reviewable.statuses[:pending], + ) + end + + fab!(:reviewable2) do + Reviewable.create!( + target: post2, + topic: post2.topic, + type: ReviewablePost, + created_by: admin, + status: Reviewable.statuses[:pending], + ) + end + + fab!(:ai_spam_log_missed) do + AiSpamLog.create!(is_spam: false, post_id: post1.id, llm_model_id: llm_model.id) + end + # we amend the behavior with a custom filter so we need to confirm it works + it "properly 
applies custom filter" do + sign_in(admin) + + get '/review.json?additional_filters={"ai_spam_false_negative":true}' + expect(response.status).to eq(200) + + json = JSON.parse(response.body) + expect(json["reviewables"].length).to eq(1) + + get "/review.json" + expect(response.status).to eq(200) + json = JSON.parse(response.body) + expect(json["reviewables"].length).to eq(2) + end +end