FEATURE: link correctly to filters to assist in debugging spam (#1031)
- Add spam_score_type to AiSpamSerializer for better integration with reviewables.
- Introduce a custom filter for detecting AI spam false negatives in moderation workflows.
- Refactor spam report generation to improve identification of false negatives.
- Add tests to verify the custom filter and its behavior.
- Introduce links for all spam counts in the report.
This commit is contained in:
parent
90ce942108
commit
fae2d5ff2c
|
@ -1,7 +1,13 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
class AiSpamSerializer < ApplicationSerializer
|
class AiSpamSerializer < ApplicationSerializer
|
||||||
attributes :is_enabled, :llm_id, :custom_instructions, :available_llms, :stats, :flagging_username
|
attributes :is_enabled,
|
||||||
|
:llm_id,
|
||||||
|
:custom_instructions,
|
||||||
|
:available_llms,
|
||||||
|
:stats,
|
||||||
|
:flagging_username,
|
||||||
|
:spam_score_type
|
||||||
|
|
||||||
def is_enabled
|
def is_enabled
|
||||||
object[:enabled]
|
object[:enabled]
|
||||||
|
@ -25,6 +31,10 @@ class AiSpamSerializer < ApplicationSerializer
|
||||||
object[:flagging_username]
|
object[:flagging_username]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def spam_score_type
|
||||||
|
ReviewableScore.types[:spam]
|
||||||
|
end
|
||||||
|
|
||||||
def stats
|
def stats
|
||||||
{
|
{
|
||||||
scanned_count: object[:stats].scanned_count.to_i,
|
scanned_count: object[:stats].scanned_count.to_i,
|
||||||
|
|
|
@ -125,9 +125,30 @@ export default class AiSpam extends Component {
|
||||||
label: i18n("discourse_ai.spam.spam_detected"),
|
label: i18n("discourse_ai.spam.spam_detected"),
|
||||||
value: this.stats.spam_detected,
|
value: this.stats.spam_detected,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const falsePositives = {
|
||||||
|
label: i18n("discourse_ai.spam.false_positives"),
|
||||||
|
value: this.stats.false_positives,
|
||||||
|
tooltip: i18n("discourse_ai.spam.stat_tooltips.incorrectly_flagged"),
|
||||||
|
};
|
||||||
|
|
||||||
|
const falseNegatives = {
|
||||||
|
label: i18n("discourse_ai.spam.false_negatives"),
|
||||||
|
value: this.stats.false_negatives,
|
||||||
|
tooltip: i18n("discourse_ai.spam.stat_tooltips.missed_spam"),
|
||||||
|
};
|
||||||
|
|
||||||
if (this.args.model.flagging_username) {
|
if (this.args.model.flagging_username) {
|
||||||
detected.href = getURL(
|
detected.href = getURL(
|
||||||
"/review?flagged_by=" + this.args.model.flagging_username
|
`/review?flagged_by=${this.args.model.flagging_username}&status=all&sort_order=created_at`
|
||||||
|
);
|
||||||
|
|
||||||
|
falsePositives.href = getURL(
|
||||||
|
`/review?flagged_by=${this.args.model.flagging_username}&status=rejected&sort_order=created_at`
|
||||||
|
);
|
||||||
|
|
||||||
|
falseNegatives.href = getURL(
|
||||||
|
`/review?status=approved&sort_order=created_at&additional_filters={"ai_spam_false_negative":true}&order=created&score_type=${this.args.model.spam_score_type}`
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return [
|
return [
|
||||||
|
@ -136,16 +157,8 @@ export default class AiSpam extends Component {
|
||||||
value: this.stats.scanned_count,
|
value: this.stats.scanned_count,
|
||||||
},
|
},
|
||||||
detected,
|
detected,
|
||||||
{
|
falsePositives,
|
||||||
label: i18n("discourse_ai.spam.false_positives"),
|
falseNegatives,
|
||||||
value: this.stats.false_positives,
|
|
||||||
tooltip: i18n("discourse_ai.spam.stat_tooltips.incorrectly_flagged"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
label: i18n("discourse_ai.spam.false_negatives"),
|
|
||||||
value: this.stats.false_negatives,
|
|
||||||
tooltip: i18n("discourse_ai.spam.stat_tooltips.missed_spam"),
|
|
||||||
},
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,25 @@ module DiscourseAi
|
||||||
plugin.on(:site_setting_changed) do |name, _old_value, new_value|
|
plugin.on(:site_setting_changed) do |name, _old_value, new_value|
|
||||||
SpamScanner.ensure_flagging_user! if name == :ai_spam_detection_enabled && new_value
|
SpamScanner.ensure_flagging_user! if name == :ai_spam_detection_enabled && new_value
|
||||||
end
|
end
|
||||||
|
|
||||||
|
custom_filter = [
|
||||||
|
:ai_spam_false_negative,
|
||||||
|
Proc.new do |results, value|
|
||||||
|
if value
|
||||||
|
results.where(<<~SQL)
|
||||||
|
EXISTS (
|
||||||
|
SELECT 1 FROM ai_spam_logs
|
||||||
|
WHERE NOT is_spam
|
||||||
|
AND post_id = target_id AND target_type = 'Post'
|
||||||
|
)
|
||||||
|
SQL
|
||||||
|
else
|
||||||
|
results
|
||||||
|
end
|
||||||
|
end,
|
||||||
|
]
|
||||||
|
|
||||||
|
Reviewable.add_custom_filter(custom_filter)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -14,33 +14,34 @@ module DiscourseAi
|
||||||
asl.post_id,
|
asl.post_id,
|
||||||
asl.is_spam,
|
asl.is_spam,
|
||||||
r.status as reviewable_status,
|
r.status as reviewable_status,
|
||||||
r.target_type,
|
CASE WHEN EXISTS (
|
||||||
r.potential_spam
|
SELECT 1 FROM reviewable_scores rs
|
||||||
|
JOIN reviewables r1 ON r1.id = rs.reviewable_id
|
||||||
|
WHERE r1.target_id = asl.post_id
|
||||||
|
AND r1.target_type = 'Post'
|
||||||
|
AND rs.reviewable_score_type = :spam_score_type
|
||||||
|
AND NOT is_spam
|
||||||
|
AND r1.status IN (:spam)
|
||||||
|
) THEN true ELSE false END AS missed_spam
|
||||||
FROM ai_spam_logs asl
|
FROM ai_spam_logs asl
|
||||||
LEFT JOIN reviewables r ON r.id = asl.reviewable_id
|
LEFT JOIN reviewables r ON r.id = asl.reviewable_id
|
||||||
WHERE asl.created_at > :min_date
|
WHERE asl.created_at > :min_date
|
||||||
),
|
|
||||||
post_reviewables AS (
|
|
||||||
SELECT
|
|
||||||
target_id post_id,
|
|
||||||
COUNT(DISTINCT target_id) as false_negative_count
|
|
||||||
FROM reviewables
|
|
||||||
WHERE target_type = 'Post'
|
|
||||||
AND status IN (:spam)
|
|
||||||
AND potential_spam
|
|
||||||
AND target_id IN (SELECT post_id FROM spam_stats)
|
|
||||||
GROUP BY target_id
|
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
COUNT(*) AS scanned_count,
|
COUNT(*) AS scanned_count,
|
||||||
SUM(CASE WHEN is_spam THEN 1 ELSE 0 END) AS spam_detected,
|
SUM(CASE WHEN is_spam THEN 1 ELSE 0 END) AS spam_detected,
|
||||||
COUNT(CASE WHEN reviewable_status IN (:ham) THEN 1 END) AS false_positives,
|
COUNT(CASE WHEN reviewable_status IN (:ham) THEN 1 END) AS false_positives,
|
||||||
COALESCE(SUM(pr.false_negative_count), 0) AS false_negatives
|
COUNT(CASE WHEN missed_spam THEN 1 END) AS false_negatives
|
||||||
FROM spam_stats
|
FROM spam_stats
|
||||||
LEFT JOIN post_reviewables pr USING (post_id)
|
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
DB.query(sql, spam: spam_status, ham: ham_status, min_date: min_date).first
|
DB.query(
|
||||||
|
sql,
|
||||||
|
spam: spam_status,
|
||||||
|
ham: ham_status,
|
||||||
|
min_date: min_date,
|
||||||
|
spam_score_type: ReviewableScore.types[:spam],
|
||||||
|
).first
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
RSpec.describe ReviewablesController do
|
||||||
|
fab!(:post1) { Fabricate(:post) }
|
||||||
|
fab!(:post2) { Fabricate(:post) }
|
||||||
|
fab!(:admin)
|
||||||
|
fab!(:llm_model)
|
||||||
|
|
||||||
|
fab!(:reviewable) do
|
||||||
|
Reviewable.create!(
|
||||||
|
target: post1,
|
||||||
|
topic: post2.topic,
|
||||||
|
type: ReviewablePost,
|
||||||
|
created_by: admin,
|
||||||
|
status: Reviewable.statuses[:pending],
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
fab!(:reviewable2) do
|
||||||
|
Reviewable.create!(
|
||||||
|
target: post2,
|
||||||
|
topic: post2.topic,
|
||||||
|
type: ReviewablePost,
|
||||||
|
created_by: admin,
|
||||||
|
status: Reviewable.statuses[:pending],
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
fab!(:ai_spam_log_missed) do
|
||||||
|
AiSpamLog.create!(is_spam: false, post_id: post1.id, llm_model_id: llm_model.id)
|
||||||
|
end
|
||||||
|
# we amend the behavior with a custom filter so we need to confirm it works
|
||||||
|
it "properly applies custom filter" do
|
||||||
|
sign_in(admin)
|
||||||
|
|
||||||
|
get '/review.json?additional_filters={"ai_spam_false_negative":true}'
|
||||||
|
expect(response.status).to eq(200)
|
||||||
|
|
||||||
|
json = JSON.parse(response.body)
|
||||||
|
expect(json["reviewables"].length).to eq(1)
|
||||||
|
|
||||||
|
get "/review.json"
|
||||||
|
expect(response.status).to eq(200)
|
||||||
|
json = JSON.parse(response.body)
|
||||||
|
expect(json["reviewables"].length).to eq(2)
|
||||||
|
end
|
||||||
|
end
|
Loading…
Reference in New Issue