FIX: Changes to the sentiment reports. (#289)

This PR aims to clarify sentiment reports by replacing averages with a count of posts that have one of their values above a threshold (60), meaning we have some level of confidence they are, in fact, positive or negative.

The same thing happens with post emotions, with the difference that a post can have multiple values above the threshold (30). Additionally, we dropped the "Neutral" axis.

We also reworded the tooltip next to each report title, and added an early return to signal we have no data available instead of displaying an empty chart.
This commit is contained in:
Roman Rizzi 2023-11-09 17:23:25 -03:00 committed by GitHub
parent 0c83963a28
commit d0198c5c5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 59 additions and 29 deletions

View File

@ -102,12 +102,12 @@ en:
reports:
overall_sentiment:
title: "Overall sentiment"
description: "The average percentage of positive and negative sentiments in public posts."
description: "This chart compares the number of posts classified either positive or negative."
xaxis: "Positive(%)"
yaxis: "Date"
post_emotion:
title: "Post emotion"
description: "The average percentage of emotions present in public posts grouped by the poster's trust level."
description: "Number of posts classified with one of the following emotions, grouped by poster's trust level."
xaxis:
yaxis:

View File

@ -21,13 +21,21 @@ module DiscourseAi
plugin.add_report("overall_sentiment") do |report|
report.modes = [:stacked_chart]
threshold = 60
sentiment_count_sql = Proc.new { |sentiment| <<~SQL }
COUNT(
CASE WHEN (cr.classification::jsonb->'#{sentiment}')::integer > :threshold THEN 1 ELSE NULL END
) AS #{sentiment}_count
SQL
grouped_sentiments =
DB.query(<<~SQL, report_start: report.start_date, report_end: report.end_date)
DB.query(
<<~SQL,
SELECT
DATE_TRUNC('day', p.created_at)::DATE AS posted_at,
AVG((cr.classification::jsonb->'positive')::integer) AS avg_positive,
-AVG((cr.classification::jsonb->'negative')::integer) AS avg_negative
#{sentiment_count_sql.call("positive")},
-#{sentiment_count_sql.call("negative")}
FROM
classification_results AS cr
INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post'
@ -40,9 +48,15 @@ module DiscourseAi
(p.created_at > :report_start AND p.created_at < :report_end)
GROUP BY DATE_TRUNC('day', p.created_at)
SQL
report_start: report.start_date,
report_end: report.end_date,
threshold: threshold,
)
data_points = %w[positive negative]
return report if grouped_sentiments.empty?
report.data =
data_points.map do |point|
{
@ -51,7 +65,7 @@ module DiscourseAi
label: I18n.t("discourse_ai.sentiment.reports.overall_sentiment.#{point}"),
data:
grouped_sentiments.map do |gs|
{ x: gs.posted_at, y: gs.public_send("avg_#{point}") }
{ x: gs.posted_at, y: gs.public_send("#{point}_count") }
end,
}
end
@ -59,18 +73,25 @@ module DiscourseAi
plugin.add_report("post_emotion") do |report|
report.modes = [:radar]
threshold = 30
emotion_count_clause = Proc.new { |emotion| <<~SQL }
COUNT(
CASE WHEN (cr.classification::jsonb->'#{emotion}')::integer > :threshold THEN 1 ELSE NULL END
) AS #{emotion}_count
SQL
grouped_emotions =
DB.query(<<~SQL, report_start: report.start_date, report_end: report.end_date)
DB.query(
<<~SQL,
SELECT
u.trust_level AS trust_level,
AVG((cr.classification::jsonb->'sadness')::integer) AS avg_sadness,
AVG((cr.classification::jsonb->'surprise')::integer) AS avg_surprise,
AVG((cr.classification::jsonb->'neutral')::integer) AS avg_neutral,
AVG((cr.classification::jsonb->'fear')::integer) AS avg_fear,
AVG((cr.classification::jsonb->'anger')::integer) AS avg_anger,
AVG((cr.classification::jsonb->'joy')::integer) AS avg_joy,
AVG((cr.classification::jsonb->'disgust')::integer) AS avg_disgust
#{emotion_count_clause.call("sadness")},
#{emotion_count_clause.call("surprise")},
#{emotion_count_clause.call("fear")},
#{emotion_count_clause.call("anger")},
#{emotion_count_clause.call("joy")},
#{emotion_count_clause.call("disgust")}
FROM
classification_results AS cr
INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post'
@ -84,10 +105,16 @@ module DiscourseAi
(p.created_at > :report_start AND p.created_at < :report_end)
GROUP BY u.trust_level
SQL
report_start: report.start_date,
report_end: report.end_date,
threshold: threshold,
)
emotions = %w[sadness surprise neutral fear anger joy disgust]
emotions = %w[sadness surprise fear anger joy disgust]
level_groups = [[0, 1], [2, 3, 4]]
return report if grouped_emotions.empty?
report.data =
level_groups.each_with_index.map do |lg, idx|
tl_emotion_avgs = grouped_emotions.select { |ge| lg.include?(ge.trust_level) }
@ -102,8 +129,8 @@ module DiscourseAi
x: I18n.t("discourse_ai.sentiment.reports.post_emotion.#{e}"),
y:
tl_emotion_avgs.sum do |tl_emotion_avg|
tl_emotion_avg.public_send("avg_#{e}").to_i
end / [tl_emotion_avgs.size, 1].max,
tl_emotion_avg.public_send("#{e}_count").to_i
end,
}
end,
}

View File

@ -62,7 +62,7 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
describe "overall_sentiment report" do
let(:positive_classification) { { negative: 2, neutral: 30, positive: 70 } }
let(:negative_classification) { { negative: 60, neutral: 2, positive: 10 } }
let(:negative_classification) { { negative: 65, neutral: 2, positive: 10 } }
def sentiment_classification(post, classification)
Fabricate(:sentiment_classification, target: post, classification: classification)
@ -73,17 +73,12 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
sentiment_classification(post_2, negative_classification)
sentiment_classification(pm, positive_classification)
expected_positive =
(positive_classification[:positive] + negative_classification[:positive]) / 2
expected_negative =
-(positive_classification[:negative] + negative_classification[:negative]) / 2
report = Report.find("overall_sentiment")
positive_data_point = report.data[0][:data].first[:y].to_i
negative_data_point = report.data[1][:data].first[:y].to_i
expect(positive_data_point).to eq(expected_positive)
expect(negative_data_point).to eq(expected_negative)
expect(positive_data_point).to eq(1)
expect(negative_data_point).to eq(-1)
end
end
@ -109,17 +104,25 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
post_1.user.update!(trust_level: TrustLevel[0])
post_2.user.update!(trust_level: TrustLevel[3])
pm.user.update!(trust_level: TrustLevel[0])
threshold = 30
emotion_classification(post_1, emotion_1)
emotion_classification(post_2, emotion_2)
emotion_classification(pm, emotion_2)
report = Report.find("post_emotion")
tl_01_point = report.data[0][:data].first
tl_234_point = report.data[1][:data].first
tl_01_point = report.data[0][:data]
tl_234_point = report.data[1][:data]
expect(tl_01_point[:y]).to eq(emotion_1[tl_01_point[:x].downcase.to_sym])
expect(tl_234_point[:y]).to eq(emotion_2[tl_234_point[:x].downcase.to_sym])
tl_01_point.each do |point|
expected = emotion_1[point[:x].downcase.to_sym] > threshold ? 1 : 0
expect(point[:y]).to eq(expected)
end
tl_234_point.each do |point|
expected = emotion_2[point[:x].downcase.to_sym] > threshold ? 1 : 0
expect(point[:y]).to eq(expected)
end
end
it "doesn't try to divide by zero if there are no data in a TL group" do