FIX: Changes to the sentiment reports. (#289)

This PR aims to clarify sentiment reports by replacing averages with a count of posts that have one of their values above a threshold (60), meaning we have some level of confidence they are, in fact, positive or negative.

The same thing happens with post emotions, with the difference that a single post can have multiple values above the threshold (30). Additionally, we dropped the "Neutral" axis.

We also reworded the tooltip next to each report title, and added an early return to signal we have no data available instead of displaying an empty chart.
This commit is contained in:
Roman Rizzi 2023-11-09 17:23:25 -03:00 committed by GitHub
parent 0c83963a28
commit d0198c5c5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 59 additions and 29 deletions

View File

@ -102,12 +102,12 @@ en:
reports: reports:
overall_sentiment: overall_sentiment:
title: "Overall sentiment" title: "Overall sentiment"
description: "The average percentage of positive and negative sentiments in public posts." description: "This chart compares the number of posts classified either positive or negative."
xaxis: "Positive(%)" xaxis: "Positive(%)"
yaxis: "Date" yaxis: "Date"
post_emotion: post_emotion:
title: "Post emotion" title: "Post emotion"
description: "The average percentage of emotions present in public posts grouped by the poster's trust level." description: "Number of posts classified with one of the following emotions, grouped by poster's trust level."
xaxis: xaxis:
yaxis: yaxis:

View File

@ -21,13 +21,21 @@ module DiscourseAi
plugin.add_report("overall_sentiment") do |report| plugin.add_report("overall_sentiment") do |report|
report.modes = [:stacked_chart] report.modes = [:stacked_chart]
threshold = 60
sentiment_count_sql = Proc.new { |sentiment| <<~SQL }
COUNT(
CASE WHEN (cr.classification::jsonb->'#{sentiment}')::integer > :threshold THEN 1 ELSE NULL END
) AS #{sentiment}_count
SQL
grouped_sentiments = grouped_sentiments =
DB.query(<<~SQL, report_start: report.start_date, report_end: report.end_date) DB.query(
<<~SQL,
SELECT SELECT
DATE_TRUNC('day', p.created_at)::DATE AS posted_at, DATE_TRUNC('day', p.created_at)::DATE AS posted_at,
AVG((cr.classification::jsonb->'positive')::integer) AS avg_positive, #{sentiment_count_sql.call("positive")},
-AVG((cr.classification::jsonb->'negative')::integer) AS avg_negative -#{sentiment_count_sql.call("negative")}
FROM FROM
classification_results AS cr classification_results AS cr
INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post' INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post'
@ -40,9 +48,15 @@ module DiscourseAi
(p.created_at > :report_start AND p.created_at < :report_end) (p.created_at > :report_start AND p.created_at < :report_end)
GROUP BY DATE_TRUNC('day', p.created_at) GROUP BY DATE_TRUNC('day', p.created_at)
SQL SQL
report_start: report.start_date,
report_end: report.end_date,
threshold: threshold,
)
data_points = %w[positive negative] data_points = %w[positive negative]
return report if grouped_sentiments.empty?
report.data = report.data =
data_points.map do |point| data_points.map do |point|
{ {
@ -51,7 +65,7 @@ module DiscourseAi
label: I18n.t("discourse_ai.sentiment.reports.overall_sentiment.#{point}"), label: I18n.t("discourse_ai.sentiment.reports.overall_sentiment.#{point}"),
data: data:
grouped_sentiments.map do |gs| grouped_sentiments.map do |gs|
{ x: gs.posted_at, y: gs.public_send("avg_#{point}") } { x: gs.posted_at, y: gs.public_send("#{point}_count") }
end, end,
} }
end end
@ -59,18 +73,25 @@ module DiscourseAi
plugin.add_report("post_emotion") do |report| plugin.add_report("post_emotion") do |report|
report.modes = [:radar] report.modes = [:radar]
threshold = 30
emotion_count_clause = Proc.new { |emotion| <<~SQL }
COUNT(
CASE WHEN (cr.classification::jsonb->'#{emotion}')::integer > :threshold THEN 1 ELSE NULL END
) AS #{emotion}_count
SQL
grouped_emotions = grouped_emotions =
DB.query(<<~SQL, report_start: report.start_date, report_end: report.end_date) DB.query(
<<~SQL,
SELECT SELECT
u.trust_level AS trust_level, u.trust_level AS trust_level,
AVG((cr.classification::jsonb->'sadness')::integer) AS avg_sadness, #{emotion_count_clause.call("sadness")},
AVG((cr.classification::jsonb->'surprise')::integer) AS avg_surprise, #{emotion_count_clause.call("surprise")},
AVG((cr.classification::jsonb->'neutral')::integer) AS avg_neutral, #{emotion_count_clause.call("fear")},
AVG((cr.classification::jsonb->'fear')::integer) AS avg_fear, #{emotion_count_clause.call("anger")},
AVG((cr.classification::jsonb->'anger')::integer) AS avg_anger, #{emotion_count_clause.call("joy")},
AVG((cr.classification::jsonb->'joy')::integer) AS avg_joy, #{emotion_count_clause.call("disgust")}
AVG((cr.classification::jsonb->'disgust')::integer) AS avg_disgust
FROM FROM
classification_results AS cr classification_results AS cr
INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post' INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post'
@ -84,10 +105,16 @@ module DiscourseAi
(p.created_at > :report_start AND p.created_at < :report_end) (p.created_at > :report_start AND p.created_at < :report_end)
GROUP BY u.trust_level GROUP BY u.trust_level
SQL SQL
report_start: report.start_date,
report_end: report.end_date,
threshold: threshold,
)
emotions = %w[sadness surprise neutral fear anger joy disgust] emotions = %w[sadness surprise fear anger joy disgust]
level_groups = [[0, 1], [2, 3, 4]] level_groups = [[0, 1], [2, 3, 4]]
return report if grouped_emotions.empty?
report.data = report.data =
level_groups.each_with_index.map do |lg, idx| level_groups.each_with_index.map do |lg, idx|
tl_emotion_avgs = grouped_emotions.select { |ge| lg.include?(ge.trust_level) } tl_emotion_avgs = grouped_emotions.select { |ge| lg.include?(ge.trust_level) }
@ -102,8 +129,8 @@ module DiscourseAi
x: I18n.t("discourse_ai.sentiment.reports.post_emotion.#{e}"), x: I18n.t("discourse_ai.sentiment.reports.post_emotion.#{e}"),
y: y:
tl_emotion_avgs.sum do |tl_emotion_avg| tl_emotion_avgs.sum do |tl_emotion_avg|
tl_emotion_avg.public_send("avg_#{e}").to_i tl_emotion_avg.public_send("#{e}_count").to_i
end / [tl_emotion_avgs.size, 1].max, end,
} }
end, end,
} }

View File

@ -62,7 +62,7 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
describe "overall_sentiment report" do describe "overall_sentiment report" do
let(:positive_classification) { { negative: 2, neutral: 30, positive: 70 } } let(:positive_classification) { { negative: 2, neutral: 30, positive: 70 } }
let(:negative_classification) { { negative: 60, neutral: 2, positive: 10 } } let(:negative_classification) { { negative: 65, neutral: 2, positive: 10 } }
def sentiment_classification(post, classification) def sentiment_classification(post, classification)
Fabricate(:sentiment_classification, target: post, classification: classification) Fabricate(:sentiment_classification, target: post, classification: classification)
@ -73,17 +73,12 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
sentiment_classification(post_2, negative_classification) sentiment_classification(post_2, negative_classification)
sentiment_classification(pm, positive_classification) sentiment_classification(pm, positive_classification)
expected_positive =
(positive_classification[:positive] + negative_classification[:positive]) / 2
expected_negative =
-(positive_classification[:negative] + negative_classification[:negative]) / 2
report = Report.find("overall_sentiment") report = Report.find("overall_sentiment")
positive_data_point = report.data[0][:data].first[:y].to_i positive_data_point = report.data[0][:data].first[:y].to_i
negative_data_point = report.data[1][:data].first[:y].to_i negative_data_point = report.data[1][:data].first[:y].to_i
expect(positive_data_point).to eq(expected_positive) expect(positive_data_point).to eq(1)
expect(negative_data_point).to eq(expected_negative) expect(negative_data_point).to eq(-1)
end end
end end
@ -109,17 +104,25 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
post_1.user.update!(trust_level: TrustLevel[0]) post_1.user.update!(trust_level: TrustLevel[0])
post_2.user.update!(trust_level: TrustLevel[3]) post_2.user.update!(trust_level: TrustLevel[3])
pm.user.update!(trust_level: TrustLevel[0]) pm.user.update!(trust_level: TrustLevel[0])
threshold = 30
emotion_classification(post_1, emotion_1) emotion_classification(post_1, emotion_1)
emotion_classification(post_2, emotion_2) emotion_classification(post_2, emotion_2)
emotion_classification(pm, emotion_2) emotion_classification(pm, emotion_2)
report = Report.find("post_emotion") report = Report.find("post_emotion")
tl_01_point = report.data[0][:data].first tl_01_point = report.data[0][:data]
tl_234_point = report.data[1][:data].first tl_234_point = report.data[1][:data]
expect(tl_01_point[:y]).to eq(emotion_1[tl_01_point[:x].downcase.to_sym]) tl_01_point.each do |point|
expect(tl_234_point[:y]).to eq(emotion_2[tl_234_point[:x].downcase.to_sym]) expected = emotion_1[point[:x].downcase.to_sym] > threshold ? 1 : 0
expect(point[:y]).to eq(expected)
end
tl_234_point.each do |point|
expected = emotion_2[point[:x].downcase.to_sym] > threshold ? 1 : 0
expect(point[:y]).to eq(expected)
end
end end
it "doesn't try to divide by zero if there are no data in a TL group" do it "doesn't try to divide by zero if there are no data in a TL group" do