FIX: Changes to the sentiment reports. (#289)

This PR aims to clarify the sentiment reports by replacing averages with a count of posts that have one of their values above a threshold (60), meaning we have some level of confidence that they are, in fact, positive or negative. The same applies to post emotions, with the difference that a single post can have multiple values above its threshold (30). Additionally, we dropped the "Neutral" axis. We also reworded the tooltip next to each report title and added an early return to signal that no data is available instead of displaying an empty chart.
2023-11-09 17:23:25 -03:00 · 2023-11-09 17:23:25 -03:00 · d0198c5c5b
parent 0c83963a28
commit d0198c5c5b
3 changed files with 59 additions and 29 deletions
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@ -102,12 +102,12 @@ en:
  reports:
    overall_sentiment:
      title: "Overall sentiment"
-      description: "The average percentage of positive and negative sentiments in public posts."
+      description: "This chart compares the number of posts classified either positive or negative."
      xaxis: "Positive(%)"
      yaxis: "Date"
    post_emotion:
      title: "Post emotion"
-      description: "The average percentage of emotions present in public posts grouped by the poster's trust level."
+      description: "Number of posts classified with one of the following emotions, grouped by poster's trust level."
      xaxis:
      yaxis:
--- a/lib/modules/sentiment/entry_point.rb
+++ b/lib/modules/sentiment/entry_point.rb
@ -21,13 +21,21 @@ module DiscourseAi
        plugin.add_report("overall_sentiment") do |report|
          report.modes = [:stacked_chart]
          threshold = 60
          sentiment_count_sql = Proc.new { |sentiment| <<~SQL }
            COUNT(
              CASE WHEN (cr.classification::jsonb->'#{sentiment}')::integer > :threshold THEN 1 ELSE NULL END
            ) AS #{sentiment}_count
          SQL
          grouped_sentiments =
-            DB.query(<<~SQL, report_start: report.start_date, report_end: report.end_date)
+            DB.query(
              <<~SQL,
            SELECT 
              DATE_TRUNC('day', p.created_at)::DATE AS posted_at,
-              AVG((cr.classification::jsonb->'positive')::integer) AS avg_positive,
+              #{sentiment_count_sql.call("positive")},
-              -AVG((cr.classification::jsonb->'negative')::integer) AS avg_negative
+              -#{sentiment_count_sql.call("negative")}
            FROM 
              classification_results AS cr
            INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post'
@ -40,9 +48,15 @@ module DiscourseAi
              (p.created_at > :report_start AND p.created_at < :report_end)
            GROUP BY DATE_TRUNC('day', p.created_at)
          SQL
              report_start: report.start_date,
              report_end: report.end_date,
              threshold: threshold,
            )
          data_points = %w[positive negative]
          return report if grouped_sentiments.empty?
          report.data =
            data_points.map do |point|
              {
@ -51,7 +65,7 @@ module DiscourseAi
                label: I18n.t("discourse_ai.sentiment.reports.overall_sentiment.#{point}"),
                data:
                  grouped_sentiments.map do |gs|
-                    { x: gs.posted_at, y: gs.public_send("avg_#{point}") }
+                    { x: gs.posted_at, y: gs.public_send("#{point}_count") }
                  end,
              }
            end
@ -59,18 +73,25 @@ module DiscourseAi
        plugin.add_report("post_emotion") do |report|
          report.modes = [:radar]
          threshold = 30
          emotion_count_clause = Proc.new { |emotion| <<~SQL }
            COUNT(
              CASE WHEN (cr.classification::jsonb->'#{emotion}')::integer > :threshold THEN 1 ELSE NULL END
            ) AS #{emotion}_count
          SQL
          grouped_emotions =
-            DB.query(<<~SQL, report_start: report.start_date, report_end: report.end_date)
+            DB.query(
              <<~SQL,
            SELECT 
              u.trust_level AS trust_level,
-              AVG((cr.classification::jsonb->'sadness')::integer) AS avg_sadness,
+              #{emotion_count_clause.call("sadness")},
-              AVG((cr.classification::jsonb->'surprise')::integer) AS avg_surprise,
+              #{emotion_count_clause.call("surprise")},
-              AVG((cr.classification::jsonb->'neutral')::integer) AS avg_neutral,
+              #{emotion_count_clause.call("fear")},
-              AVG((cr.classification::jsonb->'fear')::integer) AS avg_fear,
+              #{emotion_count_clause.call("anger")},
-              AVG((cr.classification::jsonb->'anger')::integer) AS avg_anger,
+              #{emotion_count_clause.call("joy")},
-              AVG((cr.classification::jsonb->'joy')::integer) AS avg_joy,
+              #{emotion_count_clause.call("disgust")}
              AVG((cr.classification::jsonb->'disgust')::integer) AS avg_disgust
            FROM
              classification_results AS cr
            INNER JOIN posts p ON p.id = cr.target_id AND cr.target_type = 'Post'
@ -84,10 +105,16 @@ module DiscourseAi
              (p.created_at > :report_start AND p.created_at < :report_end)
            GROUP BY u.trust_level
          SQL
              report_start: report.start_date,
              report_end: report.end_date,
              threshold: threshold,
            )
-          emotions = %w[sadness surprise neutral fear anger joy disgust]
+          emotions = %w[sadness surprise fear anger joy disgust]
          level_groups = [[0, 1], [2, 3, 4]]
          return report if grouped_emotions.empty?
          report.data =
            level_groups.each_with_index.map do |lg, idx|
              tl_emotion_avgs = grouped_emotions.select { |ge| lg.include?(ge.trust_level) }
@ -102,8 +129,8 @@ module DiscourseAi
                      x: I18n.t("discourse_ai.sentiment.reports.post_emotion.#{e}"),
                      y:
                        tl_emotion_avgs.sum do |tl_emotion_avg|
-                          tl_emotion_avg.public_send("avg_#{e}").to_i
+                          tl_emotion_avg.public_send("#{e}_count").to_i
-                        end / [tl_emotion_avgs.size, 1].max,
+                        end,
                    }
                  end,
              }
--- a/spec/lib/modules/sentiment/entry_point_spec.rb
+++ b/spec/lib/modules/sentiment/entry_point_spec.rb
@ -62,7 +62,7 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
    describe "overall_sentiment report" do
      let(:positive_classification) { { negative: 2, neutral: 30, positive: 70 } }
-      let(:negative_classification) { { negative: 60, neutral: 2, positive: 10 } }
+      let(:negative_classification) { { negative: 65, neutral: 2, positive: 10 } }
      def sentiment_classification(post, classification)
        Fabricate(:sentiment_classification, target: post, classification: classification)
@ -73,17 +73,12 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
        sentiment_classification(post_2, negative_classification)
        sentiment_classification(pm, positive_classification)
        expected_positive =
          (positive_classification[:positive] + negative_classification[:positive]) / 2
        expected_negative =
          -(positive_classification[:negative] + negative_classification[:negative]) / 2
        report = Report.find("overall_sentiment")
        positive_data_point = report.data[0][:data].first[:y].to_i
        negative_data_point = report.data[1][:data].first[:y].to_i
-        expect(positive_data_point).to eq(expected_positive)
+        expect(positive_data_point).to eq(1)
-        expect(negative_data_point).to eq(expected_negative)
+        expect(negative_data_point).to eq(-1)
      end
    end
@ -109,17 +104,25 @@ RSpec.describe DiscourseAi::Sentiment::EntryPoint do
        post_1.user.update!(trust_level: TrustLevel[0])
        post_2.user.update!(trust_level: TrustLevel[3])
        pm.user.update!(trust_level: TrustLevel[0])
        threshold = 30
        emotion_classification(post_1, emotion_1)
        emotion_classification(post_2, emotion_2)
        emotion_classification(pm, emotion_2)
        report = Report.find("post_emotion")
-        tl_01_point = report.data[0][:data].first
+        tl_01_point = report.data[0][:data]
-        tl_234_point = report.data[1][:data].first
+        tl_234_point = report.data[1][:data]
-        expect(tl_01_point[:y]).to eq(emotion_1[tl_01_point[:x].downcase.to_sym])
+        tl_01_point.each do |point|
-        expect(tl_234_point[:y]).to eq(emotion_2[tl_234_point[:x].downcase.to_sym])
+          expected = emotion_1[point[:x].downcase.to_sym] > threshold ? 1 : 0
          expect(point[:y]).to eq(expected)
        end
        tl_234_point.each do |point|
          expected = emotion_2[point[:x].downcase.to_sym] > threshold ? 1 : 0
          expect(point[:y]).to eq(expected)
        end
      end
      it "doesn't try to divide by zero if there are no data in a TL group" do