# frozen_string_literal: true

# Originating patch: "Word Cloud report"
# Target path: app/services/discourse_rewind/rewind/action/word_cloud.rb

# User Word Cloud
#
# Builds the data for a per-user word cloud: the words a user wrote most often
# in their posts during the report window, each with a numeric weight.
module DiscourseRewind
  class Rewind::Action::WordCloud < Action::Base
    # Runs the word-cloud query for the current user and date window.
    #
    # `user` and `date` are not defined in this file; presumably they are
    # provided by Action::Base (TODO confirm). `date` only needs to respond to
    # `first` and `last`.
    #
    # How the query works:
    #
    # 1. popular_words: ts_stat scans the tsvector column
    #    post_search_data.search_data for the user's posts and returns one row
    #    per distinct lexeme with ndoc (number of documents containing it) and
    #    nentry (total occurrences). The 100 most frequent lexemes are kept.
    #    These are normalized lexemes, so they may be stems instead of readable
    #    words.
    # 2. lex: re-tokenizes the raw post text with the 'simple' configuration
    #    (no stemming), then stems each raw word with the 'english'
    #    configuration. This yields a stem -> raw word lookup. For each stem
    #    the shortest raw word is kept, with alphabetical order as tie-break.
    # 3. ranked_words: joins every popular lexeme to the raw words whose
    #    english stem matches it and ranks the candidates shortest first.
    # 4. The final SELECT keeps the best candidate per lexeme (rn = 1).
    #
    # Popular lexemes with no matching raw word are dropped by the inner join.
    #
    # NOTE(review): the join assumes the lexemes stored in search_data are
    # compatible with the 'english' configuration - confirm for sites that
    # index with another search configuration.
    #
    # NOTE(review): BETWEEN is inclusive on both ends. If `date.last` is a bare
    # date (midnight), posts created later on that final day are excluded -
    # confirm against the caller.
    #
    # NOTE(review): the named parameters also appear inside the dollar-quoted
    # inner queries, which only works if DB.query substitutes parameters
    # textually before sending the statement - confirm.
    #
    # @return [Hash] `{ data: { word => score }, identifier: "word-cloud" }`
    #   where score is ndoc + nentry of the matching lexeme. If two lexemes
    #   resolve to the same display word, the later row overwrites the earlier.
    def call
      words = DB.query(<<~SQL, user_id: user.id, date_start: date.first, date_end: date.last)
        WITH popular_words AS (
          -- Most frequent indexed lexemes in the posts of this user
          SELECT
            word,
            ndoc,
            nentry
          FROM
            ts_stat(
              $INNERSQL$
              SELECT
                search_data
              FROM
                post_search_data
              INNER JOIN
                posts ON posts.id = post_search_data.post_id
              WHERE
                posts.user_id = :user_id
                AND posts.created_at BETWEEN :date_start AND :date_end
              $INNERSQL$
            ) AS word_stats
          ORDER BY
            nentry DESC,
            ndoc DESC,
            word
          LIMIT
            100
        ), lex AS (
          -- Lookup from english stem to a readable raw word
          SELECT
            DISTINCT ON (lexeme) to_tsvector('english', word) AS lexeme,
            word AS original_word
          FROM
            ts_stat($INNERSQL$
              SELECT
                to_tsvector('simple', raw)
              FROM
                posts AS p
              WHERE
                p.created_at BETWEEN :date_start AND :date_end
                AND p.user_id = :user_id
            $INNERSQL$)
          -- Makes the row kept by DISTINCT ON deterministic (shortest word wins)
          ORDER BY
            lexeme,
            LENGTH(word),
            word
        ), ranked_words AS (
          SELECT
            popular_words.word,
            popular_words.ndoc,
            popular_words.nentry,
            lex.original_word,
            ROW_NUMBER() OVER (
              PARTITION BY popular_words.word
              ORDER BY LENGTH(lex.original_word), lex.original_word
            ) AS rn
          FROM
            popular_words
          INNER JOIN
            -- plainto_tsquery treats the lexeme as plain text, so lexemes that
            -- contain tsquery operator characters cannot raise a syntax error
            lex ON lex.lexeme @@ plainto_tsquery('english', popular_words.word)
        )
        SELECT
          word,
          ndoc,
          nentry,
          original_word
        FROM
          ranked_words
        WHERE
          rn = 1
        ORDER BY
          ndoc + nentry DESC,
          word
        LIMIT 100
      SQL

      # Weight of a word = documents containing it + total occurrences.
      word_score = words.to_h { |row| [row.original_word, row.ndoc + row.nentry] }

      { data: word_score, identifier: "word-cloud" }
    end
  end
end