From 14443bf890cfa248d5acf77b52bd49a6dbc87d91 Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 13 Aug 2024 21:47:47 +1000 Subject: [PATCH] FIX: more robust summary implementation (#750) When navigating between topic we were not correctly resetting internal state for summarization. This leads to a situation where incorrect summaries can be displayed to users and wrong summaries can be displayed. Additionally our controller for grabbing summaries was always streaming results via message bus, which could be delayed when sidekiq is overloaded. We now will return the cached summary right away if it is available direct from REST endpoint. --- .../summarization/summary_controller.rb | 17 +++- app/jobs/regular/stream_topic_ai_summary.rb | 6 +- .../discourse_ai/topic_summarization.rb | 87 ++++++++++++------- .../ai-summary-box.gjs | 22 ++++- .../summarization/summary_controller_spec.rb | 22 +++++ .../discourse_ai/topic_summarization_spec.rb | 33 ++++--- 6 files changed, 130 insertions(+), 57 deletions(-) diff --git a/app/controllers/discourse_ai/summarization/summary_controller.rb b/app/controllers/discourse_ai/summarization/summary_controller.rb index 14f57c62..549bbb44 100644 --- a/app/controllers/discourse_ai/summarization/summary_controller.rb +++ b/app/controllers/discourse_ai/summarization/summary_controller.rb @@ -14,19 +14,32 @@ module DiscourseAi RateLimiter.new(current_user, "summary", 6, 5.minutes).performed! if current_user opts = params.permit(:skip_age_check) + skip_age_check = opts[:skip_age_check] == "true" if params[:stream] && current_user + cached_summary = DiscourseAi::TopicSummarization.cached_summary(topic, current_user) + + if cached_summary && !skip_age_check + render_serialized(cached_summary, AiTopicSummarySerializer) + return + end + Jobs.enqueue( :stream_topic_ai_summary, topic_id: topic.id, user_id: current_user.id, - opts: opts.as_json, + skip_age_check: skip_age_check, ) render json: success_json else hijack do - summary = DiscourseAi::TopicSummarization.summarize(topic, current_user, opts) + summary = + DiscourseAi::TopicSummarization.summarize( + topic, + current_user, + skip_age_check: skip_age_check, + ) render_serialized(summary, AiTopicSummarySerializer) end end diff --git a/app/jobs/regular/stream_topic_ai_summary.rb b/app/jobs/regular/stream_topic_ai_summary.rb index 6641918c..c9756801 100644 --- a/app/jobs/regular/stream_topic_ai_summary.rb +++ b/app/jobs/regular/stream_topic_ai_summary.rb @@ -14,15 +14,15 @@ module Jobs guardian = Guardian.new(user) return unless guardian.can_see?(topic) - opts = args[:opts] || {} + skip_age_check = !!args[:skip_age_check] streamed_summary = +"" start = Time.now summary = DiscourseAi::TopicSummarization - .new(strategy) - .summarize(topic, user, opts) do |partial_summary| + .new(strategy, topic, user) + .summarize(skip_age_check: skip_age_check) do |partial_summary| streamed_summary << partial_summary # Throttle updates. diff --git a/app/services/discourse_ai/topic_summarization.rb b/app/services/discourse_ai/topic_summarization.rb index d5ea3be2..15a94193 100644 --- a/app/services/discourse_ai/topic_summarization.rb +++ b/app/services/discourse_ai/topic_summarization.rb @@ -2,33 +2,36 @@ module DiscourseAi class TopicSummarization - def self.summarize(topic, user, opts = {}, &on_partial_blk) - new(DiscourseAi::Summarization.default_strategy).summarize(topic, user, opts, &on_partial_blk) + def self.summarize(topic, user, skip_age_check: false, &on_partial_blk) + new(DiscourseAi::Summarization.default_strategy, topic, user).summarize( + skip_age_check: skip_age_check, + &on_partial_blk + ) end - def initialize(strategy) + def self.cached_summary(topic, user) + new(DiscourseAi::Summarization.default_strategy, topic, user).cached_summary + end + + def initialize(strategy, topic, user) @strategy = strategy + @topic = topic + @user = user end - def summarize(topic, user, opts = {}, &on_partial_blk) - existing_summary = AiSummary.find_by(target: topic) + attr_reader :strategy, :topic, :user + def cached_summary + existing_summary + end + + def summarize(skip_age_check: false, &on_partial_blk) # Existing summary shouldn't be nil in this scenario because the controller checks its existence. return if !user && !existing_summary - targets_data = summary_targets(topic).pluck(:post_number, :raw, :username) + return existing_summary if use_cached?(skip_age_check) - current_topic_sha = build_sha(targets_data.map(&:first)) - can_summarize = Guardian.new(user).can_request_summary? - - if use_cached?(existing_summary, can_summarize, current_topic_sha, !!opts[:skip_age_check]) - # It's important that we signal a cached summary is outdated - existing_summary.mark_as_outdated if new_targets?(existing_summary, current_topic_sha) - - return existing_summary - end - - delete_cached_summaries_of(topic) if existing_summary + delete_cached_summaries! if existing_summary content = { resource_path: "#{Discourse.base_path}/t/-/#{topic.id}", @@ -36,7 +39,7 @@ module DiscourseAi contents: [], } - targets_data.map do |(pn, raw, username)| + summary_targets_data.map do |(pn, raw, username)| raw_text = raw if pn == 1 && topic.topic_embed&.embed_content_cache.present? @@ -47,19 +50,34 @@ module DiscourseAi end summarization_result = strategy.summarize(content, user, &on_partial_blk) - - cache_summary(summarization_result, targets_data.map(&:first), topic) + cache_summary(summarization_result) end - def summary_targets(topic) - topic.has_summary? ? best_replies(topic) : pick_selection(topic) + def summary_targets + topic.has_summary? ? best_replies : pick_selection end private - attr_reader :strategy + def summary_sha + @summary_sha ||= build_sha(summary_targets_data.map(&:first)) + end - def best_replies(topic) + def summary_targets_data + @summary_targets_data ||= summary_targets.pluck(:post_number, :raw, :username) + end + + def existing_summary + if !defined?(@existing_summary) + @existing_summary = AiSummary.find_by(target: topic) + if @existing_summary && existing_summary.original_content_sha != summary_sha + @existing_summary.mark_as_outdated + end + end + @existing_summary + end + + def best_replies Post .summary(topic.id) .where("post_type = ?", Post.types[:regular]) @@ -68,7 +86,7 @@ module DiscourseAi .order(:post_number) end - def pick_selection(topic) + def pick_selection posts = Post .where(topic_id: topic.id) @@ -87,31 +105,34 @@ module DiscourseAi .order(:post_number) end - def delete_cached_summaries_of(topic) + def delete_cached_summaries! AiSummary.where(target: topic).destroy_all end - # For users without permissions to generate a summary or fresh summaries, we return what we have cached. - def use_cached?(existing_summary, can_summarize, current_sha, skip_age_check) + def use_cached?(skip_age_check) + can_summarize = Guardian.new(user).can_request_summary? + existing_summary && !( - can_summarize && new_targets?(existing_summary, current_sha) && + can_summarize && new_targets? && (skip_age_check || existing_summary.created_at < 1.hour.ago) ) end - def new_targets?(summary, current_sha) - summary.original_content_sha != current_sha + def new_targets? + existing_summary&.original_content_sha != summary_sha end - def cache_summary(result, post_numbers, topic) + def cache_summary(result) + post_numbers = summary_targets_data.map(&:first) + cached_summary = AiSummary.create!( target: topic, algorithm: strategy.display_name, content_range: (post_numbers.first..post_numbers.last), summarized_text: result[:summary], - original_content_sha: build_sha(post_numbers), + original_content_sha: summary_sha, ) cached_summary diff --git a/assets/javascripts/discourse/connectors/topic-map-expanded-after/ai-summary-box.gjs b/assets/javascripts/discourse/connectors/topic-map-expanded-after/ai-summary-box.gjs index 878f27cd..6cfe231b 100644 --- a/assets/javascripts/discourse/connectors/topic-map-expanded-after/ai-summary-box.gjs +++ b/assets/javascripts/discourse/connectors/topic-map-expanded-after/ai-summary-box.gjs @@ -3,6 +3,7 @@ import { tracked } from "@glimmer/tracking"; import { array } from "@ember/helper"; import { action } from "@ember/object"; import didInsert from "@ember/render-modifiers/modifiers/did-insert"; +import didUpdate from "@ember/render-modifiers/modifiers/did-update"; import willDestroy from "@ember/render-modifiers/modifiers/will-destroy"; import { service } from "@ember/service"; import DButton from "discourse/components/d-button"; @@ -44,6 +45,17 @@ export default class AiSummaryBox extends Component { return outdatedText; } + resetSummary() { + this.text = ""; + this.summarizedOn = null; + this.summarizedBy = null; + this.newPostsSinceSummary = null; + this.outdated = false; + this.canRegenerate = false; + this.loading = false; + this._channel = null; + } + get topRepliesSummaryEnabled() { return this.args.outletArgs.postStream.summary; } @@ -57,8 +69,12 @@ export default class AiSummaryBox extends Component { } @bind - subscribe() { + subscribe(unsubscribe) { + if (unsubscribe && this._channel) { + this.unsubscribe(); + } const channel = `/discourse-ai/summaries/topic/${this.args.outletArgs.topic.id}`; + this._channel = channel; this.messageBus.subscribe(channel, this._updateSummary); } @@ -68,6 +84,7 @@ export default class AiSummaryBox extends Component { "/discourse-ai/summaries/topic/*", this._updateSummary ); + this.resetSummary(); } @action @@ -106,7 +123,7 @@ export default class AiSummaryBox extends Component { this.summarizedOn = null; return ajax(url).then((data) => { - if (!this.currentUser) { + if (data?.ai_topic_summary?.summarized_text) { data.done = true; this._updateSummary(data); } @@ -153,6 +170,7 @@ export default class AiSummaryBox extends Component {