mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-03-08 18:29:32 +00:00
When navigating between topics, we were not correctly resetting internal state for summarization. This could lead to incorrect summaries being displayed to users. Additionally, our controller for fetching summaries always streamed results via the message bus, which could be delayed when Sidekiq is overloaded. We now return the cached summary right away, directly from the REST endpoint, if it is available.
146 lines
3.8 KiB
Ruby
146 lines
3.8 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module DiscourseAi
  # Builds, caches and retrieves the AI-generated summary of a single topic.
  #
  # A summary is stored as an AiSummary row whose `original_content_sha` is a
  # digest of the post numbers it was generated from. That digest is compared
  # against the current selection of posts to decide whether the stored
  # summary is still up to date.
  class TopicSummarization
    # Summarizes +topic+ on behalf of +user+ using the default strategy.
    #
    # @param topic the topic to summarize
    # @param user the requesting user, or nil
    # @param skip_age_check [Boolean] when true, the stored summary's age is
    #   not considered when deciding whether it may be regenerated
    # @yield forwarded untouched to the strategy's `summarize` call
    # @return the stored or freshly created AiSummary, or nil when there is
    #   neither a user nor a stored summary
    def self.summarize(topic, user, skip_age_check: false, &on_partial_blk)
      new(DiscourseAi::Summarization.default_strategy, topic, user).summarize(
        skip_age_check: skip_age_check,
        &on_partial_blk
      )
    end

    # Returns the stored summary for +topic+ (or nil) without generating one.
    def self.cached_summary(topic, user)
      new(DiscourseAi::Summarization.default_strategy, topic, user).cached_summary
    end

    # @param strategy object responding to `summarize(content, user, &blk)`
    #   and `display_name`
    # @param topic the topic being summarized
    # @param user the requesting user, or nil
    def initialize(strategy, topic, user)
      @strategy = strategy
      @topic = topic
      @user = user
    end

    attr_reader :strategy, :topic, :user

    # Returns the stored summary, or nil when none exists. As a side effect of
    # the lookup, a stored summary whose digest no longer matches the current
    # post selection receives `mark_as_outdated`.
    def cached_summary
      existing_summary
    end

    # Returns the stored summary when it can be reused; otherwise deletes any
    # stored summaries for the topic, asks the strategy for a new one and
    # stores the result.
    #
    # @param skip_age_check [Boolean] see .summarize
    # @yield forwarded untouched to the strategy's `summarize` call
    # @return the AiSummary that was reused or created, or nil
    def summarize(skip_age_check: false, &on_partial_blk)
      # Existing summary shouldn't be nil in this scenario because the controller checks its existence.
      return unless user || existing_summary

      return existing_summary if use_cached?(skip_age_check)

      delete_cached_summaries! if existing_summary

      content = {
        resource_path: "#{Discourse.base_path}/t/-/#{topic.id}",
        content_title: topic.title,
        contents: summary_contents,
      }

      summarization_result = strategy.summarize(content, user, &on_partial_blk)
      cache_summary(summarization_result)
    end

    # Relation of the posts that feed the summary: `best_replies` when
    # `topic.has_summary?` is truthy, otherwise `pick_selection`.
    def summary_targets
      topic.has_summary? ? best_replies : pick_selection
    end

    private

    # Digest of the post numbers currently selected for summarization.
    def summary_sha
      @summary_sha ||= build_sha(summary_targets_data.map(&:first))
    end

    # Memoized `[post_number, raw, username]` rows for the selected posts.
    def summary_targets_data
      @summary_targets_data ||= summary_targets.pluck(:post_number, :raw, :username)
    end

    # One `{ poster:, id:, text: }` hash per selected post. For post number 1
    # the topic embed's cached content replaces the raw text when present.
    def summary_contents
      summary_targets_data.map do |(post_number, raw, username)|
        embed_text = topic.topic_embed&.embed_content_cache if post_number == 1
        text = embed_text.present? ? embed_text : raw

        { poster: username, id: post_number, text: text }
      end
    end

    # Memoized lookup of the stored summary. `defined?` is used instead of
    # `||=` so that a nil result is remembered and not queried again.
    def existing_summary
      return @existing_summary if defined?(@existing_summary)

      @existing_summary = AiSummary.find_by(target: topic)

      if @existing_summary && @existing_summary.original_content_sha != summary_sha
        @existing_summary.mark_as_outdated
      end

      @existing_summary
    end

    # Regular, non-hidden posts from `Post.summary(topic.id)`, ordered by
    # post number.
    # NOTE(review): presumably `Post.summary` is core's "best of" selection —
    # confirm against the Post model.
    def best_replies
      Post
        .summary(topic.id)
        .where("post_type = ?", Post.types[:regular])
        .where("NOT hidden")
        .joins(:user)
        .order(:post_number)
    end

    # Selection used when the topic has no summary selection of its own: the
    # first 5 posts, the 50 highest-scoring posts and the last 5 posts, taken
    # from the topic's regular, non-hidden posts and returned in post order.
    def pick_selection
      posts =
        Post
          .where(topic_id: topic.id)
          .where("post_type = ?", Post.types[:regular])
          .where("NOT hidden")
          .order(:post_number)

      post_numbers = posts.limit(5).pluck(:post_number)
      post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
      post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)

      Post
        .where(topic_id: topic.id)
        .joins(:user)
        .where("post_number in (?)", post_numbers)
        .order(:post_number)
    end

    # Destroys every stored summary that targets this topic.
    def delete_cached_summaries!
      AiSummary.where(target: topic).destroy_all
    end

    # Truthy when the stored summary should be returned as-is. Regeneration
    # happens only when the user may request summaries, the post selection
    # changed, and either the age check is skipped or the stored summary was
    # created more than an hour ago.
    def use_cached?(skip_age_check)
      can_summarize = Guardian.new(user).can_request_summary?

      existing_summary &&
        !(
          can_summarize && new_targets? &&
            (skip_age_check || existing_summary.created_at < 1.hour.ago)
        )
    end

    # True when the stored digest differs from the current selection's digest
    # (including when there is no stored summary).
    def new_targets?
      existing_summary&.original_content_sha != summary_sha
    end

    # Stores +result+ as a new AiSummary and returns it. The memoized summary
    # is refreshed as well so that later reads on this instance do not return
    # a record that `delete_cached_summaries!` has already destroyed.
    #
    # @param result [Hash] strategy output; only `result[:summary]` is read
    def cache_summary(result)
      post_numbers = summary_targets_data.map(&:first)

      @existing_summary =
        AiSummary.create!(
          target: topic,
          algorithm: strategy.display_name,
          content_range: (post_numbers.first..post_numbers.last),
          summarized_text: result[:summary],
          original_content_sha: summary_sha,
        )
    end

    # SHA-256 hex digest of the concatenated ids.
    # NOTE(review): ids are joined without a separator, so [1, 23] and
    # [1, 2, 3] produce the same digest. Left unchanged because digests are
    # persisted and compared via `original_content_sha`; changing the format
    # would flag every stored summary as changed.
    def build_sha(ids)
      Digest::SHA256.hexdigest(ids.join)
    end
  end
end
|