FIX: more robust summary implementation (#750)
When navigating between topic we were not correctly resetting internal state for summarization. This leads to a situation where incorrect summaries can be displayed to users and wrong summaries can be displayed. Additionally our controller for grabbing summaries was always streaming results via message bus, which could be delayed when sidekiq is overloaded. We now will return the cached summary right away if it is available direct from REST endpoint.
This commit is contained in:
parent
f72ab12761
commit
14443bf890
|
@ -14,19 +14,32 @@ module DiscourseAi
|
|||
RateLimiter.new(current_user, "summary", 6, 5.minutes).performed! if current_user
|
||||
|
||||
opts = params.permit(:skip_age_check)
|
||||
skip_age_check = opts[:skip_age_check] == "true"
|
||||
|
||||
if params[:stream] && current_user
|
||||
cached_summary = DiscourseAi::TopicSummarization.cached_summary(topic, current_user)
|
||||
|
||||
if cached_summary && !skip_age_check
|
||||
render_serialized(cached_summary, AiTopicSummarySerializer)
|
||||
return
|
||||
end
|
||||
|
||||
Jobs.enqueue(
|
||||
:stream_topic_ai_summary,
|
||||
topic_id: topic.id,
|
||||
user_id: current_user.id,
|
||||
opts: opts.as_json,
|
||||
skip_age_check: skip_age_check,
|
||||
)
|
||||
|
||||
render json: success_json
|
||||
else
|
||||
hijack do
|
||||
summary = DiscourseAi::TopicSummarization.summarize(topic, current_user, opts)
|
||||
summary =
|
||||
DiscourseAi::TopicSummarization.summarize(
|
||||
topic,
|
||||
current_user,
|
||||
skip_age_check: skip_age_check,
|
||||
)
|
||||
render_serialized(summary, AiTopicSummarySerializer)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -14,15 +14,15 @@ module Jobs
|
|||
guardian = Guardian.new(user)
|
||||
return unless guardian.can_see?(topic)
|
||||
|
||||
opts = args[:opts] || {}
|
||||
skip_age_check = !!args[:skip_age_check]
|
||||
|
||||
streamed_summary = +""
|
||||
start = Time.now
|
||||
|
||||
summary =
|
||||
DiscourseAi::TopicSummarization
|
||||
.new(strategy)
|
||||
.summarize(topic, user, opts) do |partial_summary|
|
||||
.new(strategy, topic, user)
|
||||
.summarize(skip_age_check: skip_age_check) do |partial_summary|
|
||||
streamed_summary << partial_summary
|
||||
|
||||
# Throttle updates.
|
||||
|
|
|
@ -2,33 +2,36 @@
|
|||
|
||||
module DiscourseAi
|
||||
class TopicSummarization
|
||||
def self.summarize(topic, user, opts = {}, &on_partial_blk)
|
||||
new(DiscourseAi::Summarization.default_strategy).summarize(topic, user, opts, &on_partial_blk)
|
||||
def self.summarize(topic, user, skip_age_check: false, &on_partial_blk)
|
||||
new(DiscourseAi::Summarization.default_strategy, topic, user).summarize(
|
||||
skip_age_check: skip_age_check,
|
||||
&on_partial_blk
|
||||
)
|
||||
end
|
||||
|
||||
def initialize(strategy)
|
||||
def self.cached_summary(topic, user)
|
||||
new(DiscourseAi::Summarization.default_strategy, topic, user).cached_summary
|
||||
end
|
||||
|
||||
def initialize(strategy, topic, user)
|
||||
@strategy = strategy
|
||||
@topic = topic
|
||||
@user = user
|
||||
end
|
||||
|
||||
def summarize(topic, user, opts = {}, &on_partial_blk)
|
||||
existing_summary = AiSummary.find_by(target: topic)
|
||||
attr_reader :strategy, :topic, :user
|
||||
|
||||
def cached_summary
|
||||
existing_summary
|
||||
end
|
||||
|
||||
def summarize(skip_age_check: false, &on_partial_blk)
|
||||
# Existing summary shouldn't be nil in this scenario because the controller checks its existence.
|
||||
return if !user && !existing_summary
|
||||
|
||||
targets_data = summary_targets(topic).pluck(:post_number, :raw, :username)
|
||||
return existing_summary if use_cached?(skip_age_check)
|
||||
|
||||
current_topic_sha = build_sha(targets_data.map(&:first))
|
||||
can_summarize = Guardian.new(user).can_request_summary?
|
||||
|
||||
if use_cached?(existing_summary, can_summarize, current_topic_sha, !!opts[:skip_age_check])
|
||||
# It's important that we signal a cached summary is outdated
|
||||
existing_summary.mark_as_outdated if new_targets?(existing_summary, current_topic_sha)
|
||||
|
||||
return existing_summary
|
||||
end
|
||||
|
||||
delete_cached_summaries_of(topic) if existing_summary
|
||||
delete_cached_summaries! if existing_summary
|
||||
|
||||
content = {
|
||||
resource_path: "#{Discourse.base_path}/t/-/#{topic.id}",
|
||||
|
@ -36,7 +39,7 @@ module DiscourseAi
|
|||
contents: [],
|
||||
}
|
||||
|
||||
targets_data.map do |(pn, raw, username)|
|
||||
summary_targets_data.map do |(pn, raw, username)|
|
||||
raw_text = raw
|
||||
|
||||
if pn == 1 && topic.topic_embed&.embed_content_cache.present?
|
||||
|
@ -47,19 +50,34 @@ module DiscourseAi
|
|||
end
|
||||
|
||||
summarization_result = strategy.summarize(content, user, &on_partial_blk)
|
||||
|
||||
cache_summary(summarization_result, targets_data.map(&:first), topic)
|
||||
cache_summary(summarization_result)
|
||||
end
|
||||
|
||||
def summary_targets(topic)
|
||||
topic.has_summary? ? best_replies(topic) : pick_selection(topic)
|
||||
def summary_targets
|
||||
topic.has_summary? ? best_replies : pick_selection
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
attr_reader :strategy
|
||||
def summary_sha
|
||||
@summary_sha ||= build_sha(summary_targets_data.map(&:first))
|
||||
end
|
||||
|
||||
def best_replies(topic)
|
||||
def summary_targets_data
|
||||
@summary_targets_data ||= summary_targets.pluck(:post_number, :raw, :username)
|
||||
end
|
||||
|
||||
def existing_summary
|
||||
if !defined?(@existing_summary)
|
||||
@existing_summary = AiSummary.find_by(target: topic)
|
||||
if @existing_summary && existing_summary.original_content_sha != summary_sha
|
||||
@existing_summary.mark_as_outdated
|
||||
end
|
||||
end
|
||||
@existing_summary
|
||||
end
|
||||
|
||||
def best_replies
|
||||
Post
|
||||
.summary(topic.id)
|
||||
.where("post_type = ?", Post.types[:regular])
|
||||
|
@ -68,7 +86,7 @@ module DiscourseAi
|
|||
.order(:post_number)
|
||||
end
|
||||
|
||||
def pick_selection(topic)
|
||||
def pick_selection
|
||||
posts =
|
||||
Post
|
||||
.where(topic_id: topic.id)
|
||||
|
@ -87,31 +105,34 @@ module DiscourseAi
|
|||
.order(:post_number)
|
||||
end
|
||||
|
||||
def delete_cached_summaries_of(topic)
|
||||
def delete_cached_summaries!
|
||||
AiSummary.where(target: topic).destroy_all
|
||||
end
|
||||
|
||||
# For users without permissions to generate a summary or fresh summaries, we return what we have cached.
|
||||
def use_cached?(existing_summary, can_summarize, current_sha, skip_age_check)
|
||||
def use_cached?(skip_age_check)
|
||||
can_summarize = Guardian.new(user).can_request_summary?
|
||||
|
||||
existing_summary &&
|
||||
!(
|
||||
can_summarize && new_targets?(existing_summary, current_sha) &&
|
||||
can_summarize && new_targets? &&
|
||||
(skip_age_check || existing_summary.created_at < 1.hour.ago)
|
||||
)
|
||||
end
|
||||
|
||||
def new_targets?(summary, current_sha)
|
||||
summary.original_content_sha != current_sha
|
||||
def new_targets?
|
||||
existing_summary&.original_content_sha != summary_sha
|
||||
end
|
||||
|
||||
def cache_summary(result, post_numbers, topic)
|
||||
def cache_summary(result)
|
||||
post_numbers = summary_targets_data.map(&:first)
|
||||
|
||||
cached_summary =
|
||||
AiSummary.create!(
|
||||
target: topic,
|
||||
algorithm: strategy.display_name,
|
||||
content_range: (post_numbers.first..post_numbers.last),
|
||||
summarized_text: result[:summary],
|
||||
original_content_sha: build_sha(post_numbers),
|
||||
original_content_sha: summary_sha,
|
||||
)
|
||||
|
||||
cached_summary
|
||||
|
|
|
@ -3,6 +3,7 @@ import { tracked } from "@glimmer/tracking";
|
|||
import { array } from "@ember/helper";
|
||||
import { action } from "@ember/object";
|
||||
import didInsert from "@ember/render-modifiers/modifiers/did-insert";
|
||||
import didUpdate from "@ember/render-modifiers/modifiers/did-update";
|
||||
import willDestroy from "@ember/render-modifiers/modifiers/will-destroy";
|
||||
import { service } from "@ember/service";
|
||||
import DButton from "discourse/components/d-button";
|
||||
|
@ -44,6 +45,17 @@ export default class AiSummaryBox extends Component {
|
|||
return outdatedText;
|
||||
}
|
||||
|
||||
resetSummary() {
|
||||
this.text = "";
|
||||
this.summarizedOn = null;
|
||||
this.summarizedBy = null;
|
||||
this.newPostsSinceSummary = null;
|
||||
this.outdated = false;
|
||||
this.canRegenerate = false;
|
||||
this.loading = false;
|
||||
this._channel = null;
|
||||
}
|
||||
|
||||
get topRepliesSummaryEnabled() {
|
||||
return this.args.outletArgs.postStream.summary;
|
||||
}
|
||||
|
@ -57,8 +69,12 @@ export default class AiSummaryBox extends Component {
|
|||
}
|
||||
|
||||
@bind
|
||||
subscribe() {
|
||||
subscribe(unsubscribe) {
|
||||
if (unsubscribe && this._channel) {
|
||||
this.unsubscribe();
|
||||
}
|
||||
const channel = `/discourse-ai/summaries/topic/${this.args.outletArgs.topic.id}`;
|
||||
this._channel = channel;
|
||||
this.messageBus.subscribe(channel, this._updateSummary);
|
||||
}
|
||||
|
||||
|
@ -68,6 +84,7 @@ export default class AiSummaryBox extends Component {
|
|||
"/discourse-ai/summaries/topic/*",
|
||||
this._updateSummary
|
||||
);
|
||||
this.resetSummary();
|
||||
}
|
||||
|
||||
@action
|
||||
|
@ -106,7 +123,7 @@ export default class AiSummaryBox extends Component {
|
|||
this.summarizedOn = null;
|
||||
|
||||
return ajax(url).then((data) => {
|
||||
if (!this.currentUser) {
|
||||
if (data?.ai_topic_summary?.summarized_text) {
|
||||
data.done = true;
|
||||
this._updateSummary(data);
|
||||
}
|
||||
|
@ -153,6 +170,7 @@ export default class AiSummaryBox extends Component {
|
|||
<div
|
||||
class="ai-summarization-button"
|
||||
{{didInsert this.subscribe}}
|
||||
{{didUpdate this.subscribe @outletArgs.topic.id}}
|
||||
{{willDestroy this.unsubscribe}}
|
||||
>
|
||||
<DMenu
|
||||
|
|
|
@ -11,6 +11,28 @@ RSpec.describe DiscourseAi::Summarization::SummaryController do
|
|||
SiteSetting.ai_summarization_enabled = true
|
||||
end
|
||||
|
||||
context "when streaming" do
|
||||
it "return a cached summary with json payload and does not trigger job if it exists" do
|
||||
section =
|
||||
AiSummary.create!(
|
||||
target: topic,
|
||||
summarized_text: "test",
|
||||
algorithm: "test",
|
||||
original_content_sha: "test",
|
||||
)
|
||||
|
||||
sign_in(Fabricate(:admin))
|
||||
|
||||
get "/discourse-ai/summarization/t/#{topic.id}.json?stream=true"
|
||||
|
||||
expect(response.status).to eq(200)
|
||||
expect(Jobs::StreamTopicAiSummary.jobs.size).to eq(0)
|
||||
|
||||
summary = response.parsed_body
|
||||
expect(summary.dig("ai_topic_summary", "summarized_text")).to eq(section.summarized_text)
|
||||
end
|
||||
end
|
||||
|
||||
context "for anons" do
|
||||
it "returns a 404 if there is no cached summary" do
|
||||
get "/discourse-ai/summarization/t/#{topic.id}.json"
|
||||
|
|
|
@ -14,12 +14,12 @@ describe DiscourseAi::TopicSummarization do
|
|||
let(:strategy) { DiscourseAi::Summarization.default_strategy }
|
||||
|
||||
shared_examples "includes only public-visible topics" do
|
||||
subject { DiscourseAi::TopicSummarization.new(strategy) }
|
||||
subject { DiscourseAi::TopicSummarization.new(strategy, topic, user) }
|
||||
|
||||
it "only includes visible posts" do
|
||||
topic.first_post.update!(hidden: true)
|
||||
|
||||
posts = subject.summary_targets(topic)
|
||||
posts = subject.summary_targets
|
||||
|
||||
expect(posts.none?(&:hidden?)).to eq(true)
|
||||
end
|
||||
|
@ -27,7 +27,7 @@ describe DiscourseAi::TopicSummarization do
|
|||
it "doesn't include posts without users" do
|
||||
topic.first_post.user.destroy!
|
||||
|
||||
posts = subject.summary_targets(topic)
|
||||
posts = subject.summary_targets
|
||||
|
||||
expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil
|
||||
end
|
||||
|
@ -35,7 +35,7 @@ describe DiscourseAi::TopicSummarization do
|
|||
it "doesn't include deleted posts" do
|
||||
topic.first_post.update!(user_id: nil)
|
||||
|
||||
posts = subject.summary_targets(topic)
|
||||
posts = subject.summary_targets
|
||||
|
||||
expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil
|
||||
end
|
||||
|
@ -56,7 +56,7 @@ describe DiscourseAi::TopicSummarization do
|
|||
end
|
||||
|
||||
describe "#summarize" do
|
||||
subject(:summarization) { described_class.new(strategy) }
|
||||
subject(:summarization) { described_class.new(strategy, topic, user) }
|
||||
|
||||
def assert_summary_is_cached(topic, summary_response)
|
||||
cached_summary = AiSummary.find_by(target: topic)
|
||||
|
@ -72,14 +72,14 @@ describe DiscourseAi::TopicSummarization do
|
|||
|
||||
it "caches the summary" do
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([summary]) do
|
||||
section = summarization.summarize(topic, user)
|
||||
section = summarization.summarize
|
||||
expect(section.summarized_text).to eq(summary)
|
||||
assert_summary_is_cached(topic, summary)
|
||||
end
|
||||
end
|
||||
|
||||
it "returns the cached version in subsequent calls" do
|
||||
summarization.summarize(topic, user)
|
||||
summarization.summarize
|
||||
|
||||
cached_summary_text = "This is a cached summary"
|
||||
AiSummary.find_by(target: topic).update!(
|
||||
|
@ -87,7 +87,8 @@ describe DiscourseAi::TopicSummarization do
|
|||
updated_at: 24.hours.ago,
|
||||
)
|
||||
|
||||
section = summarization.summarize(topic, user)
|
||||
summarization = described_class.new(strategy, topic, user)
|
||||
section = summarization.summarize
|
||||
expect(section.summarized_text).to eq(cached_summary_text)
|
||||
end
|
||||
|
||||
|
@ -101,7 +102,7 @@ describe DiscourseAi::TopicSummarization do
|
|||
)
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(["A summary"]) do |spy|
|
||||
summarization.summarize(topic, user)
|
||||
summarization.summarize
|
||||
|
||||
prompt_raw =
|
||||
spy
|
||||
|
@ -133,7 +134,7 @@ describe DiscourseAi::TopicSummarization do
|
|||
# so we create the cached summary totally independantly
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([cached_text]) do
|
||||
strategy = DiscourseAi::Summarization.default_strategy
|
||||
described_class.new(strategy).summarize(topic, user)
|
||||
described_class.new(strategy, topic, user).summarize
|
||||
end
|
||||
|
||||
cached_summary.update!(summarized_text: cached_text, created_at: 24.hours.ago)
|
||||
|
@ -142,7 +143,7 @@ describe DiscourseAi::TopicSummarization do
|
|||
context "when the user can requests new summaries" do
|
||||
context "when there are no new posts" do
|
||||
it "returns the cached summary" do
|
||||
section = summarization.summarize(topic, user)
|
||||
section = summarization.summarize
|
||||
|
||||
expect(section.summarized_text).to eq(cached_text)
|
||||
end
|
||||
|
@ -153,7 +154,7 @@ describe DiscourseAi::TopicSummarization do
|
|||
|
||||
it "returns a new summary" do
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([updated_summary]) do
|
||||
section = summarization.summarize(topic, user)
|
||||
section = summarization.summarize
|
||||
|
||||
expect(section.summarized_text).to eq(updated_summary)
|
||||
end
|
||||
|
@ -165,7 +166,7 @@ describe DiscourseAi::TopicSummarization do
|
|||
it "returns the cached summary" do
|
||||
cached_summary.update!(created_at: 30.minutes.ago)
|
||||
|
||||
section = summarization.summarize(topic, user)
|
||||
section = summarization.summarize
|
||||
|
||||
expect(section.summarized_text).to eq(cached_text)
|
||||
expect(section.outdated).to eq(true)
|
||||
|
@ -173,7 +174,7 @@ describe DiscourseAi::TopicSummarization do
|
|||
|
||||
it "returns a new summary if the skip_age_check flag is passed" do
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([updated_summary]) do
|
||||
section = summarization.summarize(topic, user, skip_age_check: true)
|
||||
section = summarization.summarize(skip_age_check: true)
|
||||
|
||||
expect(section.summarized_text).to eq(updated_summary)
|
||||
end
|
||||
|
@ -190,9 +191,7 @@ describe DiscourseAi::TopicSummarization do
|
|||
partial_result = +""
|
||||
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([summary]) do
|
||||
summarization.summarize(topic, user) do |partial_summary|
|
||||
partial_result << partial_summary
|
||||
end
|
||||
summarization.summarize { |partial_summary| partial_result << partial_summary }
|
||||
end
|
||||
|
||||
expect(partial_result).to eq(summary)
|
||||
|
|
Loading…
Reference in New Issue