From 7ca5ee6cd2bcde191a192d9c3aa8bb6a6c3a4166 Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Fri, 11 Aug 2023 15:08:49 -0300 Subject: [PATCH] FEATURE: Stream topic summaries. (#23065) When we receive the stream parameter, we'll queue a job that periodically publishes partial updates, and after the summarization finishes, a final one with the completed version, plus metadata. `summary-box` listens to these updates via MessageBus, and updates state accordingly. --- .../discourse/app/components/summary-box.hbs | 127 ++++++++++-------- .../discourse/app/components/summary-box.js | 57 +++++--- .../tests/acceptance/topic-summary-test.js | 73 ++++++++++ app/controllers/topics_controller.rb | 25 ++-- app/jobs/regular/stream_topic_summary.rb | 49 +++++++ app/serializers/topic_summary_serializer.rb | 13 ++ app/services/topic_summarization.rb | 4 +- lib/summarization/base.rb | 2 + .../jobs/regular/stream_topic_summary_spec.rb | 76 +++++++++++ spec/requests/topics_controller_spec.rb | 25 +++- spec/services/topic_summarization_spec.rb | 12 ++ spec/support/dummy_custom_summarization.rb | 2 +- spec/system/topic_summarization_spec.rb | 32 ----- 13 files changed, 370 insertions(+), 127 deletions(-) create mode 100644 app/assets/javascripts/discourse/tests/acceptance/topic-summary-test.js create mode 100644 app/jobs/regular/stream_topic_summary.rb create mode 100644 app/serializers/topic_summary_serializer.rb create mode 100644 spec/jobs/regular/stream_topic_summary_spec.rb delete mode 100644 spec/system/topic_summarization_spec.rb diff --git a/app/assets/javascripts/discourse/app/components/summary-box.hbs b/app/assets/javascripts/discourse/app/components/summary-box.hbs index cc1d2223801..828575c1cc0 100644 --- a/app/assets/javascripts/discourse/app/components/summary-box.hbs +++ b/app/assets/javascripts/discourse/app/components/summary-box.hbs @@ -1,62 +1,71 @@ -{{#if @postAttrs.hasTopRepliesSummary}} -

{{html-safe this.topRepliesSummaryInfo}}

-{{/if}} -
- {{#if @postAttrs.summarizable}} - {{#if this.canCollapseSummary}} - - {{else}} - - {{/if}} - {{/if}} - +
{{#if @postAttrs.hasTopRepliesSummary}} - +

{{html-safe this.topRepliesSummaryInfo}}

{{/if}} -
- -{{#if this.showSummaryBox}} -
- {{#if this.loadingSummary}} - - {{else}} -
{{this.summary}}
-
-

- {{i18n "summary.summarized_on" date=this.summarizedOn}} - - {{d-icon "info-circle"}} - - {{i18n "summary.model_used" model=this.summarizedBy}} - - -

- - {{#if this.outdated}} -

- {{this.outdatedSummaryWarningText}} -

- {{/if}} -
+
+ {{#if @postAttrs.summarizable}} + {{#if this.canCollapseSummary}} + + {{else}} + + {{/if}} {{/if}} -
-{{/if}} \ No newline at end of file + + {{#if @postAttrs.hasTopRepliesSummary}} + + {{/if}} +
+ + {{#if this.showSummaryBox}} +
+ {{#if this.loadingSummary}} + + {{else}} +
{{this.summary}}
+ + {{#if this.summarizedOn}} +
+

+ {{i18n "summary.summarized_on" date=this.summarizedOn}} + + {{d-icon "info-circle"}} + + {{i18n "summary.model_used" model=this.summarizedBy}} + + +

+ + {{#if this.outdated}} +

+ {{this.outdatedSummaryWarningText}} +

+ {{/if}} +
+ {{/if}} + {{/if}} +
+ {{/if}} + \ No newline at end of file diff --git a/app/assets/javascripts/discourse/app/components/summary-box.js b/app/assets/javascripts/discourse/app/components/summary-box.js index 2de660f81b7..28e59e12ef8 100644 --- a/app/assets/javascripts/discourse/app/components/summary-box.js +++ b/app/assets/javascripts/discourse/app/components/summary-box.js @@ -7,11 +7,13 @@ import { ajax } from "discourse/lib/ajax"; import { popupAjaxError } from "discourse/lib/ajax-error"; import { cookAsync } from "discourse/lib/text"; import { shortDateNoYear } from "discourse/lib/formatter"; +import { bind } from "discourse-common/utils/decorators"; const MIN_POST_READ_TIME = 4; export default class SummaryBox extends Component { @service siteSettings; + @service messageBus; @tracked summary = ""; @tracked summarizedOn = null; @@ -25,6 +27,40 @@ export default class SummaryBox extends Component { @tracked canCollapseSummary = false; @tracked loadingSummary = false; + @bind + subscribe() { + const channel = `/summaries/topic/${this.args.postAttrs.topicId}`; + this.messageBus.subscribe(channel, this._updateSummary); + } + + @bind + unsubscribe() { + this.messageBus.unsubscribe("/summaries/topic/*", this._updateSummary); + } + + @bind + _updateSummary(update) { + const topicSummary = update.topic_summary; + + if (topicSummary.summarized_text) { + cookAsync(topicSummary.summarized_text).then((cooked) => { + this.summary = cooked; + this.loadingSummary = false; + }); + } + + if (update.done) { + this.summarizedOn = shortDateNoYear(topicSummary.summarized_on); + this.summarizedBy = topicSummary.algorithm; + this.newPostsSinceSummary = topicSummary.new_posts_since_summary; + this.outdated = topicSummary.outdated; + this.newPostsSinceSummary = topicSummary.new_posts_since_summary; + this.canRegenerate = topicSummary.outdated && topicSummary.can_regenerate; + + this.canCollapseSummary = !this.canRegenerate; + } + } + get generateSummaryTitle() { const title = this.canRegenerate ? "summary.buttons.regenerate" @@ -130,27 +166,12 @@ export default class SummaryBox extends Component { this.loadingSummary = true; } - let fetchURL = `/t/${this.args.postAttrs.topicId}/strategy-summary`; + let fetchURL = `/t/${this.args.postAttrs.topicId}/strategy-summary?stream=true`; if (this.canRegenerate) { - fetchURL += "?skip_age_check=true"; + fetchURL += "&skip_age_check=true"; } - ajax(fetchURL) - .then((data) => { - cookAsync(data.summary).then((cooked) => { - this.summary = cooked; - this.summarizedOn = shortDateNoYear(data.summarized_on); - this.summarizedBy = data.summarized_by; - this.newPostsSinceSummary = data.new_posts_since_summary; - this.outdated = data.outdated; - this.newPostsSinceSummary = data.new_posts_since_summary; - this.canRegenerate = data.outdated && data.can_regenerate; - - this.canCollapseSummary = !this.canRegenerate; - }); - }) - .catch(popupAjaxError) - .finally(() => (this.loadingSummary = false)); + ajax(fetchURL).catch(popupAjaxError); } } diff --git a/app/assets/javascripts/discourse/tests/acceptance/topic-summary-test.js b/app/assets/javascripts/discourse/tests/acceptance/topic-summary-test.js new file mode 100644 index 00000000000..b1828c6ab39 --- /dev/null +++ b/app/assets/javascripts/discourse/tests/acceptance/topic-summary-test.js @@ -0,0 +1,73 @@ +import { + acceptance, + exists, + publishToMessageBus, + query, + updateCurrentUser, +} from "discourse/tests/helpers/qunit-helpers"; +import { test } from "qunit"; +import { click, visit } from "@ember/test-helpers"; +import { cloneJSON } from "discourse-common/lib/object"; +import topicFixtures from "discourse/tests/fixtures/topic"; + +acceptance("Topic - Summary", function (needs) { + const currentUserId = 5; + + needs.user(); + needs.pretender((server, helper) => { + server.get("/t/1.json", () => { + const json = cloneJSON(topicFixtures["/t/130.json"]); + json.id = 1; + json.summarizable = true; + + return helper.response(json); + }); + + server.get("/t/1/strategy-summary", () => { + return helper.response({}); + }); + }); + + needs.hooks.beforeEach(() => { + updateCurrentUser({ id: currentUserId }); + }); + + test("displays streamed summary", async function (assert) { + await visit("/t/-/1"); + + const partialSummary = "This a"; + await publishToMessageBus("/summaries/topic/1", { + done: false, + topic_summary: { summarized_text: partialSummary }, + }); + + await click(".topic-strategy-summarization"); + + assert.strictEqual( + query(".summary-box .generated-summary p").innerText, + partialSummary, + "Updates the summary with a partial result" + ); + + const finalSummary = "This is a completed summary"; + await publishToMessageBus("/summaries/topic/1", { + done: true, + topic_summary: { + summarized_text: finalSummary, + summarized_on: "2023-01-01T04:00:00.000Z", + algorithm: "OpenAI GPT-4", + outdated: false, + new_posts_since_summary: false, + can_regenerate: true, + }, + }); + + assert.strictEqual( + query(".summary-box .generated-summary p").innerText, + finalSummary, + "Updates the summary with a partial result" + ); + + assert.ok(exists(".summary-box .summarized-on"), "summary metadata exists"); + }); +}); diff --git a/app/controllers/topics_controller.rb b/app/controllers/topics_controller.rb index 5b662283a5f..789e3610000 100644 --- a/app/controllers/topics_controller.rb +++ b/app/controllers/topics_controller.rb @@ -1185,18 +1185,21 @@ class TopicsController < ApplicationController opts = params.permit(:skip_age_check) - hijack do - summary = TopicSummarization.new(strategy).summarize(topic, current_user, opts) + if params[:stream] + Jobs.enqueue( + :stream_topic_summary, + topic_id: topic.id, + user_id: current_user.id, + opts: opts.as_json, + ) - render json: { - summary: summary.summarized_text, - summarized_on: summary.updated_at, - summarized_by: summary.algorithm, - outdated: summary.outdated, - can_regenerate: Summarization::Base.can_request_summary_for?(current_user), - new_posts_since_summary: - topic.highest_post_number.to_i - summary.content_range&.max.to_i, - } + render json: success_json + else + hijack do + summary = TopicSummarization.new(strategy).summarize(topic, current_user, opts) + + render_serialized(summary, TopicSummarySerializer) + end end end diff --git a/app/jobs/regular/stream_topic_summary.rb b/app/jobs/regular/stream_topic_summary.rb new file mode 100644 index 00000000000..a82030a613f --- /dev/null +++ b/app/jobs/regular/stream_topic_summary.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +module Jobs + class StreamTopicSummary < ::Jobs::Base + sidekiq_options retry: false + + def execute(args) + return unless topic = Topic.find_by(id: args[:topic_id]) + return unless user = User.find_by(id: args[:user_id]) + + strategy = Summarization::Base.selected_strategy + return if strategy.nil? || !Summarization::Base.can_see_summary?(topic, user) + + guardian = Guardian.new(user) + return unless guardian.can_see?(topic) + + opts = args[:opts] || {} + + streamed_summary = +"" + start = Time.now + + summary = + TopicSummarization + .new(strategy) + .summarize(topic, user, opts) do |partial_summary| + streamed_summary << partial_summary + + # Throttle updates. + if (Time.now - start > 0.5) || Rails.env.test? + payload = { done: false, topic_summary: { summarized_text: streamed_summary } } + publish_update(topic, user, payload) + start = Time.now + end + end + + publish_update( + topic, + user, + TopicSummarySerializer.new(summary, { scope: guardian }).as_json.merge(done: true), + ) + end + + private + + def publish_update(topic, user, payload) + MessageBus.publish("/summaries/topic/#{topic.id}", payload, user_ids: [user.id]) + end + end +end diff --git a/app/serializers/topic_summary_serializer.rb b/app/serializers/topic_summary_serializer.rb new file mode 100644 index 00000000000..e2980d88d00 --- /dev/null +++ b/app/serializers/topic_summary_serializer.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +class TopicSummarySerializer < ApplicationSerializer + attributes :summarized_text, :algorithm, :outdated, :can_regenerate, :new_posts_since_summary + + def can_regenerate + Summarization::Base.can_request_summary_for?(scope.current_user) + end + + def new_posts_since_summary + object.target.highest_post_number.to_i - object.content_range&.end.to_i + end +end diff --git a/app/services/topic_summarization.rb b/app/services/topic_summarization.rb index 6321831f7c3..e0c9ee87474 100644 --- a/app/services/topic_summarization.rb +++ b/app/services/topic_summarization.rb @@ -5,7 +5,7 @@ class TopicSummarization @strategy = strategy end - def summarize(topic, user, opts = {}) + def summarize(topic, user, opts = {}, &on_partial_blk) existing_summary = SummarySection.find_by(target: topic, meta_section_id: nil) # Existing summary shouldn't be nil in this scenario because the controller checks its existence. @@ -37,7 +37,7 @@ class TopicSummarization content[:contents] << { poster: username, id: pn, text: raw } end - summarization_result = strategy.summarize(content) + summarization_result = strategy.summarize(content, &on_partial_blk) cache_summary(summarization_result, targets_data.map(&:first), topic) end diff --git a/lib/summarization/base.rb b/lib/summarization/base.rb index 53fbba5ec1e..cbb4b167cce 100644 --- a/lib/summarization/base.rb +++ b/lib/summarization/base.rb @@ -72,6 +72,8 @@ module Summarization # - content_title (optional): Provides guidance about what the content is about. # - contents (required): Array of hashes with content to summarize (e.g. [{ poster: "asd", id: 1, text: "This is a text" }]) # All keys are required. + # @param &on_partial_blk { Block - Optional } - If the strategy supports it, the passed block + # will get called with partial summarized text as its generated. # # @returns { Hash } - The summarized content, plus chunks if the content couldn't be summarized in one pass. Example: # { diff --git a/spec/jobs/regular/stream_topic_summary_spec.rb b/spec/jobs/regular/stream_topic_summary_spec.rb new file mode 100644 index 00000000000..d92ad1081e6 --- /dev/null +++ b/spec/jobs/regular/stream_topic_summary_spec.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +RSpec.describe Jobs::StreamTopicSummary do + subject(:job) { described_class.new } + + describe "#execute" do + fab!(:topic) { Fabricate(:topic) } + let(:plugin) { Plugin::Instance.new } + let(:strategy) { DummyCustomSummarization.new({ summary: "dummy", chunks: [] }) } + fab!(:user) { Fabricate(:leader) } + + before { Group.find(Group::AUTO_GROUPS[:trust_level_3]).add(user) } + + before do + plugin.register_summarization_strategy(strategy) + SiteSetting.summarization_strategy = strategy.model + end + + describe "validates params" do + it "does nothing if there is no topic" do + messages = + MessageBus.track_publish("/summaries/topic/#{topic.id}") do + job.execute(topic_id: nil, user_id: user.id) + end + + expect(messages).to be_empty + end + + it "does nothing if there is no user" do + messages = + MessageBus.track_publish("/summaries/topic/#{topic.id}") do + job.execute(topic_id: topic.id, user_id: nil) + end + + expect(messages).to be_empty + end + + it "does nothing if the user is not allowed to see the topic" do + private_topic = Fabricate(:private_message_topic) + + messages = + MessageBus.track_publish("/summaries/topic/#{private_topic.id}") do + job.execute(topic_id: private_topic.id, user_id: user.id) + end + + expect(messages).to be_empty + end + end + + it "publishes updates with a partial summary" do + messages = + MessageBus.track_publish("/summaries/topic/#{topic.id}") do + job.execute(topic_id: topic.id, user_id: user.id) + end + + partial_summary_update = messages.first.data + expect(partial_summary_update[:done]).to eq(false) + expect(partial_summary_update.dig(:topic_summary, :summarized_text)).to eq("dummy") + end + + it "publishes a final update to signal we're done and provide metadata" do + messages = + MessageBus.track_publish("/summaries/topic/#{topic.id}") do + job.execute(topic_id: topic.id, user_id: user.id) + end + + final_update = messages.last.data + expect(final_update[:done]).to eq(true) + + expect(final_update.dig(:topic_summary, :algorithm)).to eq(strategy.model) + expect(final_update.dig(:topic_summary, :outdated)).to eq(false) + expect(final_update.dig(:topic_summary, :can_regenerate)).to eq(true) + expect(final_update.dig(:topic_summary, :new_posts_since_summary)).to be_zero + end + end +end diff --git a/spec/requests/topics_controller_spec.rb b/spec/requests/topics_controller_spec.rb index 9b19ce1243e..04e85e916bd 100644 --- a/spec/requests/topics_controller_spec.rb +++ b/spec/requests/topics_controller_spec.rb @@ -5508,9 +5508,9 @@ RSpec.describe TopicsController do describe "#summary" do fab!(:topic) { Fabricate(:topic) } let(:plugin) { Plugin::Instance.new } + let(:strategy) { DummyCustomSummarization.new({ summary: "dummy", chunks: [] }) } before do - strategy = DummyCustomSummarization.new("dummy") plugin.register_summarization_strategy(strategy) SiteSetting.summarization_strategy = strategy.model end @@ -5536,14 +5536,17 @@ RSpec.describe TopicsController do expect(response.status).to eq(200) summary = response.parsed_body - expect(summary["summary"]).to eq(section.summarized_text) + expect(summary.dig("topic_summary", "summarized_text")).to eq(section.summarized_text) end end context "when the user is a member of an allowlisted group" do fab!(:user) { Fabricate(:leader) } - before { sign_in(user) } + before do + sign_in(user) + Group.find(Group::AUTO_GROUPS[:trust_level_3]).add(user) + end it "returns a 404 if there is no topic" do invalid_topic_id = 999 @@ -5560,6 +5563,20 @@ RSpec.describe TopicsController do expect(response.status).to eq(403) end + + it "returns a summary" do + get "/t/#{topic.id}/strategy-summary.json" + + expect(response.status).to eq(200) + summary = response.parsed_body["topic_summary"] + section = SummarySection.last + + expect(summary["summarized_text"]).to eq(section.summarized_text) + expect(summary["algorithm"]).to eq(strategy.model) + expect(summary["outdated"]).to eq(false) + expect(summary["can_regenerate"]).to eq(true) + expect(summary["new_posts_since_summary"]).to be_zero + end end context "when the user is not a member of an allowlisted group" do @@ -5587,7 +5604,7 @@ RSpec.describe TopicsController do expect(response.status).to eq(200) summary = response.parsed_body - expect(summary["summary"]).to eq(section.summarized_text) + expect(summary.dig("topic_summary", "summarized_text")).to eq(section.summarized_text) end end end diff --git a/spec/services/topic_summarization_spec.rb b/spec/services/topic_summarization_spec.rb index 00ff67c183e..16339bc0738 100644 --- a/spec/services/topic_summarization_spec.rb +++ b/spec/services/topic_summarization_spec.rb @@ -186,5 +186,17 @@ describe TopicSummarization do end end end + + describe "stream partial updates" do + let(:summary) { { summary: "This is the final summary", chunks: [] } } + + it "receives a blk that is passed to the underlying strategy and called with partial summaries" do + partial_result = nil + + summarization.summarize(topic, user) { |partial_summary| partial_result = partial_summary } + + expect(partial_result).to eq(summary[:summary]) + end + end end end diff --git a/spec/support/dummy_custom_summarization.rb b/spec/support/dummy_custom_summarization.rb index 1724a0274f6..b6578b73b80 100644 --- a/spec/support/dummy_custom_summarization.rb +++ b/spec/support/dummy_custom_summarization.rb @@ -22,6 +22,6 @@ class DummyCustomSummarization < Summarization::Base end def summarize(_content) - @summarization_result + @summarization_result.tap { |result| yield(result[:summary]) if block_given? } end end diff --git a/spec/system/topic_summarization_spec.rb b/spec/system/topic_summarization_spec.rb deleted file mode 100644 index 368e3b1d92e..00000000000 --- a/spec/system/topic_summarization_spec.rb +++ /dev/null @@ -1,32 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe "Topic summarization", type: :system, js: true do - fab!(:user) { Fabricate(:admin) } - - # has_summary to force topic map to be present. - fab!(:topic) { Fabricate(:topic, has_summary: true) } - fab!(:post_1) { Fabricate(:post, topic: topic) } - fab!(:post_2) { Fabricate(:post, topic: topic) } - - let(:plugin) { Plugin::Instance.new } - - let(:expected_summary) { "This is a summary" } - let(:summarization_result) { { summary: expected_summary, chunks: [] } } - - before do - sign_in(user) - strategy = DummyCustomSummarization.new(summarization_result) - plugin.register_summarization_strategy(strategy) - SiteSetting.summarization_strategy = strategy.model - end - - it "returns a summary using the selected timeframe" do - visit("/t/-/#{topic.id}") - - find(".topic-strategy-summarization").click - - summary = find(".summary-box .generated-summary p").text - - expect(summary).to eq(expected_summary) - end -end