FEATURE: Stream topic summaries. (#23065)

When we receive the stream parameter, we'll queue a job that periodically publishes partial updates, and after the summarization finishes, a final one with the completed version, plus metadata.

`summary-box` listens to these updates via MessageBus, and updates state accordingly.
This commit is contained in:
Roman Rizzi 2023-08-11 15:08:49 -03:00 committed by GitHub
parent 840bea3c51
commit 7ca5ee6cd2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 370 additions and 127 deletions

View File

@ -1,62 +1,71 @@
{{#if @postAttrs.hasTopRepliesSummary}}
<p>{{html-safe this.topRepliesSummaryInfo}}</p>
{{/if}}
<div class="summarization-buttons">
{{#if @postAttrs.summarizable}}
{{#if this.canCollapseSummary}}
<DButton
@class="btn-primary topic-strategy-summarization"
@action={{this.toggleSummary}}
@title="summary.buttons.hide"
@label="summary.buttons.hide"
@icon="chevron-up"
/>
{{else}}
<DButton
@class="btn-primary topic-strategy-summarization"
@action={{this.generateSummary}}
@translatedLabel={{this.generateSummaryTitle}}
@translatedTitle={{this.generateSummaryTitle}}
@icon={{this.generateSummaryIcon}}
@disabled={{this.loadingSummary}}
/>
{{/if}}
{{/if}}
<div
class="summary-box__container"
{{did-insert this.subscribe}}
{{will-destroy this.unsubscribe}}
>
{{#if @postAttrs.hasTopRepliesSummary}}
<DButton
@class="top-replies"
@action={{this.toggleTopRepliesFilter}}
@translatedTitle={{this.topRepliesTitle}}
@translatedLabel={{this.topRepliesLabel}}
@icon={{this.topRepliesIcon}}
/>
<p>{{html-safe this.topRepliesSummaryInfo}}</p>
{{/if}}
</div>
{{#if this.showSummaryBox}}
<article class="summary-box">
{{#if this.loadingSummary}}
<AiSummarySkeleton />
{{else}}
<div class="generated-summary">{{this.summary}}</div>
<div class="summarized-on">
<p>
{{i18n "summary.summarized_on" date=this.summarizedOn}}
<span>
{{d-icon "info-circle"}}
<DTooltip @placement="top-end">
{{i18n "summary.model_used" model=this.summarizedBy}}
</DTooltip>
</span>
</p>
{{#if this.outdated}}
<p class="outdated-summary">
{{this.outdatedSummaryWarningText}}
</p>
{{/if}}
</div>
<div class="summarization-buttons">
{{#if @postAttrs.summarizable}}
{{#if this.canCollapseSummary}}
<DButton
@class="btn-primary topic-strategy-summarization"
@action={{this.toggleSummary}}
@title="summary.buttons.hide"
@label="summary.buttons.hide"
@icon="chevron-up"
/>
{{else}}
<DButton
@class="btn-primary topic-strategy-summarization"
@action={{this.generateSummary}}
@translatedLabel={{this.generateSummaryTitle}}
@translatedTitle={{this.generateSummaryTitle}}
@icon={{this.generateSummaryIcon}}
@disabled={{this.loadingSummary}}
/>
{{/if}}
{{/if}}
</article>
{{/if}}
{{#if @postAttrs.hasTopRepliesSummary}}
<DButton
@class="top-replies"
@action={{this.toggleTopRepliesFilter}}
@translatedTitle={{this.topRepliesTitle}}
@translatedLabel={{this.topRepliesLabel}}
@icon={{this.topRepliesIcon}}
/>
{{/if}}
</div>
{{#if this.showSummaryBox}}
<article class="summary-box">
{{#if this.loadingSummary}}
<AiSummarySkeleton />
{{else}}
<div class="generated-summary">{{this.summary}}</div>
{{#if this.summarizedOn}}
<div class="summarized-on">
<p>
{{i18n "summary.summarized_on" date=this.summarizedOn}}
<span>
{{d-icon "info-circle"}}
<DTooltip @placement="top-end">
{{i18n "summary.model_used" model=this.summarizedBy}}
</DTooltip>
</span>
</p>
{{#if this.outdated}}
<p class="outdated-summary">
{{this.outdatedSummaryWarningText}}
</p>
{{/if}}
</div>
{{/if}}
{{/if}}
</article>
{{/if}}
</div>

View File

@ -7,11 +7,13 @@ import { ajax } from "discourse/lib/ajax";
import { popupAjaxError } from "discourse/lib/ajax-error";
import { cookAsync } from "discourse/lib/text";
import { shortDateNoYear } from "discourse/lib/formatter";
import { bind } from "discourse-common/utils/decorators";
const MIN_POST_READ_TIME = 4;
export default class SummaryBox extends Component {
@service siteSettings;
@service messageBus;
@tracked summary = "";
@tracked summarizedOn = null;
@ -25,6 +27,40 @@ export default class SummaryBox extends Component {
@tracked canCollapseSummary = false;
@tracked loadingSummary = false;
@bind
subscribe() {
const channel = `/summaries/topic/${this.args.postAttrs.topicId}`;
this.messageBus.subscribe(channel, this._updateSummary);
}
@bind
unsubscribe() {
this.messageBus.unsubscribe("/summaries/topic/*", this._updateSummary);
}
@bind
_updateSummary(update) {
const topicSummary = update.topic_summary;
if (topicSummary.summarized_text) {
cookAsync(topicSummary.summarized_text).then((cooked) => {
this.summary = cooked;
this.loadingSummary = false;
});
}
if (update.done) {
this.summarizedOn = shortDateNoYear(topicSummary.summarized_on);
this.summarizedBy = topicSummary.algorithm;
this.newPostsSinceSummary = topicSummary.new_posts_since_summary;
this.outdated = topicSummary.outdated;
this.newPostsSinceSummary = topicSummary.new_posts_since_summary;
this.canRegenerate = topicSummary.outdated && topicSummary.can_regenerate;
this.canCollapseSummary = !this.canRegenerate;
}
}
get generateSummaryTitle() {
const title = this.canRegenerate
? "summary.buttons.regenerate"
@ -130,27 +166,12 @@ export default class SummaryBox extends Component {
this.loadingSummary = true;
}
let fetchURL = `/t/${this.args.postAttrs.topicId}/strategy-summary`;
let fetchURL = `/t/${this.args.postAttrs.topicId}/strategy-summary?stream=true`;
if (this.canRegenerate) {
fetchURL += "?skip_age_check=true";
fetchURL += "&skip_age_check=true";
}
ajax(fetchURL)
.then((data) => {
cookAsync(data.summary).then((cooked) => {
this.summary = cooked;
this.summarizedOn = shortDateNoYear(data.summarized_on);
this.summarizedBy = data.summarized_by;
this.newPostsSinceSummary = data.new_posts_since_summary;
this.outdated = data.outdated;
this.newPostsSinceSummary = data.new_posts_since_summary;
this.canRegenerate = data.outdated && data.can_regenerate;
this.canCollapseSummary = !this.canRegenerate;
});
})
.catch(popupAjaxError)
.finally(() => (this.loadingSummary = false));
ajax(fetchURL).catch(popupAjaxError);
}
}

View File

@ -0,0 +1,73 @@
import {
acceptance,
exists,
publishToMessageBus,
query,
updateCurrentUser,
} from "discourse/tests/helpers/qunit-helpers";
import { test } from "qunit";
import { click, visit } from "@ember/test-helpers";
import { cloneJSON } from "discourse-common/lib/object";
import topicFixtures from "discourse/tests/fixtures/topic";
acceptance("Topic - Summary", function (needs) {
const currentUserId = 5;
needs.user();
needs.pretender((server, helper) => {
server.get("/t/1.json", () => {
const json = cloneJSON(topicFixtures["/t/130.json"]);
json.id = 1;
json.summarizable = true;
return helper.response(json);
});
server.get("/t/1/strategy-summary", () => {
return helper.response({});
});
});
needs.hooks.beforeEach(() => {
updateCurrentUser({ id: currentUserId });
});
test("displays streamed summary", async function (assert) {
await visit("/t/-/1");
const partialSummary = "This a";
await publishToMessageBus("/summaries/topic/1", {
done: false,
topic_summary: { summarized_text: partialSummary },
});
await click(".topic-strategy-summarization");
assert.strictEqual(
query(".summary-box .generated-summary p").innerText,
partialSummary,
"Updates the summary with a partial result"
);
const finalSummary = "This is a completed summary";
await publishToMessageBus("/summaries/topic/1", {
done: true,
topic_summary: {
summarized_text: finalSummary,
summarized_on: "2023-01-01T04:00:00.000Z",
algorithm: "OpenAI GPT-4",
outdated: false,
new_posts_since_summary: false,
can_regenerate: true,
},
});
assert.strictEqual(
query(".summary-box .generated-summary p").innerText,
finalSummary,
"Updates the summary with a partial result"
);
assert.ok(exists(".summary-box .summarized-on"), "summary metadata exists");
});
});

View File

@ -1185,18 +1185,21 @@ class TopicsController < ApplicationController
opts = params.permit(:skip_age_check)
hijack do
summary = TopicSummarization.new(strategy).summarize(topic, current_user, opts)
if params[:stream]
Jobs.enqueue(
:stream_topic_summary,
topic_id: topic.id,
user_id: current_user.id,
opts: opts.as_json,
)
render json: {
summary: summary.summarized_text,
summarized_on: summary.updated_at,
summarized_by: summary.algorithm,
outdated: summary.outdated,
can_regenerate: Summarization::Base.can_request_summary_for?(current_user),
new_posts_since_summary:
topic.highest_post_number.to_i - summary.content_range&.max.to_i,
}
render json: success_json
else
hijack do
summary = TopicSummarization.new(strategy).summarize(topic, current_user, opts)
render_serialized(summary, TopicSummarySerializer)
end
end
end

View File

@ -0,0 +1,49 @@
# frozen_string_literal: true
module Jobs
class StreamTopicSummary < ::Jobs::Base
sidekiq_options retry: false
def execute(args)
return unless topic = Topic.find_by(id: args[:topic_id])
return unless user = User.find_by(id: args[:user_id])
strategy = Summarization::Base.selected_strategy
return if strategy.nil? || !Summarization::Base.can_see_summary?(topic, user)
guardian = Guardian.new(user)
return unless guardian.can_see?(topic)
opts = args[:opts] || {}
streamed_summary = +""
start = Time.now
summary =
TopicSummarization
.new(strategy)
.summarize(topic, user, opts) do |partial_summary|
streamed_summary << partial_summary
# Throttle updates.
if (Time.now - start > 0.5) || Rails.env.test?
payload = { done: false, topic_summary: { summarized_text: streamed_summary } }
publish_update(topic, user, payload)
start = Time.now
end
end
publish_update(
topic,
user,
TopicSummarySerializer.new(summary, { scope: guardian }).as_json.merge(done: true),
)
end
private
def publish_update(topic, user, payload)
MessageBus.publish("/summaries/topic/#{topic.id}", payload, user_ids: [user.id])
end
end
end

View File

@ -0,0 +1,13 @@
# frozen_string_literal: true
class TopicSummarySerializer < ApplicationSerializer
attributes :summarized_text, :algorithm, :outdated, :can_regenerate, :new_posts_since_summary
def can_regenerate
Summarization::Base.can_request_summary_for?(scope.current_user)
end
def new_posts_since_summary
object.target.highest_post_number.to_i - object.content_range&.end.to_i
end
end

View File

@ -5,7 +5,7 @@ class TopicSummarization
@strategy = strategy
end
def summarize(topic, user, opts = {})
def summarize(topic, user, opts = {}, &on_partial_blk)
existing_summary = SummarySection.find_by(target: topic, meta_section_id: nil)
# Existing summary shouldn't be nil in this scenario because the controller checks its existence.
@ -37,7 +37,7 @@ class TopicSummarization
content[:contents] << { poster: username, id: pn, text: raw }
end
summarization_result = strategy.summarize(content)
summarization_result = strategy.summarize(content, &on_partial_blk)
cache_summary(summarization_result, targets_data.map(&:first), topic)
end

View File

@ -72,6 +72,8 @@ module Summarization
# - content_title (optional): Provides guidance about what the content is about.
# - contents (required): Array of hashes with content to summarize (e.g. [{ poster: "asd", id: 1, text: "This is a text" }])
# All keys are required.
# @param &on_partial_blk { Block - Optional } - If the strategy supports it, the passed block
# will get called with partial summarized text as its generated.
#
# @returns { Hash } - The summarized content, plus chunks if the content couldn't be summarized in one pass. Example:
# {

View File

@ -0,0 +1,76 @@
# frozen_string_literal: true
RSpec.describe Jobs::StreamTopicSummary do
subject(:job) { described_class.new }
describe "#execute" do
fab!(:topic) { Fabricate(:topic) }
let(:plugin) { Plugin::Instance.new }
let(:strategy) { DummyCustomSummarization.new({ summary: "dummy", chunks: [] }) }
fab!(:user) { Fabricate(:leader) }
before { Group.find(Group::AUTO_GROUPS[:trust_level_3]).add(user) }
before do
plugin.register_summarization_strategy(strategy)
SiteSetting.summarization_strategy = strategy.model
end
describe "validates params" do
it "does nothing if there is no topic" do
messages =
MessageBus.track_publish("/summaries/topic/#{topic.id}") do
job.execute(topic_id: nil, user_id: user.id)
end
expect(messages).to be_empty
end
it "does nothing if there is no user" do
messages =
MessageBus.track_publish("/summaries/topic/#{topic.id}") do
job.execute(topic_id: topic.id, user_id: nil)
end
expect(messages).to be_empty
end
it "does nothing if the user is not allowed to see the topic" do
private_topic = Fabricate(:private_message_topic)
messages =
MessageBus.track_publish("/summaries/topic/#{private_topic.id}") do
job.execute(topic_id: private_topic.id, user_id: user.id)
end
expect(messages).to be_empty
end
end
it "publishes updates with a partial summary" do
messages =
MessageBus.track_publish("/summaries/topic/#{topic.id}") do
job.execute(topic_id: topic.id, user_id: user.id)
end
partial_summary_update = messages.first.data
expect(partial_summary_update[:done]).to eq(false)
expect(partial_summary_update.dig(:topic_summary, :summarized_text)).to eq("dummy")
end
it "publishes a final update to signal we're done and provide metadata" do
messages =
MessageBus.track_publish("/summaries/topic/#{topic.id}") do
job.execute(topic_id: topic.id, user_id: user.id)
end
final_update = messages.last.data
expect(final_update[:done]).to eq(true)
expect(final_update.dig(:topic_summary, :algorithm)).to eq(strategy.model)
expect(final_update.dig(:topic_summary, :outdated)).to eq(false)
expect(final_update.dig(:topic_summary, :can_regenerate)).to eq(true)
expect(final_update.dig(:topic_summary, :new_posts_since_summary)).to be_zero
end
end
end

View File

@ -5508,9 +5508,9 @@ RSpec.describe TopicsController do
describe "#summary" do
fab!(:topic) { Fabricate(:topic) }
let(:plugin) { Plugin::Instance.new }
let(:strategy) { DummyCustomSummarization.new({ summary: "dummy", chunks: [] }) }
before do
strategy = DummyCustomSummarization.new("dummy")
plugin.register_summarization_strategy(strategy)
SiteSetting.summarization_strategy = strategy.model
end
@ -5536,14 +5536,17 @@ RSpec.describe TopicsController do
expect(response.status).to eq(200)
summary = response.parsed_body
expect(summary["summary"]).to eq(section.summarized_text)
expect(summary.dig("topic_summary", "summarized_text")).to eq(section.summarized_text)
end
end
context "when the user is a member of an allowlisted group" do
fab!(:user) { Fabricate(:leader) }
before { sign_in(user) }
before do
sign_in(user)
Group.find(Group::AUTO_GROUPS[:trust_level_3]).add(user)
end
it "returns a 404 if there is no topic" do
invalid_topic_id = 999
@ -5560,6 +5563,20 @@ RSpec.describe TopicsController do
expect(response.status).to eq(403)
end
it "returns a summary" do
get "/t/#{topic.id}/strategy-summary.json"
expect(response.status).to eq(200)
summary = response.parsed_body["topic_summary"]
section = SummarySection.last
expect(summary["summarized_text"]).to eq(section.summarized_text)
expect(summary["algorithm"]).to eq(strategy.model)
expect(summary["outdated"]).to eq(false)
expect(summary["can_regenerate"]).to eq(true)
expect(summary["new_posts_since_summary"]).to be_zero
end
end
context "when the user is not a member of an allowlisted group" do
@ -5587,7 +5604,7 @@ RSpec.describe TopicsController do
expect(response.status).to eq(200)
summary = response.parsed_body
expect(summary["summary"]).to eq(section.summarized_text)
expect(summary.dig("topic_summary", "summarized_text")).to eq(section.summarized_text)
end
end
end

View File

@ -186,5 +186,17 @@ describe TopicSummarization do
end
end
end
describe "stream partial updates" do
let(:summary) { { summary: "This is the final summary", chunks: [] } }
it "receives a blk that is passed to the underlying strategy and called with partial summaries" do
partial_result = nil
summarization.summarize(topic, user) { |partial_summary| partial_result = partial_summary }
expect(partial_result).to eq(summary[:summary])
end
end
end
end

View File

@ -22,6 +22,6 @@ class DummyCustomSummarization < Summarization::Base
end
def summarize(_content)
@summarization_result
@summarization_result.tap { |result| yield(result[:summary]) if block_given? }
end
end

View File

@ -1,32 +0,0 @@
# frozen_string_literal: true
RSpec.describe "Topic summarization", type: :system, js: true do
fab!(:user) { Fabricate(:admin) }
# has_summary to force topic map to be present.
fab!(:topic) { Fabricate(:topic, has_summary: true) }
fab!(:post_1) { Fabricate(:post, topic: topic) }
fab!(:post_2) { Fabricate(:post, topic: topic) }
let(:plugin) { Plugin::Instance.new }
let(:expected_summary) { "This is a summary" }
let(:summarization_result) { { summary: expected_summary, chunks: [] } }
before do
sign_in(user)
strategy = DummyCustomSummarization.new(summarization_result)
plugin.register_summarization_strategy(strategy)
SiteSetting.summarization_strategy = strategy.model
end
it "returns a summary using the selected timeframe" do
visit("/t/-/#{topic.id}")
find(".topic-strategy-summarization").click
summary = find(".summary-box .generated-summary p").text
expect(summary).to eq(expected_summary)
end
end