FEATURE: Inline topic summary. Cached version accessible to everyone. (#22551)

* FEATURE:  Inline topic summary. Cached version accessible to everyone.

Anons and non-members of the `custom_summarization_allowed_groups_map` groups can see cached summaries for any accessible topic. After the first 12 hours and if the posts to summarize have changed, allowed users clicking on the button will automatically re-generate it.

* Ensure chat summaries work and prevent model hallucinations when there are no messages.
This commit is contained in:
Roman Rizzi 2023-07-12 11:21:51 -03:00 committed by GitHub
parent 0b8fcb7c72
commit 61aeb2da90
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 378 additions and 156 deletions

View File

@ -8,7 +8,6 @@ import offsetCalculator from "discourse/lib/offset-calculator";
import { inject as service } from "@ember/service";
import { bind } from "discourse-common/utils/decorators";
import domUtils from "discourse-common/utils/dom-utils";
import showModal from "discourse/lib/show-modal";
const DEBOUNCE_DELAY = 50;
@ -269,12 +268,6 @@ export default MountWidget.extend({
this.screenTrack.setOnscreen(onscreenPostNumbers, readPostNumbers);
},
showSummary() {
showModal("topic-summary").setProperties({
topicId: this.posts.objectAt(0).topic_id,
});
},
_scrollTriggered() {
scheduleOnce("afterRender", this, this.scrolled);
},

View File

@ -1,14 +0,0 @@
<DModalBody @title="summary.strategy.title">
<div class="topic-summary" {{did-insert this.summarize}}>
<ConditionalLoadingSpinner @condition={{this.loading}} />
{{#unless this.loading}}
<p class="summary-area">{{this.summary}}</p>
{{/unless}}
</div>
</DModalBody>
<div class="modal-footer">
<DModalCancel @close={{route-action "closeModal"}} />
</div>

View File

@ -1,28 +0,0 @@
import Component from "@glimmer/component";
import { tracked } from "@glimmer/tracking";
import { ajax } from "discourse/lib/ajax";
import { popupAjaxError } from "discourse/lib/ajax-error";
import { action } from "@ember/object";
import { schedule } from "@ember/runloop";
import { cookAsync } from "discourse/lib/text";
export default class TopicSummary extends Component {
@tracked loading = false;
@tracked summary = null;
@action
summarize() {
schedule("afterRender", () => {
this.loading = true;
ajax(`/t/${this.args.topicId}/strategy-summary`)
.then((data) => {
cookAsync(data.summary).then((cooked) => {
this.summary = cooked;
});
})
.catch(popupAjaxError)
.finally(() => (this.loading = false));
});
}
}

View File

@ -7,7 +7,7 @@ export function shortDate(date) {
return moment(date).format(I18n.t("dates.medium.date_year"));
}
function shortDateNoYear(date) {
export function shortDateNoYear(date) {
return moment(date).format(I18n.t("dates.tiny.date_month"));
}

View File

@ -219,6 +219,7 @@ export default function transformPost(
postAtts.topicSummaryEnabled = postStream.summary;
postAtts.topicWordCount = topic.word_count;
postAtts.hasTopRepliesSummary = topic.has_summary;
postAtts.summarizable = topic.summarizable;
}
if (postAtts.isDeleted) {

View File

@ -1,4 +0,0 @@
<TopicSummary
@topicId={{this.topicId}}
@closeModal={{route-action "closeModal"}}
/>

View File

@ -0,0 +1,65 @@
import { createWidget } from "discourse/widgets/widget";
import hbs from "discourse/widgets/hbs-compiler";
import { ajax } from "discourse/lib/ajax";
import { popupAjaxError } from "discourse/lib/ajax-error";
import { cookAsync } from "discourse/lib/text";
import RawHtml from "discourse/widgets/raw-html";
import I18n from "I18n";
import { shortDateNoYear } from "discourse/lib/formatter";
import { h } from "virtual-dom";
createWidget("summary-skeleton", {
tagName: "section.placeholder-summary",
template: hbs`
<div class="placeholder-summary-text placeholder-animation"></div>
<div class="placeholder-summary-text placeholder-animation"></div>
<div class="placeholder-summary-text placeholder-animation"></div>
<div class="placeholder-summary-text">{{transformed.in_progress_label}}</div>
`,
transform() {
return { in_progress_label: I18n.t("summary.in_progress") };
},
});
export default createWidget("summary-box", {
tagName: "article.summary-box",
buildKey: (attrs) => `summary-box-${attrs.topicId}`,
defaultState() {
return { summary: "" };
},
html(attrs, state) {
const html = [];
if (state.summary) {
html.push(new RawHtml({ html: state.summary }));
html.push(
h(
"div.summarized-on",
{},
I18n.t("summary.summarized_on", { date: state.summarized_on })
)
);
} else {
html.push(this.attach("summary-skeleton"));
this.fetchSummary(attrs.topicId);
}
return html;
},
fetchSummary(topicId) {
ajax(`/t/${topicId}/strategy-summary`)
.then((data) => {
this.state.summarized_on = shortDateNoYear(data.summarized_on);
cookAsync(data.summary).then((cooked) => {
this.state.summary = cooked.string;
this.scheduleRerender();
});
})
.catch(popupAjaxError);
},
});

View File

@ -34,15 +34,36 @@ createWidget("toggle-summary-description", {
export default createWidget("toggle-topic-summary", {
tagName: "section.information.toggle-summary",
html(attrs) {
buildKey: (attrs) => `toggle-topic-summary-${attrs.topicId}`,
defaultState() {
return { expandSummaryBox: false };
},
html(attrs, state) {
const html = [];
const summarizationButtons = [];
if (attrs.summarizable) {
const title = I18n.t("summary.strategy.button_title");
summarizationButtons.push(
this.attach("button", {
className: "btn btn-primary topic-strategy-summarization",
icon: "magic",
translatedTitle: title,
translatedLabel: title,
action: "expandSummaryBox",
disabled: state.expandSummaryBox,
})
);
}
if (attrs.hasTopRepliesSummary) {
html.push(this.attach("toggle-summary-description", attrs));
summarizationButtons.push(
this.attach("button", {
className: "btn btn-primary",
className: "btn top-replies",
icon: attrs.topicSummaryEnabled ? null : "layer-group",
title: attrs.topicSummaryEnabled ? null : "summary.short_title",
label: attrs.topicSummaryEnabled
@ -53,22 +74,18 @@ export default createWidget("toggle-topic-summary", {
);
}
if (attrs.includeSummary) {
const title = I18n.t("summary.strategy.button_title");
summarizationButtons.push(
this.attach("button", {
className: "btn btn-primary topic-strategy-summarization",
icon: "magic",
translatedTitle: title,
translatedLabel: title,
action: "showSummary",
})
);
if (summarizationButtons) {
html.push(h("div.summarization-buttons", summarizationButtons));
}
html.push(h("div.summarization-buttons", summarizationButtons));
if (state.expandSummaryBox) {
html.push(this.attach("summary-box", attrs));
}
return html;
},
expandSummaryBox() {
this.state.expandSummaryBox = true;
},
});

View File

@ -386,8 +386,7 @@ export default createWidget("topic-map", {
contents.push(this.attach("topic-map-expanded", attrs));
}
if (attrs.hasTopRepliesSummary || this._includesSummary()) {
attrs.includeSummary = this._includesSummary();
if (attrs.hasTopRepliesSummary || attrs.summarizable) {
contents.push(this.attach("toggle-topic-summary", attrs));
}
@ -400,19 +399,4 @@ export default createWidget("topic-map", {
toggleMap() {
this.state.collapsed = !this.state.collapsed;
},
_includesSummary() {
const customSummaryAllowedGroups =
this.siteSettings.custom_summarization_allowed_groups
.split("|")
.map(parseInt);
return (
this.siteSettings.summarization_strategy &&
this.currentUser &&
this.currentUser.groups.some((g) =>
customSummaryAllowedGroups.includes(g.id)
)
);
},
});

View File

@ -843,6 +843,21 @@ aside.quote {
.summarization-buttons {
display: flex;
}
.placeholder-summary {
padding-top: 0.5em;
}
.placeholder-summary-text {
display: inline-block;
height: 1em;
margin-top: 0.6em;
width: 100%;
}
.summarized-on {
text-align: right;
}
}
}

View File

@ -326,9 +326,13 @@ pre.codeblock-buttons:hover {
}
}
.toggle-summary .summarization-buttons .topic-strategy-summarization {
.toggle-summary .summarization-buttons .top-replies {
margin-left: 10px;
}
.toggle-summary .summary-box {
margin-top: 10px;
}
}
#topic-footer-buttons {

View File

@ -204,7 +204,7 @@ a.reply-to-tab {
.summarization-buttons {
flex-direction: column;
.topic-strategy-summarization {
.top-replies {
margin-top: 10px;
}
}

View File

@ -30,7 +30,6 @@ class TopicsController < ApplicationController
publish
reset_bump_date
set_slow_mode
summary
]
before_action :consider_user_for_promotion, only: :show
@ -1172,13 +1171,17 @@ class TopicsController < ApplicationController
topic = Topic.find(params[:topic_id])
guardian.ensure_can_see!(topic)
strategy = Summarization::Base.selected_strategy
raise Discourse::NotFound.new unless strategy
raise Discourse::InvalidAccess unless strategy.can_request_summaries?(current_user)
if strategy.nil? || !Summarization::Base.can_see_summary?(topic, current_user)
raise Discourse::NotFound
end
RateLimiter.new(current_user, "summary", 6, 5.minutes).performed!
RateLimiter.new(current_user, "summary", 6, 5.minutes).performed! if current_user
hijack { render json: { summary: TopicSummarization.new(strategy).summarize(topic) } }
hijack do
summary = TopicSummarization.new(strategy).summarize(topic, current_user)
render json: { summary: summary&.summarized_text, summarized_on: summary&.updated_at }
end
end
private

View File

@ -78,6 +78,7 @@ class TopicViewSerializer < ApplicationSerializer
:user_last_posted_at,
:is_shared_draft,
:slow_mode_enabled_until,
:summarizable,
)
has_one :details, serializer: TopicViewDetailsSerializer, root: false, embed: :objects
@ -311,4 +312,8 @@ class TopicViewSerializer < ApplicationSerializer
def slow_mode_enabled_until
object.topic.slow_mode_topic_timer&.execute_at
end
def summarizable
object.summarizable?
end
end

View File

@ -22,6 +22,7 @@ class WebHookTopicViewSerializer < TopicViewSerializer
slow_mode_seconds
slow_mode_enabled_until
bookmarks
summarizable
].each { |attr| define_method("include_#{attr}?") { false } }
def include_show_read_indicator?

View File

@ -5,10 +5,16 @@ class TopicSummarization
@strategy = strategy
end
def summarize(topic)
DistributedMutex.synchronize("toppic_summarization_#{topic.id}") do
def summarize(topic, user)
existing_summary = SummarySection.find_by(target: topic, meta_section_id: nil)
return existing_summary.summarized_text if existing_summary
# For users without permissions to generate a summary, we return what we have cached.
# Existing summary shouldn't be nil in this scenario because the controller checks its existence.
return existing_summary if !user || !Summarization::Base.can_request_summary_for?(user)
return existing_summary if existing_summary && fresh?(existing_summary, topic)
delete_cached_summaries_of(topic) if existing_summary
content = {
resource_path: "#{Discourse.base_path}/t/-/#{topic.id}",
@ -24,14 +30,11 @@ class TopicSummarization
summarization_result = strategy.summarize(content)
cached_summary(summarization_result, targets_data.map(&:first), topic)
summarization_result[:summary]
end
cache_summary(summarization_result, targets_data.map(&:first), topic)
end
def summary_targets(topic)
topic.has_summary? ? best_replies(topic) : pick_selection(topic)
@targets ||= topic.has_summary? ? best_replies(topic) : pick_selection(topic)
end
private
@ -66,7 +69,18 @@ class TopicSummarization
.order(:post_number)
end
def cached_summary(result, post_numbers, topic)
def delete_cached_summaries_of(topic)
SummarySection.where(target: topic).destroy_all
end
def fresh?(summary, topic)
return true if summary.created_at > 12.hours.ago
latest_post_to_summarize = summary_targets(topic).last.post_number
latest_post_to_summarize <= summary.content_range.to_a.last
end
def cache_summary(result, post_numbers, topic)
main_summary =
SummarySection.create!(
target: topic,
@ -86,5 +100,7 @@ class TopicSummarization
meta_section_id: main_summary.id,
)
end
main_summary
end
end

View File

@ -2040,7 +2040,9 @@ en:
refresh_page: "Refresh page"
summary:
enabled_description: "You're viewing a summary of this topic: the most interesting posts as determined by the community."
in_progress: "Summary in progress..."
summarized_on: "Summarized on %{date}"
enabled_description: "You're viewing this topic top replies: the most interesting posts as determined by the community."
description:
one: "There is <b>%{count}</b> reply."
other: "There are <b>%{count}</b> replies."
@ -2057,8 +2059,8 @@ en:
enable: "Show top replies"
disable: "Show All Posts"
short_label: "Summarize"
short_title: "Show a summary of this topic: the most interesting posts as determined by the community"
short_label: "Top replies"
short_title: "Show this topic top replies: the most interesting posts as determined by the community"
strategy:
button_title: "Summarize this topic"
title: "Topic summary"
@ -3633,7 +3635,7 @@ en:
filtered_replies:
viewing_posts_by: "Viewing %{post_count} posts by"
viewing_subset: "Some replies are collapsed"
viewing_summary: "Viewing a summary of this topic"
viewing_summary: "Viewing this topic top replies"
post_number: "%{username}, post #%{post_number}"
show_all: "Show all"

View File

@ -21,15 +21,26 @@ module Summarization
find_strategy(SiteSetting.summarization_strategy)
end
def can_see_summary?(target, user)
return false if SiteSetting.summarization_strategy.blank?
has_cached_summary = SummarySection.exists?(target: target, meta_section_id: nil)
return has_cached_summary if user.nil?
has_cached_summary || can_request_summary_for?(user)
end
def can_request_summaries?(user)
def can_request_summary_for?(user)
return false unless user
user_group_ids = user.group_ids
SiteSetting.custom_summarization_allowed_groups_map.any? do |group_id|
user_group_ids.include?(group_id)
end
end
end
# Some strategies could require other conditions to work correctly,
# like site settings.

View File

@ -722,6 +722,10 @@ class TopicView
end
end
def summarizable?
Summarization::Base.can_see_summary?(@topic, @user)
end
protected
def read_posts_set

View File

@ -12,22 +12,29 @@ class Chat::Api::SummariesController < Chat::ApiController
strategy = Summarization::Base.selected_strategy
raise Discourse::NotFound.new unless strategy
raise Discourse::InvalidAccess unless strategy.can_request_summaries?(current_user)
raise Discourse::InvalidAccess unless Summarization::Base.can_request_summary_for?(current_user)
RateLimiter.new(current_user, "channel_summary", 6, 5.minutes).performed!
hijack do
content =
channel
content = { content_title: channel.name }
content[:contents] = channel
.chat_messages
.where("chat_messages.created_at > ?", since.hours.ago)
.includes(:user)
.order(created_at: :asc)
.pluck(:username_lower, :message)
.map { "#{_1}: #{_2}" }
.join("\n")
.pluck(:id, :username_lower, :message)
.map { { id: _1, poster: _2, text: _3 } }
render json: { summary: strategy.summarize(content).dig(:summary) }
summarized_text =
if content[:contents].empty?
I18n.t("chat.summaries.no_targets")
else
strategy.summarize(content).dig(:summary)
end
render json: { summary: summarized_text }
end
end
end

View File

@ -27,6 +27,7 @@ export default class ChatModalChannelSummary extends Component {
@action
summarize(since) {
this.sinceHours = since;
this.loading = true;
if (this.availableSummaries[since]) {

View File

@ -159,6 +159,9 @@ en:
one: "and %{count} other"
other: "and %{count} others"
summaries:
no_targets: "There were no messages during the selected period."
discourse_push_notifications:
popup:
chat_mention:

View File

@ -1,24 +1,67 @@
# frozen_string_literal: true
describe Summarization::Base do
subject(:summarization) { described_class.new }
fab!(:user) { Fabricate(:user) }
fab!(:group) { Fabricate(:group) }
fab!(:topic) { Fabricate(:topic) }
before { group.add(user) }
let(:plugin) { Plugin::Instance.new }
describe "#can_request_summaries?" do
it "returns true if the user group is present in the custom_summarization_allowed_groups_map setting" do
SiteSetting.custom_summarization_allowed_groups = group.id
before do
group.add(user)
expect(summarization.can_request_summaries?(user)).to eq(true)
strategy = DummyCustomSummarization.new("dummy")
plugin.register_summarization_strategy(strategy)
SiteSetting.summarization_strategy = strategy.model
end
it "returns false if the user group is not present in the custom_summarization_allowed_groups_map setting" do
describe "#can_see_summary?" do
context "when the user cannot generate a summary" do
before { SiteSetting.custom_summarization_allowed_groups = "" }
it "returns false" do
SiteSetting.custom_summarization_allowed_groups = ""
expect(summarization.can_request_summaries?(user)).to eq(false)
expect(described_class.can_see_summary?(topic, user)).to eq(false)
end
it "returns true if there is a cached summary" do
SummarySection.create!(
target: topic,
summarized_text: "test",
original_content_sha: "123",
algorithm: "test",
meta_section_id: nil,
)
expect(described_class.can_see_summary?(topic, user)).to eq(true)
end
end
context "when the user can generate a summary" do
before { SiteSetting.custom_summarization_allowed_groups = group.id }
it "returns true if the user group is present in the custom_summarization_allowed_groups_map setting" do
expect(described_class.can_see_summary?(topic, user)).to eq(true)
end
end
context "when there is no user" do
it "returns false for anons" do
expect(described_class.can_see_summary?(topic, nil)).to eq(false)
end
it "returns true for anons when there is a cached summary" do
SummarySection.create!(
target: topic,
summarized_text: "test",
original_content_sha: "123",
algorithm: "test",
meta_section_id: nil,
)
expect(described_class.can_see_summary?(topic, nil)).to eq(true)
end
end
end
end

View File

@ -715,6 +715,9 @@
"null"
]
},
"summarizable": {
"type": "boolean"
},
"details": {
"type": "object",
"additionalProperties": false,
@ -985,6 +988,7 @@
"show_read_indicator",
"thumbnails",
"slow_mode_enabled_until",
"summarizable",
"details"
]
}

View File

@ -5469,10 +5469,27 @@ RSpec.describe TopicsController do
end
context "for anons" do
it "returns a 404" do
it "returns a 404 if there is no cached summary" do
get "/t/#{topic.id}/strategy-summary.json"
expect(response.status).to eq(403)
expect(response.status).to eq(404)
end
it "returns a cached summary" do
section =
SummarySection.create!(
target: topic,
summarized_text: "test",
algorithm: "test",
original_content_sha: "test",
)
get "/t/#{topic.id}/strategy-summary.json"
expect(response.status).to eq(200)
summary = response.parsed_body
expect(summary["summary"]).to eq(section.summarized_text)
end
end
@ -5498,15 +5515,32 @@ RSpec.describe TopicsController do
end
end
context "when the user is not a member of an allowlited group" do
context "when the user is not a member of an allowlisted group" do
fab!(:user) { Fabricate(:user) }
before { sign_in(user) }
it "return a 404" do
it "return a 404 if there is no cached summary" do
get "/t/#{topic.id}/strategy-summary.json"
expect(response.status).to eq(403)
expect(response.status).to eq(404)
end
it "returns a cached summary" do
section =
SummarySection.create!(
target: topic,
summarized_text: "test",
algorithm: "test",
original_content_sha: "test",
)
get "/t/#{topic.id}/strategy-summary.json"
expect(response.status).to eq(200)
summary = response.parsed_body
expect(summary["summary"]).to eq(section.summarized_text)
end
end
end

View File

@ -2,6 +2,7 @@
describe TopicSummarization do
fab!(:topic) { Fabricate(:topic) }
fab!(:user) { Fabricate(:admin) }
fab!(:post_1) { Fabricate(:post, topic: topic) }
fab!(:post_2) { Fabricate(:post, topic: topic) }
@ -79,24 +80,25 @@ describe TopicSummarization do
let(:summary) { { summary: "This is the final summary", chunks: [] } }
it "caches the summary" do
summarized_text = summarization.summarize(topic)
section = summarization.summarize(topic, user)
expect(summarized_text).to eq(summary[:summary])
expect(section.summarized_text).to eq(summary[:summary])
assert_summary_is_cached(topic, summary)
end
it "returns the cached version in subsequent calls" do
summarization.summarize(topic)
summarization.summarize(topic, user)
cached_summary_text = "This is a cached summary"
cached_summary =
SummarySection.find_by(target: topic, meta_section_id: nil).update!(
summarized_text: cached_summary_text,
updated_at: 24.hours.ago,
)
summarized_text = summarization.summarize(topic)
expect(summarized_text).to eq(cached_summary_text)
section = summarization.summarize(topic, user)
expect(section.summarized_text).to eq(cached_summary_text)
end
end
@ -112,14 +114,64 @@ describe TopicSummarization do
end
it "caches the summary and each chunk" do
summarized_text = summarization.summarize(topic)
section = summarization.summarize(topic, user)
expect(summarized_text).to eq(summary[:summary])
expect(section.summarized_text).to eq(summary[:summary])
assert_summary_is_cached(topic, summary)
summary[:chunks].each { |c| assert_chunk_is_cached(topic, c) }
end
end
describe "invalidating cached summaries" do
let(:cached_text) { "This is a cached summary" }
let(:summarized_text) { "This is the final summary" }
let(:summary) do
{
summary: summarized_text,
chunks: [
{ ids: [topic.first_post.post_number], summary: "this is the first chunk" },
{ ids: [post_1.post_number, post_2.post_number], summary: "this is the second chunk" },
],
}
end
def cached_summary
SummarySection.find_by(target: topic, meta_section_id: nil)
end
before do
summarization.summarize(topic, user)
cached_summary.update!(summarized_text: cached_text, created_at: 24.hours.ago)
end
context "when the summary targets changed" do
before { cached_summary.update!(content_range: (1..1)) }
it "deletes existing summaries and create a new one" do
section = summarization.summarize(topic, user)
expect(section.summarized_text).to eq(summarized_text)
end
it "does nothing if the last summary is less than 12 hours old" do
cached_summary.update!(created_at: 6.hours.ago)
section = summarization.summarize(topic, user)
expect(section.summarized_text).to eq(cached_text)
end
end
context "when the summary targets are still the same" do
it "doesn't create a new summary" do
section = summarization.summarize(topic, user)
expect(section.summarized_text).to eq(cached_text)
end
end
end
end
end

View File

@ -10,7 +10,8 @@ RSpec.describe "Topic summarization", type: :system, js: true do
let(:plugin) { Plugin::Instance.new }
let(:summarization_result) { { summary: "This is a summary", chunks: [] } }
let(:expected_summary) { "This is a summary" }
let(:summarization_result) { { summary: expected_summary, chunks: [] } }
before do
sign_in(user)
@ -22,8 +23,10 @@ RSpec.describe "Topic summarization", type: :system, js: true do
it "returns a summary using the selected timeframe" do
visit("/t/-/#{topic.id}")
find(".topic-strategy-summarization", wait: 5).click
find(".topic-strategy-summarization").click
expect(page.has_css?(".topic-summary-modal", wait: 5)).to eq(true)
summary = find(".summary-box p").text
expect(summary).to eq(expected_summary)
end
end