DEV: Add summarization logic from core (#658)

Keegan George 2024-07-02 08:51:59 -07:00 committed by GitHub
parent c352cc5ba3
commit 1b0ba9197c
36 changed files with 1987 additions and 206 deletions

@@ -0,0 +1,49 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
class ChatSummaryController < ::Chat::ApiController
requires_plugin ::DiscourseAi::PLUGIN_NAME
requires_plugin ::Chat::PLUGIN_NAME
VALID_SINCE_VALUES = [1, 3, 6, 12, 24, 72, 168]
def show
since = params[:since].to_i
raise Discourse::InvalidParameters.new(:since) if !VALID_SINCE_VALUES.include?(since)
channel = ::Chat::Channel.find(params[:channel_id])
guardian.ensure_can_join_chat_channel!(channel)
strategy = DiscourseAi::Summarization::Models::Base.selected_strategy
raise Discourse::NotFound.new unless strategy
unless DiscourseAi::Summarization::Models::Base.can_request_summary_for?(current_user)
raise Discourse::InvalidAccess
end
RateLimiter.new(current_user, "channel_summary", 6, 5.minutes).performed!
hijack do
content = { content_title: channel.name }
content[:contents] = channel
.chat_messages
.where("chat_messages.created_at > ?", since.hours.ago)
.includes(:user)
.order(created_at: :asc)
.pluck(:id, :username_lower, :message)
.map { { id: _1, poster: _2, text: _3 } }
summarized_text =
if content[:contents].empty?
I18n.t("discourse_ai.summarization.chat.no_targets")
else
strategy.summarize(content, current_user).dig(:summary)
end
render json: { summary: summarized_text }
end
end
end
end
end
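A hedged sketch of the happy path for this controller, written in the style of the request specs added later in this commit; the "fake" strategy, the fabricators, and the group wiring are assumptions borrowed from those specs rather than part of this file:

RSpec.describe DiscourseAi::Summarization::ChatSummaryController do
  fab!(:user) { Fabricate(:user) }
  fab!(:group)
  fab!(:channel) { Fabricate(:chat_channel) }
  fab!(:chat_message) { Fabricate(:chat_message, chat_channel: channel) }

  before do
    group.add(user)
    SiteSetting.chat_enabled = true
    SiteSetting.chat_allowed_groups = group.id.to_s
    SiteSetting.ai_summarization_strategy = "fake"
    SiteSetting.ai_custom_summarization_allowed_groups = group.id.to_s
    sign_in(user)
  end

  it "returns a summary of the selected window" do
    DiscourseAi::Completions::Llm.with_prepared_responses(["This is a summary"]) do
      get "/discourse-ai/summarization/channels/#{channel.id}", params: { since: 6 }

      expect(response.status).to eq(200)
      expect(response.parsed_body["summary"]).to eq("This is a summary")
    end
  end
end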

@@ -0,0 +1,42 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
class SummaryController < ::ApplicationController
requires_plugin ::DiscourseAi::PLUGIN_NAME
def show
topic = Topic.find(params[:topic_id])
guardian.ensure_can_see!(topic)
strategy = DiscourseAi::Summarization::Models::Base.selected_strategy
if strategy.nil? ||
!DiscourseAi::Summarization::Models::Base.can_see_summary?(topic, current_user)
raise Discourse::NotFound
end
RateLimiter.new(current_user, "summary", 6, 5.minutes).performed! if current_user
opts = params.permit(:skip_age_check)
if params[:stream] && current_user
Jobs.enqueue(
:stream_topic_ai_summary,
topic_id: topic.id,
user_id: current_user.id,
opts: opts.as_json,
)
render json: success_json
else
hijack do
summary =
DiscourseAi::TopicSummarization.new(strategy).summarize(topic, current_user, opts)
render_serialized(summary, AiTopicSummarySerializer)
end
end
end
end
end
end
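For the streaming branch above, a minimal sketch (same spec style as the rest of this commit) of the expected behavior: a logged-in user with stream=true only enqueues Jobs::StreamTopicAiSummary and gets success_json back. The topic/user setup is assumed to match the SummaryController spec added later, and Sidekiq's fake test mode is assumed for the .jobs inspection.

it "enqueues the streaming job instead of summarizing inline" do
  sign_in(user)

  expect {
    get "/discourse-ai/summarization/t/#{topic.id}.json", params: { stream: true }
  }.to change { Jobs::StreamTopicAiSummary.jobs.size }.by(1)

  expect(response.status).to eq(200)
end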

@@ -0,0 +1,52 @@
# frozen_string_literal: true
module Jobs
class StreamTopicAiSummary < ::Jobs::Base
sidekiq_options retry: false
def execute(args)
return unless topic = Topic.find_by(id: args[:topic_id])
return unless user = User.find_by(id: args[:user_id])
strategy = DiscourseAi::Summarization::Models::Base.selected_strategy
if strategy.nil? || !DiscourseAi::Summarization::Models::Base.can_see_summary?(topic, user)
return
end
guardian = Guardian.new(user)
return unless guardian.can_see?(topic)
opts = args[:opts] || {}
streamed_summary = +""
start = Time.now
summary =
DiscourseAi::TopicSummarization
.new(strategy)
.summarize(topic, user, opts) do |partial_summary|
streamed_summary << partial_summary
# Throttle updates.
if (Time.now - start > 0.5) || Rails.env.test?
payload = { done: false, ai_topic_summary: { summarized_text: streamed_summary } }
publish_update(topic, user, payload)
start = Time.now
end
end
publish_update(
topic,
user,
AiTopicSummarySerializer.new(summary, { scope: guardian }).as_json.merge(done: true),
)
end
private
def publish_update(topic, user, payload)
MessageBus.publish("/discourse-ai/summaries/topic/#{topic.id}", payload, user_ids: [user.id])
end
end
end
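The job's MessageBus contract, sketched the way the job spec later in this commit observes it (the "fake" strategy, topic, and user here are assumptions taken from that spec):

messages =
  MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
    Jobs::StreamTopicAiSummary.new.execute(topic_id: topic.id, user_id: user.id)
  end

partials, finals = messages.map(&:data).partition { |payload| !payload[:done] }
# partials    => throttled { done: false, ai_topic_summary: { summarized_text: "..." } } updates
# finals.last => the full AiTopicSummarySerializer payload merged with done: true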

app/models/ai_summary.rb Normal file
@@ -0,0 +1,28 @@
# frozen_string_literal: true
class AiSummary < ActiveRecord::Base
belongs_to :target, polymorphic: true
def mark_as_outdated
@outdated = true
end
def outdated
@outdated || false
end
end
# == Schema Information
#
# Table name: ai_summaries
#
# id :bigint not null, primary key
# target_id :integer not null
# target_type :string not null
# content_range :int4range
# summarized_text :string not null
# original_content_sha :string not null
# algorithm :string not null
# created_at :datetime not null
# updated_at :datetime not null
#

@@ -0,0 +1,22 @@
# frozen_string_literal: true
class AiTopicSummarySerializer < ApplicationSerializer
attributes :summarized_text, :algorithm, :outdated, :can_regenerate, :new_posts_since_summary
def can_regenerate
DiscourseAi::Summarization::Models::Base.can_request_summary_for?(scope.current_user)
end
def new_posts_since_summary
# Postgres uses discrete range types for int4range, which means
# (1..2) is stored as (1...3).
#
# We use Range#max to work around this, which in the case above always returns 2.
# Be careful when using Range#end here; it can lead to unexpected results:
#
# (1..2).end => 2
# (1...3).end => 3
object.target.highest_post_number.to_i - object.content_range&.max.to_i
end
end
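A plain-Ruby illustration of the comment above: after Postgres canonicalizes the int4range, (1..2) round-trips as (1...3), which is why Range#max (and not Range#end) is used to compute new_posts_since_summary:

(1..2).max   # => 2
(1...3).max  # => 2  (the same range in canonical, exclusive-end form)
(1...3).end  # => 3  (would over-count new posts by one)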

@@ -0,0 +1,120 @@
# frozen_string_literal: true
module DiscourseAi
class TopicSummarization
def initialize(strategy)
@strategy = strategy
end
def summarize(topic, user, opts = {}, &on_partial_blk)
existing_summary = AiSummary.find_by(target: topic)
# Existing summary shouldn't be nil in this scenario because the controller checks its existence.
return if !user && !existing_summary
targets_data = summary_targets(topic).pluck(:post_number, :raw, :username)
current_topic_sha = build_sha(targets_data.map(&:first))
can_summarize = DiscourseAi::Summarization::Models::Base.can_request_summary_for?(user)
if use_cached?(existing_summary, can_summarize, current_topic_sha, !!opts[:skip_age_check])
# It's important that we signal a cached summary is outdated
existing_summary.mark_as_outdated if new_targets?(existing_summary, current_topic_sha)
return existing_summary
end
delete_cached_summaries_of(topic) if existing_summary
content = {
resource_path: "#{Discourse.base_path}/t/-/#{topic.id}",
content_title: topic.title,
contents: [],
}
targets_data.map do |(pn, raw, username)|
raw_text = raw
if pn == 1 && topic.topic_embed&.embed_content_cache.present?
raw_text = topic.topic_embed&.embed_content_cache
end
content[:contents] << { poster: username, id: pn, text: raw_text }
end
summarization_result = strategy.summarize(content, user, &on_partial_blk)
cache_summary(summarization_result, targets_data.map(&:first), topic)
end
def summary_targets(topic)
topic.has_summary? ? best_replies(topic) : pick_selection(topic)
end
private
attr_reader :strategy
def best_replies(topic)
Post
.summary(topic.id)
.where("post_type = ?", Post.types[:regular])
.where("NOT hidden")
.joins(:user)
.order(:post_number)
end
def pick_selection(topic)
posts =
Post
.where(topic_id: topic.id)
.where("post_type = ?", Post.types[:regular])
.where("NOT hidden")
.order(:post_number)
post_numbers = posts.limit(5).pluck(:post_number)
post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)
Post
.where(topic_id: topic.id)
.joins(:user)
.where("post_number in (?)", post_numbers)
.order(:post_number)
end
def delete_cached_summaries_of(topic)
AiSummary.where(target: topic).destroy_all
end
# For users without permission to generate a summary, or when the cached summary is still fresh, we return what we have cached.
def use_cached?(existing_summary, can_summarize, current_sha, skip_age_check)
existing_summary &&
!(
can_summarize && new_targets?(existing_summary, current_sha) &&
(skip_age_check || existing_summary.created_at < 1.hour.ago)
)
end
def new_targets?(summary, current_sha)
summary.original_content_sha != current_sha
end
def cache_summary(result, post_numbers, topic)
cached_summary =
AiSummary.create!(
target: topic,
algorithm: strategy.display_name,
content_range: (post_numbers.first..post_numbers.last),
summarized_text: result[:summary],
original_content_sha: build_sha(post_numbers),
)
cached_summary
end
def build_sha(ids)
Digest::SHA256.hexdigest(ids.join)
end
end
end
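An illustration of the cache key used above: build_sha hashes the concatenated post numbers that fed the summary, so any change to that set makes new_targets? true and the cached summary is either flagged as outdated or regenerated (digest values omitted; they are ordinary SHA-256 hex strings):

require "digest"

old_sha = Digest::SHA256.hexdigest([1, 2, 3].join)    # summary built from posts 1-3
new_sha = Digest::SHA256.hexdigest([1, 2, 3, 4].join) # post 4 arrived since then
old_sha == new_sha # => false, so new_targets? returns true for the cached row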

@@ -0,0 +1,129 @@
import Component from "@glimmer/component";
import { tracked } from "@glimmer/tracking";
import { fn } from "@ember/helper";
import { action } from "@ember/object";
import didInsert from "@ember/render-modifiers/modifiers/did-insert";
import didUpdate from "@ember/render-modifiers/modifiers/did-update";
import willDestroy from "@ember/render-modifiers/modifiers/will-destroy";
import { cancel } from "@ember/runloop";
import concatClass from "discourse/helpers/concat-class";
import i18n from "discourse-common/helpers/i18n";
import discourseLater from "discourse-common/lib/later";
class Block {
@tracked show = false;
@tracked shown = false;
@tracked blinking = false;
constructor(args = {}) {
this.show = args.show ?? false;
this.shown = args.shown ?? false;
}
}
const BLOCKS_SIZE = 20; // changing this requires changing the CSS accordingly
export default class AiSummarySkeleton extends Component {
blocks = [...Array.from({ length: BLOCKS_SIZE }, () => new Block())];
#nextBlockBlinkingTimer;
#blockBlinkingTimer;
#blockShownTimer;
@action
setupAnimation() {
this.blocks.firstObject.show = true;
this.blocks.firstObject.shown = true;
}
@action
onBlinking(block) {
if (!block.blinking) {
return;
}
block.show = false;
this.#nextBlockBlinkingTimer = discourseLater(
this,
() => {
this.#nextBlock(block).blinking = true;
},
250
);
this.#blockBlinkingTimer = discourseLater(
this,
() => {
block.blinking = false;
},
500
);
}
@action
onShowing(block) {
if (!block.show) {
return;
}
this.#blockShownTimer = discourseLater(
this,
() => {
this.#nextBlock(block).show = true;
this.#nextBlock(block).shown = true;
if (this.blocks.lastObject === block) {
this.blocks.firstObject.blinking = true;
}
},
250
);
}
@action
teardownAnimation() {
cancel(this.#blockShownTimer);
cancel(this.#nextBlockBlinkingTimer);
cancel(this.#blockBlinkingTimer);
}
#nextBlock(currentBlock) {
if (currentBlock === this.blocks.lastObject) {
return this.blocks.firstObject;
} else {
return this.blocks.objectAt(this.blocks.indexOf(currentBlock) + 1);
}
}
<template>
<div class="ai-summary__container">
<ul class="ai-summary__list" {{didInsert this.setupAnimation}}>
{{#each this.blocks as |block|}}
<li
class={{concatClass
"ai-summary__list-item"
(if block.show "show")
(if block.shown "is-shown")
(if block.blinking "blink")
}}
{{didUpdate (fn this.onBlinking block) block.blinking}}
{{didUpdate (fn this.onShowing block) block.show}}
{{willDestroy this.teardownAnimation}}
></li>
{{/each}}
</ul>
<span>
<div class="ai-summary__generating-text">
{{i18n "summary.in_progress"}}
</div>
<span class="ai-summary__indicator-wave">
<span class="ai-summary__indicator-dot">.</span>
<span class="ai-summary__indicator-dot">.</span>
<span class="ai-summary__indicator-dot">.</span>
</span>
</span>
</div>
</template>
}

@@ -0,0 +1,83 @@
import Component from "@glimmer/component";
import { tracked } from "@glimmer/tracking";
import { action } from "@ember/object";
import { service } from "@ember/service";
import ConditionalLoadingSection from "discourse/components/conditional-loading-section";
import DModal from "discourse/components/d-modal";
import DModalCancel from "discourse/components/d-modal-cancel";
import { ajax } from "discourse/lib/ajax";
import { popupAjaxError } from "discourse/lib/ajax-error";
import i18n from "discourse-common/helpers/i18n";
import I18n from "discourse-i18n";
import ComboBox from "select-kit/components/combo-box";
export default class ChatModalChannelSummary extends Component {
@service chatApi;
@tracked sinceHours = null;
@tracked loading = false;
@tracked summary = null;
availableSummaries = {};
sinceOptions = [1, 3, 6, 12, 24, 72, 168].map((hours) => {
return {
name: I18n.t("discourse_ai.summarization.chat.since", { count: hours }),
value: hours,
};
});
get channelId() {
return this.args.model.channelId;
}
@action
summarize(since) {
this.sinceHours = since;
this.loading = true;
if (this.availableSummaries[since]) {
this.summary = this.availableSummaries[since];
this.loading = false;
return;
}
return ajax(`/discourse-ai/summarization/channels/${this.channelId}.json`, {
type: "GET",
data: {
since,
},
})
.then((data) => {
this.availableSummaries[this.sinceHours] = data.summary;
this.summary = this.availableSummaries[this.sinceHours];
})
.catch(popupAjaxError)
.finally(() => (this.loading = false));
}
<template>
<DModal
@closeModal={{@closeModal}}
class="chat-modal-channel-summary"
@title={{i18n "discourse_ai.summarization.chat.title"}}
>
<:body>
<span>{{i18n "discourse_ai.summarization.chat.description"}}</span>
<ComboBox
@value={{this.sinceHours}}
@content={{this.sinceOptions}}
@onChange={{this.summarize}}
@valueProperty="value"
class="summarization-since"
/>
<ConditionalLoadingSection @isLoading={{this.loading}}>
<p class="summary-area">{{this.summary}}</p>
</ConditionalLoadingSection>
</:body>
<:footer>
<DModalCancel @close={{@closeModal}} />
</:footer>
</DModal>
</template>
}

@@ -0,0 +1,207 @@
import Component from "@glimmer/component";
import { tracked } from "@glimmer/tracking";
import { array } from "@ember/helper";
import { action } from "@ember/object";
import didInsert from "@ember/render-modifiers/modifiers/did-insert";
import willDestroy from "@ember/render-modifiers/modifiers/will-destroy";
import { service } from "@ember/service";
import DButton from "discourse/components/d-button";
import { ajax } from "discourse/lib/ajax";
import { shortDateNoYear } from "discourse/lib/formatter";
import { cook } from "discourse/lib/text";
import dIcon from "discourse-common/helpers/d-icon";
import i18n from "discourse-common/helpers/i18n";
import { bind } from "discourse-common/utils/decorators";
import I18n from "discourse-i18n";
import DTooltip from "float-kit/components/d-tooltip";
import and from "truth-helpers/helpers/and";
import not from "truth-helpers/helpers/not";
import or from "truth-helpers/helpers/or";
import AiSummarySkeleton from "../../components/ai-summary-skeleton";
export default class AiSummaryBox extends Component {
@service siteSettings;
@service messageBus;
@service currentUser;
@tracked summary = "";
@tracked text = "";
@tracked summarizedOn = null;
@tracked summarizedBy = null;
@tracked newPostsSinceSummary = null;
@tracked outdated = false;
@tracked canRegenerate = false;
@tracked regenerated = false;
@tracked showSummaryBox = false;
@tracked canCollapseSummary = false;
@tracked loading = false;
get generateSummaryTitle() {
const title = this.canRegenerate
? "summary.buttons.regenerate"
: "summary.buttons.generate";
return I18n.t(title);
}
get generateSummaryIcon() {
return this.canRegenerate ? "sync" : "discourse-sparkles";
}
get outdatedSummaryWarningText() {
let outdatedText = I18n.t("summary.outdated");
if (!this.topRepliesSummaryEnabled && this.newPostsSinceSummary > 0) {
outdatedText += " ";
outdatedText += I18n.t("summary.outdated_posts", {
count: this.newPostsSinceSummary,
});
}
return outdatedText;
}
get topRepliesSummaryEnabled() {
return this.args.outletArgs.postStream.summary;
}
@action
collapse() {
this.showSummaryBox = false;
this.canCollapseSummary = false;
}
@action
generateSummary() {
const topicId = this.args.outletArgs.topic.id;
this.showSummaryBox = true;
if (this.text && !this.canRegenerate) {
this.canCollapseSummary = false;
return;
}
let fetchURL = `/discourse-ai/summarization/t/${topicId}?`;
if (this.currentUser) {
fetchURL += `stream=true`;
if (this.canRegenerate) {
fetchURL += "&skip_age_check=true";
}
}
this.loading = true;
return ajax(fetchURL).then((data) => {
if (!this.currentUser) {
data.done = true;
this._updateSummary(data);
}
});
}
@bind
subscribe() {
const channel = `/discourse-ai/summaries/topic/${this.args.outletArgs.topic.id}`;
this.messageBus.subscribe(channel, this._updateSummary);
}
@bind
unsubscribe() {
this.messageBus.unsubscribe(
"/discourse-ai/summaries/topic/*",
this._updateSummary
);
}
@bind
_updateSummary(update) {
const topicSummary = update.ai_topic_summary;
return cook(topicSummary.summarized_text)
.then((cooked) => {
this.text = cooked;
this.loading = false;
})
.then(() => {
if (update.done) {
this.summarizedOn = shortDateNoYear(topicSummary.summarized_on);
this.summarizedBy = topicSummary.algorithm;
this.newPostsSinceSummary = topicSummary.new_posts_since_summary;
this.outdated = topicSummary.outdated;
this.canRegenerate =
topicSummary.outdated && topicSummary.can_regenerate;
}
});
}
<template>
{{#if (or @outletArgs.topic.has_summary @outletArgs.topic.summarizable)}}
<div class="summarization-buttons">
{{#if @outletArgs.topic.summarizable}}
{{#if this.showSummaryBox}}
<DButton
@action={{this.collapse}}
@title="summary.buttons.hide"
@label="summary.buttons.hide"
@icon="chevron-up"
class="btn-primary ai-topic-summarization"
/>
{{else}}
<DButton
@action={{this.generateSummary}}
@translatedLabel={{this.generateSummaryTitle}}
@translatedTitle={{this.generateSummaryTitle}}
@icon={{this.generateSummaryIcon}}
@disabled={{this.loading}}
class="btn-primary ai-topic-summarization"
/>
{{/if}}
{{/if}}
{{yield}}
</div>
<div
class="summary-box__container"
{{didInsert this.subscribe}}
{{willDestroy this.unsubscribe}}
>
{{#if this.showSummaryBox}}
<article class="ai-summary-box">
{{#if (and this.loading (not this.text))}}
<AiSummarySkeleton />
{{else}}
<div class="generated-summary">{{this.text}}</div>
{{#if this.summarizedOn}}
<div class="summarized-on">
<p>
{{i18n "summary.summarized_on" date=this.summarizedOn}}
<DTooltip @placements={{array "top-end"}}>
<:trigger>
{{dIcon "info-circle"}}
</:trigger>
<:content>
{{i18n "summary.model_used" model=this.summarizedBy}}
</:content>
</DTooltip>
</p>
{{#if this.outdated}}
<p class="outdated-summary">
{{this.outdatedSummaryWarningText}}
</p>
{{/if}}
</div>
{{/if}}
{{/if}}
</article>
{{/if}}
</div>
{{/if}}
</template>
}

@@ -0,0 +1,72 @@
import { tracked } from "@glimmer/tracking";
import { ajax } from "discourse/lib/ajax";
import { shortDateNoYear } from "discourse/lib/formatter";
import { cook } from "discourse/lib/text";
export default class AiTopicSummary {
@tracked text = "";
@tracked summarizedOn = null;
@tracked summarizedBy = null;
@tracked newPostsSinceSummary = null;
@tracked outdated = false;
@tracked canRegenerate = false;
@tracked regenerated = false;
@tracked showSummaryBox = false;
@tracked canCollapseSummary = false;
@tracked loadingSummary = false;
processUpdate(update) {
const topicSummary = update.ai_topic_summary;
return cook(topicSummary.summarized_text)
.then((cooked) => {
this.text = cooked;
this.loadingSummary = false;
})
.then(() => {
if (update.done) {
this.summarizedOn = shortDateNoYear(topicSummary.summarized_on);
this.summarizedBy = topicSummary.algorithm;
this.newPostsSinceSummary = topicSummary.new_posts_since_summary;
this.outdated = topicSummary.outdated;
this.canRegenerate =
topicSummary.outdated && topicSummary.can_regenerate;
}
});
}
collapse() {
this.showSummaryBox = false;
this.canCollapseSummary = false;
}
generateSummary(currentUser, topicId) {
this.showSummaryBox = true;
if (this.text && !this.canRegenerate) {
this.canCollapseSummary = false;
return;
}
let fetchURL = `/discourse-ai/summarization/t/${topicId}?`;
if (currentUser) {
fetchURL += `stream=true`;
if (this.canRegenerate) {
fetchURL += "&skip_age_check=true";
}
}
this.loadingSummary = true;
return ajax(fetchURL).then((data) => {
if (!currentUser) {
data.done = true;
this.processUpdate(data);
}
});
}
}

@@ -0,0 +1,29 @@
import { apiInitializer } from "discourse/lib/api";
import ChatModalChannelSummary from "../discourse/components/modal/chat-modal-channel-summary";
export default apiInitializer("1.34.0", (api) => {
const siteSettings = api.container.lookup("service:site-settings");
const currentUser = api.getCurrentUser();
const chatService = api.container.lookup("service:chat");
const modal = api.container.lookup("service:modal");
const canSummarize =
siteSettings.ai_summarization_strategy &&
currentUser &&
currentUser.can_summarize;
if (!chatService.userCanChat || !siteSettings.chat_enabled || !canSummarize) {
return;
}
api.registerChatComposerButton({
translatedLabel: "discourse_ai.summarization.chat.title",
id: "channel-summary",
icon: "discourse-sparkles",
position: "dropdown",
action: () => {
modal.show(ChatModalChannelSummary, {
model: { channelId: chatService.activeChannel?.id },
});
},
});
});

@@ -0,0 +1,193 @@
.topic-map .toggle-summary {
.summarization-buttons {
display: flex;
gap: 0.5em;
}
.ai-summary {
&__list {
list-style: none;
display: flex;
flex-wrap: wrap;
padding: 0;
margin: 0;
}
&__list-item {
background: var(--primary-300);
border-radius: var(--d-border-radius);
margin-right: 8px;
margin-bottom: 8px;
height: 18px;
opacity: 0;
display: block;
&:nth-child(1) {
width: 10%;
}
&:nth-child(2) {
width: 12%;
}
&:nth-child(3) {
width: 18%;
}
&:nth-child(4) {
width: 14%;
}
&:nth-child(5) {
width: 18%;
}
&:nth-child(6) {
width: 14%;
}
&:nth-child(7) {
width: 22%;
}
&:nth-child(8) {
width: 5%;
}
&:nth-child(9) {
width: 25%;
}
&:nth-child(10) {
width: 14%;
}
&:nth-child(11) {
width: 18%;
}
&:nth-child(12) {
width: 12%;
}
&:nth-child(13) {
width: 22%;
}
&:nth-child(14) {
width: 18%;
}
&:nth-child(15) {
width: 13%;
}
&:nth-child(16) {
width: 22%;
}
&:nth-child(17) {
width: 19%;
}
&:nth-child(18) {
width: 13%;
}
&:nth-child(19) {
width: 22%;
}
&:nth-child(20) {
width: 25%;
}
&.is-shown {
opacity: 1;
}
&.show {
animation: appear 0.5s cubic-bezier(0.445, 0.05, 0.55, 0.95) 0s forwards;
@media (prefers-reduced-motion) {
animation-duration: 0s;
}
}
@media (prefers-reduced-motion: no-preference) {
&.blink {
animation: blink 0.5s cubic-bezier(0.55, 0.085, 0.68, 0.53) both;
}
}
}
&__generating-text {
display: inline-block;
margin-left: 3px;
}
&__indicator-wave {
flex: 0 0 auto;
display: inline-flex;
}
&__indicator-dot {
display: inline-block;
@media (prefers-reduced-motion: no-preference) {
animation: ai-summary__indicator-wave 1.8s linear infinite;
}
&:nth-child(2) {
animation-delay: -1.6s;
}
&:nth-child(3) {
animation-delay: -1.4s;
}
}
}
.placeholder-summary {
padding-top: 0.5em;
}
.placeholder-summary-text {
display: inline-block;
height: 1em;
margin-top: 0.6em;
width: 100%;
}
.summarized-on {
text-align: right;
.info-icon {
margin-left: 3px;
}
}
.outdated-summary {
color: var(--primary-medium);
}
}
@keyframes ai-summary__indicator-wave {
0%,
60%,
100% {
transform: initial;
}
30% {
transform: translateY(-0.2em);
}
}
@keyframes appear {
0% {
opacity: 0;
}
100% {
opacity: 1;
}
}
@keyframes blink {
0% {
opacity: 1;
}
50% {
opacity: 0.5;
}
100% {
opacity: 1;
}
}

@@ -391,6 +391,15 @@ en:
      sentiments:
        dashboard:
          title: "Sentiment"
      summarization:
        chat:
          title: "Summarize messages"
          description: "Select an option below to summarize the conversation sent during the desired timeframe."
          summarize: "Summarize"
          since:
            one: "Last hour"
            other: "Last %{count} hours"
      review:
        types:
          reviewable_ai_post:

@@ -95,6 +95,8 @@ en:
    ai_summarization_discourse_service_api_endpoint: "URL where the Discourse summarization API is running."
    ai_summarization_discourse_service_api_key: "API key for the Discourse summarization API."
    ai_summarization_strategy: "Additional ways to summarize content registered by plugins"
    ai_custom_summarization_allowed_groups: "Groups allowed to summarize contents using the `summarization_strategy`."
    ai_bot_enabled: "Enable the AI Bot module."
    ai_bot_enable_chat_warning: "Display a warning when PM chat is initiated. Can be overridden by editing the translation string: discourse_ai.ai_bot.pm_warning"
@@ -312,6 +314,8 @@ en:
      configuration_hint:
        one: "Configure the `%{setting}` setting first."
        other: "Configure these settings first: %{settings}"
      chat:
        no_targets: "There were no messages during the selected period."
    sentiment:
      reports:

@@ -27,6 +27,11 @@ DiscourseAi::Engine.routes.draw do
get "/:share_key" => "shared_ai_conversations#show"
get "/preview/:topic_id" => "shared_ai_conversations#preview"
end
scope module: :summarization, path: "/summarization", defaults: { format: :json } do
get "/t/:topic_id" => "summary#show", :constraints => { topic_id: /\d+/ }
get "/channels/:channel_id" => "chat_summary#show"
end
end
Discourse::Application.routes.draw do

@@ -327,6 +327,15 @@ discourse_ai:
  ai_summarization_discourse_service_api_key:
    default: ""
    secret: true
  ai_summarization_strategy:
    client: true
    default: ""
    enum: "DiscourseAi::Configuration::SummarizationEnumerator"
    validator: "DiscourseAi::Configuration::SummarizationValidator"
  ai_custom_summarization_allowed_groups:
    type: group_list
    list_type: compact
    default: "3|13" # 3: @staff, 13: @trust_level_3
  ai_bot_enabled:
    default: false

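A console sketch for wiring the settings above in development or tests; the "fake" strategy is only registered outside production, and 13 is the trust_level_3 auto group referenced in the default:

SiteSetting.ai_summarization_strategy = "fake"
SiteSetting.ai_custom_summarization_allowed_groups = "13"
SiteSetting.ai_custom_summarization_allowed_groups_map # => [13]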
@@ -0,0 +1,15 @@
# frozen_string_literal: true
class CreateAiSummariesTable < ActiveRecord::Migration[7.0]
def change
create_table :ai_summaries do |t|
t.integer :target_id, null: false
t.string :target_type, null: false
t.int4range :content_range
t.string :summarized_text, null: false
t.string :original_content_sha, null: false
t.string :algorithm, null: false
t.timestamps
end
end
end

@@ -0,0 +1,16 @@
# frozen_string_literal: true
class CopySummarySectionsToAiSummaries < ActiveRecord::Migration[7.0]
def up
execute <<-SQL
INSERT INTO ai_summaries (id, target_id, target_type, content_range, summarized_text, original_content_sha, algorithm, created_at, updated_at)
SELECT id, target_id, target_type, content_range, summarized_text, original_content_sha, algorithm, created_at, updated_at
FROM summary_sections
WHERE meta_section_id IS NULL
SQL
end
def down
raise ActiveRecord::IrreversibleMigration
end
end

@@ -0,0 +1,17 @@
# frozen_string_literal: true
class CopySummarizationStrategyToAiSummarizationStrategy < ActiveRecord::Migration[7.0]
def up
execute <<-SQL
UPDATE site_settings
SET data_type = (SELECT data_type FROM site_settings WHERE name = 'summarization_strategy'),
value = (SELECT value FROM site_settings WHERE name = 'summarization_strategy')
WHERE name = 'ai_summarization_strategy'
AND EXISTS (SELECT 1 FROM site_settings WHERE name = 'summarization_strategy');
SQL
end
def down
raise ActiveRecord::IrreversibleMigration
end
end

@@ -0,0 +1,19 @@
# frozen_string_literal: true
class CopyCustomSummarizationAllowedGroupsToAiCustomSummarizationAllowedGroups < ActiveRecord::Migration[
7.0
]
def up
execute <<-SQL
UPDATE site_settings
SET data_type = (SELECT data_type FROM site_settings WHERE name = 'custom_summarization_allowed_groups'),
value = (SELECT value FROM site_settings WHERE name = 'custom_summarization_allowed_groups')
WHERE name = 'ai_custom_summarization_allowed_groups'
AND EXISTS (SELECT 1 FROM site_settings WHERE name = 'custom_summarization_allowed_groups');
SQL
end
def down
raise ActiveRecord::IrreversibleMigration
end
end

@@ -13,7 +13,7 @@ module DiscourseAi
def initialize(responses)
@responses = responses
@completions = 0
@prompt = nil
@dialect = nil
end
def normalize_model_params(model_params)
@@ -21,10 +21,14 @@ module DiscourseAi
model_params
end
attr_reader :responses, :completions, :prompt
attr_reader :responses, :completions, :dialect
def perform_completion!(prompt, _user, _model_params, feature_name: nil)
@prompt = prompt
def prompt_messages
dialect.prompt.messages
end
def perform_completion!(dialect, _user, _model_params, feature_name: nil)
@dialect = dialect
response = responses[completions]
if response.nil?
raise CANNED_RESPONSE_ERROR,

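What the @prompt to @dialect change buys: specs can now inspect the messages that reached the endpoint through the dialect. A sketch of that usage, mirroring the topic summarization spec later in this commit (strategy, content, and user are assumed to be set up as in that spec):

DiscourseAi::Completions::Llm.with_prepared_responses(["A summary"]) do |spy|
  strategy.summarize(content, user)
  spy.prompt_messages.map { |m| m[:content] } # raw message contents handed to the LLM
end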
@@ -0,0 +1,20 @@
# frozen_string_literal: true
require "enum_site_setting"
module DiscourseAi
module Configuration
class SummarizationEnumerator < ::EnumSiteSetting
def self.valid_value?(val)
true
end
def self.values
@values ||=
DiscourseAi::Summarization::Models::Base.available_strategies.map do |strategy|
{ name: strategy.display_name, value: strategy.model }
end
end
end
end
end
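A sketch of the data this enumerator feeds the ai_summarization_strategy dropdown; the exact entries depend on which strategies are registered and correctly configured, so the names and values below are illustrative only:

DiscourseAi::Configuration::SummarizationEnumerator.values
# => [
#   { name: "GPT-4", value: "gpt-4" }, # illustrative
#   { name: "fake", value: "fake" },   # registered outside production
#   ...
# ]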

@@ -0,0 +1,23 @@
# frozen_string_literal: true
module DiscourseAi
module Configuration
class SummarizationValidator
def initialize(opts = {})
@opts = opts
end
def valid_value?(val)
strategy = DiscourseAi::Summarization::Models::Base.find_strategy(val)
return true unless strategy
strategy.correctly_configured?.tap { |is_valid| @strategy = strategy unless is_valid }
end
def error_message
@strategy.configuration_hint
end
end
end
end

@@ -4,79 +4,17 @@ module DiscourseAi
module Summarization
class EntryPoint
def inject_into(plugin)
foldable_models = [
Models::OpenAi.new("open_ai:gpt-4", max_tokens: 8192),
Models::OpenAi.new("open_ai:gpt-4-32k", max_tokens: 32_768),
Models::OpenAi.new("open_ai:gpt-4-turbo", max_tokens: 100_000),
Models::OpenAi.new("open_ai:gpt-4o", max_tokens: 100_000),
Models::OpenAi.new("open_ai:gpt-3.5-turbo", max_tokens: 4096),
Models::OpenAi.new("open_ai:gpt-3.5-turbo-16k", max_tokens: 16_384),
Models::Gemini.new("google:gemini-pro", max_tokens: 32_768),
Models::Gemini.new("google:gemini-1.5-pro", max_tokens: 800_000),
Models::Gemini.new("google:gemini-1.5-flash", max_tokens: 800_000),
]
claude_prov = "anthropic"
if DiscourseAi::Completions::Endpoints::AwsBedrock.correctly_configured?("claude-2")
claude_prov = "aws_bedrock"
plugin.add_to_serializer(:current_user, :can_summarize) do
scope.user.in_any_groups?(SiteSetting.ai_custom_summarization_allowed_groups_map)
end
foldable_models << Models::Anthropic.new("#{claude_prov}:claude-2", max_tokens: 200_000)
foldable_models << Models::Anthropic.new(
"#{claude_prov}:claude-instant-1",
max_tokens: 100_000,
)
foldable_models << Models::Anthropic.new(
"#{claude_prov}:claude-3-haiku",
max_tokens: 200_000,
)
foldable_models << Models::Anthropic.new(
"#{claude_prov}:claude-3-sonnet",
max_tokens: 200_000,
)
foldable_models << Models::Anthropic.new(
"#{claude_prov}:claude-3-opus",
max_tokens: 200_000,
)
mixtral_prov = "hugging_face"
if DiscourseAi::Completions::Endpoints::Vllm.correctly_configured?(
"mistralai/Mixtral-8x7B-Instruct-v0.1",
)
mixtral_prov = "vllm"
plugin.add_to_serializer(:topic_view, :summarizable) do
DiscourseAi::Summarization::Models::Base.can_see_summary?(object.topic, scope.user)
end
foldable_models << Models::Mixtral.new(
"#{mixtral_prov}:mistralai/Mixtral-8x7B-Instruct-v0.1",
max_tokens: 32_000,
)
# TODO: Roman, we need to de-register custom LLMs on destroy from summarization
# strategy and clear cache
# it may be better to pull all of this code into Discourse AI cause as it stands
# the coupling is making it really hard to reason about summarization
#
# Auto registration and de-registration needs to be tested
#LlmModel.all.each do |model|
# foldable_models << Models::CustomLlm.new(
# "custom:#{model.id}",
# max_tokens: model.max_prompt_tokens,
# )
#end
foldable_models.each do |model|
plugin.register_summarization_strategy(Strategies::FoldContent.new(model))
plugin.add_to_serializer(:web_hook_topic_view, :summarizable) do
DiscourseAi::Summarization::Models::Base.can_see_summary?(object.topic, scope.user)
end
#plugin.add_model_callback(LlmModel, :after_create) do
# new_model = Models::CustomLlm.new("custom:#{self.id}", max_tokens: self.max_prompt_tokens)
# if ::Summarization::Base.find_strategy("custom:#{self.id}").nil?
# plugin.register_summarization_strategy(Strategies::FoldContent.new(new_model))
# end
#end
end
end
end
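The serializer fields added above are what gate the frontend: current_user.can_summarize controls whether a user may (re)generate summaries, and topic_view.summarizable controls whether the summarize button appears at all. A console sketch of the underlying checks (the user and topic lookups are illustrative):

user = User.find_by(username: "alice")
topic = Topic.find(123)

DiscourseAi::Summarization::Models::Base.can_request_summary_for?(user) # backs current_user.can_summarize
DiscourseAi::Summarization::Models::Base.can_see_summary?(topic, user)  # backs topic_view.summarizable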

@@ -1,30 +1,157 @@
# frozen_string_literal: true
# Base class that defines the interface that every summarization
# strategy must implement.
# Above each method, you'll find an explanation of what
# it does and what it should return.
module DiscourseAi
module Summarization
module Models
class Base
class << self
def available_strategies
foldable_models = [
Models::OpenAi.new("open_ai:gpt-4", max_tokens: 8192),
Models::OpenAi.new("open_ai:gpt-4-32k", max_tokens: 32_768),
Models::OpenAi.new("open_ai:gpt-4-turbo", max_tokens: 100_000),
Models::OpenAi.new("open_ai:gpt-4o", max_tokens: 100_000),
Models::OpenAi.new("open_ai:gpt-3.5-turbo", max_tokens: 4096),
Models::OpenAi.new("open_ai:gpt-3.5-turbo-16k", max_tokens: 16_384),
Models::Gemini.new("google:gemini-pro", max_tokens: 32_768),
Models::Gemini.new("google:gemini-1.5-pro", max_tokens: 800_000),
Models::Gemini.new("google:gemini-1.5-flash", max_tokens: 800_000),
]
claude_prov = "anthropic"
if DiscourseAi::Completions::Endpoints::AwsBedrock.correctly_configured?("claude-2")
claude_prov = "aws_bedrock"
end
foldable_models << Models::Anthropic.new("#{claude_prov}:claude-2", max_tokens: 200_000)
foldable_models << Models::Anthropic.new(
"#{claude_prov}:claude-instant-1",
max_tokens: 100_000,
)
foldable_models << Models::Anthropic.new(
"#{claude_prov}:claude-3-haiku",
max_tokens: 200_000,
)
foldable_models << Models::Anthropic.new(
"#{claude_prov}:claude-3-sonnet",
max_tokens: 200_000,
)
foldable_models << Models::Anthropic.new(
"#{claude_prov}:claude-3-opus",
max_tokens: 200_000,
)
mixtral_prov = "hugging_face"
if DiscourseAi::Completions::Endpoints::Vllm.correctly_configured?(
"mistralai/Mixtral-8x7B-Instruct-v0.1",
)
mixtral_prov = "vllm"
end
foldable_models << Models::Mixtral.new(
"#{mixtral_prov}:mistralai/Mixtral-8x7B-Instruct-v0.1",
max_tokens: 32_000,
)
unless Rails.env.production?
foldable_models << Models::Fake.new("fake:fake", max_tokens: 8192)
end
folded_models = foldable_models.map { |model| Strategies::FoldContent.new(model) }
folded_models
end
def find_strategy(strategy_model)
available_strategies.detect { |s| s.model == strategy_model }
end
def selected_strategy
return if SiteSetting.ai_summarization_strategy.blank?
find_strategy(SiteSetting.ai_summarization_strategy)
end
def can_see_summary?(target, user)
return false if SiteSetting.ai_summarization_strategy.blank?
return false if target.class == Topic && target.private_message?
has_cached_summary = AiSummary.exists?(target: target)
return has_cached_summary if user.nil?
has_cached_summary || can_request_summary_for?(user)
end
def can_request_summary_for?(user)
return false unless user
user_group_ids = user.group_ids
SiteSetting.ai_custom_summarization_allowed_groups_map.any? do |group_id|
user_group_ids.include?(group_id)
end
end
end
def initialize(model_name, max_tokens:)
@model_name = model_name
@max_tokens = max_tokens
end
# Some strategies could require other conditions to work correctly,
# like site settings.
# This method gets called when admins attempt to select it,
# checking if we met those conditions.
def correctly_configured?
raise NotImplementedError
end
# Strategy name to display to admins in the available strategies dropdown.
def display_name
raise NotImplementedError
end
# If we don't meet the conditions to enable this strategy,
# we'll display this hint as an error to admins.
def configuration_hint
raise NotImplementedError
end
# The idea behind this method is "give me a collection of texts,
# and I'll handle the summarization to the best of my capabilities".
# It's important to emphasize the "collection of texts" part, which implies
# it's not tied to any model and expects the "content" to be a hash instead.
#
# @param content { Hash } - Includes the content to summarize, plus additional
#  context to help the strategy produce a better result. Keys present in the content hash:
#   - resource_path (optional): Helps the strategy build links to the content in the summary (e.g. "/t/-/:topic_id/POST_NUMBER")
#   - content_title (optional): Provides guidance about what the content is about.
#   - contents (required): Array of hashes with the content to summarize (e.g. [{ poster: "asd", id: 1, text: "This is a text" }]).
#     Each contents hash must include all three keys.
# @param &on_partial_blk { Block - Optional } - If the strategy supports it, the passed block
# will get called with partial summarized text as it's generated.
#
# @param current_user { User } - User requesting the summary.
#
# @returns { Hash } - The summarized content. Example:
# {
# summary: "This is the final summary",
# }
def summarize(content, current_user)
raise NotImplementedError
end
def available_tokens
max_tokens - reserved_tokens
end
# Returns the string we'll store in the selected strategy site setting.
def model
model_name.split(":").last
end
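A sketch of how the controllers and job in this commit consume the class above; the content hash follows the #summarize documentation, and topic, user, and the values shown are illustrative:

base = DiscourseAi::Summarization::Models::Base
strategy = base.selected_strategy # nil when ai_summarization_strategy is blank

if strategy && base.can_see_summary?(topic, user)
  result =
    strategy.summarize(
      { content_title: topic.title, contents: [{ poster: "alice", id: 1, text: "hello" }] },
      user,
    )
  result[:summary] # => whatever summary text the LLM returned
end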

@@ -0,0 +1,25 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Models
class Fake < Base
def display_name
"fake"
end
def correctly_configured?
true
end
def configuration_hint
""
end
def model
"fake"
end
end
end
end
end

@@ -3,7 +3,7 @@
module DiscourseAi
module Summarization
module Strategies
class FoldContent < ::Summarization::Base
class FoldContent < DiscourseAi::Summarization::Models::Base
def initialize(completion_model)
@completion_model = completion_model
end
@@ -21,122 +21,27 @@ module DiscourseAi
llm = DiscourseAi::Completions::Llm.proxy(completion_model.model_name)
initial_chunks =
rebalance_chunks(
llm.tokenizer,
content[:contents].map { |c| { ids: [c[:id]], summary: format_content_item(c) } },
)
summary_content =
content[:contents].map { |c| { ids: [c[:id]], summary: format_content_item(c) } }
# Special case where we can do all the summarization in one pass.
if initial_chunks.length == 1
{
summary:
summarize_single(llm, initial_chunks.first[:summary], user, opts, &on_partial_blk),
chunks: [],
}
else
summarize_chunks(llm, initial_chunks, user, opts, &on_partial_blk)
end
{
summary:
summarize_single(llm, summary_content.first[:summary], user, opts, &on_partial_blk),
}
end
private
def summarize_chunks(llm, chunks, user, opts, &on_partial_blk)
# Safely assume we always have more than one chunk.
summarized_chunks = summarize_in_chunks(llm, chunks, user, opts)
total_summaries_size =
llm.tokenizer.size(summarized_chunks.map { |s| s[:summary].to_s }.join)
if total_summaries_size < completion_model.available_tokens
# Chunks are small enough, we can concatenate them.
{
summary:
concatenate_summaries(
llm,
summarized_chunks.map { |s| s[:summary] },
user,
&on_partial_blk
),
chunks: summarized_chunks,
}
else
# We have summarized chunks but we can't concatenate them yet. Split them into smaller summaries and summarize again.
rebalanced_chunks = rebalance_chunks(llm.tokenizer, summarized_chunks)
summarize_chunks(llm, rebalanced_chunks, user, opts, &on_partial_blk)
end
end
def format_content_item(item)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
def rebalance_chunks(tokenizer, chunks)
section = { ids: [], summary: "" }
chunks =
chunks.reduce([]) do |sections, chunk|
if tokenizer.can_expand_tokens?(
section[:summary],
chunk[:summary],
completion_model.available_tokens,
)
section[:summary] += chunk[:summary]
section[:ids] = section[:ids].concat(chunk[:ids])
else
sections << section
section = chunk
end
sections
end
chunks << section if section[:summary].present?
chunks
end
def summarize_single(llm, text, user, opts, &on_partial_blk)
prompt = summarization_prompt(text, opts)
llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
end
def summarize_in_chunks(llm, chunks, user, opts)
chunks.map do |chunk|
prompt = summarization_prompt(chunk[:summary], opts)
chunk[:summary] = llm.generate(
prompt,
user: user,
max_tokens: 300,
feature_name: "summarize",
)
chunk
end
end
def concatenate_summaries(llm, summaries, user, &on_partial_blk)
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative.
The narrative you create is in the form of one or multiple paragraphs.
Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
I'm NOT interested in anything other than the concatenated summary, don't include additional text or comments.
You understand and generate Discourse forum Markdown.
You format the response, including links, using Markdown.
TEXT
prompt.push(type: :user, content: <<~TEXT.strip)
THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:
<input>
#{summaries.join("\n")}
</input>
TEXT
llm.generate(prompt, user: user, &on_partial_blk)
end
def summarization_prompt(input, opts)
insts = +<<~TEXT
You are an advanced summarization bot that generates concise, coherent summaries of provided text.

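An illustration of how format_content_item above flattens each post before the single LLM call; the unbalanced parenthesis and trailing space match the template as written:

format_content_item({ id: 1, poster: "alice", text: "hello world" })
# => "(1 alice said: hello world "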
@@ -15,6 +15,8 @@ enabled_site_setting :discourse_ai_enabled
register_asset "stylesheets/modules/ai-helper/common/ai-helper.scss"
register_asset "stylesheets/modules/summarization/common/ai-summary.scss"
register_asset "stylesheets/modules/ai-bot/common/bot-replies.scss"
register_asset "stylesheets/modules/ai-bot/common/ai-persona.scss"
register_asset "stylesheets/modules/ai-bot/mobile/ai-persona.scss", :mobile

@@ -0,0 +1,81 @@
# frozen_string_literal: true
RSpec.describe Jobs::StreamTopicAiSummary do
subject(:job) { described_class.new }
describe "#execute" do
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) }
fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) }
fab!(:user) { Fabricate(:leader) }
before { Group.find(Group::AUTO_GROUPS[:trust_level_3]).add(user) }
before { SiteSetting.ai_summarization_strategy = "fake" }
def with_responses(responses)
DiscourseAi::Completions::Llm.with_prepared_responses(responses) { yield }
end
describe "validates params" do
it "does nothing if there is no topic" do
messages =
MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
job.execute(topic_id: nil, user_id: user.id)
end
expect(messages).to be_empty
end
it "does nothing if there is no user" do
messages =
MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
job.execute(topic_id: topic.id, user_id: nil)
end
expect(messages).to be_empty
end
it "does nothing if the user is not allowed to see the topic" do
private_topic = Fabricate(:private_message_topic)
messages =
MessageBus.track_publish("/discourse-ai/summaries/topic/#{private_topic.id}") do
job.execute(topic_id: private_topic.id, user_id: user.id)
end
expect(messages).to be_empty
end
end
it "publishes updates with a partial summary" do
with_responses(["dummy"]) do
messages =
MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
job.execute(topic_id: topic.id, user_id: user.id)
end
partial_summary_update = messages.first.data
expect(partial_summary_update[:done]).to eq(false)
expect(partial_summary_update.dig(:ai_topic_summary, :summarized_text)).to eq("dummy")
end
end
it "publishes a final update to signal we're done and provide metadata" do
with_responses(["dummy"]) do
messages =
MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
job.execute(topic_id: topic.id, user_id: user.id)
end
final_update = messages.last.data
expect(final_update[:done]).to eq(true)
expect(final_update.dig(:ai_topic_summary, :algorithm)).to eq("fake")
expect(final_update.dig(:ai_topic_summary, :outdated)).to eq(false)
expect(final_update.dig(:ai_topic_summary, :can_regenerate)).to eq(true)
expect(final_update.dig(:ai_topic_summary, :new_posts_since_summary)).to be_zero
end
end
end
end

@@ -0,0 +1,70 @@
# frozen_string_literal: true
describe DiscourseAi::Summarization::Models::Base do
fab!(:user)
fab!(:group)
fab!(:topic)
before do
group.add(user)
SiteSetting.ai_summarization_strategy = "fake"
end
describe "#can_see_summary?" do
context "when the user cannot generate a summary" do
before { SiteSetting.ai_custom_summarization_allowed_groups = "" }
it "returns false" do
SiteSetting.ai_custom_summarization_allowed_groups = ""
expect(described_class.can_see_summary?(topic, user)).to eq(false)
end
it "returns true if there is a cached summary" do
AiSummary.create!(
target: topic,
summarized_text: "test",
original_content_sha: "123",
algorithm: "test",
)
expect(described_class.can_see_summary?(topic, user)).to eq(true)
end
end
context "when the user can generate a summary" do
before { SiteSetting.ai_custom_summarization_allowed_groups = group.id }
it "returns true if the user group is present in the ai_custom_summarization_allowed_groups_map setting" do
expect(described_class.can_see_summary?(topic, user)).to eq(true)
end
end
context "when there is no user" do
it "returns false for anons" do
expect(described_class.can_see_summary?(topic, nil)).to eq(false)
end
it "returns true for anons when there is a cached summary" do
AiSummary.create!(
target: topic,
summarized_text: "test",
original_content_sha: "123",
algorithm: "test",
)
expect(described_class.can_see_summary?(topic, nil)).to eq(true)
end
end
context "when the topic is a PM" do
before { SiteSetting.ai_custom_summarization_allowed_groups = group.id }
let(:pm) { Fabricate(:private_message_topic) }
it "returns false" do
expect(described_class.can_see_summary?(pm, user)).to eq(false)
end
end
end
end

@@ -32,37 +32,5 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
expect(result[:summary]).to eq(single_summary)
end
end
context "when the content to summarize doesn't fit in a single call" do
it "summarizes each chunk and then concatenates them" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
[single_summary, single_summary, concatenated_summary],
) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) } }
expect(result[:summary]).to eq(concatenated_summary)
end
it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
max_length_response = "(1 asd said: This is a text "
chunk_of_chunks = "I'm smol"
result =
DiscourseAi::Completions::Llm.with_prepared_responses(
[
max_length_response,
max_length_response,
chunk_of_chunks,
chunk_of_chunks,
concatenated_summary,
],
) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(5) } }
expect(result[:summary]).to eq(concatenated_summary)
end
end
end
end

@@ -0,0 +1,29 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::ChatSummaryController do
fab!(:current_user) { Fabricate(:user) }
fab!(:group)
before do
group.add(current_user)
SiteSetting.ai_summarization_strategy = "fake"
SiteSetting.ai_custom_summarization_allowed_groups = group.id
SiteSetting.chat_enabled = true
SiteSetting.chat_allowed_groups = group.id
sign_in(current_user)
end
describe "#show" do
context "when the user is not allowed to join the channel" do
fab!(:channel) { Fabricate(:private_category_channel) }
it "returns a 403" do
get "/discourse-ai/summarization/channels/#{channel.id}", params: { since: 6 }
expect(response.status).to eq(403)
end
end
end
end

@@ -0,0 +1,122 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::SummaryController do
describe "#summary" do
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) }
fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) }
before { SiteSetting.ai_summarization_strategy = "fake" }
context "for anons" do
it "returns a 404 if there is no cached summary" do
get "/discourse-ai/summarization/t/#{topic.id}.json"
expect(response.status).to eq(404)
end
it "returns a cached summary" do
section =
AiSummary.create!(
target: topic,
summarized_text: "test",
algorithm: "test",
original_content_sha: "test",
)
get "/discourse-ai/summarization/t/#{topic.id}.json"
expect(response.status).to eq(200)
summary = response.parsed_body
expect(summary.dig("ai_topic_summary", "summarized_text")).to eq(section.summarized_text)
end
end
context "when the user is a member of an allowlisted group" do
fab!(:user) { Fabricate(:leader) }
before do
sign_in(user)
Group.find(Group::AUTO_GROUPS[:trust_level_3]).add(user)
end
it "returns a 404 if there is no topic" do
invalid_topic_id = 999
get "/discourse-ai/summarization/t/#{invalid_topic_id}.json"
expect(response.status).to eq(404)
end
it "returns a 403 if not allowed to see the topic" do
pm = Fabricate(:private_message_topic)
get "/discourse-ai/summarization/t/#{pm.id}.json"
expect(response.status).to eq(403)
end
it "returns a summary" do
summary_text = "This is a summary"
DiscourseAi::Completions::Llm.with_prepared_responses([summary_text]) do
get "/discourse-ai/summarization/t/#{topic.id}.json"
expect(response.status).to eq(200)
summary = response.parsed_body["ai_topic_summary"]
section = AiSummary.last
expect(section.summarized_text).to eq(summary_text)
expect(summary["summarized_text"]).to eq(section.summarized_text)
expect(summary["algorithm"]).to eq("fake")
expect(summary["outdated"]).to eq(false)
expect(summary["can_regenerate"]).to eq(true)
expect(summary["new_posts_since_summary"]).to be_zero
end
end
it "signals the summary is outdated" do
get "/discourse-ai/summarization/t/#{topic.id}.json"
Fabricate(:post, topic: topic, post_number: 3)
topic.update!(highest_post_number: 3)
get "/discourse-ai/summarization/t/#{topic.id}.json"
expect(response.status).to eq(200)
summary = response.parsed_body["ai_topic_summary"]
expect(summary["outdated"]).to eq(true)
expect(summary["new_posts_since_summary"]).to eq(1)
end
end
context "when the user is not a member of an allowlisted group" do
fab!(:user)
before { sign_in(user) }
it "return a 404 if there is no cached summary" do
get "/discourse-ai/summarization/t/#{topic.id}.json"
expect(response.status).to eq(404)
end
it "returns a cached summary" do
section =
AiSummary.create!(
target: topic,
summarized_text: "test",
algorithm: "test",
original_content_sha: "test",
)
get "/discourse-ai/summarization/t/#{topic.id}.json"
expect(response.status).to eq(200)
summary = response.parsed_body
expect(summary.dig("ai_topic_summary", "summarized_text")).to eq(section.summarized_text)
end
end
end
end

@@ -0,0 +1,199 @@
# frozen_string_literal: true
describe DiscourseAi::TopicSummarization do
fab!(:user) { Fabricate(:admin) }
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) }
fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) }
let(:model) do
DiscourseAi::Summarization::Strategies::FoldContent.new(
DiscourseAi::Summarization::Models::Fake.new("fake:fake", max_tokens: 8192),
)
end
shared_examples "includes only public-visible topics" do
subject { described_class.new(model) }
it "only includes visible posts" do
topic.first_post.update!(hidden: true)
posts = subject.summary_targets(topic)
expect(posts.none?(&:hidden?)).to eq(true)
end
it "doesn't include posts without users" do
topic.first_post.user.destroy!
posts = subject.summary_targets(topic)
expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil
end
it "doesn't include deleted posts" do
topic.first_post.update!(user_id: nil)
posts = subject.summary_targets(topic)
expect(posts.detect { |p| p.id == topic.first_post.id }).to be_nil
end
end
describe "#summary_targets" do
context "when the topic has a best replies summary" do
before { topic.has_summary = true }
it_behaves_like "includes only public-visible topics"
end
context "when the topic doesn't have a best replies summary" do
before { topic.has_summary = false }
it_behaves_like "includes only public-visible topics"
end
end
describe "#summarize" do
subject(:summarization) { described_class.new(model) }
def assert_summary_is_cached(topic, summary_response)
cached_summary = AiSummary.find_by(target: topic)
expect(cached_summary.content_range).to cover(*topic.posts.map(&:post_number))
expect(cached_summary.summarized_text).to eq(summary_response)
expect(cached_summary.original_content_sha).to be_present
expect(cached_summary.algorithm).to eq("fake")
end
context "when the content was summarized in a single chunk" do
let(:summary) { "This is the final summary" }
it "caches the summary" do
DiscourseAi::Completions::Llm.with_prepared_responses([summary]) do
section = summarization.summarize(topic, user)
expect(section.summarized_text).to eq(summary)
assert_summary_is_cached(topic, summary)
end
end
it "returns the cached version in subsequent calls" do
summarization.summarize(topic, user)
cached_summary_text = "This is a cached summary"
cached_summary =
AiSummary.find_by(target: topic).update!(
summarized_text: cached_summary_text,
updated_at: 24.hours.ago,
)
section = summarization.summarize(topic, user)
expect(section.summarized_text).to eq(cached_summary_text)
end
context "when the topic has embed content cached" do
it "embed content is used instead of the raw text" do
topic_embed =
Fabricate(
:topic_embed,
topic: topic,
embed_content_cache: "<p>hello world new post :D</p>",
)
DiscourseAi::Completions::Llm.with_prepared_responses(["A summary"]) do |spy|
summarization.summarize(topic, user)
prompt_raw =
spy
.prompt_messages
.reduce(+"") do |memo, m|
memo << m[:content] << "\n"
memo
end
expect(prompt_raw).to include(topic_embed.embed_content_cache)
end
end
end
end
describe "invalidating cached summaries" do
let(:cached_text) { "This is a cached summary" }
let(:updated_summary) { "This is the final summary" }
def cached_summary
AiSummary.find_by(target: topic)
end
before do
DiscourseAi::Completions::Llm.with_prepared_responses([cached_text]) do
summarization.summarize(topic, user)
end
cached_summary.update!(summarized_text: cached_text, created_at: 24.hours.ago)
end
context "when the user can requests new summaries" do
context "when there are no new posts" do
it "returns the cached summary" do
section = summarization.summarize(topic, user)
expect(section.summarized_text).to eq(cached_text)
end
end
context "when there are new posts" do
before { cached_summary.update!(original_content_sha: "outdated_sha") }
it "returns a new summary" do
DiscourseAi::Completions::Llm.with_prepared_responses([updated_summary]) do
section = summarization.summarize(topic, user)
expect(section.summarized_text).to eq(updated_summary)
end
end
context "when the cached summary is less than one hour old" do
before { cached_summary.update!(created_at: 30.minutes.ago) }
it "returns the cached summary" do
section = summarization.summarize(topic, user)
expect(section.summarized_text).to eq(cached_text)
expect(section.outdated).to eq(true)
end
it "returns a new summary if the skip_age_check flag is passed" do
DiscourseAi::Completions::Llm.with_prepared_responses([updated_summary]) do
section = summarization.summarize(topic, user, skip_age_check: true)
expect(section.summarized_text).to eq(updated_summary)
end
end
end
end
end
end
describe "stream partial updates" do
let(:summary) { "This is the final summary" }
it "receives a blk that is passed to the underlying strategy and called with partial summaries" do
partial_result = +""
DiscourseAi::Completions::Llm.with_prepared_responses([summary]) do
summarization.summarize(topic, user) do |partial_summary|
partial_result << partial_summary
end
end
expect(partial_result).to eq(summary)
end
end
end
end

@@ -0,0 +1,38 @@
# frozen_string_literal: true
RSpec.describe "Summarize a channel since your last visit", type: :system do
fab!(:current_user) { Fabricate(:user) }
fab!(:group)
fab!(:channel) { Fabricate(:chat_channel) }
fab!(:message_1) { Fabricate(:chat_message, chat_channel: channel) }
let(:chat) { PageObjects::Pages::Chat.new }
let(:summarization_result) { "This is a summary" }
before do
group.add(current_user)
SiteSetting.ai_summarization_strategy = "fake"
SiteSetting.ai_custom_summarization_allowed_groups = group.id.to_s
SiteSetting.chat_enabled = true
SiteSetting.chat_allowed_groups = group.id.to_s
sign_in(current_user)
chat_system_bootstrap(current_user, [channel])
end
it "displays a summary of the messages since the selected timeframe" do
DiscourseAi::Completions::Llm.with_prepared_responses([summarization_result]) do
chat.visit_channel(channel)
find(".chat-composer-dropdown__trigger-btn").click
find(".chat-composer-dropdown__action-btn.channel-summary").click
expect(page.has_css?(".chat-modal-channel-summary")).to eq(true)
find(".summarization-since").click
find(".select-kit-row[data-value=\"3\"]").click
expect(find(".summary-area").text).to eq(summarization_result)
end
end
end

@@ -0,0 +1,110 @@
import { click, visit } from "@ember/test-helpers";
import { test } from "qunit";
import topicFixtures from "discourse/tests/fixtures/topic";
import {
acceptance,
publishToMessageBus,
updateCurrentUser,
} from "discourse/tests/helpers/qunit-helpers";
import { cloneJSON } from "discourse-common/lib/object";
acceptance("Topic - Summary", function (needs) {
const currentUserId = 5;
needs.user();
needs.pretender((server, helper) => {
server.get("/t/1.json", () => {
const json = cloneJSON(topicFixtures["/t/130.json"]);
json.id = 1;
json.summarizable = true;
return helper.response(json);
});
server.get("/discourse-ai/summarization/t/1", () => {
return helper.response({});
});
});
needs.hooks.beforeEach(() => {
updateCurrentUser({ id: currentUserId });
});
test("displays streamed summary", async function (assert) {
await visit("/t/-/1");
const partialSummary = "This a";
await publishToMessageBus("/discourse-ai/summaries/topic/1", {
done: false,
ai_topic_summary: { summarized_text: partialSummary },
});
await click(".ai-topic-summarization");
assert
.dom(".ai-summary-box .generated-summary p")
.hasText(partialSummary, "Updates the summary with a partial result");
const finalSummary = "This is a completed summary";
await publishToMessageBus("/discourse-ai/summaries/topic/1", {
done: true,
ai_topic_summary: {
summarized_text: finalSummary,
summarized_on: "2023-01-01T04:00:00.000Z",
algorithm: "OpenAI GPT-4",
outdated: false,
new_posts_since_summary: false,
can_regenerate: true,
},
});
assert
.dom(".ai-summary-box .generated-summary p")
.hasText(finalSummary, "Updates the summary with a final result");
assert
.dom(".ai-summary-box .summarized-on")
.exists("summary metadata exists");
});
});
acceptance("Topic - Summary - Anon", function (needs) {
const finalSummary = "This is a completed summary";
needs.pretender((server, helper) => {
server.get("/t/1.json", () => {
const json = cloneJSON(topicFixtures["/t/280/1.json"]);
json.id = 1;
json.summarizable = true;
return helper.response(json);
});
server.get("/discourse-ai/summarization/t/1", () => {
return helper.response({
ai_topic_summary: {
summarized_text: finalSummary,
summarized_on: "2023-01-01T04:00:00.000Z",
algorithm: "OpenAI GPT-4",
outdated: false,
new_posts_since_summary: false,
can_regenerate: false,
},
});
});
});
test("displays cached summary immediately", async function (assert) {
await visit("/t/-/1");
await click(".ai-topic-summarization");
assert
.dom(".ai-summary-box .generated-summary p")
.hasText(finalSummary, "Updates the summary with the result");
assert
.dom(".ai-summary-box .summarized-on")
.exists("summary metadata exists");
});
});