DEV: Better strategies for summarization (#88)

* DEV: Better strategies for summarization

A strategy's responsibility is: "Given a collection of texts, I know how to summarize them most efficiently, using the minimum number of requests and maximizing token usage".

Each model has a different token limit, so it all boils down to two strategies:

- Fold: do the summarization in chunks, then build a final summary from those chunk summaries.
- Truncate: combine the texts into a single prompt and truncate it to the model's token limit.

While the latter is less than ideal, we need it for "bart-large-cnn-samsum" and "flan-t5-base-samsum", which both have very low limits. Every other model relies on folding.
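
In rough Ruby terms (a sketch only; the fold/truncate helper names are illustrative, and the real implementations are the Strategies::FoldContent and Strategies::TruncateContent classes in this diff):

# Folding: summarize the content chunk by chunk, then merge the partial
# summaries into the final one (mirrors Strategies::FoldContent#summarize).
def fold(model, contents, opts)
  summaries = model.summarize_in_chunks(contents, opts)
  return summaries.first[:summary] if summaries.length == 1
  model.concatenate_summaries(summaries)
end

# Truncation: cut the combined text down to the model's token limit and
# summarize it in a single request (mirrors Strategies::TruncateContent#summarize).
def truncate(model, contents, opts)
  model.summarize_with_truncation(contents, opts)
end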

* Expose summarized chunks to users
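
Strategies now return the intermediate chunk summaries next to the final one, so they can be exposed to users. A sketch of the returned shape (ids and strings are illustrative, not from a real run):

# Shape returned by Strategies::FoldContent#summarize for multi-chunk content;
# single-chunk and truncation-based summaries return chunks: [].
{
  summary: "cohesive narrative built from the chunk summaries",
  chunks: [
    { ids: [1, 2], summary: "summary of posts 1 and 2" },
    { ids: [3], summary: "summary of post 3" },
  ],
}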
Roman Rizzi 2023-06-27 12:26:33 -03:00 committed by GitHub
parent 9390fba768
commit 9a79afcdbf
21 changed files with 872 additions and 245 deletions


@@ -4,23 +4,38 @@ module DiscourseAi
   module Summarization
     class EntryPoint
       def load_files
-        require_relative "strategies/anthropic"
-        require_relative "strategies/discourse_ai"
-        require_relative "strategies/open_ai"
+        require_relative "models/base"
+        require_relative "models/anthropic"
+        require_relative "models/discourse"
+        require_relative "models/open_ai"
+
+        require_relative "strategies/fold_content"
+        require_relative "strategies/truncate_content"
       end

       def inject_into(plugin)
-        [
-          Strategies::OpenAi.new("gpt-4"),
-          Strategies::OpenAi.new("gpt-4-32k"),
-          Strategies::OpenAi.new("gpt-3.5-turbo"),
-          Strategies::OpenAi.new("gpt-3.5-turbo-16k"),
-          Strategies::DiscourseAi.new("bart-large-cnn-samsum"),
-          Strategies::DiscourseAi.new("flan-t5-base-samsum"),
-          Strategies::DiscourseAi.new("long-t5-tglobal-base-16384-book-summary"),
-          Strategies::Anthropic.new("claude-v1"),
-          Strategies::Anthropic.new("claude-v1-100k"),
-        ].each { |strategy| plugin.register_summarization_strategy(strategy) }
+        foldable_models = [
+          Models::OpenAi.new("gpt-4", max_tokens: 8192),
+          Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
+          Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
+          Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
+          Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
+          Models::Anthropic.new("claude-v1", max_tokens: 9000),
+          Models::Anthropic.new("claude-v1-100k", max_tokens: 100_000),
+        ]
+
+        foldable_models.each do |model|
+          plugin.register_summarization_strategy(Strategies::FoldContent.new(model))
+        end
+
+        truncable_models = [
+          Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024),
+          Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512),
+        ]
+
+        truncable_models.each do |model|
+          plugin.register_summarization_strategy(Strategies::TruncateContent.new(model))
+        end
       end
     end
   end


@@ -0,0 +1,84 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Models
class Anthropic < Base
def display_name
"Anthropic's #{model}"
end
def correctly_configured?
SiteSetting.ai_anthropic_api_key.present?
end
def configuration_hint
I18n.t(
"discourse_ai.summarization.configuration_hint",
count: 1,
setting: "ai_anthropic_api_key",
)
end
def concatenate_summaries(summaries)
instructions = <<~TEXT
Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
Include only the summary inside <ai> tags.
TEXT
instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" }
instructions += "Assistant:\n"
completion(instructions)
end
def summarize_with_truncation(contents, opts)
instructions = build_base_prompt(opts)
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content = tokenizer.truncate(text_to_summarize, max_tokens - reserved_tokens)
instructions += "<input>#{truncated_content}</input>\nAssistant:\n"
completion(instructions)
end
private
def summarize_chunk(chunk_text, opts)
completion(build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n")
end
def build_base_prompt(opts)
base_prompt = <<~TEXT
Human: Summarize the following forum discussion inside the given <input> tag.
Include only the summary inside <ai> tags.
TEXT
if opts[:resource_path]
base_prompt += "Try generating links as well the format is #{opts[:resource_path]}.\n"
end
base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
:content_title
]
base_prompt += "Don't use more than 400 words.\n"
end
def completion(prompt)
response =
::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(:completion)
Nokogiri::HTML5.fragment(response).at("ai").text
end
def tokenizer
DiscourseAi::Tokenizer::AnthropicTokenizer
end
attr_reader :max_tokens
end
end
end
end


@@ -0,0 +1,82 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Models
class Base
def initialize(model, max_tokens:)
@model = model
@max_tokens = max_tokens
end
def correctly_configured?
raise NotImplementedError
end
def display_name
raise NotImplementedError
end
def configuration_hint
raise NotImplementedError
end
def summarize_in_chunks(contents, opts)
chunks = []
section = { ids: [], summary: "" }
contents.each do |item|
new_content = format_content_item(item)
if tokenizer.can_expand_tokens?(
section[:summary],
new_content,
max_tokens - reserved_tokens,
)
section[:summary] += new_content
section[:ids] << item[:id]
else
chunks << section
section = { ids: [item[:id]], summary: new_content }
end
end
chunks << section if section[:summary].present?
chunks.each { |chunk| chunk[:summary] = summarize_chunk(chunk[:summary], opts) }
chunks
end
def concatenate_summaries(_summaries)
raise NotImplementedError
end
def summarize_with_truncation(_contents, _opts)
raise NotImplementedError
end
attr_reader :model
protected
attr_reader :max_tokens
def summarize_chunk(_chunk_text, _opts)
raise NotImplementedError
end
def format_content_item(item)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
def reserved_tokens
# Reserve tokens for the response and the base prompt
# ~500 words
700
end
end
end
end
end


@@ -2,8 +2,8 @@
 module DiscourseAi
   module Summarization
-    module Strategies
-      class DiscourseAi < ::Summarization::Base
+    module Models
+      class Discourse < Base
         def display_name
           "Discourse AI's #{model}"
         end
@@ -22,29 +22,39 @@ module DiscourseAi
           )
         end

-        def summarize(content_text)
-          ::DiscourseAi::Inference::DiscourseClassifier.perform!(
-            "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
-            model,
-            prompt(content_text),
-            SiteSetting.ai_summarization_discourse_service_api_key,
-          ).dig(:summary_text)
+        def concatenate_summaries(summaries)
+          completion(summaries.join("\n"))
         end

-        def prompt(text)
-          ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text, max_length)
+        def summarize_with_truncation(contents, opts)
+          text_to_summarize = contents.map { |c| format_content_item(c) }.join
+          truncated_content =
+            ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text_to_summarize, max_tokens)
+
+          completion(truncated_content)
         end

         private

-        def max_length
-          lengths = {
-            "bart-large-cnn-samsum" => 1024,
-            "flan-t5-base-samsum" => 512,
-            "long-t5-tglobal-base-16384-book-summary" => 16_384,
-          }
-          lengths[model]
+        def summarize_chunk(chunk_text, _opts)
+          completion(chunk_text)
+        end
+
+        def reserved_tokens
+          0
+        end
+
+        def completion(prompt)
+          ::DiscourseAi::Inference::DiscourseClassifier.perform!(
+            "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
+            model,
+            prompt,
+            SiteSetting.ai_summarization_discourse_service_api_key,
+          ).dig(:summary_text)
+        end
+
+        def tokenizer
+          DiscourseAi::Tokenizer::BertTokenizer
         end
       end
     end


@@ -0,0 +1,96 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Models
class OpenAi < Base
def display_name
"Open AI's #{model}"
end
def correctly_configured?
SiteSetting.ai_openai_api_key.present?
end
def configuration_hint
I18n.t(
"discourse_ai.summarization.configuration_hint",
count: 1,
setting: "ai_openai_api_key",
)
end
def concatenate_summaries(summaries)
messages = [
{ role: "system", content: "You are a helpful bot" },
{
role: "user",
content:
"Concatenate these disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}",
},
]
completion(messages)
end
def summarize_with_truncation(contents, opts)
messages = [{ role: "system", content: build_base_prompt(opts) }]
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content = tokenizer.truncate(text_to_summarize, max_tokens - reserved_tokens)
messages << {
role: "user",
content: "Summarize the following in 400 words:\n#{truncated_content}",
}
completion(messages)
end
private
def summarize_chunk(chunk_text, opts)
completion(
[
{ role: "system", content: build_base_prompt(opts) },
{ role: "user", content: "Summarize the following in 400 words:\n#{chunk_text}" },
],
)
end
def build_base_prompt(opts)
base_prompt = <<~TEXT
You are a summarization bot.
You effectively summarise any text and reply ONLY with ONLY the summarized text.
You condense it into a shorter version.
You understand and generate Discourse forum Markdown.
TEXT
if opts[:resource_path]
base_prompt +=
"Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
end
base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
:content_title
]
base_prompt
end
def completion(prompt)
::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
:choices,
0,
:message,
:content,
)
end
def tokenizer
DiscourseAi::Tokenizer::OpenAiTokenizer
end
end
end
end
end


@@ -1,57 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class Anthropic < ::Summarization::Base
def display_name
"Anthropic's #{model}"
end
def correctly_configured?
SiteSetting.ai_anthropic_api_key.present?
end
def configuration_hint
I18n.t(
"discourse_ai.summarization.configuration_hint",
count: 1,
setting: "ai_anthropic_api_key",
)
end
def summarize(content_text)
response =
::DiscourseAi::Inference::AnthropicCompletions.perform!(
prompt(content_text),
model,
).dig(:completion)
Nokogiri::HTML5.fragment(response).at("ai").text
end
def prompt(content)
truncated_content =
::DiscourseAi::Tokenizer::AnthropicTokenizer.truncate(content, max_length - 50)
"Human: Summarize the following article that is inside <input> tags.
Please include only the summary inside <ai> tags.
<input>##{truncated_content}</input>
Assistant:
"
end
private
def max_length
lengths = { "claude-v1" => 9000, "claude-v1-100k" => 100_000 }
lengths[model]
end
end
end
end
end


@@ -0,0 +1,30 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class FoldContent < ::Summarization::Base
def initialize(completion_model)
@completion_model = completion_model
end
attr_reader :completion_model
delegate :correctly_configured?,
:display_name,
:configuration_hint,
:model,
to: :completion_model
def summarize(content)
opts = content.except(:contents)
summaries = completion_model.summarize_in_chunks(content[:contents], opts)
return { summary: summaries.first[:summary], chunks: [] } if summaries.length == 1
{ summary: completion_model.concatenate_summaries(summaries), chunks: summaries }
end
end
end
end
end


@@ -1,56 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class OpenAi < ::Summarization::Base
def display_name
"Open AI's #{model}"
end
def correctly_configured?
SiteSetting.ai_openai_api_key.present?
end
def configuration_hint
I18n.t(
"discourse_ai.summarization.configuration_hint",
count: 1,
setting: "ai_openai_api_key",
)
end
def summarize(content_text)
::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt(content_text), model).dig(
:choices,
0,
:message,
:content,
)
end
def prompt(content)
truncated_content =
::DiscourseAi::Tokenizer::OpenAiTokenizer.truncate(content, max_length - 50)
messages = [{ role: "system", content: <<~TEXT }]
Summarize the following article:\n\n#{truncated_content}
TEXT
end
private
def max_length
lengths = {
"gpt-3.5-turbo" => 4096,
"gpt-4" => 8192,
"gpt-3.5-turbo-16k" => 16_384,
"gpt-4-32k" => 32_768,
}
lengths[model]
end
end
end
end
end


@@ -0,0 +1,30 @@
# frozen_string_literal: true
module DiscourseAi
module Summarization
module Strategies
class TruncateContent < ::Summarization::Base
def initialize(completion_model)
@completion_model = completion_model
end
attr_reader :completion_model
delegate :correctly_configured?,
:display_name,
:configuration_hint,
:model,
to: :completion_model
def summarize(content)
opts = content.except(:contents)
{
summary: completion_model.summarize_with_truncation(content[:contents], opts),
chunks: [],
}
end
end
end
end
end


@@ -24,7 +24,7 @@ module ::DiscourseAi
       payload[:temperature] = temperature if temperature
       payload[:top_p] = top_p if top_p
-      payload[:max_tokens_to_sample] = max_tokens || 300
+      payload[:max_tokens_to_sample] = max_tokens if max_tokens
       payload[:stream] = true if block_given?

       Net::HTTP.start(


@@ -3,21 +3,31 @@
 module DiscourseAi
   module Tokenizer
     class BasicTokenizer
-      def self.tokenizer
-        raise NotImplementedError
-      end
+      class << self
+        def tokenizer
+          raise NotImplementedError
+        end

-      def self.tokenize(text)
-        tokenizer.encode(text).tokens
-      end
+        def tokenize(text)
+          tokenizer.encode(text).tokens
+        end

-      def self.size(text)
-        tokenize(text).size
-      end
+        def size(text)
+          tokenize(text).size
+        end

-      def self.truncate(text, max_length)
-        # Fast track the common case where the text is already short enough.
-        return text if text.size < max_length
-
-        tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
+        def truncate(text, max_length)
+          # Fast track the common case where the text is already short enough.
+          return text if text.size < max_length
+
+          tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
+        end
+
+        def can_expand_tokens?(text, addition, max_length)
+          return true if text.size + addition.size < max_length
+
+          tokenizer.encode(text).ids.length + tokenizer.encode(addition).ids.length < max_length
+        end
       end
     end

@@ -36,22 +46,30 @@ module DiscourseAi
     end

     class OpenAiTokenizer < BasicTokenizer
-      def self.tokenizer
-        @@tokenizer ||= Tiktoken.get_encoding("cl100k_base")
-      end
+      class << self
+        def tokenizer
+          @@tokenizer ||= Tiktoken.get_encoding("cl100k_base")
+        end

-      def self.tokenize(text)
-        tokenizer.encode(text)
-      end
+        def tokenize(text)
+          tokenizer.encode(text)
+        end

-      def self.truncate(text, max_length)
-        # Fast track the common case where the text is already short enough.
-        return text if text.size < max_length
-
-        tokenizer.decode(tokenize(text).take(max_length))
-      rescue Tiktoken::UnicodeError
-        max_length = max_length - 1
-        retry
+        def truncate(text, max_length)
+          # Fast track the common case where the text is already short enough.
+          return text if text.size < max_length
+
+          tokenizer.decode(tokenize(text).take(max_length))
+        rescue Tiktoken::UnicodeError
+          max_length = max_length - 1
+          retry
+        end
+
+        def can_expand_tokens?(text, addition, max_length)
+          return true if text.size + addition.size < max_length
+
+          tokenizer.encode(text).length + tokenizer.encode(addition).length < max_length
+        end
       end
     end
   end
 end


@@ -0,0 +1,116 @@
# frozen_string_literal: true
require_relative "../../../../support/anthropic_completion_stubs"
RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
let(:model) { "claude-v1" }
let(:max_tokens) { 720 }
subject { described_class.new(model, max_tokens: max_tokens) }
let(:content) do
{
resource_path: "/t/1/POST_NUMBER",
content_title: "This is a title",
contents: [{ poster: "asd", id: 1, text: "This is a text" }],
}
end
def expected_messages(contents, opts)
base_prompt = <<~TEXT
Human: Summarize the following forum discussion inside the given <input> tag.
Include only the summary inside <ai> tags.
Try generating links as well the format is #{opts[:resource_path]}.
The discussion title is: #{opts[:content_title]}.
Don't use more than 400 words.
TEXT
text =
contents.reduce("") do |memo, item|
memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
base_prompt += "<input>#{text}</input>\nAssistant:\n"
end
describe "#summarize_in_chunks" do
context "when the content fits in a single chunk" do
it "performs a request to summarize" do
opts = content.except(:contents)
AnthropicCompletionStubs.stub_response(
expected_messages(content[:contents], opts),
"<ai>This is summary 1</ai>",
)
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1")
end
end
context "when the content fits in multiple chunks" do
it "performs a request for each one to summarize" do
content[:contents] << {
poster: "asd2",
id: 2,
text: "This is a different text to summarize",
}
opts = content.except(:contents)
content[:contents].each_with_index do |item, idx|
AnthropicCompletionStubs.stub_response(
expected_messages([item], opts),
"<ai>This is summary #{idx + 1}</ai>",
)
end
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
end
end
end
describe "#concatenate_summaries" do
it "combines all the different summaries into a single one" do
messages = <<~TEXT
Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
Include only the summary inside <ai> tags.
<input>summary 1</input>
<input>summary 2</input>
Assistant:
TEXT
AnthropicCompletionStubs.stub_response(messages, "<ai>concatenated summary</ai>")
expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq(
"concatenated summary",
)
end
end
describe "#summarize_with_truncation" do
let(:max_tokens) { 709 }
it "truncates the context to meet the token limit" do
opts = content.except(:contents)
instructions = <<~TEXT
Human: Summarize the following forum discussion inside the given <input> tag.
Include only the summary inside <ai> tags.
Try generating links as well the format is #{opts[:resource_path]}.
The discussion title is: #{opts[:content_title]}.
Don't use more than 400 words.
<input>(1 asd said: This is a</input>
Assistant:
TEXT
AnthropicCompletionStubs.stub_response(instructions, "<ai>truncated summary</ai>")
expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
end
end
end


@@ -0,0 +1,93 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::Models::Discourse do
let(:model) { "bart-large-cnn-samsum" }
let(:max_tokens) { 20 }
subject { described_class.new(model, max_tokens: max_tokens) }
let(:content) do
{
resource_path: "/t/1/POST_NUMBER",
content_title: "This is a title",
contents: [{ poster: "asd", id: 1, text: "This is a text" }],
}
end
before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" }
def stub_request(prompt, response)
WebMock
.stub_request(
:post,
"#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
)
.with(body: JSON.dump(model: model, content: prompt))
.to_return(status: 200, body: JSON.dump(summary_text: response))
end
def expected_messages(contents, opts)
contents.reduce("") do |memo, item|
memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
end
describe "#summarize_in_chunks" do
context "when the content fits in a single chunk" do
it "performs a request to summarize" do
opts = content.except(:contents)
stub_request(expected_messages(content[:contents], opts), "This is summary 1")
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1")
end
end
context "when the content fits in multiple chunks" do
it "performs a request for each one to summarize" do
content[:contents] << {
poster: "asd2",
id: 2,
text: "This is a different text to summarize",
}
opts = content.except(:contents)
content[:contents].each_with_index do |item, idx|
stub_request(expected_messages([item], opts), "This is summary #{idx + 1}")
end
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
end
end
end
describe "#concatenate_summaries" do
it "combines all the different summaries into a single one" do
messages = ["summary 1", "summary 2"].join("\n")
stub_request(messages, "concatenated summary")
expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq(
"concatenated summary",
)
end
end
describe "#summarize_with_truncation" do
let(:max_tokens) { 9 }
it "truncates the context to meet the token limit" do
opts = content.except(:contents)
stub_request("( 1 asd said : this is", "truncated summary")
expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
end
end
end


@@ -0,0 +1,116 @@
# frozen_string_literal: true
require_relative "../../../../support/openai_completions_inference_stubs"
RSpec.describe DiscourseAi::Summarization::Models::OpenAi do
let(:model) { "gpt-3.5-turbo" }
let(:max_tokens) { 720 }
subject { described_class.new(model, max_tokens: max_tokens) }
let(:content) do
{
resource_path: "/t/1/POST_NUMBER",
content_title: "This is a title",
contents: [{ poster: "asd", id: 1, text: "This is a text" }],
}
end
def expected_messages(contents, opts)
base_prompt = <<~TEXT
You are a summarization bot.
You effectively summarise any text and reply ONLY with ONLY the summarized text.
You condense it into a shorter version.
You understand and generate Discourse forum Markdown.
Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)
The discussion title is: #{opts[:content_title]}.
TEXT
messages = [{ role: "system", content: base_prompt }]
text =
contents.reduce("") do |memo, item|
memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
messages << { role: "user", content: "Summarize the following in 400 words:\n#{text}" }
end
describe "#summarize_in_chunks" do
context "when the content fits in a single chunk" do
it "performs a request to summarize" do
opts = content.except(:contents)
OpenAiCompletionsInferenceStubs.stub_response(
expected_messages(content[:contents], opts),
"This is summary 1",
)
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1")
end
end
context "when the content fits in multiple chunks" do
it "performs a request for each one to summarize" do
content[:contents] << {
poster: "asd2",
id: 2,
text: "This is a different text to summarize",
}
opts = content.except(:contents)
content[:contents].each_with_index do |item, idx|
OpenAiCompletionsInferenceStubs.stub_response(
expected_messages([item], opts),
"This is summary #{idx + 1}",
)
end
summarized_chunks =
subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
end
end
end
describe "#concatenate_summaries" do
it "combines all the different summaries into a single one" do
messages = [
{ role: "system", content: "You are a helpful bot" },
{
role: "user",
content:
"Concatenate these disjoint summaries, creating a cohesive narrative:\nsummary 1\nsummary 2",
},
]
OpenAiCompletionsInferenceStubs.stub_response(messages, "concatenated summary")
expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq(
"concatenated summary",
)
end
end
describe "#summarize_with_truncation" do
let(:max_tokens) { 709 }
it "truncates the context to meet the token limit" do
opts = content.except(:contents)
truncated_version = expected_messages(content[:contents], opts)
truncated_version.last[
:content
] = "Summarize the following in 400 words:\n(1 asd said: This is a"
OpenAiCompletionsInferenceStubs.stub_response(truncated_version, "truncated summary")
expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
end
end
end


@@ -1,26 +0,0 @@
# frozen_string_literal: true
require_relative "../../../../support/anthropic_completion_stubs"
RSpec.describe DiscourseAi::Summarization::Strategies::Anthropic do
describe "#summarize" do
let(:model) { "claude-v1" }
subject { described_class.new(model) }
it "asks an Anthropic's model to summarize the content" do
summarization_text = "This is a text"
expected_response = "This is a summary"
AnthropicCompletionStubs.stub_response(
subject.prompt(summarization_text),
"<ai>#{expected_response}</ai>",
req_opts: {
max_tokens_to_sample: 300,
},
)
expect(subject.summarize(summarization_text)).to eq(expected_response)
end
end
end


@@ -1,25 +0,0 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::Strategies::DiscourseAi do
describe "#summarize" do
let(:model) { "bart-large-cnn-samsum" }
subject { described_class.new(model) }
it "asks a Discourse's model to summarize the content" do
SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com"
summarization_text = "This is a text"
expected_response = "This is a summary"
WebMock
.stub_request(
:post,
"#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
)
.with(body: JSON.dump(model: model, content: subject.prompt(summarization_text)))
.to_return(status: 200, body: JSON.dump(summary_text: expected_response))
expect(subject.summarize(summarization_text)).to eq(expected_response)
end
end
end


@@ -0,0 +1,38 @@
# frozen_string_literal: true
require_relative "../../../../support/summarization/dummy_completion_model"
RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
describe "#summarize" do
let(:summarize_text) { "This is a text" }
let(:model) { DummyCompletionModel.new(model_tokens) }
let(:model_tokens) do
# Make sure each content fits in a single chunk.
DiscourseAi::Tokenizer::BertTokenizer.size("(1 asd said: This is a text ") + 3
end
subject { described_class.new(model) }
let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
context "when the content to summarize fits in a single call" do
it "does one call to summarize content" do
result = subject.summarize(content)
expect(model.summarization_calls).to eq(1)
expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
end
end
context "when the content to summarize doesn't fit in a single call" do
it "summarizes each chunk and then concatenates them" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
result = subject.summarize(content)
expect(model.summarization_calls).to eq(3)
expect(result[:summary]).to eq(DummyCompletionModel::CONCATENATED_SUMMARIES)
end
end
end
end


@@ -1,21 +0,0 @@
# frozen_string_literal: true
require_relative "../../../../support/openai_completions_inference_stubs"
RSpec.describe DiscourseAi::Summarization::Strategies::OpenAi do
let(:model) { "gpt-3.5-turbo" }
subject { described_class.new(model) }
it "asks a OpenAI's model to summarize the content" do
summarization_text = "This is a text"
expected_response = "This is a summary"
OpenAiCompletionsInferenceStubs.stub_response(
subject.prompt(summarization_text),
expected_response,
)
expect(subject.summarize(summarization_text)).to eq(expected_response)
end
end


@@ -0,0 +1,26 @@
# frozen_string_literal: true
require_relative "../../../../support/summarization/dummy_completion_model"
RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do
describe "#summarize" do
let(:summarize_text) { "This is a text" }
let(:model_tokens) { summarize_text.length }
let(:model) { DummyCompletionModel.new(model_tokens) }
subject { described_class.new(model) }
let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
context "when the content to summarize doesn't fit in a single call" do
it "summarizes a truncated version" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
result = subject.summarize(content)
expect(model.summarization_calls).to eq(1)
expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
end
end
end
end


@@ -45,7 +45,11 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
       AnthropicCompletionStubs.stub_streamed_response(prompt, deltas, req_opts: req_opts)

-      DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, "claude-v1") do |partial, cancel|
+      DiscourseAi::Inference::AnthropicCompletions.perform!(
+        prompt,
+        "claude-v1",
+        max_tokens: req_opts[:max_tokens_to_sample],
+      ) do |partial, cancel|
         data = partial[:completion]
         content = data if data
         cancel.call if content.split(" ").length == 2


@@ -0,0 +1,54 @@
# frozen_string_literal: true
class DummyCompletionModel
SINGLE_SUMMARY = "this is a single summary"
CONCATENATED_SUMMARIES = "this is a concatenated summary"
def initialize(prompt_length)
@max_length = prompt_length
@summarization_calls = 0
end
attr_reader :max_length, :summarization_calls
def summarize_in_chunks(contents, opts)
chunks = []
section = { ids: [], summary: "" }
contents.each do |item|
new_content = "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
if tokenizer.can_expand_tokens?(section[:summary], new_content, max_length)
section[:summary] += new_content
section[:ids] << item[:id]
else
chunks << section
section = { ids: [item[:id]], summary: new_content }
end
end
chunks << section if section[:summary].present?
chunks.each do |chunk|
chunk[:summary] = SINGLE_SUMMARY
@summarization_calls += 1
end
chunks
end
def concatenate_summaries(summaries)
@summarization_calls += 1
CONCATENATED_SUMMARIES
end
def summarize_with_truncation(_contents, _opts)
@summarization_calls += 1
SINGLE_SUMMARY
end
def tokenizer
DiscourseAi::Tokenizer::BertTokenizer
end
end