# frozen_string_literal: true
# Specs for the fold-content summarization strategy: content that fits in one
# LLM call is summarized directly; oversized content is chunked, each chunk
# summarized, and the chunk summaries folded into a final concatenated summary.
RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
  describe "#summarize" do
    subject(:strategy) { described_class.new(model) }

    let(:summarize_text) { "This is a text" }

    let(:model_tokens) do
      # Make sure each content fits in a single chunk.
      # 700 is the number of tokens reserved for the prompt.
      700 + DiscourseAi::Tokenizer::OpenAiTokenizer.size("(1 asd said: This is a text ") + 3
    end

    let(:model) do
      DiscourseAi::Summarization::Models::OpenAi.new("gpt-4", max_tokens: model_tokens)
    end

    let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }

    let(:single_summary) { "this is a single summary" }
    let(:concatenated_summary) { "this is a concatenated summary" }

    let(:user) { User.new }

    context "when the content to summarize fits in a single call" do
      it "does one call to summarize content" do
        result =
          DiscourseAi::Completions::Llm.with_prepared_responses([single_summary]) do |spy|
            # The spy counts LLM completions; a single-chunk input needs exactly one.
            strategy.summarize(content, user).tap { expect(spy.completions).to eq(1) }
          end

        expect(result[:summary]).to eq(single_summary)
      end
    end

    context "when the content to summarize doesn't fit in a single call" do
      it "summarizes each chunk and then concatenates them" do
        content[:contents] << { poster: "asd2", id: 2, text: summarize_text }

        # Two chunk summaries plus one final fold = three completions.
        result =
          DiscourseAi::Completions::Llm.with_prepared_responses(
            [single_summary, single_summary, concatenated_summary],
          ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) } }

        expect(result[:summary]).to eq(concatenated_summary)
      end

      it "keeps splitting into chunks until the content fits into a single call to create a cohesive narrative" do
        content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
        # Each max-length response forces another round of folding before the
        # combined text finally fits into a single call.
        max_length_response = "(1 asd said: This is a text "
        chunk_of_chunks = "I'm smol"

        # 2 chunk summaries + 2 intermediate folds + 1 final fold = 5 completions.
        result =
          DiscourseAi::Completions::Llm.with_prepared_responses(
            [
              max_length_response,
              max_length_response,
              chunk_of_chunks,
              chunk_of_chunks,
              concatenated_summary,
            ],
          ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(5) } }

        expect(result[:summary]).to eq(concatenated_summary)
      end
    end
  end
end
|