2023-06-27 10:44:34 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
describe TopicSummarization do
|
2023-07-12 10:21:51 -04:00
|
|
|
# Shared fixtures for every example in this spec.
# The acting user passed to #summarize is fabricated as an admin.
fab!(:user) { Fabricate(:admin) }
|
2023-08-15 13:16:06 -04:00
|
|
|
# Topic whose highest_post_number is set to match the two posts fabricated below.
fab!(:topic) { Fabricate(:topic, highest_post_number: 2) }
|
|
|
|
# Post number 1 of the topic.
fab!(:post_1) { Fabricate(:post, topic: topic, post_number: 1) }
|
|
|
|
# Post number 2 of the topic.
fab!(:post_2) { Fabricate(:post, topic: topic, post_number: 2) }
|
2023-06-27 10:44:34 -04:00
|
|
|
|
|
|
|
# Shared expectations for #summary_targets: the returned posts must leave out
# hidden posts and posts that have no author attached.
shared_examples "includes only public-visible topics" do
  # Built with an empty canned response; these examples only call #summary_targets.
  subject(:summarizer) { described_class.new(DummyCustomSummarization.new({})) }

  it "only includes visible posts" do
    topic.first_post.update!(hidden: true)

    targets = summarizer.summary_targets(topic)

    expect(targets.none?(&:hidden?)).to be(true)
  end

  it "doesn't include posts without users" do
    # Destroys the author record of the first post.
    topic.first_post.user.destroy!

    target_ids = summarizer.summary_targets(topic).map(&:id)

    expect(target_ids).not_to include(topic.first_post.id)
  end

  it "doesn't include deleted posts" do
    # NOTE(review): this clears the post's user_id instead of deleting the post,
    # so it looks like a second "no author" check rather than a deletion check.
    # Confirm the intent before relying on it for deleted-post coverage.
    topic.first_post.update!(user_id: nil)

    target_ids = summarizer.summary_targets(topic).map(&:id)

    expect(target_ids).not_to include(topic.first_post.id)
  end
end
|
|
|
|
|
|
|
|
# Runs the shared visibility examples twice: once with the topic's has_summary
# flag set and once with it cleared.
describe "#summary_targets" do
  context "when the topic has a best replies summary" do
    before do
      topic.has_summary = true
    end

    it_behaves_like "includes only public-visible topics"
  end

  context "when the topic doesn't have a best replies summary" do
    before do
      topic.has_summary = false
    end

    it_behaves_like "includes only public-visible topics"
  end
end
|
|
|
|
|
|
|
|
describe "#summarize" do
|
2023-06-27 12:18:10 -04:00
|
|
|
# Object under test, built around the dummy strategy defined below.
subject(:summarization) { described_class.new(strategy) }
|
2023-06-27 10:44:34 -04:00
|
|
|
|
2023-06-27 12:18:10 -04:00
|
|
|
# Dummy strategy seeded with `summary`, which each nested group defines via `let`.
let(:strategy) { DummyCustomSummarization.new(summary) }
|
2023-06-27 10:44:34 -04:00
|
|
|
|
|
|
|
# Asserts that a top-level SummarySection (one with no meta_section_id) exists
# for +topic+ and matches the strategy output.
#
# topic            - the topic that was summarized.
# summary_response - hash whose :summary entry is the expected stored text.
def assert_summary_is_cached(topic, summary_response)
  section = SummarySection.find_by(target: topic, meta_section_id: nil)
  post_numbers = topic.posts.map(&:post_number)

  # The stored range has to span every post number in the topic.
  expect(section.content_range).to cover(*post_numbers)
  expect(section).to have_attributes(
    summarized_text: summary_response[:summary],
    original_content_sha: be_present,
    algorithm: strategy.model,
  )
end
|
|
|
|
|
|
|
|
# Asserts that a chunk-level SummarySection (one that has a meta_section_id)
# exists for +topic+ and matches a single chunk of the strategy output.
#
# topic          - the topic that was summarized.
# chunk_response - hash with :ids (post numbers in the chunk) and :summary.
def assert_chunk_is_cached(topic, chunk_response)
  # The chunk is looked up by the range running from its lowest to highest id.
  lowest_id, highest_id = chunk_response[:ids].minmax
  chunk_section =
    SummarySection.where.not(meta_section_id: nil).find_by(
      target: topic,
      content_range: (lowest_id..highest_id),
    )

  expect(chunk_section).to have_attributes(
    summarized_text: chunk_response[:summary],
    original_content_sha: be_present,
    algorithm: strategy.model,
  )
end
|
|
|
|
|
|
|
|
# Covers a strategy response made of a final summary with no intermediate chunks.
context "when the content was summarized in a single chunk" do
  let(:summary) { { summary: "This is the final summary", chunks: [] } }

  it "caches the summary" do
    section = summarization.summarize(topic, user)

    expect(section.summarized_text).to eq(summary[:summary])

    assert_summary_is_cached(topic, summary)
  end

  it "returns the cached version in subsequent calls" do
    summarization.summarize(topic, user)

    # Overwrite the stored text so that a cache hit can be told apart from a
    # regenerated summary, which would carry summary[:summary] instead.
    cached_summary_text = "This is a cached summary"
    SummarySection.find_by(target: topic, meta_section_id: nil).update!(
      summarized_text: cached_summary_text,
      updated_at: 24.hours.ago,
    )

    section = summarization.summarize(topic, user)
    expect(section.summarized_text).to eq(cached_summary_text)
  end

  context "when the topic has embed content cached" do
    it "embed content is used instead of the raw text" do
      topic_embed =
        Fabricate(
          :topic_embed,
          topic: topic,
          embed_content_cache: "<p>hello world new post :D</p>",
        )

      summarization.summarize(topic, user)

      # Find the entry for the first post in the content read back from the strategy.
      first_post_data =
        strategy.content[:contents].detect { |c| c[:id] == topic.first_post.post_number }

      expect(first_post_data[:text]).to eq(topic_embed.embed_content_cache)
    end
  end
end
|
|
|
|
|
|
|
|
# Covers a strategy response that carries per-chunk summaries alongside the
# final summary.
context "when the content was summarized in multiple chunks" do
  let(:summary) do
    opening_chunk = { ids: [topic.first_post.post_number], summary: "this is the first chunk" }
    closing_chunk = {
      ids: [post_1.post_number, post_2.post_number],
      summary: "this is the second chunk",
    }

    { summary: "This is the final summary", chunks: [opening_chunk, closing_chunk] }
  end

  it "caches the summary and each chunk" do
    result = summarization.summarize(topic, user)

    expect(result.summarized_text).to eq(summary[:summary])

    assert_summary_is_cached(topic, summary)

    summary[:chunks].each do |chunk|
      assert_chunk_is_cached(topic, chunk)
    end
  end
end
|
2023-07-12 10:21:51 -04:00
|
|
|
|
|
|
|
# Exercises when #summarize serves the stored summary and when it regenerates it.
describe "invalidating cached summaries" do
  let(:cached_text) { "This is a cached summary" }
  let(:summarized_text) { "This is the final summary" }
  let(:summary) do
    {
      summary: summarized_text,
      chunks: [
        { ids: [topic.first_post.post_number], summary: "this is the first chunk" },
        { ids: [post_1.post_number, post_2.post_number], summary: "this is the second chunk" },
      ],
    }
  end

  # Looks the top-level section up again on every call, so each caller sees the
  # row as currently stored.
  def cached_summary
    SummarySection.find_by(target: topic, meta_section_id: nil)
  end

  before do
    summarization.summarize(topic, user)

    # Replace the stored text with a marker value and backdate the row by a day.
    cached_summary.update!(summarized_text: cached_text, created_at: 24.hours.ago)
  end

  context "when the user can requests new summaries" do
    context "when there are no new posts" do
      it "returns the cached summary" do
        section = summarization.summarize(topic, user)

        expect(section.summarized_text).to eq(cached_text)
      end
    end

    context "when there are new posts" do
      # A stored SHA that no longer matches is how this group simulates new posts.
      before { cached_summary.update!(original_content_sha: "outdated_sha") }

      it "returns a new summary" do
        section = summarization.summarize(topic, user)

        expect(section.summarized_text).to eq(summarized_text)
      end

      context "when the cached summary is less than one hour old" do
        # Runs after the outer hooks, so it overrides the day-old timestamp.
        before { cached_summary.update!(created_at: 30.minutes.ago) }

        it "returns the cached summary" do
          section = summarization.summarize(topic, user)

          expect(section.summarized_text).to eq(cached_text)
          expect(section.outdated).to eq(true)
        end

        it "returns a new summary if the skip_age_check flag is passed" do
          section = summarization.summarize(topic, user, skip_age_check: true)

          expect(section.summarized_text).to eq(summarized_text)
        end
      end
    end
  end
end
|
2023-08-11 14:08:49 -04:00
|
|
|
|
|
|
|
# Checks that a block handed to #summarize is called with the summary text.
describe "stream partial updates" do
  let(:summary) { { summary: "This is the final summary", chunks: [] } }

  it "receives a blk that is passed to the underlying strategy and called with partial summaries" do
    received = nil
    # A plain proc keeps ordinary block argument semantics when passed with `&`.
    on_partial = proc { |partial_summary| received = partial_summary }

    summarization.summarize(topic, user, &on_partial)

    expect(received).to eq(summary[:summary])
  end
end
|
2023-06-27 10:44:34 -04:00
|
|
|
end
|
|
|
|
end
|