From 3bc010b686a3ec8ba9d7fae4583f840ebe0750c7 Mon Sep 17 00:00:00 2001 From: Roman Rizzi Date: Fri, 1 Dec 2023 10:17:24 -0300 Subject: [PATCH] FIX: call the right method to summarize with truncation (#328) --- .../strategies/truncate_content.rb | 3 +- .../strategies/truncate_content_spec.rb | 46 +++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 spec/lib/modules/summarization/strategies/truncate_content_spec.rb diff --git a/lib/summarization/strategies/truncate_content.rb b/lib/summarization/strategies/truncate_content.rb index 6e4c368a..1b4cbab6 100644 --- a/lib/summarization/strategies/truncate_content.rb +++ b/lib/summarization/strategies/truncate_content.rb @@ -20,8 +20,7 @@ module DiscourseAi opts = content.except(:contents) { - summary: - completion_model.summarize_with_truncation(content[:contents], opts, &on_partial_blk), + summary: summarize_with_truncation(content[:contents], opts, &on_partial_blk), chunks: [], } end diff --git a/spec/lib/modules/summarization/strategies/truncate_content_spec.rb b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb new file mode 100644 index 00000000..0857c46f --- /dev/null +++ b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do + subject(:strategy) { described_class.new(model) } + + before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" } + + let(:summarize_text) { "This is a text" } + let(:full_text) { "(1 asd said: #{summarize_text} " } + let(:model_tokens) { ::DiscourseAi::Tokenizer::BertTokenizer.size(full_text) - 5 } + + let(:model) do + DiscourseAi::Summarization::Models::Discourse.new( + "flan-t5-base-samsum", + max_tokens: model_tokens, + ) + end + + let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } } + + let(:summarized_text) { "this is a single summary" } + + let(:user) { User.new } + + describe "#summary" do + it "truncates the content and requests a summary" do + truncated = + ::DiscourseAi::Tokenizer::BertTokenizer.truncate( + "(1 asd said: This is a text ", + model_tokens, + ) + + WebMock + .stub_request( + :post, + "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify", + ) + .with(body: JSON.dump(model: model.model, content: truncated)) + .to_return(status: 200, body: JSON.dump({ summary_text: summarized_text })) + + summary = strategy.summarize(content, user).dig(:summary) + + expect(summary).to eq(summarized_text) + end + end +end