diff --git a/lib/modules/summarization/entry_point.rb b/lib/modules/summarization/entry_point.rb
index 93b27ea0..d1af8117 100644
--- a/lib/modules/summarization/entry_point.rb
+++ b/lib/modules/summarization/entry_point.rb
@@ -4,23 +4,38 @@ module DiscourseAi
   module Summarization
     class EntryPoint
       def load_files
-        require_relative "strategies/anthropic"
-        require_relative "strategies/discourse_ai"
-        require_relative "strategies/open_ai"
+        require_relative "models/base"
+        require_relative "models/anthropic"
+        require_relative "models/discourse"
+        require_relative "models/open_ai"
+
+        require_relative "strategies/fold_content"
+        require_relative "strategies/truncate_content"
       end
 
       def inject_into(plugin)
-        [
-          Strategies::OpenAi.new("gpt-4"),
-          Strategies::OpenAi.new("gpt-4-32k"),
-          Strategies::OpenAi.new("gpt-3.5-turbo"),
-          Strategies::OpenAi.new("gpt-3.5-turbo-16k"),
-          Strategies::DiscourseAi.new("bart-large-cnn-samsum"),
-          Strategies::DiscourseAi.new("flan-t5-base-samsum"),
-          Strategies::DiscourseAi.new("long-t5-tglobal-base-16384-book-summary"),
-          Strategies::Anthropic.new("claude-v1"),
-          Strategies::Anthropic.new("claude-v1-100k"),
-        ].each { |strategy| plugin.register_summarization_strategy(strategy) }
+        foldable_models = [
+          Models::OpenAi.new("gpt-4", max_tokens: 8192),
+          Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
+          Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
+          Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
+          Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
+          Models::Anthropic.new("claude-v1", max_tokens: 9000),
+          Models::Anthropic.new("claude-v1-100k", max_tokens: 100_000),
+        ]
+
+        foldable_models.each do |model|
+          plugin.register_summarization_strategy(Strategies::FoldContent.new(model))
+        end
+
+        truncatable_models = [
+          Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024),
+          Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512),
+        ]
+
+        truncatable_models.each do |model|
+          plugin.register_summarization_strategy(Strategies::TruncateContent.new(model))
+        end
       end
     end
   end
diff --git a/lib/modules/summarization/models/anthropic.rb b/lib/modules/summarization/models/anthropic.rb
new file mode 100644
index 00000000..d038385e
--- /dev/null
+++ b/lib/modules/summarization/models/anthropic.rb
@@ -0,0 +1,84 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Summarization
+    module Models
+      class Anthropic < Base
+        def display_name
+          "Anthropic's #{model}"
+        end
+
+        def correctly_configured?
+          SiteSetting.ai_anthropic_api_key.present?
+        end
+
+        def configuration_hint
+          I18n.t(
+            "discourse_ai.summarization.configuration_hint",
+            count: 1,
+            setting: "ai_anthropic_api_key",
+          )
+        end
+
+        def concatenate_summaries(summaries)
+          instructions = <<~TEXT
+            Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
+            Include only the summary inside <ai> tags.
+          TEXT
+
+          instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" }
+          instructions += "Assistant:\n"
+
+          completion(instructions)
+        end
+
+        def summarize_with_truncation(contents, opts)
+          instructions = build_base_prompt(opts)
+
+          text_to_summarize = contents.map { |c| format_content_item(c) }.join
+          truncated_content = tokenizer.truncate(text_to_summarize, max_tokens - reserved_tokens)
+
+          instructions += "<input>#{truncated_content}</input>\nAssistant:\n"
+
+          completion(instructions)
+        end
+
+        private
+
+        def summarize_chunk(chunk_text, opts)
+          completion(build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n")
+        end
+
+        def build_base_prompt(opts)
+          base_prompt = <<~TEXT
+            Human: Summarize the following forum discussion inside the given <input> tag.
+            Include only the summary inside <ai> tags.
+          TEXT
+
+          if opts[:resource_path]
+            base_prompt += "Try generating links as well the format is #{opts[:resource_path]}.\n"
+          end
+
+          base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
+            :content_title
+          ]
+
+          base_prompt += "Don't use more than 400 words.\n"
+        end
+
+        def completion(prompt)
+          response =
+            ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(:completion)
+
+          Nokogiri::HTML5.fragment(response).at("ai").text
+        end
+
+        def tokenizer
+          DiscourseAi::Tokenizer::AnthropicTokenizer
+        end
+
+        attr_reader :max_tokens
+      end
+    end
+  end
+end
diff --git a/lib/modules/summarization/models/base.rb b/lib/modules/summarization/models/base.rb
new file mode 100644
index 00000000..558006a8
--- /dev/null
+++ b/lib/modules/summarization/models/base.rb
@@ -0,0 +1,82 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Summarization
+    module Models
+      class Base
+        def initialize(model, max_tokens:)
+          @model = model
+          @max_tokens = max_tokens
+        end
+
+        def correctly_configured?
+          raise NotImplementedError
+        end
+
+        def display_name
+          raise NotImplementedError
+        end
+
+        def configuration_hint
+          raise NotImplementedError
+        end
+
+        def summarize_in_chunks(contents, opts)
+          chunks = []
+
+          section = { ids: [], summary: "" }
+
+          contents.each do |item|
+            new_content = format_content_item(item)
+
+            if tokenizer.can_expand_tokens?(
+                 section[:summary],
+                 new_content,
+                 max_tokens - reserved_tokens,
+               )
+              section[:summary] += new_content
+              section[:ids] << item[:id]
+            else
+              chunks << section
+              section = { ids: [item[:id]], summary: new_content }
+            end
+          end
+
+          chunks << section if section[:summary].present?
+
+          chunks.each { |chunk| chunk[:summary] = summarize_chunk(chunk[:summary], opts) }
+
+          chunks
+        end
+
+        def concatenate_summaries(_summaries)
+          raise NotImplementedError
+        end
+
+        def summarize_with_truncation(_contents, _opts)
+          raise NotImplementedError
+        end
+
+        attr_reader :model
+
+        protected
+
+        attr_reader :max_tokens
+
+        def summarize_chunk(_chunk_text, _opts)
+          raise NotImplementedError
+        end
+
+        def format_content_item(item)
+          "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
+        end
+
+        def reserved_tokens
+          # Reserve tokens for the response and the base prompt
+          # ~500 words
+          700
+        end
+      end
+    end
+  end
+end
diff --git a/lib/modules/summarization/strategies/discourse_ai.rb b/lib/modules/summarization/models/discourse.rb
similarity index 58%
rename from lib/modules/summarization/strategies/discourse_ai.rb
rename to lib/modules/summarization/models/discourse.rb
index 363b40b8..240f1fb8 100644
--- a/lib/modules/summarization/strategies/discourse_ai.rb
+++ b/lib/modules/summarization/models/discourse.rb
@@ -2,8 +2,8 @@
 
 module DiscourseAi
   module Summarization
-    module Strategies
-      class DiscourseAi < ::Summarization::Base
+    module Models
+      class Discourse < Base
         def display_name
           "Discourse AI's #{model}"
         end
@@ -22,29 +22,39 @@ module DiscourseAi
           )
         end
 
-        def summarize(content_text)
-          ::DiscourseAi::Inference::DiscourseClassifier.perform!(
-            "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
-            model,
-            prompt(content_text),
-            SiteSetting.ai_summarization_discourse_service_api_key,
-          ).dig(:summary_text)
+        def concatenate_summaries(summaries)
+          completion(summaries.join("\n"))
         end
 
-        def prompt(text)
-          ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text, max_length)
+        def summarize_with_truncation(contents, opts)
+          text_to_summarize = contents.map { |c| format_content_item(c) }.join
+          truncated_content =
+            ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text_to_summarize, max_tokens)
+
+          completion(truncated_content)
         end
 
         private
 
-        def max_length
-          lengths = {
-            "bart-large-cnn-samsum" => 1024,
-            "flan-t5-base-samsum" => 512,
-            "long-t5-tglobal-base-16384-book-summary" => 16_384,
-          }
+        def summarize_chunk(chunk_text, _opts)
+          completion(chunk_text)
+        end
 
-          lengths[model]
+        def reserved_tokens
+          0
+        end
+
+        def completion(prompt)
+          ::DiscourseAi::Inference::DiscourseClassifier.perform!(
+            "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
+            model,
+            prompt,
+            SiteSetting.ai_summarization_discourse_service_api_key,
+          ).dig(:summary_text)
+        end
+
+        def tokenizer
+          DiscourseAi::Tokenizer::BertTokenizer
         end
       end
     end
diff --git a/lib/modules/summarization/models/open_ai.rb b/lib/modules/summarization/models/open_ai.rb
new file mode 100644
index 00000000..cd91d31e
--- /dev/null
+++ b/lib/modules/summarization/models/open_ai.rb
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Summarization
+    module Models
+      class OpenAi < Base
+        def display_name
+          "Open AI's #{model}"
+        end
+
+        def correctly_configured?
+          SiteSetting.ai_openai_api_key.present?
+        end
+
+        def configuration_hint
+          I18n.t(
+            "discourse_ai.summarization.configuration_hint",
+            count: 1,
+            setting: "ai_openai_api_key",
+          )
+        end
+
+        def concatenate_summaries(summaries)
+          messages = [
+            { role: "system", content: "You are a helpful bot" },
+            {
+              role: "user",
+              content:
+                "Concatenate these disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}",
+            },
+          ]
+
+          completion(messages)
+        end
+
+        def summarize_with_truncation(contents, opts)
+          messages = [{ role: "system", content: build_base_prompt(opts) }]
+
+          text_to_summarize = contents.map { |c| format_content_item(c) }.join
+          truncated_content = tokenizer.truncate(text_to_summarize, max_tokens - reserved_tokens)
+
+          messages << {
+            role: "user",
+            content: "Summarize the following in 400 words:\n#{truncated_content}",
+          }
+
+          completion(messages)
+        end
+
+        private
+
+        def summarize_chunk(chunk_text, opts)
+          completion(
+            [
+              { role: "system", content: build_base_prompt(opts) },
+              { role: "user", content: "Summarize the following in 400 words:\n#{chunk_text}" },
+            ],
+          )
+        end
+
+        def build_base_prompt(opts)
+          base_prompt = <<~TEXT
+            You are a summarization bot.
+            You effectively summarise any text and reply ONLY with ONLY the summarized text.
+            You condense it into a shorter version.
+            You understand and generate Discourse forum Markdown.
+          TEXT
+
+          if opts[:resource_path]
+            base_prompt +=
+              "Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
+          end
+
+          base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
+            :content_title
+          ]
+
+          base_prompt
+        end
+
+        def completion(prompt)
+          ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
+            :choices,
+            0,
+            :message,
+            :content,
+          )
+        end
+
+        def tokenizer
+          DiscourseAi::Tokenizer::OpenAiTokenizer
+        end
+      end
+    end
+  end
+end
diff --git a/lib/modules/summarization/strategies/anthropic.rb b/lib/modules/summarization/strategies/anthropic.rb
deleted file mode 100644
index 57e08285..00000000
--- a/lib/modules/summarization/strategies/anthropic.rb
+++ /dev/null
@@ -1,57 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
-  module Summarization
-    module Strategies
-      class Anthropic < ::Summarization::Base
-        def display_name
-          "Anthropic's #{model}"
-        end
-
-        def correctly_configured?
-          SiteSetting.ai_anthropic_api_key.present?
-        end
-
-        def configuration_hint
-          I18n.t(
-            "discourse_ai.summarization.configuration_hint",
-            count: 1,
-            setting: "ai_anthropic_api_key",
-          )
-        end
-
-        def summarize(content_text)
-          response =
-            ::DiscourseAi::Inference::AnthropicCompletions.perform!(
-              prompt(content_text),
-              model,
-            ).dig(:completion)
-
-          Nokogiri::HTML5.fragment(response).at("ai").text
-        end
-
-        def prompt(content)
-          truncated_content =
-            ::DiscourseAi::Tokenizer::AnthropicTokenizer.truncate(content, max_length - 50)
-
-          "Human: Summarize the following article that is inside <input> tags.
-          Please include only the summary inside <ai> tags.
-
-          <input>##{truncated_content}</input>
-
-          Assistant:
-          "
-        end
-
-        private
-
-        def max_length
-          lengths = { "claude-v1" => 9000, "claude-v1-100k" => 100_000 }
-
-          lengths[model]
-        end
-      end
-    end
-  end
-end
diff --git a/lib/modules/summarization/strategies/fold_content.rb b/lib/modules/summarization/strategies/fold_content.rb
new file mode 100644
index 00000000..2f4508d6
--- /dev/null
+++ b/lib/modules/summarization/strategies/fold_content.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Summarization
+    module Strategies
+      class FoldContent < ::Summarization::Base
+        def initialize(completion_model)
+          @completion_model = completion_model
+        end
+
+        attr_reader :completion_model
+
+        delegate :correctly_configured?,
+                 :display_name,
+                 :configuration_hint,
+                 :model,
+                 to: :completion_model
+
+        def summarize(content)
+          opts = content.except(:contents)
+          summaries = completion_model.summarize_in_chunks(content[:contents], opts)
+
+          return { summary: summaries.first[:summary], chunks: [] } if summaries.length == 1
+
+          { summary: completion_model.concatenate_summaries(summaries), chunks: summaries }
+        end
+      end
+    end
+  end
+end
diff --git a/lib/modules/summarization/strategies/open_ai.rb b/lib/modules/summarization/strategies/open_ai.rb
deleted file mode 100644
index 35011b66..00000000
--- a/lib/modules/summarization/strategies/open_ai.rb
+++ /dev/null
@@ -1,56 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
-  module Summarization
-    module Strategies
-      class OpenAi < ::Summarization::Base
-        def display_name
-          "Open AI's #{model}"
-        end
-
-        def correctly_configured?
-          SiteSetting.ai_openai_api_key.present?
-        end
-
-        def configuration_hint
-          I18n.t(
-            "discourse_ai.summarization.configuration_hint",
-            count: 1,
-            setting: "ai_openai_api_key",
-          )
-        end
-
-        def summarize(content_text)
-          ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt(content_text), model).dig(
-            :choices,
-            0,
-            :message,
-            :content,
-          )
-        end
-
-        def prompt(content)
-          truncated_content =
-            ::DiscourseAi::Tokenizer::OpenAiTokenizer.truncate(content, max_length - 50)
-
-          messages = [{ role: "system", content: <<~TEXT }]
-            Summarize the following article:\n\n#{truncated_content}
-          TEXT
-        end
-
-        private
-
-        def max_length
-          lengths = {
-            "gpt-3.5-turbo" => 4096,
-            "gpt-4" => 8192,
-            "gpt-3.5-turbo-16k" => 16_384,
-            "gpt-4-32k" => 32_768,
-          }
-
-          lengths[model]
-        end
-      end
-    end
-  end
-end
diff --git a/lib/modules/summarization/strategies/truncate_content.rb b/lib/modules/summarization/strategies/truncate_content.rb
new file mode 100644
index 00000000..7634dd65
--- /dev/null
+++ b/lib/modules/summarization/strategies/truncate_content.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Summarization
+    module Strategies
+      class TruncateContent < ::Summarization::Base
+        def initialize(completion_model)
+          @completion_model = completion_model
+        end
+
+        attr_reader :completion_model
+
+        delegate :correctly_configured?,
+                 :display_name,
+                 :configuration_hint,
+                 :model,
+                 to: :completion_model
+
+        def summarize(content)
+          opts = content.except(:contents)
+
+          {
+            summary: completion_model.summarize_with_truncation(content[:contents], opts),
+            chunks: [],
+          }
+        end
+      end
+    end
+  end
+end
diff --git a/lib/shared/inference/anthropic_completions.rb b/lib/shared/inference/anthropic_completions.rb
index 8af3be02..18fd4418 100644
--- a/lib/shared/inference/anthropic_completions.rb
+++ b/lib/shared/inference/anthropic_completions.rb
@@ -24,7 +24,7 @@ module ::DiscourseAi
       payload[:temperature] = temperature if temperature
       payload[:top_p] = top_p if top_p
-      payload[:max_tokens_to_sample] = max_tokens || 300
+      payload[:max_tokens_to_sample] = max_tokens if max_tokens
       payload[:stream] = true if block_given?
 
       Net::HTTP.start(
diff --git a/lib/shared/tokenizer/tokenizer.rb b/lib/shared/tokenizer/tokenizer.rb
index 0fdcf2c9..01178614 100644
--- a/lib/shared/tokenizer/tokenizer.rb
+++ b/lib/shared/tokenizer/tokenizer.rb
@@ -3,21 +3,31 @@
 module DiscourseAi
   module Tokenizer
     class BasicTokenizer
-      def self.tokenizer
-        raise NotImplementedError
-      end
+      class << self
+        def tokenizer
+          raise NotImplementedError
+        end
 
-      def self.tokenize(text)
-        tokenizer.encode(text).tokens
-      end
-      def self.size(text)
-        tokenize(text).size
-      end
-      def self.truncate(text, max_length)
-        # Fast track the common case where the text is already short enough.
-        return text if text.size < max_length
+        def tokenize(text)
+          tokenizer.encode(text).tokens
+        end
 
-        tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
+        def size(text)
+          tokenize(text).size
+        end
+
+        def truncate(text, max_length)
+          # Fast track the common case where the text is already short enough.
+          return text if text.size < max_length
+
+          tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
+        end
+
+        def can_expand_tokens?(text, addition, max_length)
+          return true if text.size + addition.size < max_length
+
+          tokenizer.encode(text).ids.length + tokenizer.encode(addition).ids.length < max_length
+        end
       end
     end
 
@@ -36,22 +46,30 @@ module DiscourseAi
     end
 
     class OpenAiTokenizer < BasicTokenizer
-      def self.tokenizer
-        @@tokenizer ||= Tiktoken.get_encoding("cl100k_base")
-      end
+      class << self
+        def tokenizer
+          @@tokenizer ||= Tiktoken.get_encoding("cl100k_base")
+        end
 
-      def self.tokenize(text)
-        tokenizer.encode(text)
-      end
+        def tokenize(text)
+          tokenizer.encode(text)
+        end
 
-      def self.truncate(text, max_length)
-        # Fast track the common case where the text is already short enough.
-        return text if text.size < max_length
+        def truncate(text, max_length)
+          # Fast track the common case where the text is already short enough.
+          return text if text.size < max_length
 
-        tokenizer.decode(tokenize(text).take(max_length))
-      rescue Tiktoken::UnicodeError
-        max_length = max_length - 1
-        retry
+          tokenizer.decode(tokenize(text).take(max_length))
+        rescue Tiktoken::UnicodeError
+          max_length = max_length - 1
+          retry
+        end
+
+        def can_expand_tokens?(text, addition, max_length)
+          return true if text.size + addition.size < max_length
+
+          tokenizer.encode(text).length + tokenizer.encode(addition).length < max_length
+        end
       end
     end
   end
diff --git a/spec/lib/modules/summarization/models/anthropic_spec.rb b/spec/lib/modules/summarization/models/anthropic_spec.rb
new file mode 100644
index 00000000..2ce99a3a
--- /dev/null
+++ b/spec/lib/modules/summarization/models/anthropic_spec.rb
@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+require_relative "../../../../support/anthropic_completion_stubs"
+
+RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
+  let(:model) { "claude-v1" }
+  let(:max_tokens) { 720 }
+
+  subject { described_class.new(model, max_tokens: max_tokens) }
+
+  let(:content) do
+    {
+      resource_path: "/t/1/POST_NUMBER",
+      content_title: "This is a title",
+      contents: [{ poster: "asd", id: 1, text: "This is a text" }],
+    }
+  end
+
+  def expected_messages(contents, opts)
+    base_prompt = <<~TEXT
+      Human: Summarize the following forum discussion inside the given <input> tag.
+      Include only the summary inside <ai> tags.
+      Try generating links as well the format is #{opts[:resource_path]}.
+      The discussion title is: #{opts[:content_title]}.
+      Don't use more than 400 words.
+    TEXT
+
+    text =
+      contents.reduce("") do |memo, item|
+        memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
+      end
+
+    base_prompt += "<input>#{text}</input>\nAssistant:\n"
+  end
+
+  describe "#summarize_in_chunks" do
+    context "when the content fits in a single chunk" do
+      it "performs a request to summarize" do
+        opts = content.except(:contents)
+
+        AnthropicCompletionStubs.stub_response(
+          expected_messages(content[:contents], opts),
+          "<ai>This is summary 1</ai>",
+        )
+
+        summarized_chunks =
+          subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
+
+        expect(summarized_chunks).to contain_exactly("This is summary 1")
+      end
+    end
+
+    context "when the content fits in multiple chunks" do
+      it "performs a request for each one to summarize" do
+        content[:contents] << {
+          poster: "asd2",
+          id: 2,
+          text: "This is a different text to summarize",
+        }
+        opts = content.except(:contents)
+
+        content[:contents].each_with_index do |item, idx|
+          AnthropicCompletionStubs.stub_response(
+            expected_messages([item], opts),
+            "<ai>This is summary #{idx + 1}</ai>",
+          )
+        end
+
+        summarized_chunks =
+          subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
+
+        expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
+      end
+    end
+  end
+
+  describe "#concatenate_summaries" do
+    it "combines all the different summaries into a single one" do
+      messages = <<~TEXT
+        Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
+        Include only the summary inside <ai> tags.
+        <input>summary 1</input>
+        <input>summary 2</input>
+        Assistant:
+      TEXT
+
+      AnthropicCompletionStubs.stub_response(messages, "<ai>concatenated summary</ai>")
+
+      expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq(
+        "concatenated summary",
+      )
+    end
+  end
+
+  describe "#summarize_with_truncation" do
+    let(:max_tokens) { 709 }
+
+    it "truncates the context to meet the token limit" do
+      opts = content.except(:contents)
+
+      instructions = <<~TEXT
+        Human: Summarize the following forum discussion inside the given <input> tag.
+        Include only the summary inside <ai> tags.
+        Try generating links as well the format is #{opts[:resource_path]}.
+        The discussion title is: #{opts[:content_title]}.
+        Don't use more than 400 words.
+        <input>(1 asd said: This is a</input>
+        Assistant:
+      TEXT
+
+      AnthropicCompletionStubs.stub_response(instructions, "<ai>truncated summary</ai>")
+
+      expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
+    end
+  end
+end
diff --git a/spec/lib/modules/summarization/models/discourse_spec.rb b/spec/lib/modules/summarization/models/discourse_spec.rb
new file mode 100644
index 00000000..c505da1c
--- /dev/null
+++ b/spec/lib/modules/summarization/models/discourse_spec.rb
@@ -0,0 +1,93 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Summarization::Models::Discourse do
+  let(:model) { "bart-large-cnn-samsum" }
+  let(:max_tokens) { 20 }
+
+  subject { described_class.new(model, max_tokens: max_tokens) }
+
+  let(:content) do
+    {
+      resource_path: "/t/1/POST_NUMBER",
+      content_title: "This is a title",
+      contents: [{ poster: "asd", id: 1, text: "This is a text" }],
+    }
+  end
+
+  before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" }
+
+  def stub_request(prompt, response)
+    WebMock
+      .stub_request(
+        :post,
+        "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
+      )
+      .with(body: JSON.dump(model: model, content: prompt))
+      .to_return(status: 200, body: JSON.dump(summary_text: response))
+  end
+
+  def expected_messages(contents, opts)
+    contents.reduce("") do |memo, item|
+      memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
+    end
+  end
+
+  describe "#summarize_in_chunks" do
+    context "when the content fits in a single chunk" do
+      it "performs a request to summarize" do
+        opts = content.except(:contents)
+
+        stub_request(expected_messages(content[:contents], opts), "This is summary 1")
+
+        summarized_chunks =
+          subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
+
+        expect(summarized_chunks).to contain_exactly("This is summary 1")
+      end
+    end
+
+    context "when the content fits in multiple chunks" do
+      it "performs a request for each one to summarize" do
+        content[:contents] << {
+          poster: "asd2",
+          id: 2,
+          text: "This is a different text to summarize",
+        }
+        opts = content.except(:contents)
+
+        content[:contents].each_with_index do |item, idx|
+          stub_request(expected_messages([item], opts), "This is summary #{idx + 1}")
+        end
+
+        summarized_chunks =
+          subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
+
+        expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
+      end
+    end
+  end
+
+  describe "#concatenate_summaries" do
+    it "combines all the different summaries into a single one" do
+      messages = ["summary 1", "summary 2"].join("\n")
+
+      stub_request(messages, "concatenated summary")
+
+      expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq(
+        "concatenated summary",
+      )
+    end
+  end
+
+  describe "#summarize_with_truncation" do
+    let(:max_tokens) { 9 }
+
+    it "truncates the context to meet the token limit" do
+      opts = content.except(:contents)
+
+      stub_request("( 1 asd said : this is", "truncated summary")
+
+      expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
+    end
+  end
+end
diff --git a/spec/lib/modules/summarization/models/open_ai_spec.rb b/spec/lib/modules/summarization/models/open_ai_spec.rb
new file mode 100644
index 00000000..d01fd287
--- /dev/null
+++ b/spec/lib/modules/summarization/models/open_ai_spec.rb
@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+require_relative "../../../../support/openai_completions_inference_stubs"
+
+RSpec.describe DiscourseAi::Summarization::Models::OpenAi do
+  let(:model) { "gpt-3.5-turbo" }
+  let(:max_tokens) { 720 }
+
+  subject { described_class.new(model, max_tokens: max_tokens) }
+
+  let(:content) do
+    {
+      resource_path: "/t/1/POST_NUMBER",
+      content_title: "This is a title",
+      contents: [{ poster: "asd", id: 1, text: "This is a text" }],
+    }
+  end
+
+  def expected_messages(contents, opts)
+    base_prompt = <<~TEXT
+      You are a summarization bot.
+      You effectively summarise any text and reply ONLY with ONLY the summarized text.
+      You condense it into a shorter version.
+      You understand and generate Discourse forum Markdown.
+      Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)
+      The discussion title is: #{opts[:content_title]}.
+    TEXT
+
+    messages = [{ role: "system", content: base_prompt }]
+
+    text =
+      contents.reduce("") do |memo, item|
+        memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
+      end
+
+    messages << { role: "user", content: "Summarize the following in 400 words:\n#{text}" }
+  end
+
+  describe "#summarize_in_chunks" do
+    context "when the content fits in a single chunk" do
+      it "performs a request to summarize" do
+        opts = content.except(:contents)
+
+        OpenAiCompletionsInferenceStubs.stub_response(
+          expected_messages(content[:contents], opts),
+          "This is summary 1",
+        )
+
+        summarized_chunks =
+          subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
+
+        expect(summarized_chunks).to contain_exactly("This is summary 1")
+      end
+    end
+
+    context "when the content fits in multiple chunks" do
+      it "performs a request for each one to summarize" do
+        content[:contents] << {
+          poster: "asd2",
+          id: 2,
+          text: "This is a different text to summarize",
+        }
+        opts = content.except(:contents)
+
+        content[:contents].each_with_index do |item, idx|
+          OpenAiCompletionsInferenceStubs.stub_response(
+            expected_messages([item], opts),
+            "This is summary #{idx + 1}",
+          )
+        end
+
+        summarized_chunks =
+          subject.summarize_in_chunks(content[:contents], opts).map { |c| c[:summary] }
+
+        expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
+      end
+    end
+  end
+
+  describe "#concatenate_summaries" do
+    it "combines all the different summaries into a single one" do
+      messages = [
+        { role: "system", content: "You are a helpful bot" },
+        {
+          role: "user",
+          content:
+            "Concatenate these disjoint summaries, creating a cohesive narrative:\nsummary 1\nsummary 2",
+        },
+      ]
+
+      OpenAiCompletionsInferenceStubs.stub_response(messages, "concatenated summary")
+
+      expect(subject.concatenate_summaries(["summary 1", "summary 2"])).to eq(
+        "concatenated summary",
+      )
+    end
+  end
+
+  describe "#summarize_with_truncation" do
+    let(:max_tokens) { 709 }
+
+    it "truncates the context to meet the token limit" do
+      opts = content.except(:contents)
+
+      truncated_version = expected_messages(content[:contents], opts)
+
+      truncated_version.last[
+        :content
+      ] = "Summarize the following in 400 words:\n(1 asd said: This is a"
+
+      OpenAiCompletionsInferenceStubs.stub_response(truncated_version, "truncated summary")
+
+      expect(subject.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
+    end
+  end
+end
diff --git a/spec/lib/modules/summarization/strategies/anthropic_spec.rb b/spec/lib/modules/summarization/strategies/anthropic_spec.rb
deleted file mode 100644
index afd64f13..00000000
--- a/spec/lib/modules/summarization/strategies/anthropic_spec.rb
+++ /dev/null
@@ -1,26 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../../../../support/anthropic_completion_stubs"
-
-RSpec.describe DiscourseAi::Summarization::Strategies::Anthropic do
-  describe "#summarize" do
-    let(:model) { "claude-v1" }
-
-    subject { described_class.new(model) }
-
-    it "asks an Anthropic's model to summarize the content" do
-      summarization_text = "This is a text"
-      expected_response = "This is a summary"
-
-      AnthropicCompletionStubs.stub_response(
-        subject.prompt(summarization_text),
-        "<ai>#{expected_response}</ai>",
-        req_opts: {
-          max_tokens_to_sample: 300,
-        },
-      )
-
-      expect(subject.summarize(summarization_text)).to eq(expected_response)
-    end
-  end
-end
diff --git a/spec/lib/modules/summarization/strategies/discourse_spec.rb b/spec/lib/modules/summarization/strategies/discourse_spec.rb
deleted file mode 100644
index 8d52e069..00000000
--- a/spec/lib/modules/summarization/strategies/discourse_spec.rb
+++ /dev/null
@@ -1,25 +0,0 @@
-# frozen_string_literal: true
-
-RSpec.describe DiscourseAi::Summarization::Strategies::DiscourseAi do
-  describe "#summarize" do
-    let(:model) { "bart-large-cnn-samsum" }
-
-    subject { described_class.new(model) }
-
-    it "asks a Discourse's model to summarize the content" do
-      SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com"
-      summarization_text = "This is a text"
-      expected_response = "This is a summary"
-
-      WebMock
-        .stub_request(
-          :post,
-          "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
-        )
-        .with(body: JSON.dump(model: model, content: subject.prompt(summarization_text)))
-        .to_return(status: 200, body: JSON.dump(summary_text: expected_response))
-
-      expect(subject.summarize(summarization_text)).to eq(expected_response)
-    end
-  end
-end
diff --git a/spec/lib/modules/summarization/strategies/fold_content_spec.rb b/spec/lib/modules/summarization/strategies/fold_content_spec.rb
new file mode 100644
index 00000000..655a7855
--- /dev/null
+++ b/spec/lib/modules/summarization/strategies/fold_content_spec.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+require_relative "../../../../support/summarization/dummy_completion_model"
+
+RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
+  describe "#summarize" do
+    let(:summarize_text) { "This is a text" }
+    let(:model) { DummyCompletionModel.new(model_tokens) }
+    let(:model_tokens) do
+      # Make sure each content fits in a single chunk.
+      DiscourseAi::Tokenizer::BertTokenizer.size("(1 asd said: This is a text ") + 3
+    end
+
+    subject { described_class.new(model) }
+
+    let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
+
+    context "when the content to summarize fits in a single call" do
+      it "does one call to summarize content" do
+        result = subject.summarize(content)
+
+        expect(model.summarization_calls).to eq(1)
+        expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
+      end
+    end
+
+    context "when the content to summarize doesn't fit in a single call" do
+      it "summarizes each chunk and then concatenates them" do
+        content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
+
+        result = subject.summarize(content)
+
+        expect(model.summarization_calls).to eq(3)
+        expect(result[:summary]).to eq(DummyCompletionModel::CONCATENATED_SUMMARIES)
+      end
+    end
+  end
+end
diff --git a/spec/lib/modules/summarization/strategies/open_ai_spec.rb b/spec/lib/modules/summarization/strategies/open_ai_spec.rb
deleted file mode 100644
index a83ca4db..00000000
--- a/spec/lib/modules/summarization/strategies/open_ai_spec.rb
+++ /dev/null
@@ -1,21 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../../../../support/openai_completions_inference_stubs"
-
-RSpec.describe DiscourseAi::Summarization::Strategies::OpenAi do
-  let(:model) { "gpt-3.5-turbo" }
-
-  subject { described_class.new(model) }
-
-  it "asks a OpenAI's model to summarize the content" do
-    summarization_text = "This is a text"
-    expected_response = "This is a summary"
-
-    OpenAiCompletionsInferenceStubs.stub_response(
-      subject.prompt(summarization_text),
-      expected_response,
-    )
-
-    expect(subject.summarize(summarization_text)).to eq(expected_response)
-  end
-end
diff --git a/spec/lib/modules/summarization/strategies/truncate_content_spec.rb b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb
new file mode 100644
index 00000000..4b6f1584
--- /dev/null
+++ b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+require_relative "../../../../support/summarization/dummy_completion_model"
+
+RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do
+  describe "#summarize" do
+    let(:summarize_text) { "This is a text" }
+    let(:model_tokens) { summarize_text.length }
+    let(:model) { DummyCompletionModel.new(model_tokens) }
+
+    subject { described_class.new(model) }
+
+    let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
+
+    context "when the content to summarize doesn't fit in a single call" do
+      it "summarizes a truncated version" do
+        content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
+
+        result = subject.summarize(content)
+
+        expect(model.summarization_calls).to eq(1)
+        expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
+      end
+    end
+  end
+end
diff --git a/spec/shared/inference/anthropic_completions_spec.rb b/spec/shared/inference/anthropic_completions_spec.rb
index 30e5037a..6ab081a7 100644
--- a/spec/shared/inference/anthropic_completions_spec.rb
+++ b/spec/shared/inference/anthropic_completions_spec.rb
@@ -45,7 +45,11 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
 
     AnthropicCompletionStubs.stub_streamed_response(prompt, deltas, req_opts: req_opts)
 
-    DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, "claude-v1") do |partial, cancel|
+    DiscourseAi::Inference::AnthropicCompletions.perform!(
+      prompt,
+      "claude-v1",
+      max_tokens: req_opts[:max_tokens_to_sample],
+    ) do |partial, cancel|
       data = partial[:completion]
       content = data if data
       cancel.call if content.split(" ").length == 2
diff --git a/spec/support/summarization/dummy_completion_model.rb b/spec/support/summarization/dummy_completion_model.rb
new file mode 100644
index 00000000..3c4136c0
--- /dev/null
+++ b/spec/support/summarization/dummy_completion_model.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+class DummyCompletionModel
+  SINGLE_SUMMARY = "this is a single summary"
+  CONCATENATED_SUMMARIES = "this is a concatenated summary"
+
+  def initialize(prompt_length)
+    @max_length = prompt_length
+    @summarization_calls = 0
+  end
+
+  attr_reader :max_length, :summarization_calls
+
+  def summarize_in_chunks(contents, opts)
+    chunks = []
+
+    section = { ids: [], summary: "" }
+
+    contents.each do |item|
+      new_content = "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
+
+      if tokenizer.can_expand_tokens?(section[:summary], new_content, max_length)
+        section[:summary] += new_content
+        section[:ids] << item[:id]
+      else
+        chunks << section
+        section = { ids: [item[:id]], summary: new_content }
+      end
+    end
+
+    chunks << section if section[:summary].present?
+
+    chunks.each do |chunk|
+      chunk[:summary] = SINGLE_SUMMARY
+      @summarization_calls += 1
+    end
+
+    chunks
+  end
+
+  def concatenate_summaries(summaries)
+    @summarization_calls += 1
+    CONCATENATED_SUMMARIES
+  end
+
+  def summarize_with_truncation(_contents, _opts)
+    @summarization_calls += 1
+    SINGLE_SUMMARY
+  end
+
+  def tokenizer
+    DiscourseAi::Tokenizer::BertTokenizer
+  end
+end
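---

Note on how the refactored pieces compose (not part of the patch): a minimal usage sketch, assuming the relevant API key site settings are configured. The content hash shape (`:contents` entries with `:id`, `:poster`, `:text`, plus optional `:content_title` and `:resource_path`) is taken from the specs above; the model/strategy pairing mirrors `EntryPoint#inject_into`.

```ruby
# Build a completion model and wrap it in the strategy that fits it.
model = DiscourseAi::Summarization::Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096)
strategy = DiscourseAi::Summarization::Strategies::FoldContent.new(model)

content = {
  content_title: "Example topic",
  resource_path: "/t/1/POST_NUMBER",
  contents: [
    { id: 1, poster: "alice", text: "First post" },
    { id: 2, poster: "bob", text: "A reply" },
  ],
}

# FoldContent splits the posts into chunks that fit the model's window
# (via tokenizer.can_expand_tokens?), summarizes each chunk, and
# concatenates the partial summaries when more than one chunk was needed.
result = strategy.summarize(content)
result[:summary] # => final summary text
result[:chunks]  # => per-chunk summaries, or [] when a single call sufficed
```

For the small BART/T5 models the same call goes through `TruncateContent` instead, which truncates the joined posts to `max_tokens` and makes exactly one request.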
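The Anthropic prompts above ask the model to wrap its answer in `<ai>` tags because `completion` extracts the reply with Nokogiri. A standalone sketch of that extraction (the response string here is made up):

```ruby
require "nokogiri"

# Claude may pad the tagged answer with extra chatter; parsing the fragment
# and taking the <ai> element keeps only the summary text.
response = "Sure!\n<ai>Participants agreed on the rollout plan.</ai>"
summary = Nokogiri::HTML5.fragment(response).at("ai").text
# => "Participants agreed on the rollout plan."
```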