diff --git a/lib/completions/dialects/chat_gpt.rb b/lib/completions/dialects/chat_gpt.rb
new file mode 100644
index 00000000..1f4166be
--- /dev/null
+++ b/lib/completions/dialects/chat_gpt.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ module Dialects
+ class ChatGPT
+ def self.can_translate?(model_name)
+ %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
+ end
+
+ def translate(generic_prompt)
+ open_ai_prompt = [
+ {
+ role: "system",
+ content: [generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n"),
+ },
+ ]
+
+ if generic_prompt[:examples]
+ generic_prompt[:examples].each do |example_pair|
+ open_ai_prompt << { role: "user", content: example_pair.first }
+ open_ai_prompt << { role: "assistant", content: example_pair.second }
+ end
+ end
+
+ open_ai_prompt << { role: "user", content: generic_prompt[:input] }
+ end
+
+ def tokenizer
+ DiscourseAi::Tokenizer::OpenAiTokenizer
+ end
+ end
+ end
+ end
+end
diff --git a/lib/completions/dialects/claude.rb b/lib/completions/dialects/claude.rb
new file mode 100644
index 00000000..07438985
--- /dev/null
+++ b/lib/completions/dialects/claude.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ module Dialects
+ class Claude
+ def self.can_translate?(model_name)
+ %w[claude-instant-1 claude-2].include?(model_name)
+ end
+
+ def translate(generic_prompt)
+ claude_prompt = +"Human: #{generic_prompt[:insts]}\n"
+
+ claude_prompt << build_examples(generic_prompt[:examples]) if generic_prompt[:examples]
+
+ claude_prompt << "#{generic_prompt[:input]}\n"
+
+ claude_prompt << "#{generic_prompt[:post_insts]}\n" if generic_prompt[:post_insts]
+
+ claude_prompt << "Assistant:\n"
+ end
+
+ def tokenizer
+ DiscourseAi::Tokenizer::AnthropicTokenizer
+ end
+
+ private
+
+ def build_examples(examples_arr)
+ examples_arr.reduce("") do |memo, example|
+ memo += "\nH: #{example[0]}\nA: #{example[1]}\n\n"
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/completions/dialects/llama2_classic.rb b/lib/completions/dialects/llama2_classic.rb
new file mode 100644
index 00000000..b6c58c8b
--- /dev/null
+++ b/lib/completions/dialects/llama2_classic.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ module Dialects
+ class Llama2Classic
+ def self.can_translate?(model_name)
+ "Llama2-*-chat-hf" == model_name
+ end
+
+ def translate(generic_prompt)
+ llama2_prompt =
+ +"[INST]<>#{[generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n")}<>[/INST]\n"
+
+ if generic_prompt[:examples]
+ generic_prompt[:examples].each do |example_pair|
+ llama2_prompt << "[INST]#{example_pair.first}[/INST]\n"
+ llama2_prompt << "#{example_pair.second}\n"
+ end
+ end
+
+ llama2_prompt << "[INST]#{generic_prompt[:input]}[/INST]\n"
+ end
+
+ def tokenizer
+ DiscourseAi::Tokenizer::Llama2Tokenizer
+ end
+ end
+ end
+ end
+end
diff --git a/lib/completions/dialects/orca_style.rb b/lib/completions/dialects/orca_style.rb
new file mode 100644
index 00000000..3aa11609
--- /dev/null
+++ b/lib/completions/dialects/orca_style.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ module Dialects
+ class OrcaStyle
+ def self.can_translate?(model_name)
+ %w[StableBeluga2 Upstage-Llama-2-*-instruct-v2].include?(model_name)
+ end
+
+ def translate(generic_prompt)
+ orca_style_prompt =
+ +"### System:\n#{[generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n")}\n"
+
+ if generic_prompt[:examples]
+ generic_prompt[:examples].each do |example_pair|
+ orca_style_prompt << "### User:\n#{example_pair.first}\n"
+ orca_style_prompt << "### Assistant:\n#{example_pair.second}\n"
+ end
+ end
+
+ orca_style_prompt << "### User:\n#{generic_prompt[:input]}\n"
+
+ orca_style_prompt << "### Assistant:\n"
+ end
+
+ def tokenizer
+ DiscourseAi::Tokenizer::Llama2Tokenizer
+ end
+ end
+ end
+ end
+end
diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
new file mode 100644
index 00000000..5216d4e7
--- /dev/null
+++ b/lib/completions/endpoints/anthropic.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ module Endpoints
+ class Anthropic < Base
+ def self.can_contact?(model_name)
+ %w[claude-instant-1 claude-2].include?(model_name)
+ end
+
+ def default_options
+ { max_tokens_to_sample: 2000, model: model }
+ end
+
+ def provider_id
+ AiApiAuditLog::Provider::Anthropic
+ end
+
+ private
+
+ def model_uri
+ @uri ||= URI("https://api.anthropic.com/v1/complete")
+ end
+
+ def prepare_payload(prompt, model_params)
+ default_options
+ .merge(model_params)
+ .merge(prompt: prompt)
+ .tap { |payload| payload[:stream] = true if @streaming_mode }
+ end
+
+ def prepare_request(payload)
+ headers = {
+ "anthropic-version" => "2023-06-01",
+ "x-api-key" => SiteSetting.ai_anthropic_api_key,
+ "content-type" => "application/json",
+ }
+
+ Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
+ end
+
+ def extract_completion_from(response_raw)
+ JSON.parse(response_raw, symbolize_names: true)[:completion].to_s
+ end
+
+ def partials_from(decoded_chunk)
+ decoded_chunk.split("\n").map { |line| line.split("data: ", 2)[1] }.compact
+ end
+ end
+ end
+ end
+end
diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb
new file mode 100644
index 00000000..09a8d6d0
--- /dev/null
+++ b/lib/completions/endpoints/aws_bedrock.rb
@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ module Endpoints
+ class AwsBedrock < Base
+ def self.can_contact?(model_name)
+ SiteSetting.ai_bedrock_access_key_id.present? &&
+ SiteSetting.ai_bedrock_secret_access_key.present? &&
+ SiteSetting.ai_bedrock_region.present?
+ end
+
+ def default_options
+ { max_tokens_to_sample: 20_000 }
+ end
+
+ def provider_id
+ AiApiAuditLog::Provider::Anthropic
+ end
+
+ private
+
+ def model_uri
+ api_url =
+ "https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model}/invoke"
+
+ api_url = @streaming_mode ? (api_url + "-with-response-stream") : api_url
+
+ URI(api_url)
+ end
+
+ def prepare_payload(prompt, model_params)
+ default_options.merge(prompt: prompt).merge(model_params)
+ end
+
+ def prepare_request(payload)
+ headers = { "content-type" => "application/json", "Accept" => "*/*" }
+
+ signer =
+ Aws::Sigv4::Signer.new(
+ access_key_id: SiteSetting.ai_bedrock_access_key_id,
+ region: SiteSetting.ai_bedrock_region,
+ secret_access_key: SiteSetting.ai_bedrock_secret_access_key,
+ service: "bedrock",
+ )
+
+ Net::HTTP::Post
+ .new(model_uri, headers)
+ .tap do |r|
+ r.body = payload
+
+ signed_request =
+ signer.sign_request(req: r, http_method: r.method, url: model_uri, body: r.body)
+
+ r.initialize_http_header(headers.merge(signed_request.headers))
+ end
+ end
+
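+ # Bedrock streams AWS event-stream frames instead of plain SSE. Each frame's
+ # payload is JSON whose "bytes" field holds the Base64-encoded Anthropic chunk.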
+ def decode(chunk)
+ Aws::EventStream::Decoder
+ .new
+ .decode_chunk(chunk)
+ .first
+ .payload
+ .string
+ .then { JSON.parse(_1) }
+ .dig("bytes")
+ .then { Base64.decode64(_1) }
+ rescue JSON::ParserError,
+ Aws::EventStream::Errors::MessageChecksumError,
+ Aws::EventStream::Errors::PreludeChecksumError => e
+ Rails.logger.error("#{self.class.name}: #{e.message}")
+ nil
+ end
+
+ def extract_completion_from(response_raw)
+ JSON.parse(response_raw, symbolize_names: true)[:completion].to_s
+ end
+
+ def partials_from(decoded_chunk)
+ [decoded_chunk]
+ end
+ end
+ end
+ end
+end
diff --git a/lib/completions/endpoints/base.rb b/lib/completions/endpoints/base.rb
new file mode 100644
index 00000000..bfe9c741
--- /dev/null
+++ b/lib/completions/endpoints/base.rb
@@ -0,0 +1,167 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ module Endpoints
+ class Base
+ CompletionFailed = Class.new(StandardError)
+ TIMEOUT = 60
+
+ def self.endpoint_for(model_name)
+ # Order is important.
+ # Bedrock has priority over Anthropic if credentials are present.
+ [
+ DiscourseAi::Completions::Endpoints::AwsBedrock,
+ DiscourseAi::Completions::Endpoints::Anthropic,
+ DiscourseAi::Completions::Endpoints::OpenAI,
+ DiscourseAi::Completions::Endpoints::Huggingface,
+ ].detect(-> { raise DiscourseAi::Completions::LLM::UNKNOWN_MODEL }) do |ek|
+ ek.can_contact?(model_name)
+ end
+ end
+
+ def self.can_contact?(_model_name)
+ raise NotImplementedError
+ end
+
+ def initialize(model_name, tokenizer)
+ @model = model_name
+ @tokenizer = tokenizer
+ end
+
+ def perform_completion!(prompt, user, model_params = {})
+ @streaming_mode = block_given?
+
+ Net::HTTP.start(
+ model_uri.host,
+ model_uri.port,
+ use_ssl: true,
+ read_timeout: TIMEOUT,
+ open_timeout: TIMEOUT,
+ write_timeout: TIMEOUT,
+ ) do |http|
+ response_data = +""
+ response_raw = +""
+ request_body = prepare_payload(prompt, model_params).to_json
+
+ request = prepare_request(request_body)
+
+ http.request(request) do |response|
+ if response.code.to_i != 200
+ Rails.logger.error(
+ "#{self.class.name}: status: #{response.code.to_i} - body: #{response.body}",
+ )
+ raise CompletionFailed
+ end
+
+ log =
+ AiApiAuditLog.new(
+ provider_id: provider_id,
+ user_id: user.id,
+ raw_request_payload: request_body,
+ request_tokens: prompt_size(prompt),
+ )
+
+ if !@streaming_mode
+ response_raw = response.read_body
+ response_data = extract_completion_from(response_raw)
+
+ return response_data
+ end
+
+ begin
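+ # Hand the caller a `cancel` lambda with each partial; once it's invoked we stop
+ # reading the stream and close the connection.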
+ cancelled = false
+ cancel = lambda { cancelled = true }
+
+ leftover = ""
+
+ response.read_body do |chunk|
+ if cancelled
+ http.finish
+ return
+ end
+
+ decoded_chunk = decode(chunk)
+ response_raw << decoded_chunk
+
+ partials_from(leftover + decoded_chunk).each do |raw_partial|
+ next if cancelled
+ next if raw_partial.blank?
+
+ begin
+ partial = extract_completion_from(raw_partial)
+ leftover = ""
+ response_data << partial
+
+ yield partial, cancel if partial
+ rescue JSON::ParserError
+ leftover = raw_partial
+ end
+ end
+ end
+ rescue IOError, StandardError
+ raise if !cancelled
+ end
+
+ return response_data
+ ensure
+ log.raw_response_payload = response_raw
+ log.response_tokens = tokenizer.size(response_data)
+ log.save!
+
+ if Rails.env.development? && log
+ puts "#{self.class.name}: request_tokens #{log.request_tokens} response_tokens #{log.response_tokens}"
+ end
+ end
+ end
+ end
+
+ def default_options
+ raise NotImplementedError
+ end
+
+ def provider_id
+ raise NotImplementedError
+ end
+
+ def prompt_size(prompt)
+ tokenizer.size(extract_prompt_for_tokenizer(prompt))
+ end
+
+ attr_reader :tokenizer
+
+ protected
+
+ attr_reader :model
+
+ def model_uri
+ raise NotImplementedError
+ end
+
+ def prepare_payload(_prompt, _model_params)
+ raise NotImplementedError
+ end
+
+ def prepare_request(_payload)
+ raise NotImplementedError
+ end
+
+ def extract_completion_from(_response_raw)
+ raise NotImplementedError
+ end
+
+ def decode(chunk)
+ chunk
+ end
+
+ def partials_from(_decoded_chunk)
+ raise NotImplementedError
+ end
+
+ def extract_prompt_for_tokenizer(prompt)
+ prompt
+ end
+ end
+ end
+ end
+end
diff --git a/lib/completions/endpoints/canned_response.rb b/lib/completions/endpoints/canned_response.rb
new file mode 100644
index 00000000..2bdf7226
--- /dev/null
+++ b/lib/completions/endpoints/canned_response.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ module Endpoints
+ class CannedResponse
+ CANNED_RESPONSE_ERROR = Class.new(StandardError)
+
+ def self.can_contact?(_)
+ Rails.env.test?
+ end
+
+ def initialize(responses)
+ @responses = responses
+ @completions = 0
+ end
+
+ attr_reader :responses, :completions
+
+ def perform_completion!(_prompt, _user, _model_params)
+ response = responses[completions]
+ if response.nil?
+ raise CANNED_RESPONSE_ERROR,
+ "The number of completions you requested exceed the number of canned responses"
+ end
+
+ @completions += 1
+ if block_given?
+ cancelled = false
+ cancel_fn = lambda { cancelled = true }
+
+ response.each_char do |char|
+ break if cancelled
+ yield(char, cancel_fn)
+ end
+ else
+ response
+ end
+ end
+
+ def tokenizer
+ DiscourseAi::Tokenizer::OpenAiTokenizer
+ end
+ end
+ end
+ end
+end
diff --git a/lib/completions/endpoints/hugging_face.rb b/lib/completions/endpoints/hugging_face.rb
new file mode 100644
index 00000000..271a3394
--- /dev/null
+++ b/lib/completions/endpoints/hugging_face.rb
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ module Endpoints
+ class Huggingface < Base
+ def self.can_contact?(model_name)
+ %w[StableBeluga2 Upstage-Llama-2-*-instruct-v2 Llama2-*-chat-hf].include?(model_name)
+ end
+
+ def default_options
+ { parameters: { repetition_penalty: 1.1, temperature: 0.7 } }
+ end
+
+ def provider_id
+ AiApiAuditLog::Provider::HuggingFaceTextGeneration
+ end
+
+ private
+
+ def model_uri
+ URI(SiteSetting.ai_hugging_face_api_url).tap do |uri|
+ uri.path = @streaming_mode ? "/generate_stream" : "/generate"
+ end
+ end
+
+ def prepare_payload(prompt, model_params)
+ default_options
+ .merge(inputs: prompt)
+ .tap do |payload|
+ payload[:parameters].merge!(model_params)
+
+ token_limit = SiteSetting.ai_hugging_face_token_limit || 2_000
+
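+ # Reserve whatever the context window leaves after the prompt for the response.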
+ payload[:parameters][:max_new_tokens] = token_limit - prompt_size(prompt)
+ end
+ end
+
+ def prepare_request(payload)
+ headers =
+ { "Content-Type" => "application/json" }.tap do |h|
+ if SiteSetting.ai_hugging_face_api_key.present?
+ h["Authorization"] = "Bearer #{SiteSetting.ai_hugging_face_api_key}"
+ end
+ end
+
+ Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
+ end
+
+ def extract_completion_from(response_raw)
+ parsed = JSON.parse(response_raw, symbolize_names: true)
+
+ if @streaming_mode
+ # Last chunk contains full response, which we already yielded.
+ return if parsed.dig(:token, :special)
+
+ parsed.dig(:token, :text).to_s
+ else
+ parsed[:generated_text].to_s
+ end
+ end
+
+ def partials_from(decoded_chunk)
+ decoded_chunk
+ .split("\n")
+ .map do |line|
+ data = line.split("data: ", 2)[1]
+ data&.squish == "[DONE]" ? nil : data
+ end
+ .compact
+ end
+ end
+ end
+ end
+end
diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb
new file mode 100644
index 00000000..3388c00c
--- /dev/null
+++ b/lib/completions/endpoints/open_ai.rb
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ module Endpoints
+ class OpenAI < Base
+ def self.can_contact?(model_name)
+ %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
+ end
+
+ def default_options
+ { model: model }
+ end
+
+ def provider_id
+ AiApiAuditLog::Provider::OpenAI
+ end
+
+ private
+
+ def model_uri
+ url =
+ if model.include?("gpt-4")
+ if model.include?("32k")
+ SiteSetting.ai_openai_gpt4_32k_url
+ else
+ SiteSetting.ai_openai_gpt4_url
+ end
+ else
+ if model.include?("16k")
+ SiteSetting.ai_openai_gpt35_16k_url
+ else
+ SiteSetting.ai_openai_gpt35_url
+ end
+ end
+
+ URI(url)
+ end
+
+ def prepare_payload(prompt, model_params)
+ default_options
+ .merge(model_params)
+ .merge(messages: prompt)
+ .tap { |payload| payload[:stream] = true if @streaming_mode }
+ end
+
+ def prepare_request(payload)
+ headers =
+ { "Content-Type" => "application/json" }.tap do |h|
+ if model_uri.host.include?("azure")
+ h["api-key"] = SiteSetting.ai_openai_api_key
+ else
+ h["Authorization"] = "Bearer #{SiteSetting.ai_openai_api_key}"
+ end
+
+ if SiteSetting.ai_openai_organization.present?
+ h["OpenAI-Organization"] = SiteSetting.ai_openai_organization
+ end
+ end
+
+ Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
+ end
+
+ def extract_completion_from(response_raw)
+ parsed = JSON.parse(response_raw, symbolize_names: true)
+
+ (
+ if @streaming_mode
+ parsed.dig(:choices, 0, :delta, :content)
+ else
+ parsed.dig(:choices, 0, :message, :content)
+ end
+ ).to_s
+ end
+
+ def partials_from(decoded_chunk)
+ decoded_chunk
+ .split("\n")
+ .map do |line|
+ data = line.split("data: ", 2)[1]
+ data == "[DONE]" ? nil : data
+ end
+ .compact
+ end
+
+ def extract_prompt_for_tokenizer(prompt)
+ prompt.map { |message| message[:content] || message["content"] || "" }.join("\n")
+ end
+ end
+ end
+ end
+end
diff --git a/lib/completions/entry_point.rb b/lib/completions/entry_point.rb
new file mode 100644
index 00000000..fa3d2ba6
--- /dev/null
+++ b/lib/completions/entry_point.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module Completions
+ class EntryPoint
+ def load_files
+ require_relative "dialects/chat_gpt"
+ require_relative "dialects/llama2_classic"
+ require_relative "dialects/orca_style"
+ require_relative "dialects/claude"
+
+ require_relative "endpoints/canned_response"
+ require_relative "endpoints/base"
+ require_relative "endpoints/anthropic"
+ require_relative "endpoints/aws_bedrock"
+ require_relative "endpoints/open_ai"
+ require_relative "endpoints/hugging_face"
+
+ require_relative "llm"
+ end
+
+ def inject_into(_)
+ end
+ end
+ end
+end
diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb
new file mode 100644
index 00000000..7210aad2
--- /dev/null
+++ b/lib/completions/llm.rb
@@ -0,0 +1,81 @@
+# frozen_string_literal: true
+
+# A facade that abstracts multiple LLMs behind a single interface.
+#
+# Internally, it consists of the combination of a dialect and an endpoint.
+# After receiving a prompt in our generic format, it translates it to
+# the target model and routes the completion request through the correct gateway.
+#
+# Use the .proxy method to instantiate an object.
+# It chooses the best dialect and endpoint for the model you want to interact with.
+#
+# Tests of modules that perform LLM calls can use .with_prepared_responses to return canned responses
+# instead of relying on WebMock stubs like we did in the past.
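+#
+# A minimal usage sketch (the model name and prompt values are illustrative only):
+#
+#   prompt = { insts: "You are a helpful bot.", input: "Say hello." }
+#   DiscourseAi::Completions::LLM.proxy("gpt-3.5-turbo").completion!(prompt, Discourse.system_user)
+#
+#   DiscourseAi::Completions::LLM.with_prepared_responses(["a canned reply"]) do
+#     DiscourseAi::Completions::LLM.proxy("gpt-3.5-turbo").completion!(prompt, Discourse.system_user)
+#   end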
+#
+module DiscourseAi
+ module Completions
+ class LLM
+ UNKNOWN_MODEL = Class.new(StandardError)
+
+ def self.with_prepared_responses(responses)
+ @canned_response = DiscourseAi::Completions::Endpoints::CannedResponse.new(responses)
+
+ yield(@canned_response).tap { @canned_response = nil }
+ end
+
+ def self.proxy(model_name)
+ dialects = [
+ DiscourseAi::Completions::Dialects::Claude,
+ DiscourseAi::Completions::Dialects::Llama2Classic,
+ DiscourseAi::Completions::Dialects::ChatGPT,
+ DiscourseAi::Completions::Dialects::OrcaStyle,
+ ]
+
+ dialect =
+ dialects.detect(-> { raise UNKNOWN_MODEL }) { |d| d.can_translate?(model_name) }.new
+
+ return new(dialect, @canned_response, model_name) if @canned_response
+
+ gateway =
+ DiscourseAi::Completions::Endpoints::Base.endpoint_for(model_name).new(
+ model_name,
+ dialect.tokenizer,
+ )
+
+ new(dialect, gateway, model_name)
+ end
+
+ def initialize(dialect, gateway, model_name)
+ @dialect = dialect
+ @gateway = gateway
+ @model_name = model_name
+ end
+
+ delegate :tokenizer, to: :dialect
+
+ # @param generic_prompt { Hash } - Prompt using our generic format.
+ # We use the following keys from the hash:
+ # - insts: String with instructions for the LLM.
+ # - input: String containing user input
+ # - examples (optional): Array of arrays with example inputs and responses. Each array is an input/response pair like [[example1, response1], [example2, response2]].
+ # - post_insts (optional): Additional instructions for the LLM. Some dialects like Claude add these at the end of the prompt.
+ #
+ # @param user { User } - User requesting the completion.
+ #
+ # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
+ #
+ # @returns { String } - Completion result.
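+ #
+ # An illustrative prompt (all values below are made up for demonstration):
+ #
+ #   {
+ #     insts: "You are a summarization bot.",
+ #     input: "Summarize the text inside the tags.",
+ #     examples: [["a first example input", "a first example response"]],
+ #     post_insts: "Reply with the summary only.",
+ #   }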
+ def completion!(generic_prompt, user, &partial_read_blk)
+ prompt = dialect.translate(generic_prompt)
+
+ model_params = generic_prompt.dig(:params, model_name) || {}
+
+ gateway.perform_completion!(prompt, user, model_params, &partial_read_blk)
+ end
+
+ private
+
+ attr_reader :dialect, :gateway, :model_name
+ end
+ end
+end
diff --git a/lib/modules/embeddings/entry_point.rb b/lib/modules/embeddings/entry_point.rb
index c2582930..bb054bdb 100644
--- a/lib/modules/embeddings/entry_point.rb
+++ b/lib/modules/embeddings/entry_point.rb
@@ -15,11 +15,6 @@ module DiscourseAi
require_relative "semantic_related"
require_relative "semantic_topic_query"
- require_relative "hyde_generators/base"
- require_relative "hyde_generators/openai"
- require_relative "hyde_generators/anthropic"
- require_relative "hyde_generators/llama2"
- require_relative "hyde_generators/llama2_ftos"
require_relative "semantic_search"
end
diff --git a/lib/modules/embeddings/hyde_generators/anthropic.rb b/lib/modules/embeddings/hyde_generators/anthropic.rb
deleted file mode 100644
index 7a1e87ff..00000000
--- a/lib/modules/embeddings/hyde_generators/anthropic.rb
+++ /dev/null
@@ -1,37 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
- module Embeddings
- module HydeGenerators
- class Anthropic < DiscourseAi::Embeddings::HydeGenerators::Base
- def prompt(search_term)
- <<~TEXT
- Human: Given a search term given between tags, generate a forum post about a given subject.
- #{basic_prompt_instruction}
- #{search_term}
-
- Respond with the generated post between <ai></ai> tags.
-
- Assistant:\n
- TEXT
- end
-
- def models
- %w[claude-instant-1 claude-2]
- end
-
- def hypothetical_post_from(query)
- response =
- ::DiscourseAi::Inference::AnthropicCompletions.perform!(
- prompt(query),
- SiteSetting.ai_embeddings_semantic_search_hyde_model,
- max_tokens: 400,
- stop_sequences: [""],
- ).dig(:completion)
-
- Nokogiri::HTML5.fragment(response).at("ai").text
- end
- end
- end
- end
-end
diff --git a/lib/modules/embeddings/hyde_generators/base.rb b/lib/modules/embeddings/hyde_generators/base.rb
deleted file mode 100644
index be291b03..00000000
--- a/lib/modules/embeddings/hyde_generators/base.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
- module Embeddings
- module HydeGenerators
- class Base
- def self.current_hyde_model
- DiscourseAi::Embeddings::HydeGenerators::Base.descendants.find do |generator_klass|
- generator_klass.new.models.include?(
- SiteSetting.ai_embeddings_semantic_search_hyde_model,
- )
- end
- end
-
- def basic_prompt_instruction
- <<~TEXT
- Act as a content writer for a forum.
- The forum description is as follows:
- #{SiteSetting.title}
- #{SiteSetting.site_description}
-
- Given the forum description write a forum post about the following subject:
- TEXT
- end
- end
- end
- end
-end
diff --git a/lib/modules/embeddings/hyde_generators/llama2.rb b/lib/modules/embeddings/hyde_generators/llama2.rb
deleted file mode 100644
index 86ca977a..00000000
--- a/lib/modules/embeddings/hyde_generators/llama2.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
- module Embeddings
- module HydeGenerators
- class Llama2 < DiscourseAi::Embeddings::HydeGenerators::Base
- def prompt(search_term)
- <<~TEXT
- [INST] <<SYS>>
- You are a helpful bot
- You create forum posts about a given subject
- <</SYS>>
-
- #{basic_prompt_instruction}
- #{search_term}
- [/INST]
- Here is a forum post about the above subject:
- TEXT
- end
-
- def models
- ["Llama2-*-chat-hf"]
- end
-
- def hypothetical_post_from(query)
- ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
- prompt(query),
- SiteSetting.ai_embeddings_semantic_search_hyde_model,
- token_limit: 400,
- ).dig(:generated_text)
- end
- end
- end
- end
-end
diff --git a/lib/modules/embeddings/hyde_generators/llama2_ftos.rb b/lib/modules/embeddings/hyde_generators/llama2_ftos.rb
deleted file mode 100644
index e5222e78..00000000
--- a/lib/modules/embeddings/hyde_generators/llama2_ftos.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
- module Embeddings
- module HydeGenerators
- class Llama2Ftos < DiscourseAi::Embeddings::HydeGenerators::Llama2
- def prompt(search_term)
- <<~TEXT
- ### System:
- You are a helpful bot
- You create forum posts about a given subject
-
- ### User:
- #{basic_prompt_instruction}
- #{search_term}
-
- ### Assistant:
- Here is a forum post about the above subject:
- TEXT
- end
-
- def models
- %w[StableBeluga2 Upstage-Llama-2-*-instruct-v2]
- end
- end
- end
- end
-end
diff --git a/lib/modules/embeddings/hyde_generators/openai.rb b/lib/modules/embeddings/hyde_generators/openai.rb
deleted file mode 100644
index 75ba2919..00000000
--- a/lib/modules/embeddings/hyde_generators/openai.rb
+++ /dev/null
@@ -1,31 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
- module Embeddings
- module HydeGenerators
- class OpenAi < DiscourseAi::Embeddings::HydeGenerators::Base
- def prompt(search_term)
- [
- {
- role: "system",
- content: "You are a helpful bot. You create forum posts about a given subject.",
- },
- { role: "user", content: "#{basic_prompt_instruction}\n#{search_term}" },
- ]
- end
-
- def models
- %w[gpt-3.5-turbo gpt-4]
- end
-
- def hypothetical_post_from(query)
- ::DiscourseAi::Inference::OpenAiCompletions.perform!(
- prompt(query),
- SiteSetting.ai_embeddings_semantic_search_hyde_model,
- max_tokens: 400,
- ).dig(:choices, 0, :message, :content)
- end
- end
- end
- end
-end
diff --git a/lib/modules/embeddings/semantic_search.rb b/lib/modules/embeddings/semantic_search.rb
index 0a2d5ae2..b56004fe 100644
--- a/lib/modules/embeddings/semantic_search.rb
+++ b/lib/modules/embeddings/semantic_search.rb
@@ -55,10 +55,7 @@ module DiscourseAi
hypothetical_post =
Discourse
.cache
- .fetch(hyde_key, expires_in: 1.week) do
- hyde_generator = DiscourseAi::Embeddings::HydeGenerators::Base.current_hyde_model.new
- hyde_generator.hypothetical_post_from(search_term)
- end
+ .fetch(hyde_key, expires_in: 1.week) { hypothetical_post_from(search_term) }
hypothetical_post_embedding =
Discourse
@@ -96,6 +93,30 @@ module DiscourseAi
def build_embedding_key(digest, hyde_model, embedding_model)
"#{build_hyde_key(digest, hyde_model)}-#{embedding_model}"
end
+
+ def hypothetical_post_from(search_term)
+ prompt = {
+ insts: <<~TEXT,
+ You are a content creator for a forum. The forum description is as follows:
+ #{SiteSetting.title}
+ #{SiteSetting.site_description}
+ Given the forum description write a forum post about the following subject:
+ TEXT
+ input: <<~TEXT,
+ Using this description, write a forum post about the subject inside the XML tags:
+
+ #{search_term}
+ TEXT
+ post_insts: "Put the forum post between tags.",
+ }
+
+ llm_response =
+ DiscourseAi::Completions::LLM.proxy(
+ SiteSetting.ai_embeddings_semantic_search_hyde_model,
+ ).completion!(prompt, @guardian.user)
+
+ Nokogiri::HTML5.fragment(llm_response).at("ai").text
+ end
end
end
end
diff --git a/lib/modules/summarization/entry_point.rb b/lib/modules/summarization/entry_point.rb
index 50e7ea32..ad582637 100644
--- a/lib/modules/summarization/entry_point.rb
+++ b/lib/modules/summarization/entry_point.rb
@@ -21,7 +21,6 @@ module DiscourseAi
Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
- Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
Models::Anthropic.new("claude-2", max_tokens: 100_000),
Models::Anthropic.new("claude-instant-1", max_tokens: 100_000),
Models::Llama2.new("Llama2-chat-hf", max_tokens: SiteSetting.ai_hugging_face_token_limit),
@@ -36,6 +35,7 @@ module DiscourseAi
end
truncable_models = [
+ Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024),
Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512),
]
diff --git a/lib/modules/summarization/models/anthropic.rb b/lib/modules/summarization/models/anthropic.rb
index 151545a3..2ce5ff61 100644
--- a/lib/modules/summarization/models/anthropic.rb
+++ b/lib/modules/summarization/models/anthropic.rb
@@ -19,109 +19,6 @@ module DiscourseAi
setting: "ai_anthropic_api_key",
)
end
-
- def concatenate_summaries(summaries, &on_partial_blk)
- instructions = <<~TEXT
- Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
- Include only the summary inside tags.
- TEXT
-
- instructions += summaries.reduce("") { |m, s| m += "#{s}\n" }
- instructions += "Assistant:\n"
-
- completion(instructions, &on_partial_blk)
- end
-
- def summarize_with_truncation(contents, opts, &on_partial_blk)
- instructions = build_base_prompt(opts)
-
- text_to_summarize = contents.map { |c| format_content_item(c) }.join
- truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
-
- instructions += "#{truncated_content}\nAssistant:\n"
-
- completion(instructions, &on_partial_blk)
- end
-
- def summarize_single(chunk_text, opts, &on_partial_blk)
- summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
- end
-
- private
-
- def summarize_chunk(chunk_text, opts, &on_partial_blk)
- completion(
- build_base_prompt(opts) + "#{chunk_text}\nAssistant:\n",
- &on_partial_blk
- )
- end
-
- def build_base_prompt(opts)
- initial_instruction =
- if opts[:single_chunk]
- "Summarize the following forum discussion inside the given tag, creating a cohesive narrative."
- else
- "Summarize the following forum discussion inside the given tag."
- end
-
- base_prompt = <<~TEXT
- Human: #{initial_instruction}
- Try to keep the summary in the same language as the forum discussion.
- Format the response, including links, using markdown.
- TEXT
-
- base_prompt += <<~TEXT if opts[:resource_path]
- Try generating links as well the format is #{opts[:resource_path]}/POST_ID
- For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
- TEXT
-
- base_prompt += "Wrap the whole the summary inside tags.\n"
-
- base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
- :content_title
- ]
-
- base_prompt += "Don't use more than 400 words.\n" unless opts[:single_chunk]
-
- base_prompt
- end
-
- def completion(prompt, &on_partial_blk)
- # We need to discard any text that might come before the <ai> tag.
- # Instructing the model to reply only with the summary seems impossible.
- pre_tag_partial = +""
-
- if on_partial_blk
- on_partial_read =
- Proc.new do |partial|
- if pre_tag_partial.include?("")
- on_partial_blk.call(partial[:completion])
- else
- pre_tag_partial << partial[:completion]
- end
- end
-
- response =
- ::DiscourseAi::Inference::AnthropicCompletions.perform!(
- prompt,
- model,
- &on_partial_read
- )
- else
- response =
- ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(
- :completion,
- )
- end
-
- Nokogiri::HTML5.fragment(response).at("ai")&.text.presence || response
- end
-
- def tokenizer
- DiscourseAi::Tokenizer::AnthropicTokenizer
- end
-
- attr_reader :max_tokens
end
end
end
diff --git a/lib/modules/summarization/models/base.rb b/lib/modules/summarization/models/base.rb
index 1ce220fb..00e4b84f 100644
--- a/lib/modules/summarization/models/base.rb
+++ b/lib/modules/summarization/models/base.rb
@@ -21,29 +21,6 @@ module DiscourseAi
raise NotImplemented
end
- def summarize_in_chunks(chunks, opts)
- chunks.map do |chunk|
- chunk[:summary] = summarize_chunk(chunk[:summary], opts)
- chunk
- end
- end
-
- def concatenate_summaries(_summaries)
- raise NotImplemented
- end
-
- def summarize_with_truncation(_contents, _opts)
- raise NotImplemented
- end
-
- def summarize_single(chunk_text, opts)
- raise NotImplemented
- end
-
- def format_content_item(item)
- "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
- end
-
def available_tokens
max_tokens - reserved_tokens
end
@@ -57,16 +34,6 @@ module DiscourseAi
# ~500 words
700
end
-
- def summarize_chunk(_chunk_text, _opts)
- raise NotImplemented
- end
-
- def tokenizer
- raise NotImplemented
- end
-
- delegate :can_expand_tokens?, to: :tokenizer
end
end
end
diff --git a/lib/modules/summarization/models/discourse.rb b/lib/modules/summarization/models/discourse.rb
index 79669aa6..c37c6cb9 100644
--- a/lib/modules/summarization/models/discourse.rb
+++ b/lib/modules/summarization/models/discourse.rb
@@ -22,44 +22,11 @@ module DiscourseAi
)
end
- def concatenate_summaries(summaries)
- completion(summaries.join("\n"))
- end
-
- def summarize_with_truncation(contents, opts)
- text_to_summarize = contents.map { |c| format_content_item(c) }.join
- truncated_content =
- ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text_to_summarize, available_tokens)
-
- completion(truncated_content)
- end
-
- def summarize_single(chunk_text, _opts)
- completion(chunk_text)
- end
-
private
- def summarize_chunk(chunk_text, _opts)
- completion(chunk_text)
- end
-
def reserved_tokens
0
end
-
- def completion(prompt)
- ::DiscourseAi::Inference::DiscourseClassifier.perform!(
- "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
- model,
- prompt,
- SiteSetting.ai_summarization_discourse_service_api_key,
- ).dig(:summary_text)
- end
-
- def tokenizer
- DiscourseAi::Tokenizer::BertTokenizer
- end
end
end
end
diff --git a/lib/modules/summarization/models/llama2.rb b/lib/modules/summarization/models/llama2.rb
index 1dc96088..4942ae5c 100644
--- a/lib/modules/summarization/models/llama2.rb
+++ b/lib/modules/summarization/models/llama2.rb
@@ -19,104 +19,6 @@ module DiscourseAi
setting: "ai_hugging_face_api_url",
)
end
-
- def concatenate_summaries(summaries, &on_partial_blk)
- prompt = <<~TEXT
- [INST] <<SYS>>
- You are a helpful bot
- <</SYS>>
-
- Concatenate these disjoint summaries, creating a cohesive narrative:
- #{summaries.join("\n")} [/INST]
- TEXT
-
- completion(prompt, &on_partial_blk)
- end
-
- def summarize_with_truncation(contents, opts, &on_partial_blk)
- text_to_summarize = contents.map { |c| format_content_item(c) }.join
- truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
-
- prompt = <<~TEXT
- [INST] <<SYS>>
- #{build_base_prompt(opts)}
- <</SYS>>
-
- Summarize the following in up to 400 words:
- #{truncated_content} [/INST]
- Here is a summary of the above topic:
- TEXT
-
- completion(prompt, &on_partial_blk)
- end
-
- def summarize_single(chunk_text, opts, &on_partial_blk)
- summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
- end
-
- private
-
- def summarize_chunk(chunk_text, opts, &on_partial_blk)
- summary_instruction =
- if opts[:single_chunk]
- "Summarize the following forum discussion, creating a cohesive narrative:"
- else
- "Summarize the following in up to 400 words:"
- end
-
- prompt = <<~TEXT
- [INST] <<SYS>>
- #{build_base_prompt(opts)}
- <</SYS>>
-
- #{summary_instruction}
- #{chunk_text} [/INST]
- Here is a summary of the above topic:
- TEXT
-
- completion(prompt, &on_partial_blk)
- end
-
- def build_base_prompt(opts)
- base_prompt = <<~TEXT
- You are a summarization bot.
- You effectively summarise any text and reply ONLY with ONLY the summarized text.
- You condense it into a shorter version.
- You understand and generate Discourse forum Markdown.
- TEXT
-
- if opts[:resource_path]
- base_prompt +=
- "Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
- end
-
- base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
- :content_title
- ]
-
- base_prompt
- end
-
- def completion(prompt, &on_partial_blk)
- if on_partial_blk
- on_partial_read =
- Proc.new { |partial| on_partial_blk.call(partial.dig(:token, :text).to_s) }
-
- ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
- prompt,
- model,
- &on_partial_read
- )
- else
- ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
- :generated_text,
- )
- end
- end
-
- def tokenizer
- DiscourseAi::Tokenizer::Llama2Tokenizer
- end
end
end
end
diff --git a/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb b/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
index acc682f6..81ff6bda 100644
--- a/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
+++ b/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
@@ -7,65 +7,6 @@ module DiscourseAi
def display_name
"Llama2FineTunedOrcaStyle's #{SiteSetting.ai_hugging_face_model_display_name.presence || model}"
end
-
- def concatenate_summaries(summaries, &on_partial_blk)
- prompt = <<~TEXT
- ### System:
- You are a helpful bot
-
- ### User:
- Concatenate these disjoint summaries, creating a cohesive narrative:
- #{summaries.join("\n")}
-
- ### Assistant:
- TEXT
-
- completion(prompt, &on_partial_blk)
- end
-
- def summarize_with_truncation(contents, opts, &on_partial_blk)
- text_to_summarize = contents.map { |c| format_content_item(c) }.join
- truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
-
- prompt = <<~TEXT
- ### System:
- #{build_base_prompt(opts)}
-
- ### User:
- Summarize the following in up to 400 words:
- #{truncated_content}
-
- ### Assistant:
- Here is a summary of the above topic:
- TEXT
-
- completion(prompt, &on_partial_blk)
- end
-
- private
-
- def summarize_chunk(chunk_text, opts, &on_partial_blk)
- summary_instruction =
- if opts[:single_chunk]
- "Summarize the following forum discussion, creating a cohesive narrative:"
- else
- "Summarize the following in up to 400 words:"
- end
-
- prompt = <<~TEXT
- ### System:
- #{build_base_prompt(opts)}
-
- ### User:
- #{summary_instruction}
- #{chunk_text}
-
- ### Assistant:
- Here is a summary of the above topic:
- TEXT
-
- completion(prompt, &on_partial_blk)
- end
end
end
end
diff --git a/lib/modules/summarization/models/open_ai.rb b/lib/modules/summarization/models/open_ai.rb
index 79bd63dd..121d71f5 100644
--- a/lib/modules/summarization/models/open_ai.rb
+++ b/lib/modules/summarization/models/open_ai.rb
@@ -19,100 +19,6 @@ module DiscourseAi
setting: "ai_openai_api_key",
)
end
-
- def concatenate_summaries(summaries, &on_partial_blk)
- messages = [
- { role: "system", content: "You are a helpful bot" },
- {
- role: "user",
- content:
- "Concatenate these disjoint summaries, creating a cohesive narrative. Keep the summary in the same language used in the text below.\n#{summaries.join("\n")}",
- },
- ]
-
- completion(messages, &on_partial_blk)
- end
-
- def summarize_with_truncation(contents, opts, &on_partial_blk)
- messages = [{ role: "system", content: build_base_prompt(opts) }]
-
- text_to_summarize = contents.map { |c| format_content_item(c) }.join
- truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
-
- messages << {
- role: "user",
- content:
- "Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n#{truncated_content}",
- }
-
- completion(messages, &on_partial_blk)
- end
-
- def summarize_single(chunk_text, opts, &on_partial_blk)
- summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
- end
-
- private
-
- def summarize_chunk(chunk_text, opts, &on_partial_blk)
- summary_instruction =
- if opts[:single_chunk]
- "Summarize the following forum discussion, creating a cohesive narrative. Keep the summary in the same language used in the text below."
- else
- "Summarize the following in 400 words. Keep the summary in the same language used in the text below."
- end
-
- completion(
- [
- { role: "system", content: build_base_prompt(opts) },
- { role: "user", content: "#{summary_instruction}\n#{chunk_text}" },
- ],
- &on_partial_blk
- )
- end
-
- def build_base_prompt(opts)
- base_prompt = <<~TEXT
- You are a summarization bot.
- You effectively summarise any text and reply ONLY with ONLY the summarized text.
- You condense it into a shorter version.
- You understand and generate Discourse forum Markdown.
- You format the response, including links, using markdown.
- TEXT
-
- if opts[:resource_path]
- base_prompt +=
- "Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
- end
-
- base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
- :content_title
- ]
-
- base_prompt
- end
-
- def completion(prompt, &on_partial_blk)
- if on_partial_blk
- on_partial_read =
- Proc.new do |partial|
- on_partial_blk.call(partial.dig(:choices, 0, :delta, :content).to_s)
- end
-
- ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, &on_partial_read)
- else
- ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
- :choices,
- 0,
- :message,
- :content,
- )
- end
- end
-
- def tokenizer
- DiscourseAi::Tokenizer::OpenAiTokenizer
- end
end
end
end
diff --git a/lib/modules/summarization/strategies/fold_content.rb b/lib/modules/summarization/strategies/fold_content.rb
index a8d5bd11..5c646ef3 100644
--- a/lib/modules/summarization/strategies/fold_content.rb
+++ b/lib/modules/summarization/strategies/fold_content.rb
@@ -16,22 +16,29 @@ module DiscourseAi
:model,
to: :completion_model
- def summarize(content, _user, &on_partial_blk)
+ def summarize(content, user, &on_partial_blk)
opts = content.except(:contents)
- chunks = split_into_chunks(content[:contents])
+ llm = DiscourseAi::Completions::LLM.proxy(completion_model.model)
+
+ chunks = split_into_chunks(llm.tokenizer, content[:contents])
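+ # A single chunk means the content fits the model's window and one completion is
+ # enough; otherwise we summarize each chunk and then merge the partial summaries.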
if chunks.length == 1
{
- summary:
- completion_model.summarize_single(chunks.first[:summary], opts, &on_partial_blk),
+ summary: summarize_single(llm, chunks.first[:summary], user, opts, &on_partial_blk),
chunks: [],
}
else
- summaries = completion_model.summarize_in_chunks(chunks, opts)
+ summaries = summarize_in_chunks(llm, chunks, user, opts)
{
- summary: completion_model.concatenate_summaries(summaries, &on_partial_blk),
+ summary:
+ concatenate_summaries(
+ llm,
+ summaries.map { |s| s[:summary] },
+ user,
+ &on_partial_blk
+ ),
chunks: summaries,
}
end
@@ -39,14 +46,18 @@ module DiscourseAi
private
- def split_into_chunks(contents)
+ def format_content_item(item)
+ "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
+ end
+
+ def split_into_chunks(tokenizer, contents)
section = { ids: [], summary: "" }
chunks =
contents.reduce([]) do |sections, item|
- new_content = completion_model.format_content_item(item)
+ new_content = format_content_item(item)
- if completion_model.can_expand_tokens?(
+ if tokenizer.can_expand_tokens?(
section[:summary],
new_content,
completion_model.available_tokens,
@@ -65,6 +76,71 @@ module DiscourseAi
chunks
end
+
+ def summarize_single(llm, text, user, opts, &on_partial_blk)
+ prompt = summarization_prompt(text, opts)
+
+ llm.completion!(prompt, user, &on_partial_blk)
+ end
+
+ def summarize_in_chunks(llm, chunks, user, opts)
+ chunks.map do |chunk|
+ prompt = summarization_prompt(chunk[:summary], opts)
+ prompt[:post_insts] = "Don't use more than 400 words for the summary."
+
+ chunk[:summary] = llm.completion!(prompt, user)
+ chunk
+ end
+ end
+
+ def concatenate_summaries(llm, summaries, user, &on_partial_blk)
+ prompt = summarization_prompt(summaries.join("\n"), {})
+ prompt[:insts] = <<~TEXT
+ You are a bot that can concatenate disjoint summaries, creating a cohesive narrative.
+ Keep the resulting summary in the same language used in the text below.
+ TEXT
+
+ llm.completion!(prompt, user, &on_partial_blk)
+ end
+
+ def summarization_prompt(input, opts)
+ insts = <<~TEXT
+ You are a summarization bot that effectively summarizes any text, creating a cohesive narrative.
+ Your replies contain ONLY a summarized version of the text I provide you, using the same language.
+ You understand and generate Discourse forum Markdown.
+ You format the response, including links, using Markdown.
+ TEXT
+
+ insts += <<~TEXT if opts[:resource_path]
+ Each message is formatted as ") "
+ Append to #{opts[:resource_path]} when linking posts.
+ TEXT
+
+ insts += "The discussion title is: #{opts[:content_title]}.\n" if opts[:content_title]
+
+ prompt = { insts: insts, input: <<~TEXT }
+ Here is the text, inside XML tags:
+
+
+ #{input}
+
+ TEXT
+
+ if opts[:resource_path]
+ prompt[:examples] = [
+ [
+ "(1 user1 said: I love Mondays 2) user2 said: I hate Mondays",
+ "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.",
+ ],
+ [
+ "3) usuario1: Amo los lunes 6) usuario2: Odio los lunes",
+ "Dos usuarios charlan sobre los lunes. [usuario1](#{opts[:resource_path]}/3) dice que los ama, mientras que [usuario2](#{opts[:resource_path]}/2) los odia.",
+ ],
+ ]
+ end
+
+ prompt
+ end
end
end
end
diff --git a/lib/modules/summarization/strategies/truncate_content.rb b/lib/modules/summarization/strategies/truncate_content.rb
index c26f2e8a..6e4c368a 100644
--- a/lib/modules/summarization/strategies/truncate_content.rb
+++ b/lib/modules/summarization/strategies/truncate_content.rb
@@ -25,6 +25,32 @@ module DiscourseAi
chunks: [],
}
end
+
+ private
+
+ def format_content_item(item)
+ "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
+ end
+
+ def summarize_with_truncation(contents, opts)
+ text_to_summarize = contents.map { |c| format_content_item(c) }.join
+ truncated_content =
+ ::DiscourseAi::Tokenizer::BertTokenizer.truncate(
+ text_to_summarize,
+ completion_model.available_tokens,
+ )
+
+ completion(truncated_content)
+ end
+
+ def completion(prompt)
+ ::DiscourseAi::Inference::DiscourseClassifier.perform!(
+ "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
+ completion_model.model,
+ prompt,
+ SiteSetting.ai_summarization_discourse_service_api_key,
+ ).dig(:summary_text)
+ end
end
end
end
diff --git a/plugin.rb b/plugin.rb
index 0eb0d8ee..a4022c62 100644
--- a/plugin.rb
+++ b/plugin.rb
@@ -52,6 +52,8 @@ after_initialize do
require_relative "lib/shared/database/connection"
+ require_relative "lib/completions/entry_point"
+
require_relative "lib/modules/nsfw/entry_point"
require_relative "lib/modules/toxicity/entry_point"
require_relative "lib/modules/sentiment/entry_point"
@@ -64,6 +66,7 @@ after_initialize do
add_admin_route "discourse_ai.title", "discourse-ai"
[
+ DiscourseAi::Completions::EntryPoint.new,
DiscourseAi::Embeddings::EntryPoint.new,
DiscourseAi::NSFW::EntryPoint.new,
DiscourseAi::Toxicity::EntryPoint.new,
diff --git a/spec/lib/completions/dialects/chat_gpt_spec.rb b/spec/lib/completions/dialects/chat_gpt_spec.rb
new file mode 100644
index 00000000..599f1113
--- /dev/null
+++ b/spec/lib/completions/dialects/chat_gpt_spec.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::Dialects::ChatGPT do
+ subject(:dialect) { described_class.new }
+
+ let(:prompt) do
+ {
+ insts: <<~TEXT,
+ I want you to act as a title generator for written pieces. I will provide you with a text,
+ and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+ and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+ TEXT
+ input: <<~TEXT,
+ Here is the text, inside XML tags:
+
+ To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+ discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+ defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+ Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
+ a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
+ slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
+ dies so that a scene may be repeated.
+
+ TEXT
+ post_insts:
+ "Please put the translation between tags and separate each title with a comma.",
+ }
+ end
+
+ describe "#translate" do
+ it "translates a prompt written in our generic format to the ChatGPT format" do
+ open_ai_version = [
+ { role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") },
+ { role: "user", content: prompt[:input] },
+ ]
+
+ translated = dialect.translate(prompt)
+
+ expect(translated).to contain_exactly(*open_ai_version)
+ end
+
+ it "include examples in the ChatGPT version" do
+ prompt[:examples] = [
+ [
+ "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.",
+ "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
+ ],
+ ]
+
+ open_ai_version = [
+ { role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") },
+ { role: "user", content: prompt[:examples][0][0] },
+ { role: "assistant", content: prompt[:examples][0][1] },
+ { role: "user", content: prompt[:input] },
+ ]
+
+ translated = dialect.translate(prompt)
+
+ expect(translated).to contain_exactly(*open_ai_version)
+ end
+ end
+end
diff --git a/spec/lib/completions/dialects/claude_spec.rb b/spec/lib/completions/dialects/claude_spec.rb
new file mode 100644
index 00000000..d26dd570
--- /dev/null
+++ b/spec/lib/completions/dialects/claude_spec.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::Dialects::Claude do
+ subject(:dialect) { described_class.new }
+
+ let(:prompt) do
+ {
+ insts: <<~TEXT,
+ I want you to act as a title generator for written pieces. I will provide you with a text,
+ and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+ and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+ TEXT
+ input: <<~TEXT,
+ Here is the text, inside XML tags:
+
+ To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+ discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+ defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+ Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
+ a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
+ slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
+ dies so that a scene may be repeated.
+
+ TEXT
+ post_insts:
+ "Please put the translation between tags and separate each title with a comma.",
+ }
+ end
+
+ describe "#translate" do
+ it "translates a prompt written in our generic format to Claude's format" do
+ anthropic_version = <<~TEXT
+ Human: #{prompt[:insts]}
+ #{prompt[:input]}
+ #{prompt[:post_insts]}
+ Assistant:
+ TEXT
+
+ translated = dialect.translate(prompt)
+
+ expect(translated).to eq(anthropic_version)
+ end
+
+ it "knows how to translate examples to Claude's format" do
+ prompt[:examples] = [
+ [
+ "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.",
+ "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
+ ],
+ ]
+ anthropic_version = <<~TEXT
+ Human: #{prompt[:insts]}
+
+ H: #{prompt[:examples][0][0]}
+ A: #{prompt[:examples][0][1]}
+
+ #{prompt[:input]}
+ #{prompt[:post_insts]}
+ Assistant:
+ TEXT
+
+ translated = dialect.translate(prompt)
+
+ expect(translated).to eq(anthropic_version)
+ end
+ end
+end
diff --git a/spec/lib/completions/dialects/llama2_classic_spec.rb b/spec/lib/completions/dialects/llama2_classic_spec.rb
new file mode 100644
index 00000000..2b1d93a2
--- /dev/null
+++ b/spec/lib/completions/dialects/llama2_classic_spec.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do
+ subject(:dialect) { described_class.new }
+
+ let(:prompt) do
+ {
+ insts: <<~TEXT,
+ I want you to act as a title generator for written pieces. I will provide you with a text,
+ and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+ and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+ TEXT
+ input: <<~TEXT,
+ Here is the text, inside XML tags:
+
+ To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+ discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+ defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+ Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
+ a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
+ slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
+ dies so that a scene may be repeated.
+
+ TEXT
+ post_insts:
+ "Please put the translation between tags and separate each title with a comma.",
+ }
+ end
+
+ describe "#translate" do
+ it "translates a prompt written in our generic format to the Llama2 format" do
+ llama2_classic_version = <<~TEXT
+ [INST]<<SYS>>#{[prompt[:insts], prompt[:post_insts]].join("\n")}<</SYS>>[/INST]
+ [INST]#{prompt[:input]}[/INST]
+ TEXT
+
+ translated = dialect.translate(prompt)
+
+ expect(translated).to eq(llama2_classic_version)
+ end
+
+ it "includes examples in the translation" do
+ prompt[:examples] = [
+ [
+ "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.",
+ "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
+ ],
+ ]
+
+ llama2_classic_version = <<~TEXT
+ [INST]<<SYS>>#{[prompt[:insts], prompt[:post_insts]].join("\n")}<</SYS>>[/INST]
+ [INST]#{prompt[:examples][0][0]}[/INST]
+ #{prompt[:examples][0][1]}
+ [INST]#{prompt[:input]}[/INST]
+ TEXT
+
+ translated = dialect.translate(prompt)
+
+ expect(translated).to eq(llama2_classic_version)
+ end
+ end
+end
diff --git a/spec/lib/completions/dialects/orca_style_spec.rb b/spec/lib/completions/dialects/orca_style_spec.rb
new file mode 100644
index 00000000..411a84a8
--- /dev/null
+++ b/spec/lib/completions/dialects/orca_style_spec.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do
+ subject(:dialect) { described_class.new }
+
+ describe "#translate" do
+ let(:prompt) do
+ {
+ insts: <<~TEXT,
+ I want you to act as a title generator for written pieces. I will provide you with a text,
+ and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+ and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+ TEXT
+ input: <<~TEXT,
+ Here is the text, inside XML tags:
+
+ To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+ discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+ defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+ Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
+ a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
+ slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
+ dies so that a scene may be repeated.
+
+ TEXT
+ post_insts:
+ "Please put the translation between tags and separate each title with a comma.",
+ }
+ end
+
+ it "translates a prompt written in our generic format to the Open AI format" do
+ orca_style_version = <<~TEXT
+ ### System:
+ #{[prompt[:insts], prompt[:post_insts]].join("\n")}
+ ### User:
+ #{prompt[:input]}
+ ### Assistant:
+ TEXT
+
+ translated = dialect.translate(prompt)
+
+ expect(translated).to eq(orca_style_version)
+ end
+
+ it "include examples in the translated prompt" do
+ prompt[:examples] = [
+ [
+ "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.",
+ "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
+ ],
+ ]
+
+ orca_style_version = <<~TEXT
+ ### System:
+ #{[prompt[:insts], prompt[:post_insts]].join("\n")}
+ ### User:
+ #{prompt[:examples][0][0]}
+ ### Assistant:
+ #{prompt[:examples][0][1]}
+ ### User:
+ #{prompt[:input]}
+ ### Assistant:
+ TEXT
+
+ translated = dialect.translate(prompt)
+
+ expect(translated).to eq(orca_style_version)
+ end
+ end
+end
diff --git a/spec/lib/completions/endpoints/anthropic_spec.rb b/spec/lib/completions/endpoints/anthropic_spec.rb
new file mode 100644
index 00000000..d0309e2f
--- /dev/null
+++ b/spec/lib/completions/endpoints/anthropic_spec.rb
@@ -0,0 +1,65 @@
+# frozen_string_literal: true
+
+require_relative "endpoint_examples"
+
+RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
+ subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) }
+
+ let(:model_name) { "claude-2" }
+ let(:prompt) { "Human: write 3 words\n\n" }
+
+ let(:request_body) { model.default_options.merge(prompt: prompt).to_json }
+ let(:stream_request_body) { model.default_options.merge(prompt: prompt, stream: true).to_json }
+
+ def response(content)
+ {
+ completion: content,
+ stop: "\n\nHuman:",
+ stop_reason: "stop_sequence",
+ truncated: false,
+ log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
+ model: model_name,
+ exception: nil,
+ }
+ end
+
+ def stub_response(prompt, response_text)
+ WebMock
+ .stub_request(:post, "https://api.anthropic.com/v1/complete")
+ .with(body: model.default_options.merge(prompt: prompt).to_json)
+ .to_return(status: 200, body: JSON.dump(response(response_text)))
+ end
+
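+  # Builds one SSE "data:" line shaped like Anthropic's streaming completion events; stop and stop_reason are only populated on the final chunk.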
+ def stream_line(delta, finish_reason: nil)
+ +"data: " << {
+ completion: delta,
+ stop: finish_reason ? "\n\nHuman:" : nil,
+ stop_reason: finish_reason,
+ truncated: false,
+ log_id: "12b029451c6d18094d868bc04ce83f63",
+ model: "claude-2",
+ exception: nil,
+ }.to_json
+ end
+
+ def stub_streamed_response(prompt, deltas)
+ chunks =
+ deltas.each_with_index.map do |_, index|
+ if index == (deltas.length - 1)
+ stream_line(deltas[index], finish_reason: "stop_sequence")
+ else
+ stream_line(deltas[index])
+ end
+ end
+
+ chunks = chunks.join("\n\n")
+
+ WebMock
+ .stub_request(:post, "https://api.anthropic.com/v1/complete")
+ .with(body: model.default_options.merge(prompt: prompt, stream: true).to_json)
+ .to_return(status: 200, body: chunks)
+ end
+
+ it_behaves_like "an endpoint that can communicate with a completion service"
+end
diff --git a/spec/lib/completions/endpoints/aws_bedrock_spec.rb b/spec/lib/completions/endpoints/aws_bedrock_spec.rb
new file mode 100644
index 00000000..4b655a91
--- /dev/null
+++ b/spec/lib/completions/endpoints/aws_bedrock_spec.rb
@@ -0,0 +1,123 @@
+# frozen_string_literal: true
+
+require_relative "endpoint_examples"
+
+RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
+ subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) }
+
+ let(:model_name) { "claude-2" }
+ let(:prompt) { "Human: write 3 words\n\n" }
+
+ let(:request_body) { model.default_options.merge(prompt: prompt).to_json }
+ let(:stream_request_body) { model.default_options.merge(prompt: prompt).to_json }
+
+ before do
+ SiteSetting.ai_bedrock_access_key_id = "123456"
+ SiteSetting.ai_bedrock_secret_access_key = "asd-asd-asd"
+ SiteSetting.ai_bedrock_region = "us-east-1"
+ end
+
+ # Copied from https://github.com/bblimke/webmock/issues/629
+ # Workaround for stubbing a streamed response
+ before do
+ mocked_http =
+ Class.new(Net::HTTP) do
+ def request(*)
+ super do |response|
+ response.instance_eval do
+ def read_body(*, &block)
+ if block_given?
+ @body.each(&block)
+ else
+ super
+ end
+ end
+ end
+
+ yield response if block_given?
+
+ response
+ end
+ end
+ end
+
+ @original_net_http = Net.send(:remove_const, :HTTP)
+ Net.send(:const_set, :HTTP, mocked_http)
+ end
+
+ after do
+ Net.send(:remove_const, :HTTP)
+ Net.send(:const_set, :HTTP, @original_net_http)
+ end
+
+ def response(content)
+ {
+ completion: content,
+ stop: "\n\nHuman:",
+ stop_reason: "stop_sequence",
+ truncated: false,
+ log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
+ model: model_name,
+ exception: nil,
+ }
+ end
+
+ def stub_response(prompt, response_text)
+ WebMock
+ .stub_request(
+ :post,
+ "https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model_name}/invoke",
+ )
+ .with(body: request_body)
+ .to_return(status: 200, body: JSON.dump(response(response_text)))
+ end
+
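+  # Bedrock streams completions as AWS EventStream messages carrying base64-encoded JSON, so each stubbed chunk is built with Aws::EventStream::Encoder.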
+ def stream_line(delta, finish_reason: nil)
+ encoder = Aws::EventStream::Encoder.new
+
+ message =
+ Aws::EventStream::Message.new(
+ payload:
+ StringIO.new(
+ {
+ bytes:
+ Base64.encode64(
+ {
+ completion: delta,
+ stop: finish_reason ? "\n\nHuman:" : nil,
+ stop_reason: finish_reason,
+ truncated: false,
+ log_id: "12b029451c6d18094d868bc04ce83f63",
+ model: "claude-2",
+ exception: nil,
+ }.to_json,
+ ),
+ }.to_json,
+ ),
+ )
+
+ encoder.encode(message)
+ end
+
+ def stub_streamed_response(prompt, deltas)
+ chunks =
+ deltas.each_with_index.map do |_, index|
+ if index == (deltas.length - 1)
+ stream_line(deltas[index], finish_reason: "stop_sequence")
+ else
+ stream_line(deltas[index])
+ end
+ end
+
+ WebMock
+ .stub_request(
+ :post,
+ "https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model_name}/invoke-with-response-stream",
+ )
+ .with(body: stream_request_body)
+ .to_return(status: 200, body: chunks)
+ end
+
+ it_behaves_like "an endpoint that can communicate with a completion service"
+end
diff --git a/spec/lib/completions/endpoints/endpoint_examples.rb b/spec/lib/completions/endpoints/endpoint_examples.rb
new file mode 100644
index 00000000..6ca86070
--- /dev/null
+++ b/spec/lib/completions/endpoints/endpoint_examples.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
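+# These examples assume the including spec defines: model, prompt, request_body, stream_request_body, response(content), stub_response, and stub_streamed_response.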
+RSpec.shared_examples "an endpoint that can communicate with a completion service" do
+ describe "#perform_completion!" do
+ fab!(:user) { Fabricate(:user) }
+
+ let(:response_text) { "1. Serenity\\n2. Laughter\\n3. Adventure" }
+
+ context "when using regular mode" do
+ before { stub_response(prompt, response_text) }
+
+ it "can complete a trivial prompt" do
+ completion_response = model.perform_completion!(prompt, user)
+
+ expect(completion_response).to eq(response_text)
+ end
+
+ it "creates an audit log for the request" do
+ model.perform_completion!(prompt, user)
+
+ expect(AiApiAuditLog.count).to eq(1)
+ log = AiApiAuditLog.first
+
+ response_body = response(response_text).to_json
+
+ expect(log.provider_id).to eq(model.provider_id)
+ expect(log.user_id).to eq(user.id)
+ expect(log.raw_request_payload).to eq(request_body)
+ expect(log.raw_response_payload).to eq(response_body)
+ expect(log.request_tokens).to eq(model.prompt_size(prompt))
+ expect(log.response_tokens).to eq(model.tokenizer.size(response_text))
+ end
+ end
+
+ context "when using stream mode" do
+ let(:deltas) { ["Mount", "ain", " ", "Tree ", "Frog"] }
+
+ before { stub_streamed_response(prompt, deltas) }
+
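+      # Each spec cancels the stream once two words have been received, so the final delta ("Frog") should never be appended.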
+ it "can complete a trivial prompt" do
+ completion_response = +""
+
+ model.perform_completion!(prompt, user) do |partial, cancel|
+ completion_response << partial
+ cancel.call if completion_response.split(" ").length == 2
+ end
+
+ expect(completion_response).to eq(deltas[0...-1].join)
+ end
+
+      it "creates an audit log and updates it on each read" do
+ completion_response = +""
+
+ model.perform_completion!(prompt, user) do |partial, cancel|
+ completion_response << partial
+ cancel.call if completion_response.split(" ").length == 2
+ end
+
+ expect(AiApiAuditLog.count).to eq(1)
+ log = AiApiAuditLog.first
+
+ expect(log.provider_id).to eq(model.provider_id)
+ expect(log.user_id).to eq(user.id)
+ expect(log.raw_request_payload).to eq(stream_request_body)
+ expect(log.raw_response_payload).to be_present
+ expect(log.request_tokens).to eq(model.prompt_size(prompt))
+ expect(log.response_tokens).to eq(model.tokenizer.size(deltas[0...-1].join))
+ end
+ end
+ end
+end
diff --git a/spec/lib/completions/endpoints/hugging_face_spec.rb b/spec/lib/completions/endpoints/hugging_face_spec.rb
new file mode 100644
index 00000000..0acd480f
--- /dev/null
+++ b/spec/lib/completions/endpoints/hugging_face_spec.rb
@@ -0,0 +1,69 @@
+# frozen_string_literal: true
+
+require_relative "endpoint_examples"
+
+RSpec.describe DiscourseAi::Completions::Endpoints::Huggingface do
+ subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::Llama2Tokenizer) }
+
+ let(:model_name) { "Llama2-*-chat-hf" }
+ let(:prompt) { <<~TEXT }
+ [INST]<>You are a helpful bot.<>[/INST]
+ [INST]Write 3 words[/INST]
+ TEXT
+
+ let(:request_body) do
+ model
+ .default_options
+ .merge(inputs: prompt)
+ .tap { |payload| payload[:parameters][:max_new_tokens] = 2_000 - model.prompt_size(prompt) }
+ .to_json
+ end
+ let(:stream_request_body) { request_body }
+
+ before { SiteSetting.ai_hugging_face_api_url = "https://test.dev" }
+
+ def response(content)
+ { generated_text: content }
+ end
+
+ def stub_response(prompt, response_text)
+ WebMock
+ .stub_request(:post, "#{SiteSetting.ai_hugging_face_api_url}/generate")
+ .with(body: request_body)
+ .to_return(status: 200, body: JSON.dump(response(response_text)))
+ end
+
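+  # Mimics the Hugging Face streaming format: each SSE "data:" line carries a token object, and generated_text is only present on the final chunk.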
+ def stream_line(delta, finish_reason: nil)
+ +"data: " << {
+ token: {
+ id: 29_889,
+ text: delta,
+ logprob: -0.08319092,
+ special: !!finish_reason,
+ },
+ generated_text: finish_reason ? response_text : nil,
+ details: nil,
+ }.to_json
+ end
+
+ def stub_streamed_response(prompt, deltas)
+ chunks =
+ deltas.each_with_index.map do |_, index|
+ if index == (deltas.length - 1)
+ stream_line(deltas[index], finish_reason: true)
+ else
+ stream_line(deltas[index])
+ end
+ end
+
+ chunks = chunks.join("\n\n")
+
+ WebMock
+ .stub_request(:post, "#{SiteSetting.ai_hugging_face_api_url}/generate_stream")
+ .with(body: request_body)
+ .to_return(status: 200, body: chunks)
+ end
+
+ it_behaves_like "an endpoint that can communicate with a completion service"
+end
diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb
new file mode 100644
index 00000000..63d33d78
--- /dev/null
+++ b/spec/lib/completions/endpoints/open_ai_spec.rb
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+require_relative "endpoint_examples"
+
+RSpec.describe DiscourseAi::Completions::Endpoints::OpenAI do
+ subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) }
+
+ let(:model_name) { "gpt-3.5-turbo" }
+ let(:prompt) do
+ [
+ { role: "system", content: "You are a helpful bot." },
+ { role: "user", content: "Write 3 words" },
+ ]
+ end
+
+ let(:request_body) { model.default_options.merge(messages: prompt).to_json }
+ let(:stream_request_body) { model.default_options.merge(messages: prompt, stream: true).to_json }
+
+ def response(content)
+ {
+ id: "chatcmpl-6sZfAb30Rnv9Q7ufzFwvQsMpjZh8S",
+ object: "chat.completion",
+ created: 1_678_464_820,
+ model: "gpt-3.5-turbo-0301",
+ usage: {
+ prompt_tokens: 337,
+ completion_tokens: 162,
+ total_tokens: 499,
+ },
+ choices: [
+ { message: { role: "assistant", content: content }, finish_reason: "stop", index: 0 },
+ ],
+ }
+ end
+
+ def stub_response(prompt, response_text)
+ WebMock
+ .stub_request(:post, "https://api.openai.com/v1/chat/completions")
+ .with(body: { model: model_name, messages: prompt })
+ .to_return(status: 200, body: JSON.dump(response(response_text)))
+ end
+
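+  # Builds one SSE "data:" line shaped like an OpenAI chat.completion.chunk carrying a single content delta.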
+ def stream_line(delta, finish_reason: nil)
+ +"data: " << {
+ id: "chatcmpl-#{SecureRandom.hex}",
+ object: "chat.completion.chunk",
+ created: 1_681_283_881,
+ model: "gpt-3.5-turbo-0301",
+ choices: [{ delta: { content: delta } }],
+ finish_reason: finish_reason,
+ index: 0,
+ }.to_json
+ end
+
+ def stub_streamed_response(prompt, deltas)
+ chunks =
+ deltas.each_with_index.map do |_, index|
+ if index == (deltas.length - 1)
+ stream_line(deltas[index], finish_reason: "stop_sequence")
+ else
+ stream_line(deltas[index])
+ end
+ end
+
+ chunks = chunks.join("\n\n")
+
+ WebMock
+ .stub_request(:post, "https://api.openai.com/v1/chat/completions")
+ .with(body: model.default_options.merge(messages: prompt, stream: true).to_json)
+ .to_return(status: 200, body: chunks)
+ end
+
+ it_behaves_like "an endpoint that can communicate with a completion service"
+end
diff --git a/spec/lib/completions/llm_spec.rb b/spec/lib/completions/llm_spec.rb
new file mode 100644
index 00000000..9c7148c0
--- /dev/null
+++ b/spec/lib/completions/llm_spec.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::LLM do
+ subject(:llm) do
+ described_class.new(
+ DiscourseAi::Completions::Dialects::OrcaStyle.new,
+ canned_response,
+ "Upstage-Llama-2-*-instruct-v2",
+ )
+ end
+
+ fab!(:user) { Fabricate(:user) }
+
+ describe ".proxy" do
+ it "raises an exception when we can't proxy the model" do
+ fake_model = "unknown_v2"
+
+ expect { described_class.proxy(fake_model) }.to(
+ raise_error(DiscourseAi::Completions::LLM::UNKNOWN_MODEL),
+ )
+ end
+ end
+
+ describe "#completion!" do
+ let(:prompt) do
+ {
+ insts: <<~TEXT,
+ I want you to act as a title generator for written pieces. I will provide you with a text,
+ and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+ and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+ TEXT
+ input: <<~TEXT,
+ Here is the text, inside XML tags:
+
+ To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+ discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+ defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+ TEXT
+ post_insts:
+ "Please put the translation between tags and separate each title with a comma.",
+ }
+ end
+
+ let(:canned_response) do
+ DiscourseAi::Completions::Endpoints::CannedResponse.new(
+ [
+ "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
+ ],
+ )
+ end
+
+ context "when getting the full response" do
+      it "processes the prompt and returns the response" do
+ llm_response = llm.completion!(prompt, user)
+
+ expect(llm_response).to eq(canned_response.responses[0])
+ end
+ end
+
+ context "when getting a streamed response" do
+      it "processes the prompt and calls the given block with the partial response" do
+ llm_response = +""
+
+ llm.completion!(prompt, user) { |partial, cancel_fn| llm_response << partial }
+
+ expect(llm_response).to eq(canned_response.responses[0])
+ end
+ end
+ end
+end
diff --git a/spec/lib/modules/ai_bot/commands/search_command_spec.rb b/spec/lib/modules/ai_bot/commands/search_command_spec.rb
index 285b88ed..215fd3b2 100644
--- a/spec/lib/modules/ai_bot/commands/search_command_spec.rb
+++ b/spec/lib/modules/ai_bot/commands/search_command_spec.rb
@@ -66,7 +66,10 @@ RSpec.describe DiscourseAi::AiBot::Commands::SearchCommand do
.expects(:asymmetric_topics_similarity_search)
.returns([post1.topic_id])
- results = search.process(search_query: "hello world, sam", status: "public")
+ results =
+ DiscourseAi::Completions::LLM.with_prepared_responses(["#{query}"]) do
+ search.process(search_query: "hello world, sam", status: "public")
+ end
expect(results[:args]).to eq({ search_query: "hello world, sam", status: "public" })
expect(results[:rows].length).to eq(1)
diff --git a/spec/lib/modules/embeddings/semantic_search_spec.rb b/spec/lib/modules/embeddings/semantic_search_spec.rb
index b6bbad11..b406dbaa 100644
--- a/spec/lib/modules/embeddings/semantic_search_spec.rb
+++ b/spec/lib/modules/embeddings/semantic_search_spec.rb
@@ -13,15 +13,6 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
before do
SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
- prompt = DiscourseAi::Embeddings::HydeGenerators::OpenAi.new.prompt(query)
- OpenAiCompletionsInferenceStubs.stub_response(
- prompt,
- hypothetical_post,
- req_opts: {
- max_tokens: 400,
- },
- )
-
hyde_embedding = [0.049382, 0.9999]
EmbeddingsGenerationStubs.discourse_service(
SiteSetting.ai_embeddings_model,
@@ -39,10 +30,16 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
.returns(candidate_ids)
end
+ def trigger_search(query)
+ DiscourseAi::Completions::LLM.with_prepared_responses(["#{hypothetical_post}"]) do
+ subject.search_for_topics(query)
+ end
+ end
+
it "returns the first post of a topic included in the asymmetric search results" do
stub_candidate_ids([post.topic_id])
- posts = subject.search_for_topics(query)
+ posts = trigger_search(query)
expect(posts).to contain_exactly(post)
end
@@ -53,7 +50,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
post.topic.update!(visible: false)
stub_candidate_ids([post.topic_id])
- posts = subject.search_for_topics(query)
+ posts = trigger_search(query)
expect(posts).to be_empty
end
@@ -64,7 +61,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
pm_post = Fabricate(:private_message_post)
stub_candidate_ids([pm_post.topic_id])
- posts = subject.search_for_topics(query)
+ posts = trigger_search(query)
expect(posts).to be_empty
end
@@ -75,7 +72,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
post.update!(post_type: Post.types[:whisper])
stub_candidate_ids([post.topic_id])
- posts = subject.search_for_topics(query)
+ posts = trigger_search(query)
expect(posts).to be_empty
end
@@ -87,7 +84,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
reply.topic.first_post.trash!
stub_candidate_ids([reply.topic_id])
- posts = subject.search_for_topics(query)
+ posts = trigger_search(query)
expect(posts).to be_empty
end
@@ -98,7 +95,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
post_2 = Fabricate(:post)
stub_candidate_ids([post.topic_id])
- posts = subject.search_for_topics(query)
+ posts = trigger_search(query)
expect(posts).not_to include(post_2)
end
@@ -114,7 +111,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
end
it "returns an empty list" do
- posts = subject.search_for_topics(query)
+ posts = trigger_search(query)
expect(posts).to be_empty
end
@@ -122,14 +119,17 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
it "returns the results if the user has access to the category" do
group.add(user)
- posts = subject.search_for_topics(query)
+ posts = trigger_search(query)
expect(posts).to contain_exactly(post)
end
context "while searching as anon" do
it "returns an empty list" do
- posts = described_class.new(Guardian.new(nil)).search_for_topics(query)
+ posts =
+ DiscourseAi::Completions::LLM.with_prepared_responses(
+ ["#{hypothetical_post}"],
+ ) { described_class.new(Guardian.new(nil)).search_for_topics(query) }
expect(posts).to be_empty
end
diff --git a/spec/lib/modules/summarization/models/anthropic_spec.rb b/spec/lib/modules/summarization/models/anthropic_spec.rb
deleted file mode 100644
index 263ab62f..00000000
--- a/spec/lib/modules/summarization/models/anthropic_spec.rb
+++ /dev/null
@@ -1,122 +0,0 @@
-# frozen_string_literal: true
-
-RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
- subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
-
- let(:model_name) { "claude-2" }
- let(:max_tokens) { 720 }
-
- let(:content) do
- {
- resource_path: "/t/-/1",
- content_title: "This is a title",
- contents: [{ poster: "asd", id: 1, text: "This is a text" }],
- }
- end
-
- def as_chunk(item)
- { ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
- end
-
- def expected_messages(contents, opts)
- base_prompt = <<~TEXT
- Human: Summarize the following forum discussion inside the given tag.
- Try to keep the summary in the same language as the forum discussion.
- Format the response, including links, using markdown.
- Try generating links as well the format is #{opts[:resource_path]}/POST_ID
- For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
- Wrap the whole the summary inside tags.
- The discussion title is: #{opts[:content_title]}.
- Don't use more than 400 words.
- TEXT
-
- text =
- contents.reduce("") do |memo, item|
- memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
- end
-
- base_prompt += "#{text}\nAssistant:\n"
- end
-
- describe "#summarize_in_chunks" do
- context "when the content fits in a single chunk" do
- it "performs a request to summarize" do
- opts = content.except(:contents)
-
- AnthropicCompletionStubs.stub_response(
- expected_messages(content[:contents], opts),
- "This is summary 1",
- )
-
- chunks = content[:contents].map { |c| as_chunk(c) }
- summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
- expect(summarized_chunks).to contain_exactly("This is summary 1")
- end
- end
-
- context "when the content fits in multiple chunks" do
- it "performs a request for each one to summarize" do
- content[:contents] << {
- poster: "asd2",
- id: 2,
- text: "This is a different text to summarize",
- }
- opts = content.except(:contents)
-
- content[:contents].each_with_index do |item, idx|
- AnthropicCompletionStubs.stub_response(
- expected_messages([item], opts),
- "This is summary #{idx + 1}",
- )
- end
-
- chunks = content[:contents].map { |c| as_chunk(c) }
- summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
- expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
- end
- end
- end
-
- describe "#concatenate_summaries" do
- it "combines all the different summaries into a single one" do
- messages = <<~TEXT
- Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
- Include only the summary inside tags.
- summary 1
- summary 2
- Assistant:
- TEXT
-
- AnthropicCompletionStubs.stub_response(messages, "concatenated summary")
-
- expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
- end
- end
-
- describe "#summarize_with_truncation" do
- let(:max_tokens) { 709 }
-
- it "truncates the context to meet the token limit" do
- opts = content.except(:contents)
-
- instructions = <<~TEXT
- Human: Summarize the following forum discussion inside the given tag.
- Try to keep the summary in the same language as the forum discussion.
- Format the response, including links, using markdown.
- Try generating links as well the format is #{opts[:resource_path]}/POST_ID
- For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
- Wrap the whole the summary inside tags.
- The discussion title is: #{opts[:content_title]}.
- Don't use more than 400 words.
- (1 asd said: This is a
- Assistant:
- TEXT
-
- AnthropicCompletionStubs.stub_response(instructions, "truncated summary")
-
- expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
- end
- end
-end
diff --git a/spec/lib/modules/summarization/models/discourse_spec.rb b/spec/lib/modules/summarization/models/discourse_spec.rb
deleted file mode 100644
index cd1c768a..00000000
--- a/spec/lib/modules/summarization/models/discourse_spec.rb
+++ /dev/null
@@ -1,95 +0,0 @@
-# frozen_string_literal: true
-
-RSpec.describe DiscourseAi::Summarization::Models::Discourse do
- subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
-
- let(:model_name) { "bart-large-cnn-samsum" }
- let(:max_tokens) { 20 }
-
- let(:content) do
- {
- resource_path: "/t/1/POST_NUMBER",
- content_title: "This is a title",
- contents: [{ poster: "asd", id: 1, text: "This is a text" }],
- }
- end
-
- before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" }
-
- def stub_request(prompt, response)
- WebMock
- .stub_request(
- :post,
- "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
- )
- .with(body: JSON.dump(model: model_name, content: prompt))
- .to_return(status: 200, body: JSON.dump(summary_text: response))
- end
-
- def expected_messages(contents, opts)
- contents.reduce("") do |memo, item|
- memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
- end
- end
-
- def as_chunk(item)
- { ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
- end
-
- describe "#summarize_in_chunks" do
- context "when the content fits in a single chunk" do
- it "performs a request to summarize" do
- opts = content.except(:contents)
-
- stub_request(expected_messages(content[:contents], opts), "This is summary 1")
-
- chunks = content[:contents].map { |c| as_chunk(c) }
- summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
- expect(summarized_chunks).to contain_exactly("This is summary 1")
- end
- end
-
- context "when the content fits in multiple chunks" do
- it "performs a request for each one to summarize" do
- content[:contents] << {
- poster: "asd2",
- id: 2,
- text: "This is a different text to summarize",
- }
- opts = content.except(:contents)
-
- content[:contents].each_with_index do |item, idx|
- stub_request(expected_messages([item], opts), "This is summary #{idx + 1}")
- end
-
- chunks = content[:contents].map { |c| as_chunk(c) }
- summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
- expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
- end
- end
- end
-
- describe "#concatenate_summaries" do
- it "combines all the different summaries into a single one" do
- messages = ["summary 1", "summary 2"].join("\n")
-
- stub_request(messages, "concatenated summary")
-
- expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
- end
- end
-
- describe "#summarize_with_truncation" do
- let(:max_tokens) { 9 }
-
- it "truncates the context to meet the token limit" do
- opts = content.except(:contents)
-
- stub_request("( 1 asd said : this is", "truncated summary")
-
- expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
- end
- end
-end
diff --git a/spec/lib/modules/summarization/models/open_ai_spec.rb b/spec/lib/modules/summarization/models/open_ai_spec.rb
deleted file mode 100644
index bf2773e7..00000000
--- a/spec/lib/modules/summarization/models/open_ai_spec.rb
+++ /dev/null
@@ -1,121 +0,0 @@
-# frozen_string_literal: true
-
-RSpec.describe DiscourseAi::Summarization::Models::OpenAi do
- subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
-
- let(:model_name) { "gpt-3.5-turbo" }
- let(:max_tokens) { 720 }
-
- let(:content) do
- {
- resource_path: "/t/1/POST_NUMBER",
- content_title: "This is a title",
- contents: [{ poster: "asd", id: 1, text: "This is a text" }],
- }
- end
-
- def as_chunk(item)
- { ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
- end
-
- def expected_messages(contents, opts)
- base_prompt = <<~TEXT
- You are a summarization bot.
- You effectively summarise any text and reply ONLY with ONLY the summarized text.
- You condense it into a shorter version.
- You understand and generate Discourse forum Markdown.
- You format the response, including links, using markdown.
- Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)
- The discussion title is: #{opts[:content_title]}.
- TEXT
-
- messages = [{ role: "system", content: base_prompt }]
-
- text =
- contents.reduce("") do |memo, item|
- memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
- end
-
- messages << {
- role: "user",
- content:
- "Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n#{text}",
- }
- end
-
- describe "#summarize_in_chunks" do
- context "when the content fits in a single chunk" do
- it "performs a request to summarize" do
- opts = content.except(:contents)
-
- OpenAiCompletionsInferenceStubs.stub_response(
- expected_messages(content[:contents], opts),
- "This is summary 1",
- )
-
- chunks = content[:contents].map { |c| as_chunk(c) }
- summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
- expect(summarized_chunks).to contain_exactly("This is summary 1")
- end
- end
-
- context "when the content fits in multiple chunks" do
- it "performs a request for each one to summarize" do
- content[:contents] << {
- poster: "asd2",
- id: 2,
- text: "This is a different text to summarize",
- }
- opts = content.except(:contents)
-
- content[:contents].each_with_index do |item, idx|
- OpenAiCompletionsInferenceStubs.stub_response(
- expected_messages([item], opts),
- "This is summary #{idx + 1}",
- )
- end
-
- chunks = content[:contents].map { |c| as_chunk(c) }
- summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
- expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
- end
- end
- end
-
- describe "#concatenate_summaries" do
- it "combines all the different summaries into a single one" do
- messages = [
- { role: "system", content: "You are a helpful bot" },
- {
- role: "user",
- content:
- "Concatenate these disjoint summaries, creating a cohesive narrative. Keep the summary in the same language used in the text below.\nsummary 1\nsummary 2",
- },
- ]
-
- OpenAiCompletionsInferenceStubs.stub_response(messages, "concatenated summary")
-
- expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
- end
- end
-
- describe "#summarize_with_truncation" do
- let(:max_tokens) { 709 }
-
- it "truncates the context to meet the token limit" do
- opts = content.except(:contents)
-
- truncated_version = expected_messages(content[:contents], opts)
-
- truncated_version.last[
- :content
- ] = "Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n(1 asd said: This is a"
-
- OpenAiCompletionsInferenceStubs.stub_response(truncated_version, "truncated summary")
-
- expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
- end
- end
-end
diff --git a/spec/lib/modules/summarization/strategies/fold_content_spec.rb b/spec/lib/modules/summarization/strategies/fold_content_spec.rb
index abe664f3..dfe35528 100644
--- a/spec/lib/modules/summarization/strategies/fold_content_spec.rb
+++ b/spec/lib/modules/summarization/strategies/fold_content_spec.rb
@@ -1,28 +1,35 @@
# frozen_string_literal: true
-require_relative "../../../../support/summarization/dummy_completion_model"
-
RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
describe "#summarize" do
subject(:strategy) { described_class.new(model) }
let(:summarize_text) { "This is a text" }
- let(:model) { DummyCompletionModel.new(model_tokens) }
let(:model_tokens) do
# Make sure each content fits in a single chunk.
- DiscourseAi::Tokenizer::BertTokenizer.size("(1 asd said: This is a text ") + 3
+ # 700 is the number of tokens reserved for the prompt.
+ 700 + DiscourseAi::Tokenizer::OpenAiTokenizer.size("(1 asd said: This is a text ") + 3
end
- let(:user) { User.new }
+ let(:model) do
+ DiscourseAi::Summarization::Models::OpenAi.new("gpt-4", max_tokens: model_tokens)
+ end
let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
+ let(:single_summary) { "this is a single summary" }
+ let(:concatenated_summary) { "this is a concatenated summary" }
+
+ let(:user) { User.new }
+
context "when the content to summarize fits in a single call" do
it "does one call to summarize content" do
- result = strategy.summarize(content, user)
+ result =
+ DiscourseAi::Completions::LLM.with_prepared_responses([single_summary]) do |spy|
+ strategy.summarize(content, user).tap { expect(spy.completions).to eq(1) }
+ end
- expect(model.summarization_calls).to eq(1)
- expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
+ expect(result[:summary]).to eq(single_summary)
end
end
@@ -30,10 +37,12 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
it "summarizes each chunk and then concatenates them" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
- result = strategy.summarize(content, user)
+ result =
+ DiscourseAi::Completions::LLM.with_prepared_responses(
+ [single_summary, single_summary, concatenated_summary],
+ ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) } }
- expect(model.summarization_calls).to eq(3)
- expect(result[:summary]).to eq(DummyCompletionModel::CONCATENATED_SUMMARIES)
+ expect(result[:summary]).to eq(concatenated_summary)
end
end
end
diff --git a/spec/lib/modules/summarization/strategies/truncate_content_spec.rb b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb
deleted file mode 100644
index 1e7cc6ea..00000000
--- a/spec/lib/modules/summarization/strategies/truncate_content_spec.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../../../../support/summarization/dummy_completion_model"
-
-RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do
- describe "#summarize" do
- subject(:strategy) { described_class.new(model) }
-
- let(:summarize_text) { "This is a text" }
- let(:model_tokens) { summarize_text.length }
- let(:model) { DummyCompletionModel.new(model_tokens) }
-
- let(:user) { User.new }
-
- let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
-
- context "when the content to summarize doesn't fit in a single call" do
- it "summarizes a truncated version" do
- content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
-
- result = strategy.summarize(content, user)
-
- expect(model.summarization_calls).to eq(1)
- expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
- end
- end
- end
-end
diff --git a/spec/support/summarization/dummy_completion_model.rb b/spec/support/summarization/dummy_completion_model.rb
deleted file mode 100644
index a83a434a..00000000
--- a/spec/support/summarization/dummy_completion_model.rb
+++ /dev/null
@@ -1,46 +0,0 @@
-# frozen_string_literal: true
-
-class DummyCompletionModel
- SINGLE_SUMMARY = "this is a single summary"
- CONCATENATED_SUMMARIES = "this is a concatenated summary"
-
- def initialize(max_tokens)
- @summarization_calls = 0
- @available_tokens = max_tokens
- end
-
- attr_reader :max_length, :summarization_calls, :available_tokens
-
- delegate :can_expand_tokens?, to: :tokenizer
-
- def summarize_single(single_chunk, opts)
- @summarization_calls += 1
- SINGLE_SUMMARY
- end
-
- def summarize_in_chunks(chunks, opts)
- chunks.map do |chunk|
- chunk[:summary] = SINGLE_SUMMARY
- @summarization_calls += 1
- chunk
- end
- end
-
- def concatenate_summaries(summaries)
- @summarization_calls += 1
- CONCATENATED_SUMMARIES
- end
-
- def summarize_with_truncation(_contents, _opts)
- @summarization_calls += 1
- SINGLE_SUMMARY
- end
-
- def format_content_item(item)
- "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
- end
-
- def tokenizer
- DiscourseAi::Tokenizer::BertTokenizer
- end
-end