REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297)

* DEV: One LLM abstraction to rule them all

* REFACTOR: HyDE search uses new LLM abstraction

* REFACTOR: Summarization uses the LLM abstraction

* Updated documentation and made small fixes. Removed the Bedrock claude-2 restriction.
Roman Rizzi 2023-11-23 12:58:54 -03:00 committed by GitHub
parent 53b7f031ba
commit 3064d4c288
47 changed files with 1679 additions and 1040 deletions
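
For reviewers, here is a minimal usage sketch of the facade this commit introduces (API per lib/completions/llm.rb below; the model name, the prompt contents, and the use of Discourse.system_user are illustrative assumptions, not part of the commit):

llm = DiscourseAi::Completions::LLM.proxy("claude-2")

prompt = {
  insts: "You are a summarization bot.",                      # system-style instructions
  input: "Summarize the text inside the tags: <input>...</input>",
  post_insts: "Reply only with the summary.",                  # optional trailing instructions
}

# Blocking call; pass a block to receive streamed partials alongside a cancel proc instead.
summary = llm.completion!(prompt, Discourse.system_user)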

View File

@ -0,0 +1,35 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
module Dialects
class ChatGPT
def self.can_translate?(model_name)
%w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
end
def translate(generic_prompt)
open_ai_prompt = [
{
role: "system",
content: [generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n"),
},
]
if generic_prompt[:examples]
generic_prompt[:examples].each do |example_pair|
open_ai_prompt << { role: "user", content: example_pair.first }
open_ai_prompt << { role: "assistant", content: example_pair.second }
end
end
open_ai_prompt << { role: "user", content: generic_prompt[:input] }
end
def tokenizer
DiscourseAi::Tokenizer::OpenAiTokenizer
end
end
end
end
end

View File

@ -0,0 +1,37 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
module Dialects
class Claude
def self.can_translate?(model_name)
%w[claude-instant-1 claude-2].include?(model_name)
end
def translate(generic_prompt)
claude_prompt = +"Human: #{generic_prompt[:insts]}\n"
claude_prompt << build_examples(generic_prompt[:examples]) if generic_prompt[:examples]
claude_prompt << "#{generic_prompt[:input]}\n"
claude_prompt << "#{generic_prompt[:post_insts]}\n" if generic_prompt[:post_insts]
claude_prompt << "Assistant:\n"
end
def tokenizer
DiscourseAi::Tokenizer::AnthropicTokenizer
end
private
def build_examples(examples_arr)
examples_arr.reduce("") do |memo, example|
memo += "<example>\nH: #{example[0]}\nA: #{example[1]}\n</example>\n"
end
end
end
end
end
end

View File

@ -0,0 +1,31 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
module Dialects
class Llama2Classic
def self.can_translate?(model_name)
"Llama2-*-chat-hf" == model_name
end
def translate(generic_prompt)
llama2_prompt =
+"[INST]<<SYS>>#{[generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n")}<</SYS>>[/INST]\n"
if generic_prompt[:examples]
generic_prompt[:examples].each do |example_pair|
llama2_prompt << "[INST]#{example_pair.first}[/INST]\n"
llama2_prompt << "#{example_pair.second}\n"
end
end
llama2_prompt << "[INST]#{generic_prompt[:input]}[/INST]\n"
end
def tokenizer
DiscourseAi::Tokenizer::Llama2Tokenizer
end
end
end
end
end

View File

@ -0,0 +1,33 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
module Dialects
class OrcaStyle
def self.can_translate?(model_name)
%w[StableBeluga2 Upstage-Llama-2-*-instruct-v2].include?(model_name)
end
def translate(generic_prompt)
orca_style_prompt =
+"### System:\n#{[generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n")}\n"
if generic_prompt[:examples]
generic_prompt[:examples].each do |example_pair|
orca_style_prompt << "### User:\n#{example_pair.first}\n"
orca_style_prompt << "### Assistant:\n#{example_pair.second}\n"
end
end
orca_style_prompt << "### User:\n#{generic_prompt[:input]}\n"
orca_style_prompt << "### Assistant:\n"
end
def tokenizer
DiscourseAi::Tokenizer::Llama2Tokenizer
end
end
end
end
end

View File

@ -0,0 +1,52 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
module Endpoints
class Anthropic < Base
def self.can_contact?(model_name)
%w[claude-instant-1 claude-2].include?(model_name)
end
def default_options
{ max_tokens_to_sample: 2000, model: model }
end
def provider_id
AiApiAuditLog::Provider::Anthropic
end
private
def model_uri
@uri ||= URI("https://api.anthropic.com/v1/complete")
end
def prepare_payload(prompt, model_params)
default_options
.merge(model_params)
.merge(prompt: prompt)
.tap { |payload| payload[:stream] = true if @streaming_mode }
end
def prepare_request(payload)
headers = {
"anthropic-version" => "2023-06-01",
"x-api-key" => SiteSetting.ai_anthropic_api_key,
"content-type" => "application/json",
}
Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
end
def extract_completion_from(response_raw)
JSON.parse(response_raw, symbolize_names: true)[:completion].to_s
end
def partials_from(decoded_chunk)
decoded_chunk.split("\n").map { |line| line.split("data: ", 2)[1] }.compact
end
end
end
end
end

View File

@ -0,0 +1,86 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
module Endpoints
class AwsBedrock < Base
def self.can_contact?(model_name)
SiteSetting.ai_bedrock_access_key_id.present? &&
SiteSetting.ai_bedrock_secret_access_key.present? &&
SiteSetting.ai_bedrock_region.present?
end
def default_options
{ max_tokens_to_sample: 20_000 }
end
def provider_id
AiApiAuditLog::Provider::Anthropic
end
private
def model_uri
api_url =
"https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model}/invoke"
api_url = @streaming_mode ? (api_url + "-with-response-stream") : api_url
URI(api_url)
end
def prepare_payload(prompt, model_params)
default_options.merge(prompt: prompt).merge(model_params)
end
def prepare_request(payload)
headers = { "content-type" => "application/json", "Accept" => "*/*" }
signer =
Aws::Sigv4::Signer.new(
access_key_id: SiteSetting.ai_bedrock_access_key_id,
region: SiteSetting.ai_bedrock_region,
secret_access_key: SiteSetting.ai_bedrock_secret_access_key,
service: "bedrock",
)
Net::HTTP::Post
.new(model_uri, headers)
.tap do |r|
r.body = payload
signed_request =
signer.sign_request(req: r, http_method: r.method, url: model_uri, body: r.body)
r.initialize_http_header(headers.merge(signed_request.headers))
end
end
def decode(chunk)
Aws::EventStream::Decoder
.new
.decode_chunk(chunk)
.first
.payload
.string
.then { JSON.parse(_1) }
.dig("bytes")
.then { Base64.decode64(_1) }
rescue JSON::ParserError,
Aws::EventStream::Errors::MessageChecksumError,
Aws::EventStream::Errors::PreludeChecksumError => e
Rails.logger.error("#{self.class.name}: #{e.message}")
nil
end
def extract_completion_from(response_raw)
JSON.parse(response_raw, symbolize_names: true)[:completion].to_s
end
def partials_from(decoded_chunk)
[decoded_chunk]
end
end
end
end
end

View File

@ -0,0 +1,167 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
module Endpoints
class Base
CompletionFailed = Class.new(StandardError)
TIMEOUT = 60
def self.endpoint_for(model_name)
# Order is important.
# Bedrock has priority over Anthropic if credentials are present.
[
DiscourseAi::Completions::Endpoints::AwsBedrock,
DiscourseAi::Completions::Endpoints::Anthropic,
DiscourseAi::Completions::Endpoints::OpenAI,
DiscourseAi::Completions::Endpoints::Huggingface,
].detect(-> { raise DiscourseAi::Completions::LLM::UNKNOWN_MODEL }) do |ek|
ek.can_contact?(model_name)
end
end
def self.can_contact?(_model_name)
raise NotImplementedError
end
def initialize(model_name, tokenizer)
@model = model_name
@tokenizer = tokenizer
end
def perform_completion!(prompt, user, model_params = {})
@streaming_mode = block_given?
Net::HTTP.start(
model_uri.host,
model_uri.port,
use_ssl: true,
read_timeout: TIMEOUT,
open_timeout: TIMEOUT,
write_timeout: TIMEOUT,
) do |http|
response_data = +""
response_raw = +""
request_body = prepare_payload(prompt, model_params).to_json
request = prepare_request(request_body)
http.request(request) do |response|
if response.code.to_i != 200
Rails.logger.error(
"#{self.class.name}: status: #{response.code.to_i} - body: #{response.body}",
)
raise CompletionFailed
end
log =
AiApiAuditLog.new(
provider_id: provider_id,
user_id: user.id,
raw_request_payload: request_body,
request_tokens: prompt_size(prompt),
)
if !@streaming_mode
response_raw = response.read_body
response_data = extract_completion_from(response_raw)
return response_data
end
begin
cancelled = false
cancel = lambda { cancelled = true }
leftover = ""
response.read_body do |chunk|
if cancelled
http.finish
return
end
decoded_chunk = decode(chunk)
response_raw << decoded_chunk
partials_from(leftover + decoded_chunk).each do |raw_partial|
next if cancelled
next if raw_partial.blank?
begin
partial = extract_completion_from(raw_partial)
leftover = ""
response_data << partial
yield partial, cancel if partial
rescue JSON::ParserError
leftover = raw_partial
end
end
end
rescue IOError, StandardError
raise if !cancelled
end
return response_data
ensure
log.raw_response_payload = response_raw
log.response_tokens = tokenizer.size(response_data)
log.save!
if Rails.env.development? && log
puts "#{self.class.name}: request_tokens #{log.request_tokens} response_tokens #{log.response_tokens}"
end
end
end
end
def default_options
raise NotImplementedError
end
def provider_id
raise NotImplementedError
end
def prompt_size(prompt)
tokenizer.size(extract_prompt_for_tokenizer(prompt))
end
attr_reader :tokenizer
protected
attr_reader :model
def model_uri
raise NotImplementedError
end
def prepare_payload(_prompt, _model_params)
raise NotImplementedError
end
def prepare_request(_payload)
raise NotImplementedError
end
def extract_completion_from(_response_raw)
raise NotImplementedError
end
def decode(chunk)
chunk
end
def partials_from(_decoded_chunk)
raise NotImplementedError
end
def extract_prompt_for_tokenizer(prompt)
prompt
end
end
end
end
end

View File

@ -0,0 +1,47 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
module Endpoints
class CannedResponse
CANNED_RESPONSE_ERROR = Class.new(StandardError)
def self.can_contact?(_)
Rails.env.test?
end
def initialize(responses)
@responses = responses
@completions = 0
end
attr_reader :responses, :completions
def perform_completion!(_prompt, _user, _model_params)
response = responses[completions]
if response.nil?
raise CANNED_RESPONSE_ERROR,
"The number of completions you requested exceed the number of canned responses"
end
@completions += 1
if block_given?
cancelled = false
cancel_fn = lambda { cancelled = true }
response.each_char do |char|
break if cancelled
yield(char, cancel_fn)
end
else
response
end
end
def tokenizer
DiscourseAi::Tokenizer::OpenAiTokenizer
end
end
end
end
end

View File

@ -0,0 +1,75 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
module Endpoints
class Huggingface < Base
def self.can_contact?(model_name)
%w[StableBeluga2 Upstage-Llama-2-*-instruct-v2 Llama2-*-chat-hf].include?(model_name)
end
def default_options
{ parameters: { repetition_penalty: 1.1, temperature: 0.7 } }
end
def provider_id
AiApiAuditLog::Provider::HuggingFaceTextGeneration
end
private
def model_uri
URI(SiteSetting.ai_hugging_face_api_url).tap do |uri|
uri.path = @streaming_mode ? "/generate_stream" : "/generate"
end
end
def prepare_payload(prompt, model_params)
default_options
.merge(inputs: prompt)
.tap do |payload|
payload[:parameters].merge!(model_params)
token_limit = 2_000 || SiteSetting.ai_hugging_face_token_limit
payload[:parameters][:max_new_tokens] = token_limit - prompt_size(prompt)
end
end
def prepare_request(payload)
headers =
{ "Content-Type" => "application/json" }.tap do |h|
if SiteSetting.ai_hugging_face_api_key.present?
h["Authorization"] = "Bearer #{SiteSetting.ai_hugging_face_api_key}"
end
end
Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
end
def extract_completion_from(response_raw)
parsed = JSON.parse(response_raw, symbolize_names: true)
if @streaming_mode
# The last chunk contains the full response, which we already yielded.
return if parsed.dig(:token, :special)
parsed.dig(:token, :text).to_s
else
parsed[:generated_text].to_s
end
end
def partials_from(decoded_chunk)
decoded_chunk
.split("\n")
.map do |line|
data = line.split("data: ", 2)[1]
data&.squish == "[DONE]" ? nil : data
end
.compact
end
end
end
end
end

View File

@ -0,0 +1,92 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
module Endpoints
class OpenAI < Base
def self.can_contact?(model_name)
%w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
end
def default_options
{ model: model }
end
def provider_id
AiApiAuditLog::Provider::OpenAI
end
private
def model_uri
url =
if model.include?("gpt-4")
if model.include?("32k")
SiteSetting.ai_openai_gpt4_32k_url
else
SiteSetting.ai_openai_gpt4_url
end
else
if model.include?("16k")
SiteSetting.ai_openai_gpt35_16k_url
else
SiteSetting.ai_openai_gpt35_url
end
end
URI(url)
end
def prepare_payload(prompt, model_params)
default_options
.merge(model_params)
.merge(messages: prompt)
.tap { |payload| payload[:stream] = true if @streaming_mode }
end
def prepare_request(payload)
headers =
{ "Content-Type" => "application/json" }.tap do |h|
if model_uri.host.include?("azure")
h["api-key"] = SiteSetting.ai_openai_api_key
else
h["Authorization"] = "Bearer #{SiteSetting.ai_openai_api_key}"
end
if SiteSetting.ai_openai_organization.present?
h["OpenAI-Organization"] = SiteSetting.ai_openai_organization
end
end
Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
end
def extract_completion_from(response_raw)
parsed = JSON.parse(response_raw, symbolize_names: true)
(
if @streaming_mode
parsed.dig(:choices, 0, :delta, :content)
else
parsed.dig(:choices, 0, :message, :content)
end
).to_s
end
def partials_from(decoded_chunk)
decoded_chunk
.split("\n")
.map do |line|
data = line.split("data: ", 2)[1]
data == "[DONE]" ? nil : data
end
.compact
end
def extract_prompt_for_tokenizer(prompt)
prompt.map { |message| message[:content] || message["content"] || "" }.join("\n")
end
end
end
end
end

View File

@ -0,0 +1,26 @@
# frozen_string_literal: true
module DiscourseAi
module Completions
class EntryPoint
def load_files
require_relative "dialects/chat_gpt"
require_relative "dialects/llama2_classic"
require_relative "dialects/orca_style"
require_relative "dialects/claude"
require_relative "endpoints/canned_response"
require_relative "endpoints/base"
require_relative "endpoints/anthropic"
require_relative "endpoints/aws_bedrock"
require_relative "endpoints/open_ai"
require_relative "endpoints/hugging_face"
require_relative "llm"
end
def inject_into(_)
end
end
end
end

lib/completions/llm.rb
View File

@ -0,0 +1,81 @@
# frozen_string_literal: true
# A facade that abstracts multiple LLMs behind a single interface.
#
# Internally, it consists of the combination of a dialect and an endpoint.
# After receiving a prompt in our generic format, it translates it to
# the target model and routes the completion request through the correct gateway.
#
# Use the .proxy method to instantiate an object.
# It chooses the best dialect and endpoint for the model you want to interact with.
#
# Tests of modules that perform LLM calls can use .with_prepared_responses to return canned responses
# instead of relying on WebMock stubs like we did in the past.
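# An illustrative spec usage (the method under test is hypothetical; the block
# form mirrors the helper defined below and the specs in this commit):
#
#   DiscourseAi::Completions::LLM.with_prepared_responses(["<ai>canned</ai>"]) do
#     subject.summarize(content, user)
#   end
#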
#
module DiscourseAi
module Completions
class LLM
UNKNOWN_MODEL = Class.new(StandardError)
def self.with_prepared_responses(responses)
@canned_response = DiscourseAi::Completions::Endpoints::CannedResponse.new(responses)
yield(@canned_response).tap { @canned_response = nil }
end
def self.proxy(model_name)
dialects = [
DiscourseAi::Completions::Dialects::Claude,
DiscourseAi::Completions::Dialects::Llama2Classic,
DiscourseAi::Completions::Dialects::ChatGPT,
DiscourseAi::Completions::Dialects::OrcaStyle,
]
dialect =
dialects.detect(-> { raise UNKNOWN_MODEL }) { |d| d.can_translate?(model_name) }.new
return new(dialect, @canned_response, model_name) if @canned_response
gateway =
DiscourseAi::Completions::Endpoints::Base.endpoint_for(model_name).new(
model_name,
dialect.tokenizer,
)
new(dialect, gateway, model_name)
end
def initialize(dialect, gateway, model_name)
@dialect = dialect
@gateway = gateway
@model_name = model_name
end
delegate :tokenizer, to: :dialect
# @param generic_prompt { Hash } - Prompt using our generic format.
# We use the following keys from the hash:
# - insts: String with instructions for the LLM.
# - input: String containing user input
# - examples (optional): Array of arrays with examples of input and responses. Each array is an input/response pair like [[example1, response1], [example2, response2]].
# - post_insts (optional): Additional instructions for the LLM. Some dialects like Claude add these at the end of the prompt.
#
# @param user { User } - User requesting the summary.
#
# @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
#
# @returns { String } - Completion result.
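# An illustrative generic_prompt hash (values are hypothetical; the keys follow the
# documentation above and the conventions used by the dialect specs):
#
#   {
#     insts: "You are a summarization bot.",
#     input: "Here is the text, inside <input></input> XML tags: <input>...</input>",
#     examples: [["<input>example text</input>", "<ai>example summary</ai>"]],
#     post_insts: "Reply with the summary inside <ai></ai> tags.",
#   }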
def completion!(generic_prompt, user, &partial_read_blk)
prompt = dialect.translate(generic_prompt)
model_params = generic_prompt.dig(:params, model_name) || {}
gateway.perform_completion!(prompt, user, model_params, &partial_read_blk)
end
private
attr_reader :dialect, :gateway, :model_name
end
end
end

View File

@ -15,11 +15,6 @@ module DiscourseAi
require_relative "semantic_related"
require_relative "semantic_topic_query"
require_relative "hyde_generators/base"
require_relative "hyde_generators/openai"
require_relative "hyde_generators/anthropic"
require_relative "hyde_generators/llama2"
require_relative "hyde_generators/llama2_ftos"
require_relative "semantic_search"
end

View File

@ -1,37 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
module Embeddings
module HydeGenerators
class Anthropic < DiscourseAi::Embeddings::HydeGenerators::Base
def prompt(search_term)
<<~TEXT
Human: Given a search term given between <input> tags, generate a forum post about a given subject.
#{basic_prompt_instruction}
<input>#{search_term}</input>
Respond with the generated post between <ai> tags.
Assistant:\n
TEXT
end
def models
%w[claude-instant-1 claude-2]
end
def hypothetical_post_from(query)
response =
::DiscourseAi::Inference::AnthropicCompletions.perform!(
prompt(query),
SiteSetting.ai_embeddings_semantic_search_hyde_model,
max_tokens: 400,
stop_sequences: ["</ai>"],
).dig(:completion)
Nokogiri::HTML5.fragment(response).at("ai").text
end
end
end
end
end

View File

@ -1,28 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
module Embeddings
module HydeGenerators
class Base
def self.current_hyde_model
DiscourseAi::Embeddings::HydeGenerators::Base.descendants.find do |generator_klass|
generator_klass.new.models.include?(
SiteSetting.ai_embeddings_semantic_search_hyde_model,
)
end
end
def basic_prompt_instruction
<<~TEXT
Act as a content writer for a forum.
The forum description is as follows:
#{SiteSetting.title}
#{SiteSetting.site_description}
Given the forum description write a forum post about the following subject:
TEXT
end
end
end
end
end

View File

@ -1,35 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
module Embeddings
module HydeGenerators
class Llama2 < DiscourseAi::Embeddings::HydeGenerators::Base
def prompt(search_term)
<<~TEXT
[INST] <<SYS>>
You are a helpful bot
You create forum posts about a given subject
<</SYS>>
#{basic_prompt_instruction}
#{search_term}
[/INST]
Here is a forum post about the above subject:
TEXT
end
def models
["Llama2-*-chat-hf"]
end
def hypothetical_post_from(query)
::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
prompt(query),
SiteSetting.ai_embeddings_semantic_search_hyde_model,
token_limit: 400,
).dig(:generated_text)
end
end
end
end
end

View File

@ -1,28 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
module Embeddings
module HydeGenerators
class Llama2Ftos < DiscourseAi::Embeddings::HydeGenerators::Llama2
def prompt(search_term)
<<~TEXT
### System:
You are a helpful bot
You create forum posts about a given subject
### User:
#{basic_prompt_instruction}
#{search_term}
### Assistant:
Here is a forum post about the above subject:
TEXT
end
def models
%w[StableBeluga2 Upstage-Llama-2-*-instruct-v2]
end
end
end
end
end

View File

@ -1,31 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
module Embeddings
module HydeGenerators
class OpenAi < DiscourseAi::Embeddings::HydeGenerators::Base
def prompt(search_term)
[
{
role: "system",
content: "You are a helpful bot. You create forum posts about a given subject.",
},
{ role: "user", content: "#{basic_prompt_instruction}\n#{search_term}" },
]
end
def models
%w[gpt-3.5-turbo gpt-4]
end
def hypothetical_post_from(query)
::DiscourseAi::Inference::OpenAiCompletions.perform!(
prompt(query),
SiteSetting.ai_embeddings_semantic_search_hyde_model,
max_tokens: 400,
).dig(:choices, 0, :message, :content)
end
end
end
end
end

View File

@ -55,10 +55,7 @@ module DiscourseAi
hypothetical_post =
Discourse
.cache
.fetch(hyde_key, expires_in: 1.week) do
hyde_generator = DiscourseAi::Embeddings::HydeGenerators::Base.current_hyde_model.new
hyde_generator.hypothetical_post_from(search_term)
end
.fetch(hyde_key, expires_in: 1.week) { hypothetical_post_from(search_term) }
hypothetical_post_embedding =
Discourse
@ -96,6 +93,30 @@ module DiscourseAi
def build_embedding_key(digest, hyde_model, embedding_model)
"#{build_hyde_key(digest, hyde_model)}-#{embedding_model}"
end
def hypothetical_post_from(search_term)
prompt = {
insts: <<~TEXT,
You are a content creator for a forum. The forum description is as follows:
#{SiteSetting.title}
#{SiteSetting.site_description}
Given the forum description write a forum post about the following subject:
TEXT
input: <<~TEXT,
Using this description, write a forum post about the subject inside the <input></input> XML tags:
<input>#{search_term}</input>
TEXT
post_insts: "Put the forum post between <ai></ai> tags.",
}
llm_response =
DiscourseAi::Completions::LLM.proxy(
SiteSetting.ai_embeddings_semantic_search_hyde_model,
).completion!(prompt, @guardian.user)
Nokogiri::HTML5.fragment(llm_response).at("ai").text
end
end
end
end

View File

@ -21,7 +21,6 @@ module DiscourseAi
Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
Models::Anthropic.new("claude-2", max_tokens: 100_000),
Models::Anthropic.new("claude-instant-1", max_tokens: 100_000),
Models::Llama2.new("Llama2-chat-hf", max_tokens: SiteSetting.ai_hugging_face_token_limit),
@ -36,6 +35,7 @@ module DiscourseAi
end
truncable_models = [
Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024),
Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512),
]

View File

@ -19,109 +19,6 @@ module DiscourseAi
setting: "ai_anthropic_api_key",
)
end
def concatenate_summaries(summaries, &on_partial_blk)
instructions = <<~TEXT
Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
Include only the summary inside <ai> tags.
TEXT
instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" }
instructions += "Assistant:\n"
completion(instructions, &on_partial_blk)
end
def summarize_with_truncation(contents, opts, &on_partial_blk)
instructions = build_base_prompt(opts)
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
instructions += "<input>#{truncated_content}</input>\nAssistant:\n"
completion(instructions, &on_partial_blk)
end
def summarize_single(chunk_text, opts, &on_partial_blk)
summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
end
private
def summarize_chunk(chunk_text, opts, &on_partial_blk)
completion(
build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n",
&on_partial_blk
)
end
def build_base_prompt(opts)
initial_instruction =
if opts[:single_chunk]
"Summarize the following forum discussion inside the given <input> tag, creating a cohesive narrative."
else
"Summarize the following forum discussion inside the given <input> tag."
end
base_prompt = <<~TEXT
Human: #{initial_instruction}
Try to keep the summary in the same language as the forum discussion.
Format the response, including links, using markdown.
TEXT
base_prompt += <<~TEXT if opts[:resource_path]
Try generating links as well the format is #{opts[:resource_path]}/POST_ID
For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
TEXT
base_prompt += "Wrap the whole the summary inside <ai> tags.\n"
base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
:content_title
]
base_prompt += "Don't use more than 400 words.\n" unless opts[:single_chunk]
base_prompt
end
def completion(prompt, &on_partial_blk)
# We need to discard any text that might come before the <ai> tag.
# Instructing the model to reply only with the summary seems impossible.
pre_tag_partial = +""
if on_partial_blk
on_partial_read =
Proc.new do |partial|
if pre_tag_partial.include?("<ai>")
on_partial_blk.call(partial[:completion])
else
pre_tag_partial << partial[:completion]
end
end
response =
::DiscourseAi::Inference::AnthropicCompletions.perform!(
prompt,
model,
&on_partial_read
)
else
response =
::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(
:completion,
)
end
Nokogiri::HTML5.fragment(response).at("ai")&.text.presence || response
end
def tokenizer
DiscourseAi::Tokenizer::AnthropicTokenizer
end
attr_reader :max_tokens
end
end
end

View File

@ -21,29 +21,6 @@ module DiscourseAi
raise NotImplemented
end
def summarize_in_chunks(chunks, opts)
chunks.map do |chunk|
chunk[:summary] = summarize_chunk(chunk[:summary], opts)
chunk
end
end
def concatenate_summaries(_summaries)
raise NotImplemented
end
def summarize_with_truncation(_contents, _opts)
raise NotImplemented
end
def summarize_single(chunk_text, opts)
raise NotImplemented
end
def format_content_item(item)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
def available_tokens
max_tokens - reserved_tokens
end
@ -57,16 +34,6 @@ module DiscourseAi
# ~500 words
700
end
def summarize_chunk(_chunk_text, _opts)
raise NotImplemented
end
def tokenizer
raise NotImplemented
end
delegate :can_expand_tokens?, to: :tokenizer
end
end
end

View File

@ -22,44 +22,11 @@ module DiscourseAi
)
end
def concatenate_summaries(summaries)
completion(summaries.join("\n"))
end
def summarize_with_truncation(contents, opts)
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content =
::DiscourseAi::Tokenizer::BertTokenizer.truncate(text_to_summarize, available_tokens)
completion(truncated_content)
end
def summarize_single(chunk_text, _opts)
completion(chunk_text)
end
private
def summarize_chunk(chunk_text, _opts)
completion(chunk_text)
end
def reserved_tokens
0
end
def completion(prompt)
::DiscourseAi::Inference::DiscourseClassifier.perform!(
"#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
model,
prompt,
SiteSetting.ai_summarization_discourse_service_api_key,
).dig(:summary_text)
end
def tokenizer
DiscourseAi::Tokenizer::BertTokenizer
end
end
end
end

View File

@ -19,104 +19,6 @@ module DiscourseAi
setting: "ai_hugging_face_api_url",
)
end
def concatenate_summaries(summaries, &on_partial_blk)
prompt = <<~TEXT
[INST] <<SYS>>
You are a helpful bot
<</SYS>>
Concatenate these disjoint summaries, creating a cohesive narrative:
#{summaries.join("\n")} [/INST]
TEXT
completion(prompt, &on_partial_blk)
end
def summarize_with_truncation(contents, opts, &on_partial_blk)
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
prompt = <<~TEXT
[INST] <<SYS>>
#{build_base_prompt(opts)}
<</SYS>>
Summarize the following in up to 400 words:
#{truncated_content} [/INST]
Here is a summary of the above topic:
TEXT
completion(prompt, &on_partial_blk)
end
def summarize_single(chunk_text, opts, &on_partial_blk)
summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
end
private
def summarize_chunk(chunk_text, opts, &on_partial_blk)
summary_instruction =
if opts[:single_chunk]
"Summarize the following forum discussion, creating a cohesive narrative:"
else
"Summarize the following in up to 400 words:"
end
prompt = <<~TEXT
[INST] <<SYS>>
#{build_base_prompt(opts)}
<</SYS>>
#{summary_instruction}
#{chunk_text} [/INST]
Here is a summary of the above topic:
TEXT
completion(prompt, &on_partial_blk)
end
def build_base_prompt(opts)
base_prompt = <<~TEXT
You are a summarization bot.
You effectively summarise any text and reply ONLY with ONLY the summarized text.
You condense it into a shorter version.
You understand and generate Discourse forum Markdown.
TEXT
if opts[:resource_path]
base_prompt +=
"Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
end
base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
:content_title
]
base_prompt
end
def completion(prompt, &on_partial_blk)
if on_partial_blk
on_partial_read =
Proc.new { |partial| on_partial_blk.call(partial.dig(:token, :text).to_s) }
::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
prompt,
model,
&on_partial_read
)
else
::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
:generated_text,
)
end
end
def tokenizer
DiscourseAi::Tokenizer::Llama2Tokenizer
end
end
end
end

View File

@ -7,65 +7,6 @@ module DiscourseAi
def display_name
"Llama2FineTunedOrcaStyle's #{SiteSetting.ai_hugging_face_model_display_name.presence || model}"
end
def concatenate_summaries(summaries, &on_partial_blk)
prompt = <<~TEXT
### System:
You are a helpful bot
### User:
Concatenate these disjoint summaries, creating a cohesive narrative:
#{summaries.join("\n")}
### Assistant:
TEXT
completion(prompt, &on_partial_blk)
end
def summarize_with_truncation(contents, opts, &on_partial_blk)
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
prompt = <<~TEXT
### System:
#{build_base_prompt(opts)}
### User:
Summarize the following in up to 400 words:
#{truncated_content}
### Assistant:
Here is a summary of the above topic:
TEXT
completion(prompt, &on_partial_blk)
end
private
def summarize_chunk(chunk_text, opts, &on_partial_blk)
summary_instruction =
if opts[:single_chunk]
"Summarize the following forum discussion, creating a cohesive narrative:"
else
"Summarize the following in up to 400 words:"
end
prompt = <<~TEXT
### System:
#{build_base_prompt(opts)}
### User:
#{summary_instruction}
#{chunk_text}
### Assistant:
Here is a summary of the above topic:
TEXT
completion(prompt, &on_partial_blk)
end
end
end
end

View File

@ -19,100 +19,6 @@ module DiscourseAi
setting: "ai_openai_api_key",
)
end
def concatenate_summaries(summaries, &on_partial_blk)
messages = [
{ role: "system", content: "You are a helpful bot" },
{
role: "user",
content:
"Concatenate these disjoint summaries, creating a cohesive narrative. Keep the summary in the same language used in the text below.\n#{summaries.join("\n")}",
},
]
completion(messages, &on_partial_blk)
end
def summarize_with_truncation(contents, opts, &on_partial_blk)
messages = [{ role: "system", content: build_base_prompt(opts) }]
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
messages << {
role: "user",
content:
"Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n#{truncated_content}",
}
completion(messages, &on_partial_blk)
end
def summarize_single(chunk_text, opts, &on_partial_blk)
summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
end
private
def summarize_chunk(chunk_text, opts, &on_partial_blk)
summary_instruction =
if opts[:single_chunk]
"Summarize the following forum discussion, creating a cohesive narrative. Keep the summary in the same language used in the text below."
else
"Summarize the following in 400 words. Keep the summary in the same language used in the text below."
end
completion(
[
{ role: "system", content: build_base_prompt(opts) },
{ role: "user", content: "#{summary_instruction}\n#{chunk_text}" },
],
&on_partial_blk
)
end
def build_base_prompt(opts)
base_prompt = <<~TEXT
You are a summarization bot.
You effectively summarise any text and reply ONLY with ONLY the summarized text.
You condense it into a shorter version.
You understand and generate Discourse forum Markdown.
You format the response, including links, using markdown.
TEXT
if opts[:resource_path]
base_prompt +=
"Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
end
base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
:content_title
]
base_prompt
end
def completion(prompt, &on_partial_blk)
if on_partial_blk
on_partial_read =
Proc.new do |partial|
on_partial_blk.call(partial.dig(:choices, 0, :delta, :content).to_s)
end
::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, &on_partial_read)
else
::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
:choices,
0,
:message,
:content,
)
end
end
def tokenizer
DiscourseAi::Tokenizer::OpenAiTokenizer
end
end
end
end

View File

@ -16,22 +16,29 @@ module DiscourseAi
:model,
to: :completion_model
def summarize(content, _user, &on_partial_blk)
def summarize(content, user, &on_partial_blk)
opts = content.except(:contents)
chunks = split_into_chunks(content[:contents])
llm = DiscourseAi::Completions::LLM.proxy(completion_model.model)
chunks = split_into_chunks(llm.tokenizer, content[:contents])
if chunks.length == 1
{
summary:
completion_model.summarize_single(chunks.first[:summary], opts, &on_partial_blk),
summary: summarize_single(llm, chunks.first[:summary], user, opts, &on_partial_blk),
chunks: [],
}
else
summaries = completion_model.summarize_in_chunks(chunks, opts)
summaries = summarize_in_chunks(llm, chunks, user, opts)
{
summary: completion_model.concatenate_summaries(summaries, &on_partial_blk),
summary:
concatenate_summaries(
llm,
summaries.map { |s| s[:summary] },
user,
&on_partial_blk
),
chunks: summaries,
}
end
@ -39,14 +46,18 @@ module DiscourseAi
private
def split_into_chunks(contents)
def format_content_item(item)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
def split_into_chunks(tokenizer, contents)
section = { ids: [], summary: "" }
chunks =
contents.reduce([]) do |sections, item|
new_content = completion_model.format_content_item(item)
new_content = format_content_item(item)
if completion_model.can_expand_tokens?(
if tokenizer.can_expand_tokens?(
section[:summary],
new_content,
completion_model.available_tokens,
@ -65,6 +76,71 @@ module DiscourseAi
chunks
end
def summarize_single(llm, text, user, opts, &on_partial_blk)
prompt = summarization_prompt(text, opts)
llm.completion!(prompt, user, &on_partial_blk)
end
def summarize_in_chunks(llm, chunks, user, opts)
chunks.map do |chunk|
prompt = summarization_prompt(chunk[:summary], opts)
prompt[:post_insts] = "Don't use more than 400 words for the summary."
chunk[:summary] = llm.completion!(prompt, user)
chunk
end
end
def concatenate_summaries(llm, summaries, user, &on_partial_blk)
prompt = summarization_prompt(summaries.join("\n"), {})
prompt[:insts] = <<~TEXT
You are a bot that can concatenate disjoint summaries, creating a cohesive narrative.
Keep the resulting summary in the same language used in the text below.
TEXT
llm.completion!(prompt, user, &on_partial_blk)
end
def summarization_prompt(input, opts)
insts = <<~TEXT
You are a summarization bot that effectively summarizes any text, creating a cohesive narrative.
Your replies contain ONLY a summarized version of the text I provided you, using the same language.
You understand and generate Discourse forum Markdown.
You format the response, including links, using Markdown.
TEXT
insts += <<~TEXT if opts[:resource_path]
Each message is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE> "
Append <POST_NUMBER> to #{opts[:resource_path]} when linking posts.
TEXT
insts += "The discussion title is: #{opts[:content_title]}.\n" if opts[:content_title]
prompt = { insts: insts, input: <<~TEXT }
Here is the text, inside <input></input> XML tags:
<input>
#{input}
</input>
TEXT
if opts[:resource_path]
prompt[:examples] = [
[
"<input>(1 user1 said: I love Mondays 2) user2 said: I hate Mondays</input>",
"Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.",
],
[
"<input>3) usuario1: Amo los lunes 6) usuario2: Odio los lunes</input>",
"Dos usuarios charlan sobre los lunes. [usuario1](#{opts[:resource_path]}/3) dice que los ama, mientras que [usuario2](#{opts[:resource_path]}/2) los odia.",
],
]
end
prompt
end
end
end
end

View File

@ -25,6 +25,32 @@ module DiscourseAi
chunks: [],
}
end
private
def format_content_item(item)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
def summarize_with_truncation(contents, opts)
text_to_summarize = contents.map { |c| format_content_item(c) }.join
truncated_content =
::DiscourseAi::Tokenizer::BertTokenizer.truncate(
text_to_summarize,
completion_model.available_tokens,
)
completion(truncated_content)
end
def completion(prompt)
::DiscourseAi::Inference::DiscourseClassifier.perform!(
"#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
completion_model.model,
prompt,
SiteSetting.ai_summarization_discourse_service_api_key,
).dig(:summary_text)
end
end
end
end

View File

@ -52,6 +52,8 @@ after_initialize do
require_relative "lib/shared/database/connection"
require_relative "lib/completions/entry_point"
require_relative "lib/modules/nsfw/entry_point"
require_relative "lib/modules/toxicity/entry_point"
require_relative "lib/modules/sentiment/entry_point"
@ -64,6 +66,7 @@ after_initialize do
add_admin_route "discourse_ai.title", "discourse-ai"
[
DiscourseAi::Completions::EntryPoint.new,
DiscourseAi::Embeddings::EntryPoint.new,
DiscourseAi::NSFW::EntryPoint.new,
DiscourseAi::Toxicity::EntryPoint.new,

View File

@ -0,0 +1,63 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Completions::Dialects::ChatGPT do
subject(:dialect) { described_class.new }
let(:prompt) do
{
insts: <<~TEXT,
I want you to act as a title generator for written pieces. I will provide you with a text,
and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
TEXT
input: <<~TEXT,
Here is the text, inside <input></input> XML tags:
<input>
To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
dies so that a scene may be repeated.
</input>
TEXT
post_insts:
"Please put the translation between <ai></ai> tags and separate each title with a comma.",
}
end
describe "#translate" do
it "translates a prompt written in our generic format to the ChatGPT format" do
open_ai_version = [
{ role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") },
{ role: "user", content: prompt[:input] },
]
translated = dialect.translate(prompt)
expect(translated).to contain_exactly(*open_ai_version)
end
it "include examples in the ChatGPT version" do
prompt[:examples] = [
[
"<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
"<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
],
]
open_ai_version = [
{ role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") },
{ role: "user", content: prompt[:examples][0][0] },
{ role: "assistant", content: prompt[:examples][0][1] },
{ role: "user", content: prompt[:input] },
]
translated = dialect.translate(prompt)
expect(translated).to contain_exactly(*open_ai_version)
end
end
end

View File

@ -0,0 +1,68 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Completions::Dialects::Claude do
subject(:dialect) { described_class.new }
let(:prompt) do
{
insts: <<~TEXT,
I want you to act as a title generator for written pieces. I will provide you with a text,
and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
TEXT
input: <<~TEXT,
Here is the text, inside <input></input> XML tags:
<input>
To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
dies so that a scene may be repeated.
</input>
TEXT
post_insts:
"Please put the translation between <ai></ai> tags and separate each title with a comma.",
}
end
describe "#translate" do
it "translates a prompt written in our generic format to Claude's format" do
anthropic_version = <<~TEXT
Human: #{prompt[:insts]}
#{prompt[:input]}
#{prompt[:post_insts]}
Assistant:
TEXT
translated = dialect.translate(prompt)
expect(translated).to eq(anthropic_version)
end
it "knows how to translate examples to Claude's format" do
prompt[:examples] = [
[
"<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
"<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
],
]
anthropic_version = <<~TEXT
Human: #{prompt[:insts]}
<example>
H: #{prompt[:examples][0][0]}
A: #{prompt[:examples][0][1]}
</example>
#{prompt[:input]}
#{prompt[:post_insts]}
Assistant:
TEXT
translated = dialect.translate(prompt)
expect(translated).to eq(anthropic_version)
end
end
end

View File

@ -0,0 +1,63 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do
subject(:dialect) { described_class.new }
let(:prompt) do
{
insts: <<~TEXT,
I want you to act as a title generator for written pieces. I will provide you with a text,
and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
TEXT
input: <<~TEXT,
Here is the text, inside <input></input> XML tags:
<input>
To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
dies so that a scene may be repeated.
</input>
TEXT
post_insts:
"Please put the translation between <ai></ai> tags and separate each title with a comma.",
}
end
describe "#translate" do
it "translates a prompt written in our generic format to the Llama2 format" do
llama2_classic_version = <<~TEXT
[INST]<<SYS>>#{[prompt[:insts], prompt[:post_insts]].join("\n")}<</SYS>>[/INST]
[INST]#{prompt[:input]}[/INST]
TEXT
translated = dialect.translate(prompt)
expect(translated).to eq(llama2_classic_version)
end
it "includes examples in the translation" do
prompt[:examples] = [
[
"<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
"<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
],
]
llama2_classic_version = <<~TEXT
[INST]<<SYS>>#{[prompt[:insts], prompt[:post_insts]].join("\n")}<</SYS>>[/INST]
[INST]#{prompt[:examples][0][0]}[/INST]
#{prompt[:examples][0][1]}
[INST]#{prompt[:input]}[/INST]
TEXT
translated = dialect.translate(prompt)
expect(translated).to eq(llama2_classic_version)
end
end
end

View File

@ -0,0 +1,71 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do
subject(:dialect) { described_class.new }
describe "#translate" do
let(:prompt) do
{
insts: <<~TEXT,
I want you to act as a title generator for written pieces. I will provide you with a text,
and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
TEXT
input: <<~TEXT,
Here is the text, inside <input></input> XML tags:
<input>
To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
dies so that a scene may be repeated.
</input>
TEXT
post_insts:
"Please put the translation between <ai></ai> tags and separate each title with a comma.",
}
end
it "translates a prompt written in our generic format to the Open AI format" do
orca_style_version = <<~TEXT
### System:
#{[prompt[:insts], prompt[:post_insts]].join("\n")}
### User:
#{prompt[:input]}
### Assistant:
TEXT
translated = dialect.translate(prompt)
expect(translated).to eq(orca_style_version)
end
it "include examples in the translated prompt" do
prompt[:examples] = [
[
"<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
"<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
],
]
orca_style_version = <<~TEXT
### System:
#{[prompt[:insts], prompt[:post_insts]].join("\n")}
### User:
#{prompt[:examples][0][0]}
### Assistant:
#{prompt[:examples][0][1]}
### User:
#{prompt[:input]}
### Assistant:
TEXT
translated = dialect.translate(prompt)
expect(translated).to eq(orca_style_version)
end
end
end

View File

@ -0,0 +1,64 @@
# frozen_string_literal: true
require_relative "endpoint_examples"
RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) }
let(:model_name) { "claude-2" }
let(:prompt) { "Human: write 3 words\n\n" }
let(:request_body) { model.default_options.merge(prompt: prompt).to_json }
let(:stream_request_body) { model.default_options.merge(prompt: prompt, stream: true).to_json }
def response(content)
{
completion: content,
stop: "\n\nHuman:",
stop_reason: "stop_sequence",
truncated: false,
log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
model: model_name,
exception: nil,
}
end
def stub_response(prompt, response_text)
WebMock
.stub_request(:post, "https://api.anthropic.com/v1/complete")
.with(body: model.default_options.merge(prompt: prompt).to_json)
.to_return(status: 200, body: JSON.dump(response(response_text)))
end
def stream_line(delta, finish_reason: nil)
+"data: " << {
completion: delta,
stop: finish_reason ? "\n\nHuman:" : nil,
stop_reason: finish_reason,
truncated: false,
log_id: "12b029451c6d18094d868bc04ce83f63",
model: "claude-2",
exception: nil,
}.to_json
end
def stub_streamed_response(prompt, deltas)
chunks =
deltas.each_with_index.map do |_, index|
if index == (deltas.length - 1)
stream_line(deltas[index], finish_reason: "stop_sequence")
else
stream_line(deltas[index])
end
end
chunks = chunks.join("\n\n")
WebMock
.stub_request(:post, "https://api.anthropic.com/v1/complete")
.with(body: model.default_options.merge(prompt: prompt, stream: true).to_json)
.to_return(status: 200, body: chunks)
end
it_behaves_like "an endpoint that can communicate with a completion service"
end

View File

@ -0,0 +1,122 @@
# frozen_string_literal: true
require_relative "endpoint_examples"
RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) }
let(:model_name) { "claude-2" }
let(:prompt) { "Human: write 3 words\n\n" }
let(:request_body) { model.default_options.merge(prompt: prompt).to_json }
let(:stream_request_body) { model.default_options.merge(prompt: prompt).to_json }
before do
SiteSetting.ai_bedrock_access_key_id = "123456"
SiteSetting.ai_bedrock_secret_access_key = "asd-asd-asd"
SiteSetting.ai_bedrock_region = "us-east-1"
end
# Copied from https://github.com/bblimke/webmock/issues/629
# Workaround for stubbing a streamed response
before do
mocked_http =
Class.new(Net::HTTP) do
def request(*)
super do |response|
response.instance_eval do
def read_body(*, &block)
if block_given?
@body.each(&block)
else
super
end
end
end
yield response if block_given?
response
end
end
end
@original_net_http = Net.send(:remove_const, :HTTP)
Net.send(:const_set, :HTTP, mocked_http)
end
after do
Net.send(:remove_const, :HTTP)
Net.send(:const_set, :HTTP, @original_net_http)
end
def response(content)
{
completion: content,
stop: "\n\nHuman:",
stop_reason: "stop_sequence",
truncated: false,
log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
model: model_name,
exception: nil,
}
end
def stub_response(prompt, response_text)
WebMock
.stub_request(
:post,
"https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model_name}/invoke",
)
.with(body: request_body)
.to_return(status: 200, body: JSON.dump(response(response_text)))
end
def stream_line(delta, finish_reason: nil)
encoder = Aws::EventStream::Encoder.new
message =
Aws::EventStream::Message.new(
payload:
StringIO.new(
{
bytes:
Base64.encode64(
{
completion: delta,
stop: finish_reason ? "\n\nHuman:" : nil,
stop_reason: finish_reason,
truncated: false,
log_id: "12b029451c6d18094d868bc04ce83f63",
model: "claude-2",
exception: nil,
}.to_json,
),
}.to_json,
),
)
encoder.encode(message)
end
def stub_streamed_response(prompt, deltas)
chunks =
deltas.each_with_index.map do |_, index|
if index == (deltas.length - 1)
stream_line(deltas[index], finish_reason: "stop_sequence")
else
stream_line(deltas[index])
end
end
WebMock
.stub_request(
:post,
"https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model_name}/invoke-with-response-stream",
)
.with(body: stream_request_body)
.to_return(status: 200, body: chunks)
end
it_behaves_like "an endpoint that can communicate with a completion service"
end

View File

@ -0,0 +1,71 @@
# frozen_string_literal: true
RSpec.shared_examples "an endpoint that can communicate with a completion service" do
describe "#perform_completion!" do
fab!(:user) { Fabricate(:user) }
let(:response_text) { "1. Serenity\\n2. Laughter\\n3. Adventure" }
context "when using regular mode" do
before { stub_response(prompt, response_text) }
it "can complete a trivial prompt" do
completion_response = model.perform_completion!(prompt, user)
expect(completion_response).to eq(response_text)
end
it "creates an audit log for the request" do
model.perform_completion!(prompt, user)
expect(AiApiAuditLog.count).to eq(1)
log = AiApiAuditLog.first
response_body = response(response_text).to_json
expect(log.provider_id).to eq(model.provider_id)
expect(log.user_id).to eq(user.id)
expect(log.raw_request_payload).to eq(request_body)
expect(log.raw_response_payload).to eq(response_body)
expect(log.request_tokens).to eq(model.prompt_size(prompt))
expect(log.response_tokens).to eq(model.tokenizer.size(response_text))
end
end
context "when using stream mode" do
let(:deltas) { ["Mount", "ain", " ", "Tree ", "Frog"] }
before { stub_streamed_response(prompt, deltas) }
it "can complete a trivial prompt" do
completion_response = +""
model.perform_completion!(prompt, user) do |partial, cancel|
completion_response << partial
cancel.call if completion_response.split(" ").length == 2
end
expect(completion_response).to eq(deltas[0...-1].join)
end
it "creates an audit log and updates is on each read." do
completion_response = +""
model.perform_completion!(prompt, user) do |partial, cancel|
completion_response << partial
cancel.call if completion_response.split(" ").length == 2
end
expect(AiApiAuditLog.count).to eq(1)
log = AiApiAuditLog.first
expect(log.provider_id).to eq(model.provider_id)
expect(log.user_id).to eq(user.id)
expect(log.raw_request_payload).to eq(stream_request_body)
expect(log.raw_response_payload).to be_present
expect(log.request_tokens).to eq(model.prompt_size(prompt))
expect(log.response_tokens).to eq(model.tokenizer.size(deltas[0...-1].join))
end
end
end
end

View File

@ -0,0 +1,68 @@
# frozen_string_literal: true
require_relative "endpoint_examples"
RSpec.describe DiscourseAi::Completions::Endpoints::Huggingface do
subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::Llama2Tokenizer) }
let(:model_name) { "Llama2-*-chat-hf" }
let(:prompt) { <<~TEXT }
[INST]<<SYS>>You are a helpful bot.<</SYS>>[/INST]
[INST]Write 3 words[/INST]
TEXT
let(:request_body) do
model
.default_options
.merge(inputs: prompt)
.tap { |payload| payload[:parameters][:max_new_tokens] = 2_000 - model.prompt_size(prompt) }
.to_json
end
let(:stream_request_body) { request_body }
before { SiteSetting.ai_hugging_face_api_url = "https://test.dev" }
def response(content)
{ generated_text: content }
end
def stub_response(prompt, response_text)
WebMock
.stub_request(:post, "#{SiteSetting.ai_hugging_face_api_url}/generate")
.with(body: request_body)
.to_return(status: 200, body: JSON.dump(response(response_text)))
end
def stream_line(delta, finish_reason: nil)
+"data: " << {
token: {
id: 29_889,
text: delta,
logprob: -0.08319092,
special: !!finish_reason,
},
generated_text: finish_reason ? response_text : nil,
details: nil,
}.to_json
end
def stub_streamed_response(prompt, deltas)
chunks =
deltas.each_with_index.map do |_, index|
if index == (deltas.length - 1)
stream_line(deltas[index], finish_reason: true)
else
stream_line(deltas[index])
end
end
chunks = chunks.join("\n\n")
WebMock
.stub_request(:post, "#{SiteSetting.ai_hugging_face_api_url}/generate_stream")
.with(body: request_body)
.to_return(status: 200, body: chunks)
end
it_behaves_like "an endpoint that can communicate with a completion service"
end

View File

@ -0,0 +1,74 @@
# frozen_string_literal: true
require_relative "endpoint_examples"
RSpec.describe DiscourseAi::Completions::Endpoints::OpenAI do
subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) }
let(:model_name) { "gpt-3.5-turbo" }
let(:prompt) do
[
{ role: "system", content: "You are a helpful bot." },
{ role: "user", content: "Write 3 words" },
]
end
let(:request_body) { model.default_options.merge(messages: prompt).to_json }
let(:stream_request_body) { model.default_options.merge(messages: prompt, stream: true).to_json }
def response(content)
{
id: "chatcmpl-6sZfAb30Rnv9Q7ufzFwvQsMpjZh8S",
object: "chat.completion",
created: 1_678_464_820,
model: "gpt-3.5-turbo-0301",
usage: {
prompt_tokens: 337,
completion_tokens: 162,
total_tokens: 499,
},
choices: [
{ message: { role: "assistant", content: content }, finish_reason: "stop", index: 0 },
],
}
end
def stub_response(prompt, response_text)
WebMock
.stub_request(:post, "https://api.openai.com/v1/chat/completions")
.with(body: { model: model_name, messages: prompt })
.to_return(status: 200, body: JSON.dump(response(response_text)))
end
def stream_line(delta, finish_reason: nil)
+"data: " << {
id: "chatcmpl-#{SecureRandom.hex}",
object: "chat.completion.chunk",
created: 1_681_283_881,
model: "gpt-3.5-turbo-0301",
choices: [{ delta: { content: delta } }],
finish_reason: finish_reason,
index: 0,
}.to_json
end
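# Stubs the chat completions endpoint in streaming mode, returning the fake chunks joined by blank lines like OpenAI's SSE payloads.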
def stub_streamed_response(prompt, deltas)
chunks =
deltas.each_with_index.map do |_, index|
if index == (deltas.length - 1)
stream_line(deltas[index], finish_reason: "stop_sequence")
else
stream_line(deltas[index])
end
end
chunks = chunks.join("\n\n")
WebMock
.stub_request(:post, "https://api.openai.com/v1/chat/completions")
.with(body: model.default_options.merge(messages: prompt, stream: true).to_json)
.to_return(status: 200, body: chunks)
end
it_behaves_like "an endpoint that can communicate with a completion service"
end

View File

@ -0,0 +1,71 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Completions::LLM do
subject(:llm) do
described_class.new(
DiscourseAi::Completions::Dialects::OrcaStyle.new,
canned_response,
"Upstage-Llama-2-*-instruct-v2",
)
end
fab!(:user) { Fabricate(:user) }
describe ".proxy" do
it "raises an exception when we can't proxy the model" do
fake_model = "unknown_v2"
expect { described_class.proxy(fake_model) }.to(
raise_error(DiscourseAi::Completions::LLM::UNKNOWN_MODEL),
)
end
end
describe "#completion!" do
let(:prompt) do
{
insts: <<~TEXT,
I want you to act as a title generator for written pieces. I will provide you with a text,
and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
TEXT
input: <<~TEXT,
Here is the text, inside <input></input> XML tags:
<input>
To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
</input>
TEXT
post_insts:
"Please put the translation between <ai></ai> tags and separate each title with a comma.",
}
end
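# CannedResponse is a fake endpoint that replays the supplied responses instead of calling a real provider.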
let(:canned_response) do
DiscourseAi::Completions::Endpoints::CannedResponse.new(
[
"<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
],
)
end
context "when getting the full response" do
it "processes the prompt and return the response" do
llm_response = llm.completion!(prompt, user)
expect(llm_response).to eq(canned_response.responses[0])
end
end
context "when getting a streamed response" do
it "processes the prompt and call the given block with the partial response" do
llm_response = +""
llm.completion!(prompt, user) { |partial, cancel_fn| llm_response << partial }
expect(llm_response).to eq(canned_response.responses[0])
end
end
end
end

View File

@ -66,7 +66,10 @@ RSpec.describe DiscourseAi::AiBot::Commands::SearchCommand do
.expects(:asymmetric_topics_similarity_search)
.returns([post1.topic_id])
-results = search.process(search_query: "hello world, sam", status: "public")
+results =
+  DiscourseAi::Completions::LLM.with_prepared_responses(["<ai>#{query}</ai>"]) do
+    search.process(search_query: "hello world, sam", status: "public")
+  end
expect(results[:args]).to eq({ search_query: "hello world, sam", status: "public" })
expect(results[:rows].length).to eq(1)

View File

@ -13,15 +13,6 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
before do
SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
-prompt = DiscourseAi::Embeddings::HydeGenerators::OpenAi.new.prompt(query)
-OpenAiCompletionsInferenceStubs.stub_response(
-  prompt,
-  hypothetical_post,
-  req_opts: {
-    max_tokens: 400,
-  },
-)
hyde_embedding = [0.049382, 0.9999]
EmbeddingsGenerationStubs.discourse_service(
SiteSetting.ai_embeddings_model,
@ -39,10 +30,16 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
.returns(candidate_ids)
end
+def trigger_search(query)
+  DiscourseAi::Completions::LLM.with_prepared_responses(["<ai>#{hypothetical_post}</ai>"]) do
+    subject.search_for_topics(query)
+  end
+end
it "returns the first post of a topic included in the asymmetric search results" do
stub_candidate_ids([post.topic_id])
-posts = subject.search_for_topics(query)
+posts = trigger_search(query)
expect(posts).to contain_exactly(post)
end
@ -53,7 +50,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
post.topic.update!(visible: false)
stub_candidate_ids([post.topic_id])
-posts = subject.search_for_topics(query)
+posts = trigger_search(query)
expect(posts).to be_empty
end
@ -64,7 +61,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
pm_post = Fabricate(:private_message_post)
stub_candidate_ids([pm_post.topic_id])
-posts = subject.search_for_topics(query)
+posts = trigger_search(query)
expect(posts).to be_empty
end
@ -75,7 +72,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
post.update!(post_type: Post.types[:whisper])
stub_candidate_ids([post.topic_id])
-posts = subject.search_for_topics(query)
+posts = trigger_search(query)
expect(posts).to be_empty
end
@ -87,7 +84,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
reply.topic.first_post.trash!
stub_candidate_ids([reply.topic_id])
-posts = subject.search_for_topics(query)
+posts = trigger_search(query)
expect(posts).to be_empty
end
@ -98,7 +95,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
post_2 = Fabricate(:post)
stub_candidate_ids([post.topic_id])
-posts = subject.search_for_topics(query)
+posts = trigger_search(query)
expect(posts).not_to include(post_2)
end
@ -114,7 +111,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
end
it "returns an empty list" do
-posts = subject.search_for_topics(query)
+posts = trigger_search(query)
expect(posts).to be_empty
end
@ -122,14 +119,17 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
it "returns the results if the user has access to the category" do
group.add(user)
-posts = subject.search_for_topics(query)
+posts = trigger_search(query)
expect(posts).to contain_exactly(post)
end
context "while searching as anon" do
it "returns an empty list" do
-posts = described_class.new(Guardian.new(nil)).search_for_topics(query)
+posts =
+  DiscourseAi::Completions::LLM.with_prepared_responses(
+    ["<ai>#{hypothetical_post}</ai>"],
+  ) { described_class.new(Guardian.new(nil)).search_for_topics(query) }
expect(posts).to be_empty
end

View File

@ -1,122 +0,0 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
let(:model_name) { "claude-2" }
let(:max_tokens) { 720 }
let(:content) do
{
resource_path: "/t/-/1",
content_title: "This is a title",
contents: [{ poster: "asd", id: 1, text: "This is a text" }],
}
end
def as_chunk(item)
{ ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
end
def expected_messages(contents, opts)
base_prompt = <<~TEXT
Human: Summarize the following forum discussion inside the given <input> tag.
Try to keep the summary in the same language as the forum discussion.
Format the response, including links, using markdown.
Try generating links as well the format is #{opts[:resource_path]}/POST_ID
For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
Wrap the whole the summary inside <ai> tags.
The discussion title is: #{opts[:content_title]}.
Don't use more than 400 words.
TEXT
text =
contents.reduce("") do |memo, item|
memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
base_prompt += "<input>#{text}</input>\nAssistant:\n"
end
describe "#summarize_in_chunks" do
context "when the content fits in a single chunk" do
it "performs a request to summarize" do
opts = content.except(:contents)
AnthropicCompletionStubs.stub_response(
expected_messages(content[:contents], opts),
"<ai>This is summary 1</ai>",
)
chunks = content[:contents].map { |c| as_chunk(c) }
summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1")
end
end
context "when the content fits in multiple chunks" do
it "performs a request for each one to summarize" do
content[:contents] << {
poster: "asd2",
id: 2,
text: "This is a different text to summarize",
}
opts = content.except(:contents)
content[:contents].each_with_index do |item, idx|
AnthropicCompletionStubs.stub_response(
expected_messages([item], opts),
"<ai>This is summary #{idx + 1}</ai>",
)
end
chunks = content[:contents].map { |c| as_chunk(c) }
summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
end
end
end
describe "#concatenate_summaries" do
it "combines all the different summaries into a single one" do
messages = <<~TEXT
Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
Include only the summary inside <ai> tags.
<input>summary 1</input>
<input>summary 2</input>
Assistant:
TEXT
AnthropicCompletionStubs.stub_response(messages, "<ai>concatenated summary</ai>")
expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
end
end
describe "#summarize_with_truncation" do
let(:max_tokens) { 709 }
it "truncates the context to meet the token limit" do
opts = content.except(:contents)
instructions = <<~TEXT
Human: Summarize the following forum discussion inside the given <input> tag.
Try to keep the summary in the same language as the forum discussion.
Format the response, including links, using markdown.
Try generating links as well the format is #{opts[:resource_path]}/POST_ID
For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
Wrap the whole the summary inside <ai> tags.
The discussion title is: #{opts[:content_title]}.
Don't use more than 400 words.
<input>(1 asd said: This is a</input>
Assistant:
TEXT
AnthropicCompletionStubs.stub_response(instructions, "<ai>truncated summary</ai>")
expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
end
end
end

View File

@ -1,95 +0,0 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::Models::Discourse do
subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
let(:model_name) { "bart-large-cnn-samsum" }
let(:max_tokens) { 20 }
let(:content) do
{
resource_path: "/t/1/POST_NUMBER",
content_title: "This is a title",
contents: [{ poster: "asd", id: 1, text: "This is a text" }],
}
end
before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" }
def stub_request(prompt, response)
WebMock
.stub_request(
:post,
"#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
)
.with(body: JSON.dump(model: model_name, content: prompt))
.to_return(status: 200, body: JSON.dump(summary_text: response))
end
def expected_messages(contents, opts)
contents.reduce("") do |memo, item|
memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
end
def as_chunk(item)
{ ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
end
describe "#summarize_in_chunks" do
context "when the content fits in a single chunk" do
it "performs a request to summarize" do
opts = content.except(:contents)
stub_request(expected_messages(content[:contents], opts), "This is summary 1")
chunks = content[:contents].map { |c| as_chunk(c) }
summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1")
end
end
context "when the content fits in multiple chunks" do
it "performs a request for each one to summarize" do
content[:contents] << {
poster: "asd2",
id: 2,
text: "This is a different text to summarize",
}
opts = content.except(:contents)
content[:contents].each_with_index do |item, idx|
stub_request(expected_messages([item], opts), "This is summary #{idx + 1}")
end
chunks = content[:contents].map { |c| as_chunk(c) }
summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
end
end
end
describe "#concatenate_summaries" do
it "combines all the different summaries into a single one" do
messages = ["summary 1", "summary 2"].join("\n")
stub_request(messages, "concatenated summary")
expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
end
end
describe "#summarize_with_truncation" do
let(:max_tokens) { 9 }
it "truncates the context to meet the token limit" do
opts = content.except(:contents)
stub_request("( 1 asd said : this is", "truncated summary")
expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
end
end
end

View File

@ -1,121 +0,0 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::Models::OpenAi do
subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
let(:model_name) { "gpt-3.5-turbo" }
let(:max_tokens) { 720 }
let(:content) do
{
resource_path: "/t/1/POST_NUMBER",
content_title: "This is a title",
contents: [{ poster: "asd", id: 1, text: "This is a text" }],
}
end
def as_chunk(item)
{ ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
end
def expected_messages(contents, opts)
base_prompt = <<~TEXT
You are a summarization bot.
You effectively summarise any text and reply ONLY with ONLY the summarized text.
You condense it into a shorter version.
You understand and generate Discourse forum Markdown.
You format the response, including links, using markdown.
Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)
The discussion title is: #{opts[:content_title]}.
TEXT
messages = [{ role: "system", content: base_prompt }]
text =
contents.reduce("") do |memo, item|
memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
messages << {
role: "user",
content:
"Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n#{text}",
}
end
describe "#summarize_in_chunks" do
context "when the content fits in a single chunk" do
it "performs a request to summarize" do
opts = content.except(:contents)
OpenAiCompletionsInferenceStubs.stub_response(
expected_messages(content[:contents], opts),
"This is summary 1",
)
chunks = content[:contents].map { |c| as_chunk(c) }
summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1")
end
end
context "when the content fits in multiple chunks" do
it "performs a request for each one to summarize" do
content[:contents] << {
poster: "asd2",
id: 2,
text: "This is a different text to summarize",
}
opts = content.except(:contents)
content[:contents].each_with_index do |item, idx|
OpenAiCompletionsInferenceStubs.stub_response(
expected_messages([item], opts),
"This is summary #{idx + 1}",
)
end
chunks = content[:contents].map { |c| as_chunk(c) }
summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
end
end
end
describe "#concatenate_summaries" do
it "combines all the different summaries into a single one" do
messages = [
{ role: "system", content: "You are a helpful bot" },
{
role: "user",
content:
"Concatenate these disjoint summaries, creating a cohesive narrative. Keep the summary in the same language used in the text below.\nsummary 1\nsummary 2",
},
]
OpenAiCompletionsInferenceStubs.stub_response(messages, "concatenated summary")
expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
end
end
describe "#summarize_with_truncation" do
let(:max_tokens) { 709 }
it "truncates the context to meet the token limit" do
opts = content.except(:contents)
truncated_version = expected_messages(content[:contents], opts)
truncated_version.last[
:content
] = "Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n(1 asd said: This is a"
OpenAiCompletionsInferenceStubs.stub_response(truncated_version, "truncated summary")
expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
end
end
end

View File

@ -1,28 +1,35 @@
# frozen_string_literal: true
require_relative "../../../../support/summarization/dummy_completion_model"
RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
describe "#summarize" do
subject(:strategy) { described_class.new(model) }
let(:summarize_text) { "This is a text" }
-let(:model) { DummyCompletionModel.new(model_tokens) }
let(:model_tokens) do
# Make sure each content fits in a single chunk.
-  DiscourseAi::Tokenizer::BertTokenizer.size("(1 asd said: This is a text ") + 3
+  # 700 is the number of tokens reserved for the prompt.
+  700 + DiscourseAi::Tokenizer::OpenAiTokenizer.size("(1 asd said: This is a text ") + 3
end
-let(:user) { User.new }
+let(:model) do
+  DiscourseAi::Summarization::Models::OpenAi.new("gpt-4", max_tokens: model_tokens)
+end
let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
+let(:single_summary) { "this is a single summary" }
+let(:concatenated_summary) { "this is a concatenated summary" }
+let(:user) { User.new }
context "when the content to summarize fits in a single call" do
it "does one call to summarize content" do
-result = strategy.summarize(content, user)
+result =
+  DiscourseAi::Completions::LLM.with_prepared_responses([single_summary]) do |spy|
+    strategy.summarize(content, user).tap { expect(spy.completions).to eq(1) }
+  end
-expect(model.summarization_calls).to eq(1)
-expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
+expect(result[:summary]).to eq(single_summary)
end
end
@ -30,10 +37,12 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
it "summarizes each chunk and then concatenates them" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
-result = strategy.summarize(content, user)
+result =
+  DiscourseAi::Completions::LLM.with_prepared_responses(
+    [single_summary, single_summary, concatenated_summary],
+  ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) } }
-expect(model.summarization_calls).to eq(3)
-expect(result[:summary]).to eq(DummyCompletionModel::CONCATENATED_SUMMARIES)
+expect(result[:summary]).to eq(concatenated_summary)
end
end
end

View File

@ -1,28 +0,0 @@
# frozen_string_literal: true
require_relative "../../../../support/summarization/dummy_completion_model"
RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do
describe "#summarize" do
subject(:strategy) { described_class.new(model) }
let(:summarize_text) { "This is a text" }
let(:model_tokens) { summarize_text.length }
let(:model) { DummyCompletionModel.new(model_tokens) }
let(:user) { User.new }
let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
context "when the content to summarize doesn't fit in a single call" do
it "summarizes a truncated version" do
content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
result = strategy.summarize(content, user)
expect(model.summarization_calls).to eq(1)
expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
end
end
end
end

View File

@ -1,46 +0,0 @@
# frozen_string_literal: true
class DummyCompletionModel
SINGLE_SUMMARY = "this is a single summary"
CONCATENATED_SUMMARIES = "this is a concatenated summary"
def initialize(max_tokens)
@summarization_calls = 0
@available_tokens = max_tokens
end
attr_reader :max_length, :summarization_calls, :available_tokens
delegate :can_expand_tokens?, to: :tokenizer
def summarize_single(single_chunk, opts)
@summarization_calls += 1
SINGLE_SUMMARY
end
def summarize_in_chunks(chunks, opts)
chunks.map do |chunk|
chunk[:summary] = SINGLE_SUMMARY
@summarization_calls += 1
chunk
end
end
def concatenate_summaries(summaries)
@summarization_calls += 1
CONCATENATED_SUMMARIES
end
def summarize_with_truncation(_contents, _opts)
@summarization_calls += 1
SINGLE_SUMMARY
end
def format_content_item(item)
"(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
end
def tokenizer
DiscourseAi::Tokenizer::BertTokenizer
end
end