REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297)
* DEV: One LLM abstraction to rule them all
* REFACTOR: HyDE search uses the new LLM abstraction
* REFACTOR: Summarization uses the LLM abstraction
* Updated documentation and made small fixes
* Remove Bedrock claude-2 restriction
This commit is contained in:
parent 53b7f031ba
commit 3064d4c288
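For orientation, here is how the pieces added below fit together. This sketch is illustrative and not part of the commit; it follows the facade's doc comment in llm.rb and the call sites in semantic_search.rb and fold_content.rb further down. The model name and prompt text are made-up examples.

# Illustrative only: prompt values and model name are hypothetical examples.
prompt = {
  insts: "You are a summarization bot.",
  input: "Here is the text, inside <input></input> XML tags:\n<input>...</input>",
  post_insts: "Wrap the summary inside <ai></ai> tags.",
}

llm = DiscourseAi::Completions::LLM.proxy("gpt-3.5-turbo")

# Buffered call: returns the whole completion as a String.
summary = llm.completion!(prompt, Discourse.system_user)

# Streaming call: the block receives each partial plus a cancel proc.
llm.completion!(prompt, Discourse.system_user) do |partial, cancel|
  print partial
  cancel.call if partial.include?("</ai>")
end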
@@ -0,0 +1,35 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
      class ChatGPT
        def self.can_translate?(model_name)
          %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
        end

        def translate(generic_prompt)
          open_ai_prompt = [
            {
              role: "system",
              content: [generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n"),
            },
          ]

          if generic_prompt[:examples]
            generic_prompt[:examples].each do |example_pair|
              open_ai_prompt << { role: "user", content: example_pair.first }
              open_ai_prompt << { role: "assistant", content: example_pair.second }
            end
          end

          open_ai_prompt << { role: "user", content: generic_prompt[:input] }
        end

        def tokenizer
          DiscourseAi::Tokenizer::OpenAiTokenizer
        end
      end
    end
  end
end
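For concreteness, a sketch of what ChatGPT#translate returns for a minimal generic prompt (illustrative values; the specs at the end of this diff exercise the full behavior):

dialect = DiscourseAi::Completions::Dialects::ChatGPT.new
dialect.translate(insts: "Act as a title generator.", input: "Some text")
# => [
#      { role: "system", content: "Act as a title generator.\n" }, # insts joined with the (empty) post_insts
#      { role: "user", content: "Some text" },
#    ]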
@@ -0,0 +1,37 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
      class Claude
        def self.can_translate?(model_name)
          %w[claude-instant-1 claude-2].include?(model_name)
        end

        def translate(generic_prompt)
          claude_prompt = +"Human: #{generic_prompt[:insts]}\n"

          claude_prompt << build_examples(generic_prompt[:examples]) if generic_prompt[:examples]

          claude_prompt << "#{generic_prompt[:input]}\n"

          claude_prompt << "#{generic_prompt[:post_insts]}\n" if generic_prompt[:post_insts]

          claude_prompt << "Assistant:\n"
        end

        def tokenizer
          DiscourseAi::Tokenizer::AnthropicTokenizer
        end

        private

        def build_examples(examples_arr)
          examples_arr.reduce("") do |memo, example|
            memo += "<example>\nH: #{example[0]}\nA: #{example[1]}\n</example>\n"
          end
        end
      end
    end
  end
end
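The Claude dialect flattens the same generic format into a single Human/Assistant string; an illustrative call:

dialect = DiscourseAi::Completions::Dialects::Claude.new
dialect.translate(
  insts: "Act as a title generator.",
  input: "Some text",
  post_insts: "Use <ai></ai> tags.",
)
# => "Human: Act as a title generator.\nSome text\nUse <ai></ai> tags.\nAssistant:\n"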
@@ -0,0 +1,31 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
      class Llama2Classic
        def self.can_translate?(model_name)
          "Llama2-*-chat-hf" == model_name
        end

        def translate(generic_prompt)
          llama2_prompt =
            +"[INST]<<SYS>>#{[generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n")}<</SYS>>[/INST]\n"

          if generic_prompt[:examples]
            generic_prompt[:examples].each do |example_pair|
              llama2_prompt << "[INST]#{example_pair.first}[/INST]\n"
              llama2_prompt << "#{example_pair.second}\n"
            end
          end

          llama2_prompt << "[INST]#{generic_prompt[:input]}[/INST]\n"
        end

        def tokenizer
          DiscourseAi::Tokenizer::Llama2Tokenizer
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
      class OrcaStyle
        def self.can_translate?(model_name)
          %w[StableBeluga2 Upstage-Llama-2-*-instruct-v2].include?(model_name)
        end

        def translate(generic_prompt)
          orca_style_prompt =
            +"### System:\n#{[generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n")}\n"

          if generic_prompt[:examples]
            generic_prompt[:examples].each do |example_pair|
              orca_style_prompt << "### User:\n#{example_pair.first}\n"
              orca_style_prompt << "### Assistant:\n#{example_pair.second}\n"
            end
          end

          orca_style_prompt << "### User:\n#{generic_prompt[:input]}\n"

          orca_style_prompt << "### Assistant:\n"
        end

        def tokenizer
          DiscourseAi::Tokenizer::Llama2Tokenizer
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Endpoints
      class Anthropic < Base
        def self.can_contact?(model_name)
          %w[claude-instant-1 claude-2].include?(model_name)
        end

        def default_options
          { max_tokens_to_sample: 2000, model: model }
        end

        def provider_id
          AiApiAuditLog::Provider::Anthropic
        end

        private

        def model_uri
          @uri ||= URI("https://api.anthropic.com/v1/complete")
        end

        def prepare_payload(prompt, model_params)
          default_options
            .merge(model_params)
            .merge(prompt: prompt)
            .tap { |payload| payload[:stream] = true if @streaming_mode }
        end

        def prepare_request(payload)
          headers = {
            "anthropic-version" => "2023-06-01",
            "x-api-key" => SiteSetting.ai_anthropic_api_key,
            "content-type" => "application/json",
          }

          Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
        end

        def extract_completion_from(response_raw)
          JSON.parse(response_raw, symbolize_names: true)[:completion].to_s
        end

        def partials_from(decoded_chunk)
          decoded_chunk.split("\n").map { |line| line.split("data: ", 2)[1] }.compact
        end
      end
    end
  end
end
@@ -0,0 +1,86 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Endpoints
      class AwsBedrock < Base
        def self.can_contact?(model_name)
          SiteSetting.ai_bedrock_access_key_id.present? &&
            SiteSetting.ai_bedrock_secret_access_key.present? &&
            SiteSetting.ai_bedrock_region.present?
        end

        def default_options
          { max_tokens_to_sample: 20_000 }
        end

        def provider_id
          AiApiAuditLog::Provider::Anthropic
        end

        private

        def model_uri
          api_url =
            "https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model}/invoke"

          api_url = @streaming_mode ? (api_url + "-with-response-stream") : api_url

          URI(api_url)
        end

        def prepare_payload(prompt, model_params)
          default_options.merge(prompt: prompt).merge(model_params)
        end

        def prepare_request(payload)
          headers = { "content-type" => "application/json", "Accept" => "*/*" }

          signer =
            Aws::Sigv4::Signer.new(
              access_key_id: SiteSetting.ai_bedrock_access_key_id,
              region: SiteSetting.ai_bedrock_region,
              secret_access_key: SiteSetting.ai_bedrock_secret_access_key,
              service: "bedrock",
            )

          Net::HTTP::Post
            .new(model_uri, headers)
            .tap do |r|
              r.body = payload

              signed_request =
                signer.sign_request(req: r, http_method: r.method, url: model_uri, body: r.body)

              r.initialize_http_header(headers.merge(signed_request.headers))
            end
        end

        def decode(chunk)
          Aws::EventStream::Decoder
            .new
            .decode_chunk(chunk)
            .first
            .payload
            .string
            .then { JSON.parse(_1) }
            .dig("bytes")
            .then { Base64.decode64(_1) }
        rescue JSON::ParserError,
               Aws::EventStream::Errors::MessageChecksumError,
               Aws::EventStream::Errors::PreludeChecksumError => e
          Rails.logger.error("#{self.class.name}: #{e.message}")
          nil
        end

        def extract_completion_from(response_raw)
          JSON.parse(response_raw, symbolize_names: true)[:completion].to_s
        end

        def partials_from(decoded_chunk)
          [decoded_chunk]
        end
      end
    end
  end
end
@@ -0,0 +1,167 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Endpoints
      class Base
        CompletionFailed = Class.new(StandardError)
        TIMEOUT = 60

        def self.endpoint_for(model_name)
          # Order is important.
          # Bedrock has priority over Anthropic if credentials are present.
          [
            DiscourseAi::Completions::Endpoints::AwsBedrock,
            DiscourseAi::Completions::Endpoints::Anthropic,
            DiscourseAi::Completions::Endpoints::OpenAI,
            DiscourseAi::Completions::Endpoints::Huggingface,
          ].detect(-> { raise DiscourseAi::Completions::LLM::UNKNOWN_MODEL }) do |ek|
            ek.can_contact?(model_name)
          end
        end

        def self.can_contact?(_model_name)
          raise NotImplementedError
        end

        def initialize(model_name, tokenizer)
          @model = model_name
          @tokenizer = tokenizer
        end

        def perform_completion!(prompt, user, model_params = {})
          @streaming_mode = block_given?

          Net::HTTP.start(
            model_uri.host,
            model_uri.port,
            use_ssl: true,
            read_timeout: TIMEOUT,
            open_timeout: TIMEOUT,
            write_timeout: TIMEOUT,
          ) do |http|
            response_data = +""
            response_raw = +""
            request_body = prepare_payload(prompt, model_params).to_json

            request = prepare_request(request_body)

            http.request(request) do |response|
              if response.code.to_i != 200
                Rails.logger.error(
                  "#{self.class.name}: status: #{response.code.to_i} - body: #{response.body}",
                )
                raise CompletionFailed
              end

              log =
                AiApiAuditLog.new(
                  provider_id: provider_id,
                  user_id: user.id,
                  raw_request_payload: request_body,
                  request_tokens: prompt_size(prompt),
                )

              if !@streaming_mode
                response_raw = response.read_body
                response_data = extract_completion_from(response_raw)

                return response_data
              end

              begin
                cancelled = false
                cancel = lambda { cancelled = true }

                leftover = ""

                response.read_body do |chunk|
                  if cancelled
                    http.finish
                    return
                  end

                  decoded_chunk = decode(chunk)
                  response_raw << decoded_chunk

                  partials_from(leftover + decoded_chunk).each do |raw_partial|
                    next if cancelled
                    next if raw_partial.blank?

                    begin
                      partial = extract_completion_from(raw_partial)
                      leftover = ""
                      response_data << partial

                      yield partial, cancel if partial
                    rescue JSON::ParserError
                      leftover = raw_partial
                    end
                  end
                end
              rescue IOError, StandardError
                raise if !cancelled
              end

              return response_data
            ensure
              log.raw_response_payload = response_raw
              log.response_tokens = tokenizer.size(response_data)
              log.save!

              if Rails.env.development? && log
                puts "#{self.class.name}: request_tokens #{log.request_tokens} response_tokens #{log.response_tokens}"
              end
            end
          end
        end

        def default_options
          raise NotImplementedError
        end

        def provider_id
          raise NotImplementedError
        end

        def prompt_size(prompt)
          tokenizer.size(extract_prompt_for_tokenizer(prompt))
        end

        attr_reader :tokenizer

        protected

        attr_reader :model

        def model_uri
          raise NotImplementedError
        end

        def prepare_payload(_prompt, _model_params)
          raise NotImplementedError
        end

        def prepare_request(_payload)
          raise NotImplementedError
        end

        def extract_completion_from(_response_raw)
          raise NotImplementedError
        end

        def decode(chunk)
          chunk
        end

        def partials_from(_decoded_chunk)
          raise NotImplementedError
        end

        def extract_prompt_for_tokenizer(prompt)
          prompt
        end
      end
    end
  end
end
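The hooks above define the contract for plugging in a new provider. A hypothetical subclass, sketched only to show which methods a concrete endpoint overrides (the model name and URL are invented, and the JSON shape mirrors the Anthropic endpoint above):

class MyEndpoint < DiscourseAi::Completions::Endpoints::Base
  def self.can_contact?(model_name)
    model_name == "my-model" # hypothetical model name
  end

  def default_options
    { model: model }
  end

  def provider_id
    AiApiAuditLog::Provider::OpenAI # reuses an existing audit-log provider id for the sketch
  end

  private

  def model_uri
    URI("https://example.com/v1/complete") # invented URL
  end

  def prepare_payload(prompt, model_params)
    default_options.merge(model_params).merge(prompt: prompt)
  end

  def prepare_request(payload)
    Net::HTTP::Post.new(model_uri, "content-type" => "application/json").tap { |r| r.body = payload }
  end

  def extract_completion_from(response_raw)
    JSON.parse(response_raw, symbolize_names: true)[:completion].to_s
  end

  def partials_from(decoded_chunk)
    decoded_chunk.split("\n")
  end
end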
@@ -0,0 +1,47 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Endpoints
      class CannedResponse
        CANNED_RESPONSE_ERROR = Class.new(StandardError)

        def self.can_contact?(_)
          Rails.env.test?
        end

        def initialize(responses)
          @responses = responses
          @completions = 0
        end

        attr_reader :responses, :completions

        def perform_completion!(_prompt, _user, _model_params)
          response = responses[completions]
          if response.nil?
            raise CANNED_RESPONSE_ERROR,
                  "The number of completions you requested exceeds the number of canned responses"
          end

          @completions += 1
          if block_given?
            cancelled = false
            cancel_fn = lambda { cancelled = true }

            response.each_char do |char|
              break if cancelled
              yield(char, cancel_fn)
            end
          else
            response
          end
        end

        def tokenizer
          DiscourseAi::Tokenizer::OpenAiTokenizer
        end
      end
    end
  end
end
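Paired with LLM.with_prepared_responses (defined in llm.rb below), this endpoint replaces WebMock stubs in specs. A sketch, assuming `user` comes from the surrounding spec:

DiscourseAi::Completions::LLM.with_prepared_responses(["fake response"]) do
  llm = DiscourseAi::Completions::LLM.proxy("gpt-3.5-turbo")

  llm.completion!({ insts: "You are a bot.", input: "Hello" }, user) # => "fake response"
end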
@@ -0,0 +1,75 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Endpoints
      class Huggingface < Base
        def self.can_contact?(model_name)
          %w[StableBeluga2 Upstage-Llama-2-*-instruct-v2 Llama2-*-chat-hf].include?(model_name)
        end

        def default_options
          { parameters: { repetition_penalty: 1.1, temperature: 0.7 } }
        end

        def provider_id
          AiApiAuditLog::Provider::HuggingFaceTextGeneration
        end

        private

        def model_uri
          URI(SiteSetting.ai_hugging_face_api_url).tap do |uri|
            uri.path = @streaming_mode ? "/generate_stream" : "/generate"
          end
        end

        def prepare_payload(prompt, model_params)
          default_options
            .merge(inputs: prompt)
            .tap do |payload|
              payload[:parameters].merge!(model_params)

              token_limit = SiteSetting.ai_hugging_face_token_limit || 2_000

              payload[:parameters][:max_new_tokens] = token_limit - prompt_size(prompt)
            end
        end

        def prepare_request(payload)
          headers =
            { "Content-Type" => "application/json" }.tap do |h|
              if SiteSetting.ai_hugging_face_api_key.present?
                h["Authorization"] = "Bearer #{SiteSetting.ai_hugging_face_api_key}"
              end
            end

          Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
        end

        def extract_completion_from(response_raw)
          parsed = JSON.parse(response_raw, symbolize_names: true)

          if @streaming_mode
            # The last chunk contains the full response, which we already yielded.
            return if parsed.dig(:token, :special)

            parsed.dig(:token, :text).to_s
          else
            parsed[:generated_text].to_s
          end
        end

        def partials_from(decoded_chunk)
          decoded_chunk
            .split("\n")
            .map do |line|
              data = line.split("data: ", 2)[1]
              data&.squish == "[DONE]" ? nil : data
            end
            .compact
        end
      end
    end
  end
end
@@ -0,0 +1,92 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Endpoints
      class OpenAI < Base
        def self.can_contact?(model_name)
          %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
        end

        def default_options
          { model: model }
        end

        def provider_id
          AiApiAuditLog::Provider::OpenAI
        end

        private

        def model_uri
          url =
            if model.include?("gpt-4")
              if model.include?("32k")
                SiteSetting.ai_openai_gpt4_32k_url
              else
                SiteSetting.ai_openai_gpt4_url
              end
            else
              if model.include?("16k")
                SiteSetting.ai_openai_gpt35_16k_url
              else
                SiteSetting.ai_openai_gpt35_url
              end
            end

          URI(url)
        end

        def prepare_payload(prompt, model_params)
          default_options
            .merge(model_params)
            .merge(messages: prompt)
            .tap { |payload| payload[:stream] = true if @streaming_mode }
        end

        def prepare_request(payload)
          headers =
            { "Content-Type" => "application/json" }.tap do |h|
              if model_uri.host.include?("azure")
                h["api-key"] = SiteSetting.ai_openai_api_key
              else
                h["Authorization"] = "Bearer #{SiteSetting.ai_openai_api_key}"
              end

              if SiteSetting.ai_openai_organization.present?
                h["OpenAI-Organization"] = SiteSetting.ai_openai_organization
              end
            end

          Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
        end

        def extract_completion_from(response_raw)
          parsed = JSON.parse(response_raw, symbolize_names: true)

          (
            if @streaming_mode
              parsed.dig(:choices, 0, :delta, :content)
            else
              parsed.dig(:choices, 0, :message, :content)
            end
          ).to_s
        end

        def partials_from(decoded_chunk)
          decoded_chunk
            .split("\n")
            .map do |line|
              data = line.split("data: ", 2)[1]
              data == "[DONE]" ? nil : data
            end
            .compact
        end

        def extract_prompt_for_tokenizer(prompt)
          prompt.map { |message| message[:content] || message["content"] || "" }.join("\n")
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
# frozen_string_literal: true

module DiscourseAi
  module Completions
    class EntryPoint
      def load_files
        require_relative "dialects/chat_gpt"
        require_relative "dialects/llama2_classic"
        require_relative "dialects/orca_style"
        require_relative "dialects/claude"

        require_relative "endpoints/canned_response"
        require_relative "endpoints/base"
        require_relative "endpoints/anthropic"
        require_relative "endpoints/aws_bedrock"
        require_relative "endpoints/open_ai"
        require_relative "endpoints/hugging_face"

        require_relative "llm"
      end

      def inject_into(_)
      end
    end
  end
end
@@ -0,0 +1,81 @@
# frozen_string_literal: true

# A facade that abstracts multiple LLMs behind a single interface.
#
# Internally, it consists of the combination of a dialect and an endpoint.
# After receiving a prompt in our generic format, it translates it to
# the target model and routes the completion request through the correct gateway.
#
# Use the .proxy method to instantiate an object.
# It chooses the best dialect and endpoint for the model you want to interact with.
#
# Tests of modules that perform LLM calls can use .with_prepared_responses to return canned responses
# instead of relying on WebMock stubs like we did in the past.
#
module DiscourseAi
  module Completions
    class LLM
      UNKNOWN_MODEL = Class.new(StandardError)

      def self.with_prepared_responses(responses)
        @canned_response = DiscourseAi::Completions::Endpoints::CannedResponse.new(responses)

        yield(@canned_response).tap { @canned_response = nil }
      end

      def self.proxy(model_name)
        dialects = [
          DiscourseAi::Completions::Dialects::Claude,
          DiscourseAi::Completions::Dialects::Llama2Classic,
          DiscourseAi::Completions::Dialects::ChatGPT,
          DiscourseAi::Completions::Dialects::OrcaStyle,
        ]

        dialect =
          dialects.detect(-> { raise UNKNOWN_MODEL }) { |d| d.can_translate?(model_name) }.new

        return new(dialect, @canned_response, model_name) if @canned_response

        gateway =
          DiscourseAi::Completions::Endpoints::Base.endpoint_for(model_name).new(
            model_name,
            dialect.tokenizer,
          )

        new(dialect, gateway, model_name)
      end

      def initialize(dialect, gateway, model_name)
        @dialect = dialect
        @gateway = gateway
        @model_name = model_name
      end

      delegate :tokenizer, to: :dialect

      # @param generic_prompt { Hash } - Prompt using our generic format.
      # We use the following keys from the hash:
      #   - insts: String with instructions for the LLM.
      #   - input: String containing user input.
      #   - examples (optional): Array of arrays with examples of inputs and responses. Each inner array is an input/response pair like [[example1, response1], [example2, response2]].
      #   - post_insts (optional): Additional instructions for the LLM. Some dialects like Claude add these at the end of the prompt.
      #
      # @param user { User } - User requesting the summary.
      #
      # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
      #
      # @returns { String } - Completion result.
      def completion!(generic_prompt, user, &partial_read_blk)
        prompt = dialect.translate(generic_prompt)

        model_params = generic_prompt.dig(:params, model_name) || {}

        gateway.perform_completion!(prompt, user, model_params, &partial_read_blk)
      end

      private

      attr_reader :dialect, :gateway, :model_name
    end
  end
end
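For reference, a generic prompt with every documented key populated (all values illustrative):

generic_prompt = {
  insts: "I want you to act as a title generator.",
  input: "Here is the text, inside <input></input> XML tags: <input>...</input>",
  examples: [["<input>example text</input>", "<ai>example titles</ai>"]],
  post_insts: "Put the titles between <ai></ai> tags.",
  # Optional per-model overrides, read via generic_prompt.dig(:params, model_name):
  params: {
    "claude-2" => { max_tokens_to_sample: 300 },
  },
}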
@@ -15,11 +15,6 @@ module DiscourseAi
        require_relative "semantic_related"
        require_relative "semantic_topic_query"

        require_relative "hyde_generators/base"
        require_relative "hyde_generators/openai"
        require_relative "hyde_generators/anthropic"
        require_relative "hyde_generators/llama2"
        require_relative "hyde_generators/llama2_ftos"
        require_relative "semantic_search"
      end
@@ -1,37 +0,0 @@
# frozen_string_literal: true

module DiscourseAi
  module Embeddings
    module HydeGenerators
      class Anthropic < DiscourseAi::Embeddings::HydeGenerators::Base
        def prompt(search_term)
          <<~TEXT
            Human: Given a search term given between <input> tags, generate a forum post about a given subject.
            #{basic_prompt_instruction}
            <input>#{search_term}</input>

            Respond with the generated post between <ai> tags.

            Assistant:\n
          TEXT
        end

        def models
          %w[claude-instant-1 claude-2]
        end

        def hypothetical_post_from(query)
          response =
            ::DiscourseAi::Inference::AnthropicCompletions.perform!(
              prompt(query),
              SiteSetting.ai_embeddings_semantic_search_hyde_model,
              max_tokens: 400,
              stop_sequences: ["</ai>"],
            ).dig(:completion)

          Nokogiri::HTML5.fragment(response).at("ai").text
        end
      end
    end
  end
end
@@ -1,28 +0,0 @@
# frozen_string_literal: true

module DiscourseAi
  module Embeddings
    module HydeGenerators
      class Base
        def self.current_hyde_model
          DiscourseAi::Embeddings::HydeGenerators::Base.descendants.find do |generator_klass|
            generator_klass.new.models.include?(
              SiteSetting.ai_embeddings_semantic_search_hyde_model,
            )
          end
        end

        def basic_prompt_instruction
          <<~TEXT
            Act as a content writer for a forum.
            The forum description is as follows:
            #{SiteSetting.title}
            #{SiteSetting.site_description}

            Given the forum description write a forum post about the following subject:
          TEXT
        end
      end
    end
  end
end
@@ -1,35 +0,0 @@
# frozen_string_literal: true

module DiscourseAi
  module Embeddings
    module HydeGenerators
      class Llama2 < DiscourseAi::Embeddings::HydeGenerators::Base
        def prompt(search_term)
          <<~TEXT
            [INST] <<SYS>>
            You are a helpful bot
            You create forum posts about a given subject
            <</SYS>>

            #{basic_prompt_instruction}
            #{search_term}
            [/INST]
            Here is a forum post about the above subject:
          TEXT
        end

        def models
          ["Llama2-*-chat-hf"]
        end

        def hypothetical_post_from(query)
          ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
            prompt(query),
            SiteSetting.ai_embeddings_semantic_search_hyde_model,
            token_limit: 400,
          ).dig(:generated_text)
        end
      end
    end
  end
end
@@ -1,28 +0,0 @@
# frozen_string_literal: true

module DiscourseAi
  module Embeddings
    module HydeGenerators
      class Llama2Ftos < DiscourseAi::Embeddings::HydeGenerators::Llama2
        def prompt(search_term)
          <<~TEXT
            ### System:
            You are a helpful bot
            You create forum posts about a given subject

            ### User:
            #{basic_prompt_instruction}
            #{search_term}

            ### Assistant:
            Here is a forum post about the above subject:
          TEXT
        end

        def models
          %w[StableBeluga2 Upstage-Llama-2-*-instruct-v2]
        end
      end
    end
  end
end
@@ -1,31 +0,0 @@
# frozen_string_literal: true

module DiscourseAi
  module Embeddings
    module HydeGenerators
      class OpenAi < DiscourseAi::Embeddings::HydeGenerators::Base
        def prompt(search_term)
          [
            {
              role: "system",
              content: "You are a helpful bot. You create forum posts about a given subject.",
            },
            { role: "user", content: "#{basic_prompt_instruction}\n#{search_term}" },
          ]
        end

        def models
          %w[gpt-3.5-turbo gpt-4]
        end

        def hypothetical_post_from(query)
          ::DiscourseAi::Inference::OpenAiCompletions.perform!(
            prompt(query),
            SiteSetting.ai_embeddings_semantic_search_hyde_model,
            max_tokens: 400,
          ).dig(:choices, 0, :message, :content)
        end
      end
    end
  end
end
@@ -55,10 +55,7 @@ module DiscourseAi
        hypothetical_post =
          Discourse
            .cache
            .fetch(hyde_key, expires_in: 1.week) do
              hyde_generator = DiscourseAi::Embeddings::HydeGenerators::Base.current_hyde_model.new
              hyde_generator.hypothetical_post_from(search_term)
            end
            .fetch(hyde_key, expires_in: 1.week) { hypothetical_post_from(search_term) }

        hypothetical_post_embedding =
          Discourse
@@ -96,6 +93,30 @@ module DiscourseAi
      def build_embedding_key(digest, hyde_model, embedding_model)
        "#{build_hyde_key(digest, hyde_model)}-#{embedding_model}"
      end

      def hypothetical_post_from(search_term)
        prompt = {
          insts: <<~TEXT,
            You are a content creator for a forum. The forum description is as follows:
            #{SiteSetting.title}
            #{SiteSetting.site_description}
            Given the forum description write a forum post about the following subject:
          TEXT
          input: <<~TEXT,
            Using this description, write a forum post about the subject inside the <input></input> XML tags:

            <input>#{search_term}</input>
          TEXT
          post_insts: "Put the forum post between <ai></ai> tags.",
        }

        llm_response =
          DiscourseAi::Completions::LLM.proxy(
            SiteSetting.ai_embeddings_semantic_search_hyde_model,
          ).completion!(prompt, @guardian.user)

        Nokogiri::HTML5.fragment(llm_response).at("ai").text
      end
    end
  end
end
@@ -21,7 +21,6 @@ module DiscourseAi
          Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
          Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
          Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
          Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
          Models::Anthropic.new("claude-2", max_tokens: 100_000),
          Models::Anthropic.new("claude-instant-1", max_tokens: 100_000),
          Models::Llama2.new("Llama2-chat-hf", max_tokens: SiteSetting.ai_hugging_face_token_limit),
@@ -36,6 +35,7 @@ module DiscourseAi
        end

        truncable_models = [
          Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
          Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024),
          Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512),
        ]
@@ -19,109 +19,6 @@ module DiscourseAi
            setting: "ai_anthropic_api_key",
          )
        end

        def concatenate_summaries(summaries, &on_partial_blk)
          instructions = <<~TEXT
            Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
            Include only the summary inside <ai> tags.
          TEXT

          instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" }
          instructions += "Assistant:\n"

          completion(instructions, &on_partial_blk)
        end

        def summarize_with_truncation(contents, opts, &on_partial_blk)
          instructions = build_base_prompt(opts)

          text_to_summarize = contents.map { |c| format_content_item(c) }.join
          truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)

          instructions += "<input>#{truncated_content}</input>\nAssistant:\n"

          completion(instructions, &on_partial_blk)
        end

        def summarize_single(chunk_text, opts, &on_partial_blk)
          summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
        end

        private

        def summarize_chunk(chunk_text, opts, &on_partial_blk)
          completion(
            build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n",
            &on_partial_blk
          )
        end

        def build_base_prompt(opts)
          initial_instruction =
            if opts[:single_chunk]
              "Summarize the following forum discussion inside the given <input> tag, creating a cohesive narrative."
            else
              "Summarize the following forum discussion inside the given <input> tag."
            end

          base_prompt = <<~TEXT
            Human: #{initial_instruction}
            Try to keep the summary in the same language as the forum discussion.
            Format the response, including links, using markdown.
          TEXT

          base_prompt += <<~TEXT if opts[:resource_path]
            Try generating links as well the format is #{opts[:resource_path]}/POST_ID
            For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
          TEXT

          base_prompt += "Wrap the whole the summary inside <ai> tags.\n"

          base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
            :content_title
          ]

          base_prompt += "Don't use more than 400 words.\n" unless opts[:single_chunk]

          base_prompt
        end

        def completion(prompt, &on_partial_blk)
          # We need to discard any text that might come before the <ai> tag.
          # Instructing the model to reply only with the summary seems impossible.
          pre_tag_partial = +""

          if on_partial_blk
            on_partial_read =
              Proc.new do |partial|
                if pre_tag_partial.include?("<ai>")
                  on_partial_blk.call(partial[:completion])
                else
                  pre_tag_partial << partial[:completion]
                end
              end

            response =
              ::DiscourseAi::Inference::AnthropicCompletions.perform!(
                prompt,
                model,
                &on_partial_read
              )
          else
            response =
              ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(
                :completion,
              )
          end

          Nokogiri::HTML5.fragment(response).at("ai")&.text.presence || response
        end

        def tokenizer
          DiscourseAi::Tokenizer::AnthropicTokenizer
        end

        attr_reader :max_tokens
      end
    end
  end
@@ -21,29 +21,6 @@ module DiscourseAi
          raise NotImplemented
        end

        def summarize_in_chunks(chunks, opts)
          chunks.map do |chunk|
            chunk[:summary] = summarize_chunk(chunk[:summary], opts)
            chunk
          end
        end

        def concatenate_summaries(_summaries)
          raise NotImplemented
        end

        def summarize_with_truncation(_contents, _opts)
          raise NotImplemented
        end

        def summarize_single(chunk_text, opts)
          raise NotImplemented
        end

        def format_content_item(item)
          "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
        end

        def available_tokens
          max_tokens - reserved_tokens
        end
@@ -57,16 +34,6 @@ module DiscourseAi
          # ~500 words
          700
        end

        def summarize_chunk(_chunk_text, _opts)
          raise NotImplemented
        end

        def tokenizer
          raise NotImplemented
        end

        delegate :can_expand_tokens?, to: :tokenizer
      end
    end
  end
@@ -22,44 +22,11 @@ module DiscourseAi
          )
        end

        def concatenate_summaries(summaries)
          completion(summaries.join("\n"))
        end

        def summarize_with_truncation(contents, opts)
          text_to_summarize = contents.map { |c| format_content_item(c) }.join
          truncated_content =
            ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text_to_summarize, available_tokens)

          completion(truncated_content)
        end

        def summarize_single(chunk_text, _opts)
          completion(chunk_text)
        end

        private

        def summarize_chunk(chunk_text, _opts)
          completion(chunk_text)
        end

        def reserved_tokens
          0
        end

        def completion(prompt)
          ::DiscourseAi::Inference::DiscourseClassifier.perform!(
            "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
            model,
            prompt,
            SiteSetting.ai_summarization_discourse_service_api_key,
          ).dig(:summary_text)
        end

        def tokenizer
          DiscourseAi::Tokenizer::BertTokenizer
        end
      end
    end
  end
end
@@ -19,104 +19,6 @@ module DiscourseAi
            setting: "ai_hugging_face_api_url",
          )
        end

        def concatenate_summaries(summaries, &on_partial_blk)
          prompt = <<~TEXT
            [INST] <<SYS>>
            You are a helpful bot
            <</SYS>>

            Concatenate these disjoint summaries, creating a cohesive narrative:
            #{summaries.join("\n")} [/INST]
          TEXT

          completion(prompt, &on_partial_blk)
        end

        def summarize_with_truncation(contents, opts, &on_partial_blk)
          text_to_summarize = contents.map { |c| format_content_item(c) }.join
          truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)

          prompt = <<~TEXT
            [INST] <<SYS>>
            #{build_base_prompt(opts)}
            <</SYS>>

            Summarize the following in up to 400 words:
            #{truncated_content} [/INST]
            Here is a summary of the above topic:
          TEXT

          completion(prompt, &on_partial_blk)
        end

        def summarize_single(chunk_text, opts, &on_partial_blk)
          summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
        end

        private

        def summarize_chunk(chunk_text, opts, &on_partial_blk)
          summary_instruction =
            if opts[:single_chunk]
              "Summarize the following forum discussion, creating a cohesive narrative:"
            else
              "Summarize the following in up to 400 words:"
            end

          prompt = <<~TEXT
            [INST] <<SYS>>
            #{build_base_prompt(opts)}
            <</SYS>>

            #{summary_instruction}
            #{chunk_text} [/INST]
            Here is a summary of the above topic:
          TEXT

          completion(prompt, &on_partial_blk)
        end

        def build_base_prompt(opts)
          base_prompt = <<~TEXT
            You are a summarization bot.
            You effectively summarise any text and reply ONLY with ONLY the summarized text.
            You condense it into a shorter version.
            You understand and generate Discourse forum Markdown.
          TEXT

          if opts[:resource_path]
            base_prompt +=
              "Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
          end

          base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
            :content_title
          ]

          base_prompt
        end

        def completion(prompt, &on_partial_blk)
          if on_partial_blk
            on_partial_read =
              Proc.new { |partial| on_partial_blk.call(partial.dig(:token, :text).to_s) }

            ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
              prompt,
              model,
              &on_partial_read
            )
          else
            ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
              :generated_text,
            )
          end
        end

        def tokenizer
          DiscourseAi::Tokenizer::Llama2Tokenizer
        end
      end
    end
  end
@@ -7,65 +7,6 @@ module DiscourseAi
        def display_name
          "Llama2FineTunedOrcaStyle's #{SiteSetting.ai_hugging_face_model_display_name.presence || model}"
        end

        def concatenate_summaries(summaries, &on_partial_blk)
          prompt = <<~TEXT
            ### System:
            You are a helpful bot

            ### User:
            Concatenate these disjoint summaries, creating a cohesive narrative:
            #{summaries.join("\n")}

            ### Assistant:
          TEXT

          completion(prompt, &on_partial_blk)
        end

        def summarize_with_truncation(contents, opts, &on_partial_blk)
          text_to_summarize = contents.map { |c| format_content_item(c) }.join
          truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)

          prompt = <<~TEXT
            ### System:
            #{build_base_prompt(opts)}

            ### User:
            Summarize the following in up to 400 words:
            #{truncated_content}

            ### Assistant:
            Here is a summary of the above topic:
          TEXT

          completion(prompt, &on_partial_blk)
        end

        private

        def summarize_chunk(chunk_text, opts, &on_partial_blk)
          summary_instruction =
            if opts[:single_chunk]
              "Summarize the following forum discussion, creating a cohesive narrative:"
            else
              "Summarize the following in up to 400 words:"
            end

          prompt = <<~TEXT
            ### System:
            #{build_base_prompt(opts)}

            ### User:
            #{summary_instruction}
            #{chunk_text}

            ### Assistant:
            Here is a summary of the above topic:
          TEXT

          completion(prompt, &on_partial_blk)
        end
      end
    end
  end
@@ -19,100 +19,6 @@ module DiscourseAi
            setting: "ai_openai_api_key",
          )
        end

        def concatenate_summaries(summaries, &on_partial_blk)
          messages = [
            { role: "system", content: "You are a helpful bot" },
            {
              role: "user",
              content:
                "Concatenate these disjoint summaries, creating a cohesive narrative. Keep the summary in the same language used in the text below.\n#{summaries.join("\n")}",
            },
          ]

          completion(messages, &on_partial_blk)
        end

        def summarize_with_truncation(contents, opts, &on_partial_blk)
          messages = [{ role: "system", content: build_base_prompt(opts) }]

          text_to_summarize = contents.map { |c| format_content_item(c) }.join
          truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)

          messages << {
            role: "user",
            content:
              "Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n#{truncated_content}",
          }

          completion(messages, &on_partial_blk)
        end

        def summarize_single(chunk_text, opts, &on_partial_blk)
          summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
        end

        private

        def summarize_chunk(chunk_text, opts, &on_partial_blk)
          summary_instruction =
            if opts[:single_chunk]
              "Summarize the following forum discussion, creating a cohesive narrative. Keep the summary in the same language used in the text below."
            else
              "Summarize the following in 400 words. Keep the summary in the same language used in the text below."
            end

          completion(
            [
              { role: "system", content: build_base_prompt(opts) },
              { role: "user", content: "#{summary_instruction}\n#{chunk_text}" },
            ],
            &on_partial_blk
          )
        end

        def build_base_prompt(opts)
          base_prompt = <<~TEXT
            You are a summarization bot.
            You effectively summarise any text and reply ONLY with ONLY the summarized text.
            You condense it into a shorter version.
            You understand and generate Discourse forum Markdown.
            You format the response, including links, using markdown.
          TEXT

          if opts[:resource_path]
            base_prompt +=
              "Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
          end

          base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
            :content_title
          ]

          base_prompt
        end

        def completion(prompt, &on_partial_blk)
          if on_partial_blk
            on_partial_read =
              Proc.new do |partial|
                on_partial_blk.call(partial.dig(:choices, 0, :delta, :content).to_s)
              end

            ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, &on_partial_read)
          else
            ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
              :choices,
              0,
              :message,
              :content,
            )
          end
        end

        def tokenizer
          DiscourseAi::Tokenizer::OpenAiTokenizer
        end
      end
    end
  end
@@ -16,22 +16,29 @@ module DiscourseAi
               :model,
               to: :completion_model

      def summarize(content, _user, &on_partial_blk)
      def summarize(content, user, &on_partial_blk)
        opts = content.except(:contents)

        chunks = split_into_chunks(content[:contents])
        llm = DiscourseAi::Completions::LLM.proxy(completion_model.model)

        chunks = split_into_chunks(llm.tokenizer, content[:contents])

        if chunks.length == 1
          {
            summary:
              completion_model.summarize_single(chunks.first[:summary], opts, &on_partial_blk),
            summary: summarize_single(llm, chunks.first[:summary], user, opts, &on_partial_blk),
            chunks: [],
          }
        else
          summaries = completion_model.summarize_in_chunks(chunks, opts)
          summaries = summarize_in_chunks(llm, chunks, user, opts)

          {
            summary: completion_model.concatenate_summaries(summaries, &on_partial_blk),
            summary:
              concatenate_summaries(
                llm,
                summaries.map { |s| s[:summary] },
                user,
                &on_partial_blk
              ),
            chunks: summaries,
          }
        end
@@ -39,14 +46,18 @@ module DiscourseAi

      private

      def split_into_chunks(contents)
      def format_content_item(item)
        "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
      end

      def split_into_chunks(tokenizer, contents)
        section = { ids: [], summary: "" }

        chunks =
          contents.reduce([]) do |sections, item|
            new_content = completion_model.format_content_item(item)
            new_content = format_content_item(item)

            if completion_model.can_expand_tokens?(
            if tokenizer.can_expand_tokens?(
                 section[:summary],
                 new_content,
                 completion_model.available_tokens,
@@ -65,6 +76,71 @@ module DiscourseAi

        chunks
      end

      def summarize_single(llm, text, user, opts, &on_partial_blk)
        prompt = summarization_prompt(text, opts)

        llm.completion!(prompt, user, &on_partial_blk)
      end

      def summarize_in_chunks(llm, chunks, user, opts)
        chunks.map do |chunk|
          prompt = summarization_prompt(chunk[:summary], opts)
          prompt[:post_insts] = "Don't use more than 400 words for the summary."

          chunk[:summary] = llm.completion!(prompt, user)
          chunk
        end
      end

      def concatenate_summaries(llm, summaries, user, &on_partial_blk)
        prompt = summarization_prompt(summaries.join("\n"), {})
        prompt[:insts] = <<~TEXT
          You are a bot that can concatenate disjoint summaries, creating a cohesive narrative.
          Keep the resulting summary in the same language used in the text below.
        TEXT

        llm.completion!(prompt, user, &on_partial_blk)
      end

      def summarization_prompt(input, opts)
        insts = <<~TEXT
          You are a summarization bot that effectively summarizes any text, creating a cohesive narrative.
          Your replies contain ONLY a summarized version of the text I provided, using the same language.
          You understand and generate Discourse forum Markdown.
          You format the response, including links, using Markdown.
        TEXT

        insts += <<~TEXT if opts[:resource_path]
          Each message is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE> "
          Append <POST_NUMBER> to #{opts[:resource_path]} when linking posts.
        TEXT

        insts += "The discussion title is: #{opts[:content_title]}.\n" if opts[:content_title]

        prompt = { insts: insts, input: <<~TEXT }
          Here is the text, inside <input></input> XML tags:

          <input>
          #{input}
          </input>
        TEXT

        if opts[:resource_path]
          prompt[:examples] = [
            [
              "<input>(1 user1 said: I love Mondays 2) user2 said: I hate Mondays</input>",
              "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) loves them, while [user2](#{opts[:resource_path]}/2) hates them.",
            ],
            [
              "<input>3) usuario1: Amo los lunes 6) usuario2: Odio los lunes</input>",
              "Dos usuarios charlan sobre los lunes. [usuario1](#{opts[:resource_path]}/3) dice que los ama, mientras que [usuario2](#{opts[:resource_path]}/6) los odia.",
            ],
          ]
        end

        prompt
      end
    end
  end
end
@@ -25,6 +25,32 @@ module DiscourseAi
          chunks: [],
        }
      end

      private

      def format_content_item(item)
        "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
      end

      def summarize_with_truncation(contents, opts)
        text_to_summarize = contents.map { |c| format_content_item(c) }.join
        truncated_content =
          ::DiscourseAi::Tokenizer::BertTokenizer.truncate(
            text_to_summarize,
            completion_model.available_tokens,
          )

        completion(truncated_content)
      end

      def completion(prompt)
        ::DiscourseAi::Inference::DiscourseClassifier.perform!(
          "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
          completion_model.model,
          prompt,
          SiteSetting.ai_summarization_discourse_service_api_key,
        ).dig(:summary_text)
      end
    end
  end
end
@@ -52,6 +52,8 @@ after_initialize do

  require_relative "lib/shared/database/connection"

  require_relative "lib/completions/entry_point"

  require_relative "lib/modules/nsfw/entry_point"
  require_relative "lib/modules/toxicity/entry_point"
  require_relative "lib/modules/sentiment/entry_point"
@@ -64,6 +66,7 @@ after_initialize do
  add_admin_route "discourse_ai.title", "discourse-ai"

  [
    DiscourseAi::Completions::EntryPoint.new,
    DiscourseAi::Embeddings::EntryPoint.new,
    DiscourseAi::NSFW::EntryPoint.new,
    DiscourseAi::Toxicity::EntryPoint.new,
@@ -0,0 +1,63 @@
# frozen_string_literal: true

RSpec.describe DiscourseAi::Completions::Dialects::ChatGPT do
  subject(:dialect) { described_class.new }

  let(:prompt) do
    {
      insts: <<~TEXT,
        I want you to act as a title generator for written pieces. I will provide you with a text,
        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
      TEXT
      input: <<~TEXT,
        Here is the text, inside <input></input> XML tags:
        <input>
        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.

        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
        slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
        dies so that a scene may be repeated.
        </input>
      TEXT
      post_insts:
        "Please put the translation between <ai></ai> tags and separate each title with a comma.",
    }
  end

  describe "#translate" do
    it "translates a prompt written in our generic format to the ChatGPT format" do
      open_ai_version = [
        { role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") },
        { role: "user", content: prompt[:input] },
      ]

      translated = dialect.translate(prompt)

      expect(translated).to contain_exactly(*open_ai_version)
    end

    it "includes examples in the ChatGPT version" do
      prompt[:examples] = [
        [
          "<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
          "<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
        ],
      ]

      open_ai_version = [
        { role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") },
        { role: "user", content: prompt[:examples][0][0] },
        { role: "assistant", content: prompt[:examples][0][1] },
        { role: "user", content: prompt[:input] },
      ]

      translated = dialect.translate(prompt)

      expect(translated).to contain_exactly(*open_ai_version)
    end
  end
end
@@ -0,0 +1,68 @@
# frozen_string_literal: true

RSpec.describe DiscourseAi::Completions::Dialects::Claude do
  subject(:dialect) { described_class.new }

  let(:prompt) do
    {
      insts: <<~TEXT,
        I want you to act as a title generator for written pieces. I will provide you with a text,
        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
      TEXT
      input: <<~TEXT,
        Here is the text, inside <input></input> XML tags:
        <input>
        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.

        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
        slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
        dies so that a scene may be repeated.
        </input>
      TEXT
      post_insts:
        "Please put the translation between <ai></ai> tags and separate each title with a comma.",
    }
  end

  describe "#translate" do
    it "translates a prompt written in our generic format to Claude's format" do
      anthropic_version = <<~TEXT
        Human: #{prompt[:insts]}
        #{prompt[:input]}
        #{prompt[:post_insts]}
        Assistant:
      TEXT

      translated = dialect.translate(prompt)

      expect(translated).to eq(anthropic_version)
    end

    it "knows how to translate examples to Claude's format" do
      prompt[:examples] = [
        [
          "<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
          "<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
        ],
      ]
      anthropic_version = <<~TEXT
        Human: #{prompt[:insts]}
        <example>
        H: #{prompt[:examples][0][0]}
        A: #{prompt[:examples][0][1]}
        </example>
        #{prompt[:input]}
        #{prompt[:post_insts]}
        Assistant:
      TEXT

      translated = dialect.translate(prompt)

      expect(translated).to eq(anthropic_version)
    end
  end
end
@@ -0,0 +1,63 @@
# frozen_string_literal: true

RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do
  subject(:dialect) { described_class.new }

  let(:prompt) do
    {
      insts: <<~TEXT,
        I want you to act as a title generator for written pieces. I will provide you with a text,
        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
      TEXT
      input: <<~TEXT,
        Here is the text, inside <input></input> XML tags:
        <input>
        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.

        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
        slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
        dies so that a scene may be repeated.
        </input>
      TEXT
      post_insts:
        "Please put the translation between <ai></ai> tags and separate each title with a comma.",
    }
  end

  describe "#translate" do
    it "translates a prompt written in our generic format to the Llama2 format" do
      llama2_classic_version = <<~TEXT
        [INST]<<SYS>>#{[prompt[:insts], prompt[:post_insts]].join("\n")}<</SYS>>[/INST]
        [INST]#{prompt[:input]}[/INST]
      TEXT

      translated = dialect.translate(prompt)

      expect(translated).to eq(llama2_classic_version)
    end

    it "includes examples in the translation" do
      prompt[:examples] = [
        [
          "<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
          "<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
        ],
      ]

      llama2_classic_version = <<~TEXT
        [INST]<<SYS>>#{[prompt[:insts], prompt[:post_insts]].join("\n")}<</SYS>>[/INST]
        [INST]#{prompt[:examples][0][0]}[/INST]
        #{prompt[:examples][0][1]}
        [INST]#{prompt[:input]}[/INST]
      TEXT

      translated = dialect.translate(prompt)

      expect(translated).to eq(llama2_classic_version)
    end
  end
end
@@ -0,0 +1,71 @@
# frozen_string_literal: true

RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do
  subject(:dialect) { described_class.new }

  describe "#translate" do
    let(:prompt) do
      {
        insts: <<~TEXT,
          I want you to act as a title generator for written pieces. I will provide you with a text,
          and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
          and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
        TEXT
        input: <<~TEXT,
          Here is the text, inside <input></input> XML tags:
          <input>
          To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
          discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
          defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.

          Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
          a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
          slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
          dies so that a scene may be repeated.
          </input>
        TEXT
        post_insts:
          "Please put the translation between <ai></ai> tags and separate each title with a comma.",
      }
    end

    it "translates a prompt written in our generic format to the Orca-style format" do
      orca_style_version = <<~TEXT
        ### System:
        #{[prompt[:insts], prompt[:post_insts]].join("\n")}
        ### User:
        #{prompt[:input]}
        ### Assistant:
      TEXT

      translated = dialect.translate(prompt)

      expect(translated).to eq(orca_style_version)
    end

    it "includes examples in the translated prompt" do
      prompt[:examples] = [
        [
          "<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
          "<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
        ],
      ]

      orca_style_version = <<~TEXT
        ### System:
        #{[prompt[:insts], prompt[:post_insts]].join("\n")}
        ### User:
        #{prompt[:examples][0][0]}
        ### Assistant:
        #{prompt[:examples][0][1]}
        ### User:
        #{prompt[:input]}
        ### Assistant:
      TEXT

      translated = dialect.translate(prompt)

      expect(translated).to eq(orca_style_version)
    end
  end
end
@@ -0,0 +1,64 @@
# frozen_string_literal: true

require_relative "endpoint_examples"

RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
  subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) }

  let(:model_name) { "claude-2" }
  let(:prompt) { "Human: write 3 words\n\n" }

  let(:request_body) { model.default_options.merge(prompt: prompt).to_json }
  let(:stream_request_body) { model.default_options.merge(prompt: prompt, stream: true).to_json }

  def response(content)
    {
      completion: content,
      stop: "\n\nHuman:",
      stop_reason: "stop_sequence",
      truncated: false,
      log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
      model: model_name,
      exception: nil,
    }
  end

  def stub_response(prompt, response_text)
    WebMock
      .stub_request(:post, "https://api.anthropic.com/v1/complete")
      .with(body: model.default_options.merge(prompt: prompt).to_json)
      .to_return(status: 200, body: JSON.dump(response(response_text)))
  end

  def stream_line(delta, finish_reason: nil)
    +"data: " << {
      completion: delta,
      stop: finish_reason ? "\n\nHuman:" : nil,
      stop_reason: finish_reason,
      truncated: false,
      log_id: "12b029451c6d18094d868bc04ce83f63",
      model: "claude-2",
      exception: nil,
    }.to_json
  end

  def stub_streamed_response(prompt, deltas)
    chunks =
      deltas.each_with_index.map do |_, index|
        if index == (deltas.length - 1)
          stream_line(deltas[index], finish_reason: "stop_sequence")
        else
          stream_line(deltas[index])
        end
      end

    chunks = chunks.join("\n\n")

    WebMock
      .stub_request(:post, "https://api.anthropic.com/v1/complete")
      .with(body: model.default_options.merge(prompt: prompt, stream: true).to_json)
      .to_return(status: 200, body: chunks)
  end

  it_behaves_like "an endpoint that can communicate with a completion service"
end
@@ -0,0 +1,122 @@
# frozen_string_literal: true

require_relative "endpoint_examples"

RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
  subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) }

  let(:model_name) { "claude-2" }
  let(:prompt) { "Human: write 3 words\n\n" }

  let(:request_body) { model.default_options.merge(prompt: prompt).to_json }
  let(:stream_request_body) { model.default_options.merge(prompt: prompt).to_json }

  before do
    SiteSetting.ai_bedrock_access_key_id = "123456"
    SiteSetting.ai_bedrock_secret_access_key = "asd-asd-asd"
    SiteSetting.ai_bedrock_region = "us-east-1"
  end

  # Copied from https://github.com/bblimke/webmock/issues/629
  # Workaround for stubbing a streamed response
  before do
    mocked_http =
      Class.new(Net::HTTP) do
        def request(*)
          super do |response|
            response.instance_eval do
              def read_body(*, &block)
                if block_given?
                  @body.each(&block)
                else
                  super
                end
              end
            end

            yield response if block_given?

            response
          end
        end
      end

    @original_net_http = Net.send(:remove_const, :HTTP)
    Net.send(:const_set, :HTTP, mocked_http)
  end

  after do
    Net.send(:remove_const, :HTTP)
    Net.send(:const_set, :HTTP, @original_net_http)
  end

  def response(content)
    {
      completion: content,
      stop: "\n\nHuman:",
      stop_reason: "stop_sequence",
      truncated: false,
      log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
      model: model_name,
      exception: nil,
    }
  end

  def stub_response(prompt, response_text)
    WebMock
      .stub_request(
        :post,
        "https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model_name}/invoke",
      )
      .with(body: request_body)
      .to_return(status: 200, body: JSON.dump(response(response_text)))
  end

  def stream_line(delta, finish_reason: nil)
    encoder = Aws::EventStream::Encoder.new

    message =
      Aws::EventStream::Message.new(
        payload:
          StringIO.new(
            {
              bytes:
                Base64.encode64(
                  {
                    completion: delta,
                    stop: finish_reason ? "\n\nHuman:" : nil,
                    stop_reason: finish_reason,
                    truncated: false,
                    log_id: "12b029451c6d18094d868bc04ce83f63",
                    model: "claude-2",
                    exception: nil,
                  }.to_json,
                ),
            }.to_json,
          ),
      )

    encoder.encode(message)
  end

  def stub_streamed_response(prompt, deltas)
    chunks =
      deltas.each_with_index.map do |_, index|
        if index == (deltas.length - 1)
          stream_line(deltas[index], finish_reason: "stop_sequence")
        else
          stream_line(deltas[index])
        end
      end

    WebMock
      .stub_request(
        :post,
        "https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model_name}/invoke-with-response-stream",
      )
      .with(body: stream_request_body)
      .to_return(status: 200, body: chunks)
  end

  it_behaves_like "an endpoint that can communicate with a completion service"
end
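For reference, the Bedrock stream stub frames each delta the way the live service does: the Anthropic-style JSON is base64-encoded under a bytes key and wrapped in an AWS event-stream message. A decode sketch under those assumptions, using the same aws-eventstream gem the stub relies on (the exact decode_chunk return shape is an assumption worth verifying against the gem version in use):

# Hedged sketch: decodes one chunk produced by stream_line above.
decoder = Aws::EventStream::Decoder.new

message, _complete = decoder.decode_chunk(chunk) # chunk: one encoded event
envelope = JSON.parse(message.payload.read)      # => { "bytes" => "..." }
delta = JSON.parse(Base64.decode64(envelope["bytes"]))["completion"]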
@@ -0,0 +1,71 @@
# frozen_string_literal: true

RSpec.shared_examples "an endpoint that can communicate with a completion service" do
  describe "#perform_completion!" do
    fab!(:user) { Fabricate(:user) }

    let(:response_text) { "1. Serenity\\n2. Laughter\\n3. Adventure" }

    context "when using regular mode" do
      before { stub_response(prompt, response_text) }

      it "can complete a trivial prompt" do
        completion_response = model.perform_completion!(prompt, user)

        expect(completion_response).to eq(response_text)
      end

      it "creates an audit log for the request" do
        model.perform_completion!(prompt, user)

        expect(AiApiAuditLog.count).to eq(1)
        log = AiApiAuditLog.first

        response_body = response(response_text).to_json

        expect(log.provider_id).to eq(model.provider_id)
        expect(log.user_id).to eq(user.id)
        expect(log.raw_request_payload).to eq(request_body)
        expect(log.raw_response_payload).to eq(response_body)
        expect(log.request_tokens).to eq(model.prompt_size(prompt))
        expect(log.response_tokens).to eq(model.tokenizer.size(response_text))
      end
    end

    context "when using stream mode" do
      let(:deltas) { ["Mount", "ain", " ", "Tree ", "Frog"] }

      before { stub_streamed_response(prompt, deltas) }

      it "can complete a trivial prompt" do
        completion_response = +""

        model.perform_completion!(prompt, user) do |partial, cancel|
          completion_response << partial
          cancel.call if completion_response.split(" ").length == 2
        end

        expect(completion_response).to eq(deltas[0...-1].join)
      end

      it "creates an audit log and updates it on each read" do
        completion_response = +""

        model.perform_completion!(prompt, user) do |partial, cancel|
          completion_response << partial
          cancel.call if completion_response.split(" ").length == 2
        end

        expect(AiApiAuditLog.count).to eq(1)
        log = AiApiAuditLog.first

        expect(log.provider_id).to eq(model.provider_id)
        expect(log.user_id).to eq(user.id)
        expect(log.raw_request_payload).to eq(stream_request_body)
        expect(log.raw_response_payload).to be_present
        expect(log.request_tokens).to eq(model.prompt_size(prompt))
        expect(log.response_tokens).to eq(model.tokenizer.size(deltas[0...-1].join))
      end
    end
  end
end
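These shared examples pin down the streaming contract every endpoint must honor: the block is invoked once per decoded delta with the partial text and a cancel callable, and calling cancel stops the request mid-stream while the audit log keeps whatever arrived. A minimal consumer sketch, assuming a Discourse console with the plugin loaded; the model name and prompt are illustrative:

# Sketch only: any endpoint that passes the shared examples behaves the same.
endpoint =
  DiscourseAi::Completions::Endpoints::Anthropic.new(
    "claude-2",
    DiscourseAi::Tokenizer::AnthropicTokenizer,
  )

buffer = +""
endpoint.perform_completion!("Human: write 3 words\n\n", Discourse.system_user) do |partial, cancel|
  buffer << partial
  cancel.call if buffer.split(" ").length >= 2 # stop streaming once we have enough
end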
@@ -0,0 +1,68 @@
# frozen_string_literal: true

require_relative "endpoint_examples"

RSpec.describe DiscourseAi::Completions::Endpoints::Huggingface do
  subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::Llama2Tokenizer) }

  let(:model_name) { "Llama2-*-chat-hf" }
  let(:prompt) { <<~TEXT }
    [INST]<<SYS>>You are a helpful bot.<</SYS>>[/INST]
    [INST]Write 3 words[/INST]
  TEXT

  let(:request_body) do
    model
      .default_options
      .merge(inputs: prompt)
      .tap { |payload| payload[:parameters][:max_new_tokens] = 2_000 - model.prompt_size(prompt) }
      .to_json
  end
  let(:stream_request_body) { request_body }

  before { SiteSetting.ai_hugging_face_api_url = "https://test.dev" }

  def response(content)
    { generated_text: content }
  end

  def stub_response(prompt, response_text)
    WebMock
      .stub_request(:post, "#{SiteSetting.ai_hugging_face_api_url}/generate")
      .with(body: request_body)
      .to_return(status: 200, body: JSON.dump(response(response_text)))
  end

  def stream_line(delta, finish_reason: nil)
    +"data: " << {
      token: {
        id: 29_889,
        text: delta,
        logprob: -0.08319092,
        special: !!finish_reason,
      },
      generated_text: finish_reason ? response_text : nil,
      details: nil,
    }.to_json
  end

  def stub_streamed_response(prompt, deltas)
    chunks =
      deltas.each_with_index.map do |_, index|
        if index == (deltas.length - 1)
          stream_line(deltas[index], finish_reason: true)
        else
          stream_line(deltas[index])
        end
      end

    chunks = chunks.join("\n\n")

    WebMock
      .stub_request(:post, "#{SiteSetting.ai_hugging_face_api_url}/generate_stream")
      .with(body: request_body)
      .to_return(status: 200, body: chunks)
  end

  it_behaves_like "an endpoint that can communicate with a completion service"
end
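Note that the Anthropic, Hugging Face, and OpenAI stream stubs all emit server-sent-events framing: each event is a single "data: {json}" line and events are separated by a blank line, which is why the chunks are joined with "\n\n". A parsing sketch under that assumption (helper name is hypothetical):

# Hedged sketch: splits an SSE body into its JSON payloads.
def parse_sse(body)
  body.split("\n\n").filter_map do |event|
    payload = event[/\Adata: (.*)\z/m, 1] # drop the "data: " prefix
    JSON.parse(payload) if payload
  end
end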
@@ -0,0 +1,74 @@
# frozen_string_literal: true

require_relative "endpoint_examples"

RSpec.describe DiscourseAi::Completions::Endpoints::OpenAI do
  subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) }

  let(:model_name) { "gpt-3.5-turbo" }
  let(:prompt) do
    [
      { role: "system", content: "You are a helpful bot." },
      { role: "user", content: "Write 3 words" },
    ]
  end

  let(:request_body) { model.default_options.merge(messages: prompt).to_json }
  let(:stream_request_body) { model.default_options.merge(messages: prompt, stream: true).to_json }

  def response(content)
    {
      id: "chatcmpl-6sZfAb30Rnv9Q7ufzFwvQsMpjZh8S",
      object: "chat.completion",
      created: 1_678_464_820,
      model: "gpt-3.5-turbo-0301",
      usage: {
        prompt_tokens: 337,
        completion_tokens: 162,
        total_tokens: 499,
      },
      choices: [
        { message: { role: "assistant", content: content }, finish_reason: "stop", index: 0 },
      ],
    }
  end

  def stub_response(prompt, response_text)
    WebMock
      .stub_request(:post, "https://api.openai.com/v1/chat/completions")
      .with(body: { model: model_name, messages: prompt })
      .to_return(status: 200, body: JSON.dump(response(response_text)))
  end

  def stream_line(delta, finish_reason: nil)
    +"data: " << {
      id: "chatcmpl-#{SecureRandom.hex}",
      object: "chat.completion.chunk",
      created: 1_681_283_881,
      model: "gpt-3.5-turbo-0301",
      choices: [{ delta: { content: delta } }],
      finish_reason: finish_reason,
      index: 0,
    }.to_json
  end

  def stub_streamed_response(prompt, deltas)
    chunks =
      deltas.each_with_index.map do |_, index|
        if index == (deltas.length - 1)
          stream_line(deltas[index], finish_reason: "stop_sequence")
        else
          stream_line(deltas[index])
        end
      end

    chunks = chunks.join("\n\n")

    WebMock
      .stub_request(:post, "https://api.openai.com/v1/chat/completions")
      .with(body: model.default_options.merge(messages: prompt, stream: true).to_json)
      .to_return(status: 200, body: chunks)
  end

  it_behaves_like "an endpoint that can communicate with a completion service"
end
@@ -0,0 +1,71 @@
# frozen_string_literal: true

RSpec.describe DiscourseAi::Completions::LLM do
  subject(:llm) do
    described_class.new(
      DiscourseAi::Completions::Dialects::OrcaStyle.new,
      canned_response,
      "Upstage-Llama-2-*-instruct-v2",
    )
  end

  fab!(:user) { Fabricate(:user) }

  describe ".proxy" do
    it "raises an exception when we can't proxy the model" do
      fake_model = "unknown_v2"

      expect { described_class.proxy(fake_model) }.to(
        raise_error(DiscourseAi::Completions::LLM::UNKNOWN_MODEL),
      )
    end
  end

  describe "#completion!" do
    let(:prompt) do
      {
        insts: <<~TEXT,
          I want you to act as a title generator for written pieces. I will provide you with a text,
          and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
          and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
        TEXT
        input: <<~TEXT,
          Here is the text, inside <input></input> XML tags:
          <input>
          To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
          discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
          defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
          </input>
        TEXT
        post_insts:
          "Please put the translation between <ai></ai> tags and separate each title with a comma.",
      }
    end

    let(:canned_response) do
      DiscourseAi::Completions::Endpoints::CannedResponse.new(
        [
          "<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
        ],
      )
    end

    context "when getting the full response" do
      it "processes the prompt and returns the response" do
        llm_response = llm.completion!(prompt, user)

        expect(llm_response).to eq(canned_response.responses[0])
      end
    end

    context "when getting a streamed response" do
      it "processes the prompt and calls the given block with the partial response" do
        llm_response = +""

        llm.completion!(prompt, user) { |partial, cancel_fn| llm_response << partial }

        expect(llm_response).to eq(canned_response.responses[0])
      end
    end
  end
end
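This spec fixes the public surface of the abstraction: .proxy resolves a model name (raising UNKNOWN_MODEL when it can't) and #completion! accepts the generic insts/input/post_insts prompt hash, either blocking or streaming through a block. A hedged usage sketch; that .proxy returns a ready-to-call LLM instance wired to the matching dialect and endpoint is inferred from these specs rather than shown in this diff:

# Sketch only; prompt values are illustrative.
prompt = {
  insts: "Act as a title generator.",
  input: "Some text to title.",
  post_insts: "Reply inside <ai></ai> tags.",
}

llm = DiscourseAi::Completions::LLM.proxy("gpt-3.5-turbo") # UNKNOWN_MODEL otherwise

title = llm.completion!(prompt, Discourse.system_user) # blocking: full response

# Streaming: the block receives partials as they arrive.
llm.completion!(prompt, Discourse.system_user) { |partial, cancel| print partial }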
@@ -66,7 +66,10 @@ RSpec.describe DiscourseAi::AiBot::Commands::SearchCommand do
      .expects(:asymmetric_topics_similarity_search)
      .returns([post1.topic_id])

-   results = search.process(search_query: "hello world, sam", status: "public")
+   results =
+     DiscourseAi::Completions::LLM.with_prepared_responses(["<ai>#{query}</ai>"]) do
+       search.process(search_query: "hello world, sam", status: "public")
+     end

    expect(results[:args]).to eq({ search_query: "hello world, sam", status: "public" })
    expect(results[:rows].length).to eq(1)
@@ -13,15 +13,6 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
  before do
    SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"

-   prompt = DiscourseAi::Embeddings::HydeGenerators::OpenAi.new.prompt(query)
-   OpenAiCompletionsInferenceStubs.stub_response(
-     prompt,
-     hypothetical_post,
-     req_opts: {
-       max_tokens: 400,
-     },
-   )
-
    hyde_embedding = [0.049382, 0.9999]
    EmbeddingsGenerationStubs.discourse_service(
      SiteSetting.ai_embeddings_model,
@@ -39,10 +30,16 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
      .returns(candidate_ids)
  end

+ def trigger_search(query)
+   DiscourseAi::Completions::LLM.with_prepared_responses(["<ai>#{hypothetical_post}</ai>"]) do
+     subject.search_for_topics(query)
+   end
+ end
+
  it "returns the first post of a topic included in the asymmetric search results" do
    stub_candidate_ids([post.topic_id])

-   posts = subject.search_for_topics(query)
+   posts = trigger_search(query)

    expect(posts).to contain_exactly(post)
  end
@@ -53,7 +50,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
    post.topic.update!(visible: false)
    stub_candidate_ids([post.topic_id])

-   posts = subject.search_for_topics(query)
+   posts = trigger_search(query)

    expect(posts).to be_empty
  end
@@ -64,7 +61,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
    pm_post = Fabricate(:private_message_post)
    stub_candidate_ids([pm_post.topic_id])

-   posts = subject.search_for_topics(query)
+   posts = trigger_search(query)

    expect(posts).to be_empty
  end
@@ -75,7 +72,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
    post.update!(post_type: Post.types[:whisper])
    stub_candidate_ids([post.topic_id])

-   posts = subject.search_for_topics(query)
+   posts = trigger_search(query)

    expect(posts).to be_empty
  end
@@ -87,7 +84,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
    reply.topic.first_post.trash!
    stub_candidate_ids([reply.topic_id])

-   posts = subject.search_for_topics(query)
+   posts = trigger_search(query)

    expect(posts).to be_empty
  end
@@ -98,7 +95,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
    post_2 = Fabricate(:post)
    stub_candidate_ids([post.topic_id])

-   posts = subject.search_for_topics(query)
+   posts = trigger_search(query)

    expect(posts).not_to include(post_2)
  end
@@ -114,7 +111,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
  end

  it "returns an empty list" do
-   posts = subject.search_for_topics(query)
+   posts = trigger_search(query)

    expect(posts).to be_empty
  end
@@ -122,14 +119,17 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
  it "returns the results if the user has access to the category" do
    group.add(user)

-   posts = subject.search_for_topics(query)
+   posts = trigger_search(query)

    expect(posts).to contain_exactly(post)
  end

  context "while searching as anon" do
    it "returns an empty list" do
-     posts = described_class.new(Guardian.new(nil)).search_for_topics(query)
+     posts =
+       DiscourseAi::Completions::LLM.with_prepared_responses(
+         ["<ai>#{hypothetical_post}</ai>"],
+       ) { described_class.new(Guardian.new(nil)).search_for_topics(query) }

      expect(posts).to be_empty
    end
@@ -1,122 +0,0 @@
# frozen_string_literal: true

RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
  subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }

  let(:model_name) { "claude-2" }
  let(:max_tokens) { 720 }

  let(:content) do
    {
      resource_path: "/t/-/1",
      content_title: "This is a title",
      contents: [{ poster: "asd", id: 1, text: "This is a text" }],
    }
  end

  def as_chunk(item)
    { ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
  end

  def expected_messages(contents, opts)
    base_prompt = <<~TEXT
      Human: Summarize the following forum discussion inside the given <input> tag.
      Try to keep the summary in the same language as the forum discussion.
      Format the response, including links, using markdown.
      Try generating links as well the format is #{opts[:resource_path]}/POST_ID
      For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
      Wrap the whole the summary inside <ai> tags.
      The discussion title is: #{opts[:content_title]}.
      Don't use more than 400 words.
    TEXT

    text =
      contents.reduce("") do |memo, item|
        memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
      end

    base_prompt += "<input>#{text}</input>\nAssistant:\n"
  end

  describe "#summarize_in_chunks" do
    context "when the content fits in a single chunk" do
      it "performs a request to summarize" do
        opts = content.except(:contents)

        AnthropicCompletionStubs.stub_response(
          expected_messages(content[:contents], opts),
          "<ai>This is summary 1</ai>",
        )

        chunks = content[:contents].map { |c| as_chunk(c) }
        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }

        expect(summarized_chunks).to contain_exactly("This is summary 1")
      end
    end

    context "when the content fits in multiple chunks" do
      it "performs a request for each one to summarize" do
        content[:contents] << {
          poster: "asd2",
          id: 2,
          text: "This is a different text to summarize",
        }
        opts = content.except(:contents)

        content[:contents].each_with_index do |item, idx|
          AnthropicCompletionStubs.stub_response(
            expected_messages([item], opts),
            "<ai>This is summary #{idx + 1}</ai>",
          )
        end

        chunks = content[:contents].map { |c| as_chunk(c) }
        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }

        expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
      end
    end
  end

  describe "#concatenate_summaries" do
    it "combines all the different summaries into a single one" do
      messages = <<~TEXT
        Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
        Include only the summary inside <ai> tags.
        <input>summary 1</input>
        <input>summary 2</input>
        Assistant:
      TEXT

      AnthropicCompletionStubs.stub_response(messages, "<ai>concatenated summary</ai>")

      expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
    end
  end

  describe "#summarize_with_truncation" do
    let(:max_tokens) { 709 }

    it "truncates the context to meet the token limit" do
      opts = content.except(:contents)

      instructions = <<~TEXT
        Human: Summarize the following forum discussion inside the given <input> tag.
        Try to keep the summary in the same language as the forum discussion.
        Format the response, including links, using markdown.
        Try generating links as well the format is #{opts[:resource_path]}/POST_ID
        For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
        Wrap the whole the summary inside <ai> tags.
        The discussion title is: #{opts[:content_title]}.
        Don't use more than 400 words.
        <input>(1 asd said: This is a</input>
        Assistant:
      TEXT

      AnthropicCompletionStubs.stub_response(instructions, "<ai>truncated summary</ai>")

      expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
    end
  end
end
@@ -1,95 +0,0 @@
# frozen_string_literal: true

RSpec.describe DiscourseAi::Summarization::Models::Discourse do
  subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }

  let(:model_name) { "bart-large-cnn-samsum" }
  let(:max_tokens) { 20 }

  let(:content) do
    {
      resource_path: "/t/1/POST_NUMBER",
      content_title: "This is a title",
      contents: [{ poster: "asd", id: 1, text: "This is a text" }],
    }
  end

  before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" }

  def stub_request(prompt, response)
    WebMock
      .stub_request(
        :post,
        "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
      )
      .with(body: JSON.dump(model: model_name, content: prompt))
      .to_return(status: 200, body: JSON.dump(summary_text: response))
  end

  def expected_messages(contents, opts)
    contents.reduce("") do |memo, item|
      memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
    end
  end

  def as_chunk(item)
    { ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
  end

  describe "#summarize_in_chunks" do
    context "when the content fits in a single chunk" do
      it "performs a request to summarize" do
        opts = content.except(:contents)

        stub_request(expected_messages(content[:contents], opts), "This is summary 1")

        chunks = content[:contents].map { |c| as_chunk(c) }
        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }

        expect(summarized_chunks).to contain_exactly("This is summary 1")
      end
    end

    context "when the content fits in multiple chunks" do
      it "performs a request for each one to summarize" do
        content[:contents] << {
          poster: "asd2",
          id: 2,
          text: "This is a different text to summarize",
        }
        opts = content.except(:contents)

        content[:contents].each_with_index do |item, idx|
          stub_request(expected_messages([item], opts), "This is summary #{idx + 1}")
        end

        chunks = content[:contents].map { |c| as_chunk(c) }
        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }

        expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
      end
    end
  end

  describe "#concatenate_summaries" do
    it "combines all the different summaries into a single one" do
      messages = ["summary 1", "summary 2"].join("\n")

      stub_request(messages, "concatenated summary")

      expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
    end
  end

  describe "#summarize_with_truncation" do
    let(:max_tokens) { 9 }

    it "truncates the context to meet the token limit" do
      opts = content.except(:contents)

      stub_request("( 1 asd said : this is", "truncated summary")

      expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
    end
  end
end
@@ -1,121 +0,0 @@
# frozen_string_literal: true

RSpec.describe DiscourseAi::Summarization::Models::OpenAi do
  subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }

  let(:model_name) { "gpt-3.5-turbo" }
  let(:max_tokens) { 720 }

  let(:content) do
    {
      resource_path: "/t/1/POST_NUMBER",
      content_title: "This is a title",
      contents: [{ poster: "asd", id: 1, text: "This is a text" }],
    }
  end

  def as_chunk(item)
    { ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
  end

  def expected_messages(contents, opts)
    base_prompt = <<~TEXT
      You are a summarization bot.
      You effectively summarise any text and reply ONLY with ONLY the summarized text.
      You condense it into a shorter version.
      You understand and generate Discourse forum Markdown.
      You format the response, including links, using markdown.
      Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)
      The discussion title is: #{opts[:content_title]}.
    TEXT

    messages = [{ role: "system", content: base_prompt }]

    text =
      contents.reduce("") do |memo, item|
        memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
      end

    messages << {
      role: "user",
      content:
        "Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n#{text}",
    }
  end

  describe "#summarize_in_chunks" do
    context "when the content fits in a single chunk" do
      it "performs a request to summarize" do
        opts = content.except(:contents)

        OpenAiCompletionsInferenceStubs.stub_response(
          expected_messages(content[:contents], opts),
          "This is summary 1",
        )

        chunks = content[:contents].map { |c| as_chunk(c) }
        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }

        expect(summarized_chunks).to contain_exactly("This is summary 1")
      end
    end

    context "when the content fits in multiple chunks" do
      it "performs a request for each one to summarize" do
        content[:contents] << {
          poster: "asd2",
          id: 2,
          text: "This is a different text to summarize",
        }
        opts = content.except(:contents)

        content[:contents].each_with_index do |item, idx|
          OpenAiCompletionsInferenceStubs.stub_response(
            expected_messages([item], opts),
            "This is summary #{idx + 1}",
          )
        end

        chunks = content[:contents].map { |c| as_chunk(c) }
        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }

        expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
      end
    end
  end

  describe "#concatenate_summaries" do
    it "combines all the different summaries into a single one" do
      messages = [
        { role: "system", content: "You are a helpful bot" },
        {
          role: "user",
          content:
            "Concatenate these disjoint summaries, creating a cohesive narrative. Keep the summary in the same language used in the text below.\nsummary 1\nsummary 2",
        },
      ]

      OpenAiCompletionsInferenceStubs.stub_response(messages, "concatenated summary")

      expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
    end
  end

  describe "#summarize_with_truncation" do
    let(:max_tokens) { 709 }

    it "truncates the context to meet the token limit" do
      opts = content.except(:contents)

      truncated_version = expected_messages(content[:contents], opts)

      truncated_version.last[
        :content
      ] = "Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n(1 asd said: This is a"

      OpenAiCompletionsInferenceStubs.stub_response(truncated_version, "truncated summary")

      expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
    end
  end
end
@@ -1,28 +1,35 @@
# frozen_string_literal: true

-require_relative "../../../../support/summarization/dummy_completion_model"
-
RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
  describe "#summarize" do
    subject(:strategy) { described_class.new(model) }

    let(:summarize_text) { "This is a text" }
-   let(:model) { DummyCompletionModel.new(model_tokens) }
    let(:model_tokens) do
-     # Make sure each content fits in a single chunk.
-     DiscourseAi::Tokenizer::BertTokenizer.size("(1 asd said: This is a text ") + 3
+     # 700 is the number of tokens reserved for the prompt.
+     700 + DiscourseAi::Tokenizer::OpenAiTokenizer.size("(1 asd said: This is a text ") + 3
    end

-   let(:user) { User.new }
+   let(:model) do
+     DiscourseAi::Summarization::Models::OpenAi.new("gpt-4", max_tokens: model_tokens)
+   end

    let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }

+   let(:single_summary) { "this is a single summary" }
+   let(:concatenated_summary) { "this is a concatenated summary" }
+
+   let(:user) { User.new }
+
    context "when the content to summarize fits in a single call" do
      it "does one call to summarize content" do
-       result = strategy.summarize(content, user)
+       result =
+         DiscourseAi::Completions::LLM.with_prepared_responses([single_summary]) do |spy|
+           strategy.summarize(content, user).tap { expect(spy.completions).to eq(1) }
+         end

-       expect(model.summarization_calls).to eq(1)
-       expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
+       expect(result[:summary]).to eq(single_summary)
      end
    end

@@ -30,10 +37,12 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
    it "summarizes each chunk and then concatenates them" do
      content[:contents] << { poster: "asd2", id: 2, text: summarize_text }

-     result = strategy.summarize(content, user)
+     result =
+       DiscourseAi::Completions::LLM.with_prepared_responses(
+         [single_summary, single_summary, concatenated_summary],
+       ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) } }

-     expect(model.summarization_calls).to eq(3)
-     expect(result[:summary]).to eq(DummyCompletionModel::CONCATENATED_SUMMARIES)
+     expect(result[:summary]).to eq(concatenated_summary)
    end
  end
end
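The fold-content spec now drives call counts through the block argument of with_prepared_responses, which appears to yield a spy whose completions counter replaces the deleted DummyCompletionModel#summarization_calls. A minimal sketch of the pattern, assuming the same strategy, content, and user as above; note the block's return value is what the helper returns, which is why the result is tapped rather than assigned inside:

# Three canned responses: one per chunk, plus the concatenation pass.
responses = ["first chunk summary", "second chunk summary", "merged summary"]

result =
  DiscourseAi::Completions::LLM.with_prepared_responses(responses) do |spy|
    strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) }
  end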
@@ -1,28 +0,0 @@
# frozen_string_literal: true

require_relative "../../../../support/summarization/dummy_completion_model"

RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do
  describe "#summarize" do
    subject(:strategy) { described_class.new(model) }

    let(:summarize_text) { "This is a text" }
    let(:model_tokens) { summarize_text.length }
    let(:model) { DummyCompletionModel.new(model_tokens) }

    let(:user) { User.new }

    let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }

    context "when the content to summarize doesn't fit in a single call" do
      it "summarizes a truncated version" do
        content[:contents] << { poster: "asd2", id: 2, text: summarize_text }

        result = strategy.summarize(content, user)

        expect(model.summarization_calls).to eq(1)
        expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
      end
    end
  end
end
@@ -1,46 +0,0 @@
# frozen_string_literal: true

class DummyCompletionModel
  SINGLE_SUMMARY = "this is a single summary"
  CONCATENATED_SUMMARIES = "this is a concatenated summary"

  def initialize(max_tokens)
    @summarization_calls = 0
    @available_tokens = max_tokens
  end

  attr_reader :max_length, :summarization_calls, :available_tokens

  delegate :can_expand_tokens?, to: :tokenizer

  def summarize_single(single_chunk, opts)
    @summarization_calls += 1
    SINGLE_SUMMARY
  end

  def summarize_in_chunks(chunks, opts)
    chunks.map do |chunk|
      chunk[:summary] = SINGLE_SUMMARY
      @summarization_calls += 1
      chunk
    end
  end

  def concatenate_summaries(summaries)
    @summarization_calls += 1
    CONCATENATED_SUMMARIES
  end

  def summarize_with_truncation(_contents, _opts)
    @summarization_calls += 1
    SINGLE_SUMMARY
  end

  def format_content_item(item)
    "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
  end

  def tokenizer
    DiscourseAi::Tokenizer::BertTokenizer
  end
end