2023-04-10 10:04:42 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module ::DiscourseAi
  module Inference
    # Thin client for Anthropic's text completion endpoint
    # (https://api.anthropic.com/v1/complete), with optional streaming.
    class AnthropicCompletions
      # Raised when the API answers with an HTTP status other than 200.
      CompletionFailed = Class.new(StandardError)

      # Seconds used for the open, read and write timeouts of the HTTP connection.
      TIMEOUT = 60

      # Requests a completion for +prompt+.
      #
      # When a block is given the request is streamed: every decoded event that
      # carries a :completion key is yielded as (partial, cancel), where
      # +partial+ is the parsed event (symbolized keys) and +cancel+ is a lambda
      # that stops the stream when called.
      #
      # @param prompt [String] prompt sent to the API
      # @param model [String] model name sent in the payload
      # @param temperature [Numeric, nil] only sent when set
      # @param top_p [Numeric, nil] only sent when set
      # @param max_tokens [Integer, nil] sent as max_tokens_to_sample (2000 when nil)
      # @param user_id [Integer, nil] recorded on the audit log
      # @param stop_sequences [Array<String>, nil] only sent when set
      # @param post [#id, #topic_id, nil] its id / topic_id are recorded on the audit log
      # @return [Hash] the parsed response body (symbolized keys) when no block is given
      # @return [String] the concatenated completion text when streaming
      # @return [nil] when a streamed request was cancelled through +cancel+
      # @raise [CompletionFailed] when the HTTP status is not 200
      def self.perform!(
        prompt,
        model = "claude-2",
        temperature: nil,
        top_p: nil,
        max_tokens: nil,
        user_id: nil,
        stop_sequences: nil,
        post: nil,
        &blk
      )
        # HACK to get around the fact that they have different APIs
        # we will introduce a proper LLM abstraction layer to handle this shenanigans later this year
        if model == "claude-2" && SiteSetting.ai_bedrock_access_key_id.present? &&
             SiteSetting.ai_bedrock_secret_access_key.present? &&
             SiteSetting.ai_bedrock_region.present?
          return(
            AmazonBedrockInference.perform!(
              prompt,
              temperature: temperature,
              top_p: top_p,
              max_tokens: max_tokens,
              user_id: user_id,
              stop_sequences: stop_sequences,
              &blk
            )
          )
        end

        log = nil
        response_data = +""
        response_raw = +""

        url = URI("https://api.anthropic.com/v1/complete")
        headers = {
          "anthropic-version" => "2023-06-01",
          "x-api-key" => SiteSetting.ai_anthropic_api_key,
          "content-type" => "application/json",
        }

        payload = { model: model, prompt: prompt }
        payload[:top_p] = top_p if top_p
        payload[:max_tokens_to_sample] = max_tokens || 2000
        payload[:temperature] = temperature if temperature
        payload[:stream] = true if block_given?
        payload[:stop_sequences] = stop_sequences if stop_sequences

        begin
          Net::HTTP.start(
            url.host,
            url.port,
            use_ssl: true,
            read_timeout: TIMEOUT,
            open_timeout: TIMEOUT,
            write_timeout: TIMEOUT,
          ) do |http|
            request = Net::HTTP::Post.new(url, headers)
            request_body = payload.to_json
            request.body = request_body

            http.request(request) do |response|
              if response.code.to_i != 200
                Rails.logger.error(
                  "AnthropicCompletions: status: #{response.code.to_i} - body: #{response.body}",
                )
                raise CompletionFailed
              end

              log =
                AiApiAuditLog.create!(
                  provider_id: AiApiAuditLog::Provider::Anthropic,
                  raw_request_payload: request_body,
                  user_id: user_id,
                  post_id: post&.id,
                  topic_id: post&.topic_id,
                )

              if !block_given?
                response_body = response.read_body
                parsed_response = JSON.parse(response_body, symbolize_names: true)

                log.update!(
                  raw_response_payload: response_body,
                  request_tokens: DiscourseAi::Tokenizer::AnthropicTokenizer.size(prompt),
                  response_tokens:
                    DiscourseAi::Tokenizer::AnthropicTokenizer.size(parsed_response[:completion]),
                )
                return parsed_response
              end

              cancelled = false
              cancel = lambda { cancelled = true }
              # Trailing line of the previous chunk that could not be decoded yet;
              # it is prepended to the next chunk so events split across network
              # reads are not lost.
              leftover = +""

              response.read_body do |chunk|
                if cancelled
                  http.finish
                  return
                end

                response_raw << chunk

                buffer = leftover.empty? ? chunk : leftover + chunk
                leftover = +""
                ends_on_newline = buffer.end_with?("\n")
                lines = buffer.split("\n")
                last_index = lines.size - 1

                lines.each_with_index do |line, index|
                  # Once the consumer cancelled, the rest of the chunk is ignored.
                  break if cancelled

                  # Only the final line of a buffer that does not end with a
                  # newline can still be waiting for more bytes.
                  maybe_incomplete = !ends_on_newline && index == last_index

                  data = line.split("data: ", 2)[1]
                  partial = data && try_parse(data)

                  if partial.nil?
                    leftover = line if maybe_incomplete
                    next
                  end

                  response_data << partial[:completion].to_s

                  # ping has no data... do not yield it
                  yield partial, cancel if partial[:completion]
                end
              rescue IOError
                # An IOError is only tolerated once the consumer cancelled.
                raise if !cancelled
              end

              return response_data
            end
          end
        ensure
          # `log` is still nil when the request failed before the audit record was
          # created; guarding here keeps the original exception from being masked.
          if log && block_given?
            log.update!(
              raw_response_payload: response_raw,
              request_tokens: DiscourseAi::Tokenizer::AnthropicTokenizer.size(prompt),
              response_tokens: DiscourseAi::Tokenizer::AnthropicTokenizer.size(response_data),
            )
          end
          if Rails.env.development? && log
            puts "AnthropicCompletions: request_tokens #{log.request_tokens} response_tokens #{log.response_tokens}"
          end
        end
      end

      # Parses +data+ as JSON with symbolized keys.
      #
      # @param data [String] JSON text
      # @return [Object, nil] the parsed value, or nil when +data+ is not valid JSON
      def self.try_parse(data)
        JSON.parse(data, symbolize_names: true)
      rescue JSON::ParserError
        nil
      end
    end
  end
end
|