discourse-ai/lib/shared/inference/anthropic_completions.rb
Sam 181113159b
FIX: setting explorer was exceeding token budget
This refactor changes it so we only include minimal data in the
system prompt which leaves us lots of tokens for specific searches

The new search command allows us to pull in settings on demand

Descriptions are included in short search results, and names only
in longer results

Also: 

* In dev it is important to tell when calls are made to open ai
this adds a console log to increase awareness around token usage

* PERF: stop counting tokens so often

This changes it so we only count tokens once per response

Previously each time we heard back from open ai we would count
tokens, leading to unneeded delays

* bug fix, commands may reach in for tokenizer

* add logging to console for anthropic calls as well

* Update lib/shared/inference/openai_completions.rb

Co-authored-by: Martin Brennan <mjrbrennan@gmail.com>
2023-09-01 11:48:51 +10:00

135 lines
4.0 KiB
Ruby

# frozen_string_literal: true
module ::DiscourseAi
  module Inference
    # Client for Anthropic's text completion endpoint
    # (https://api.anthropic.com/v1/complete). Each call that receives an
    # HTTP 200 is recorded in an AiApiAuditLog row, which is then updated with
    # the raw response and the request/response token counts.
    class AnthropicCompletions
      # Raised when the API answers with any status other than 200.
      CompletionFailed = Class.new(StandardError)

      # Seconds used for the open, read and write timeouts of the connection.
      TIMEOUT = 60

      # Sends +prompt+ to the completion endpoint.
      #
      # Without a block the full response is read and returned as a Hash with
      # symbolized keys. With a block the request is sent with `stream: true`
      # and the block is called as `yield partial, cancel` for every streamed
      # event carrying a :completion; calling `cancel.call` stops further
      # yields and closes the connection when the next chunk arrives.
      #
      # @param prompt [String] prompt text sent as-is
      # @param model [String] model name placed in the payload
      # @param temperature [Numeric, nil] only sent when given
      # @param top_p [Numeric, nil] only sent when given
      # @param max_tokens [Integer, nil] sent as max_tokens_to_sample (2000 when nil)
      # @param user_id [Integer, nil] stored on the audit log row
      # @return [Hash, String, nil] parsed response (no block), the accumulated
      #   completion text (block), or nil when a cancelled stream is aborted
      # @raise [CompletionFailed] when the response status is not 200
      def self.perform!(
        prompt,
        model = "claude-2",
        temperature: nil,
        top_p: nil,
        max_tokens: nil,
        user_id: nil
      )
        log = nil
        response_data = +""
        response_raw = +""

        url = URI("https://api.anthropic.com/v1/complete")
        headers = {
          "anthropic-version" => "2023-06-01",
          "x-api-key" => SiteSetting.ai_anthropic_api_key,
          "content-type" => "application/json",
        }

        payload = { model: model, prompt: prompt }
        payload[:top_p] = top_p if top_p
        payload[:max_tokens_to_sample] = max_tokens || 2000
        payload[:temperature] = temperature if temperature
        payload[:stream] = true if block_given?

        Net::HTTP.start(
          url.host,
          url.port,
          use_ssl: true,
          read_timeout: TIMEOUT,
          open_timeout: TIMEOUT,
          write_timeout: TIMEOUT,
        ) do |http|
          request = Net::HTTP::Post.new(url, headers)
          request_body = payload.to_json
          request.body = request_body

          http.request(request) do |response|
            if response.code.to_i != 200
              Rails.logger.error(
                "AnthropicCompletions: status: #{response.code.to_i} - body: #{response.body}",
              )
              raise CompletionFailed
            end

            log =
              AiApiAuditLog.create!(
                provider_id: AiApiAuditLog::Provider::Anthropic,
                raw_request_payload: request_body,
                user_id: user_id,
              )

            if !block_given?
              response_body = response.read_body
              parsed_response = JSON.parse(response_body, symbolize_names: true)

              log.update!(
                raw_response_payload: response_body,
                request_tokens: DiscourseAi::Tokenizer::AnthropicTokenizer.size(prompt),
                response_tokens:
                  DiscourseAi::Tokenizer::AnthropicTokenizer.size(parsed_response[:completion]),
              )
              return parsed_response
            end

            cancelled = false
            cancel = lambda { cancelled = true }

            # Holds the unterminated tail of the stream so that a line split
            # across two network chunks is parsed once it is complete instead
            # of being dropped as two unparseable halves.
            pending = +""

            consume_line =
              lambda do |line|
                next if cancelled

                data = line.split("data: ", 2)[1]
                next if !data

                partial = try_parse(data)
                next if !partial

                response_data << partial[:completion].to_s

                # ping has no data... do not yield it
                yield partial, cancel if partial[:completion]
              end

            response.read_body do |chunk|
              if cancelled
                http.finish
                return
              end

              response_raw << chunk
              pending << chunk

              extract_complete_lines(pending).each(&consume_line)
            rescue IOError
              raise if !cancelled
            end

            # A final line that never received its newline is still processed.
            consume_line.call(pending) if !pending.empty?

            return response_data
          end
        end
      ensure
        # `log` is still nil when the request failed before the audit row was
        # created; skipping the update keeps the original error from being
        # masked by a NoMethodError raised here.
        if block_given? && log
          log.update!(
            raw_response_payload: response_raw,
            request_tokens: DiscourseAi::Tokenizer::AnthropicTokenizer.size(prompt),
            response_tokens: DiscourseAi::Tokenizer::AnthropicTokenizer.size(response_data),
          )
        end
        if Rails.env.development? && log
          puts "AnthropicCompletions: request_tokens #{log.request_tokens} response_tokens #{log.response_tokens}"
        end
      end

      # Parses +data+ as JSON with symbolized keys.
      #
      # @param data [String]
      # @return [Object, nil] the parsed value, or nil when +data+ is not valid JSON
      def self.try_parse(data)
        JSON.parse(data, symbolize_names: true)
      rescue JSON::ParserError
        nil
      end

      # Removes every newline-terminated line from the front of +buffer+
      # (mutating it) and returns those lines without their line terminator.
      # Whatever follows the last newline is left in +buffer+.
      #
      # @param buffer [String] mutable accumulation buffer
      # @return [Array<String>]
      def self.extract_complete_lines(buffer)
        lines = []
        while (newline_at = buffer.index("\n"))
          lines << buffer.slice!(0..newline_at).chomp
        end
        lines
      end
      private_class_method :extract_complete_lines
    end
  end
end