# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
      class ChatGpt < Dialect
        class << self
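          # Model names this dialect can translate prompts for.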
          def can_translate?(model_name)
            %w[
              gpt-3.5-turbo
              gpt-4
              gpt-3.5-turbo-16k
              gpt-4-32k
              gpt-4-0125-preview
              gpt-4-turbo
            ].include?(model_name)
          end

          def tokenizer
            DiscourseAi::Tokenizer::OpenAiTokenizer
          end
        end
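
        # Ids matching this pattern can be sent as the OpenAI `name` field; anything
        # else gets prefixed into the message content instead (see #translate).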
        VALID_ID_REGEX = /\A[a-zA-Z0-9_]+\z/

        def translate
          messages = prompt.messages

          # ChatGPT doesn't use a trailing assistant message to improve long-context
          # responses, so drop it if present.
          if messages.last[:type] == :model
            messages = messages.dup
            messages.pop
          end

          trimmed_messages = trim_messages(messages)
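
          # If any user id can't be expressed as an OpenAI `name`, prefix ids into the
          # content of every user message so attribution stays consistent.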
          embed_user_ids =
            trimmed_messages.any? do |m|
              m[:id] && m[:type] == :user && !m[:id].to_s.match?(VALID_ID_REGEX)
            end
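
          # Map each generic prompt message onto the OpenAI chat-completion format.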
          trimmed_messages.map do |msg|
            if msg[:type] == :system
              { role: "system", content: msg[:content] }
            elsif msg[:type] == :model
              { role: "assistant", content: msg[:content] }
            elsif msg[:type] == :tool_call
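              # Tool calls are stored as JSON; OpenAI expects the arguments
              # re-serialized as a JSON string inside the function payload.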
              call_details = JSON.parse(msg[:content], symbolize_names: true)
              call_details[:arguments] = call_details[:arguments].to_json

              {
                role: "assistant",
                content: nil,
                tool_calls: [{ type: "function", function: call_details, id: msg[:id] }],
              }
            elsif msg[:type] == :tool
              { role: "tool", tool_call_id: msg[:id], content: msg[:content] }
            else
              user_message = { role: "user", content: msg[:content] }

              if msg[:id]
                if embed_user_ids
                  user_message[:content] = "#{msg[:id]}: #{msg[:content]}"
                else
                  user_message[:name] = msg[:id]
                end
              end

              user_message
            end
          end
        end
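
        # Translate the prompt's generic tool definitions into OpenAI's
        # function-calling schema.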
        def tools
          prompt.tools.map do |t|
            tool = t.dup
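
            # Collapse the flat parameter list into a JSON Schema "object" definition,
            # collecting required parameter names and per-parameter types.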
            tool[:parameters] = t[:parameters]
              .to_a
              .reduce({ type: "object", properties: {}, required: [] }) do |memo, p|
                name = p[:name]
                memo[:required] << name if p[:required]

                memo[:properties][name] = p.except(:name, :required, :item_type)

                memo[:properties][name][:items] = { type: p[:item_type] } if p[:item_type]
                memo
              end

            { type: "function", function: tool }
          end
        end
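
        # Tokens available for the prompt after reserving room for the response
        # and, when present, the serialized tool schema.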
        def max_prompt_tokens
          # Reserve room for the response plus a small buffer - our token counting is
          # not 100% accurate and getting numbers to align exactly is very hard.
          buffer = (opts[:max_tokens] || 2500) + 50

          if tools.present?
            # note this is about 100 tokens over, OpenAI has a more optimal representation
            @function_size ||= self.class.tokenizer.size(tools.to_json.to_s)
            buffer += @function_size
          end

          model_max_tokens - buffer
        end

        private

        def per_message_overhead
          # OpenAI adds roughly 4 tokens of overhead per message
          4
        end

        def calculate_message_token(context)
          self.class.tokenizer.size(context[:content].to_s + context[:name].to_s)
        end
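
        # Context window size for each supported model; unknown models fall back to 8192.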
        def model_max_tokens
          case model_name
          when "gpt-3.5-turbo-16k"
            16_384
          when "gpt-4"
            8192
          when "gpt-4-32k"
            32_768
          when "gpt-4-0125-preview"
            131_072
          when "gpt-4-turbo"
            131_072
          else
            8192
          end
        end
      end
    end
  end
end