2023-12-18 16:06:01 -05:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi
|
|
|
|
module Completions
|
|
|
|
module Dialects
|
|
|
|
class Dialect
|
|
|
|
class << self
|
|
|
|
# Abstract class-level hook: reports whether this dialect can translate prompts
# for the given model name. dialect_for calls it on every candidate dialect, so
# each concrete dialect has to provide its own answer.
#
# _model_name - the model name being looked up (unused in the base class).
#
# Raises NotImplementedError when a dialect has not overridden it.
def can_translate?(_model_name)
  # NotImplementedError is Ruby's built-in exception for abstract hooks.
  raise NotImplementedError, "#{self} must implement can_translate?"
end
|
|
|
|
|
|
|
|
# Looks up the dialect class able to translate prompts for +model_name+.
#
# Candidates are asked in list order and the first one whose can_translate?
# answers truthy wins. The Fake dialect is only a candidate in the test and
# development Rails environments.
#
# Raises DiscourseAi::Completions::Llm::UNKNOWN_MODEL when no candidate matches.
def dialect_for(model_name)
  candidates = [
    DiscourseAi::Completions::Dialects::ChatGpt,
    DiscourseAi::Completions::Dialects::Gemini,
    DiscourseAi::Completions::Dialects::Mistral,
    DiscourseAi::Completions::Dialects::Claude,
    DiscourseAi::Completions::Dialects::Command,
  ]

  if Rails.env.test? || Rails.env.development?
    candidates.push(DiscourseAi::Completions::Dialects::Fake)
  end

  match = candidates.detect { |candidate| candidate.can_translate?(model_name) }
  return match if match

  raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL
end
|
|
|
|
|
|
|
|
# Abstract class-level hook: returns the tokenizer for this dialect.
# calculate_message_token calls +size+ on the returned object to count the
# tokens in a message's content.
#
# Raises NotImplementedError when a dialect has not overridden it.
def tokenizer
  # NotImplementedError is Ruby's built-in exception for abstract hooks.
  raise NotImplementedError, "#{self} must implement tokenizer"
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# Builds a dialect around a generic prompt.
#
# generic_prompt - the prompt to translate; exposed through the +prompt+ reader.
# model_name     - name of the target model.
# opts           - optional hash of extra settings (defaults to an empty hash).
def initialize(generic_prompt, model_name, opts: {})
  @prompt, @model_name, @opts = generic_prompt, model_name, opts
end
|
|
|
|
|
2024-05-07 09:02:16 -04:00
|
|
|
# Matches a whole string made up only of ASCII letters, digits and underscores
# (at least one character); \A and \z anchor the match to the entire string.
VALID_ID_REGEX = /\A[a-zA-Z0-9_]+\z/
|
2023-12-18 16:06:01 -05:00
|
|
|
|
2024-05-07 09:02:16 -04:00
|
|
|
# Flag consulted by translate: when it is truthy and the newest message has
# type :model, that trailing message is removed before translation.
# The base dialect answers false.
def can_end_with_assistant_msg?
  false
end
|
|
|
|
|
2024-05-07 09:02:16 -04:00
|
|
|
# Flag for native tool support; the base dialect answers false.
# NOTE(review): presumably dialects that answer true do not rely on the XML
# tool helper built by tools_dialect - confirm against the concrete dialects.
def native_tool_support?
  false
end
|
|
|
|
|
2023-12-18 16:06:01 -05:00
|
|
|
# Tool definitions in the form produced by tools_dialect.translated_tools.
# A truthy result is cached in @tools and reused on later calls.
def tools
  return @tools if @tools

  @tools = tools_dialect.translated_tools
end
|
|
|
|
|
|
|
|
# Converts the generic prompt's messages into this dialect's message format.
#
# The messages are trimmed to the token budget by trim_messages, then each one
# is handed to the private "<type>_msg" hook named after its :type. Hooks that
# return nil are left out of the result.
#
# Returns an Array of translated messages (empty when the prompt has none).
def translate
  messages = prompt.messages

  # Some models use an assistant msg to improve long-context responses.
  # Safe navigation keeps a prompt without messages from raising on nil,
  # matching the nil-safe lookups in trim_messages.
  if messages.last&.dig(:type) == :model && can_end_with_assistant_msg?
    # Slice into a new array so the prompt's own message list is not mutated.
    messages = messages[0...-1]
  end

  # The *_msg hooks are private, hence send rather than public_send.
  trim_messages(messages).map { |msg| send("#{msg[:type]}_msg", msg) }.compact
end
|
|
|
|
|
|
|
|
# Abstract hook with no implementation in the base dialect.
#
# Raises NotImplementedError when a dialect has not overridden it.
def conversation_context
  # NotImplementedError is Ruby's built-in exception for abstract hooks.
  raise NotImplementedError, "#{self.class} must implement conversation_context"
end
|
|
|
|
|
|
|
|
# Abstract hook: the token budget that trim_messages fits the conversation
# into. Concrete dialects must return a number.
#
# Raises NotImplementedError when a dialect has not overridden it.
def max_prompt_tokens
  # NotImplementedError is Ruby's built-in exception for abstract hooks.
  raise NotImplementedError, "#{self.class} must implement max_prompt_tokens"
end
|
|
|
|
|
2024-03-01 15:53:21 -05:00
|
|
|
# Public reader for the generic prompt handed to the constructor.
attr_reader :prompt
|
|
|
|
|
2023-12-18 16:06:01 -05:00
|
|
|
private
|
|
|
|
|
2024-03-01 15:53:21 -05:00
|
|
|
# Readers for the model name and options given to the constructor; they are
# private because they are declared after the `private` marker.
attr_reader :model_name, :opts
|
2023-12-18 16:06:01 -05:00
|
|
|
|
2024-01-12 12:36:44 -05:00
|
|
|
# Fits +messages+ into the token budget returned by max_prompt_tokens.
#
# A leading :system message is always kept. The remaining messages are walked
# newest-first: a message that fits is kept whole, one that does not fit has
# its content shortened from the tail until it fits, and one left with blank
# content is skipped. :tool_call messages are never shortened; the walk stops
# at the first one that does not fit.
#
# messages - Array of message hashes; :type and :content are the keys read here.
#
# Returns a new Array of (shallow-copied) messages in their original order.
def trim_messages(messages)
  prompt_limit = max_prompt_tokens
  current_token_count = 0

  # Content is shortened with content[0..message_step_size]. A range end of 0
  # keeps re-selecting the first character and -1 selects the whole string, so
  # neither makes progress and the loop below would never finish. Clamp to -2
  # (drop one character per pass) for limits under 50 tokens.
  message_step_size = -[(prompt_limit / 25).to_i, 2].max

  trimmed_messages = []

  range = (0..-1)
  if messages.dig(0, :type) == :system
    system_message = messages[0]
    trimmed_messages << system_message
    current_token_count += calculate_message_token(system_message)
    range = (1..-1)
  end

  reversed_trimmed_msgs = []

  messages[range].reverse_each do |msg|
    break if current_token_count >= prompt_limit

    message_tokens = calculate_message_token(msg)

    # Shallow copy: reassigning :content below leaves the caller's hash intact.
    dupped_msg = msg.dup

    # Don't trim tool call metadata.
    if msg[:type] == :tool_call
      break if current_token_count + message_tokens + per_message_overhead > prompt_limit

      current_token_count += message_tokens + per_message_overhead
      reversed_trimmed_msgs << dupped_msg
      next
    end

    # Trimming content to make sure we respect token limit.
    while dupped_msg[:content].present? &&
            message_tokens + current_token_count + per_message_overhead > prompt_limit
      dupped_msg[:content] = dupped_msg[:content][0..message_step_size] || ""
      message_tokens = calculate_message_token(dupped_msg)
    end

    next if dupped_msg[:content].blank?

    current_token_count += message_tokens + per_message_overhead

    reversed_trimmed_msgs << dupped_msg
  end

  # The list is newest-first, so its last entry is the oldest message kept.
  # NOTE(review): an oldest :tool entry is dropped, presumably because the
  # :tool_call it answers did not survive trimming - confirm.
  reversed_trimmed_msgs.pop if reversed_trimmed_msgs.last&.dig(:type) == :tool

  trimmed_messages.concat(reversed_trimmed_msgs.reverse)
end
|
|
|
|
|
|
|
|
# Extra tokens that trim_messages charges for every non-system message on top
# of the tokens in its content. The base dialect charges nothing.
def per_message_overhead
  0
end
|
|
|
|
|
2024-01-12 12:36:44 -05:00
|
|
|
# Counts the tokens in a message's :content using the dialect class's tokenizer.
# The content is converted with to_s first, so a nil content counts as "".
def calculate_message_token(msg)
  counter = self.class.tokenizer
  counter.size(msg[:content].to_s)
end
|
2024-05-07 09:02:16 -04:00
|
|
|
|
|
|
|
# Helper object that handles tool translation for this dialect: an XmlTools
# instance built from the prompt's tools, created on first use and then reused.
def tools_dialect
  return @tools_dialect if @tools_dialect

  @tools_dialect = DiscourseAi::Completions::Dialects::XmlTools.new(prompt.tools)
end
|
|
|
|
|
|
|
|
# Abstract hook: translates a message of type :system into the dialect's
# format. translate reaches it through its "<type>_msg" dispatch.
#
# msg - the message hash to translate.
#
# Raises NotImplementedError when a dialect has not overridden it.
def system_msg(msg)
  # NotImplementedError is Ruby's built-in exception for abstract hooks.
  raise NotImplementedError, "#{self.class} must implement system_msg"
end
|
|
|
|
|
|
|
|
# Abstract hook for translating an assistant message.
# NOTE(review): translate dispatches on "<type>_msg" and checks for messages of
# type :model, so model output presumably reaches a +model_msg+ hook rather
# than this one - confirm whether this name is still in use.
#
# msg - the message hash to translate.
#
# Raises NotImplementedError when a dialect has not overridden it.
def assistant_msg(msg)
  # NotImplementedError is Ruby's built-in exception for abstract hooks.
  raise NotImplementedError, "#{self.class} must implement assistant_msg"
end
|
|
|
|
|
|
|
|
# Abstract hook: translates a user message into the dialect's format.
# translate reaches it through its "<type>_msg" dispatch, presumably for
# messages whose :type is :user.
#
# msg - the message hash to translate.
#
# Raises NotImplementedError when a dialect has not overridden it.
def user_msg(msg)
  # NotImplementedError is Ruby's built-in exception for abstract hooks.
  raise NotImplementedError, "#{self.class} must implement user_msg"
end
|
|
|
|
|
|
|
|
# Default translation of a :tool_call message: an assistant-role entry whose
# content is whatever tools_dialect.from_raw_tool_call builds from the message.
def tool_call_msg(msg)
  rendered_call = tools_dialect.from_raw_tool_call(msg)

  { role: "assistant", content: rendered_call }
end
|
|
|
|
|
|
|
|
# Default translation of a :tool message: a user-role entry whose content is
# whatever tools_dialect.from_raw_tool builds from the message.
def tool_msg(msg)
  rendered_result = tools_dialect.from_raw_tool(msg)

  { role: "user", content: rendered_result }
end
|
2023-12-18 16:06:01 -05:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|