# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
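      # Base class for prompt "dialects": adapters that translate the generic
      # DiscourseAi::Completions prompt format into the message format a
      # specific LLM API expects. Concrete dialects are listed in all_dialects
      # and implement can_translate?, max_prompt_tokens and the *_msg hooks.
      #
      # Rough usage sketch (prompt and llm_model stand in for the caller's
      # objects):
      #
      #   dialect_klass = DiscourseAi::Completions::Dialects::Dialect.dialect_for(llm_model)
      #   dialect = dialect_klass.new(prompt, llm_model)
      #   dialect.translate # => messages shaped for the target API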
      class Dialect
        class << self
          def can_translate?(llm_model)
            raise NotImplementedError
          end

          def all_dialects
            [
              DiscourseAi::Completions::Dialects::ChatGpt,
              DiscourseAi::Completions::Dialects::Gemini,
              DiscourseAi::Completions::Dialects::Claude,
              DiscourseAi::Completions::Dialects::Command,
              DiscourseAi::Completions::Dialects::Ollama,
              DiscourseAi::Completions::Dialects::Mistral,
              DiscourseAi::Completions::Dialects::Nova,
              DiscourseAi::Completions::Dialects::OpenAiCompatible,
            ]
          end
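
          # Picks the dialect class whose can_translate? accepts the given
          # llm_model. In test and development the Fake dialect is checked
          # first; raises UNKNOWN_MODEL if nothing matches.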
          def dialect_for(llm_model)
            dialects = []

            if Rails.env.test? || Rails.env.development?
              dialects = [DiscourseAi::Completions::Dialects::Fake]
            end

            dialects = dialects.concat(all_dialects)

            dialect = dialects.find { |d| d.can_translate?(llm_model) }
            raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL if !dialect

            dialect
          end
        end

        def initialize(generic_prompt, llm_model, opts: {})
          @prompt = generic_prompt
          @opts = opts
          @llm_model = llm_model
        end
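
        # Identifiers (e.g. tool names) are restricted to letters, digits and
        # underscores.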
        VALID_ID_REGEX = /\A[a-zA-Z0-9_]+\z/

        def native_tool_support?
          false
        end

        def vision_support?
          llm_model.vision_enabled?
        end

        def tools
          @tools ||= tools_dialect.translated_tools
        end

        def tool_choice
          prompt.tool_choice
        end

        def self.no_more_tool_calls_text
          # note, Anthropic must never prefill with an ending whitespace
          "I WILL NOT USE TOOLS IN THIS REPLY, user expressed they wanted to stop using tool calls.\nHere is the best, complete, answer I can come up with given the information I have."
        end

        def self.no_more_tool_calls_text_user
          "DO NOT USE TOOLS IN YOUR REPLY. Return the best answer you can given the information I supplied you."
        end

        def no_more_tool_calls_text
          self.class.no_more_tool_calls_text
        end

        def no_more_tool_calls_text_user
          self.class.no_more_tool_calls_text_user
        end
        def translate
          messages = trim_messages(prompt.messages)
          last_message = messages.last
          inject_done_on_last_tool_call = false

          if !native_tool_support? && last_message && last_message[:type].to_sym == :tool &&
               prompt.tool_choice == :none
            inject_done_on_last_tool_call = true
          end

          translated =
            messages
              .map do |msg|
                case msg[:type].to_sym
                when :system
                  system_msg(msg)
                when :user
                  user_msg(msg)
                when :model
                  model_msg(msg)
                when :tool
                  if inject_done_on_last_tool_call && msg == last_message
                    tools_dialect.inject_done { tool_msg(msg) }
                  else
                    tool_msg(msg)
                  end
                when :tool_call
                  tool_call_msg(msg)
                else
                  raise ArgumentError, "Unknown message type: #{msg[:type]}"
                end
              end
              .compact

          translated
        end

        def conversation_context
          raise NotImplementedError
        end

        def max_prompt_tokens
          raise NotImplementedError
        end

        attr_reader :prompt

        private

        attr_reader :opts, :llm_model
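
        # Trims messages to fit within max_prompt_tokens, keeping the newest
        # messages and dropping or truncating older ones. A leading system
        # message is always kept, truncated to at most 60% of the budget.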
        def trim_messages(messages)
          prompt_limit = max_prompt_tokens
          current_token_count = 0
          message_step_size = (prompt_limit / 25).to_i * -1

          trimmed_messages = []

          range = (0..-1)
          if messages.dig(0, :type) == :system
            max_system_tokens = prompt_limit * 0.6
            system_message = messages[0]
            system_size = calculate_message_token(system_message)

            if system_size > max_system_tokens
              system_message[:content] = llm_model.tokenizer_class.truncate(
                system_message[:content],
                max_system_tokens,
              )
            end

            trimmed_messages << system_message
            current_token_count += calculate_message_token(system_message)
            range = (1..-1)
          end

          reversed_trimmed_msgs = []

          messages[range].reverse.each do |msg|
            break if current_token_count >= prompt_limit

            message_tokens = calculate_message_token(msg)

            dupped_msg = msg.dup

            # Don't trim tool call metadata.
            if msg[:type] == :tool_call
              break if current_token_count + message_tokens + per_message_overhead > prompt_limit

              current_token_count += message_tokens + per_message_overhead
              reversed_trimmed_msgs << dupped_msg
              next
            end

            # Trimming content to make sure we respect token limit.
            while dupped_msg[:content].present? &&
                    message_tokens + current_token_count + per_message_overhead > prompt_limit
              dupped_msg[:content] = dupped_msg[:content][0..message_step_size] || ""
              message_tokens = calculate_message_token(dupped_msg)
            end

            next if dupped_msg[:content].blank?

            current_token_count += message_tokens + per_message_overhead
            reversed_trimmed_msgs << dupped_msg
          end

          # If the oldest surviving message is a tool result, its matching tool
          # call was trimmed away, so drop it too.
          reversed_trimmed_msgs.pop if reversed_trimmed_msgs.last&.dig(:type) == :tool

          trimmed_messages.concat(reversed_trimmed_msgs.reverse)
        end

        def per_message_overhead
          0
        end
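
        # Token count of a message's content, per the model's tokenizer.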
        def calculate_message_token(msg)
          llm_model.tokenizer_class.size(msg[:content].to_s)
        end
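
        # Tools default to the XML (prompt-based) representation; dialects
        # with native tool support are expected to override this.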
        def tools_dialect
          @tools_dialect ||= DiscourseAi::Completions::Dialects::XmlTools.new(prompt.tools)
        end

        def system_msg(msg)
          raise NotImplementedError
        end

        def model_msg(msg)
          raise NotImplementedError
        end

        def user_msg(msg)
          raise NotImplementedError
        end
        def tool_call_msg(msg)
          new_content = tools_dialect.from_raw_tool_call(msg)
          msg = msg.merge(content: new_content)
          model_msg(msg)
        end

        def tool_msg(msg)
          new_content = tools_dialect.from_raw_tool(msg)
          msg = msg.merge(content: new_content)
          user_msg(msg)
        end
      end
    end
  end
end