# frozen_string_literal: true
module DiscourseAi
module Completions
module Dialects
class Dialect
class << self
def can_translate?(_model_name)
raise NotImplemented
end
def dialect_for(model_name)
dialects = [
DiscourseAi::Completions::Dialects::Claude,
DiscourseAi::Completions::Dialects::Llama2Classic,
DiscourseAi::Completions::Dialects::ChatGpt,
DiscourseAi::Completions::Dialects::OrcaStyle,
DiscourseAi::Completions::Dialects::Gemini,
DiscourseAi::Completions::Dialects::Mixtral,
]
dialect = dialects.find { |d| d.can_translate?(model_name) }
raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL if !dialect
dialect
end
def tokenizer
raise NotImplemented
end
end
def initialize(generic_prompt, model_name, opts: {})
@prompt = generic_prompt
@model_name = model_name
@opts = opts
end
def translate
raise NotImplemented
end
def tools
tools = +""
prompt[:tools].each do |function|
parameters = +""
if function[:parameters].present?
function[:parameters].each do |parameter|
parameters << <<~PARAMETER
#{parameter[:name]}
#{parameter[:type]}
#{parameter[:description]}
#{parameter[:required]}
PARAMETER
if parameter[:enum]
parameters << "#{parameter[:enum].join(",")}\n"
end
parameters << "\n"
end
end
tools << <<~TOOLS
#{function[:name]}
#{function[:description]}
#{parameters}
TOOLS
end
tools
end
def conversation_context
raise NotImplemented
end
def max_prompt_tokens
raise NotImplemented
end
private
attr_reader :prompt, :model_name, :opts
def trim_context(conversation_context)
prompt_limit = max_prompt_tokens
current_token_count = calculate_token_count_without_context
message_step_size = (max_prompt_tokens / 25).to_i * -1
conversation_context.reduce([]) do |memo, context|
break(memo) if current_token_count >= prompt_limit
dupped_context = context.dup
message_tokens = calculate_message_token(dupped_context)
# Trimming content to make sure we respect token limit.
while dupped_context[:content].present? &&
message_tokens + current_token_count + per_message_overhead > prompt_limit
dupped_context[:content] = dupped_context[:content][0..message_step_size] || ""
message_tokens = calculate_message_token(dupped_context)
end
next(memo) if dupped_context[:content].blank?
current_token_count += message_tokens + per_message_overhead
memo << dupped_context
end
end
def calculate_token_count_without_context
tokenizer = self.class.tokenizer
examples_count =
prompt[:examples].to_a.sum do |pair|
tokenizer.size(pair.join) + (per_message_overhead * 2)
end
input_count = tokenizer.size(prompt[:input].to_s) + per_message_overhead
examples_count + input_count +
prompt
.except(:conversation_context, :tools, :examples, :input)
.sum { |_, v| tokenizer.size(v) + per_message_overhead }
end
def per_message_overhead
0
end
def calculate_message_token(context)
self.class.tokenizer.size(context[:content].to_s)
end
def build_tools_prompt
return "" if prompt[:tools].blank?
<<~TEXT
In this environment you have access to a set of tools you can use to answer the user's question.
You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
$TOOL_NAME
<$PARAMETER_NAME>$PARAMETER_VALUE$PARAMETER_NAME>
...
Here are the tools available:
#{tools}
TEXT
end
end
end
end
end