# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
      class Dialect
        class << self
          def can_translate?(_model_name)
            raise NotImplementedError
          end

          # Picks the first dialect that knows how to translate prompts for the given model.
          def dialect_for(model_name)
            dialects = [
              DiscourseAi::Completions::Dialects::Claude,
              DiscourseAi::Completions::Dialects::Llama2Classic,
              DiscourseAi::Completions::Dialects::ChatGpt,
              DiscourseAi::Completions::Dialects::OrcaStyle,
              DiscourseAi::Completions::Dialects::Gemini,
              DiscourseAi::Completions::Dialects::Mixtral,
            ]

            if Rails.env.test? || Rails.env.development?
              dialects << DiscourseAi::Completions::Dialects::Fake
            end

            dialect = dialects.find { |d| d.can_translate?(model_name) }
            raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL if !dialect

            dialect
          end

          def tokenizer
            raise NotImplementedError
          end

          def tool_preamble
            <<~TEXT
              In this environment you have access to a set of tools you can use to answer the user's question.
              You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:

              <function_calls>
              <invoke>
              <tool_name>$TOOL_NAME</tool_name>
              <parameters>
              <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
              ...
              </parameters>
              </invoke>
              </function_calls>

              If a parameter type is an array, return a JSON array of values. For example:
              [1,"two",3.0]

              Here are the tools available:
            TEXT
          end
        end

        def initialize(generic_prompt, model_name, opts: {})
          @prompt = generic_prompt
          @model_name = model_name
          @opts = opts
        end

        def translate
          raise NotImplementedError
        end

        # Serializes the prompt's tool definitions into the tag-based format described
        # in tool_preamble.
        def tools
          tools = +""

          prompt.tools.each do |function|
            parameters = +""

            if function[:parameters].present?
              function[:parameters].each do |parameter|
                parameters << <<~PARAMETER
                  <parameter>
                  <name>#{parameter[:name]}</name>
                  <type>#{parameter[:type]}</type>
                  <description>#{parameter[:description]}</description>
                  <required>#{parameter[:required]}</required>
                PARAMETER
                if parameter[:enum]
                  parameters << "<options>#{parameter[:enum].join(",")}</options>\n"
                end
                parameters << "</parameter>\n"
              end
            end

            tools << <<~TOOLS
              <tool_description>
              <tool_name>#{function[:name]}</tool_name>
              <description>#{function[:description]}</description>
              <parameters>
              #{parameters}</parameters>
              </tool_description>
            TOOLS
          end

          tools
        end

        def conversation_context
          raise NotImplementedError
        end

        def max_prompt_tokens
          raise NotImplementedError
        end

        private

        attr_reader :prompt, :model_name, :opts

        # Drops or truncates the oldest messages until the conversation fits within
        # max_prompt_tokens, always keeping the system message (when present) intact.
        def trim_messages(messages)
          prompt_limit = max_prompt_tokens
          current_token_count = 0
          message_step_size = (max_prompt_tokens / 25).to_i * -1
          trimmed_messages = []

          range = (0..-1)
          if messages.dig(0, :type) == :system
            system_message = messages[0]
            trimmed_messages << system_message
            current_token_count += calculate_message_token(system_message)
            range = (1..-1)
          end

          reversed_trimmed_msgs = []

          messages[range].reverse.each do |msg|
            break if current_token_count >= prompt_limit

            message_tokens = calculate_message_token(msg)

            dupped_msg = msg.dup

            # Don't trim tool call metadata.
            if msg[:type] == :tool_call
              break if current_token_count + message_tokens + per_message_overhead > prompt_limit

              current_token_count += message_tokens + per_message_overhead
              reversed_trimmed_msgs << dupped_msg
              next
            end

            # Trimming content to make sure we respect token limit.
            while dupped_msg[:content].present? &&
                    message_tokens + current_token_count + per_message_overhead > prompt_limit
              dupped_msg[:content] = dupped_msg[:content][0..message_step_size] || ""
              message_tokens = calculate_message_token(dupped_msg)
            end

            next if dupped_msg[:content].blank?

            current_token_count += message_tokens + per_message_overhead

            reversed_trimmed_msgs << dupped_msg
          end

          # A tool result without its matching tool call would be invalid, so drop it.
          reversed_trimmed_msgs.pop if reversed_trimmed_msgs.last&.dig(:type) == :tool

          trimmed_messages.concat(reversed_trimmed_msgs.reverse)
        end

        def per_message_overhead
          0
        end

        def calculate_message_token(msg)
          self.class.tokenizer.size(msg[:content].to_s)
        end

        def build_tools_prompt
          return "" if prompt.tools.blank?

          (<<~TEXT).strip
            #{self.class.tool_preamble}
            <tools>
            #{tools}</tools>
          TEXT
        end
      end
    end
  end
end
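
# Illustrative sketch only (not part of the plugin): a minimal subclass showing the
# contract this abstract base class expects. The model name, tokenizer, and token
# budget below are hypothetical placeholders.
#
#   class DiscourseAi::Completions::Dialects::ExampleDialect < DiscourseAi::Completions::Dialects::Dialect
#     def self.can_translate?(model_name)
#       model_name == "example-model"
#     end
#
#     def self.tokenizer
#       DiscourseAi::Tokenizer::OpenAiTokenizer
#     end
#
#     def max_prompt_tokens
#       8_000
#     end
#
#     def translate
#       # Assumes prompt.messages is an array of { type:, content: } hashes, the same
#       # shape trim_messages above expects.
#       trim_messages(prompt.messages).map { |msg| { role: msg[:type].to_s, content: msg[:content] } }
#     end
#   end
#
#   # Dialect.dialect_for("example-model") would then return ExampleDialect, provided
#   # the class was also added to the list inside .dialect_for.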