# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
      class Dialect
        class << self
          # Subclasses must implement this to claim the model names they handle.
          def can_translate?(_model_name)
            raise NotImplementedError
          end

          # Returns the first dialect class that can handle the given model name.
          def dialect_for(model_name)
            dialects = [
              DiscourseAi::Completions::Dialects::Claude,
              DiscourseAi::Completions::Dialects::Llama2Classic,
              DiscourseAi::Completions::Dialects::ChatGpt,
              DiscourseAi::Completions::Dialects::OrcaStyle,
              DiscourseAi::Completions::Dialects::Gemini,
              DiscourseAi::Completions::Dialects::Mixtral,
            ]

            dialect = dialects.find { |d| d.can_translate?(model_name) }
            raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL if !dialect

            dialect
          end

          def tokenizer
            raise NotImplementedError
          end
        end

        def initialize(generic_prompt, model_name, opts: {})
          @prompt = generic_prompt
          @model_name = model_name
          @opts = opts
        end

        def translate
          raise NotImplementedError
        end

        # Renders the prompt's tool definitions as the XML-ish block referenced
        # by build_tools_prompt below.
        def tools
          tools = +""

          prompt[:tools].each do |function|
            parameters = +""

            if function[:parameters].present?
              function[:parameters].each do |parameter|
                parameters << <<~PARAMETER
                  <parameter>
                  <parameter_name>#{parameter[:name]}</parameter_name>
                  <parameter_type>#{parameter[:type]}</parameter_type>
                  <parameter_description>#{parameter[:description]}</parameter_description>
                  <is_required>#{parameter[:required]}</is_required>
                PARAMETER

                if parameter[:enum]
                  parameters << "<options>#{parameter[:enum].join(",")}</options>\n"
                end

                parameters << "</parameter>\n"
              end
            end

            tools << <<~TOOLS
              <tool_description>
              <tool_name>#{function[:name]}</tool_name>
              <description>#{function[:description]}</description>
              <parameters>
              #{parameters}</parameters>
              </tool_description>
            TOOLS
          end

          tools
        end

        def conversation_context
          raise NotImplementedError
        end

        def max_prompt_tokens
          raise NotImplementedError
        end

        private

        attr_reader :prompt, :model_name, :opts

        # Walks the conversation history, truncating or dropping messages so the
        # total prompt stays within max_prompt_tokens.
        def trim_context(conversation_context)
          prompt_limit = max_prompt_tokens
          current_token_count = calculate_token_count_without_context
          message_step_size = (max_prompt_tokens / 25).to_i * -1

          conversation_context.reduce([]) do |memo, context|
            break(memo) if current_token_count >= prompt_limit

            dupped_context = context.dup
            message_tokens = calculate_message_token(dupped_context)

            # Trim content to make sure we respect the token limit.
            while dupped_context[:content].present? &&
                    message_tokens + current_token_count + per_message_overhead > prompt_limit
              dupped_context[:content] = dupped_context[:content][0..message_step_size] || ""
              message_tokens = calculate_message_token(dupped_context)
            end

            next(memo) if dupped_context[:content].blank?

            current_token_count += message_tokens + per_message_overhead

            memo << dupped_context
          end
        end

        def calculate_token_count_without_context
          tokenizer = self.class.tokenizer

          examples_count =
            prompt[:examples].to_a.sum do |pair|
              tokenizer.size(pair.join) + (per_message_overhead * 2)
            end
          input_count = tokenizer.size(prompt[:input].to_s) + per_message_overhead

          examples_count + input_count +
            prompt
              .except(:conversation_context, :tools, :examples, :input)
              .sum { |_, v| tokenizer.size(v) + per_message_overhead }
        end

        def per_message_overhead
          0
        end

        def calculate_message_token(context)
          self.class.tokenizer.size(context[:content].to_s)
        end

        def build_tools_prompt
          return "" if prompt[:tools].blank?

          <<~TEXT
            In this environment you have access to a set of tools you can use to answer the user's question.
            You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:

            <function_calls>
            <invoke>
            <tool_name>$TOOL_NAME</tool_name>
            <parameters>
            <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
            ...
            </parameters>
            </invoke>
            </function_calls>

            Here are the tools available:

            <tools>
            #{tools}</tools>
          TEXT
        end
      end
    end
  end
end
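
# ---------------------------------------------------------------------------
# Usage sketch (illustrative comments only; not part of the library).
#
# A concrete dialect subclasses Dialect, answers `can_translate?` for its
# model names, supplies a tokenizer, and implements `translate` and
# `max_prompt_tokens`. Everything below is a minimal sketch: the `MyDialect`
# class, the "my-model" name, the token budget, and the prompt hash shape are
# assumptions for illustration; the real implementations live in the sibling
# dialect files (claude.rb, chat_gpt.rb, gemini.rb, ...).
#
#   class MyDialect < DiscourseAi::Completions::Dialects::Dialect
#     def self.can_translate?(model_name)
#       model_name == "my-model" # hypothetical model name
#     end
#
#     def self.tokenizer
#       DiscourseAi::Tokenizer::OpenAiTokenizer # any tokenizer responding to #size
#     end
#
#     def translate
#       # Assemble a model-specific prompt string. `trim_context` truncates or
#       # drops older messages so the total stays under max_prompt_tokens;
#       # entries are assumed to be hashes carrying a :content key.
#       context = trim_context(prompt[:conversation_context] || [])
#       [prompt[:insts], build_tools_prompt, *context.map { |c| c[:content] }, prompt[:input]]
#         .compact
#         .join("\n")
#     end
#
#     def max_prompt_tokens
#       8192 # hypothetical budget for "my-model"
#     end
#   end
#
# Callers resolve a dialect class by model name and then translate a generic
# prompt hash (assuming the registered ChatGpt dialect claims "gpt-4"):
#
#   dialect_klass = DiscourseAi::Completions::Dialects::Dialect.dialect_for("gpt-4")
#   dialect_klass.new(generic_prompt, "gpt-4").translate
# ---------------------------------------------------------------------------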