REFACTOR: Represent generic prompts with an Object. (#416)

* REFACTOR: Represent generic prompts with an Object.

* Adds a bit more validation for clarity

* Rewrite bot title prompt and fix quirk handling
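
For reviewers, a minimal sketch of the new object's surface as it is exercised in this diff (the setup values are illustrative only; the API itself comes from lib/completions/prompt.rb below):

    # Build a prompt from a system message, optionally with prior turns and tools.
    prompt =
      DiscourseAi::Completions::Prompt.new(
        "You are titlebot. Given a topic, you will figure out a title.",
        messages: [{ type: :user, id: "user1", content: "Suggest a title for this topic." }],
      )

    # Turns are appended (and validated) with push instead of hash mutation.
    prompt.push(type: :model, content: "A Seven Word Title About Your Topic")
    prompt.push(type: :user, content: "Make it shorter.")

    # Tools are a plain accessor now, replacing prompt[:tools].
    prompt.tools = []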

---------

Co-authored-by: Sam Saffron <sam.saffron@gmail.com>
Roman Rizzi 2024-01-12 14:36:44 -03:00 committed by GitHub
parent 705ef986b4
commit 04eae76f68
39 changed files with 880 additions and 1539 deletions


@@ -33,11 +33,18 @@ class CompletionPrompt < ActiveRecord::Base
       input
     end

-    messages_hash.merge(input: <<~TEXT)
-      <input>
-      #{user_input}
-      </input>
-    TEXT
+    instructions = [messages_hash[:insts], messages_hash[:post_insts].to_s].join("\n")
+
+    prompt = DiscourseAi::Completions::Prompt.new(instructions)
+
+    messages_hash[:examples].to_a.each do |example_pair|
+      prompt.push(type: :user, content: example_pair.first)
+      prompt.push(type: :model, content: example_pair.second)
+    end
+
+    prompt.push(type: :user, content: "<input>#{user_input}</input>")
+
+    prompt
   end

   private


@@ -18,14 +18,19 @@ module DiscourseAi
       attr_reader :bot_user

       def get_updated_title(conversation_context, post_user)
-        title_prompt = { insts: <<~TEXT, conversation_context: conversation_context }
+        system_insts = <<~TEXT.strip
           You are titlebot. Given a topic, you will figure out a title.
           You will never respond with anything but 7 word topic title.
         TEXT

-        title_prompt[
-          :input
-        ] = "Based on our previous conversation, suggest a 7 word title without quoting any of it."
+        title_prompt =
+          DiscourseAi::Completions::Prompt.new(system_insts, messages: conversation_context)
+
+        title_prompt.push(
+          type: :user,
+          content:
+            "Based on our previous conversation, suggest a 7 word title without quoting any of it.",
+        )

         DiscourseAi::Completions::Llm
           .proxy(model)
@@ -57,27 +62,30 @@ module DiscourseAi
             tool_call_id = tool.tool_call_id
             invocation_result_json = invoke_tool(tool, llm, cancel, &update_blk).to_json

-            invocation_context = {
-              type: "tool",
-              name: tool_call_id,
-              content: invocation_result_json,
-            }
-            tool_context = {
-              type: "tool_call",
-              name: tool_call_id,
+            tool_call_message = {
+              type: :tool_call,
+              id: tool_call_id,
               content: { name: tool.name, arguments: tool.parameters }.to_json,
             }

-            prompt[:conversation_context] ||= []
+            tool_message = { type: :tool, id: tool_call_id, content: invocation_result_json }

             if tool.standalone?
-              prompt[:conversation_context] = [invocation_context, tool_context]
+              standalone_context =
+                context.dup.merge(
+                  conversation_context: [
+                    context[:conversation_context].last,
+                    tool_call_message,
+                    tool_message,
+                  ],
+                )
+              prompt = persona.craft_prompt(standalone_context)
             else
-              prompt[:conversation_context] = [invocation_context, tool_context] +
-                prompt[:conversation_context]
+              prompt.push(**tool_call_message)
+              prompt.push(**tool_message)
             end

-            raw_context << [tool_context[:content], tool_call_id, "tool_call"]
+            raw_context << [tool_call_message[:content], tool_call_id, "tool_call"]
             raw_context << [invocation_result_json, tool_call_id, "tool"]
           else
             update_blk.call(partial, cancel, nil)
@@ -91,7 +99,7 @@ module DiscourseAi
             total_completions += 1

             # do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS)
-            prompt.delete(:tools) if total_completions == MAX_COMPLETIONS
+            prompt.tools = [] if total_completions == MAX_COMPLETIONS
           end

           raw_context


@@ -100,17 +100,18 @@ module DiscourseAi
             found.nil? ? match : found.to_s
           end

-        insts = <<~TEXT
-          #{system_insts}
-          #{available_tools.map(&:custom_system_message).compact_blank.join("\n")}
-        TEXT
+        prompt =
+          DiscourseAi::Completions::Prompt.new(
+            <<~TEXT.strip,
+              #{system_insts}
+              #{available_tools.map(&:custom_system_message).compact_blank.join("\n")}
+            TEXT
+            messages: context[:conversation_context].to_a,
+          )

-        { insts: insts }.tap do |prompt|
-          prompt[:tools] = available_tools.map(&:signature) if available_tools
-          prompt[:conversation_context] = context[:conversation_context] if context[
-            :conversation_context
-          ]
-        end
+        prompt.tools = available_tools.map(&:signature) if available_tools
+
+        prompt
       end

       def find_tool(partial)


@@ -36,8 +36,9 @@ module DiscourseAi
             .pluck(:raw, :username, "post_custom_prompts.custom_prompt")

         result = []
+        first = true

-        context.each do |raw, username, custom_prompt|
+        context.reverse_each do |raw, username, custom_prompt|
           custom_prompt_translation =
             Proc.new do |message|
               # We can't keep backwards-compatibility for stored functions.
@@ -45,27 +46,29 @@ module DiscourseAi
               if message[2] != "function"
                 custom_context = {
                   content: message[0],
-                  type: message[2].present? ? message[2] : "assistant",
+                  type: message[2].present? ? message[2].to_sym : :model,
                 }

-                custom_context[:name] = message[1] if custom_context[:type] != "assistant"
+                custom_context[:id] = message[1] if custom_context[:type] != :model

-                custom_context
+                result << custom_context
               end
             end

           if custom_prompt.present?
-            result << {
-              type: "multi_turn",
-              content: custom_prompt.reverse_each.map(&custom_prompt_translation).compact,
-            }
+            if first
+              custom_prompt.each(&custom_prompt_translation)
+              first = false
+            else
+              custom_prompt.first(2).each(&custom_prompt_translation)
+            end
           else
             context = {
               content: raw,
-              type: (available_bot_usernames.include?(username) ? "assistant" : "user"),
+              type: (available_bot_usernames.include?(username) ? :model : :user),
             }

-            context[:name] = clean_username(username) if context[:type] == "user"
+            context[:id] = username if context[:type] == :user

             result << context
           end
@@ -208,16 +211,6 @@ module DiscourseAi
       def available_bot_usernames
         @bot_usernames ||= DiscourseAi::AiBot::EntryPoint::BOTS.map(&:second)
       end
-
-      def clean_username(username)
-        if username.match?(/\A[a-zA-Z0-9_-]{1,64}\z/)
-          username
-        else
-          # not the best in the world, but this is what we have to work with
-          # if sites enable unicode usernames this can get messy
-          username.gsub(/[^a-zA-Z0-9_-]/, "_")[0..63]
-        end
-      end
     end
   end
 end
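
With multi_turn entries gone, the rebuilt context is a flat, chronologically ordered array of message hashes; an illustrative shape (usernames and contents invented, ids left raw here because Prompt#push now does the username cleaning):

    [
      { type: :user, id: "some_user", content: "Can you draw a cat?" },
      { type: :tool_call, id: "tool_id", content: { name: "image", arguments: { prompt: "a cat" } }.to_json },
      { type: :tool, id: "tool_id", content: "\"done\"" },
      { type: :model, content: "Here is your cat." },
    ]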


@@ -36,10 +36,10 @@ module DiscourseAi
       def generate_prompt(completion_prompt, input, user, &block)
         llm = DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model)
-        generic_prompt = completion_prompt.messages_with_input(input)
+        prompt = completion_prompt.messages_with_input(input)

         llm.generate(
-          generic_prompt,
+          prompt,
           user: user,
           temperature: completion_prompt.temperature,
           stop_sequences: completion_prompt.stop_sequences,


@@ -57,10 +57,14 @@ module DiscourseAi
       end

       def difussion_prompt(text, user)
-        prompt = { insts: <<~TEXT, input: text }
-          Provide me a StableDiffusion prompt to generate an image that illustrates the following post in 40 words or less, be creative.
-          You'll find the post between <input></input> XML tags.
-        TEXT
+        prompt =
+          DiscourseAi::Completions::Prompt.new(
+            <<~TEXT.strip,
+              Provide me a StableDiffusion prompt to generate an image that illustrates the following post in 40 words or less, be creative.
+              You'll find the post between <input></input> XML tags.
+            TEXT
+            messages: [{ type: :user, content: text, id: user.username }],
+          )

         DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model).generate(
           prompt,


@@ -101,7 +101,7 @@ module DiscourseAi
           tokens_per_post: @tokens_per_post,
           tokenizer: @llm.tokenizer,
         )

-        input = <<~INPUT
+        input = <<~INPUT.strip
           #{@instructions}

           <context>
@@ -111,11 +111,14 @@ module DiscourseAi
           #{@instructions}
         INPUT

-        prompt = {
-          insts: "You are a helpful bot specializing in summarizing activity on Discourse sites",
-          input: input,
-          final_insts: "Here is the report I generated for you",
-        }
+        prompt =
+          DiscourseAi::Completions::Prompt.new(
+            "You are a helpful bot specializing in summarizing activity on Discourse sites",
+            messages: [
+              { type: :user, content: input },
+              { type: :model, content: "Here is the report I generated for you" },
+            ],
+          )

         result = +""


@@ -22,28 +22,39 @@ module DiscourseAi
       end

       def translate
-        open_ai_prompt = [
-          { role: "system", content: [prompt[:insts], prompt[:post_insts].to_s].join("\n") },
-        ]
-
-        if prompt[:examples]
-          prompt[:examples].each do |example_pair|
-            open_ai_prompt << { role: "user", content: example_pair.first }
-            open_ai_prompt << { role: "assistant", content: example_pair.second }
-          end
-        end
-
-        open_ai_prompt.concat(conversation_context) if prompt[:conversation_context]
-
-        open_ai_prompt << { role: "user", content: prompt[:input] } if prompt[:input]
-
-        open_ai_prompt
+        messages = prompt.messages
+
+        # ChatGPT doesn't use an assistant msg to improve long-context responses.
+        messages.pop if messages.last[:type] == :model
+
+        trimmed_messages = trim_messages(messages)
+
+        trimmed_messages.map do |msg|
+          if msg[:type] == :system
+            { role: "system", content: msg[:content] }
+          elsif msg[:type] == :model
+            { role: "assistant", content: msg[:content] }
+          elsif msg[:type] == :tool_call
+            call_details = JSON.parse(msg[:content], symbolize_names: true)
+            call_details[:arguments] = call_details[:arguments].to_json
+
+            {
+              role: "assistant",
+              content: nil,
+              tool_calls: [{ type: "function", function: call_details, id: msg[:id] }],
+            }
+          elsif msg[:type] == :tool
+            { role: "tool", tool_call_id: msg[:id], content: msg[:content] }
+          else
+            { role: "user", content: msg[:content] }.tap do |user_msg|
+              user_msg[:name] = msg[:id] if msg[:id]
+            end
+          end
+        end
       end

       def tools
-        return if prompt[:tools].blank?
-
-        prompt[:tools].map do |t|
+        prompt.tools.map do |t|
           tool = t.dup

           tool[:parameters] = t[:parameters]
@@ -62,39 +73,6 @@ module DiscourseAi
           end
         end

-      def conversation_context
-        return [] if prompt[:conversation_context].blank?
-
-        flattened_context = flatten_context(prompt[:conversation_context])
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context.reverse.map do |context|
-          if context[:type] == "tool_call"
-            function = JSON.parse(context[:content], symbolize_names: true)
-            function[:arguments] = function[:arguments].to_json
-
-            {
-              role: "assistant",
-              content: nil,
-              tool_calls: [{ type: "function", function: function, id: context[:name] }],
-            }
-          else
-            translated = context.slice(:content)
-            translated[:role] = context[:type]
-
-            if context[:name]
-              if translated[:role] == "tool"
-                translated[:tool_call_id] = context[:name]
-              else
-                translated[:name] = context[:name]
-              end
-            end
-
-            translated
-          end
-        end
-      end
-
       def max_prompt_tokens
         # provide a buffer of 120 tokens - our function counting is not
         # 100% accurate and getting numbers to align exactly is very hard


@@ -14,39 +14,50 @@ module DiscourseAi
         end
       end

-      def pad_newlines!(prompt)
-        if prompt[-1..-1] != "\n"
-          prompt << "\n\n"
-        elsif prompt[-2..-1] != "\n\n"
-          prompt << "\n"
-        end
-      end
-
       def translate
-        claude_prompt = uses_system_message? ? +"" : +"Human: "
-        claude_prompt << prompt[:insts] << "\n"
-
-        claude_prompt << build_tools_prompt if prompt[:tools]
-        claude_prompt << build_examples(prompt[:examples]) if prompt[:examples]
-
-        pad_newlines!(claude_prompt)
-
-        claude_prompt << conversation_context if prompt[:conversation_context]
-
-        pad_newlines!(claude_prompt)
-
-        if uses_system_message? && (prompt[:input] || prompt[:post_insts])
-          claude_prompt << "Human: "
-        end
-
-        claude_prompt << "#{prompt[:input]}\n" if prompt[:input]
-        claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts]
-
-        pad_newlines!(claude_prompt)
-
-        claude_prompt << "Assistant: "
-        claude_prompt << " #{prompt[:final_insts]}:" if prompt[:final_insts]
+        messages = prompt.messages
+
+        trimmed_messages = trim_messages(messages)
+
+        # Need to include this differently
+        last_message = trimmed_messages.last[:type] == :assistant ? trimmed_messages.pop : nil
+
+        claude_prompt =
+          trimmed_messages.reduce(+"") do |memo, msg|
+            next(memo) if msg[:type] == :tool_call
+
+            if msg[:type] == :system
+              memo << "Human: " unless uses_system_message?
+              memo << msg[:content]
+
+              if prompt.tools
+                memo << "\n"
+                memo << build_tools_prompt
+              end
+            elsif msg[:type] == :model
+              memo << "\n\nAssistant: #{msg[:content]}"
+            elsif msg[:type] == :tool
+              memo << "\n\nAssistant:\n"
+
+              memo << (<<~TEXT).strip
+                <function_results>
+                <result>
+                <tool_name>#{msg[:id]}</tool_name>
+                <json>
+                #{msg[:content]}
+                </json>
+                </result>
+                </function_results>
+              TEXT
+            else
+              memo << "\n\nHuman: #{msg[:content]}"
+            end
+
+            memo
+          end
+
+        claude_prompt << "\n\nAssistant:"
+        claude_prompt << " #{last_message[:content]}:" if last_message
+
         claude_prompt
       end

@@ -54,49 +65,11 @@ module DiscourseAi
         100_000 # Claude-2.1 has a 200k context window.
       end

-      def conversation_context
-        return "" if prompt[:conversation_context].blank?
-
-        clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
-        flattened_context = flatten_context(clean_context)
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context
-          .reverse
-          .map do |context|
-            row = context[:type] == "user" ? +"Human:" : +"Assistant:"
-
-            if context[:type] == "tool"
-              row << "\n"
-              row << (<<~TEXT).strip
-                <function_results>
-                <result>
-                <tool_name>#{context[:name]}</tool_name>
-                <json>
-                #{context[:content]}
-                </json>
-                </result>
-                </function_results>
-              TEXT
-            else
-              row << " "
-              row << context[:content]
-            end
-          end
-          .join("\n\n")
-      end
-
       private

       def uses_system_message?
         model_name == "claude-2"
       end
-
-      def build_examples(examples_arr)
-        examples_arr.reduce("") do |memo, example|
-          memo += "<example>\nH: #{example[0]}\nA: #{example[1]}\n</example>\n"
-        end
-      end
     end
   end
 end


@@ -31,6 +31,27 @@ module DiscourseAi
         def tokenizer
           raise NotImplemented
         end
+
+        def tool_preamble
+          <<~TEXT
+            In this environment you have access to a set of tools you can use to answer the user's question.
+            You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
+            <function_calls>
+            <invoke>
+            <tool_name>$TOOL_NAME</tool_name>
+            <parameters>
+            <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
+            ...
+            </parameters>
+            </invoke>
+            </function_calls>
+
+            if a parameter type is an array, return a JSON array of values. For example:
+            [1,"two",3.0]
+
+            Here are the tools available:
+          TEXT
+        end
       end

       def initialize(generic_prompt, model_name, opts: {})
@@ -46,7 +67,7 @@ module DiscourseAi
       def tools
         tools = +""

-        prompt[:tools].each do |function|
+        prompt.tools.each do |function|
           parameters = +""
           if function[:parameters].present?
             function[:parameters].each do |parameter|
@@ -89,114 +110,60 @@ module DiscourseAi
       attr_reader :prompt, :model_name, :opts

-      def trim_context(conversation_context)
+      def trim_messages(messages)
         prompt_limit = max_prompt_tokens
-        current_token_count = calculate_token_count_without_context
+        current_token_count = 0
         message_step_size = (max_prompt_tokens / 25).to_i * -1

-        conversation_context.reduce([]) do |memo, context|
-          break(memo) if current_token_count >= prompt_limit
-
-          dupped_context = context.dup
-          message_tokens = calculate_message_token(dupped_context)
-
-          # Don't trim tool call metadata.
-          if context[:type] == "tool_call"
-            current_token_count += calculate_message_token(context) + per_message_overhead
-            memo << context
-            next(memo)
-          end
-
-          # Trimming content to make sure we respect token limit.
-          while dupped_context[:content].present? &&
-              message_tokens + current_token_count + per_message_overhead > prompt_limit
-            dupped_context[:content] = dupped_context[:content][0..message_step_size] || ""
-            message_tokens = calculate_message_token(dupped_context)
-          end
-
-          next(memo) if dupped_context[:content].blank?
-
-          current_token_count += message_tokens + per_message_overhead
-
-          memo << dupped_context
-        end
-      end
-
-      def calculate_token_count_without_context
-        tokenizer = self.class.tokenizer
-
-        examples_count =
-          prompt[:examples].to_a.sum do |pair|
-            tokenizer.size(pair.join) + (per_message_overhead * 2)
-          end
-
-        input_count = tokenizer.size(prompt[:input].to_s) + per_message_overhead
-
-        examples_count + input_count +
-          prompt
-            .except(:conversation_context, :tools, :examples, :input)
-            .sum { |_, v| tokenizer.size(v) + per_message_overhead }
-      end
+        reversed_trimmed_msgs =
+          messages
+            .reverse
+            .reduce([]) do |acc, msg|
+              message_tokens = calculate_message_token(msg)
+
+              dupped_msg = msg.dup
+
+              # Don't trim tool call metadata.
+              if msg[:type] == :tool_call
+                current_token_count += message_tokens + per_message_overhead
+                acc << dupped_msg
+                next(acc)
+              end
+
+              # Trimming content to make sure we respect token limit.
+              while dupped_msg[:content].present? &&
+                  message_tokens + current_token_count + per_message_overhead > prompt_limit
+                dupped_msg[:content] = dupped_msg[:content][0..message_step_size] || ""
+                message_tokens = calculate_message_token(dupped_msg)
+              end
+
+              next(acc) if dupped_msg[:content].blank?
+
+              current_token_count += message_tokens + per_message_overhead
+
+              acc << dupped_msg
+            end
+
+        reversed_trimmed_msgs.reverse
+      end

       def per_message_overhead
         0
       end

-      def calculate_message_token(context)
-        self.class.tokenizer.size(context[:content].to_s)
-      end
-
-      def self.tool_preamble
-        <<~TEXT
-          In this environment you have access to a set of tools you can use to answer the user's question.
-          You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
-          <function_calls>
-          <invoke>
-          <tool_name>$TOOL_NAME</tool_name>
-          <parameters>
-          <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
-          ...
-          </parameters>
-          </invoke>
-          </function_calls>
-
-          if a parameter type is an array, return a JSON array of values. For example:
-          [1,"two",3.0]
-
-          Here are the tools available:
-        TEXT
+      def calculate_message_token(msg)
+        self.class.tokenizer.size(msg[:content].to_s)
       end

       def build_tools_prompt
-        return "" if prompt[:tools].blank?
+        return "" if prompt.tools.blank?

-        <<~TEXT
+        (<<~TEXT).strip
           #{self.class.tool_preamble}
           <tools>
           #{tools}</tools>
         TEXT
       end
-
-      def flatten_context(context)
-        found_first_multi_turn = false
-        context
-          .map do |a_context|
-            if a_context[:type] == "multi_turn"
-              if found_first_multi_turn
-                # Only take tool and tool_call_id from subsequent multi-turn interactions.
-                # Drop assistant responses
-                a_context[:content].last(2)
-              else
-                found_first_multi_turn = true
-                a_context[:content]
-              end
-            else
-              a_context
-            end
-          end
-          .flatten
-      end
     end
   end
 end
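
A simplified restatement of the new trimming pass, for review only: this standalone sketch substitutes character counts for the dialect's tokenizer and ignores per_message_overhead, so it is the shape of trim_messages, not the code above:

    # Walk newest-to-oldest, keep :tool_call metadata untouched, shorten an
    # overlong message step by step, drop it once nothing survives, then
    # restore chronological order.
    def trim_messages_sketch(messages, limit)
      cut = [limit / 25, 1].max # shrink step, mirroring message_step_size above
      used = 0

      kept =
        messages.reverse.reduce([]) do |acc, msg|
          msg = msg.dup

          # Tool-call metadata is never trimmed, only counted.
          if msg[:type] == :tool_call
            used += msg[:content].length
            next(acc << msg)
          end

          # Shorten until the message fits under what is left of the budget.
          while !msg[:content].empty? && msg[:content].length + used > limit
            msg[:content] = msg[:content][0, [msg[:content].length - cut, 0].max]
          end

          next(acc) if msg[:content].empty?

          used += msg[:content].length
          acc << msg
        end

      kept.reverse
    end

In the real method, limit is max_prompt_tokens and sizes come from the dialect's tokenizer via calculate_message_token.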


@@ -18,39 +18,60 @@ module DiscourseAi
         # Gemini complains if we don't alternate model/user roles.
         noop_model_response = { role: "model", parts: { text: "Ok." } }

-        gemini_prompt = [
-          {
-            role: "user",
-            parts: {
-              text: [prompt[:insts], prompt[:post_insts].to_s].join("\n"),
-            },
-          },
-          noop_model_response,
-        ]
-
-        if prompt[:examples]
-          prompt[:examples].each do |example_pair|
-            gemini_prompt << { role: "user", parts: { text: example_pair.first } }
-            gemini_prompt << { role: "model", parts: { text: example_pair.second } }
-          end
-        end
-
-        gemini_prompt.concat(conversation_context) if prompt[:conversation_context]
-
-        if prompt[:input]
-          gemini_prompt << noop_model_response.dup if gemini_prompt.last[:role] == "user"
-          gemini_prompt << { role: "user", parts: { text: prompt[:input] } }
-        end
-
-        gemini_prompt
+        messages = prompt.messages
+
+        # Gemini doesn't use an assistant msg to improve long-context responses.
+        messages.pop if messages.last[:type] == :model
+
+        trim_messages(messages).reduce([]) do |memo, msg|
+          if msg[:type] == :system
+            memo << { role: "user", parts: { text: msg[:content] } }
+            memo << noop_model_response.dup
+          elsif msg[:type] == :model
+            memo << { role: "model", parts: { text: msg[:content] } }
+          elsif msg[:type] == :tool_call
+            call_details = JSON.parse(msg[:content], symbolize_names: true)
+
+            memo << {
+              role: "model",
+              parts: {
+                functionCall: {
+                  name: call_details[:name],
+                  args: call_details[:arguments],
+                },
+              },
+            }
+          elsif msg[:type] == :tool
+            memo << {
+              role: "function",
+              parts: {
+                functionResponse: {
+                  name: msg[:id],
+                  response: {
+                    content: msg[:content],
+                  },
+                },
+              },
+            }
+          else
+            # Gemini quirk. Doesn't accept tool -> user or user -> user msgs.
+            previous_msg_role = memo.last&.dig(:role)
+            if previous_msg_role == "user" || previous_msg_role == "tool"
+              memo << noop_model_response.dup
+            end
+
+            memo << { role: "user", parts: { text: msg[:content] } }
+          end
+
+          memo
+        end
       end

       def tools
-        return if prompt[:tools].blank?
+        return if prompt.tools.blank?

         translated_tools =
-          prompt[:tools].map do |t|
+          prompt.tools.map do |t|
             tool = t.slice(:name, :description)

             if t[:parameters]
@@ -73,48 +94,6 @@ module DiscourseAi
         [{ function_declarations: translated_tools }]
       end

-      def conversation_context
-        return [] if prompt[:conversation_context].blank?
-
-        flattened_context = flatten_context(prompt[:conversation_context])
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context.reverse.map do |context|
-          if context[:type] == "tool_call"
-            function = JSON.parse(context[:content], symbolize_names: true)
-
-            {
-              role: "model",
-              parts: {
-                functionCall: {
-                  name: function[:name],
-                  args: function[:arguments],
-                },
-              },
-            }
-          elsif context[:type] == "tool"
-            {
-              role: "function",
-              parts: {
-                functionResponse: {
-                  name: context[:name],
-                  response: {
-                    content: context[:content],
-                  },
-                },
-              },
-            }
-          else
-            {
-              role: context[:type] == "assistant" ? "model" : "user",
-              parts: {
-                text: context[:content],
-              },
-            }
-          end
-        end
-      end
-
       def max_prompt_tokens
         16_384 # 50% of model tokens
       end
@@ -124,25 +103,6 @@ module DiscourseAi
       def calculate_message_token(context)
         self.class.tokenizer.size(context[:content].to_s + context[:name].to_s)
       end
-
-      private
-
-      def flatten_context(context)
-        flattened = []
-        context.each do |c|
-          if c[:type] == "multi_turn"
-            # gemini quirk
-            if c[:content].first[:type] == "tool"
-              flattend << { type: "assistant", content: "ok." }
-            end
-            flattened.concat(c[:content])
-          else
-            flattened << c
-          end
-        end
-        flattened
-      end
     end
   end
 end


@@ -15,58 +15,48 @@ module DiscourseAi
       end

       def translate
-        llama2_prompt = +<<~TEXT
-          [INST]
-          <<SYS>>
-          #{prompt[:insts]}
-          #{build_tools_prompt}#{prompt[:post_insts]}
-          <</SYS>>
-          [/INST]
-        TEXT
-
-        if prompt[:examples]
-          prompt[:examples].each do |example_pair|
-            llama2_prompt << "[INST]#{example_pair.first}[/INST]\n"
-            llama2_prompt << "#{example_pair.second}\n"
-          end
-        end
-
-        llama2_prompt << conversation_context if prompt[:conversation_context].present?
-
-        llama2_prompt << "[INST]#{prompt[:input]}[/INST]\n"
-      end
-
-      def conversation_context
-        return "" if prompt[:conversation_context].blank?
-
-        clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
-        flattened_context = flatten_context(clean_context)
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context
-          .reverse
-          .reduce(+"") do |memo, context|
-            if context[:type] == "tool"
-              memo << <<~TEXT
-                [INST]
-                <function_results>
-                <result>
-                <tool_name>#{context[:name]}</tool_name>
-                <json>
-                #{context[:content]}
-                </json>
-                </result>
-                </function_results>
-                [/INST]
-              TEXT
-            elsif context[:type] == "assistant"
-              memo << "[INST]" << context[:content] << "[/INST]\n"
-            else
-              memo << context[:content] << "\n"
-            end
-
-            memo
-          end
+        messages = prompt.messages
+
+        llama2_prompt =
+          trim_messages(messages).reduce(+"") do |memo, msg|
+            next(memo) if msg[:type] == :tool_call
+
+            if msg[:type] == :system
+              memo << (<<~TEXT).strip
+                [INST]
+                <<SYS>>
+                #{msg[:content]}
+                #{build_tools_prompt}
+                <</SYS>>
+                [/INST]
+              TEXT
+            elsif msg[:type] == :model
+              memo << "\n#{msg[:content]}"
+            elsif msg[:type] == :tool
+              tool = JSON.parse(msg[:content], symbolize_names: true)
+
+              memo << "\n[INST]\n"
+              memo << (<<~TEXT).strip
+                <function_results>
+                <result>
+                <tool_name>#{msg[:id]}</tool_name>
+                <json>
+                #{msg[:content]}
+                </json>
+                </result>
+                </function_results>
+                [/INST]
+              TEXT
+            else
+              memo << "\n[INST]#{msg[:content]}[/INST]"
+            end
+
+            memo
+          end
+
+        llama2_prompt << "\n" if llama2_prompt.ends_with?("[/INST]")
+
+        llama2_prompt
       end

       def max_prompt_tokens


@@ -17,56 +17,44 @@ module DiscourseAi
       end

       def translate
-        mixtral_prompt = +<<~TEXT
-          <s> [INST]
-          #{prompt[:insts]}
-          #{build_tools_prompt}#{prompt[:post_insts]}
-          [/INST] Ok </s>
-        TEXT
-
-        if prompt[:examples]
-          prompt[:examples].each do |example_pair|
-            mixtral_prompt << "[INST] #{example_pair.first} [/INST]\n"
-            mixtral_prompt << "#{example_pair.second}</s>\n"
-          end
-        end
-
-        mixtral_prompt << conversation_context if prompt[:conversation_context].present?
-
-        mixtral_prompt << "[INST] #{prompt[:input]} [/INST]\n"
-      end
-
-      def conversation_context
-        return "" if prompt[:conversation_context].blank?
-
-        clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
-        flattened_context = flatten_context(clean_context)
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context
-          .reverse
-          .reduce(+"") do |memo, context|
-            memo << "[INST] " if context[:type] == "user"
-
-            if context[:type] == "tool"
-              memo << <<~TEXT
-                <function_results>
-                <result>
-                <tool_name>#{context[:name]}</tool_name>
-                <json>
-                #{context[:content]}
-                </json>
-                </result>
-                </function_results>
-              TEXT
-            else
-              memo << context[:content] << "\n"
-              memo << "[/INST]" if context[:type] == "user"
-            end
-
-            memo
-          end
+        messages = prompt.messages
+
+        mixtral_prompt =
+          trim_messages(messages).reduce(+"") do |memo, msg|
+            next(memo) if msg[:type] == :tool_call
+
+            if msg[:type] == :system
+              memo << (<<~TEXT).strip
+                <s> [INST]
+                #{msg[:content]}
+                #{build_tools_prompt}
+                [/INST] Ok </s>
+              TEXT
+            elsif msg[:type] == :model
+              memo << "\n#{msg[:content]}</s>"
+            elsif msg[:type] == :tool
+              memo << "\n"
+              memo << (<<~TEXT).strip
+                <function_results>
+                <result>
+                <tool_name>#{msg[:id]}</tool_name>
+                <json>
+                #{msg[:content]}
+                </json>
+                </result>
+                </function_results>
+              TEXT
+            else
+              memo << "\n[INST]#{msg[:content]}[/INST]"
+            end
+
+            memo
+          end
+
+        mixtral_prompt << "\n" if mixtral_prompt.ends_with?("[/INST]")
+
+        mixtral_prompt
       end

       def max_prompt_tokens


@@ -15,54 +15,48 @@ module DiscourseAi
       end

       def translate
-        orca_style_prompt = +<<~TEXT
-          ### System:
-          #{prompt[:insts]}
-          #{build_tools_prompt}#{prompt[:post_insts]}
-        TEXT
-
-        if prompt[:examples]
-          prompt[:examples].each do |example_pair|
-            orca_style_prompt << "### User:\n#{example_pair.first}\n"
-            orca_style_prompt << "### Assistant:\n#{example_pair.second}\n"
-          end
-        end
-
-        orca_style_prompt << "### User:\n#{prompt[:input]}\n"
-
-        orca_style_prompt << "### Assistant:\n"
-      end
-
-      def conversation_context
-        return "" if prompt[:conversation_context].blank?
-
-        clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
-        flattened_context = flatten_context(clean_context)
-        trimmed_context = trim_context(flattened_context)
-
-        trimmed_context
-          .reverse
-          .reduce(+"") do |memo, context|
-            memo << (context[:type] == "user" ? "### User:" : "### Assistant:")
-
-            if context[:type] == "tool"
-              memo << <<~TEXT
-                <function_results>
-                <result>
-                <tool_name>#{context[:name]}</tool_name>
-                <json>
-                #{context[:content]}
-                </json>
-                </result>
-                </function_results>
-              TEXT
-            else
-              memo << " " << context[:content] << "\n"
-            end
-
-            memo
-          end
+        messages = prompt.messages
+        trimmed_messages = trim_messages(messages)
+
+        # Need to include this differently
+        last_message = trimmed_messages.last[:type] == :assistant ? trimmed_messages.pop : nil
+
+        llama2_prompt =
+          trimmed_messages.reduce(+"") do |memo, msg|
+            next(memo) if msg[:type] == :tool_call
+
+            if msg[:type] == :system
+              memo << (<<~TEXT).strip
+                ### System:
+                #{msg[:content]}
+                #{build_tools_prompt}
+              TEXT
+            elsif msg[:type] == :model
+              memo << "\n### Assistant:\n#{msg[:content]}"
+            elsif msg[:type] == :tool
+              memo << "\n### Assistant:\n"
+              memo << (<<~TEXT).strip
+                <function_results>
+                <result>
+                <tool_name>#{msg[:id]}</tool_name>
+                <json>
+                #{msg[:content]}
+                </json>
+                </result>
+                </function_results>
+              TEXT
+            else
+              memo << "\n### User:\n#{msg[:content]}"
+            end
+
+            memo
+          end
+
+        llama2_prompt << "\n### Assistant:\n"
+        llama2_prompt << "#{last_message[:content]}:" if last_message
+
+        llama2_prompt
       end

       def max_prompt_tokens


@@ -48,41 +48,7 @@ module DiscourseAi
       delegate :tokenizer, to: :dialect_klass

-      # @param generic_prompt { Hash } - Prompt using our generic format.
-      # We use the following keys from the hash:
-      #   - insts: String with instructions for the LLM.
-      #   - input: String containing user input
-      #   - examples (optional): Array of arrays with examples of input and responses. Each array is a input/response pair like [[example1, response1], [example2, response2]].
-      #   - post_insts (optional): Additional instructions for the LLM. Some dialects like Claude add these at the end of the prompt.
-      #   - conversation_context (optional): Array of hashes to provide context about an ongoing conversation with the model.
-      #     We translate the array in reverse order, meaning the first element would be the most recent message in the conversation.
-      #     Example:
-      #
-      #     [
-      #       { type: "user", name: "user1", content: "This is a new message by a user" },
-      #       { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" },
-      #       { type: "tool", name: "tool_id", content: "I'm a tool result" },
-      #       { type: "tool_call_id", name: "tool_id", content: { name: "tool", args: { ...tool_args } } },
-      #       { type: "multi_turn", content: [assistant_reply_from_a_tool, tool_call, tool_call_id] }
-      #     ]
-      #
-      #   - tools (optional - only functions supported): Array of functions a model can call. Each function is defined as a hash. Example:
-      #
-      #   {
-      #     name: "get_weather",
-      #     description: "Get the weather in a city",
-      #     parameters: [
-      #       { name: "location", type: "string", description: "the city name", required: true },
-      #       {
-      #         name: "unit",
-      #         type: "string",
-      #         description: "the unit of measurement celcius c or fahrenheit f",
-      #         enum: %w[c f],
-      #         required: true,
-      #       },
-      #     ],
-      #   }
-      #
+      # @param generic_prompt { DiscourseAi::Completions::Prompt } - Our generic prompt object
       # @param user { User } - User requesting the summary.
       #
       # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
@@ -104,7 +70,7 @@ module DiscourseAi
       #   </function_calls>
       #
       def generate(
-        generic_prompt,
+        prompt,
         temperature: nil,
         max_tokens: nil,
         stop_sequences: nil,
@@ -117,15 +83,14 @@ module DiscourseAi
           stop_sequences: stop_sequences,
         }

-        model_params.merge!(generic_prompt.dig(:params, model_name) || {})
         model_params.keys.each { |key| model_params.delete(key) if model_params[key].nil? }

-        dialect = dialect_klass.new(generic_prompt, model_name, opts: model_params)
+        dialect = dialect_klass.new(prompt, model_name, opts: model_params)
         gateway.perform_completion!(dialect, user, model_params, &partial_read_blk)
       end

       def max_prompt_tokens
-        dialect_klass.new({}, model_name).max_prompt_tokens
+        dialect_klass.new(DiscourseAi::Completions::Prompt.new(""), model_name).max_prompt_tokens
       end

       attr_reader :model_name
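
Call sites across this diff now hand generate a Prompt object; a hedged sketch of the resulting shape (the site setting and block usage mirror the ai_helper call sites above):

    prompt = DiscourseAi::Completions::Prompt.new("You are a helpful bot.")
    prompt.push(type: :user, content: "Summarize the post inside <input></input> tags.")

    llm = DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model)
    llm.generate(prompt, user: Discourse.system_user, max_tokens: 300) do |partial, cancel|
      print partial
    end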

lib/completions/prompt.rb (new file, 75 lines)

@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    class Prompt
+      INVALID_TURN = Class.new(StandardError)
+
+      attr_reader :system_message, :messages
+      attr_accessor :tools
+
+      def initialize(system_msg, messages: [], tools: [])
+        raise ArgumentError, "messages must be an array" if !messages.is_a?(Array)
+        raise ArgumentError, "tools must be an array" if !tools.is_a?(Array)
+
+        system_message = { type: :system, content: system_msg }
+
+        @messages = [system_message].concat(messages)
+        @messages.each { |message| validate_message(message) }
+        @messages.each_cons(2) { |last_turn, new_turn| validate_turn(last_turn, new_turn) }
+
+        @tools = tools
+      end
+
+      def push(type:, content:, id: nil)
+        return if type == :system
+
+        new_message = { type: type, content: content }
+        new_message[:id] = type == :user ? clean_username(id) : id if id && type != :model
+
+        validate_message(new_message)
+        validate_turn(messages.last, new_message)
+
+        messages << new_message
+      end
+
+      private
+
+      def clean_username(username)
+        if username.match?(/\A[a-zA-Z0-9_-]{1,64}\z/)
+          username
+        else
+          # not the best in the world, but this is what we have to work with
+          # if sites enable unicode usernames this can get messy
+          username.gsub(/[^a-zA-Z0-9_-]/, "_")[0..63]
+        end
+      end
+
+      def validate_message(message)
+        valid_types = %i[system user model tool tool_call]
+        if !valid_types.include?(message[:type])
+          raise ArgumentError, "message type must be one of #{valid_types}"
+        end
+
+        valid_keys = %i[type content id]
+        if (invalid_keys = message.keys - valid_keys).any?
+          raise ArgumentError, "message contains invalid keys: #{invalid_keys}"
+        end
+
+        raise ArgumentError, "message content must be a string" if !message[:content].is_a?(String)
+      end
+
+      def validate_turn(last_turn, new_turn)
+        valid_types = %i[tool tool_call model user]
+        raise INVALID_TURN if !valid_types.include?(new_turn[:type])
+
+        if last_turn[:type] == :system && %i[tool tool_call model].include?(new_turn[:type])
+          raise INVALID_TURN
+        end
+
+        raise INVALID_TURN if new_turn[:type] == :tool && last_turn[:type] != :tool_call
+        raise INVALID_TURN if new_turn[:type] == :model && last_turn[:type] == :model
+      end
+    end
+  end
+end
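
A short, hedged illustration of the turn validation the new class enforces (the message contents here are invented):

    prompt = DiscourseAi::Completions::Prompt.new("You are a helpful bot.")

    prompt.push(type: :user, content: "Hello", id: "some user") # id is cleaned to "some_user"
    prompt.push(type: :model, content: "Hi!")

    begin
      # A :tool message may only follow a :tool_call, so this raises.
      prompt.push(type: :tool, id: "tool_id", content: "{}")
    rescue DiscourseAi::Completions::Prompt::INVALID_TURN
      # two adjacent :model turns are rejected the same way
    end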


@@ -95,19 +95,19 @@ module DiscourseAi
       end

       def hypothetical_post_from(search_term)
-        prompt = {
-          insts: <<~TEXT,
-            You are a content creator for a forum. The forum description is as follows:
-            #{SiteSetting.title}
-            #{SiteSetting.site_description}
-          TEXT
-          input: <<~TEXT,
-            Using this description, write a forum post about the subject inside the <input></input> XML tags:
-
-            <input>#{search_term}</input>
-          TEXT
-          post_insts: "Put the forum post between <ai></ai> tags.",
-        }
+        prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
+          You are a content creator for a forum. The forum description is as follows:
+          #{SiteSetting.title}
+          #{SiteSetting.site_description}
+
+          Put the forum post between <ai></ai> tags.
+        TEXT
+
+        prompt.push(type: :user, content: <<~TEXT.strip)
+          Using this description, write a forum post about the subject inside the <input></input> XML tags:
+
+          <input>#{search_term}</input>
+        TEXT

         llm_response =
           DiscourseAi::Completions::Llm.proxy(

@@ -105,16 +105,14 @@ module DiscourseAi
        def summarize_in_chunks(llm, chunks, user, opts)
          chunks.map do |chunk|
            prompt = summarization_prompt(chunk[:summary], opts)
-            prompt[:post_insts] = "Don't use more than 400 words for the summary."

-            chunk[:summary] = llm.generate(prompt, user: user)
+            chunk[:summary] = llm.generate(prompt, user: user, max_tokens: 300)
            chunk
          end
        end

        def concatenate_summaries(llm, summaries, user, &on_partial_blk)
-          prompt = {}
-          prompt[:insts] = <<~TEXT
+          prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
            You are a summarization bot that effectively concatenates disjoint summaries, creating a cohesive narrative.
            The narrative you create is in the form of one or multiple paragraphs.
            Your reply MUST BE a single concatenated summary using the summaries I'll provide to you.
@@ -123,7 +121,7 @@ module DiscourseAi
            You format the response, including links, using Markdown.
          TEXT

-          prompt[:input] = <<~TEXT
+          prompt.push(type: :user, content: <<~TEXT.strip)
            THESE are the summaries, each one separated by a newline, all of them inside <input></input> XML tags:

            <input>
@@ -151,28 +149,39 @@ module DiscourseAi
            For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
          TEXT

-          prompt = { insts: insts, input: <<~TEXT }
-            #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
-            Here are the posts, inside <input></input> XML tags:
-
-            <input>
-            #{input}
-            </input>
-          TEXT
+          prompt = DiscourseAi::Completions::Prompt.new(insts.strip)

          if opts[:resource_path]
-            prompt[:examples] = [
-              [
-                "<input>1) user1 said: I love Mondays 2) user2 said: I hate Mondays</input>",
-                "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.",
-              ],
-              [
-                "<input>3) usuario1: Amo los lunes 6) usuario2: Odio los lunes</input>",
-                "Dos usuarios charlan sobre los lunes. [usuario1](#{opts[:resource_path]}/3) dice que los ama, mientras que [usuario2](#{opts[:resource_path]}/2) los odia.",
-              ],
-            ]
+            prompt.push(
+              type: :user,
+              content: "<input>1) user1 said: I love Mondays 2) user2 said: I hate Mondays</input>",
+            )
+            prompt.push(
+              type: :model,
+              content:
+                "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.",
+            )
+
+            prompt.push(
+              type: :user,
+              content: "<input>3) usuario1: Amo los lunes 6) usuario2: Odio los lunes</input>",
+            )
+            prompt.push(
+              type: :model,
+              content:
+                "Dos usuarios charlan sobre los lunes. [usuario1](#{opts[:resource_path]}/3) dice que los ama, mientras que [usuario2](#{opts[:resource_path]}/2) los odia.",
+            )
          end

+          prompt.push(type: :user, content: <<~TEXT.strip)
+            #{opts[:content_title].present? ? "The discussion title is: " + opts[:content_title] + ".\n" : ""}
+            Here are the posts, inside <input></input> XML tags:
+
+            <input>
+            #{input}
+            </input>
+          TEXT
+
          prompt
        end
      end


@@ -1,105 +1,30 @@
 # frozen_string_literal: true

+require_relative "dialect_context"
+
 RSpec.describe DiscourseAi::Completions::Dialects::ChatGpt do
-  subject(:dialect) { described_class.new(prompt, "gpt-4") }
-
-  let(:tool) do
-    {
-      name: "get_weather",
-      description: "Get the weather in a city",
-      parameters: [
-        { name: "location", type: "string", description: "the city name", required: true },
-        {
-          name: "unit",
-          type: "string",
-          description: "the unit of measurement celcius c or fahrenheit f",
-          enum: %w[c f],
-          required: true,
-        },
-      ],
-    }
-  end
-
-  let(:prompt) do
-    {
-      insts: <<~TEXT,
-        I want you to act as a title generator for written pieces. I will provide you with a text,
-        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
-        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
-      TEXT
-      input: <<~TEXT,
-        Here is the text, inside <input></input> XML tags:
-        <input>
-        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
-        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
-        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
-
-        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
-        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
-        slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
-        dies so that a scene may be repeated.
-        </input>
-      TEXT
-      post_insts:
-        "Please put the translation between <ai></ai> tags and separate each title with a comma.",
-      tools: [tool],
-    }
-  end
+  let(:model_name) { "gpt-4" }
+  let(:context) { DialectContext.new(described_class, model_name) }

   describe "#translate" do
     it "translates a prompt written in our generic format to the ChatGPT format" do
       open_ai_version = [
-        { role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") },
-        { role: "user", content: prompt[:input] },
+        { role: "system", content: context.system_insts },
+        { role: "user", content: context.simple_user_input },
       ]

-      translated = dialect.translate
+      translated = context.system_user_scenario

       expect(translated).to contain_exactly(*open_ai_version)
     end

-    it "include examples in the ChatGPT version" do
-      prompt[:examples] = [
-        [
-          "<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
-          "<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
-        ],
-      ]
-
-      open_ai_version = [
-        { role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") },
-        { role: "user", content: prompt[:examples][0][0] },
-        { role: "assistant", content: prompt[:examples][0][1] },
-        { role: "user", content: prompt[:input] },
-      ]
-
-      translated = dialect.translate
-
-      expect(translated).to contain_exactly(*open_ai_version)
-    end
-  end
-
-  describe "#conversation_context" do
-    let(:context) do
-      [
-        { type: "user", name: "user1", content: "This is a new message by a user" },
-        { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" },
-        { type: "tool", name: "tool_id", content: "I'm a tool result" },
-        {
-          type: "tool_call",
-          name: "tool_id",
-          content: { name: "get_weather", arguments: { location: "Sydney", unit: "c" } }.to_json,
-        },
-      ]
-    end
-
-    it "adds conversation in reverse order (first == newer)" do
-      prompt[:conversation_context] = context
-
-      translated_context = dialect.conversation_context
-
-      expect(translated_context).to eq(
+    it "translates tool_call and tool messages" do
+      expect(context.multi_turn_scenario).to eq(
         [
+          { role: "system", content: context.system_insts },
+          { role: "user", content: "This is a message by a user", name: "user1" },
+          { role: "assistant", content: "I'm a previous bot reply, that's why there's no user" },
+          { role: "user", name: "user1", content: "This is a new message by a user" },
           {
             role: "assistant",
             content: nil,
@@ -114,21 +39,16 @@ RSpec.describe DiscourseAi::Completions::Dialects::ChatGpt do
            },
          ],
        },
-        { role: "tool", content: context.third[:content], tool_call_id: context.third[:name] },
-        { role: "assistant", content: context.second[:content] },
-        { role: "user", content: context.first[:content], name: context.first[:name] },
+        { role: "tool", content: "I'm a tool result".to_json, tool_call_id: "tool_id" },
        ],
      )
    end

    it "trims content if it's getting too long" do
-      context.third[:content] = context.third[:content] * 1000
-      prompt[:conversation_context] = context
+      translated = context.long_user_input_scenario

-      translated_context = dialect.conversation_context
+      expect(translated.last[:role]).to eq("user")

-      expect(translated_context.third[:content].length).to be < context.third[:content].length
+      expect(translated.last[:content].length).to be < context.long_message_text.length
    end
  end

@@ -136,11 +56,11 @@ RSpec.describe DiscourseAi::Completions::Dialects::ChatGpt do
    it "returns a list of available tools" do
      open_ai_tool_f = {
        function: {
-          description: tool[:description],
-          name: tool[:name],
+          description: context.tools.first[:description],
+          name: context.tools.first[:name],
          parameters: {
            properties:
-              tool[:parameters].reduce({}) do |memo, p|
+              context.tools.first[:parameters].reduce({}) do |memo, p|
                memo[p[:name]] = { description: p[:description], type: p[:type] }

                memo[p[:name]][:enum] = p[:enum] if p[:enum]
@@ -154,7 +74,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::ChatGpt do
        type: "function",
      }

-      expect(subject.tools).to contain_exactly(open_ai_tool_f)
+      expect(context.dialect_tools).to contain_exactly(open_ai_tool_f)
    end
  end
 end


@@ -1,209 +1,64 @@
 # frozen_string_literal: true

+require_relative "dialect_context"
+
 RSpec.describe DiscourseAi::Completions::Dialects::Claude do
-  subject(:dialect) { described_class.new(prompt, "claude-2") }
-
-  let(:tool) do
-    {
-      name: "get_weather",
-      description: "Get the weather in a city",
-      parameters: [
-        { name: "location", type: "string", description: "the city name", required: true },
-        {
-          name: "unit",
-          type: "string",
-          description: "the unit of measurement celcius c or fahrenheit f",
-          enum: %w[c f],
-          required: true,
-        },
-      ],
-    }
-  end
-
-  let(:prompt) do
-    {
-      insts: <<~TEXT,
-        I want you to act as a title generator for written pieces. I will provide you with a text,
-        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
-        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
-      TEXT
-      input: <<~TEXT,
-        Here is the text, inside <input></input> XML tags:
-        <input>
-        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
-        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
-        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
-
-        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
-        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
-        slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
-        dies so that a scene may be repeated.
-        </input>
-      TEXT
-      post_insts:
-        "Please put the translation between <ai></ai> tags and separate each title with a comma.",
-    }
-  end
+  let(:model_name) { "claude-2" }
+  let(:context) { DialectContext.new(described_class, model_name) }

   describe "#translate" do
     it "translates a prompt written in our generic format to Claude's format" do
-      anthropic_version = (<<~TEXT).strip + " "
-        #{prompt[:insts]}
-        Human: #{prompt[:input]}
-        #{prompt[:post_insts]}
-
-        Assistant:
-      TEXT
-
-      translated = dialect.translate
-
-      expect(translated).to eq(anthropic_version)
-    end
-
-    it "knows how to translate examples to Claude's format" do
-      prompt[:examples] = [
-        [
-          "<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
-          "<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
-        ],
-      ]
-
-      anthropic_version = (<<~TEXT).strip + " "
-        #{prompt[:insts]}
-        <example>
-        H: #{prompt[:examples][0][0]}
-        A: #{prompt[:examples][0][1]}
-        </example>
-        Human: #{prompt[:input]}
-        #{prompt[:post_insts]}
-
-        Assistant:
-      TEXT
-
-      translated = dialect.translate
-
-      expect(translated).to eq(anthropic_version)
-    end
-
-    it "include tools inside the prompt" do
-      prompt[:tools] = [tool]
-
-      anthropic_version = (<<~TEXT).strip + " "
-        #{prompt[:insts]}
-        #{DiscourseAi::Completions::Dialects::Claude.tool_preamble}
+      anthropic_version = (<<~TEXT).strip
+        #{context.system_insts}
+        #{described_class.tool_preamble}
         <tools>
-        #{dialect.tools}</tools>
+        #{context.dialect_tools}</tools>

-        Human: #{prompt[:input]}
-        #{prompt[:post_insts]}
+        Human: #{context.simple_user_input}

         Assistant:
       TEXT

-      translated = dialect.translate
+      translated = context.system_user_scenario

       expect(translated).to eq(anthropic_version)
     end

-    it "includes all the right newlines" do
-      prompt.clear
-      prompt.merge!(
-        {
-          insts: "You are an artist",
-          conversation_context: [
-            { content: "draw another funny cat", type: "user", name: "sam" },
-            { content: "ok", type: "assistant" },
-            { content: "draw a funny cat", type: "user", name: "sam" },
-          ],
-        },
-      )
-
-      expected = (<<~TEXT).strip + " "
-        You are an artist
-
-        Human: draw a funny cat
-
-        Assistant: ok
-
-        Human: draw another funny cat
-
-        Assistant:
-      TEXT
-
-      expect(dialect.translate).to eq(expected)
-    end
-  end
-
-  describe "#conversation_context" do
-    let(:context) do
-      [
-        { type: "user", name: "user1", content: "This is a new message by a user" },
-        { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" },
-        { type: "tool", name: "tool_id", content: "I'm a tool result" },
-      ]
-    end
-
-    it "adds conversation in reverse order (first == newer)" do
-      prompt[:conversation_context] = context
-
-      expected = (<<~TEXT).strip
+    it "translates tool messages" do
+      expected = +(<<~TEXT).strip
+        #{context.system_insts}
+        #{described_class.tool_preamble}
+        <tools>
+        #{context.dialect_tools}</tools>
+
+        Human: This is a message by a user
+
+        Assistant: I'm a previous bot reply, that's why there's no user
+
+        Human: This is a new message by a user
+
         Assistant:
         <function_results>
         <result>
         <tool_name>tool_id</tool_name>
         <json>
-        #{context.last[:content]}
+        "I'm a tool result"
         </json>
         </result>
         </function_results>

-        Assistant: #{context.second[:content]}
-
-        Human: #{context.first[:content]}
+        Assistant:
       TEXT

-      translated_context = dialect.conversation_context
-
-      expect(translated_context).to eq(expected)
+      expect(context.multi_turn_scenario).to eq(expected)
     end

     it "trims content if it's getting too long" do
-      context.last[:content] = context.last[:content] * 20_000
-      prompt[:conversation_context] = context
+      length = 19_000

-      translated_context = dialect.conversation_context
+      translated = context.long_user_input_scenario(length: length)

-      expect(translated_context.length).to be < context.last[:content].length
+      expect(translated.length).to be < context.long_message_text(length: length).length
     end
   end
-
-  describe "#tools" do
-    it "translates tools to the tool syntax" do
-      prompt[:tools] = [tool]
-
-      translated_tool = <<~TEXT
-        <tool_description>
-        <tool_name>get_weather</tool_name>
-        <description>Get the weather in a city</description>
-        <parameters>
-        <parameter>
-        <name>location</name>
-        <type>string</type>
-        <description>the city name</description>
-        <required>true</required>
-        </parameter>
-        <parameter>
-        <name>unit</name>
-        <type>string</type>
-        <description>the unit of measurement celcius c or fahrenheit f</description>
-        <required>true</required>
-        <options>c,f</options>
-        </parameter>
-        </parameters>
-        </tool_description>
-      TEXT
-
-      expect(dialect.tools).to eq(translated_tool)
-    end
-  end
 end


dialect_context.rb (new file, 101 lines)

@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+class DialectContext
+  def initialize(dialect_klass, model_name)
+    @dialect_klass = dialect_klass
+    @model_name = model_name
+  end
+
+  def dialect(prompt)
+    @dialect_klass.new(prompt, @model_name)
+  end
+
+  def prompt
+    DiscourseAi::Completions::Prompt.new(system_insts, tools: tools)
+  end
+
+  def dialect_tools
+    dialect(prompt).tools
+  end
+
+  def system_user_scenario
+    a_prompt = prompt
+    a_prompt.push(type: :user, content: simple_user_input)
+
+    dialect(a_prompt).translate
+  end
+
+  def multi_turn_scenario
+    context_and_multi_turn = [
+      { type: :user, id: "user1", content: "This is a message by a user" },
+      { type: :model, content: "I'm a previous bot reply, that's why there's no user" },
+      { type: :user, id: "user1", content: "This is a new message by a user" },
+      {
+        type: :tool_call,
+        id: "tool_id",
+        content: { name: "get_weather", arguments: { location: "Sydney", unit: "c" } }.to_json,
+      },
+      { type: :tool, id: "tool_id", content: "I'm a tool result".to_json },
+    ]
+
+    a_prompt = prompt
+    context_and_multi_turn.each { |msg| a_prompt.push(**msg) }
+
+    dialect(a_prompt).translate
+  end
+
+  def long_user_input_scenario(length: 1_000)
+    long_message = long_message_text(length: length)
+    a_prompt = prompt
+    a_prompt.push(type: :user, content: long_message, id: "user1")
+
+    dialect(a_prompt).translate
+  end
+
+  def long_message_text(length: 1_000)
+    "This is a message by a user" * length
+  end
+
+  def simple_user_input
+    <<~TEXT
+      Here is the text, inside <input></input> XML tags:
+      <input>
+      To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+      discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+      defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+      Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
+      a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
+      slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
+      dies so that a scene may be repeated.
+      </input>
+    TEXT
+  end
+
+  def system_insts
+    <<~TEXT
+      I want you to act as a title generator for written pieces. I will provide you with a text,
+      and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+      and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+    TEXT
+  end
+
+  def tools
+    [
+      {
+        name: "get_weather",
+        description: "Get the weather in a city",
+        parameters: [
+          { name: "location", type: "string", description: "the city name", required: true },
+          {
+            name: "unit",
+            type: "string",
+            description: "the unit of measurement celcius c or fahrenheit f",
+            enum: %w[c f],
+            required: true,
+          },
+        ],
+      },
+    ]
+  end
+end
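
Condensed from the spec rewrites in this diff, the helper is used roughly like so (the scenario methods come straight from the class above):

    context = DialectContext.new(DiscourseAi::Completions::Dialects::ChatGpt, "gpt-4")

    context.system_user_scenario # system + user turns, translated to the dialect's format
    context.multi_turn_scenario  # includes :tool_call and :tool turns
    context.dialect_tools        # the dialect's rendering of the tool signatures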


@@ -1,240 +1,70 @@
 # frozen_string_literal: true

+require_relative "dialect_context"
+
 RSpec.describe DiscourseAi::Completions::Dialects::Gemini do
-  subject(:dialect) { described_class.new(prompt, "gemini-pro") }
-
-  let(:tool) do
-    {
-      name: "get_weather",
-      description: "Get the weather in a city",
-      parameters: [
-        { name: "location", type: "string", description: "the city name", required: true },
-        {
-          name: "unit",
-          type: "string",
-          description: "the unit of measurement celcius c or fahrenheit f",
-          enum: %w[c f],
-          required: true,
-        },
-      ],
-    }
-  end
-
-  let(:prompt) do
-    {
-      insts: <<~TEXT,
-        I want you to act as a title generator for written pieces. I will provide you with a text,
-        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
-        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
-      TEXT
-      input: <<~TEXT,
-        Here is the text, inside <input></input> XML tags:
-        <input>
-        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
-        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
-        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
-
-        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
-        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
-        slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
-        dies so that a scene may be repeated.
-        </input>
-      TEXT
-      post_insts:
-        "Please put the translation between <ai></ai> tags and separate each title with a comma.",
-      tools: [tool],
-    }
-  end
+  let(:model_name) { "gemini-pro" }
+  let(:context) { DialectContext.new(described_class, model_name) }

   describe "#translate" do
     it "translates a prompt written in our generic format to the Gemini format" do
       gemini_version = [
-        { role: "user", parts: { text: [prompt[:insts], prompt[:post_insts]].join("\n") } },
+        { role: "user", parts: { text: context.system_insts } },
         { role: "model", parts: { text: "Ok." } },
-        { role: "user", parts: { text: prompt[:input] } },
+        { role: "user", parts: { text: context.simple_user_input } },
       ]

-      translated = dialect.translate
+      translated = context.system_user_scenario

       expect(translated).to eq(gemini_version)
     end

-    it "include examples in the Gemini version" do
-      prompt[:examples] = [
-        [
-          "<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
-          "<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
-        ],
-      ]
-
-      gemini_version = [
-        { role: "user", parts: { text: [prompt[:insts], prompt[:post_insts]].join("\n") } },
-        { role: "model", parts: { text: "Ok." } },
-        { role: "user", parts: { text: prompt[:examples][0][0] } },
-        { role: "model", parts: { text: prompt[:examples][0][1] } },
-        { role: "user", parts: { text: prompt[:input] } },
-      ]
-
-      translated = dialect.translate
-
-      expect(translated).to contain_exactly(*gemini_version)
-    end
-  end
-
-  describe "#conversation_context" do
-    let(:context) do
-      [
-        { type: "user", name: "user1", content: "This is a new message by a user" },
-        { type: "assistant", content: "I'm a previous bot reply, that's why there's no user" },
-        { type: "tool", name: "tool_id", content: "I'm a tool result" },
-      ]
-    end
-
-    it "adds conversation in reverse order (first == newer)" do
-      prompt[:conversation_context] = context
-
-      translated_context = dialect.conversation_context
-
-      expect(translated_context).to eq(
-        [
-          {
-            role: "function",
-            parts: {
-              functionResponse: {
-                name: context.last[:name],
-                response: {
-                  content: context.last[:content],
-                },
-              },
-            },
-          },
-          { role: "model", parts: { text: context.second[:content] } },
-          { role: "user", parts: { text: context.first[:content] } },
-        ],
-      )
-    end
+    it "translates tool_call and tool messages" do
+      expect(context.multi_turn_scenario).to eq(
+        [
+          { role: "user", parts: { text: context.system_insts } },
+          { role: "model", parts: { text: "Ok." } },
+          { role: "user", parts: { text: "This is a message by a user" } },
+          {
+            role: "model",
+            parts: {
+              text: "I'm a previous bot reply, that's why there's no user",
+            },
+          },
+          { role: "user", parts: { text: "This is a new message by a user" } },
+          {
+            role: "model",
+            parts: {
+              functionCall: {
+                name: "get_weather",
+                args: {
+                  location: "Sydney",
+                  unit: "c",
+                },
+              },
+            },
+          },
+          {
+            role: "function",
+            parts: {
+              functionResponse: {
+                name: "tool_id",
+                response: {
+                  content: "I'm a tool result".to_json,
+                },
+              },
+            },
+          },
+        ],
+      )
+    end

     it "trims content if it's getting too long" do
-      context.last[:content] = context.last[:content] * 1000
+      translated = context.long_user_input_scenario(length: 5_000)
prompt[:conversation_context] = context expect(translated.last[:role]).to eq("user")
expect(translated.last.dig(:parts, :text).length).to be <
translated_context = dialect.conversation_context context.long_message_text(length: 5_000).length
expect(translated_context.last.dig(:parts, :text).length).to be <
context.last[:content].length
end
context "when working with multi-turn contexts" do
context "when the multi-turn is for turn that doesn't chain" do
it "uses the tool_call context" do
prompt[:conversation_context] = [
{
type: "multi_turn",
content: [
{
type: "tool_call",
name: "get_weather",
content: {
name: "get_weather",
arguments: {
location: "Sydney",
unit: "c",
},
}.to_json,
},
{ type: "tool", name: "get_weather", content: "I'm a tool result" },
],
},
]
translated_context = dialect.conversation_context
expected = [
{
role: "function",
parts: {
functionResponse: {
name: "get_weather",
response: {
content: "I'm a tool result",
},
},
},
},
{
role: "model",
parts: {
functionCall: {
name: "get_weather",
args: {
location: "Sydney",
unit: "c",
},
},
},
},
]
expect(translated_context).to eq(expected)
end
end
context "when the multi-turn is from a chainable tool" do
it "uses the assistant context" do
prompt[:conversation_context] = [
{
type: "multi_turn",
content: [
{
type: "tool_call",
name: "get_weather",
content: {
name: "get_weather",
arguments: {
location: "Sydney",
unit: "c",
},
}.to_json,
},
{ type: "tool", name: "get_weather", content: "I'm a tool result" },
{ type: "assistant", content: "I'm a bot reply!" },
],
},
]
translated_context = dialect.conversation_context
expected = [
{ role: "model", parts: { text: "I'm a bot reply!" } },
{
role: "function",
parts: {
functionResponse: {
name: "get_weather",
response: {
content: "I'm a tool result",
},
},
},
},
{
role: "model",
parts: {
functionCall: {
name: "get_weather",
args: {
location: "Sydney",
unit: "c",
},
},
},
},
]
expect(translated_context).to eq(expected)
end
end
end end
end end
@ -264,7 +94,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Gemini do
], ],
} }
expect(subject.tools).to contain_exactly(gemini_tools) expect(context.dialect_tools).to contain_exactly(gemini_tools)
end end
end end
end end

View File

@ -1,183 +1,62 @@
# frozen_string_literal: true # frozen_string_literal: true
require_relative "dialect_context"
RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do
subject(:dialect) { described_class.new(prompt, "Llama2-chat-hf") } let(:model_name) { "Llama2-chat-hf" }
let(:context) { DialectContext.new(described_class, model_name) }
let(:tool) do
{
name: "get_weather",
description: "Get the weather in a city",
parameters: [
{ name: "location", type: "string", description: "the city name", required: true },
{
name: "unit",
type: "string",
description: "the unit of measurement celcius c or fahrenheit f",
enum: %w[c f],
required: true,
},
],
}
end
let(:prompt) do
{
insts: <<~TEXT,
I want you to act as a title generator for written pieces. I will provide you with a text,
and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
TEXT
input: <<~TEXT,
Here is the text, inside <input></input> XML tags:
<input>
To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
dies so that a scene may be repeated.
</input>
TEXT
post_insts:
"Please put the translation between <ai></ai> tags and separate each title with a comma.",
}
end
describe "#translate" do describe "#translate" do
it "translates a prompt written in our generic format to the Llama2 format" do it "translates a prompt written in our generic format to the Llama2 format" do
llama2_classic_version = <<~TEXT llama2_classic_version = <<~TEXT
[INST] [INST]
<<SYS>> <<SYS>>
#{prompt[:insts]} #{context.system_insts}
#{prompt[:post_insts]} #{described_class.tool_preamble}
<</SYS>>
[/INST]
[INST]#{prompt[:input]}[/INST]
TEXT
translated = dialect.translate
expect(translated).to eq(llama2_classic_version)
end
it "includes examples in the translation" do
prompt[:examples] = [
[
"<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
"<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
],
]
llama2_classic_version = <<~TEXT
[INST]
<<SYS>>
#{prompt[:insts]}
#{prompt[:post_insts]}
<</SYS>>
[/INST]
[INST]#{prompt[:examples][0][0]}[/INST]
#{prompt[:examples][0][1]}
[INST]#{prompt[:input]}[/INST]
TEXT
translated = dialect.translate
expect(translated).to eq(llama2_classic_version)
end
it "include tools inside the prompt" do
prompt[:tools] = [tool]
llama2_classic_version = <<~TEXT
[INST]
<<SYS>>
#{prompt[:insts]}
#{DiscourseAi::Completions::Dialects::Llama2Classic.tool_preamble}
<tools> <tools>
#{dialect.tools}</tools> #{context.dialect_tools}</tools>
#{prompt[:post_insts]}
<</SYS>> <</SYS>>
[/INST] [/INST]
[INST]#{prompt[:input]}[/INST] [INST]#{context.simple_user_input}[/INST]
TEXT TEXT
translated = dialect.translate translated = context.system_user_scenario
expect(translated).to eq(llama2_classic_version) expect(translated).to eq(llama2_classic_version)
end end
end
describe "#conversation_context" do it "translates tool messages" do
let(:context) do expected = +(<<~TEXT)
[ [INST]
{ type: "user", name: "user1", content: "This is a new message by a user" }, <<SYS>>
{ type: "assistant", content: "I'm a previous bot reply, that's why there's no user" }, #{context.system_insts}
{ type: "tool", name: "tool_id", content: "I'm a tool result" }, #{described_class.tool_preamble}
] <tools>
end #{context.dialect_tools}</tools>
<</SYS>>
it "adds conversation in reverse order (first == newer)" do [/INST]
prompt[:conversation_context] = context [INST]This is a message by a user[/INST]
I'm a previous bot reply, that's why there's no user
expected = <<~TEXT [INST]This is a new message by a user[/INST]
[INST] [INST]
<function_results> <function_results>
<result> <result>
<tool_name>tool_id</tool_name> <tool_name>tool_id</tool_name>
<json> <json>
#{context.last[:content]} "I'm a tool result"
</json> </json>
</result> </result>
</function_results> </function_results>
[/INST] [/INST]
[INST]#{context.second[:content]}[/INST]
#{context.first[:content]}
TEXT TEXT
translated_context = dialect.conversation_context expect(context.multi_turn_scenario).to eq(expected)
expect(translated_context).to eq(expected)
end end
it "trims content if it's getting too long" do it "trims content if it's getting too long" do
context.last[:content] = context.last[:content] * 1_000 translated = context.long_user_input_scenario
prompt[:conversation_context] = context
translated_context = dialect.conversation_context expect(translated.length).to be < context.long_message_text.length
expect(translated_context.length).to be < context.last[:content].length
end
end
describe "#tools" do
it "translates functions to the tool syntax" do
prompt[:tools] = [tool]
translated_tool = <<~TEXT
<tool_description>
<tool_name>get_weather</tool_name>
<description>Get the weather in a city</description>
<parameters>
<parameter>
<name>location</name>
<type>string</type>
<description>the city name</description>
<required>true</required>
</parameter>
<parameter>
<name>unit</name>
<type>string</type>
<description>the unit of measurement celsius c or fahrenheit f</description>
<required>true</required>
<options>c,f</options>
</parameter>
</parameters>
</tool_description>
TEXT
expect(dialect.tools).to eq(translated_tool)
end end
end end
end end

View File

@ -1,176 +1,57 @@
# frozen_string_literal: true # frozen_string_literal: true
require_relative "dialect_context"
RSpec.describe DiscourseAi::Completions::Dialects::Mixtral do RSpec.describe DiscourseAi::Completions::Dialects::Mixtral do
subject(:dialect) { described_class.new(prompt, "mistralai/Mixtral-8x7B-Instruct-v0.1") } let(:model_name) { "mistralai/Mixtral-8x7B-Instruct-v0.1" }
let(:context) { DialectContext.new(described_class, model_name) }
let(:tool) do
{
name: "get_weather",
description: "Get the weather in a city",
parameters: [
{ name: "location", type: "string", description: "the city name", required: true },
{
name: "unit",
type: "string",
description: "the unit of measurement celcius c or fahrenheit f",
enum: %w[c f],
required: true,
},
],
}
end
let(:prompt) do
{
insts: <<~TEXT,
I want you to act as a title generator for written pieces. I will provide you with a text,
and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
TEXT
input: <<~TEXT,
Here is the text, inside <input></input> XML tags:
<input>
To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
dies so that a scene may be repeated.
</input>
TEXT
post_insts:
"Please put the translation between <ai></ai> tags and separate each title with a comma.",
}
end
describe "#translate" do describe "#translate" do
it "translates a prompt written in our generic format to the Open AI format" do it "translates a prompt written in our generic format to the Llama2 format" do
orca_style_version = <<~TEXT mixtral_version = <<~TEXT
<s> [INST] <s> [INST]
#{prompt[:insts]} #{context.system_insts}
#{prompt[:post_insts]} #{described_class.tool_preamble}
[/INST] Ok </s>
[INST] #{prompt[:input]} [/INST]
TEXT
translated = dialect.translate
expect(translated).to eq(orca_style_version)
end
it "include examples in the translated prompt" do
prompt[:examples] = [
[
"<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
"<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
],
]
orca_style_version = <<~TEXT
<s> [INST]
#{prompt[:insts]}
#{prompt[:post_insts]}
[/INST] Ok </s>
[INST] #{prompt[:examples][0][0]} [/INST]
#{prompt[:examples][0][1]}</s>
[INST] #{prompt[:input]} [/INST]
TEXT
translated = dialect.translate
expect(translated).to eq(orca_style_version)
end
it "include tools inside the prompt" do
prompt[:tools] = [tool]
orca_style_version = <<~TEXT
<s> [INST]
#{prompt[:insts]}
#{DiscourseAi::Completions::Dialects::Mixtral.tool_preamble}
<tools> <tools>
#{dialect.tools}</tools> #{context.dialect_tools}</tools>
#{prompt[:post_insts]}
[/INST] Ok </s> [/INST] Ok </s>
[INST] #{prompt[:input]} [/INST] [INST]#{context.simple_user_input}[/INST]
TEXT TEXT
translated = dialect.translate translated = context.system_user_scenario
expect(translated).to eq(orca_style_version) expect(translated).to eq(mixtral_version)
end
end
describe "#conversation_context" do
let(:context) do
[
{ type: "user", name: "user1", content: "This is a new message by a user" },
{ type: "assistant", content: "I'm a previous bot reply, that's why there's no user" },
{ type: "tool", name: "tool_id", content: "I'm a tool result" },
]
end end
it "adds conversation in reverse order (first == newer)" do it "translates tool messages" do
prompt[:conversation_context] = context expected = +(<<~TEXT).strip
<s> [INST]
expected = <<~TEXT #{context.system_insts}
#{described_class.tool_preamble}
<tools>
#{context.dialect_tools}</tools>
[/INST] Ok </s>
[INST]This is a message by a user[/INST]
I'm a previous bot reply, that's why there's no user</s>
[INST]This is a new message by a user[/INST]
<function_results> <function_results>
<result> <result>
<tool_name>tool_id</tool_name> <tool_name>tool_id</tool_name>
<json> <json>
#{context.last[:content]} "I'm a tool result"
</json> </json>
</result> </result>
</function_results> </function_results>
#{context.second[:content]}
[INST] #{context.first[:content]}
[/INST]
TEXT TEXT
translated_context = dialect.conversation_context expect(context.multi_turn_scenario).to eq(expected)
expect(translated_context.strip).to eq(expected.strip)
end end
it "trims content if it's getting too long" do it "trims content if it's getting too long" do
context.last[:content] = context.last[:content] * 6_000 length = 6_000
prompt[:conversation_context] = context translated = context.long_user_input_scenario(length: length)
translated_context = dialect.conversation_context expect(translated.length).to be < context.long_message_text(length: length).length
expect(translated_context.length).to be < context.last[:content].length
end
end
describe "#tools" do
it "translates tools to the tool syntax" do
prompt[:tools] = [tool]
translated_tool = <<~TEXT
<tool_description>
<tool_name>get_weather</tool_name>
<description>Get the weather in a city</description>
<parameters>
<parameter>
<name>location</name>
<type>string</type>
<description>the city name</description>
<required>true</required>
</parameter>
<parameter>
<name>unit</name>
<type>string</type>
<description>the unit of measurement celsius c or fahrenheit f</description>
<required>true</required>
<options>c,f</options>
</parameter>
</parameters>
</tool_description>
TEXT
expect(dialect.tools).to eq(translated_tool)
end end
end end
end end

View File

@ -1,181 +1,61 @@
# frozen_string_literal: true # frozen_string_literal: true
require_relative "dialect_context"
RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do
subject(:dialect) { described_class.new(prompt, "StableBeluga2") } let(:model_name) { "StableBeluga2" }
let(:context) { DialectContext.new(described_class, model_name) }
let(:tool) do
{
name: "get_weather",
description: "Get the weather in a city",
parameters: [
{ name: "location", type: "string", description: "the city name", required: true },
{
name: "unit",
type: "string",
description: "the unit of measurement celcius c or fahrenheit f",
enum: %w[c f],
required: true,
},
],
}
end
let(:prompt) do
{
insts: <<~TEXT,
I want you to act as a title generator for written pieces. I will provide you with a text,
and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
TEXT
input: <<~TEXT,
Here is the text, inside <input></input> XML tags:
<input>
To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
dies so that a scene may be repeated.
</input>
TEXT
post_insts:
"Please put the translation between <ai></ai> tags and separate each title with a comma.",
}
end
describe "#translate" do describe "#translate" do
it "translates a prompt written in our generic format to the Open AI format" do it "translates a prompt written in our generic format to the Llama2 format" do
orca_style_version = <<~TEXT orca_style_version = <<~TEXT
### System: ### System:
#{prompt[:insts]} #{context.system_insts}
#{prompt[:post_insts]} #{described_class.tool_preamble}
### User:
#{prompt[:input]}
### Assistant:
TEXT
translated = dialect.translate
expect(translated).to eq(orca_style_version)
end
it "include examples in the translated prompt" do
prompt[:examples] = [
[
"<input>In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.</input>",
"<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
],
]
orca_style_version = <<~TEXT
### System:
#{prompt[:insts]}
#{prompt[:post_insts]}
### User:
#{prompt[:examples][0][0]}
### Assistant:
#{prompt[:examples][0][1]}
### User:
#{prompt[:input]}
### Assistant:
TEXT
translated = dialect.translate
expect(translated).to eq(orca_style_version)
end
it "include tools inside the prompt" do
prompt[:tools] = [tool]
orca_style_version = <<~TEXT
### System:
#{prompt[:insts]}
#{DiscourseAi::Completions::Dialects::OrcaStyle.tool_preamble}
<tools> <tools>
#{dialect.tools}</tools> #{context.dialect_tools}</tools>
#{prompt[:post_insts]}
### User: ### User:
#{prompt[:input]} #{context.simple_user_input}
### Assistant: ### Assistant:
TEXT TEXT
translated = dialect.translate translated = context.system_user_scenario
expect(translated).to eq(orca_style_version) expect(translated).to eq(orca_style_version)
end
end
describe "#conversation_context" do
let(:context) do
[
{ type: "user", name: "user1", content: "This is a new message by a user" },
{ type: "assistant", content: "I'm a previous bot reply, that's why there's no user" },
{ type: "tool", name: "tool_id", content: "I'm a tool result" },
]
end end
it "adds conversation in reverse order (first == newer)" do it "translates tool messages" do
prompt[:conversation_context] = context expected = +(<<~TEXT)
### System:
expected = <<~TEXT #{context.system_insts}
#{described_class.tool_preamble}
<tools>
#{context.dialect_tools}</tools>
### User:
This is a message by a user
### Assistant:
I'm a previous bot reply, that's why there's no user
### User:
This is a new message by a user
### Assistant: ### Assistant:
<function_results> <function_results>
<result> <result>
<tool_name>tool_id</tool_name> <tool_name>tool_id</tool_name>
<json> <json>
#{context.last[:content]} "I'm a tool result"
</json> </json>
</result> </result>
</function_results> </function_results>
### Assistant: #{context.second[:content]} ### Assistant:
### User: #{context.first[:content]}
TEXT TEXT
translated_context = dialect.conversation_context expect(context.multi_turn_scenario).to eq(expected)
expect(translated_context).to eq(expected)
end end
it "trims content if it's getting too long" do it "trims content if it's getting too long" do
context.last[:content] = context.last[:content] * 1_000 translated = context.long_user_input_scenario
prompt[:conversation_context] = context
translated_context = dialect.conversation_context expect(translated.length).to be < context.long_message_text.length
expect(translated_context.length).to be < context.last[:content].length
end
end
describe "#tools" do
it "translates tools to the tool syntax" do
prompt[:tools] = [tool]
translated_tool = <<~TEXT
<tool_description>
<tool_name>get_weather</tool_name>
<description>Get the weather in a city</description>
<parameters>
<parameter>
<name>location</name>
<type>string</type>
<description>the city name</description>
<required>true</required>
</parameter>
<parameter>
<name>unit</name>
<type>string</type>
<description>the unit of measurement celsius c or fahrenheit f</description>
<required>true</required>
<options>c,f</options>
</parameter>
</parameters>
</tool_description>
TEXT
expect(dialect.tools).to eq(translated_tool)
end end
end end
end end

View File

@ -6,7 +6,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) } subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) }
let(:model_name) { "claude-2" } let(:model_name) { "claude-2" }
let(:generic_prompt) { { insts: "write 3 words" } }
let(:dialect) { DiscourseAi::Completions::Dialects::Claude.new(generic_prompt, model_name) } let(:dialect) { DiscourseAi::Completions::Dialects::Claude.new(generic_prompt, model_name) }
let(:prompt) { dialect.translate } let(:prompt) { dialect.translate }

View File

@ -9,7 +9,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
let(:model_name) { "claude-2" } let(:model_name) { "claude-2" }
let(:bedrock_name) { "claude-v2:1" } let(:bedrock_name) { "claude-v2:1" }
let(:generic_prompt) { { insts: "write 3 words" } }
let(:dialect) { DiscourseAi::Completions::Dialects::Claude.new(generic_prompt, model_name) } let(:dialect) { DiscourseAi::Completions::Dialects::Claude.new(generic_prompt, model_name) }
let(:prompt) { dialect.translate } let(:prompt) { dialect.translate }

View File

@ -34,6 +34,13 @@ RSpec.shared_examples "an endpoint that can communicate with a completion servic
Net.send(:const_set, :HTTP, @original_net_http) Net.send(:const_set, :HTTP, @original_net_http)
end end
let(:generic_prompt) do
DiscourseAi::Completions::Prompt.new(
"You write words",
messages: [{ type: :user, content: "write 3 words" }],
)
end
describe "#perform_completion!" do describe "#perform_completion!" do
fab!(:user) { Fabricate(:user) } fab!(:user) { Fabricate(:user) }
@ -97,16 +104,11 @@ RSpec.shared_examples "an endpoint that can communicate with a completion servic
end end
context "with functions" do context "with functions" do
let(:generic_prompt) do before do
{ generic_prompt.tools = [tool]
insts: "You can tell me the weather", stub_response(prompt, tool_call, tool_call: true)
input: "Return the weather in Sydney",
tools: [tool],
}
end end
before { stub_response(prompt, tool_call, tool_call: true) }
it "returns a function invocation" do it "returns a function invocation" do
completion_response = model.perform_completion!(dialect, user) completion_response = model.perform_completion!(dialect, user)
@ -153,16 +155,11 @@ RSpec.shared_examples "an endpoint that can communicate with a completion servic
end end
context "with functions" do context "with functions" do
let(:generic_prompt) do before do
{ generic_prompt.tools = [tool]
insts: "You can tell me the weather", stub_streamed_response(prompt, tool_deltas, tool_call: true)
input: "Return the weather in Sydney",
tools: [tool],
}
end end
before { stub_streamed_response(prompt, tool_deltas, tool_call: true) }
it "waits for the invocation to finish before calling the partial" do it "waits for the invocation to finish before calling the partial" do
buffered_partial = "" buffered_partial = ""

View File

@ -6,7 +6,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) } subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) }
let(:model_name) { "gemini-pro" } let(:model_name) { "gemini-pro" }
let(:generic_prompt) { { insts: "You are a helpful bot.", input: "write 3 words" } }
let(:dialect) { DiscourseAi::Completions::Dialects::Gemini.new(generic_prompt, model_name) } let(:dialect) { DiscourseAi::Completions::Dialects::Gemini.new(generic_prompt, model_name) }
let(:prompt) { dialect.translate } let(:prompt) { dialect.translate }
@ -38,14 +37,18 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
model model
.default_options .default_options
.merge(contents: prompt) .merge(contents: prompt)
.tap { |b| b[:tools] = [{ function_declarations: [tool_payload] }] if generic_prompt[:tools] } .tap do |b|
b[:tools] = [{ function_declarations: [tool_payload] }] if generic_prompt.tools.present?
end
.to_json .to_json
end end
let(:stream_request_body) do let(:stream_request_body) do
model model
.default_options .default_options
.merge(contents: prompt) .merge(contents: prompt)
.tap { |b| b[:tools] = [{ function_declarations: [tool_payload] }] if generic_prompt[:tools] } .tap do |b|
b[:tools] = [{ function_declarations: [tool_payload] }] if generic_prompt.tools.present?
end
.to_json .to_json
end end

View File

@ -6,7 +6,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::HuggingFace do
subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::Llama2Tokenizer) } subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::Llama2Tokenizer) }
let(:model_name) { "Llama2-*-chat-hf" } let(:model_name) { "Llama2-*-chat-hf" }
let(:generic_prompt) { { insts: "You are a helpful bot.", input: "write 3 words" } }
let(:dialect) do let(:dialect) do
DiscourseAi::Completions::Dialects::Llama2Classic.new(generic_prompt, model_name) DiscourseAi::Completions::Dialects::Llama2Classic.new(generic_prompt, model_name)
end end

View File

@ -6,7 +6,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) } subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) }
let(:model_name) { "gpt-3.5-turbo" } let(:model_name) { "gpt-3.5-turbo" }
let(:generic_prompt) { { insts: "You are a helpful bot.", input: "write 3 words" } }
let(:dialect) { DiscourseAi::Completions::Dialects::ChatGpt.new(generic_prompt, model_name) } let(:dialect) { DiscourseAi::Completions::Dialects::ChatGpt.new(generic_prompt, model_name) }
let(:prompt) { dialect.translate } let(:prompt) { dialect.translate }
@ -37,7 +36,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
model model
.default_options .default_options
.merge(messages: prompt) .merge(messages: prompt)
.tap { |b| b[:tools] = dialect.tools if generic_prompt[:tools] } .tap { |b| b[:tools] = dialect.tools if generic_prompt.tools.present? }
.to_json .to_json
end end
@ -45,7 +44,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
model model
.default_options .default_options
.merge(messages: prompt, stream: true) .merge(messages: prompt, stream: true)
.tap { |b| b[:tools] = dialect.tools if generic_prompt[:tools] } .tap { |b| b[:tools] = dialect.tools if generic_prompt.tools.present? }
.to_json .to_json
end end
@ -183,7 +182,10 @@ data: [D|ONE]
partials = [] partials = []
llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo") llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo")
llm.generate({ insts: "test" }, user: Discourse.system_user) { |partial| partials << partial } llm.generate(
DiscourseAi::Completions::Prompt.new("test"),
user: Discourse.system_user,
) { |partial| partials << partial }
expect(partials.join).to eq("test,test2,test3,test4") expect(partials.join).to eq("test,test2,test3,test4")
end end
@ -212,7 +214,10 @@ data: [D|ONE]
partials = [] partials = []
llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo") llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo")
llm.generate({ insts: "test" }, user: Discourse.system_user) { |partial| partials << partial } llm.generate(
DiscourseAi::Completions::Prompt.new("test"),
user: Discourse.system_user,
) { |partial| partials << partial }
expect(partials.join).to eq("test,test1,test2,test3,test4") expect(partials.join).to eq("test,test1,test2,test3,test4")
end end

View File

@ -6,7 +6,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Vllm do
subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::MixtralTokenizer) } subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::MixtralTokenizer) }
let(:model_name) { "mistralai/Mixtral-8x7B-Instruct-v0.1" } let(:model_name) { "mistralai/Mixtral-8x7B-Instruct-v0.1" }
let(:generic_prompt) { { insts: "You are a helpful bot.", input: "write 3 words" } }
let(:dialect) { DiscourseAi::Completions::Dialects::Mixtral.new(generic_prompt, model_name) } let(:dialect) { DiscourseAi::Completions::Dialects::Mixtral.new(generic_prompt, model_name) }
let(:prompt) { dialect.translate } let(:prompt) { dialect.translate }

View File

@ -29,15 +29,21 @@ RSpec.describe DiscourseAi::Completions::Llm do
let(:llm) { described_class.proxy("fake") } let(:llm) { described_class.proxy("fake") }
let(:prompt) do
DiscourseAi::Completions::Prompt.new(
"You are fake",
messages: [{ type: :user, content: "fake orders" }],
)
end
it "can generate a response" do it "can generate a response" do
response = llm.generate({ input: "fake prompt" }, user: user) response = llm.generate(prompt, user: user)
expect(response).to be_present expect(response).to be_present
end end
it "can generate content via a block" do it "can generate content via a block" do
partials = [] partials = []
response = response = llm.generate(prompt, user: user) { |partial| partials << partial }
llm.generate({ input: "fake prompt" }, user: user) { |partial| partials << partial }
expect(partials.length).to eq(10) expect(partials.length).to eq(10)
expect(response).to eq(DiscourseAi::Completions::Endpoints::Fake.fake_content) expect(response).to eq(DiscourseAi::Completions::Endpoints::Fake.fake_content)
@ -48,23 +54,22 @@ RSpec.describe DiscourseAi::Completions::Llm do
describe "#generate" do describe "#generate" do
let(:prompt) do let(:prompt) do
{ system_insts = (<<~TEXT).strip
insts: <<~TEXT, I want you to act as a title generator for written pieces. I will provide you with a text,
I want you to act as a title generator for written pieces. I will provide you with a text, and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words, and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
TEXT TEXT
input: <<~TEXT,
Here is the text, inside <input></input> XML tags: DiscourseAi::Completions::Prompt
<input> .new(system_insts)
To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends, .tap { |a_prompt| a_prompt.push(type: :user, content: (<<~TEXT).strip) }
discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer Here is the text, inside <input></input> XML tags:
defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry. <input>
</input> To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
TEXT discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
post_insts: defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
"Please put the translation between <ai></ai> tags and separate each title with a comma.", </input>
} TEXT
end end
let(:canned_response) do let(:canned_response) do

View File

@ -0,0 +1,66 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Completions::Prompt do
subject(:prompt) { described_class.new(system_insts) }
let(:system_insts) { "These are the system instructions." }
let(:user_msg) { "Write something nice" }
let(:username) { "username1" }
describe ".new" do
it "raises for invalid attributes" do
expect { described_class.new("a bot", messages: {}) }.to raise_error(ArgumentError)
expect { described_class.new("a bot", tools: {}) }.to raise_error(ArgumentError)
bad_messages = [{ type: :user, content: "a system message", unknown_attribute: :random }]
expect { described_class.new("a bot", messages: bad_messages) }.to raise_error(ArgumentError)
bad_messages2 = [{ type: :user }]
expect { described_class.new("a bot", messages: bad_messages2) }.to raise_error(ArgumentError)
bad_messages3 = [{ content: "some content associated to no one" }]
expect { described_class.new("a bot", messages: bad_messages3) }.to raise_error(ArgumentError)
end
end
describe "#push" do
describe "turn validations" do
it "validates that tool messages have a previous tool_call message" do
prompt.push(type: :user, content: user_msg, id: username)
prompt.push(type: :model, content: "I'm a model msg")
expect { prompt.push(type: :tool, content: "I'm the tool call results") }.to raise_error(
DiscourseAi::Completions::Prompt::INVALID_TURN,
)
end
it "validates that model messages have either a previous tool or user messages" do
prompt.push(type: :user, content: user_msg, id: username)
prompt.push(type: :model, content: "I'm a model msg")
expect { prompt.push(type: :model, content: "I'm a second model msg") }.to raise_error(
DiscourseAi::Completions::Prompt::INVALID_TURN,
)
end
end
it "system message is always first" do
prompt.push(type: :user, content: user_msg, id: username)
system_message = prompt.messages.first
expect(system_message[:type]).to eq(:system)
expect(system_message[:content]).to eq(system_insts)
end
it "includes the pushed message" do
prompt.push(type: :user, content: user_msg, id: username)
user_message = prompt.messages.last
expect(user_message[:type]).to eq(:user)
expect(user_message[:content]).to eq(user_msg)
expect(user_message[:id]).to eq(username)
end
end
end
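Read together, these specs pin down the turn discipline the new object enforces. A minimal sketch of a valid sequence (the weather payload, tool id, and reply text are illustrative, not taken from the spec):
prompt = DiscourseAi::Completions::Prompt.new("These are the system instructions.")
prompt.push(type: :user, content: "What's the weather in Sydney?", id: "username1")
prompt.push(
  type: :tool_call,
  id: "tool_id",
  content: { name: "get_weather", arguments: { location: "Sydney", unit: "c" } }.to_json,
)
prompt.push(type: :tool, id: "tool_id", content: "10 degrees".to_json) # allowed: follows a tool_call
prompt.push(type: :model, content: "It is 10 degrees in Sydney.") # allowed: follows a tool or user turn
prompt.messages.first # => { type: :system, content: "These are the system instructions." }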

View File

@ -11,18 +11,6 @@ RSpec.describe DiscourseAi::AiBot::Bot do
let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT4_ID) } let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT4_ID) }
let!(:user) { Fabricate(:user) } let!(:user) { Fabricate(:user) }
let!(:pm) do
Fabricate(
:private_message_topic,
title: "This is my special PM",
user: user,
topic_allowed_users: [
Fabricate.build(:topic_allowed_user, user: user),
Fabricate.build(:topic_allowed_user, user: bot_user),
],
)
end
let!(:pm_post) { Fabricate(:post, topic: pm, user: user, raw: "Does my site have tags?") }
let(:function_call) { <<~TEXT } let(:function_call) { <<~TEXT }
Let me try using a function to get more info:<function_calls> Let me try using a function to get more info:<function_calls>
@ -49,7 +37,7 @@ RSpec.describe DiscourseAi::AiBot::Bot do
HTML HTML
context = {} context = { conversation_context: [{ type: :user, content: "Does my site have tags?" }] }
DiscourseAi::Completions::Llm.with_prepared_responses(llm_responses) do DiscourseAi::Completions::Llm.with_prepared_responses(llm_responses) do
bot.reply(context) do |_bot_reply_post, cancel, placeholder| bot.reply(context) do |_bot_reply_post, cancel, placeholder|

View File

@ -52,14 +52,15 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
freeze_time freeze_time
rendered = persona.craft_prompt(context) rendered = persona.craft_prompt(context)
system_message = rendered.messages.first[:content]
expect(rendered[:insts]).to include(Discourse.base_url) expect(system_message).to include(Discourse.base_url)
expect(rendered[:insts]).to include("test site title") expect(system_message).to include("test site title")
expect(rendered[:insts]).to include("test site description") expect(system_message).to include("test site description")
expect(rendered[:insts]).to include("joe, jane") expect(system_message).to include("joe, jane")
expect(rendered[:insts]).to include(Time.zone.now.to_s) expect(system_message).to include(Time.zone.now.to_s)
tools = rendered[:tools] tools = rendered.tools
expect(tools.find { |t| t[:name] == "search" }).to be_present expect(tools.find { |t| t[:name] == "search" }).to be_present
expect(tools.find { |t| t[:name] == "tags" }).to be_present expect(tools.find { |t| t[:name] == "tags" }).to be_present
@ -107,7 +108,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
instance = custom_persona.new instance = custom_persona.new
expect(instance.tools).to eq([DiscourseAi::AiBot::Tools::Image]) expect(instance.tools).to eq([DiscourseAi::AiBot::Tools::Image])
expect(instance.craft_prompt(context).dig(:insts)).to eq("you are pun bot\n\n") expect(instance.craft_prompt(context).messages.first[:content]).to eq("you are pun bot")
# should update # should update
persona.update!(name: "zzzpun_bot2") persona.update!(name: "zzzpun_bot2")

View File

@ -155,9 +155,9 @@ RSpec.describe DiscourseAi::AiBot::Playground do
expect(context).to contain_exactly( expect(context).to contain_exactly(
*[ *[
{ type: "user", name: user.username, content: third_post.raw }, { type: :user, id: user.username, content: third_post.raw },
{ type: "assistant", content: second_post.raw }, { type: :model, content: second_post.raw },
{ type: "user", name: user.username, content: first_post.raw }, { type: :user, id: user.username, content: first_post.raw },
], ],
) )
end end
@ -169,8 +169,8 @@ RSpec.describe DiscourseAi::AiBot::Playground do
expect(context).to contain_exactly( expect(context).to contain_exactly(
*[ *[
{ type: "user", name: user.username, content: third_post.raw }, { type: :user, id: user.username, content: third_post.raw },
{ type: "assistant", content: second_post.raw }, { type: :model, content: second_post.raw },
], ],
) )
end end
@ -197,16 +197,42 @@ RSpec.describe DiscourseAi::AiBot::Playground do
expect(context).to contain_exactly( expect(context).to contain_exactly(
*[ *[
{ type: "user", name: user.username, content: third_post.raw }, { type: :user, id: user.username, content: third_post.raw },
{ { type: :model, content: custom_prompt.third.first },
type: "multi_turn", { type: :tool_call, content: custom_prompt.second.first, id: "time" },
content: [ { type: :tool, id: "time", content: custom_prompt.first.first },
{ type: "assistant", content: custom_prompt.third.first }, { type: :user, id: user.username, content: first_post.raw },
{ type: "tool_call", content: custom_prompt.second.first, name: "time" }, ],
{ type: "tool", name: "time", content: custom_prompt.first.first }, )
], end
},
{ type: "user", name: user.username, content: first_post.raw }, it "include replies generated from tools only once" do
custom_prompt = [
[
{ args: { timezone: "Buenos Aires" }, time: "2023-12-14 17:24:00 -0300" }.to_json,
"time",
"tool",
],
[
{ name: "time", arguments: { name: "time", timezone: "Buenos Aires" } }.to_json,
"time",
"tool_call",
],
["I replied this thanks to the time command", bot_user.username],
]
PostCustomPrompt.create!(post: second_post, custom_prompt: custom_prompt)
PostCustomPrompt.create!(post: first_post, custom_prompt: custom_prompt)
context = playground.conversation_context(third_post)
expect(context).to contain_exactly(
*[
{ type: :user, id: user.username, content: third_post.raw },
{ type: :model, content: custom_prompt.third.first },
{ type: :tool_call, content: custom_prompt.second.first, id: "time" },
{ type: :tool, id: "time", content: custom_prompt.first.first },
{ type: :tool_call, content: custom_prompt.second.first, id: "time" },
{ type: :tool, id: "time", content: custom_prompt.first.first },
], ],
) )
end end

View File

@ -26,18 +26,16 @@ RSpec.describe CompletionPrompt do
let(:custom_prompt) { described_class.find(described_class::CUSTOM_PROMPT) } let(:custom_prompt) { described_class.find(described_class::CUSTOM_PROMPT) }
it "wraps the user input with <input> XML tags and adds a custom instruction if given" do it "wraps the user input with <input> XML tags and adds a custom instruction if given" do
expected = <<~TEXT expected = <<~TEXT.strip
<input> <input>Translate to Turkish:
Translate to Turkish: #{user_input}</input>
#{user_input}
</input>
TEXT TEXT
custom_prompt.custom_instruction = "Translate to Turkish" custom_prompt.custom_instruction = "Translate to Turkish"
prompt = custom_prompt.messages_with_input(user_input) prompt = custom_prompt.messages_with_input(user_input)
expect(prompt[:input]).to eq(expected) expect(prompt.messages.last[:content]).to eq(expected)
end end
end end
@ -45,16 +43,13 @@ RSpec.describe CompletionPrompt do
let(:title_prompt) { described_class.find(described_class::GENERATE_TITLES) } let(:title_prompt) { described_class.find(described_class::GENERATE_TITLES) }
it "wraps user input with <input> XML tags" do it "wraps user input with <input> XML tags" do
expected = <<~TEXT expected = "<input>#{user_input}</input>"
<input>
#{user_input}
</input>
TEXT
title_prompt.custom_instruction = "Translate to Turkish" title_prompt.custom_instruction = "Translate to Turkish"
prompt = title_prompt.messages_with_input(user_input) prompt = title_prompt.messages_with_input(user_input)
expect(prompt[:input]).to eq(expected) expect(prompt.messages.last[:content]).to eq(expected)
end end
end end
end end

View File

@ -85,11 +85,9 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
expected_diff = expected_diff =
"<div class=\"inline-diff\"><p><ins>Un </ins><ins>usuario </ins><ins>escribio </ins><ins>esto</ins><del>A </del><del>user </del><del>wrote </del><del>this</del></p></div>" "<div class=\"inline-diff\"><p><ins>Un </ins><ins>usuario </ins><ins>escribio </ins><ins>esto</ins><del>A </del><del>user </del><del>wrote </del><del>this</del></p></div>"
expected_input = <<~TEXT expected_input = <<~TEXT.strip
<input> <input>Translate to Spanish:
Translate to Spanish: A user wrote this</input>
A user wrote this
</input>
TEXT TEXT
DiscourseAi::Completions::Llm.with_prepared_responses([translated_text]) do |spy| DiscourseAi::Completions::Llm.with_prepared_responses([translated_text]) do |spy|