discourse-ai/lib/completions/dialects/gemini.rb

# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
      class Gemini < Dialect
        # Reports whether this dialect handles the given model.
        #
        # @param model_name [String] model identifier to check
        # @return [Boolean] true only when the name is "gemini-pro"
        def self.can_translate?(model_name)
          supported_models = %w[gemini-pro]
          supported_models.include?(model_name)
        end

        # Tokenizer used for this dialect's token counting.
        #
        # @return [Object] the DiscourseAi::Tokenizer::OpenAiTokenizer constant
        def self.tokenizer
          DiscourseAi::Tokenizer::OpenAiTokenizer ## TODO Replace with GeminiTokenizer
        end

        # Builds the Gemini message list for the current prompt.
        #
        # The list opens with the instructions as a "user" turn followed by a
        # canned "model" acknowledgement, then any example pairs, the translated
        # conversation context and, last, the user's input.
        #
        # @return [Array<Hash>] messages shaped as { role:, parts: { ... } }
        def translate
          # Gemini complains if we don't alternate model/user roles.
          ack = { role: "model", parts: { text: "Ok." } }

          instructions = [prompt[:insts], prompt[:post_insts].to_s].join("\n")
          messages = [{ role: "user", parts: { text: instructions } }, ack]

          # Every example contributes one user turn and one model turn.
          (prompt[:examples] || []).each do |pair|
            messages.push(
              { role: "user", parts: { text: pair.first } },
              { role: "model", parts: { text: pair.second } },
            )
          end

          messages.concat(conversation_context) if prompt[:conversation_context]

          user_input = prompt[:input]
          return messages unless user_input

          # Never emit two consecutive "user" turns; pad with an acknowledgement.
          messages << ack.dup if messages.last[:role] == "user"
          messages << { role: "user", parts: { text: user_input } }
        end

        # Converts the prompt's tool definitions into Gemini function declarations.
        #
        # Each tool keeps its :name and :description. Its :parameters list, when
        # present, becomes an object schema with :required and :properties.
        #
        # @return [Array<Hash>, nil] a one-element array wrapping
        #   :function_declarations, or nil when the prompt has no tools
        def tools
          return if prompt[:tools].blank?

          declarations =
            prompt[:tools].map do |definition|
              declaration = definition.slice(:name, :description)
              params = definition[:parameters]

              if params
                schema = { type: "object", required: [], properties: {} }

                params.each do |param|
                  param_name = param[:name]
                  schema[:required] << param_name if param[:required]

                  # :name and :required live at the schema level; :item_type is
                  # re-expressed as a nested :items entry.
                  property = param.except(:name, :required, :item_type)
                  property[:items] = { type: param[:item_type] } if param[:item_type]
                  schema[:properties][param_name] = property
                end

                declaration[:parameters] = schema
              end

              declaration
            end

          [{ function_declarations: declarations }]
        end

        # Translates the prompt's conversation context into Gemini messages.
        #
        # The context is flattened, passed through trim_context (not defined in
        # this class body; presumably inherited and token-limit related — confirm),
        # reversed, and each entry is mapped by its :type:
        # "tool_call" => model functionCall, "tool" => function functionResponse,
        # "assistant" => model text, anything else => user text.
        #
        # @return [Array<Hash>] translated messages, or [] when there is no context
        def conversation_context
          raw_context = prompt[:conversation_context]
          return [] if raw_context.blank?

          trim_context(flatten_context(raw_context)).reverse.map do |entry|
            case entry[:type]
            when "tool_call"
              # The entry content is a JSON document carrying :name and :arguments.
              call = JSON.parse(entry[:content], symbolize_names: true)
              invocation = { name: call[:name], args: call[:arguments] }

              { role: "model", parts: { functionCall: invocation } }
            when "tool"
              result = { name: entry[:name], response: { content: entry[:content] } }

              { role: "function", parts: { functionResponse: result } }
            else
              speaker = entry[:type] == "assistant" ? "model" : "user"

              { role: speaker, parts: { text: entry[:content] } }
            end
          end
        end

        # Upper bound on prompt tokens for this dialect.
        #
        # @return [Integer] 16_384
        def max_prompt_tokens
          32_768 / 2 # 50% of model tokens
        end

        protected

        # Measures one context entry with the dialect's tokenizer.
        #
        # The entry's :content and :name are stringified and joined (nil becomes
        # an empty string) before being handed to the tokenizer's +size+.
        #
        # @param context [Hash] entry with optional :content and :name
        # @return [Object] whatever the tokenizer's +size+ returns for that text
        def calculate_message_token(context)
          measured_text = "#{context[:content]}#{context[:name]}"
          self.class.tokenizer.size(measured_text)
        end

        private

        # Expands "multi_turn" entries so the context becomes a flat list.
        #
        # Entries of any other type are passed through unchanged. A "multi_turn"
        # entry is replaced by the items in its :content, in order.
        #
        # Fixes a NameError: the placeholder used to be appended to a misspelled
        # local (`flattend`), so any multi_turn bundle starting with a tool entry
        # crashed. An empty or missing :content is now treated as no items
        # instead of raising.
        #
        # @param context [Array<Hash>] context entries, each with a :type
        # @return [Array<Hash>] the flattened entries
        def flatten_context(context)
          context.each_with_object([]) do |entry, flattened|
            unless entry[:type] == "multi_turn"
              flattened << entry
              next
            end

            turns = entry[:content] || []

            # gemini quirk: a bundle that opens with a tool entry gets an
            # assistant "ok." placeholder added ahead of it.
            if turns.first&.dig(:type) == "tool"
              flattened << { type: "assistant", content: "ok." }
            end

            flattened.concat(turns)
          end
        end
      end
    end
  end
end
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`# frozen_string_literal: true`

			`module DiscourseAi`
			`module Completions`
			`module Dialects`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`class Gemini < Dialect`
			`class << self`
			`def can_translate?(model_name)`
			`%w[gemini-pro].include?(model_name)`
			`end`

			`def tokenizer`
			`DiscourseAi::Tokenizer::OpenAiTokenizer ## TODO Replace with GeminiTokenizer`
			`end`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`end`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`def translate`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`# Gemini complains if we don't alternate model/user roles.`
			`noop_model_response = { role: "model", parts: { text: "Ok." } }`

FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`gemini_prompt = [`
			`{`
			`role: "user",`
			`parts: {`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`text: [prompt[:insts], prompt[:post_insts].to_s].join("\n"),`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`},`
			`},`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`noop_model_response,`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`]`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`if prompt[:examples]`
			`prompt[:examples].each do \|example_pair\|`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`gemini_prompt << { role: "user", parts: { text: example_pair.first } }`
			`gemini_prompt << { role: "model", parts: { text: example_pair.second } }`
			`end`
			`end`

FIX: Correctly translate and read tools for Claude and Chat GPT. (#393) I tested against the live models for the AI bot migration. It ensures Open AI's tool syntax is correct and we can correctly read the replies. : 2024-01-02 09:21:13 -05:00			`gemini_prompt.concat(conversation_context) if prompt[:conversation_context]`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`if prompt[:input]`
			`gemini_prompt << noop_model_response.dup if gemini_prompt.last[:role] == "user"`

			`gemini_prompt << { role: "user", parts: { text: prompt[:input] } }`
			`end`

			`gemini_prompt`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`end`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`def tools`
			`return if prompt[:tools].blank?`

			`translated_tools =`
			`prompt[:tools].map do \|t\|`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`tool = t.slice(:name, :description)`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`if t[:parameters]`
			`tool[:parameters] = t[:parameters].reduce(`
			`{ type: "object", required: [], properties: {} },`
			`) do \|memo, p\|`
			`name = p[:name]`
			`memo[:required] << name if p[:required]`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`memo[:properties][name] = p.except(:name, :required, :item_type)`

			`memo[:properties][name][:items] = { type: p[:item_type] } if p[:item_type]`
			`memo`
			`end`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`end`

FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`tool`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`end`

			`[{ function_declarations: translated_tools }]`
			`end`

			`def conversation_context`
			`return [] if prompt[:conversation_context].blank?`

FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`flattened_context = flatten_context(prompt[:conversation_context])`
			`trimmed_context = trim_context(flattened_context)`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00
			`trimmed_context.reverse.map do \|context\|`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`if context[:type] == "tool_call"`
			`function = JSON.parse(context[:content], symbolize_names: true)`

			`{`
			`role: "model",`
			`parts: {`
			`functionCall: {`
			`name: function[:name],`
			`args: function[:arguments],`
			`},`
			`},`
			`}`
			`elsif context[:type] == "tool"`
			`{`
			`role: "function",`
			`parts: {`
			`functionResponse: {`
			`name: context[:name],`
			`response: {`
			`content: context[:content],`
			`},`
			`},`
			`},`
			`}`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`else`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`{`
			`role: context[:type] == "assistant" ? "model" : "user",`
			`parts: {`
			`text: context[:content],`
			`},`
			`}`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`end`
			`end`
			`end`

			`def max_prompt_tokens`
			`16_384 # 50% of model tokens`
			`end`

			`protected`

			`def calculate_message_token(context)`
			`self.class.tokenizer.size(context[:content].to_s + context[:name].to_s)`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`end`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00
			`private`

			`def flatten_context(context)`
FIX: improve bot behavior (#408) * FIX: improve bot behavior - Provide more information to Gemini context post function execution - Use system prompts for Claude (fixes Dall E) - Ensure Assistant is properly separated - Teach Claude to return arrays in JSON vs XML Also refactors tests so we do not copy tool preamble everywhere * System msg is claude-2 only. fix typo --------- Co-authored-by: Roman Rizzi <rizziromanalejandro@gmail.com> 2024-01-08 08:28:03 -05:00			`flattened = []`
			`context.each do \|c\|`
			`if c[:type] == "multi_turn"`
			`# gemini quirk`
			`if c[:content].first[:type] == "tool"`
			`flattend << { type: "assistant", content: "ok." }`
			`end`

			`flattened.concat(c[:content])`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`else`
FIX: improve bot behavior (#408) * FIX: improve bot behavior - Provide more information to Gemini context post function execution - Use system prompts for Claude (fixes Dall E) - Ensure Assistant is properly separated - Teach Claude to return arrays in JSON vs XML Also refactors tests so we do not copy tool preamble everywhere * System msg is claude-2 only. fix typo --------- Co-authored-by: Roman Rizzi <rizziromanalejandro@gmail.com> 2024-01-08 08:28:03 -05:00			`flattened << c`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`end`
			`end`
FIX: improve bot behavior (#408) * FIX: improve bot behavior - Provide more information to Gemini context post function execution - Use system prompts for Claude (fixes Dall E) - Ensure Assistant is properly separated - Teach Claude to return arrays in JSON vs XML Also refactors tests so we do not copy tool preamble everywhere * System msg is claude-2 only. fix typo --------- Co-authored-by: Roman Rizzi <rizziromanalejandro@gmail.com> 2024-01-08 08:28:03 -05:00			`flattened`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`end`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`end`
			`end`
			`end`
			`end`