discourse-ai/lib/completions/endpoints/gemini.rb

# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Endpoints
      class Gemini < Base
        # Class-level hooks describing when this endpoint may be used.

        # True only when the requested endpoint name is "google".
        def self.can_contact?(endpoint_name)
          endpoint_name == "google"
        end

        # Names of the site settings this endpoint depends on.
        def self.dependant_setting_names
          %w[ai_gemini_api_key]
        end

        # The endpoint counts as configured once a Gemini API key is set;
        # the model name is not consulted.
        def self.correctly_configured?(_model_name)
          SiteSetting.ai_gemini_api_key.present?
        end

        # Display label combining the provider name with the model name.
        def self.endpoint_name(model_name)
          "Google - #{model_name}"
        end

        # Base request payload: a Hash holding an empty :generationConfig.
        # A new Hash is built on every call, so callers such as
        # #prepare_payload can merge into :generationConfig without sharing
        # state between requests.
        def default_options
          { generationConfig: {} }
        end

        # Translates generic parameter names into the camelCase names used in
        # the Gemini generation config.
        #
        # model_params - Hash of generic params; it is duplicated, not mutated.
        #
        # Returns a new Hash in which every truthy :stop_sequences, :max_tokens
        # and :top_p entry has been moved to :stopSequences, :maxOutputTokens
        # and :topP. Other keys (e.g. :temperature) are passed through as-is.
        def normalize_model_params(model_params)
          normalized = model_params.dup

          renames = { stop_sequences: :stopSequences, max_tokens: :maxOutputTokens, top_p: :topP }
          renames.each do |generic_name, gemini_name|
            next unless normalized[generic_name]

            normalized[gemini_name] = normalized.delete(generic_name)
          end

          normalized
        end

        # Returns the AiApiAuditLog::Provider::Gemini constant that identifies
        # this provider.
        def provider_id
          AiApiAuditLog::Provider::Gemini
        end

        private

        # Builds the request URI for the current model.
        #
        # The base URL comes from llm_model when one is set; otherwise it is the
        # public generativelanguage endpoint, with a few model names mapped to
        # their "-latest" aliases. The API key (llm_model's, falling back to the
        # site setting) is sent as a query parameter, and streaming requests use
        # the streamGenerateContent action with alt=sse.
        #
        # Returns a URI.
        def model_uri
          base_url =
            if llm_model
              llm_model.url
            else
              api_model_name =
                case model
                when "gemini-1.5-pro"
                  "gemini-1.5-pro-latest"
                when "gemini-1.5-flash"
                  "gemini-1.5-flash-latest"
                when "gemini-1.0-pro"
                  "gemini-pro-latest"
                else
                  model
                end

              "https://generativelanguage.googleapis.com/v1beta/models/#{api_model_name}"
            end

          api_key = llm_model&.api_key || SiteSetting.ai_gemini_api_key

          endpoint =
            if @streaming_mode
              "#{base_url}:streamGenerateContent?key=#{api_key}&alt=sse"
            else
              "#{base_url}:generateContent?key=#{api_key}"
            end

          URI(endpoint)
        end

        # Assembles the request body Hash.
        #
        # prompt       - Hash read for :messages and, optionally, :system_instruction.
        # model_params - generation params merged into :generationConfig when present.
        # dialect      - object whose #tools are attached when present.
        #
        # Returns the payload Hash built on top of #default_options.
        def prepare_payload(prompt, model_params, dialect)
          tools = dialect.tools

          payload = default_options.merge(contents: prompt[:messages])

          if prompt[:system_instruction].present?
            instruction_text = prompt[:system_instruction].to_s
            payload[:systemInstruction] = { role: "system", parts: [{ text: instruction_text }] }
          end

          payload[:tools] = tools if tools.present?

          if model_params.present?
            payload[:generationConfig].merge!(model_params)
          end

          payload
        end

        # Builds the POST request for #model_uri with a JSON content type.
        #
        # payload - the request body; it is assigned to the request as-is.
        #
        # Returns the Net::HTTP::Post instance.
        def prepare_request(payload)
          request = Net::HTTP::Post.new(model_uri, { "Content-Type" => "application/json" })
          request.body = payload
          request
        end

        # Extracts the first content part of the first candidate.
        #
        # response_raw - in streaming mode, an already-parsed Hash with symbol
        #                keys; otherwise the raw JSON response body (String).
        #
        # Returns the part's :functionCall Hash once a function call has been
        # seen (the @has_function_call flag is sticky across calls), the part's
        # :text otherwise, or nil when the response has no content part at all
        # (for example a candidate that carries no :content).
        def extract_completion_from(response_raw)
          parsed =
            if @streaming_mode
              response_raw
            else
              JSON.parse(response_raw, symbolize_names: true)
            end

          part = parsed.dig(:candidates, 0, :content, :parts, 0)

          # Not every response/event carries a part; previously this raised
          # NoMethodError on nil and aborted the whole completion.
          return if part.nil?

          @has_function_call ||= part[:functionCall].present?
          @has_function_call ? part[:functionCall] : part[:text]
        end

        # Returns decoded_chunk unchanged; no further splitting is done here.
        # NOTE(review): presumably invoked by Base with the output of #decode,
        # which is already a list of parsed events — confirm against Base.
        def partials_from(decoded_chunk)
          decoded_chunk
        end

        # Returns the String form of a chunk via #to_s.
        def chunk_to_string(chunk)
          chunk.to_s
        end

        # Incremental decoder for the "data: {json}" events of a streamed
        # response. Raw network chunks may end anywhere (inside the JSON or
        # inside the blank-line separator), so unfinished input is buffered
        # between calls.
        class Decoder
          # Events are separated by a blank line (LF or CRLF line endings).
          EVENT_SEPARATOR = /\r?\n\r?\n/
          JSON_EVENT_PREFIX = "data: {"
          DATA_FIELD = "data: "

          def initialize
            @buffer = +""
          end

          # Feeds one raw chunk into the decoder.
          #
          # str - String fragment of the response body.
          #
          # Returns an Array with the Hashes (symbol keys) of every event that
          # could be parsed so far, in stream order; empty when none is ready.
          def decode(str)
            @buffer << str

            # A -1 limit keeps the trailing empty string, so the last element is
            # always the not-yet-terminated tail of the stream.
            *terminated, tail = @buffer.split(EVENT_SEPARATOR, -1)

            # A terminated segment can never become valid by appending more
            # data, so unparseable ones are dropped instead of being kept in
            # the buffer, where later data would be glued onto them.
            decoded = terminated.filter_map { |segment| parse_event(segment) }

            # The tail is parsed eagerly (its JSON may be complete before the
            # separator arrives) and kept only when it is still incomplete.
            tail = tail.to_s.lstrip
            tail_event = parse_event(tail)
            if tail_event
              decoded << tail_event
              tail = ""
            end

            @buffer = +tail

            decoded
          end

          private

          # Parses one segment; returns the Hash, or nil when the segment is
          # not a "data: {" event or its JSON is invalid/incomplete. Leading
          # whitespace is ignored so a separator split across two chunks does
          # not hide the next event.
          def parse_event(segment)
            segment = segment.lstrip
            return nil unless segment.start_with?(JSON_EVENT_PREFIX)

            JSON.parse(segment.delete_prefix(DATA_FIELD), symbolize_names: true)
          rescue JSON::ParserError
            nil
          end
        end

        # Decodes a raw chunk with a Decoder that is created on first use and
        # reused afterwards, so buffered partial input survives between calls.
        def decode(chunk)
          (@decoder ||= Decoder.new).decode(chunk)
        end

        # Returns the prompt converted with #to_s.
        # NOTE(review): presumably used for token counting — the caller is not
        # visible in this file.
        def extract_prompt_for_tokenizer(prompt)
          prompt.to_s
        end

        # Returns @has_function_call, the flag #extract_completion_from sets
        # once a part containing :functionCall has been seen. The argument is
        # ignored.
        def has_tool?(_response_data)
          @has_function_call
        end

        # Always true for this endpoint.
        # NOTE(review): presumably tells Base that function calls come back as
        # structured data rather than text — confirm against Base.
        def native_tool_support?
          true
        end

        # Copies a function call into the function buffer.
        #
        # function_buffer - document-like object; its "tool_name" and
        #                   "parameters" nodes (looked up with #at) are updated.
        # payload         - function call Hash (:name, :args), used when not streaming.
        # partial         - function call Hash (:name, :args), used when streaming.
        #
        # Returns function_buffer (unchanged when there is no function call).
        def add_to_function_buffer(function_buffer, payload: nil, partial: nil)
          partial = payload if !@streaming_mode

          # Nothing to copy; applies to both modes so a missing payload no
          # longer raises NoMethodError.
          return function_buffer if !partial

          function_buffer.at("tool_name").content = partial[:name] if partial[:name].present?

          if partial[:args]
            argument_fragments =
              partial[:args].reduce(+"") do |memo, (arg_name, value)|
                # Values are escaped because the string below is parsed as
                # markup: an unescaped "<" or "&" inside a value would be read
                # as a tag/entity and the argument text would be mangled.
                memo << "\n<#{arg_name}>#{escape_argument_value(value)}</#{arg_name}>"
              end
            argument_fragments << "\n"

            function_buffer.at("parameters").children =
              Nokogiri::HTML5::DocumentFragment.parse(argument_fragments)
          end

          function_buffer
        end

        # Escapes &, < and > in the String form of an argument value so that it
        # is parsed back as plain text.
        def escape_argument_value(value)
          value.to_s.gsub(/[&<>]/, "&" => "&amp;", "<" => "&lt;", ">" => "&gt;")
        end
      end
    end
  end
end
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`# frozen_string_literal: true`

			`module DiscourseAi`
			`module Completions`
			`module Endpoints`
			`class Gemini < Base`
UX: Validations to LLM-backed features (except AI Bot) (#436) * UX: Validations to Llm-backed features (except AI Bot) This change is part of an ongoing effort to prevent enabling a broken feature due to lack of configuration. We also want to explicit which provider we are going to use. For example, Claude models are available through AWS Bedrock and Anthropic, but the configuration differs. Validations are: * You must choose a model before enabling the feature. * You must turn off the feature before setting the model to blank. * You must configure each model settings before being able to select it. * Add provider name to summarization options * vLLM can technically support same models as HF * Check we can talk to the selected model * Check for Bedrock instead of anthropic as a site could have both creds setup 2024-01-29 14:04:25 -05:00			`class << self`
HACK: Llama3 support for summarization/AI helper. (#616) There are still some limitations to which models we can support with the `LlmModel` class. This will enable support for Llama3 while we sort those out. 2024-05-13 14:54:42 -04:00			`def can_contact?(endpoint_name)`
			`endpoint_name == "google"`
UX: Validations to LLM-backed features (except AI Bot) (#436) * UX: Validations to Llm-backed features (except AI Bot) This change is part of an ongoing effort to prevent enabling a broken feature due to lack of configuration. We also want to explicit which provider we are going to use. For example, Claude models are available through AWS Bedrock and Anthropic, but the configuration differs. Validations are: * You must choose a model before enabling the feature. * You must turn off the feature before setting the model to blank. * You must configure each model settings before being able to select it. * Add provider name to summarization options * vLLM can technically support same models as HF * Check we can talk to the selected model * Check for Bedrock instead of anthropic as a site could have both creds setup 2024-01-29 14:04:25 -05:00			`end`

			`def dependant_setting_names`
			`%w[ai_gemini_api_key]`
			`end`

			`def correctly_configured?(_model_name)`
			`SiteSetting.ai_gemini_api_key.present?`
			`end`

			`def endpoint_name(model_name)`
			`"Google - #{model_name}"`
			`end`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`end`

			`def default_options`
FIX: AI helper not working correctly with mixtral (#399) * FIX: AI helper not working correctly with mixtral This PR introduces a new function on the generic llm called #generate This will replace the implementation of completion! #generate introduces a new way to pass temperature, max_tokens and stop_sequences Then LLM implementers need to implement #normalize_model_params to ensure the generic names match the LLM specific endpoint This also adds temperature and stop_sequences to completion_prompts this allows for much more robust completion prompts * port everything over to #generate * Fix translation - On anthropic this no longer throws random "This is your translation:" - On mixtral this actually works * fix markdown table generation as well 2024-01-04 07:53:47 -05:00			`{ generationConfig: {} }`
			`end`

			`def normalize_model_params(model_params)`
			`model_params = model_params.dup`

			`if model_params[:stop_sequences]`
			`model_params[:stopSequences] = model_params.delete(:stop_sequences)`
			`end`

FEATURE: fine tune llm report to follow instructions more closely (#451) - Allow users to supply top_p and temperature values, which means people can fine tune randomness - Fix bad localization string - Fix bad remapping of max tokens in gemini - Add support for top_p as a general param to llms - Amend system prompt so persona stops treating a user as an adversary 2024-01-30 17:58:25 -05:00			`if model_params[:max_tokens]`
FIX: AI helper not working correctly with mixtral (#399) * FIX: AI helper not working correctly with mixtral This PR introduces a new function on the generic llm called #generate This will replace the implementation of completion! #generate introduces a new way to pass temperature, max_tokens and stop_sequences Then LLM implementers need to implement #normalize_model_params to ensure the generic names match the LLM specific endpoint This also adds temperature and stop_sequences to completion_prompts this allows for much more robust completion prompts * port everything over to #generate * Fix translation - On anthropic this no longer throws random "This is your translation:" - On mixtral this actually works * fix markdown table generation as well 2024-01-04 07:53:47 -05:00			`model_params[:maxOutputTokens] = model_params.delete(:max_tokens)`
			`end`

FEATURE: fine tune llm report to follow instructions more closely (#451) - Allow users to supply top_p and temperature values, which means people can fine tune randomness - Fix bad localization string - Fix bad remapping of max tokens in gemini - Add support for top_p as a general param to llms - Amend system prompt so persona stops treating a user as an adversary 2024-01-30 17:58:25 -05:00			`model_params[:topP] = model_params.delete(:top_p) if model_params[:top_p]`

FIX: AI helper not working correctly with mixtral (#399) * FIX: AI helper not working correctly with mixtral This PR introduces a new function on the generic llm called #generate This will replace the implementation of completion! #generate introduces a new way to pass temperature, max_tokens and stop_sequences Then LLM implementers need to implement #normalize_model_params to ensure the generic names match the LLM specific endpoint This also adds temperature and stop_sequences to completion_prompts this allows for much more robust completion prompts * port everything over to #generate * Fix translation - On anthropic this no longer throws random "This is your translation:" - On mixtral this actually works * fix markdown table generation as well 2024-01-04 07:53:47 -05:00			`# temperature already supported`

			`model_params`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`end`

			`def provider_id`
			`AiApiAuditLog::Provider::Gemini`
			`end`

			`private`

			`def model_uri`
FEATURE: Set endpoint credentials directly from LlmModel. (#625) * FEATURE: Set endpoint credentials directly from LlmModel. Drop Llama2Tokenizer since we no longer use it. * Allow http for custom LLMs --------- Co-authored-by: Rafael Silva <xfalcox@gmail.com> 2024-05-16 08:50:22 -04:00			`if llm_model`
			`url = llm_model.url`
			`else`
FIX: correct gemini streaming implementation (#632) This also implements image support and gemini-flash support 2024-05-22 02:35:29 -04:00			`mapped_model = model`
			`if model == "gemini-1.5-pro"`
			`mapped_model = "gemini-1.5-pro-latest"`
			`elsif model == "gemini-1.5-flash"`
			`mapped_model = "gemini-1.5-flash-latest"`
			`elsif model == "gemini-1.0-pro"`
			`mapped_model = "gemini-pro-latest"`
			`end`
FEATURE: Set endpoint credentials directly from LlmModel. (#625) * FEATURE: Set endpoint credentials directly from LlmModel. Drop Llama2Tokenizer since we no longer use it. * Allow http for custom LLMs --------- Co-authored-by: Rafael Silva <xfalcox@gmail.com> 2024-05-16 08:50:22 -04:00			`url = "https://generativelanguage.googleapis.com/v1beta/models/#{mapped_model}"`
			`end`

			`key = llm_model&.api_key \|\| SiteSetting.ai_gemini_api_key`

FIX: correct gemini streaming implementation (#632) This also implements image support and gemini-flash support 2024-05-22 02:35:29 -04:00			`if @streaming_mode`
			`url = "#{url}:streamGenerateContent?key=#{key}&alt=sse"`
			`else`
			`url = "#{url}:generateContent?key=#{key}"`
			`end`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00
			`URI(url)`
			`end`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`def prepare_payload(prompt, model_params, dialect)`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`tools = dialect.tools`

FIX: correct gemini streaming implementation (#632) This also implements image support and gemini-flash support 2024-05-22 02:35:29 -04:00			`payload = default_options.merge(contents: prompt[:messages])`
			`payload[:systemInstruction] = {`
			`role: "system",`
			`parts: [{ text: prompt[:system_instruction].to_s }],`
			`} if prompt[:system_instruction].present?`
			`payload[:tools] = tools if tools.present?`
			`payload[:generationConfig].merge!(model_params) if model_params.present?`
			`payload`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`end`

			`def prepare_request(payload)`
			`headers = { "Content-Type" => "application/json" }`

			`Net::HTTP::Post.new(model_uri, headers).tap { \|r\| r.body = payload }`
			`end`

			`def extract_completion_from(response_raw)`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`parsed =`
			`if @streaming_mode`
			`response_raw`
			`else`
			`JSON.parse(response_raw, symbolize_names: true)`
			`end`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`response_h = parsed.dig(:candidates, 0, :content, :parts, 0)`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00
FIX: Correctly translate and read tools for Claude and Chat GPT. (#393) I tested against the live models for the AI bot migration. It ensures Open AI's tool syntax is correct and we can correctly read the replies. : 2024-01-02 09:21:13 -05:00			`@has_function_call \|\|= response_h.dig(:functionCall).present?`
			`@has_function_call ? response_h[:functionCall] : response_h.dig(:text)`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`end`

			`def partials_from(decoded_chunk)`
FIX: correct gemini streaming implementation (#632) This also implements image support and gemini-flash support 2024-05-22 02:35:29 -04:00			`decoded_chunk`
			`end`

			`def chunk_to_string(chunk)`
			`chunk.to_s`
			`end`

			`class Decoder`
			`def initialize`
			`@buffer = +""`
			`end`

			`def decode(str)`
			`@buffer << str`

			`lines = @buffer.split(/\r?\n\r?\n/)`

			`keep_last = false`

			`decoded =`
			`lines`
			`.map do \|line\|`
			`if line.start_with?("data: {")`
			`begin`
			`JSON.parse(line[6..-1], symbolize_names: true)`
			`rescue JSON::ParserError`
			`keep_last = line`
			`nil`
			`end`
			`else`
			`keep_last = line`
			`nil`
			`end`
			`end`
			`.compact`

			`if keep_last`
			`@buffer = +(keep_last)`
			`else`
			`@buffer = +""`
			`end`

			`decoded`
FEATURE: AI Bot Gemini support. (#402) It also corrects the syntax around tool support, which was wrong. Gemini doesn't want us to include messages about previous tool invocations, so I had to shuffle around some code to send the response it generated from those invocations instead. For this, I created the "multi_turn" context, which bundles all the context involved in the interaction. 2024-01-04 16:15:34 -05:00			`end`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`end`

FIX: correct gemini streaming implementation (#632) This also implements image support and gemini-flash support 2024-05-22 02:35:29 -04:00			`def decode(chunk)`
			`@decoder \|\|= Decoder.new`
			`@decoder.decode(chunk)`
			`end`

FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`def extract_prompt_for_tokenizer(prompt)`
			`prompt.to_s`
			`end`

FIX: Correctly translate and read tools for Claude and Chat GPT. (#393) I tested against the live models for the AI bot migration. It ensures Open AI's tool syntax is correct and we can correctly read the replies. : 2024-01-02 09:21:13 -05:00			`def has_tool?(_response_data)`
			`@has_function_call`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`end`

FIX: more robust function call support (#581) For quite a few weeks now, some times, when running function calls on Anthropic we would get a "stray" - "calls" line. This has been enormously frustrating! I have been unable to find the source of the bug so instead decoupled the implementation and create a very clear "function call normalizer" This new class is extensively tested and guards against the type of edge cases we saw pre-normalizer. This also simplifies the implementation of "endpoint" which no longer needs to handle all this complex logic. 2024-04-18 16:54:54 -04:00			`def native_tool_support?`
			`true`
FEATURE: Add GitHub Helper AI Bot persona and tools (#513) Introduces a new AI Bot persona called 'GitHub Helper' which is specialized in assisting with GitHub-related tasks and questions. It includes the following key changes: - Implements the GitHub Helper persona class with its system prompt and available tools - Adds three new AI Bot tools for GitHub interactions: - github_file_content: Retrieves content of files from a GitHub repository - github_pull_request_diff: Retrieves the diff for a GitHub pull request - github_search_code: Searches for code in a GitHub repository - Updates the AI Bot dialects to support the new GitHub tools - Implements multiple function calls for standard tool dialect 2024-03-07 14:37:23 -05:00			`end`

			`def add_to_function_buffer(function_buffer, payload: nil, partial: nil)`
			`if @streaming_mode`
			`return function_buffer if !partial`
			`else`
			`partial = payload`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`end`

FEATURE: Add GitHub Helper AI Bot persona and tools (#513) Introduces a new AI Bot persona called 'GitHub Helper' which is specialized in assisting with GitHub-related tasks and questions. It includes the following key changes: - Implements the GitHub Helper persona class with its system prompt and available tools - Adds three new AI Bot tools for GitHub interactions: - github_file_content: Retrieves content of files from a GitHub repository - github_pull_request_diff: Retrieves the diff for a GitHub pull request - github_search_code: Searches for code in a GitHub repository - Updates the AI Bot dialects to support the new GitHub tools - Implements multiple function calls for standard tool dialect 2024-03-07 14:37:23 -05:00			`function_buffer.at("tool_name").content = partial[:name] if partial[:name].present?`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`if partial[:args]`
			`argument_fragments =`
			`partial[:args].reduce(+"") do \|memo, (arg_name, value)\|`
			`memo << "\n<#{arg_name}>#{value}</#{arg_name}>"`
			`end`
			`argument_fragments << "\n"`

			`function_buffer.at("parameters").children =`
			`Nokogiri::HTML5::DocumentFragment.parse(argument_fragments)`
			`end`

			`function_buffer`
			`end`
FEATURE: Support for Gemini in AiHelper / Search / Summarization (#358) 2023-12-15 12:32:01 -05:00			`end`
			`end`
			`end`
			`end`