discourse-ai/lib/completions/dialects/chat_gpt.rb

# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Dialects
      class ChatGpt < Dialect
        class << self
          # Tells whether this dialect handles the given model.
          #
          # @param model_name [String] model identifier to check
          # @return [Boolean] true only for the OpenAI chat models listed below
          def can_translate?(model_name)
            case model_name
            when "gpt-3.5-turbo", "gpt-3.5-turbo-16k"
              true
            when "gpt-4", "gpt-4-32k", "gpt-4-turbo", "gpt-4-vision-preview"
              true
            else
              false
            end
          end

          # Tokenizer this dialect uses when sizing text; the instance methods
          # max_prompt_tokens and calculate_message_token call `size` on it.
          #
          # @return [Class] DiscourseAi::Tokenizer::OpenAiTokenizer
          def tokenizer
            DiscourseAi::Tokenizer::OpenAiTokenizer
          end
        end

        VALID_ID_REGEX = /\A[a-zA-Z0-9_]+\z/

        # Translates the generic prompt into the list of message hashes used by
        # the OpenAI chat format.
        #
        # A trailing :model message is dropped, the remaining messages go
        # through trim_messages, and each message is mapped by type:
        #   :system    -> role "system"
        #   :model     -> role "assistant"
        #   :tool_call -> role "assistant" carrying a single tool_calls entry
        #   :tool      -> role "tool"
        #   otherwise  -> role "user"
        #
        # @return [Array<Hash>] translated messages, in their original order
        # @raise [JSON::ParserError] if a :tool_call message's content is not valid JSON
        def translate
          messages = prompt.messages

          # ChatGPT doesn't use an assistant msg to improve long-context responses.
          # `last` is nil when the prompt has no messages, so check it before
          # indexing. Slicing builds a new array and leaves the prompt's own
          # message list untouched.
          last_message = messages.last
          messages = messages[0...-1] if last_message && last_message[:type] == :model

          trimmed_messages = trim_messages(messages)

          # As soon as one user id does not match VALID_ID_REGEX, every user id is
          # embedded in the message content instead of being sent in the "name"
          # field, so all users are identified the same way in a conversation.
          embed_user_ids =
            trimmed_messages.any? do |m|
              m[:id] && m[:type] == :user && !m[:id].to_s.match?(VALID_ID_REGEX)
            end

          trimmed_messages.map do |msg|
            case msg[:type]
            when :system
              { role: "system", content: msg[:content] }
            when :model
              { role: "assistant", content: msg[:content] }
            when :tool_call
              call_details = JSON.parse(msg[:content], symbolize_names: true)
              # The arguments are sent as a JSON encoded string, not as an object.
              call_details[:arguments] = call_details[:arguments].to_json
              # Prefer the name tracked on the message, but keep the one parsed
              # from the content rather than overwriting it with nil.
              call_details[:name] = msg[:name] || call_details[:name]

              {
                role: "assistant",
                content: nil,
                tool_calls: [{ type: "function", function: call_details, id: msg[:id] }],
              }
            when :tool
              { role: "tool", tool_call_id: msg[:id], content: msg[:content], name: msg[:name] }
            else
              user_message = { role: "user", content: msg[:content] }
              if msg[:id]
                if embed_user_ids
                  user_message[:content] = "#{msg[:id]}: #{msg[:content]}"
                else
                  user_message[:name] = msg[:id]
                end
              end
              user_message
            end
          end
        end

        # Builds the function definitions for the prompt's tools.
        #
        # Each tool hash is copied and its :parameters list is replaced by an
        # object schema: every parameter becomes an entry in :properties (keyed
        # by its name, without the :name, :required and :item_type keys),
        # required parameters are listed in :required, and :item_type is turned
        # into an :items entry.
        #
        # @return [Array<Hash>] one { type: "function", function: ... } hash per tool
        def tools
          prompt.tools.map do |definition|
            schema = { type: "object", properties: {}, required: [] }

            definition[:parameters].to_a.each do |param|
              param_name = param[:name]
              schema[:required].push(param_name) if param[:required]

              property = param.except(:name, :required, :item_type)
              property[:items] = { type: param[:item_type] } if param[:item_type]
              schema[:properties][param_name] = property
            end

            # Work on a copy so the prompt's own tool definition is left untouched.
            function = definition.dup
            function[:parameters] = schema

            { type: "function", function: function }
          end
        end

        # Number of tokens available for the prompt.
        #
        # Starts from model_max_tokens and subtracts a buffer made of the
        # requested completion size (opts[:max_tokens], 2500 when not given),
        # a 50 token safety margin and, when tools are present, the tokenized
        # size of their JSON definition.
        #
        # @return [Integer] token budget left for the prompt messages
        def max_prompt_tokens
          # provide a safety margin of 50 tokens - our function counting is not
          # 100% accurate and getting numbers to align exactly is very hard
          buffer = (opts[:max_tokens] || 2500) + 50

          # Build the tool definitions once per call; they are needed both for
          # the presence check and for sizing.
          tool_definitions = tools

          if tool_definitions.present?
            # note this is about 100 tokens over, OpenAI have a more optimal representation
            @function_size ||= self.class.tokenizer.size(tool_definitions.to_json.to_s)
            buffer += @function_size
          end

          model_max_tokens - buffer
        end

        private

        # Fixed token cost added for every message, on top of its content.
        # NOTE(review): nothing in this class calls it; presumably the parent
        # Dialect's trimming logic does - confirm.
        #
        # @return [Integer] overhead in tokens per message
        def per_message_overhead
          # open ai defines about 4 tokens per message of overhead
          4
        end

        # Sizes a single message with the dialect's tokenizer.
        #
        # The message's :content and :name are joined (missing values count as
        # empty strings) and the resulting text is measured.
        #
        # @param context [Hash] message with optional :content and :name keys
        # @return whatever the tokenizer's `size` returns for the joined text
        def calculate_message_token(context)
          countable_text = "#{context[:content]}#{context[:name]}"
          self.class.tokenizer.size(countable_text)
        end

        # Context window size, in tokens, assumed for the current model.
        #
        # @return [Integer] the size listed for model_name, or 8192 for any
        #   model not listed
        def model_max_tokens
          context_sizes = {
            "gpt-3.5-turbo-16k" => 16_384,
            "gpt-4" => 8192,
            "gpt-4-32k" => 32_768,
            "gpt-4-turbo" => 131_072,
          }

          context_sizes.fetch(model_name, 8192)
        end
      end
    end
  end
end
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`# frozen_string_literal: true`

			`module DiscourseAi`
			`module Completions`
			`module Dialects`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`class ChatGpt < Dialect`
			`class << self`
			`def can_translate?(model_name)`
FEATURE: LLM based periodical summary report (#357) Introduce a Discourse Automation based periodical report. Depends on Discourse Automation. Report works best with very large context language models such as GPT-4-Turbo and Claude 2. - Introduces final_insts to generic llm format, for claude to work best it is better to guide the last assistant message (we should add this to other spots as well) - Adds GPT-4 turbo support to generic llm interface 2023-12-18 20:04:15 -05:00			`%w[`
			`gpt-3.5-turbo`
			`gpt-4`
			`gpt-3.5-turbo-16k`
			`gpt-4-32k`
			`gpt-4-turbo`
FEATURE: AI image caption (#470) This PR adds a new feature where you can generate captions for images in the composer using AI. --------- Co-authored-by: Rafael Silva <xfalcox@gmail.com> 2024-02-19 12:56:28 -05:00			`gpt-4-vision-preview`
FEATURE: LLM based periodical summary report (#357) Introduce a Discourse Automation based periodical report. Depends on Discourse Automation. Report works best with very large context language models such as GPT-4-Turbo and Claude 2. - Introduces final_insts to generic llm format, for claude to work best it is better to guide the last assistant message (we should add this to other spots as well) - Adds GPT-4 turbo support to generic llm interface 2023-12-18 20:04:15 -05:00			`].include?(model_name)`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`end`

			`def tokenizer`
			`DiscourseAi::Tokenizer::OpenAiTokenizer`
			`end`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`end`

FIX: scrub invalid prompts when truncating (#426) When you trim a prompt we never want to have a state where there is a "tool" reply without a corresponding tool call, it makes no sense Also - GPT-4-Turbo is 128k, fix that - Claude was not preserving username in prompt - We were throwing away unicode usernames instead of adding to message 2024-01-15 21:48:00 -05:00			`VALID_ID_REGEX = /\A[a-zA-Z0-9_]+\z/`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`def translate`
REFACTOR: Represent generic prompts with an Object. (#416) * REFACTOR: Represent generic prompts with an Object. * Adds a bit more validation for clarity * Rewrite bot title prompt and fix quirk handling --------- Co-authored-by: Sam Saffron <sam.saffron@gmail.com> 2024-01-12 12:36:44 -05:00			`messages = prompt.messages`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00
REFACTOR: Represent generic prompts with an Object. (#416) * REFACTOR: Represent generic prompts with an Object. * Adds a bit more validation for clarity * Rewrite bot title prompt and fix quirk handling --------- Co-authored-by: Sam Saffron <sam.saffron@gmail.com> 2024-01-12 12:36:44 -05:00			`# ChatGPT doesn't use an assistant msg to improve long-context responses.`
FEATURE: even smoother streaming (#420) Account properly for function calls, don't stream through <details> blocks - Rush cooked content back to client - Wait longer (up to 60 seconds) before giving up on streaming - Clean up message bus channels so we don't have leftover data - Make ai streamer much more reusable and much easier to read - If buffer grows quickly, rush update so you are not artificially waiting - Refine prompt interface - Fix lost system message when prompt gets long 2024-01-15 02:51:14 -05:00			`if messages.last[:type] == :model`
			`messages = messages.dup`
			`messages.pop`
			`end`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00
REFACTOR: Represent generic prompts with an Object. (#416) * REFACTOR: Represent generic prompts with an Object. * Adds a bit more validation for clarity * Rewrite bot title prompt and fix quirk handling --------- Co-authored-by: Sam Saffron <sam.saffron@gmail.com> 2024-01-12 12:36:44 -05:00			`trimmed_messages = trim_messages(messages)`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00
FIX: scrub invalid prompts when truncating (#426) When you trim a prompt we never want to have a state where there is a "tool" reply without a corresponding tool call, it makes no sense Also - GPT-4-Turbo is 128k, fix that - Claude was not preserving username in prompt - We were throwing away unicode usernames instead of adding to message 2024-01-15 21:48:00 -05:00			`embed_user_ids =`
			`trimmed_messages.any? do \|m\|`
			`m[:id] && m[:type] == :user && !m[:id].to_s.match?(VALID_ID_REGEX)`
			`end`

REFACTOR: Represent generic prompts with an Object. (#416) * REFACTOR: Represent generic prompts with an Object. * Adds a bit more validation for clarity * Rewrite bot title prompt and fix quirk handling --------- Co-authored-by: Sam Saffron <sam.saffron@gmail.com> 2024-01-12 12:36:44 -05:00			`trimmed_messages.map do \|msg\|`
			`if msg[:type] == :system`
			`{ role: "system", content: msg[:content] }`
			`elsif msg[:type] == :model`
			`{ role: "assistant", content: msg[:content] }`
			`elsif msg[:type] == :tool_call`
			`call_details = JSON.parse(msg[:content], symbolize_names: true)`
			`call_details[:arguments] = call_details[:arguments].to_json`
FIX: Tune function calling (#519) Adds support for "name" on functions which can be used for tool calls For function calls we need to keep track of id/name and previously we only supported either Also attempts to improve sql helper 2024-03-08 16:46:40 -05:00			`call_details[:name] = msg[:name]`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00
REFACTOR: Represent generic prompts with an Object. (#416) * REFACTOR: Represent generic prompts with an Object. * Adds a bit more validation for clarity * Rewrite bot title prompt and fix quirk handling --------- Co-authored-by: Sam Saffron <sam.saffron@gmail.com> 2024-01-12 12:36:44 -05:00			`{`
			`role: "assistant",`
			`content: nil,`
			`tool_calls: [{ type: "function", function: call_details, id: msg[:id] }],`
			`}`
			`elsif msg[:type] == :tool`
FIX: Tune function calling (#519) Adds support for "name" on functions which can be used for tool calls For function calls we need to keep track of id/name and previously we only supported either Also attempts to improve sql helper 2024-03-08 16:46:40 -05:00			`{ role: "tool", tool_call_id: msg[:id], content: msg[:content], name: msg[:name] }`
REFACTOR: Represent generic prompts with an Object. (#416) * REFACTOR: Represent generic prompts with an Object. * Adds a bit more validation for clarity * Rewrite bot title prompt and fix quirk handling --------- Co-authored-by: Sam Saffron <sam.saffron@gmail.com> 2024-01-12 12:36:44 -05:00			`else`
FIX: scrub invalid prompts when truncating (#426) When you trim a prompt we never want to have a state where there is a "tool" reply without a corresponding tool call, it makes no sense Also - GPT-4-Turbo is 128k, fix that - Claude was not preserving username in prompt - We were throwing away unicode usernames instead of adding to message 2024-01-15 21:48:00 -05:00			`user_message = { role: "user", content: msg[:content] }`
			`if msg[:id]`
			`if embed_user_ids`
			`user_message[:content] = "#{msg[:id]}: #{msg[:content]}"`
			`else`
			`user_message[:name] = msg[:id]`
			`end`
REFACTOR: Represent generic prompts with an Object. (#416) * REFACTOR: Represent generic prompts with an Object. * Adds a bit more validation for clarity * Rewrite bot title prompt and fix quirk handling --------- Co-authored-by: Sam Saffron <sam.saffron@gmail.com> 2024-01-12 12:36:44 -05:00			`end`
FIX: scrub invalid prompts when truncating (#426) When you trim a prompt we never want to have a state where there is a "tool" reply without a corresponding tool call, it makes no sense Also - GPT-4-Turbo is 128k, fix that - Claude was not preserving username in prompt - We were throwing away unicode usernames instead of adding to message 2024-01-15 21:48:00 -05:00			`user_message`
REFACTOR: Represent generic prompts with an Object. (#416) * REFACTOR: Represent generic prompts with an Object. * Adds a bit more validation for clarity * Rewrite bot title prompt and fix quirk handling --------- Co-authored-by: Sam Saffron <sam.saffron@gmail.com> 2024-01-12 12:36:44 -05:00			`end`
			`end`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`end`

			`def tools`
REFACTOR: Represent generic prompts with an Object. (#416) * REFACTOR: Represent generic prompts with an Object. * Adds a bit more validation for clarity * Rewrite bot title prompt and fix quirk handling --------- Co-authored-by: Sam Saffron <sam.saffron@gmail.com> 2024-01-12 12:36:44 -05:00			`prompt.tools.map do \|t\|`
FIX: Correctly translate and read tools for Claude and Chat GPT. (#393) I tested against the live models for the AI bot migration. It ensures Open AI's tool syntax is correct and we can correctly read the replies. : 2024-01-02 09:21:13 -05:00			`tool = t.dup`

DEV: AI bot migration to the Llm pattern. (#343) * DEV: AI bot migration to the Llm pattern. We added tool and conversation context support to the Llm service in discourse-ai#366, meaning we met all the conditions to migrate this module. This PR migrates to the new pattern, meaning adding a new bot now requires minimal effort as long as the service supports it. On top of this, we introduce the concept of a "Playground" to separate the PM-specific bits from the completion, allowing us to use the bot in other contexts like chat in the future. Commands are called tools, and we simplified all the placeholder logic to perform updates in a single place, making the flow more one-wayish. * Followup fixes based on testing * Cleanup unused inference code * FIX: text-based tools could be in the middle of a sentence * GPT-4-turbo support * Use new LLM API 2024-01-04 08:44:07 -05:00			`tool[:parameters] = t[:parameters]`
			`.to_a`
			`.reduce({ type: "object", properties: {}, required: [] }) do \|memo, p\|`
FIX: Correctly translate and read tools for Claude and Chat GPT. (#393) I tested against the live models for the AI bot migration. It ensures Open AI's tool syntax is correct and we can correctly read the replies. : 2024-01-02 09:21:13 -05:00			`name = p[:name]`
			`memo[:required] << name if p[:required]`

			`memo[:properties][name] = p.except(:name, :required, :item_type)`

			`memo[:properties][name][:items] = { type: p[:item_type] } if p[:item_type]`
			`memo`
			`end`

			`{ type: "function", function: tool }`
			`end`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`end`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`def max_prompt_tokens`
			`# provide a buffer of 120 tokens - our function counting is not`
			`# 100% accurate and getting numbers to align exactly is very hard`
FIX: AI helper not working correctly with mixtral (#399) * FIX: AI helper not working correctly with mixtral This PR introduces a new function on the generic llm called #generate This will replace the implementation of completion! #generate introduces a new way to pass temperature, max_tokens and stop_sequences Then LLM implementers need to implement #normalize_model_params to ensure the generic names match the LLM specific endpoint This also adds temperature and stop_sequences to completion_prompts this allows for much more robust completion prompts * port everything over to #generate * Fix translation - On anthropic this no longer throws random "This is your translation:" - On mixtral this actually works * fix markdown table generation as well 2024-01-04 07:53:47 -05:00			`buffer = (opts[:max_tokens] \|\| 2500) + 50`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00
			`if tools.present?`
			`# note this is about 100 tokens over, OpenAI have a more optimal representation`
			`@function_size \|\|= self.class.tokenizer.size(tools.to_json.to_s)`
			`buffer += @function_size`
			`end`

			`model_max_tokens - buffer`
			`end`

			`private`

			`def per_message_overhead`
			`# open ai defines about 4 tokens per message of overhead`
			`4`
			`end`

			`def calculate_message_token(context)`
			`self.class.tokenizer.size(context[:content].to_s + context[:name].to_s)`
			`end`

			`def model_max_tokens`
			`case model_name`
FIX: Correctly translate and read tools for Claude and Chat GPT. (#393) I tested against the live models for the AI bot migration. It ensures Open AI's tool syntax is correct and we can correctly read the replies. : 2024-01-02 09:21:13 -05:00			`when "gpt-3.5-turbo-16k"`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`16_384`
			`when "gpt-4"`
			`8192`
			`when "gpt-4-32k"`
			`32_768`
FIX: scrub invalid prompts when truncating (#426) When you trim a prompt we never want to have a state where there is a "tool" reply without a corresponding tool call, it makes no sense Also - GPT-4-Turbo is 128k, fix that - Claude was not preserving username in prompt - We were throwing away unicode usernames instead of adding to message 2024-01-15 21:48:00 -05:00			`when "gpt-4-turbo"`
			`131_072`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 16:06:01 -05:00			`else`
			`8192`
			`end`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`end`
			`end`
			`end`
			`end`
			`end`