discourse-ai/lib/completions/endpoints/anthropic.rb

# frozen_string_literal: true

module DiscourseAi
  module Completions
    module Endpoints
      class Anthropic < Base
        class << self
          # Reports whether this endpoint handles the given provider/model pair.
          #
          # @param endpoint_name [String] provider identifier, compared to "anthropic"
          # @param model_name [String] short model name to look up
          # @return [Boolean] true only when the provider is "anthropic" and the
          #   model is one of the short names listed below
          def can_contact?(endpoint_name, model_name)
            return false unless endpoint_name == "anthropic"

            supported_models = %w[
              claude-instant-1
              claude-2
              claude-3-haiku
              claude-3-opus
              claude-3-sonnet
            ]
            supported_models.include?(model_name)
          end

          # Lists the names of the settings this endpoint depends on.
          #
          # @return [Array<String>] a single entry, "ai_anthropic_api_key"
          def dependant_setting_names
            %w[ai_anthropic_api_key]
          end

          # Tells whether the Anthropic API key site setting holds a value.
          #
          # @param _model_name [Object] ignored; the check does not vary by model
          # @return [Boolean] result of calling #present? on the setting value
          def correctly_configured?(_model_name)
            api_key = SiteSetting.ai_anthropic_api_key
            api_key.present?
          end

          # Builds the display label for a model served by this endpoint.
          #
          # @param model_name [#to_s] model name appended to the label
          # @return [String] "Anthropic - " followed by the model name
          def endpoint_name(model_name)
            format("Anthropic - %s", model_name)
          end
        end

        # Returns the given model parameters untouched: the very same object
        # that was passed in, with no renaming or copying.
        #
        # @param model_params [Object] presumably a Hash of generic parameter
        #   names — verify against callers
        # @return [Object] the argument itself
        def normalize_model_params(model_params)
          # max_tokens, temperature, stop_sequences are already supported
          model_params
        end

        # Builds the baseline request options for the value returned by #model.
        #
        # @param dialect [#prompt] object whose prompt answers #has_tools?
        # @return [Hash] :model (the mapped API model name) and :max_tokens,
        #   plus :stop_sequences when the prompt has tools
        # @raise [RuntimeError] when #model has no entry in the mapping
        def default_options(dialect)
          # skipping 2.0 support for now, since other models are better
          api_model_names = {
            "claude-2" => "claude-2.1",
            "claude-instant-1" => "claude-instant-1.2",
            "claude-3-haiku" => "claude-3-haiku-20240307",
            "claude-3-sonnet" => "claude-3-sonnet-20240229",
            "claude-3-opus" => "claude-3-opus-20240229",
          }
          mapped_model = api_model_names.fetch(model) { raise "Unsupported model: #{model}" }

          { model: mapped_model, max_tokens: 3_000 }.tap do |options|
            # stop generating once the tool-call block is closed
            options[:stop_sequences] = ["</function_calls>"] if dialect.prompt.has_tools?
          end
        end

        # Returns the AiApiAuditLog::Provider::Anthropic constant, which
        # identifies this endpoint's provider.
        def provider_id
          AiApiAuditLog::Provider::Anthropic
        end

        private

        # Estimates the prompt size by asking the tokenizer for the size of the
        # system prompt and the stringified messages, joined by one space.
        # this is an approximation, we will update it later if request goes through
        #
        # @param prompt [#system_prompt, #messages] prompt to measure
        # @return [Object] whatever tokenizer.size returns for that text
        def prompt_size(prompt)
          tokenizer.size("#{prompt.system_prompt} #{prompt.messages}")
        end

        # URI of the Anthropic Messages API, built once and cached in @uri.
        #
        # @return [URI::HTTPS]
        def model_uri
          return @uri if @uri

          @uri = URI("https://api.anthropic.com/v1/messages")
        end

        # Assembles the request payload for the Messages API.
        #
        # @param prompt [#messages, #system_prompt] prompt to send
        # @param model_params [Hash] overrides merged on top of #default_options
        # @param dialect [Object] handed to #default_options
        # @return [Hash] defaults overridden by model_params, plus :messages,
        #   :system when the system prompt is present, and :stream when
        #   @streaming_mode is set
        def prepare_payload(prompt, model_params, dialect)
          payload = default_options(dialect).merge(model_params)
          payload[:messages] = prompt.messages

          system_prompt = prompt.system_prompt
          payload[:system] = system_prompt if system_prompt.present?
          payload[:stream] = true if @streaming_mode

          payload
        end

        # Builds the POST request sent to #model_uri.
        #
        # @param payload [String] request body — presumably already-serialized
        #   JSON; confirm against the caller
        # @return [Net::HTTP::Post] request carrying the API version, API key
        #   and JSON content-type headers, with +payload+ as its body
        def prepare_request(payload)
          request_headers = {
            "anthropic-version" => "2023-06-01",
            "x-api-key" => SiteSetting.ai_anthropic_api_key,
            "content-type" => "application/json",
          }

          request = Net::HTTP::Post.new(model_uri, request_headers)
          request.body = payload
          request
        end

        # Copies the token counts captured in @input_tokens / @output_tokens
        # onto the log. A count that was never captured leaves the matching
        # log attribute as it was.
        #
        # @param log [#request_tokens=, #response_tokens=] record to update
        def final_log_update(log)
          input_count = @input_tokens
          output_count = @output_tokens

          log.request_tokens = input_count if input_count
          log.response_tokens = output_count if output_count
        end

        # Extracts the completion text from one raw JSON response and records
        # token usage in @input_tokens / @output_tokens when the response
        # carries it.
        #
        # When @streaming_mode is set, +response_raw+ is treated as a single
        # streaming event and dispatched on its "type"; otherwise it is treated
        # as a complete response body.
        #
        # @param response_raw [String] JSON document to parse
        # @return [String] the text carried by this response, "" when it has none
        # @raise [JSON::ParserError] when +response_raw+ is not valid JSON
        def extract_completion_from(response_raw)
          parsed = JSON.parse(response_raw, symbolize_names: true)

          unless @streaming_mode
            @input_tokens = parsed.dig(:usage, :input_tokens)
            @output_tokens = parsed.dig(:usage, :output_tokens)
            return parsed.dig(:content, 0, :text).to_s
          end

          case parsed[:type]
          when "content_block_start"
            # The opening event carries its text under :content_block rather
            # than :delta; the :delta lookup is kept as a fallback.
            (parsed.dig(:content_block, :text) || parsed.dig(:delta, :text)).to_s
          when "content_block_delta"
            parsed.dig(:delta, :text).to_s
          when "message_start"
            @input_tokens = parsed.dig(:message, :usage, :input_tokens)
            ""
          when "message_delta"
            # Usage sits at the top level of message_delta events, next to
            # :delta. The nested lookup is kept as a fallback so payloads
            # shaped the old way still work.
            @output_tokens =
              parsed.dig(:usage, :output_tokens) || parsed.dig(:delta, :usage, :output_tokens)
            ""
          else
            ""
          end
        end

        # Splits a decoded chunk into lines and collects whatever follows the
        # first "data: " marker on each line. Lines without the marker are
        # dropped.
        #
        # @param decoded_chunk [String] text containing newline-separated lines
        # @return [Array<String>] the data portions, in order of appearance
        def partials_from(decoded_chunk)
          decoded_chunk
            .split("\n")
            .filter_map do |line|
              _prefix, data = line.split("data: ", 2)
              data
            end
        end
      end
    end
  end
end
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`# frozen_string_literal: true`

			`module DiscourseAi`
			`module Completions`
			`module Endpoints`
			`class Anthropic < Base`
UX: Validations to LLM-backed features (except AI Bot) (#436) * UX: Validations to Llm-backed features (except AI Bot) This change is part of an ongoing effort to prevent enabling a broken feature due to lack of configuration. We also want to explicit which provider we are going to use. For example, Claude models are available through AWS Bedrock and Anthropic, but the configuration differs. Validations are: * You must choose a model before enabling the feature. * You must turn off the feature before setting the model to blank. * You must configure each model settings before being able to select it. * Add provider name to summarization options * vLLM can technically support same models as HF * Check we can talk to the selected model * Check for Bedrock instead of anthropic as a site could have both creds setup 2024-01-29 14:04:25 -05:00			`class << self`
			`def can_contact?(endpoint_name, model_name)`
FEATURE: add Claude 3 sonnet/haiku support for Amazon Bedrock (#534) This PR consolidates the implements new Anthropic Messages interface for Bedrock Claude endpoints and adds support for the new Claude 3 models (haiku, opus, sonnet). Key changes: - Renamed `AnthropicMessages` and `Anthropic` endpoint classes into a single `Anthropic` class (ditto for ClaudeMessages -> Claude) - Updated `AwsBedrock` endpoints to use the new `/messages` API format for all Claude models - Added `claude-3-haiku`, `claude-3-opus` and `claude-3-sonnet` model support in both Anthropic and AWS Bedrock endpoints - Updated specs for the new consolidated endpoints and Claude 3 model support This refactor removes support for old non messages API which has been deprecated by anthropic 2024-03-18 15:48:46 -04:00			`endpoint_name == "anthropic" &&`
			`%w[claude-instant-1 claude-2 claude-3-haiku claude-3-opus claude-3-sonnet].include?(`
			`model_name,`
			`)`
UX: Validations to LLM-backed features (except AI Bot) (#436) * UX: Validations to Llm-backed features (except AI Bot) This change is part of an ongoing effort to prevent enabling a broken feature due to lack of configuration. We also want to explicit which provider we are going to use. For example, Claude models are available through AWS Bedrock and Anthropic, but the configuration differs. Validations are: * You must choose a model before enabling the feature. * You must turn off the feature before setting the model to blank. * You must configure each model settings before being able to select it. * Add provider name to summarization options * vLLM can technically support same models as HF * Check we can talk to the selected model * Check for Bedrock instead of anthropic as a site could have both creds setup 2024-01-29 14:04:25 -05:00			`end`

			`def dependant_setting_names`
			`%w[ai_anthropic_api_key]`
			`end`

			`def correctly_configured?(_model_name)`
			`SiteSetting.ai_anthropic_api_key.present?`
			`end`

			`def endpoint_name(model_name)`
			`"Anthropic - #{model_name}"`
			`end`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`end`

FIX: AI helper not working correctly with mixtral (#399) * FIX: AI helper not working correctly with mixtral This PR introduces a new function on the generic llm called #generate This will replace the implementation of completion! #generate introduces a new way to pass temperature, max_tokens and stop_sequences Then LLM implementers need to implement #normalize_model_params to ensure the generic names match the LLM specific endpoint This also adds temperature and stop_sequences to completion_prompts this allows for much more robust completion prompts * port everything over to #generate * Fix translation - On anthropic this no longer throws random "This is your translation:" - On mixtral this actually works * fix markdown table generation as well 2024-01-04 07:53:47 -05:00			`def normalize_model_params(model_params)`
FEATURE: add Claude 3 sonnet/haiku support for Amazon Bedrock (#534) This PR consolidates the implements new Anthropic Messages interface for Bedrock Claude endpoints and adds support for the new Claude 3 models (haiku, opus, sonnet). Key changes: - Renamed `AnthropicMessages` and `Anthropic` endpoint classes into a single `Anthropic` class (ditto for ClaudeMessages -> Claude) - Updated `AwsBedrock` endpoints to use the new `/messages` API format for all Claude models - Added `claude-3-haiku`, `claude-3-opus` and `claude-3-sonnet` model support in both Anthropic and AWS Bedrock endpoints - Updated specs for the new consolidated endpoints and Claude 3 model support This refactor removes support for old non messages API which has been deprecated by anthropic 2024-03-18 15:48:46 -04:00			`# max_tokens, temperature, stop_sequences are already supported`
FIX: AI helper not working correctly with mixtral (#399) * FIX: AI helper not working correctly with mixtral This PR introduces a new function on the generic llm called #generate This will replace the implementation of completion! #generate introduces a new way to pass temperature, max_tokens and stop_sequences Then LLM implementers need to implement #normalize_model_params to ensure the generic names match the LLM specific endpoint This also adds temperature and stop_sequences to completion_prompts this allows for much more robust completion prompts * port everything over to #generate * Fix translation - On anthropic this no longer throws random "This is your translation:" - On mixtral this actually works * fix markdown table generation as well 2024-01-04 07:53:47 -05:00			`model_params`
			`end`

FEATURE: add Claude 3 sonnet/haiku support for Amazon Bedrock (#534) This PR consolidates the implements new Anthropic Messages interface for Bedrock Claude endpoints and adds support for the new Claude 3 models (haiku, opus, sonnet). Key changes: - Renamed `AnthropicMessages` and `Anthropic` endpoint classes into a single `Anthropic` class (ditto for ClaudeMessages -> Claude) - Updated `AwsBedrock` endpoints to use the new `/messages` API format for all Claude models - Added `claude-3-haiku`, `claude-3-opus` and `claude-3-sonnet` model support in both Anthropic and AWS Bedrock endpoints - Updated specs for the new consolidated endpoints and Claude 3 model support This refactor removes support for old non messages API which has been deprecated by anthropic 2024-03-18 15:48:46 -04:00			`def default_options(dialect)`
			`# skipping 2.0 support for now, since other models are better`
			`mapped_model =`
			`case model`
			`when "claude-2"`
			`"claude-2.1"`
			`when "claude-instant-1"`
			`"claude-instant-1.2"`
			`when "claude-3-haiku"`
			`"claude-3-haiku-20240307"`
			`when "claude-3-sonnet"`
			`"claude-3-sonnet-20240229"`
			`when "claude-3-opus"`
			`"claude-3-opus-20240229"`
			`else`
			`raise "Unsupported model: #{model}"`
			`end`

			`options = { model: mapped_model, max_tokens: 3_000 }`

			`options[:stop_sequences] = ["</function_calls>"] if dialect.prompt.has_tools?`
			`options`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`end`

			`def provider_id`
			`AiApiAuditLog::Provider::Anthropic`
			`end`

			`private`

FEATURE: add Claude 3 sonnet/haiku support for Amazon Bedrock (#534) This PR consolidates the implements new Anthropic Messages interface for Bedrock Claude endpoints and adds support for the new Claude 3 models (haiku, opus, sonnet). Key changes: - Renamed `AnthropicMessages` and `Anthropic` endpoint classes into a single `Anthropic` class (ditto for ClaudeMessages -> Claude) - Updated `AwsBedrock` endpoints to use the new `/messages` API format for all Claude models - Added `claude-3-haiku`, `claude-3-opus` and `claude-3-sonnet` model support in both Anthropic and AWS Bedrock endpoints - Updated specs for the new consolidated endpoints and Claude 3 model support This refactor removes support for old non messages API which has been deprecated by anthropic 2024-03-18 15:48:46 -04:00			`# this is an approximation, we will update it later if request goes through`
			`def prompt_size(prompt)`
REFACTOR: Migrate Vllm/TGI-served models to the OpenAI format. (#588) Both endpoints provide OpenAI-compatible servers. The only difference is that Vllm doesn't support passing tools as a separate parameter. Even if the tool param is supported, it ultimately relies on the model's ability to handle native functions, which is not the case with the models we have today. As a part of this change, we are dropping support for StableBeluga/Llama2 models. They don't have a chat_template, meaning the new API can't translate them. These changes let us remove some of our existing dialects and are a first step in our plan to support any LLM by defining them as data-driven concepts. I rewrote the "translate" method to use a template method and extracted the tool support strategies into their own classes to simplify the code. Finally, these changes bring support for Ollama when running in dev mode. It only works with Mistral for now, but it will change soon. 2024-05-07 09:02:16 -04:00			`tokenizer.size(prompt.system_prompt.to_s + " " + prompt.messages.to_s)`
FEATURE: add Claude 3 sonnet/haiku support for Amazon Bedrock (#534) This PR consolidates the implements new Anthropic Messages interface for Bedrock Claude endpoints and adds support for the new Claude 3 models (haiku, opus, sonnet). Key changes: - Renamed `AnthropicMessages` and `Anthropic` endpoint classes into a single `Anthropic` class (ditto for ClaudeMessages -> Claude) - Updated `AwsBedrock` endpoints to use the new `/messages` API format for all Claude models - Added `claude-3-haiku`, `claude-3-opus` and `claude-3-sonnet` model support in both Anthropic and AWS Bedrock endpoints - Updated specs for the new consolidated endpoints and Claude 3 model support This refactor removes support for old non messages API which has been deprecated by anthropic 2024-03-18 15:48:46 -04:00			`end`

REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`def model_uri`
FEATURE: add Claude 3 sonnet/haiku support for Amazon Bedrock (#534) This PR consolidates the implements new Anthropic Messages interface for Bedrock Claude endpoints and adds support for the new Claude 3 models (haiku, opus, sonnet). Key changes: - Renamed `AnthropicMessages` and `Anthropic` endpoint classes into a single `Anthropic` class (ditto for ClaudeMessages -> Claude) - Updated `AwsBedrock` endpoints to use the new `/messages` API format for all Claude models - Added `claude-3-haiku`, `claude-3-opus` and `claude-3-sonnet` model support in both Anthropic and AWS Bedrock endpoints - Updated specs for the new consolidated endpoints and Claude 3 model support This refactor removes support for old non messages API which has been deprecated by anthropic 2024-03-18 15:48:46 -04:00			`@uri \|\|= URI("https://api.anthropic.com/v1/messages")`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`end`

FEATURE: add Claude 3 sonnet/haiku support for Amazon Bedrock (#534) This PR consolidates the implements new Anthropic Messages interface for Bedrock Claude endpoints and adds support for the new Claude 3 models (haiku, opus, sonnet). Key changes: - Renamed `AnthropicMessages` and `Anthropic` endpoint classes into a single `Anthropic` class (ditto for ClaudeMessages -> Claude) - Updated `AwsBedrock` endpoints to use the new `/messages` API format for all Claude models - Added `claude-3-haiku`, `claude-3-opus` and `claude-3-sonnet` model support in both Anthropic and AWS Bedrock endpoints - Updated specs for the new consolidated endpoints and Claude 3 model support This refactor removes support for old non messages API which has been deprecated by anthropic 2024-03-18 15:48:46 -04:00			`def prepare_payload(prompt, model_params, dialect)`
			`payload = default_options(dialect).merge(model_params).merge(messages: prompt.messages)`

			`payload[:system] = prompt.system_prompt if prompt.system_prompt.present?`
			`payload[:stream] = true if @streaming_mode`

			`payload`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`end`

			`def prepare_request(payload)`
			`headers = {`
			`"anthropic-version" => "2023-06-01",`
			`"x-api-key" => SiteSetting.ai_anthropic_api_key,`
			`"content-type" => "application/json",`
			`}`

			`Net::HTTP::Post.new(model_uri, headers).tap { \|r\| r.body = payload }`
			`end`

FEATURE: add Claude 3 sonnet/haiku support for Amazon Bedrock (#534) This PR consolidates the implements new Anthropic Messages interface for Bedrock Claude endpoints and adds support for the new Claude 3 models (haiku, opus, sonnet). Key changes: - Renamed `AnthropicMessages` and `Anthropic` endpoint classes into a single `Anthropic` class (ditto for ClaudeMessages -> Claude) - Updated `AwsBedrock` endpoints to use the new `/messages` API format for all Claude models - Added `claude-3-haiku`, `claude-3-opus` and `claude-3-sonnet` model support in both Anthropic and AWS Bedrock endpoints - Updated specs for the new consolidated endpoints and Claude 3 model support This refactor removes support for old non messages API which has been deprecated by anthropic 2024-03-18 15:48:46 -04:00			`def final_log_update(log)`
			`log.request_tokens = @input_tokens if @input_tokens`
			`log.response_tokens = @output_tokens if @output_tokens`
			`end`

REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`def extract_completion_from(response_raw)`
FEATURE: add Claude 3 sonnet/haiku support for Amazon Bedrock (#534) This PR consolidates the implements new Anthropic Messages interface for Bedrock Claude endpoints and adds support for the new Claude 3 models (haiku, opus, sonnet). Key changes: - Renamed `AnthropicMessages` and `Anthropic` endpoint classes into a single `Anthropic` class (ditto for ClaudeMessages -> Claude) - Updated `AwsBedrock` endpoints to use the new `/messages` API format for all Claude models - Added `claude-3-haiku`, `claude-3-opus` and `claude-3-sonnet` model support in both Anthropic and AWS Bedrock endpoints - Updated specs for the new consolidated endpoints and Claude 3 model support This refactor removes support for old non messages API which has been deprecated by anthropic 2024-03-18 15:48:46 -04:00			`result = ""`
			`parsed = JSON.parse(response_raw, symbolize_names: true)`

			`if @streaming_mode`
			`if parsed[:type] == "content_block_start" \|\| parsed[:type] == "content_block_delta"`
			`result = parsed.dig(:delta, :text).to_s`
			`elsif parsed[:type] == "message_start"`
			`@input_tokens = parsed.dig(:message, :usage, :input_tokens)`
			`elsif parsed[:type] == "message_delta"`
			`@output_tokens = parsed.dig(:delta, :usage, :output_tokens)`
			`end`
			`else`
			`result = parsed.dig(:content, 0, :text).to_s`
			`@input_tokens = parsed.dig(:usage, :input_tokens)`
			`@output_tokens = parsed.dig(:usage, :output_tokens)`
			`end`

			`result`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 10:58:54 -05:00			`end`

			`def partials_from(decoded_chunk)`
			`decoded_chunk.split("\n").map { \|line\| line.split("data: ", 2)[1] }.compact`
			`end`
			`end`
			`end`
			`end`
			`end`