discourse-ai/lib/shared/inference/anthropic_completions.rb

# frozen_string_literal: true

module ::DiscourseAi
  module Inference
    class AnthropicCompletions
      CompletionFailed = Class.new(StandardError)
      TIMEOUT = 60

      def self.perform!(
        prompt,
        model = "claude-2",
        temperature: nil,
        top_p: nil,
        max_tokens: nil,
        user_id: nil,
        stop_sequences: nil,
        post: nil,
        &blk
      )
        # HACK to get around the fact that they have different APIs
        # we will introduce a proper LLM abstraction layer to handle these shenanigans later this year
        if model == "claude-2" && SiteSetting.ai_bedrock_access_key_id.present? &&
             SiteSetting.ai_bedrock_secret_access_key.present? &&
             SiteSetting.ai_bedrock_region.present?
          return(
            AmazonBedrockInference.perform!(
              prompt,
              temperature: temperature,
              top_p: top_p,
              max_tokens: max_tokens,
              user_id: user_id,
              stop_sequences: stop_sequences,
              &blk
            )
          )
        end

        log = nil
        response_data = +""
        response_raw = +""

        url = URI("https://api.anthropic.com/v1/complete")
        headers = {
          "anthropic-version" => "2023-06-01",
          "x-api-key" => SiteSetting.ai_anthropic_api_key,
          "content-type" => "application/json",
        }

        payload = { model: model, prompt: prompt }

        payload[:top_p] = top_p if top_p
        payload[:max_tokens_to_sample] = max_tokens || 2000
        payload[:temperature] = temperature if temperature
        payload[:stream] = true if block_given?
        payload[:stop_sequences] = stop_sequences if stop_sequences
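        # For reference, a fully assembled streaming payload might look roughly like
        # this (illustrative values only):
        #   { model: "claude-2", prompt: "\n\nHuman: Hello\n\nAssistant:",
        #     max_tokens_to_sample: 2000, stream: true }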

        Net::HTTP.start(
          url.host,
          url.port,
          use_ssl: true,
          read_timeout: TIMEOUT,
          open_timeout: TIMEOUT,
          write_timeout: TIMEOUT,
        ) do |http|
          request = Net::HTTP::Post.new(url, headers)
          request_body = payload.to_json
          request.body = request_body

          http.request(request) do |response|
            if response.code.to_i != 200
              Rails.logger.error(
                "AnthropicCompletions: status: #{response.code.to_i} - body: #{response.body}",
              )
              raise CompletionFailed
            end
            log =
              AiApiAuditLog.create!(
                provider_id: AiApiAuditLog::Provider::Anthropic,
                raw_request_payload: request_body,
                user_id: user_id,
                post_id: post&.id,
                topic_id: post&.topic_id,
              )

            if !block_given?
              response_body = response.read_body
              parsed_response = JSON.parse(response_body, symbolize_names: true)

              log.update!(
                raw_response_payload: response_body,
                request_tokens: DiscourseAi::Tokenizer::AnthropicTokenizer.size(prompt),
                response_tokens:
                  DiscourseAi::Tokenizer::AnthropicTokenizer.size(parsed_response[:completion]),
              )
              return parsed_response
            end
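
            # Streaming path: Anthropic replies with server-sent events, one
            # "data: {...}" line per event; the JSON :completion field carries the
            # incremental text. A typical event line looks roughly like (illustrative):
            #   data: {"completion":" Hello","stop_reason":null,"model":"claude-2"}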
            begin
              cancelled = false
              cancel = lambda { cancelled = true }

              response.read_body do |chunk|
                if cancelled
                  http.finish
                  return
                end

                response_raw << chunk

                chunk
                  .split("\n")
                  .each do |line|
                    data = line.split("data: ", 2)[1]
                    next if !data

                    if !cancelled
                      begin
                        partial = JSON.parse(data, symbolize_names: true)
                        response_data << partial[:completion].to_s

                        # ping has no data... do not yield it
                        yield partial, cancel if partial[:completion]
                      rescue JSON::ParserError
                        nil
                        # TODO leftover chunk carry over to next
                      end
                    end
                  end
              end
            rescue IOError
              raise if !cancelled
            end
          end

          return response_data
        end
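
      # For streaming requests the audit log is finalized in the ensure block below,
      # since the full response body is only known once the stream has been read
      # (or cancelled).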
      ensure
        # log may be nil if the request failed before the audit log row was created
        if block_given? && log
          log.update!(
            raw_response_payload: response_raw,
            request_tokens: DiscourseAi::Tokenizer::AnthropicTokenizer.size(prompt),
            response_tokens: DiscourseAi::Tokenizer::AnthropicTokenizer.size(response_data),
          )
        end

        if Rails.env.development? && log
          puts "AnthropicCompletions: request_tokens #{log.request_tokens} response_tokens #{log.response_tokens}"
        end
      end

      # Parses a JSON string, returning nil rather than raising when it is invalid.
      def self.try_parse(data)
        JSON.parse(data, symbolize_names: true)
      rescue JSON::ParserError
        nil
      end
    end
  end
end
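
# Usage sketch (illustrative only, not part of the original file; assumes the
# plugin's Anthropic site settings and tokenizer are configured):
#
#   # Blocking call: returns the parsed JSON response from Anthropic.
#   result = DiscourseAi::Inference::AnthropicCompletions.perform!(
#     "\n\nHuman: Say hello\n\nAssistant:",
#     "claude-2",
#     max_tokens: 100,
#   )
#   puts result[:completion]
#
#   # Streaming call: the block receives each partial event plus a cancel lambda.
#   DiscourseAi::Inference::AnthropicCompletions.perform!(
#     "\n\nHuman: Tell me a story\n\nAssistant:",
#     "claude-2",
#   ) do |partial, cancel|
#     print partial[:completion]
#     # call cancel.call here to stop reading the stream early
#   end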