# frozen_string_literal: true
module DiscourseAi
  module Completions
    module Endpoints
      # Endpoint adapter for SambaNova's OpenAI-compatible chat completion API.
      # Handles payload construction, request headers, and decoding of both
      # full-response and streamed (SSE/JSON-lines) completions.
      class SambaNova < Base
        # True only for the "samba_nova" provider identifier.
        def self.can_contact?(model_provider)
          "samba_nova" == model_provider
        end

        # Translates generic param names into SambaNova's API vocabulary.
        # :max_tokens and :temperature pass through unchanged; a truthy
        # :stop_sequences value is renamed to :stop.
        def normalize_model_params(model_params)
          params = model_params.dup
          # max_tokens, temperature are already supported
          params[:stop] = params.delete(:stop_sequences) if params[:stop_sequences]
          params
        end

        # Baseline request body; callers merge their own params on top.
        def default_options
          { model: llm_model.name }
        end

        # Provider tag recorded in the API audit log.
        def provider_id
          AiApiAuditLog::Provider::SambaNova
        end

        private

        # Endpoint URL comes straight from the configured model record.
        def model_uri
          URI(llm_model.url)
        end

        # Builds the JSON-serializable request body for a completion call.
        # `dialect` is accepted for interface parity but unused here.
        def prepare_payload(prompt, model_params, dialect)
          body = default_options.merge(model_params)
          body[:messages] = prompt
          body[:stream] = true if @streaming_mode
          body
        end

        # Wraps the (already serialized) payload in an authenticated POST.
        def prepare_request(payload)
          headers = {
            "Content-Type" => "application/json",
            "Authorization" => "Bearer #{llm_model.api_key}",
          }
          request = Net::HTTP::Post.new(model_uri, headers)
          request.body = payload
          request
        end

        # Copies token counts captured during streaming onto the audit log,
        # only when the API actually reported them.
        def final_log_update(log)
          log.request_tokens = @prompt_tokens if @prompt_tokens
          log.response_tokens = @completion_tokens if @completion_tokens
        end

        # Tool invocations are expressed via XML in the prompt for this provider.
        def xml_tools_enabled?
          true
        end

        # Extracts the assistant message text from a non-streaming response.
        def decode(response_raw)
          parsed = JSON.parse(response_raw, symbolize_names: true)
          [parsed.dig(:choices, 0, :message, :content)]
        end

        # Decodes one raw streamed chunk into an array of text deltas.
        # Also opportunistically captures usage counters the first time the
        # stream reports them (used later by #final_log_update).
        def decode_chunk(chunk)
          @json_decoder ||= JsonStreamDecoder.new
          # NOTE(review): assumes JsonStreamDecoder#<< yields a flat array of
          # parsed JSON hashes — matches the original's map/flatten/compact.
          (@json_decoder << chunk).each_with_object([]) do |json, deltas|
            @prompt_tokens ||= json.dig(:usage, :prompt_tokens)
            @completion_tokens ||= json.dig(:usage, :completion_tokens)
            piece = json.dig(:choices, 0, :delta, :content)
            deltas << piece unless piece.to_s.empty?
          end
        end
      end
    end
  end
end