# frozen_string_literal: true

require "rails_helper"

# Specs for the low-level OpenAI chat-completions client. These exercise:
# request shaping (model, messages, org header, Azure endpoints, functions),
# retry behaviour on HTTP 429, audit logging, and streaming (SSE) parsing.
# HTTP traffic is stubbed via WebMock; no real API calls are made.
describe DiscourseAi::Inference::OpenAiCompletions do
  before { SiteSetting.ai_openai_api_key = "abc-123" }

  fab!(:user)

  it "supports sending an organization id" do
    SiteSetting.ai_openai_organization = "org_123"

    # The OpenAI-Organization header must be present for the stub to match,
    # proving the client forwards the configured org id.
    stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
      body:
        "{\"model\":\"gpt-3.5-turbo-0613\",\"messages\":[{\"role\":\"system\",\"content\":\"hello\"}]}",
      headers: {
        "Authorization" => "Bearer abc-123",
        "Content-Type" => "application/json",
        "Host" => "api.openai.com",
        "User-Agent" => "Ruby",
        "OpenAI-Organization" => "org_123",
      },
    ).to_return(
      status: 200,
      body: { choices: [message: { content: "world" }] }.to_json,
      headers: {
      },
    )

    result =
      DiscourseAi::Inference::OpenAiCompletions.perform!(
        [{ role: "system", content: "hello" }],
        "gpt-3.5-turbo-0613",
      )

    expect(result.dig(:choices, 0, :message, :content)).to eq("world")
  end

  context "when configured using Azure" do
    it "Supports custom Azure endpoints for completions" do
      gpt_url_base =
        "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview"
      key = "12345"
      SiteSetting.ai_openai_api_key = key

      # Each per-model URL site setting should route its model to the custom
      # Azure endpoint, using the Api-Key header instead of a Bearer token.
      [
        { setting_name: "ai_openai_gpt35_url", model: "gpt-35-turbo" },
        { setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
        { setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
        { setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
        { setting_name: "ai_openai_gpt4_turbo_url", model: "gpt-4-1106-preview" },
      ].each do |config|
        gpt_url = "#{gpt_url_base}/#{config[:model]}"
        setting_name = config[:setting_name]
        model = config[:model]

        SiteSetting.public_send("#{setting_name}=".to_sym, gpt_url)

        expected = {
          id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ",
          object: "chat.completion",
          created: 1_687_305_079,
          model: model,
          choices: [
            {
              index: 0,
              finish_reason: "stop",
              message: {
                role: "assistant",
                content: "Hi there! How can I assist you today?",
              },
            },
          ],
          usage: {
            completion_tokens: 10,
            prompt_tokens: 9,
            total_tokens: 19,
          },
        }

        stub_request(:post, gpt_url).with(
          body: "{\"model\":\"#{model}\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}",
          headers: {
            "Api-Key" => "12345",
            "Content-Type" => "application/json",
            "Host" => "company.openai.azure.com",
          },
        ).to_return(status: 200, body: expected.to_json, headers: {})

        result =
          DiscourseAi::Inference::OpenAiCompletions.perform!(
            [role: "user", content: "hello"],
            model,
          )

        expect(result).to eq(expected)
      end
    end
  end

  it "supports function calling" do
    prompt = [role: "system", content: "you are weatherbot"]
    prompt << { role: "user", content: "what is the weather in sydney?" }

    functions = []

    function =
      DiscourseAi::Inference::Function.new(
        name: "get_weather",
        description: "Get the weather in a city",
      )

    function.add_parameter(
      name: "location",
      type: "string",
      description: "the city name",
      required: true,
    )

    function.add_parameter(
      name: "unit",
      type: "string",
      description: "the unit of measurement celcius c or fahrenheit f",
      enum: %w[c f],
      required: true,
    )

    functions << function

    function_calls = []
    current_function_call = nil

    # Streamed function-call deltas: the name arrives first, then the JSON
    # arguments trickle in across several chunks and must be concatenated.
    deltas = [
      { role: "assistant" },
      { function_call: { name: "get_weather", arguments: "" } },
      { function_call: { arguments: "{ \"location\": " } },
      { function_call: { arguments: "\"sydney\", \"unit\": \"c\" }" } },
    ]

    OpenAiCompletionsInferenceStubs.stub_streamed_response(
      prompt,
      deltas,
      model: "gpt-3.5-turbo-0613",
      req_opts: {
        functions: functions,
        stream: true,
      },
    )

    DiscourseAi::Inference::OpenAiCompletions.perform!(
      prompt,
      "gpt-3.5-turbo-0613",
      functions: functions,
    ) do |json, cancel|
      fn = json.dig(:choices, 0, :delta, :function_call)
      if fn && fn[:name]
        # A delta carrying a name starts a new function call; duplicate the
        # arguments string so later chunks can be appended in place.
        current_function_call = { name: fn[:name], arguments: +fn[:arguments].to_s.dup }
        function_calls << current_function_call
      elsif fn && fn[:arguments] && current_function_call
        current_function_call[:arguments] << fn[:arguments]
      end
    end

    expect(function_calls.length).to eq(1)
    expect(function_calls[0][:name]).to eq("get_weather")
    expect(JSON.parse(function_calls[0][:arguments])).to eq(
      { "location" => "sydney", "unit" => "c" },
    )

    # Round-trip: feed the function result back as a "function" role message
    # and expect a plain completion.
    prompt << { role: "function", name: "get_weather", content: 22.to_json }

    OpenAiCompletionsInferenceStubs.stub_response(
      prompt,
      "The current temperature in Sydney is 22 degrees Celsius.",
      model: "gpt-3.5-turbo-0613",
      req_opts: {
        functions: functions,
      },
    )

    result =
      DiscourseAi::Inference::OpenAiCompletions.perform!(
        prompt,
        "gpt-3.5-turbo-0613",
        functions: functions,
      )

    expect(result.dig(:choices, 0, :message, :content)).to eq(
      "The current temperature in Sydney is 22 degrees Celsius.",
    )
  end

  it "supports rate limits" do
    # Two 429s followed by a 200: with retries: 3 the call should succeed.
    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
      [
        { status: 429, body: "", headers: {} },
        { status: 429, body: "", headers: {} },
        { status: 200, body: { choices: [message: { content: "ok" }] }.to_json, headers: {} },
      ],
    )
    completions =
      DiscourseAi::Inference::OpenAiCompletions.perform!(
        [{ role: "user", content: "hello" }],
        "gpt-3.5-turbo",
        temperature: 0.5,
        top_p: 0.8,
        max_tokens: 700,
        retries: 3,
        retry_timeout: 0,
      )

    expect(completions.dig(:choices, 0, :message, :content)).to eq("ok")
  end

  it "supports will raise once rate limit is met" do
    # Three consecutive 429s exhaust the retry budget, so the client raises.
    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
      [
        { status: 429, body: "", headers: {} },
        { status: 429, body: "", headers: {} },
        { status: 429, body: "", headers: {} },
      ],
    )

    expect do
      DiscourseAi::Inference::OpenAiCompletions.perform!(
        [{ role: "user", content: "hello" }],
        "gpt-3.5-turbo",
        temperature: 0.5,
        top_p: 0.8,
        max_tokens: 700,
        retries: 3,
        retry_timeout: 0,
      )
    end.to raise_error(DiscourseAi::Inference::OpenAiCompletions::CompletionFailed)
  end

  it "can complete a trivial prompt" do
    response_text = "1. Serenity\\n2. Laughter\\n3. Adventure"
    prompt = [role: "user", content: "write 3 words"]
    user_id = 183
    req_opts = { temperature: 0.5, top_p: 0.8, max_tokens: 700 }

    OpenAiCompletionsInferenceStubs.stub_response(prompt, response_text, req_opts: req_opts)

    completions =
      DiscourseAi::Inference::OpenAiCompletions.perform!(
        prompt,
        "gpt-3.5-turbo",
        temperature: 0.5,
        top_p: 0.8,
        max_tokens: 700,
        user_id: user_id,
      )

    expect(completions.dig(:choices, 0, :message, :content)).to eq(response_text)

    # A single audit log row should capture the raw request/response payloads
    # and the token counts reported by the (stubbed) API.
    expect(AiApiAuditLog.count).to eq(1)
    log = AiApiAuditLog.first

    body = { model: "gpt-3.5-turbo", messages: prompt }.merge(req_opts).to_json
    request_body = OpenAiCompletionsInferenceStubs.response(response_text).to_json

    expect(log.provider_id).to eq(AiApiAuditLog::Provider::OpenAI)
    expect(log.request_tokens).to eq(337)
    expect(log.response_tokens).to eq(162)
    expect(log.raw_request_payload).to eq(body)
    expect(log.raw_response_payload).to eq(request_body)
  end

  context "when Webmock has streaming support" do
    # See: https://github.com/bblimke/webmock/issues/629
    # WebMock cannot stream response bodies chunk-by-chunk, so we temporarily
    # swap Net::HTTP for a subclass whose read_body yields each element of the
    # stubbed body array as a separate chunk.
    let(:mock_net_http) do
      Class.new(Net::HTTP) do
        def request(*)
          super do |response|
            response.instance_eval do
              def read_body(*, &)
                @body.each(&)
              end
            end

            yield response if block_given?

            response
          end
        end
      end
    end

    let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }
    let(:original_http) { remove_original_net_http }
    let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }

    let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }
    let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }

    before do
      mock_net_http
      remove_original_net_http
      stub_net_http
    end

    after do
      remove_stubbed_net_http
      restore_net_http
    end

    it "supports extremely slow streaming under new interface" do
      raw_data = <<~TEXT
data: {"choices":[{"delta":{"content":"test"}}]}

data: {"choices":[{"delta":{"content":"test1"}}]}

data: {"choices":[{"delta":{"content":"test2"}}]}

data: [DONE]
    TEXT

      # Split into single characters: the worst-case chunking an SSE parser
      # can receive — every byte arrives as its own chunk.
      chunks = raw_data.split("")

      stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
        status: 200,
        body: chunks,
      )

      partials = []
      llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo")
      llm.completion!({ insts: "test" }, user) { |partial| partials << partial }

      expect(partials.join).to eq("testtest1test2")
    end

    it "support extremely slow streaming" do
      raw_data = <<~TEXT
data: {"choices":[{"delta":{"content":"test"}}]}

data: {"choices":[{"delta":{"content":"test1"}}]}

data: {"choices":[{"delta":{"content":"test2"}}]}

data: [DONE]
    TEXT

      chunks = raw_data.split("")

      stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
        status: 200,
        body: chunks,
      )

      partials = []
      DiscourseAi::Inference::OpenAiCompletions.perform!([], "gpt-3.5-turbo") do |partial, cancel|
        partials << partial
      end

      expect(partials.length).to eq(3)
      expect(partials).to eq(
        [
          { choices: [{ delta: { content: "test" } }] },
          { choices: [{ delta: { content: "test1" } }] },
          { choices: [{ delta: { content: "test2" } }] },
        ],
      )
    end
  end

  it "can operate in streaming mode" do
    deltas = [
      { role: "assistant" },
      { content: "Mount" },
      { content: "ain" },
      { content: " " },
      { content: "Tree " },
      { content: "Frog" },
    ]

    prompt = [role: "user", content: "write 3 words"]
    content = +""

    OpenAiCompletionsInferenceStubs.stub_streamed_response(
      prompt,
      deltas,
      req_opts: {
        stream: true,
      },
    )

    DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, "gpt-3.5-turbo") do |partial, cancel|
      data = partial.dig(:choices, 0, :delta, :content)
      content << data if data
      # Cancel mid-stream after two space-separated words ("Mountain Tree "),
      # so the trailing "Frog" delta is never appended.
      cancel.call if content.split(" ").length == 2
    end

    expect(content).to eq("Mountain Tree ")

    # Cancelled streams should still be audit-logged with token counts.
    expect(AiApiAuditLog.count).to eq(1)
    log = AiApiAuditLog.first

    request_body = { model: "gpt-3.5-turbo", messages: prompt, stream: true }.to_json

    expect(log.provider_id).to eq(AiApiAuditLog::Provider::OpenAI)
    expect(log.request_tokens).to eq(4)
    expect(log.response_tokens).to eq(3)
    expect(log.raw_request_payload).to eq(request_body)
    expect(log.raw_response_payload).to be_present
  end
end