FEATURE: implement GPT-4 turbo support (#345)

Keep in mind: - GPT-4 Turbo is only going to be fully released next year - so this hardcodes the preview model for now - Fixes streaming bugs which became a big problem with GPT-4 Turbo - Adds an Azure endpoint for Turbo as well Co-authored-by: Martin Brennan <martin@discourse.org>
2023-12-11 14:59:57 +11:00 · 2023-12-11 14:59:57 +11:00 · 3c9901d43a
parent 6380ebd829
commit 3c9901d43a
7 changed files with 103 additions and 5 deletions
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@ -125,6 +125,7 @@ en:
        bot_names:
          gpt-4: "GPT-4"
          gpt-4-turbo: "GPT-4 Turbo"
          gpt-3:
            5-turbo: "GPT-3.5"
          claude-2: "Claude 2"
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@ -42,6 +42,7 @@ en:
    ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
    ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
    ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
    ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 Turbo chat completions. (for Azure support)"
    ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"
    ai_openai_organization: "(Optional, leave empty to omit) Organization id used for the OpenAI API. Passed in using the OpenAI-Organization header."
    ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
--- a/config/settings.yml
+++ b/config/settings.yml
@ -95,6 +95,7 @@ discourse_ai:
  ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
  ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
  ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
  ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions"
  ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations"
  ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
  ai_openai_organization: ""
@ -256,6 +257,7 @@ discourse_ai:
    choices:
      - gpt-3.5-turbo
      - gpt-4
      - gpt-4-turbo
      - claude-2
  ai_bot_add_to_header:
    default: true
--- a/lib/ai_bot/entry_point.rb
+++ b/lib/ai_bot/entry_point.rb
@ -8,14 +8,18 @@ module DiscourseAi
      GPT4_ID = -110
      GPT3_5_TURBO_ID = -111
      CLAUDE_V2_ID = -112
      GPT4_TURBO_ID = -113
      BOTS = [
        [GPT4_ID, "gpt4_bot", "gpt-4"],
        [GPT3_5_TURBO_ID, "gpt3.5_bot", "gpt-3.5-turbo"],
        [CLAUDE_V2_ID, "claude_bot", "claude-2"],
        [GPT4_TURBO_ID, "gpt4t_bot", "gpt-4-turbo"],
      ]
      def self.map_bot_model_to_user_id(model_name)
        case model_name
        in "gpt-4-turbo"
          GPT4_TURBO_ID
        in "gpt-3.5-turbo"
          GPT3_5_TURBO_ID
        in "gpt-4"
--- a/lib/ai_bot/open_ai_bot.rb
+++ b/lib/ai_bot/open_ai_bot.rb
@ -5,6 +5,7 @@ module DiscourseAi
    class OpenAiBot < Bot
      def self.can_reply_as?(bot_user)
        open_ai_bot_ids = [
          DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID,
          DiscourseAi::AiBot::EntryPoint::GPT4_ID,
          DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID,
        ]
@ -23,7 +24,9 @@ module DiscourseAi
          buffer += @function_size
        end
-        if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
+        if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
          150_000 - buffer
        elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
          8192 - buffer
        else
          16_384 - buffer
@ -75,8 +78,15 @@ module DiscourseAi
      end
      def model_for(low_cost: false)
-        return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
+        if low_cost || bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
-        "gpt-3.5-turbo-16k"
+          "gpt-3.5-turbo-16k"
        elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
          "gpt-4"
        else
          # not quite released yet, once released we should replace with
          # gpt-4-turbo
          "gpt-4-1106-preview"
        end
      end
      def clean_username(username)
--- a/lib/inference/open_ai_completions.rb
+++ b/lib/inference/open_ai_completions.rb
@ -29,7 +29,9 @@ module ::DiscourseAi
        url =
          if model.include?("gpt-4")
-            if model.include?("32k")
+            if model.include?("turbo") || model.include?("1106-preview")
              URI(SiteSetting.ai_openai_gpt4_turbo_url)
            elsif model.include?("32k")
              URI(SiteSetting.ai_openai_gpt4_32k_url)
            else
              URI(SiteSetting.ai_openai_gpt4_url)
@ -134,6 +136,11 @@ module ::DiscourseAi
                response_raw << chunk
                if (leftover + chunk).length < "data: [DONE]".length
                  leftover += chunk
                  next
                end
                (leftover + chunk)
                  .split("\n")
                  .each do |line|
--- a/spec/shared/inference/openai_completions_spec.rb
+++ b/spec/shared/inference/openai_completions_spec.rb
@ -45,6 +45,7 @@ describe DiscourseAi::Inference::OpenAiCompletions do
        { setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
        { setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
        { setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
        { setting_name: "ai_openai_gpt4_turbo_url", model: "gpt-4-1106-preview" },
      ].each do |config|
        gpt_url = "#{gpt_url_base}/#{config[:model]}"
        setting_name = config[:setting_name]
@ -263,6 +264,78 @@ describe DiscourseAi::Inference::OpenAiCompletions do
    expect(log.raw_response_payload).to eq(request_body)
  end
  # Checks that streamed completions are parsed correctly when the response
  # body is delivered in very small pieces. For the duration of each example
  # Net::HTTP is replaced by a subclass whose responses hand the stubbed body
  # to read_body one element at a time.
  context "when Webmock has streaming support" do
    # See: https://github.com/bblimke/webmock/issues/629
    # Anonymous subclass of Net::HTTP. Every response passed to the request
    # block gets its own read_body that yields each element of @body in turn.
    # NOTE(review): presumably @body holds the array given as `body:` to the
    # WebMock stub below — confirm against WebMock internals.
    let(:mock_net_http) do
      Class.new(Net::HTTP) do
        def request(*)
          super do |response|
            # Redefine read_body on this single response object only.
            response.instance_eval do
              def read_body(*, &)
                @body.each(&)
              end
            end
            # Hand the patched response on to the caller's block, if any.
            yield response if block_given?
            response
          end
        end
      end
    end
    # Detaches the real Net::HTTP constant from Net.
    let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }
    # The real Net::HTTP class, taken from the helper above so that it can be
    # put back after the example.
    let(:original_http) { remove_original_net_http }
    # Installs the streaming subclass under the name Net::HTTP.
    let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }
    # Detaches whatever Net::HTTP currently names (the subclass, once the
    # before hook has run).
    let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }
    # Re-installs the real class under the name Net::HTTP.
    let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }
    before do
      # Order matters: the subclass must be built while Net::HTTP still names
      # the real class; only then is the constant swapped.
      mock_net_http
      remove_original_net_http
      stub_net_http
    end
    after do
      # Undo the swap. NOTE(review): this relies on `let` memoisation so that
      # original_http returns the class removed in the before hook instead of
      # calling remove_const a second time — confirm.
      remove_stubbed_net_http
      restore_net_http
    end
    it "support extremely slow streaming" do
      raw_data = <<~TEXT
 data: {"choices":[{"delta":{"content":"test"}}]}
 data: {"choices":[{"delta":{"content":"test1"}}]}
 data: {"choices":[{"delta":{"content":"test2"}}]}
 data: [DONE]
    TEXT
      # One chunk per character, so no "data:" line ever arrives in one piece.
      chunks = raw_data.split("")
      stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
        status: 200,
        body: chunks,
      )
      # Collect every partial the streaming block receives.
      partials = []
      DiscourseAi::Inference::OpenAiCompletions.perform!([], "gpt-3.5-turbo") do |partial, cancel|
        partials << partial
      end
      # Exactly one partial per JSON payload line; the "[DONE]" line is
      # expected to produce none.
      expect(partials.length).to eq(3)
      expect(partials).to eq(
        [
          { choices: [{ delta: { content: "test" } }] },
          { choices: [{ delta: { content: "test1" } }] },
          { choices: [{ delta: { content: "test2" } }] },
        ],
      )
    end
  end
  it "can operate in streaming mode" do
    deltas = [
      { role: "assistant" },