FEATURE: implement GPT-4 turbo support (#345)

Keep in mind:

- GPT-4 Turbo is only going to be fully released next year, so this hardcodes the preview model (gpt-4-1106-preview) for now
- Fixes streaming bugs, which became a big problem with GPT-4 Turbo
- Adds an Azure endpoint for Turbo as well

Co-authored-by: Martin Brennan <martin@discourse.org>
commit 3c9901d43a (parent 6380ebd829)
Sam, 2023-12-11 14:59:57 +11:00

7 changed files with 103 additions and 5 deletions


@@ -125,6 +125,7 @@ en:
       bot_names:
         gpt-4: "GPT-4"
+        gpt-4-turbo: "GPT-4 Turbo"
         gpt-3.5-turbo: "GPT-3.5"
         claude-2: "Claude 2"
@@ -135,7 +136,7 @@ en:
         label: "sentiment"
         title: "Experimental AI-powered sentiment analysis of this person's most recent posts."
     review:
       types:


@@ -42,6 +42,7 @@ en:
     ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
     ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
     ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
+    ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 Turbo chat completions. (for Azure support)"
     ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"
     ai_openai_organization: "(Optional, leave empty to omit) Organization id used for the OpenAI API. Passed in using the OpenAI-Organization header."
     ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
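
Note: the new ai_openai_gpt4_turbo_url setting is meant to be overridden the same way as the other Azure URLs. A purely illustrative value, mirroring the embeddings example above (COMPANY, DEPLOYMENT and the api-version are placeholders, not values from this commit):

    # Illustrative Azure override; COMPANY, DEPLOYMENT and api-version are placeholders.
    SiteSetting.ai_openai_gpt4_turbo_url =
      "https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/chat/completions?api-version=2023-05-15"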


@@ -95,6 +95,7 @@ discourse_ai:
   ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
+  ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations"
   ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
   ai_openai_organization: ""
@@ -256,6 +257,7 @@ discourse_ai:
     choices:
       - gpt-3.5-turbo
       - gpt-4
+      - gpt-4-turbo
       - claude-2
   ai_bot_add_to_header:
     default: true


@@ -8,14 +8,18 @@ module DiscourseAi
       GPT4_ID = -110
       GPT3_5_TURBO_ID = -111
       CLAUDE_V2_ID = -112
+      GPT4_TURBO_ID = -113
       BOTS = [
         [GPT4_ID, "gpt4_bot", "gpt-4"],
         [GPT3_5_TURBO_ID, "gpt3.5_bot", "gpt-3.5-turbo"],
         [CLAUDE_V2_ID, "claude_bot", "claude-2"],
+        [GPT4_TURBO_ID, "gpt4t_bot", "gpt-4-turbo"],
       ]

       def self.map_bot_model_to_user_id(model_name)
         case model_name
+        in "gpt-4-turbo"
+          GPT4_TURBO_ID
         in "gpt-3.5-turbo"
           GPT3_5_TURBO_ID
         in "gpt-4"


@@ -5,6 +5,7 @@ module DiscourseAi
     class OpenAiBot < Bot
       def self.can_reply_as?(bot_user)
         open_ai_bot_ids = [
+          DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID,
           DiscourseAi::AiBot::EntryPoint::GPT4_ID,
           DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID,
         ]
@@ -23,7 +24,9 @@ module DiscourseAi
           buffer += @function_size
         end

-        if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
+        if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
+          150_000 - buffer
+        elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
           8192 - buffer
         else
           16_384 - buffer
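
The branch above sizes the prompt budget per model; the turbo bot gets roughly an order of magnitude more room than plain GPT-4. A worked example with a made-up buffer (the caps come from the hunk, the buffer value is hypothetical):

    buffer = 3_000 # hypothetical reserve for functions and the reply
    { "gpt-4-turbo" => 150_000, "gpt-4" => 8_192, "gpt-3.5-turbo-16k" => 16_384 }
      .each { |model, cap| puts "#{model}: #{cap - buffer} tokens left for the prompt" }
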
@@ -75,8 +78,15 @@ module DiscourseAi
       end

       def model_for(low_cost: false)
-        return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
-        "gpt-3.5-turbo-16k"
+        if low_cost || bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
+          "gpt-3.5-turbo-16k"
+        elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
+          "gpt-4"
+        else
+          # not quite released yet, once released we should replace with
+          # gpt-4-turbo
+          "gpt-4-1106-preview"
+        end
       end

       def clean_username(username)
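
The rewritten model_for routes each bot id to its model and, per the commit message, keeps the preview model hardcoded for turbo. A hypothetical usage sketch (turbo_bot_user is a made-up fixture and the OpenAiBot.new signature is assumed, not taken from this diff):

    bot = DiscourseAi::AiBot::OpenAiBot.new(turbo_bot_user) # backing user id -113
    bot.model_for                 # => "gpt-4-1106-preview" (until gpt-4-turbo ships)
    bot.model_for(low_cost: true) # => "gpt-3.5-turbo-16k"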


@@ -29,7 +29,9 @@ module ::DiscourseAi
         url =
           if model.include?("gpt-4")
-            if model.include?("32k")
+            if model.include?("turbo") || model.include?("1106-preview")
+              URI(SiteSetting.ai_openai_gpt4_turbo_url)
+            elsif model.include?("32k")
               URI(SiteSetting.ai_openai_gpt4_32k_url)
             else
               URI(SiteSetting.ai_openai_gpt4_url)
@@ -134,6 +136,11 @@ module ::DiscourseAi
             response_raw << chunk

+            if (leftover + chunk).length < "data: [DONE]".length
+              leftover += chunk
+              next
+            end
+
             (leftover + chunk)
               .split("\n")
               .each do |line|
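
The guard above is the heart of the streaming fix: with GPT-4 Turbo, chunks often arrive shorter than a complete "data:" line, so anything smaller than the "data: [DONE]" sentinel is buffered instead of parsed. A minimal, self-contained sketch of the same leftover-buffer idea (each_sse_event is a made-up helper, not the plugin's exact code):

    require "json"

    # A raw chunk can end mid-line, so everything after the last newline
    # is carried over and prepended to the next chunk before parsing.
    def each_sse_event(chunks)
      leftover = +""
      chunks.each do |chunk|
        lines = (leftover + chunk).split("\n", -1)
        leftover = lines.pop.to_s # possibly incomplete trailing line
        lines.each do |line|
          payload = line.delete_prefix("data:").strip
          next if payload.empty?
          return if payload == "[DONE]"
          yield JSON.parse(payload, symbolize_names: true)
        end
      end
    end

    # Even fed one character at a time, as the new spec below does,
    # both deltas come out intact.
    raw = "data: {\"choices\":[{\"delta\":{\"content\":\"test\"}}]}\n" \
          "data: {\"choices\":[{\"delta\":{\"content\":\"test1\"}}]}\n" \
          "data: [DONE]\n"
    each_sse_event(raw.chars) { |event| p event }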


@@ -45,6 +45,7 @@ describe DiscourseAi::Inference::OpenAiCompletions do
     { setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
     { setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
     { setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
+    { setting_name: "ai_openai_gpt4_turbo_url", model: "gpt-4-1106-preview" },
   ].each do |config|
     gpt_url = "#{gpt_url_base}/#{config[:model]}"
     setting_name = config[:setting_name]
@@ -263,6 +264,78 @@ describe DiscourseAi::Inference::OpenAiCompletions do
     expect(log.raw_response_payload).to eq(request_body)
   end

+  context "when Webmock has streaming support" do
+    # See: https://github.com/bblimke/webmock/issues/629
+    let(:mock_net_http) do
+      Class.new(Net::HTTP) do
+        def request(*)
+          super do |response|
+            response.instance_eval do
+              def read_body(*, &)
+                @body.each(&)
+              end
+            end
+
+            yield response if block_given?
+
+            response
+          end
+        end
+      end
+    end
+
+    let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }
+    let(:original_http) { remove_original_net_http }
+    let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }
+
+    let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }
+    let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }
+
+    before do
+      mock_net_http
+      remove_original_net_http
+      stub_net_http
+    end
+
+    after do
+      remove_stubbed_net_http
+      restore_net_http
+    end
+
+    it "supports extremely slow streaming" do
+      raw_data = <<~TEXT
+        data: {"choices":[{"delta":{"content":"test"}}]}
+
+        data: {"choices":[{"delta":{"content":"test1"}}]}
+
+        data: {"choices":[{"delta":{"content":"test2"}}]}
+
+        data: [DONE]
+      TEXT
+
+      chunks = raw_data.split("")
+
+      stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+        status: 200,
+        body: chunks,
+      )
+
+      partials = []
+      DiscourseAi::Inference::OpenAiCompletions.perform!([], "gpt-3.5-turbo") do |partial, cancel|
+        partials << partial
+      end
+
+      expect(partials.length).to eq(3)
+      expect(partials).to eq(
+        [
+          { choices: [{ delta: { content: "test" } }] },
+          { choices: [{ delta: { content: "test1" } }] },
+          { choices: [{ delta: { content: "test2" } }] },
+        ],
+      )
+    end
+  end
+
   it "can operate in streaming mode" do
     deltas = [
       { role: "assistant" },
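
A closing note on the Webmock scaffolding above: WebMock's stubbed Net::HTTP normally hands the body back in one piece, so read_body never yields per-chunk (that is the linked issue). The Net::HTTP subclass redefines read_body so an Array body yields element by element, and the memoized lets plus the before/after hooks boil down to a constant swap, roughly (a sketch; mock_class stands in for the subclass built above):

    require "net/http"

    original = Net.send(:remove_const, :HTTP)
    Net.send(:const_set, :HTTP, mock_class) # stubbed responses now stream chunk by chunk
    begin
      # ... perform the streaming request under test ...
    ensure
      Net.send(:remove_const, :HTTP)
      Net.send(:const_set, :HTTP, original) # put the real Net::HTTP back
    end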