From b4477ecdcdea4d9d7a2b9f407180175a6cb22909 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 17 Aug 2023 11:00:11 +1000 Subject: [PATCH] FEATURE: support 16k and 32k variants for Azure GPT (#140) Azure requires a single HTTP endpoint per type of completion. The settings: `ai_openai_gpt35_16k_url` and `ai_openai_gpt4_32k_url` can be used now to configure the extra endpoints This amends token limit which was off a bit due to function calls and fixes a minor JS issue where we were not testing for a property --- .../composer-open.js | 5 +- config/locales/server.en.yml | 4 +- config/settings.yml | 2 + lib/modules/ai_bot/open_ai_bot.rb | 8 +- lib/shared/inference/openai_completions.rb | 12 ++- .../inference/openai_completions_spec.rb | 90 ++++++++++--------- 6 files changed, 73 insertions(+), 48 deletions(-) diff --git a/assets/javascripts/discourse/connectors/composer-after-composer-editor/composer-open.js b/assets/javascripts/discourse/connectors/composer-after-composer-editor/composer-open.js index a31b7188..b095138d 100644 --- a/assets/javascripts/discourse/connectors/composer-after-composer-editor/composer-open.js +++ b/assets/javascripts/discourse/connectors/composer-after-composer-editor/composer-open.js @@ -20,7 +20,10 @@ export default class extends Component { @computed("composerModel.targetRecipients") get isAiBotChat() { - if (this.composerModel.targetRecipients) { + if ( + this.composerModel.targetRecipients && + this.currentUser.ai_enabled_chat_bots + ) { let reciepients = this.composerModel.targetRecipients.split(","); return this.currentUser.ai_enabled_chat_bots.any((bot) => diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index b64f9d0e..8d87f97b 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -31,8 +31,10 @@ en: ai_nsfw_flag_threshold_sexy: "Threshold for an image classified as sexy to be considered NSFW." ai_nsfw_models: "Models to use for NSFW inference." 
- ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (Azure: MUST support function calling and ideally is a GPT3.5 16K endpoint)" + ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (for Azure support)" + ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)" ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)" + ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)" ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)" ai_openai_api_key: "API key for OpenAI API" ai_anthropic_api_key: "API key for Anthropic API" diff --git a/config/settings.yml b/config/settings.yml index a2dce5cb..f401394f 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -89,7 +89,9 @@ plugins: - nsfw_detector ai_openai_gpt35_url: "https://api.openai.com/v1/chat/completions" + ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions" ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions" + ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions" ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings" ai_openai_api_key: default: "" diff --git a/lib/modules/ai_bot/open_ai_bot.rb b/lib/modules/ai_bot/open_ai_bot.rb index bb064a83..a37d9f88 100644 --- a/lib/modules/ai_bot/open_ai_bot.rb +++ b/lib/modules/ai_bot/open_ai_bot.rb @@ -17,12 +17,12 @@ module DiscourseAi # also allow for an extra 500 or so spare tokens # # 2500 are the max reply tokens - # Then we have 400 or so for the full function suite + # Then we have 450 or so for the full function suite # 100 additional for growth around function calls if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID - 8192 - 3000 + 8192 - 3050 else - 16_384 - 3000 + 16_384 - 3050 end end @@ -110,7 +110,7 @@ 
module DiscourseAi end def model_for(low_cost: false) - return "gpt-4-0613" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost + return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost "gpt-3.5-turbo-16k" end diff --git a/lib/shared/inference/openai_completions.rb b/lib/shared/inference/openai_completions.rb index d521214c..1381c6b8 100644 --- a/lib/shared/inference/openai_completions.rb +++ b/lib/shared/inference/openai_completions.rb @@ -62,9 +62,17 @@ module ::DiscourseAi ) url = if model.include?("gpt-4") - URI(SiteSetting.ai_openai_gpt4_url) + if model.include?("32k") + URI(SiteSetting.ai_openai_gpt4_32k_url) + else + URI(SiteSetting.ai_openai_gpt4_url) + end else - URI(SiteSetting.ai_openai_gpt35_url) + if model.include?("16k") + URI(SiteSetting.ai_openai_gpt35_16k_url) + else + URI(SiteSetting.ai_openai_gpt35_url) + end end headers = { "Content-Type" => "application/json" } diff --git a/spec/shared/inference/openai_completions_spec.rb b/spec/shared/inference/openai_completions_spec.rb index dca271ba..5c9d8504 100644 --- a/spec/shared/inference/openai_completions_spec.rb +++ b/spec/shared/inference/openai_completions_spec.rb @@ -7,53 +7,63 @@ describe DiscourseAi::Inference::OpenAiCompletions do before { SiteSetting.ai_openai_api_key = "abc-123" } context "when configured using Azure" do - it "Supports GPT 3.5 completions" do - SiteSetting.ai_openai_api_key = "12345" - SiteSetting.ai_openai_gpt35_url = + it "Supports custom Azure endpoints for completions" do + gpt_url_base = "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview" + key = "12345" + SiteSetting.ai_openai_api_key = key - expected = { - id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ", - object: "chat.completion", - created: 1_687_305_079, - model: "gpt-35-turbo", - choices: [ - { - index: 0, - finish_reason: "stop", - message: { - role: "assistant", - content: "Hi there! 
How can I assist you today?", + [ + { setting_name: "ai_openai_gpt35_url", model: "gpt-35-turbo" }, + { setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" }, + { setting_name: "ai_openai_gpt4_url", model: "gpt-4" }, + { setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" }, + ].each do |config| + gpt_url = "#{gpt_url_base}/#{config[:model]}" + setting_name = config[:setting_name] + model = config[:model] + + SiteSetting.public_send("#{setting_name}=".to_sym, gpt_url) + + expected = { + id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ", + object: "chat.completion", + created: 1_687_305_079, + model: model, + choices: [ + { + index: 0, + finish_reason: "stop", + message: { + role: "assistant", + content: "Hi there! How can I assist you today?", + }, }, + ], + usage: { + completion_tokens: 10, + prompt_tokens: 9, + total_tokens: 19, }, - ], - usage: { - completion_tokens: 10, - prompt_tokens: 9, - total_tokens: 19, - }, - } + } - stub_request( - :post, - "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview", - ).with( - body: - "{\"model\":\"gpt-3.5-turbo-0613\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}", - headers: { - "Api-Key" => "12345", - "Content-Type" => "application/json", - "Host" => "company.openai.azure.com", - }, - ).to_return(status: 200, body: expected.to_json, headers: {}) + stub_request(:post, gpt_url).with( + body: "{\"model\":\"#{model}\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}", + headers: { + "Api-Key" => "12345", + "Content-Type" => "application/json", + "Host" => "company.openai.azure.com", + }, + ).to_return(status: 200, body: expected.to_json, headers: {}) - result = - DiscourseAi::Inference::OpenAiCompletions.perform!( - [role: "user", content: "hello"], - "gpt-3.5-turbo-0613", - ) + result = + DiscourseAi::Inference::OpenAiCompletions.perform!( + [role: "user", content: "hello"], + model, + ) - expect(result).to eq(expected) + 
expect(result).to eq(expected) + end end end