FEATURE: support 16k and 32k variants for Azure GPT (#140)

Azure requires a single HTTP endpoint per type of completion.

The settings `ai_openai_gpt35_16k_url` and `ai_openai_gpt4_32k_url` can now be
used to configure the extra endpoints.

This amends the token limit, which was off a bit due to function calls, and fixes
a minor JS issue where we were not testing for a property.
This commit is contained in:
Sam 2023-08-17 11:00:11 +10:00 committed by GitHub
parent 01f833f86e
commit b4477ecdcd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 73 additions and 48 deletions

View File

@@ -20,7 +20,10 @@ export default class extends Component {
@computed("composerModel.targetRecipients") @computed("composerModel.targetRecipients")
get isAiBotChat() { get isAiBotChat() {
if (this.composerModel.targetRecipients) { if (
this.composerModel.targetRecipients &&
this.currentUser.ai_enabled_chat_bots
) {
let reciepients = this.composerModel.targetRecipients.split(","); let reciepients = this.composerModel.targetRecipients.split(",");
return this.currentUser.ai_enabled_chat_bots.any((bot) => return this.currentUser.ai_enabled_chat_bots.any((bot) =>

View File

@@ -31,8 +31,10 @@ en:
ai_nsfw_flag_threshold_sexy: "Threshold for an image classified as sexy to be considered NSFW." ai_nsfw_flag_threshold_sexy: "Threshold for an image classified as sexy to be considered NSFW."
ai_nsfw_models: "Models to use for NSFW inference." ai_nsfw_models: "Models to use for NSFW inference."
ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (Azure: MUST support function calling and ideally is a GPT3.5 16K endpoint)" ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (for Azuer support)"
ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)" ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)" ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
ai_openai_api_key: "API key for OpenAI API" ai_openai_api_key: "API key for OpenAI API"
ai_anthropic_api_key: "API key for Anthropic API" ai_anthropic_api_key: "API key for Anthropic API"

View File

@@ -89,7 +89,9 @@ plugins:
- nsfw_detector - nsfw_detector
ai_openai_gpt35_url: "https://api.openai.com/v1/chat/completions" ai_openai_gpt35_url: "https://api.openai.com/v1/chat/completions"
ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions" ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings" ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
ai_openai_api_key: ai_openai_api_key:
default: "" default: ""

View File

@@ -17,12 +17,12 @@ module DiscourseAi
# also allow for an extra 500 or so spare tokens # also allow for an extra 500 or so spare tokens
# #
# 2500 are the max reply tokens # 2500 are the max reply tokens
# Then we have 400 or so for the full function suite # Then we have 450 or so for the full function suite
# 100 additional for growth around function calls # 100 additional for growth around function calls
if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
8192 - 3000 8192 - 3050
else else
16_384 - 3000 16_384 - 3050
end end
end end
@@ -110,7 +110,7 @@ module DiscourseAi
end end
def model_for(low_cost: false) def model_for(low_cost: false)
return "gpt-4-0613" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
"gpt-3.5-turbo-16k" "gpt-3.5-turbo-16k"
end end

View File

@@ -62,10 +62,18 @@ module ::DiscourseAi
) )
url = url =
if model.include?("gpt-4") if model.include?("gpt-4")
if model.include?("32k")
URI(SiteSetting.ai_openai_gpt4_32k_url)
else
URI(SiteSetting.ai_openai_gpt4_url) URI(SiteSetting.ai_openai_gpt4_url)
end
else
if model.include?("16k")
URI(SiteSetting.ai_openai_gpt35_16k_url)
else else
URI(SiteSetting.ai_openai_gpt35_url) URI(SiteSetting.ai_openai_gpt35_url)
end end
end
headers = { "Content-Type" => "application/json" } headers = { "Content-Type" => "application/json" }
if url.host.include? ("azure") if url.host.include? ("azure")

View File

@@ -7,16 +7,29 @@ describe DiscourseAi::Inference::OpenAiCompletions do
before { SiteSetting.ai_openai_api_key = "abc-123" } before { SiteSetting.ai_openai_api_key = "abc-123" }
context "when configured using Azure" do context "when configured using Azure" do
it "Supports GPT 3.5 completions" do it "Supports custom Azure endpoints for completions" do
SiteSetting.ai_openai_api_key = "12345" gpt_url_base =
SiteSetting.ai_openai_gpt35_url =
"https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview" "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview"
key = "12345"
SiteSetting.ai_openai_api_key = key
[
{ setting_name: "ai_openai_gpt35_url", model: "gpt-35-turbo" },
{ setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
{ setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
{ setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
].each do |config|
gpt_url = "#{gpt_url_base}/#{config[:model]}"
setting_name = config[:setting_name]
model = config[:model]
SiteSetting.public_send("#{setting_name}=".to_sym, gpt_url)
expected = { expected = {
id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ", id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ",
object: "chat.completion", object: "chat.completion",
created: 1_687_305_079, created: 1_687_305_079,
model: "gpt-35-turbo", model: model,
choices: [ choices: [
{ {
index: 0, index: 0,
@@ -34,12 +47,8 @@ describe DiscourseAi::Inference::OpenAiCompletions do
}, },
} }
stub_request( stub_request(:post, gpt_url).with(
:post, body: "{\"model\":\"#{model}\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}",
"https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview",
).with(
body:
"{\"model\":\"gpt-3.5-turbo-0613\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}",
headers: { headers: {
"Api-Key" => "12345", "Api-Key" => "12345",
"Content-Type" => "application/json", "Content-Type" => "application/json",
@@ -50,12 +59,13 @@ describe DiscourseAi::Inference::OpenAiCompletions do
result = result =
DiscourseAi::Inference::OpenAiCompletions.perform!( DiscourseAi::Inference::OpenAiCompletions.perform!(
[role: "user", content: "hello"], [role: "user", content: "hello"],
"gpt-3.5-turbo-0613", model,
) )
expect(result).to eq(expected) expect(result).to eq(expected)
end end
end end
end
it "supports function calling" do it "supports function calling" do
prompt = [role: "system", content: "you are weatherbot"] prompt = [role: "system", content: "you are weatherbot"]