From b4477ecdcdea4d9d7a2b9f407180175a6cb22909 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 17 Aug 2023 11:00:11 +1000 Subject: [PATCH] FEATURE: support 16k and 32k variants for Azure GPT (#140) Azure requires a single HTTP endpoint per type of completion. The settings: `ai_openai_gpt35_16k_url` and `ai_openai_gpt4_32k_url` can be used now to configure the extra endpoints This amends token limit which was off a bit due to function calls and fixes a minor JS issue where we were not testing for a property --- .../composer-open.js | 5 +- config/locales/server.en.yml | 4 +- config/settings.yml | 2 + lib/modules/ai_bot/open_ai_bot.rb | 8 +- lib/shared/inference/openai_completions.rb | 12 ++- .../inference/openai_completions_spec.rb | 90 ++++++++++--------- 6 files changed, 73 insertions(+), 48 deletions(-) diff --git a/assets/javascripts/discourse/connectors/composer-after-composer-editor/composer-open.js b/assets/javascripts/discourse/connectors/composer-after-composer-editor/composer-open.js index a31b7188..b095138d 100644 --- a/assets/javascripts/discourse/connectors/composer-after-composer-editor/composer-open.js +++ b/assets/javascripts/discourse/connectors/composer-after-composer-editor/composer-open.js @@ -20,7 +20,10 @@ export default class extends Component { @computed("composerModel.targetRecipients") get isAiBotChat() { - if (this.composerModel.targetRecipients) { + if ( + this.composerModel.targetRecipients && + this.currentUser.ai_enabled_chat_bots + ) { let reciepients = this.composerModel.targetRecipients.split(","); return this.currentUser.ai_enabled_chat_bots.any((bot) => diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index b64f9d0e..8d87f97b 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -31,8 +31,10 @@ en: ai_nsfw_flag_threshold_sexy: "Threshold for an image classified as sexy to be considered NSFW." ai_nsfw_models: "Models to use for NSFW inference." 
- ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (Azure: MUST support function calling and ideally is a GPT3.5 16K endpoint)" + ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (for Azure support)" + ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)" ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)" + ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)" ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)" ai_openai_api_key: "API key for OpenAI API" ai_anthropic_api_key: "API key for Anthropic API" diff --git a/config/settings.yml b/config/settings.yml index a2dce5cb..f401394f 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -89,7 +89,9 @@ plugins: - nsfw_detector ai_openai_gpt35_url: "https://api.openai.com/v1/chat/completions" + ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions" ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions" + ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions" ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings" ai_openai_api_key: default: "" diff --git a/lib/modules/ai_bot/open_ai_bot.rb b/lib/modules/ai_bot/open_ai_bot.rb index bb064a83..a37d9f88 100644 --- a/lib/modules/ai_bot/open_ai_bot.rb +++ b/lib/modules/ai_bot/open_ai_bot.rb @@ -17,12 +17,12 @@ module DiscourseAi # also allow for an extra 500 or so spare tokens # # 2500 are the max reply tokens - # Then we have 400 or so for the full function suite + # Then we have 450 or so for the full function suite # 100 additional for growth around function calls if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID - 8192 - 3000 + 8192 - 3050 else - 16_384 - 3000 + 16_384 - 3050 end end @@ -110,7 +110,7 @@ 
module DiscourseAi end def model_for(low_cost: false) - return "gpt-4-0613" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost + return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost "gpt-3.5-turbo-16k" end diff --git a/lib/shared/inference/openai_completions.rb b/lib/shared/inference/openai_completions.rb index d521214c..1381c6b8 100644 --- a/lib/shared/inference/openai_completions.rb +++ b/lib/shared/inference/openai_completions.rb @@ -62,9 +62,17 @@ module ::DiscourseAi ) url = if model.include?("gpt-4") - URI(SiteSetting.ai_openai_gpt4_url) + if model.include?("32k") + URI(SiteSetting.ai_openai_gpt4_32k_url) + else + URI(SiteSetting.ai_openai_gpt4_url) + end else - URI(SiteSetting.ai_openai_gpt35_url) + if model.include?("16k") + URI(SiteSetting.ai_openai_gpt35_16k_url) + else + URI(SiteSetting.ai_openai_gpt35_url) + end end headers = { "Content-Type" => "application/json" } diff --git a/spec/shared/inference/openai_completions_spec.rb b/spec/shared/inference/openai_completions_spec.rb index dca271ba..5c9d8504 100644 --- a/spec/shared/inference/openai_completions_spec.rb +++ b/spec/shared/inference/openai_completions_spec.rb @@ -7,53 +7,63 @@ describe DiscourseAi::Inference::OpenAiCompletions do before { SiteSetting.ai_openai_api_key = "abc-123" } context "when configured using Azure" do - it "Supports GPT 3.5 completions" do - SiteSetting.ai_openai_api_key = "12345" - SiteSetting.ai_openai_gpt35_url = + it "Supports custom Azure endpoints for completions" do + gpt_url_base = "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview" + key = "12345" + SiteSetting.ai_openai_api_key = key - expected = { - id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ", - object: "chat.completion", - created: 1_687_305_079, - model: "gpt-35-turbo", - choices: [ - { - index: 0, - finish_reason: "stop", - message: { - role: "assistant", - content: "Hi there! 
How can I assist you today?", + [ + { setting_name: "ai_openai_gpt35_url", model: "gpt-35-turbo" }, + { setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" }, + { setting_name: "ai_openai_gpt4_url", model: "gpt-4" }, + { setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" }, + ].each do |config| + gpt_url = "#{gpt_url_base}/#{config[:model]}" + setting_name = config[:setting_name] + model = config[:model] + + SiteSetting.public_send("#{setting_name}=".to_sym, gpt_url) + + expected = { + id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ", + object: "chat.completion", + created: 1_687_305_079, + model: model, + choices: [ + { + index: 0, + finish_reason: "stop", + message: { + role: "assistant", + content: "Hi there! How can I assist you today?", + }, }, + ], + usage: { + completion_tokens: 10, + prompt_tokens: 9, + total_tokens: 19, }, - ], - usage: { - completion_tokens: 10, - prompt_tokens: 9, - total_tokens: 19, - }, - } + } - stub_request( - :post, - "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview", - ).with( - body: - "{\"model\":\"gpt-3.5-turbo-0613\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}", - headers: { - "Api-Key" => "12345", - "Content-Type" => "application/json", - "Host" => "company.openai.azure.com", - }, - ).to_return(status: 200, body: expected.to_json, headers: {}) + stub_request(:post, gpt_url).with( + body: "{\"model\":\"#{model}\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}", + headers: { + "Api-Key" => "12345", + "Content-Type" => "application/json", + "Host" => "company.openai.azure.com", + }, + ).to_return(status: 200, body: expected.to_json, headers: {}) - result = - DiscourseAi::Inference::OpenAiCompletions.perform!( - [role: "user", content: "hello"], - "gpt-3.5-turbo-0613", - ) + result = + DiscourseAi::Inference::OpenAiCompletions.perform!( + [role: "user", content: "hello"], + model, + ) - expect(result).to eq(expected) + 
expect(result).to eq(expected) + end end end