FEATURE: support 16k and 32k variants for Azure GPT (#140)

Azure requires a single HTTP endpoint per type of completion.

The settings `ai_openai_gpt35_16k_url` and `ai_openai_gpt4_32k_url` can now be
used to configure the extra endpoints.
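
For illustration, an Azure install might point each setting at its own deployment. A minimal sketch, assuming hypothetical deployment names (COMPANY and the deployment path segments are placeholders; the URL shape follows the Azure chat-completions URL already used in this plugin's specs):

# Hypothetical Azure values; COMPANY and the deployment names are placeholders.
SiteSetting.ai_openai_gpt35_url =
  "https://COMPANY.openai.azure.com/openai/deployments/gpt-35-turbo/chat/completions?api-version=2023-03-15-preview"
SiteSetting.ai_openai_gpt35_16k_url =
  "https://COMPANY.openai.azure.com/openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2023-03-15-preview"
SiteSetting.ai_openai_gpt4_url =
  "https://COMPANY.openai.azure.com/openai/deployments/gpt-4/chat/completions?api-version=2023-03-15-preview"
SiteSetting.ai_openai_gpt4_32k_url =
  "https://COMPANY.openai.azure.com/openai/deployments/gpt-4-32k/chat/completions?api-version=2023-03-15-preview"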

This also amends the token limit, which was off a bit due to function calls, and
fixes a minor JS issue where we were not testing for a property before using it.
Sam committed 2023-08-17 11:00:11 +10:00 (committed by GitHub)
commit b4477ecdcd (parent 01f833f86e)
6 changed files with 73 additions and 48 deletions


@@ -20,7 +20,10 @@ export default class extends Component {
   @computed("composerModel.targetRecipients")
   get isAiBotChat() {
-    if (this.composerModel.targetRecipients) {
+    if (
+      this.composerModel.targetRecipients &&
+      this.currentUser.ai_enabled_chat_bots
+    ) {
       let reciepients = this.composerModel.targetRecipients.split(",");
       return this.currentUser.ai_enabled_chat_bots.any((bot) =>


@@ -31,8 +31,10 @@ en:
     ai_nsfw_flag_threshold_sexy: "Threshold for an image classified as sexy to be considered NSFW."
     ai_nsfw_models: "Models to use for NSFW inference."
 
-    ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (Azure: MUST support function calling and ideally is a GPT3.5 16K endpoint)"
+    ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (for Azuer support)"
+    ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
     ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
+    ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
     ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
     ai_openai_api_key: "API key for OpenAI API"
     ai_anthropic_api_key: "API key for Anthropic API"


@@ -89,7 +89,9 @@ plugins:
       - nsfw_detector
   ai_openai_gpt35_url: "https://api.openai.com/v1/chat/completions"
+  ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
+  ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
   ai_openai_api_key:
     default: ""


@@ -17,12 +17,12 @@ module DiscourseAi
         # also allow for an extra 500 or so spare tokens
         #
         # 2500 are the max reply tokens
-        # Then we have 400 or so for the full function suite
+        # Then we have 450 or so for the full function suite
         # 100 additional for growth around function calls
         if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
-          8192 - 3000
+          8192 - 3050
         else
-          16_384 - 3000
+          16_384 - 3050
         end
       end
@@ -110,7 +110,7 @@ module DiscourseAi
       end
 
       def model_for(low_cost: false)
-        return "gpt-4-0613" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
+        return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
         "gpt-3.5-turbo-16k"
       end
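
As a sanity check on the numbers above (a sketch, not code from this commit): the 3050 reserve is the 2500 max reply tokens, plus roughly 450 for the full function suite, plus 100 of headroom for growth around function calls.

# Rough token budget implied by the comments in the hunk above (illustrative only).
max_reply_tokens  = 2500
function_suite    = 450   # approximate cost of the full function suite
function_headroom = 100   # growth around function calls

reserved = max_reply_tokens + function_suite + function_headroom # => 3050

8192 - reserved   # => 5142 prompt tokens available for GPT-4
16_384 - reserved # => 13_334 prompt tokens available for GPT-3.5 16k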


@@ -62,9 +62,17 @@ module ::DiscourseAi
       )
         url =
           if model.include?("gpt-4")
-            URI(SiteSetting.ai_openai_gpt4_url)
+            if model.include?("32k")
+              URI(SiteSetting.ai_openai_gpt4_32k_url)
+            else
+              URI(SiteSetting.ai_openai_gpt4_url)
+            end
           else
-            URI(SiteSetting.ai_openai_gpt35_url)
+            if model.include?("16k")
+              URI(SiteSetting.ai_openai_gpt35_16k_url)
+            else
+              URI(SiteSetting.ai_openai_gpt35_url)
+            end
           end
 
         headers = { "Content-Type" => "application/json" }
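
Given the substring checks above, the model names that appear elsewhere in this commit resolve to the endpoint settings as follows (a reference summary, not code from the commit):

# Model name => site setting chosen by the include? checks above.
{
  "gpt-4-32k"          => :ai_openai_gpt4_32k_url,  # contains "gpt-4" and "32k"
  "gpt-4"              => :ai_openai_gpt4_url,
  "gpt-3.5-turbo-16k"  => :ai_openai_gpt35_16k_url, # no "gpt-4", contains "16k"
  "gpt-3.5-turbo-0613" => :ai_openai_gpt35_url,     # neither, falls back to the 3.5 endpoint
}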


@@ -7,53 +7,63 @@ describe DiscourseAi::Inference::OpenAiCompletions do
   before { SiteSetting.ai_openai_api_key = "abc-123" }
 
   context "when configured using Azure" do
-    it "Supports GPT 3.5 completions" do
-      SiteSetting.ai_openai_api_key = "12345"
-      SiteSetting.ai_openai_gpt35_url =
-        "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview"
-
-      expected = {
-        id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ",
-        object: "chat.completion",
-        created: 1_687_305_079,
-        model: "gpt-35-turbo",
-        choices: [
-          {
-            index: 0,
-            finish_reason: "stop",
-            message: {
-              role: "assistant",
-              content: "Hi there! How can I assist you today?",
-            },
-          },
-        ],
-        usage: {
-          completion_tokens: 10,
-          prompt_tokens: 9,
-          total_tokens: 19,
-        },
-      }
-
-      stub_request(
-        :post,
-        "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview",
-      ).with(
-        body:
-          "{\"model\":\"gpt-3.5-turbo-0613\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}",
-        headers: {
-          "Api-Key" => "12345",
-          "Content-Type" => "application/json",
-          "Host" => "company.openai.azure.com",
-        },
-      ).to_return(status: 200, body: expected.to_json, headers: {})
-
-      result =
-        DiscourseAi::Inference::OpenAiCompletions.perform!(
-          [role: "user", content: "hello"],
-          "gpt-3.5-turbo-0613",
-        )
-
-      expect(result).to eq(expected)
+    it "Supports custom Azure endpoints for completions" do
+      gpt_url_base =
+        "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview"
+
+      key = "12345"
+      SiteSetting.ai_openai_api_key = key
+
+      [
+        { setting_name: "ai_openai_gpt35_url", model: "gpt-35-turbo" },
+        { setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
+        { setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
+        { setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
+      ].each do |config|
+        gpt_url = "#{gpt_url_base}/#{config[:model]}"
+        setting_name = config[:setting_name]
+        model = config[:model]
+
+        SiteSetting.public_send("#{setting_name}=".to_sym, gpt_url)
+
+        expected = {
+          id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ",
+          object: "chat.completion",
+          created: 1_687_305_079,
+          model: model,
+          choices: [
+            {
+              index: 0,
+              finish_reason: "stop",
+              message: {
+                role: "assistant",
+                content: "Hi there! How can I assist you today?",
+              },
+            },
+          ],
+          usage: {
+            completion_tokens: 10,
+            prompt_tokens: 9,
+            total_tokens: 19,
+          },
+        }
+
+        stub_request(:post, gpt_url).with(
+          body: "{\"model\":\"#{model}\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}",
+          headers: {
+            "Api-Key" => "12345",
+            "Content-Type" => "application/json",
+            "Host" => "company.openai.azure.com",
+          },
+        ).to_return(status: 200, body: expected.to_json, headers: {})
+
+        result =
+          DiscourseAi::Inference::OpenAiCompletions.perform!(
+            [role: "user", content: "hello"],
+            model,
+          )
+
+        expect(result).to eq(expected)
+      end
     end
   end