FEATURE: support 16k and 32k variants for Azure GPT (#140)
Azure requires a single HTTP endpoint per type of completion. The settings `ai_openai_gpt35_16k_url` and `ai_openai_gpt4_32k_url` can now be used to configure the extra endpoints. This also amends the token limit, which was slightly off due to function calls, and fixes a minor JS issue where we were not testing for a property.
parent 01f833f86e
commit b4477ecdcd
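For context, here is a rough sketch of how an Azure-backed site might fill in the four completion URL settings after this change, following the deployment URL pattern the plugin already documents for embeddings. The host and deployment names below are illustrative placeholders, not values from this commit:

    # Hypothetical Azure deployment URLs, e.g. set from a Rails console.
    # "company" and the deployment names are placeholders.
    base = "https://company.openai.azure.com/openai/deployments"

    SiteSetting.ai_openai_gpt35_url = "#{base}/gpt-35-turbo/chat/completions?api-version=2023-03-15-preview"
    SiteSetting.ai_openai_gpt35_16k_url = "#{base}/gpt-35-turbo-16k/chat/completions?api-version=2023-03-15-preview"
    SiteSetting.ai_openai_gpt4_url = "#{base}/gpt-4/chat/completions?api-version=2023-03-15-preview"
    SiteSetting.ai_openai_gpt4_32k_url = "#{base}/gpt-4-32k/chat/completions?api-version=2023-03-15-preview"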
@@ -20,7 +20,10 @@ export default class extends Component {
   @computed("composerModel.targetRecipients")
   get isAiBotChat() {
-    if (this.composerModel.targetRecipients) {
+    if (
+      this.composerModel.targetRecipients &&
+      this.currentUser.ai_enabled_chat_bots
+    ) {
       let reciepients = this.composerModel.targetRecipients.split(",");

       return this.currentUser.ai_enabled_chat_bots.any((bot) =>
@@ -31,8 +31,10 @@ en:
     ai_nsfw_flag_threshold_sexy: "Threshold for an image classified as sexy to be considered NSFW."
     ai_nsfw_models: "Models to use for NSFW inference."

-    ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (Azure: MUST support function calling and ideally is a GPT3.5 16K endpoint)"
+    ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (for Azuer support)"
+    ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
     ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
+    ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
     ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
     ai_openai_api_key: "API key for OpenAI API"
     ai_anthropic_api_key: "API key for Anthropic API"
@@ -89,7 +89,9 @@ plugins:
       - nsfw_detector

   ai_openai_gpt35_url: "https://api.openai.com/v1/chat/completions"
+  ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
+  ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
   ai_openai_api_key:
     default: ""
@@ -17,12 +17,12 @@ module DiscourseAi
         # also allow for an extra 500 or so spare tokens
         #
         # 2500 are the max reply tokens
-        # Then we have 400 or so for the full function suite
+        # Then we have 450 or so for the full function suite
         # 100 additional for growth around function calls
         if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
-          8192 - 3000
+          8192 - 3050
         else
-          16_384 - 3000
+          16_384 - 3050
         end
       end

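For reference, the new 3,050 figure matches the comments above: 2,500 reply tokens plus roughly 450 for the full function suite and 100 for growth around function calls. That leaves 8,192 - 3,050 = 5,142 prompt tokens for GPT-4 and 16,384 - 3,050 = 13,334 for the 16k GPT-3.5 model.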
@@ -110,7 +110,7 @@ module DiscourseAi
       end

       def model_for(low_cost: false)
-        return "gpt-4-0613" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
+        return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
         "gpt-3.5-turbo-16k"
       end

@@ -62,9 +62,17 @@ module ::DiscourseAi
         )
         url =
           if model.include?("gpt-4")
-            URI(SiteSetting.ai_openai_gpt4_url)
+            if model.include?("32k")
+              URI(SiteSetting.ai_openai_gpt4_32k_url)
+            else
+              URI(SiteSetting.ai_openai_gpt4_url)
+            end
           else
-            URI(SiteSetting.ai_openai_gpt35_url)
+            if model.include?("16k")
+              URI(SiteSetting.ai_openai_gpt35_16k_url)
+            else
+              URI(SiteSetting.ai_openai_gpt35_url)
+            end
           end
         headers = { "Content-Type" => "application/json" }

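Put differently, the inference layer now selects the endpoint setting from substrings of the requested model name. A minimal standalone sketch of that mapping (endpoint_setting_for is an illustrative helper, not a method in the plugin):

    # Illustration only: which site setting the completion URL is read from,
    # based on the requested model name.
    def endpoint_setting_for(model)
      if model.include?("gpt-4")
        model.include?("32k") ? :ai_openai_gpt4_32k_url : :ai_openai_gpt4_url
      else
        model.include?("16k") ? :ai_openai_gpt35_16k_url : :ai_openai_gpt35_url
      end
    end

    endpoint_setting_for("gpt-4-32k")          # => :ai_openai_gpt4_32k_url
    endpoint_setting_for("gpt-3.5-turbo-16k")  # => :ai_openai_gpt35_16k_url
    endpoint_setting_for("gpt-3.5-turbo-0613") # => :ai_openai_gpt35_url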
@@ -7,53 +7,63 @@ describe DiscourseAi::Inference::OpenAiCompletions do
   before { SiteSetting.ai_openai_api_key = "abc-123" }

   context "when configured using Azure" do
-    it "Supports GPT 3.5 completions" do
-      SiteSetting.ai_openai_api_key = "12345"
-      SiteSetting.ai_openai_gpt35_url =
-        "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview"
-
-      expected = {
-        id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ",
-        object: "chat.completion",
-        created: 1_687_305_079,
-        model: "gpt-35-turbo",
-        choices: [
-          {
-            index: 0,
-            finish_reason: "stop",
-            message: {
-              role: "assistant",
-              content: "Hi there! How can I assist you today?",
-            },
-          },
-        ],
-        usage: {
-          completion_tokens: 10,
-          prompt_tokens: 9,
-          total_tokens: 19,
-        },
-      }
-
-      stub_request(
-        :post,
-        "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview",
-      ).with(
-        body:
-          "{\"model\":\"gpt-3.5-turbo-0613\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}",
-        headers: {
-          "Api-Key" => "12345",
-          "Content-Type" => "application/json",
-          "Host" => "company.openai.azure.com",
-        },
-      ).to_return(status: 200, body: expected.to_json, headers: {})
-
-      result =
-        DiscourseAi::Inference::OpenAiCompletions.perform!(
-          [role: "user", content: "hello"],
-          "gpt-3.5-turbo-0613",
-        )
-
-      expect(result).to eq(expected)
-    end
+    it "Supports custom Azure endpoints for completions" do
+      gpt_url_base =
+        "https://company.openai.azure.com/openai/deployments/deployment/chat/completions?api-version=2023-03-15-preview"
+      key = "12345"
+      SiteSetting.ai_openai_api_key = key
+
+      [
+        { setting_name: "ai_openai_gpt35_url", model: "gpt-35-turbo" },
+        { setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
+        { setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
+        { setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
+      ].each do |config|
+        gpt_url = "#{gpt_url_base}/#{config[:model]}"
+        setting_name = config[:setting_name]
+        model = config[:model]
+
+        SiteSetting.public_send("#{setting_name}=".to_sym, gpt_url)
+
+        expected = {
+          id: "chatcmpl-7TfPzOyBGW5K6dyWp3NPU0mYLGZRQ",
+          object: "chat.completion",
+          created: 1_687_305_079,
+          model: model,
+          choices: [
+            {
+              index: 0,
+              finish_reason: "stop",
+              message: {
+                role: "assistant",
+                content: "Hi there! How can I assist you today?",
+              },
+            },
+          ],
+          usage: {
+            completion_tokens: 10,
+            prompt_tokens: 9,
+            total_tokens: 19,
+          },
+        }
+
+        stub_request(:post, gpt_url).with(
+          body: "{\"model\":\"#{model}\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}",
+          headers: {
+            "Api-Key" => "12345",
+            "Content-Type" => "application/json",
+            "Host" => "company.openai.azure.com",
+          },
+        ).to_return(status: 200, body: expected.to_json, headers: {})
+
+        result =
+          DiscourseAi::Inference::OpenAiCompletions.perform!(
+            [role: "user", content: "hello"],
+            model,
+          )
+
+        expect(result).to eq(expected)
+      end
+    end
   end
 end