FEATURE: implement GPT-4 turbo support (#345)
Keep in mind: - GPT-4 turbo is only going to be fully released next year, so this hardcodes the preview model for now - Fixes streaming bugs which became a big problem with GPT-4 turbo - Adds an Azure endpoint for turbo as well Co-authored-by: Martin Brennan <martin@discourse.org>
This commit is contained in:
parent
6380ebd829
commit
3c9901d43a
|
@ -125,6 +125,7 @@ en:
|
||||||
|
|
||||||
bot_names:
|
bot_names:
|
||||||
gpt-4: "GPT-4"
|
gpt-4: "GPT-4"
|
||||||
|
gpt-4-turbo: "GPT-4 Turbo"
|
||||||
gpt-3:
|
gpt-3:
|
||||||
5-turbo: "GPT-3.5"
|
5-turbo: "GPT-3.5"
|
||||||
claude-2: "Claude 2"
|
claude-2: "Claude 2"
|
||||||
|
|
|
@ -42,6 +42,7 @@ en:
|
||||||
ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
|
ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
|
||||||
ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
|
ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
|
||||||
ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
|
ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
|
||||||
|
ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 Turbo chat completions. (for Azure support)"
|
||||||
ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"
|
ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"
|
||||||
ai_openai_organization: "(Optional, leave empty to omit) Organization id used for the OpenAI API. Passed in using the OpenAI-Organization header."
|
ai_openai_organization: "(Optional, leave empty to omit) Organization id used for the OpenAI API. Passed in using the OpenAI-Organization header."
|
||||||
ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
|
ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
|
||||||
|
|
|
@ -95,6 +95,7 @@ discourse_ai:
|
||||||
ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
|
ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
|
||||||
ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
|
ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
|
||||||
ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
|
ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
|
||||||
|
ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions"
|
||||||
ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations"
|
ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations"
|
||||||
ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
|
ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
|
||||||
ai_openai_organization: ""
|
ai_openai_organization: ""
|
||||||
|
@ -256,6 +257,7 @@ discourse_ai:
|
||||||
choices:
|
choices:
|
||||||
- gpt-3.5-turbo
|
- gpt-3.5-turbo
|
||||||
- gpt-4
|
- gpt-4
|
||||||
|
- gpt-4-turbo
|
||||||
- claude-2
|
- claude-2
|
||||||
ai_bot_add_to_header:
|
ai_bot_add_to_header:
|
||||||
default: true
|
default: true
|
||||||
|
|
|
@ -8,14 +8,18 @@ module DiscourseAi
|
||||||
GPT4_ID = -110
|
GPT4_ID = -110
|
||||||
GPT3_5_TURBO_ID = -111
|
GPT3_5_TURBO_ID = -111
|
||||||
CLAUDE_V2_ID = -112
|
CLAUDE_V2_ID = -112
|
||||||
|
GPT4_TURBO_ID = -113
|
||||||
BOTS = [
|
BOTS = [
|
||||||
[GPT4_ID, "gpt4_bot", "gpt-4"],
|
[GPT4_ID, "gpt4_bot", "gpt-4"],
|
||||||
[GPT3_5_TURBO_ID, "gpt3.5_bot", "gpt-3.5-turbo"],
|
[GPT3_5_TURBO_ID, "gpt3.5_bot", "gpt-3.5-turbo"],
|
||||||
[CLAUDE_V2_ID, "claude_bot", "claude-2"],
|
[CLAUDE_V2_ID, "claude_bot", "claude-2"],
|
||||||
|
[GPT4_TURBO_ID, "gpt4t_bot", "gpt-4-turbo"],
|
||||||
]
|
]
|
||||||
|
|
||||||
def self.map_bot_model_to_user_id(model_name)
|
def self.map_bot_model_to_user_id(model_name)
|
||||||
case model_name
|
case model_name
|
||||||
|
in "gpt-4-turbo"
|
||||||
|
GPT4_TURBO_ID
|
||||||
in "gpt-3.5-turbo"
|
in "gpt-3.5-turbo"
|
||||||
GPT3_5_TURBO_ID
|
GPT3_5_TURBO_ID
|
||||||
in "gpt-4"
|
in "gpt-4"
|
||||||
|
|
|
@ -5,6 +5,7 @@ module DiscourseAi
|
||||||
class OpenAiBot < Bot
|
class OpenAiBot < Bot
|
||||||
def self.can_reply_as?(bot_user)
|
def self.can_reply_as?(bot_user)
|
||||||
open_ai_bot_ids = [
|
open_ai_bot_ids = [
|
||||||
|
DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID,
|
||||||
DiscourseAi::AiBot::EntryPoint::GPT4_ID,
|
DiscourseAi::AiBot::EntryPoint::GPT4_ID,
|
||||||
DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID,
|
DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID,
|
||||||
]
|
]
|
||||||
|
@ -23,7 +24,9 @@ module DiscourseAi
|
||||||
buffer += @function_size
|
buffer += @function_size
|
||||||
end
|
end
|
||||||
|
|
||||||
if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
|
if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
|
||||||
|
150_000 - buffer
|
||||||
|
elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
|
||||||
8192 - buffer
|
8192 - buffer
|
||||||
else
|
else
|
||||||
16_384 - buffer
|
16_384 - buffer
|
||||||
|
@ -75,8 +78,15 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
def model_for(low_cost: false)
|
def model_for(low_cost: false)
|
||||||
return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
|
if low_cost || bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
|
||||||
"gpt-3.5-turbo-16k"
|
"gpt-3.5-turbo-16k"
|
||||||
|
elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
|
||||||
|
"gpt-4"
|
||||||
|
else
|
||||||
|
# not quite released yet, once released we should replace with
|
||||||
|
# gpt-4-turbo
|
||||||
|
"gpt-4-1106-preview"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def clean_username(username)
|
def clean_username(username)
|
||||||
|
|
|
@ -29,7 +29,9 @@ module ::DiscourseAi
|
||||||
|
|
||||||
url =
|
url =
|
||||||
if model.include?("gpt-4")
|
if model.include?("gpt-4")
|
||||||
if model.include?("32k")
|
if model.include?("turbo") || model.include?("1106-preview")
|
||||||
|
URI(SiteSetting.ai_openai_gpt4_turbo_url)
|
||||||
|
elsif model.include?("32k")
|
||||||
URI(SiteSetting.ai_openai_gpt4_32k_url)
|
URI(SiteSetting.ai_openai_gpt4_32k_url)
|
||||||
else
|
else
|
||||||
URI(SiteSetting.ai_openai_gpt4_url)
|
URI(SiteSetting.ai_openai_gpt4_url)
|
||||||
|
@ -134,6 +136,11 @@ module ::DiscourseAi
|
||||||
|
|
||||||
response_raw << chunk
|
response_raw << chunk
|
||||||
|
|
||||||
|
if (leftover + chunk).length < "data: [DONE]".length
|
||||||
|
leftover += chunk
|
||||||
|
next
|
||||||
|
end
|
||||||
|
|
||||||
(leftover + chunk)
|
(leftover + chunk)
|
||||||
.split("\n")
|
.split("\n")
|
||||||
.each do |line|
|
.each do |line|
|
||||||
|
|
|
@ -45,6 +45,7 @@ describe DiscourseAi::Inference::OpenAiCompletions do
|
||||||
{ setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
|
{ setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
|
||||||
{ setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
|
{ setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
|
||||||
{ setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
|
{ setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
|
||||||
|
{ setting_name: "ai_openai_gpt4_turbo_url", model: "gpt-4-1106-preview" },
|
||||||
].each do |config|
|
].each do |config|
|
||||||
gpt_url = "#{gpt_url_base}/#{config[:model]}"
|
gpt_url = "#{gpt_url_base}/#{config[:model]}"
|
||||||
setting_name = config[:setting_name]
|
setting_name = config[:setting_name]
|
||||||
|
@ -263,6 +264,78 @@ describe DiscourseAi::Inference::OpenAiCompletions do
|
||||||
expect(log.raw_response_payload).to eq(request_body)
|
expect(log.raw_response_payload).to eq(request_body)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "when Webmock has streaming support" do
|
||||||
|
# See: https://github.com/bblimke/webmock/issues/629
|
||||||
|
let(:mock_net_http) do
|
||||||
|
Class.new(Net::HTTP) do
|
||||||
|
def request(*)
|
||||||
|
super do |response|
|
||||||
|
response.instance_eval do
|
||||||
|
def read_body(*, &)
|
||||||
|
@body.each(&)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
yield response if block_given?
|
||||||
|
|
||||||
|
response
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }
|
||||||
|
let(:original_http) { remove_original_net_http }
|
||||||
|
let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }
|
||||||
|
|
||||||
|
let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }
|
||||||
|
let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }
|
||||||
|
|
||||||
|
before do
|
||||||
|
mock_net_http
|
||||||
|
remove_original_net_http
|
||||||
|
stub_net_http
|
||||||
|
end
|
||||||
|
|
||||||
|
after do
|
||||||
|
remove_stubbed_net_http
|
||||||
|
restore_net_http
|
||||||
|
end
|
||||||
|
|
||||||
|
it "support extremely slow streaming" do
|
||||||
|
raw_data = <<~TEXT
|
||||||
|
data: {"choices":[{"delta":{"content":"test"}}]}
|
||||||
|
|
||||||
|
data: {"choices":[{"delta":{"content":"test1"}}]}
|
||||||
|
|
||||||
|
data: {"choices":[{"delta":{"content":"test2"}}]}
|
||||||
|
|
||||||
|
data: [DONE]
|
||||||
|
TEXT
|
||||||
|
|
||||||
|
chunks = raw_data.split("")
|
||||||
|
|
||||||
|
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
|
||||||
|
status: 200,
|
||||||
|
body: chunks,
|
||||||
|
)
|
||||||
|
|
||||||
|
partials = []
|
||||||
|
DiscourseAi::Inference::OpenAiCompletions.perform!([], "gpt-3.5-turbo") do |partial, cancel|
|
||||||
|
partials << partial
|
||||||
|
end
|
||||||
|
|
||||||
|
expect(partials.length).to eq(3)
|
||||||
|
expect(partials).to eq(
|
||||||
|
[
|
||||||
|
{ choices: [{ delta: { content: "test" } }] },
|
||||||
|
{ choices: [{ delta: { content: "test1" } }] },
|
||||||
|
{ choices: [{ delta: { content: "test2" } }] },
|
||||||
|
],
|
||||||
|
)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
it "can operate in streaming mode" do
|
it "can operate in streaming mode" do
|
||||||
deltas = [
|
deltas = [
|
||||||
{ role: "assistant" },
|
{ role: "assistant" },
|
||||||
|
|
Loading…
Reference in New Issue