FEATURE: implement GPT-4 Turbo support (#345)

Keep in mind:

- GPT-4 Turbo is only going to be fully released next year, so this hardcodes the preview model for now
- Fixes streaming bugs, which became a big problem with GPT-4 Turbo
- Adds an Azure endpoint for Turbo as well

Co-authored-by: Martin Brennan <martin@discourse.org>
commit 3c9901d43a
parent 6380ebd829
@@ -125,6 +125,7 @@ en:
     bot_names:
       gpt-4: "GPT-4"
+      gpt-4-turbo: "GPT-4 Turbo"
       gpt-3:
         5-turbo: "GPT-3.5"
       claude-2: "Claude 2"
@@ -135,7 +136,7 @@ en:
       label: "sentiment"
       title: "Experimental AI-powered sentiment analysis of this person's most recent posts."

     review:
       types:
@@ -42,6 +42,7 @@ en:
     ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
     ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
     ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
+    ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 Turbo chat completions. (for Azure support)"
     ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"
     ai_openai_organization: "(Optional, leave empty to omit) Organization id used for the OpenAI API. Passed in using the OpenAI-Organization header."
     ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
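Because the turbo endpoint is overridable, Azure users can point ai_openai_gpt4_turbo_url at a deployment-specific URL. Following the pattern the embeddings description above documents, that would be something like https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/chat/completions?api-version=2023-05-15 — a hypothetical example; substitute your own resource name, deployment, and API version.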
@@ -95,6 +95,7 @@ discourse_ai:
   ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
+  ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations"
   ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
   ai_openai_organization: ""
@@ -256,6 +257,7 @@ discourse_ai:
     choices:
       - gpt-3.5-turbo
      - gpt-4
+      - gpt-4-turbo
      - claude-2
   ai_bot_add_to_header:
     default: true
@@ -8,14 +8,18 @@ module DiscourseAi
     GPT4_ID = -110
     GPT3_5_TURBO_ID = -111
     CLAUDE_V2_ID = -112
+    GPT4_TURBO_ID = -113
     BOTS = [
       [GPT4_ID, "gpt4_bot", "gpt-4"],
       [GPT3_5_TURBO_ID, "gpt3.5_bot", "gpt-3.5-turbo"],
       [CLAUDE_V2_ID, "claude_bot", "claude-2"],
+      [GPT4_TURBO_ID, "gpt4t_bot", "gpt-4-turbo"],
     ]

     def self.map_bot_model_to_user_id(model_name)
       case model_name
+      in "gpt-4-turbo"
+        GPT4_TURBO_ID
       in "gpt-3.5-turbo"
         GPT3_5_TURBO_ID
       in "gpt-4"
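For reference, a minimal sketch of how the new mapping resolves — the constant and method names come from the hunk above; the call itself is illustrative:

    # Illustrative call: the new model name resolves to its reserved bot user id.
    DiscourseAi::AiBot::EntryPoint.map_bot_model_to_user_id("gpt-4-turbo")
    # => -113 (GPT4_TURBO_ID)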
@@ -5,6 +5,7 @@ module DiscourseAi
     class OpenAiBot < Bot
       def self.can_reply_as?(bot_user)
         open_ai_bot_ids = [
+          DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID,
           DiscourseAi::AiBot::EntryPoint::GPT4_ID,
           DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID,
         ]
@@ -23,7 +24,9 @@ module DiscourseAi
           buffer += @function_size
         end

-        if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
+        if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
+          150_000 - buffer
+        elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
           8192 - buffer
         else
           16_384 - buffer
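In other words, each bot's prompt budget is its model allowance minus whatever was reserved for function definitions: with a hypothetical 2,000-token function buffer, the turbo bot would get 150_000 - 2_000 = 148_000 tokens, against 8192 - 2_000 = 6_192 for classic GPT-4.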
@@ -75,8 +78,15 @@ module DiscourseAi
       end

       def model_for(low_cost: false)
-        return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
-        "gpt-3.5-turbo-16k"
+        if low_cost || bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
+          "gpt-3.5-turbo-16k"
+        elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
+          "gpt-4"
+        else
+          # not quite released yet, once released we should replace with
+          # gpt-4-turbo
+          "gpt-4-1106-preview"
+        end
       end

       def clean_username(username)
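The net behavior, sketched with hypothetical bot instances — only the returned strings come from the diff:

    # turbo_bot / gpt4_bot are hypothetical instances wired to the entry-point ids.
    turbo_bot.model_for                  # => "gpt-4-1106-preview" (preview stand-in for gpt-4-turbo)
    turbo_bot.model_for(low_cost: true)  # => "gpt-3.5-turbo-16k"
    gpt4_bot.model_for                   # => "gpt-4"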
@@ -29,7 +29,9 @@ module ::DiscourseAi

       url =
         if model.include?("gpt-4")
-          if model.include?("32k")
+          if model.include?("turbo") || model.include?("1106-preview")
+            URI(SiteSetting.ai_openai_gpt4_turbo_url)
+          elsif model.include?("32k")
             URI(SiteSetting.ai_openai_gpt4_32k_url)
           else
             URI(SiteSetting.ai_openai_gpt4_url)
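Matching on "1106-preview" as well as "turbo" is what lets the hardcoded preview model name returned by model_for reach the new ai_openai_gpt4_turbo_url setting. The branch resolves as follows:

    # Resolution per the hunk above:
    #   "gpt-4-1106-preview" or "gpt-4-turbo" -> SiteSetting.ai_openai_gpt4_turbo_url
    #   "gpt-4-32k"                           -> SiteSetting.ai_openai_gpt4_32k_url
    #   any other "gpt-4" model               -> SiteSetting.ai_openai_gpt4_url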
@@ -134,6 +136,11 @@ module ::DiscourseAi

           response_raw << chunk

+          if (leftover + chunk).length < "data: [DONE]".length
+            leftover += chunk
+            next
+          end
+
           (leftover + chunk)
             .split("\n")
             .each do |line|
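This is the streaming fix: previously, a chunk smaller than the terminating "data: [DONE]" marker could reach the line parser before a complete payload had arrived. A self-contained sketch of the buffering idea — the guard is the one the hunk adds; the surrounding parse loop is simplified and the variable names are illustrative, not the plugin's full SSE parser:

    # Simplified sketch of accumulating tiny chunks before parsing SSE lines.
    leftover = +""
    events = []

    ["data: {\"a\"", ":1}\n\nda", "ta: [DONE]\n"].each do |chunk|
      # Too little data to possibly contain a complete marker: buffer and wait.
      if (leftover + chunk).length < "data: [DONE]".length
        leftover += chunk
        next
      end

      buffer = leftover + chunk
      # Keep the (possibly incomplete) final line for the next iteration.
      *complete, leftover = buffer.split("\n", -1)
      complete.each { |line| events << line.delete_prefix("data: ") if line.start_with?("data: ") }
    end

    events # => ["{\"a\":1}", "[DONE]"]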
@@ -45,6 +45,7 @@ describe DiscourseAi::Inference::OpenAiCompletions do
       { setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
       { setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
       { setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
+      { setting_name: "ai_openai_gpt4_turbo_url", model: "gpt-4-1106-preview" },
     ].each do |config|
       gpt_url = "#{gpt_url_base}/#{config[:model]}"
       setting_name = config[:setting_name]
@@ -263,6 +264,78 @@ describe DiscourseAi::Inference::OpenAiCompletions do
       expect(log.raw_response_payload).to eq(request_body)
     end

+    context "when Webmock has streaming support" do
+      # See: https://github.com/bblimke/webmock/issues/629
+      let(:mock_net_http) do
+        Class.new(Net::HTTP) do
+          def request(*)
+            super do |response|
+              response.instance_eval do
+                def read_body(*, &)
+                  @body.each(&)
+                end
+              end
+
+              yield response if block_given?
+
+              response
+            end
+          end
+        end
+      end
+
+      let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }
+      let(:original_http) { remove_original_net_http }
+      let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }
+
+      let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }
+      let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }
+
+      before do
+        mock_net_http
+        remove_original_net_http
+        stub_net_http
+      end
+
+      after do
+        remove_stubbed_net_http
+        restore_net_http
+      end
+
+      it "supports extremely slow streaming" do
+        raw_data = <<~TEXT
+          data: {"choices":[{"delta":{"content":"test"}}]}
+
+          data: {"choices":[{"delta":{"content":"test1"}}]}
+
+          data: {"choices":[{"delta":{"content":"test2"}}]}
+
+          data: [DONE]
+        TEXT
+
+        chunks = raw_data.split("")
+
+        stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+          status: 200,
+          body: chunks,
+        )
+
+        partials = []
+        DiscourseAi::Inference::OpenAiCompletions.perform!([], "gpt-3.5-turbo") do |partial, cancel|
+          partials << partial
+        end
+
+        expect(partials.length).to eq(3)
+        expect(partials).to eq(
+          [
+            { choices: [{ delta: { content: "test" } }] },
+            { choices: [{ delta: { content: "test1" } }] },
+            { choices: [{ delta: { content: "test2" } }] },
+          ],
+        )
+      end
+    end
+
     it "can operate in streaming mode" do
       deltas = [
         { role: "assistant" },
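WebMock cannot deliver a stubbed response body incrementally (see the issue linked in the spec), so the test temporarily swaps Net::HTTP for a subclass whose read_body yields the stubbed array element by element. Splitting raw_data with split("") then feeds the completion code one character at a time, exercising the new leftover buffering on the smallest possible chunks.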