FEATURE: implement GPT-4 turbo support (#345)

Keep in mind:

- GPT-4 Turbo is only going to be fully released next year, so this hardcodes the preview model (gpt-4-1106-preview) for now
- Fixes streaming bugs, which became a big problem with GPT-4 Turbo
- Adds an Azure endpoint for Turbo as well

Co-authored-by: Martin Brennan <martin@discourse.org>
commit 3c9901d43a (parent 6380ebd829)
Sam, 2023-12-11 14:59:57 +11:00

7 changed files with 103 additions and 5 deletions


@@ -125,6 +125,7 @@ en:
       bot_names:
         gpt-4: "GPT-4"
+        gpt-4-turbo: "GPT-4 Turbo"
         gpt-3.5-turbo: "GPT-3.5"
         claude-2: "Claude 2"
@@ -135,7 +136,7 @@ en:
         label: "sentiment"
         title: "Experimental AI-powered sentiment analysis of this person's most recent posts."
     review:
       types:


@@ -42,6 +42,7 @@ en:
     ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
     ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
     ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
+    ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 Turbo chat completions. (for Azure support)"
     ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"
     ai_openai_organization: "(Optional, leave empty to omit) Organization id used for the OpenAI API. Passed in using the OpenAI-Organization header."
     ai_openai_embeddings_url: "Custom URL used for the OpenAI embeddings API. (in the case of Azure it can be: https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/embeddings?api-version=2023-05-15)"
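
Note: the new ai_openai_gpt4_turbo_url setting is meant to be overridden the same way as the other Azure URLs. A purely illustrative value, mirroring the embeddings example above (COMPANY, DEPLOYMENT and the api-version are placeholders, not values from this commit):

    # Illustrative Azure override; COMPANY, DEPLOYMENT and api-version are placeholders.
    SiteSetting.ai_openai_gpt4_turbo_url =
      "https://COMPANY.openai.azure.com/openai/deployments/DEPLOYMENT/chat/completions?api-version=2023-05-15"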


@@ -95,6 +95,7 @@ discourse_ai:
   ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
+  ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_dall_e_3_url: "https://api.openai.com/v1/images/generations"
   ai_openai_embeddings_url: "https://api.openai.com/v1/embeddings"
   ai_openai_organization: ""
@@ -256,6 +257,7 @@ discourse_ai:
     choices:
       - gpt-3.5-turbo
       - gpt-4
+      - gpt-4-turbo
       - claude-2
   ai_bot_add_to_header:
     default: true


@@ -8,14 +8,18 @@ module DiscourseAi
       GPT4_ID = -110
       GPT3_5_TURBO_ID = -111
       CLAUDE_V2_ID = -112
+      GPT4_TURBO_ID = -113
       BOTS = [
         [GPT4_ID, "gpt4_bot", "gpt-4"],
         [GPT3_5_TURBO_ID, "gpt3.5_bot", "gpt-3.5-turbo"],
         [CLAUDE_V2_ID, "claude_bot", "claude-2"],
+        [GPT4_TURBO_ID, "gpt4t_bot", "gpt-4-turbo"],
       ]

       def self.map_bot_model_to_user_id(model_name)
         case model_name
+        in "gpt-4-turbo"
+          GPT4_TURBO_ID
         in "gpt-3.5-turbo"
           GPT3_5_TURBO_ID
         in "gpt-4"


@@ -5,6 +5,7 @@ module DiscourseAi
     class OpenAiBot < Bot
       def self.can_reply_as?(bot_user)
         open_ai_bot_ids = [
+          DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID,
           DiscourseAi::AiBot::EntryPoint::GPT4_ID,
           DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID,
         ]
@@ -23,7 +24,9 @@ module DiscourseAi
           buffer += @function_size
         end

-        if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
+        if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
+          150_000 - buffer
+        elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
           8192 - buffer
         else
           16_384 - buffer
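
The branch above sizes the prompt budget per model; the turbo bot gets roughly an order of magnitude more room than plain GPT-4. A worked example with a made-up buffer (the caps come from the hunk, the buffer value is hypothetical):

    buffer = 3_000 # hypothetical reserve for functions and the reply
    { "gpt-4-turbo" => 150_000, "gpt-4" => 8_192, "gpt-3.5-turbo-16k" => 16_384 }
      .each { |model, cap| puts "#{model}: #{cap - buffer} tokens left for the prompt" }
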
@@ -75,8 +78,15 @@ module DiscourseAi
       end

       def model_for(low_cost: false)
-        return "gpt-4" if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID && !low_cost
-        "gpt-3.5-turbo-16k"
+        if low_cost || bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
+          "gpt-3.5-turbo-16k"
+        elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
+          "gpt-4"
+        else
+          # not quite released yet, once released we should replace with
+          # gpt-4-turbo
+          "gpt-4-1106-preview"
+        end
       end

       def clean_username(username)
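
The rewritten model_for routes each bot id to its model and, per the commit message, keeps the preview model hardcoded for turbo. A hypothetical usage sketch (turbo_bot_user is a made-up fixture and the OpenAiBot.new signature is assumed, not taken from this diff):

    bot = DiscourseAi::AiBot::OpenAiBot.new(turbo_bot_user) # backing user id -113
    bot.model_for                 # => "gpt-4-1106-preview" (until gpt-4-turbo ships)
    bot.model_for(low_cost: true) # => "gpt-3.5-turbo-16k"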


@@ -29,7 +29,9 @@ module ::DiscourseAi
         url =
           if model.include?("gpt-4")
-            if model.include?("32k")
+            if model.include?("turbo") || model.include?("1106-preview")
+              URI(SiteSetting.ai_openai_gpt4_turbo_url)
+            elsif model.include?("32k")
               URI(SiteSetting.ai_openai_gpt4_32k_url)
             else
               URI(SiteSetting.ai_openai_gpt4_url)
@@ -134,6 +136,11 @@ module ::DiscourseAi
             response_raw << chunk

+            if (leftover + chunk).length < "data: [DONE]".length
+              leftover += chunk
+              next
+            end
+
             (leftover + chunk)
               .split("\n")
               .each do |line|
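
The guard above is the heart of the streaming fix: with GPT-4 Turbo, chunks often arrive shorter than a complete "data:" line, so anything smaller than the "data: [DONE]" sentinel is buffered instead of parsed. A minimal, self-contained sketch of the same leftover-buffer idea (each_sse_event is a made-up helper, not the plugin's exact code):

    require "json"

    # A raw chunk can end mid-line, so everything after the last newline
    # is carried over and prepended to the next chunk before parsing.
    def each_sse_event(chunks)
      leftover = +""
      chunks.each do |chunk|
        lines = (leftover + chunk).split("\n", -1)
        leftover = lines.pop.to_s # possibly incomplete trailing line
        lines.each do |line|
          payload = line.delete_prefix("data:").strip
          next if payload.empty?
          return if payload == "[DONE]"
          yield JSON.parse(payload, symbolize_names: true)
        end
      end
    end

    # Even fed one character at a time, as the new spec below does,
    # both deltas come out intact.
    raw = "data: {\"choices\":[{\"delta\":{\"content\":\"test\"}}]}\n" \
          "data: {\"choices\":[{\"delta\":{\"content\":\"test1\"}}]}\n" \
          "data: [DONE]\n"
    each_sse_event(raw.chars) { |event| p event }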


@@ -45,6 +45,7 @@ describe DiscourseAi::Inference::OpenAiCompletions do
     { setting_name: "ai_openai_gpt35_16k_url", model: "gpt-35-16k-turbo" },
     { setting_name: "ai_openai_gpt4_url", model: "gpt-4" },
     { setting_name: "ai_openai_gpt4_32k_url", model: "gpt-4-32k" },
+    { setting_name: "ai_openai_gpt4_turbo_url", model: "gpt-4-1106-preview" },
   ].each do |config|
     gpt_url = "#{gpt_url_base}/#{config[:model]}"
     setting_name = config[:setting_name]
@@ -263,6 +264,78 @@ describe DiscourseAi::Inference::OpenAiCompletions do
     expect(log.raw_response_payload).to eq(request_body)
   end

+  context "when Webmock has streaming support" do
+    # See: https://github.com/bblimke/webmock/issues/629
+    let(:mock_net_http) do
+      Class.new(Net::HTTP) do
+        def request(*)
+          super do |response|
+            response.instance_eval do
+              def read_body(*, &)
+                @body.each(&)
+              end
+            end
+
+            yield response if block_given?
+
+            response
+          end
+        end
+      end
+    end
+
+    let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }
+    let(:original_http) { remove_original_net_http }
+    let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }
+
+    let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }
+    let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }
+
+    before do
+      mock_net_http
+      remove_original_net_http
+      stub_net_http
+    end
+
+    after do
+      remove_stubbed_net_http
+      restore_net_http
+    end
+
+    it "supports extremely slow streaming" do
+      raw_data = <<~TEXT
+        data: {"choices":[{"delta":{"content":"test"}}]}
+
+        data: {"choices":[{"delta":{"content":"test1"}}]}
+
+        data: {"choices":[{"delta":{"content":"test2"}}]}
+
+        data: [DONE]
+      TEXT
+
+      chunks = raw_data.split("")
+
+      stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
+        status: 200,
+        body: chunks,
+      )
+
+      partials = []
+      DiscourseAi::Inference::OpenAiCompletions.perform!([], "gpt-3.5-turbo") do |partial, cancel|
+        partials << partial
+      end
+
+      expect(partials.length).to eq(3)
+      expect(partials).to eq(
+        [
+          { choices: [{ delta: { content: "test" } }] },
+          { choices: [{ delta: { content: "test1" } }] },
+          { choices: [{ delta: { content: "test2" } }] },
+        ],
+      )
+    end
+  end
+
   it "can operate in streaming mode" do
     deltas = [
       { role: "assistant" },
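
A closing note on the Webmock scaffolding above: WebMock's stubbed Net::HTTP normally hands the body back in one piece, so read_body never yields per-chunk (that is the linked issue). The Net::HTTP subclass redefines read_body so an Array body yields element by element, and the memoized lets plus the before/after hooks boil down to a constant swap, roughly (a sketch; mock_class stands in for the subclass built above):

    require "net/http"

    original = Net.send(:remove_const, :HTTP)
    Net.send(:const_set, :HTTP, mock_class) # stubbed responses now stream chunk by chunk
    begin
      # ... perform the streaming request under test ...
    ensure
      Net.send(:remove_const, :HTTP)
      Net.send(:const_set, :HTTP, original) # put the real Net::HTTP back
    end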