FEATURE: port to use claude-2 for chat bot (#114)

Claude 1 costs the same as Claude 2 but is less capable. Use Claude
2 in all spots ...

This also fixes streaming so it uses the far more efficient delta-based streaming protocol, where each event carries only the newly generated text instead of the entire completion so far.
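
For context, a minimal sketch of what that protocol change means for delta handling. This is illustrative code, not part of this commit; the method names are made up, but the bodies mirror the old and new `get_delta` in the diff below. Under the old API each streamed event's completion field held the entire response generated so far, so the client had to remember how much it had already emitted; under the new anthropic-version 2023-06-01 protocol each event carries only the new text:

# Illustrative sketch only -- not code from this commit; method names are made up.

# Old protocol: each event's :completion was cumulative (the whole response so
# far), so the client tracked a position and sliced out the new part.
def old_get_delta(partial, context)
  context[:pos] ||= 0
  full = partial[:completion]
  delta = full[context[:pos]..-1]
  context[:pos] = full.length
  delta
end

# New protocol (anthropic-version 2023-06-01): each event's :completion is
# already just the newly generated text, so no bookkeeping is needed.
def new_get_delta(partial, _context)
  partial[:completion]
end

context = {}
old_get_delta({ completion: "Hello " }, context)      # => "Hello "
old_get_delta({ completion: "Hello world" }, context) # => "world"

new_get_delta({ completion: "Hello " }, {}) # => "Hello "
new_get_delta({ completion: "world" }, {})  # => "world"

The spec change further down ({ completion: "world" } instead of { completion: "Hello world" }) reflects exactly this shift.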
Sam 2023-07-27 11:24:44 +10:00 committed by GitHub
parent 2031388f9c
commit 4b0c077ce5
12 changed files with 35 additions and 47 deletions

View File

@@ -26,7 +26,7 @@ en:
       gpt-4: "GPT-4"
       gpt-3:
         5-turbo: "GPT-3.5"
-      claude-v1: "Claude V1"
+      claude-2: "Claude 2"
   review:

View File

@@ -181,7 +181,7 @@ plugins:
     choices:
       - gpt-3.5-turbo
       - gpt-4
-      - claude-v1
+      - claude-2
   ai_bot_enabled_chat_commands:
     type: list
     default: "categories|google|image|search|tags|time"

View File

@@ -4,7 +4,7 @@ module DiscourseAi
   module AiBot
     class AnthropicBot < Bot
       def self.can_reply_as?(bot_user)
-        bot_user.id == DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID
+        bot_user.id == DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID
       end

       def bot_prompt_with_topic_context(post)
@@ -12,7 +12,7 @@ module DiscourseAi
       end

       def prompt_limit
-        7500 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
+        50_000 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
       end

       def title_prompt(post)
@@ -20,14 +20,7 @@ module DiscourseAi
       end

       def get_delta(partial, context)
-        context[:pos] ||= 0
-        full = partial[:completion]
-        delta = full[context[:pos]..-1]
-        context[:pos] = full.length
-        delta
+        partial[:completion]
       end

       private
@@ -45,7 +38,7 @@ module DiscourseAi
       end

       def model_for
-        "claude-v1.3"
+        "claude-2"
       end

       def get_updated_title(prompt)

View File

@@ -5,12 +5,8 @@ module DiscourseAi
     class EntryPoint
       GPT4_ID = -110
       GPT3_5_TURBO_ID = -111
-      CLAUDE_V1_ID = -112
+      CLAUDE_V2_ID = -112

-      BOTS = [
-        [GPT4_ID, "gpt4_bot"],
-        [GPT3_5_TURBO_ID, "gpt3.5_bot"],
-        [CLAUDE_V1_ID, "claude_v1_bot"],
-      ]
+      BOTS = [[GPT4_ID, "gpt4_bot"], [GPT3_5_TURBO_ID, "gpt3.5_bot"], [CLAUDE_V2_ID, "claude_bot"]]

       def self.map_bot_model_to_user_id(model_name)
         case model_name
@@ -18,8 +14,8 @@ module DiscourseAi
           GPT3_5_TURBO_ID
         in "gpt-4"
           GPT4_ID
-        in "claude-v1"
-          CLAUDE_V1_ID
+        in "claude-2"
+          CLAUDE_V2_ID
         else
           nil
         end

View File

@@ -20,8 +20,6 @@ module DiscourseAi
         Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
         Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
         Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
-        Models::Anthropic.new("claude-v1", max_tokens: 9000),
-        Models::Anthropic.new("claude-v1-100k", max_tokens: 100_000),
         Models::Anthropic.new("claude-2", max_tokens: 100_000),
       ]

View File

@@ -16,8 +16,9 @@ module ::DiscourseAi
       )
         url = URI("https://api.anthropic.com/v1/complete")
         headers = {
+          "anthropic-version" => "2023-06-01",
           "x-api-key" => SiteSetting.ai_anthropic_api_key,
-          "Content-Type" => "application/json",
+          "content-type" => "application/json",
         }

         payload = { model: model, prompt: prompt }
@@ -85,17 +86,18 @@ module ::DiscourseAi
             .split("\n")
             .each do |line|
               data = line.split("data: ", 2)[1]
-              next if !data || data.squish == "[DONE]"
+              next if !data

               if !cancelled
                 begin
-                  # partial contains the entire payload till now
                   partial = JSON.parse(data, symbolize_names: true)
-                  response_data = partial[:completion].to_s
-                  yield partial, cancel
+                  response_data << partial[:completion].to_s
+
+                  # ping has no data... do not yield it
+                  yield partial, cancel if partial[:completion]
                 rescue JSON::ParserError
                   nil
+                  # TODO: leftover chunk carry over to next
                 end
               end
             end
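
For reference, a hedged sketch of the stream this parser now consumes. The event payloads below are illustrative, not taken from this commit: each line is `data: <json>`, completion events carry only a delta, ping-style events have no completion field, and there is no [DONE] sentinel, which is why the squish check above was dropped.

require "json"

# Illustrative SSE payload shaped like the new streaming protocol (made-up values).
raw = <<~SSE
  data: {"completion": "Hello ", "stop_reason": null, "model": "claude-2"}

  data: {"completion": "world", "stop_reason": "stop_sequence", "model": "claude-2"}

  data: {"log_id": "ping-without-completion"}
SSE

response_data = +""
raw.split("\n").each do |line|
  data = line.split("data: ", 2)[1]
  next if !data

  partial = JSON.parse(data, symbolize_names: true)
  # Append the delta; events without a completion (like pings) contribute "".
  response_data << partial[:completion].to_s
end

puts response_data # => "Hello world"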

View File

@@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::AiBot::AnthropicBot do
       reply << subject.get_delta({ completion: "Hello " }, context)
       expect(reply).to eq("Hello ")

-      reply << subject.get_delta({ completion: "Hello world" }, context)
+      reply << subject.get_delta({ completion: "world" }, context)
       expect(reply).to eq("Hello world")
     end
   end

View File

@@ -57,7 +57,7 @@ RSpec.describe DiscourseAi::AiBot::EntryPoint do
     end

     it "includes the bot's user_id" do
-      claude_bot = User.find(described_class::CLAUDE_V1_ID)
+      claude_bot = User.find(described_class::CLAUDE_V2_ID)

       claude_post_attrs = post_args.merge(target_usernames: [claude_bot.username].join(","))
       expect { PostCreator.create!(admin, claude_post_attrs) }.to change(

View File

@@ -78,12 +78,12 @@ RSpec.describe Jobs::CreateAiReply do
       let(:deltas) { claude_response.split(" ").map { |w| "#{w} " } }

       before do
-        bot_user = User.find(DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID)
+        bot_user = User.find(DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID)

         AnthropicCompletionStubs.stub_streamed_response(
           DiscourseAi::AiBot::AnthropicBot.new(bot_user).bot_prompt_with_topic_context(post),
           deltas,
-          model: "claude-v1.3",
+          model: "claude-2",
           req_opts: {
             max_tokens_to_sample: 3000,
             temperature: 0.4,
@@ -95,7 +95,7 @@ RSpec.describe Jobs::CreateAiReply do
       it "adds a reply from the Claude bot" do
         subject.execute(
           post_id: topic.first_post.id,
-          bot_user_id: DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID,
+          bot_user_id: DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID,
         )

         expect(topic.posts.last.raw).to eq(expected_response)

View File

@@ -5,7 +5,7 @@ require_relative "../../../../support/anthropic_completion_stubs"
 RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
   subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }

-  let(:model_name) { "claude-v1" }
+  let(:model_name) { "claude-2" }
   let(:max_tokens) { 720 }

   let(:content) do

View File

@@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
       completions =
         DiscourseAi::Inference::AnthropicCompletions.perform!(
           prompt,
-          "claude-v1",
+          "claude-2",
           temperature: req_opts[:temperature],
           max_tokens: req_opts[:max_tokens_to_sample],
           user_id: user_id,
@@ -27,7 +27,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
       expect(AiApiAuditLog.count).to eq(1)
       log = AiApiAuditLog.first

-      request_body = { model: "claude-v1", prompt: prompt }.merge(req_opts).to_json
+      request_body = { model: "claude-2", prompt: prompt }.merge(req_opts).to_json
       response_body = AnthropicCompletionStubs.response(response_text).to_json

       expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
@@ -47,11 +47,11 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
       DiscourseAi::Inference::AnthropicCompletions.perform!(
         prompt,
-        "claude-v1",
+        "claude-2",
         max_tokens: req_opts[:max_tokens_to_sample],
       ) do |partial, cancel|
         data = partial[:completion]
-        content = data if data
+        content << data if data

         cancel.call if content.split(" ").length == 2
       end
@@ -60,7 +60,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
       expect(AiApiAuditLog.count).to eq(1)
       log = AiApiAuditLog.first

-      request_body = { model: "claude-v1", prompt: prompt }.merge(req_opts).to_json
+      request_body = { model: "claude-2", prompt: prompt }.merge(req_opts).to_json

       expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
       expect(log.request_tokens).to eq(6)

View File

@@ -9,7 +9,7 @@ class AnthropicCompletionStubs
       stop_reason: "stop_sequence",
       truncated: false,
       log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
-      model: "claude-v1",
+      model: "claude-2",
       exception: nil,
     }
   end
@@ -18,7 +18,7 @@ class AnthropicCompletionStubs
     WebMock
       .stub_request(:post, "https://api.anthropic.com/v1/complete")
      .with(
-        body: { model: "claude-v1", prompt: prompt, max_tokens_to_sample: 2000 }.merge(
+        body: { model: "claude-2", prompt: prompt, max_tokens_to_sample: 2000 }.merge(
          req_opts,
        ).to_json,
      )
@@ -32,7 +32,7 @@ class AnthropicCompletionStubs
       stop_reason: finish_reason,
       truncated: false,
       log_id: "12b029451c6d18094d868bc04ce83f63",
-      model: "claude-v1",
+      model: "claude-2",
       exception: nil,
     }.to_json
   end
@@ -41,18 +41,17 @@ class AnthropicCompletionStubs
     chunks =
       deltas.each_with_index.map do |_, index|
         if index == (deltas.length - 1)
-          stream_line(deltas.join(""), finish_reason: "stop_sequence")
+          stream_line(deltas[index], finish_reason: "stop_sequence")
         else
-          stream_line(deltas[0..index].join(""))
+          stream_line(deltas[index])
         end
       end

-    chunks << "[DONE]"
     chunks = chunks.join("\n\n")

     WebMock
      .stub_request(:post, "https://api.anthropic.com/v1/complete")
-      .with(body: { model: model || "claude-v1", prompt: prompt }.merge(req_opts).to_json)
+      .with(body: { model: model || "claude-2", prompt: prompt }.merge(req_opts).to_json)
       .to_return(status: 200, body: chunks)
   end
end