FEATURE: port to use claude-2 for chat bot (#114)

Claude 1 costs the same as Claude 2 but is less capable, so use Claude 2
everywhere Claude 1 was previously used.

This also fixes streaming so it uses the far more efficient streaming protocol.
This commit is contained in:
Sam 2023-07-27 11:24:44 +10:00 committed by GitHub
parent 2031388f9c
commit 4b0c077ce5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 35 additions and 47 deletions

View File

@ -26,7 +26,7 @@ en:
gpt-4: "GPT-4"
gpt-3:
5-turbo: "GPT-3.5"
claude-v1: "Claude V1"
claude-2: "Claude 2"
review:

View File

@ -181,7 +181,7 @@ plugins:
choices:
- gpt-3.5-turbo
- gpt-4
- claude-v1
- claude-2
ai_bot_enabled_chat_commands:
type: list
default: "categories|google|image|search|tags|time"

View File

@ -4,7 +4,7 @@ module DiscourseAi
module AiBot
class AnthropicBot < Bot
def self.can_reply_as?(bot_user)
bot_user.id == DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID
bot_user.id == DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID
end
def bot_prompt_with_topic_context(post)
@ -12,7 +12,7 @@ module DiscourseAi
end
def prompt_limit
7500 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
50_000 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
end
def title_prompt(post)
@ -20,14 +20,7 @@ module DiscourseAi
end
def get_delta(partial, context)
context[:pos] ||= 0
full = partial[:completion]
delta = full[context[:pos]..-1]
context[:pos] = full.length
delta
partial[:completion]
end
private
@ -45,7 +38,7 @@ module DiscourseAi
end
def model_for
"claude-v1.3"
"claude-2"
end
def get_updated_title(prompt)

View File

@ -5,12 +5,8 @@ module DiscourseAi
class EntryPoint
GPT4_ID = -110
GPT3_5_TURBO_ID = -111
CLAUDE_V1_ID = -112
BOTS = [
[GPT4_ID, "gpt4_bot"],
[GPT3_5_TURBO_ID, "gpt3.5_bot"],
[CLAUDE_V1_ID, "claude_v1_bot"],
]
CLAUDE_V2_ID = -112
BOTS = [[GPT4_ID, "gpt4_bot"], [GPT3_5_TURBO_ID, "gpt3.5_bot"], [CLAUDE_V2_ID, "claude_bot"]]
def self.map_bot_model_to_user_id(model_name)
case model_name
@ -18,8 +14,8 @@ module DiscourseAi
GPT3_5_TURBO_ID
in "gpt-4"
GPT4_ID
in "claude-v1"
CLAUDE_V1_ID
in "claude-2"
CLAUDE_V2_ID
else
nil
end

View File

@ -20,8 +20,6 @@ module DiscourseAi
Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
Models::Anthropic.new("claude-v1", max_tokens: 9000),
Models::Anthropic.new("claude-v1-100k", max_tokens: 100_000),
Models::Anthropic.new("claude-2", max_tokens: 100_000),
]

View File

@ -16,8 +16,9 @@ module ::DiscourseAi
)
url = URI("https://api.anthropic.com/v1/complete")
headers = {
"anthropic-version" => "2023-06-01",
"x-api-key" => SiteSetting.ai_anthropic_api_key,
"Content-Type" => "application/json",
"content-type" => "application/json",
}
payload = { model: model, prompt: prompt }
@ -85,17 +86,18 @@ module ::DiscourseAi
.split("\n")
.each do |line|
data = line.split("data: ", 2)[1]
next if !data || data.squish == "[DONE]"
next if !data
if !cancelled
begin
# partial contains the entire payload till now
partial = JSON.parse(data, symbolize_names: true)
response_data = partial[:completion].to_s
response_data << partial[:completion].to_s
yield partial, cancel
# ping events carry no completion data, so do not yield them
yield partial, cancel if partial[:completion]
rescue JSON::ParserError
nil
# TODO: carry an incomplete (unparseable) trailing chunk over to the next read
end
end
end

View File

@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::AiBot::AnthropicBot do
reply << subject.get_delta({ completion: "Hello " }, context)
expect(reply).to eq("Hello ")
reply << subject.get_delta({ completion: "Hello world" }, context)
reply << subject.get_delta({ completion: "world" }, context)
expect(reply).to eq("Hello world")
end
end

View File

@ -57,7 +57,7 @@ RSpec.describe DiscourseAi::AiBot::EntryPoint do
end
it "includes the bot's user_id" do
claude_bot = User.find(described_class::CLAUDE_V1_ID)
claude_bot = User.find(described_class::CLAUDE_V2_ID)
claude_post_attrs = post_args.merge(target_usernames: [claude_bot.username].join(","))
expect { PostCreator.create!(admin, claude_post_attrs) }.to change(

View File

@ -78,12 +78,12 @@ RSpec.describe Jobs::CreateAiReply do
let(:deltas) { claude_response.split(" ").map { |w| "#{w} " } }
before do
bot_user = User.find(DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID)
bot_user = User.find(DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID)
AnthropicCompletionStubs.stub_streamed_response(
DiscourseAi::AiBot::AnthropicBot.new(bot_user).bot_prompt_with_topic_context(post),
deltas,
model: "claude-v1.3",
model: "claude-2",
req_opts: {
max_tokens_to_sample: 3000,
temperature: 0.4,
@ -95,7 +95,7 @@ RSpec.describe Jobs::CreateAiReply do
it "adds a reply from the Claude bot" do
subject.execute(
post_id: topic.first_post.id,
bot_user_id: DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID,
bot_user_id: DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID,
)
expect(topic.posts.last.raw).to eq(expected_response)

View File

@ -5,7 +5,7 @@ require_relative "../../../../support/anthropic_completion_stubs"
RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
let(:model_name) { "claude-v1" }
let(:model_name) { "claude-2" }
let(:max_tokens) { 720 }
let(:content) do

View File

@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
completions =
DiscourseAi::Inference::AnthropicCompletions.perform!(
prompt,
"claude-v1",
"claude-2",
temperature: req_opts[:temperature],
max_tokens: req_opts[:max_tokens_to_sample],
user_id: user_id,
@ -27,7 +27,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
expect(AiApiAuditLog.count).to eq(1)
log = AiApiAuditLog.first
request_body = { model: "claude-v1", prompt: prompt }.merge(req_opts).to_json
request_body = { model: "claude-2", prompt: prompt }.merge(req_opts).to_json
response_body = AnthropicCompletionStubs.response(response_text).to_json
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
@ -47,11 +47,11 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
DiscourseAi::Inference::AnthropicCompletions.perform!(
prompt,
"claude-v1",
"claude-2",
max_tokens: req_opts[:max_tokens_to_sample],
) do |partial, cancel|
data = partial[:completion]
content = data if data
content << data if data
cancel.call if content.split(" ").length == 2
end
@ -60,7 +60,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
expect(AiApiAuditLog.count).to eq(1)
log = AiApiAuditLog.first
request_body = { model: "claude-v1", prompt: prompt }.merge(req_opts).to_json
request_body = { model: "claude-2", prompt: prompt }.merge(req_opts).to_json
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
expect(log.request_tokens).to eq(6)

View File

@ -9,7 +9,7 @@ class AnthropicCompletionStubs
stop_reason: "stop_sequence",
truncated: false,
log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
model: "claude-v1",
model: "claude-2",
exception: nil,
}
end
@ -18,7 +18,7 @@ class AnthropicCompletionStubs
WebMock
.stub_request(:post, "https://api.anthropic.com/v1/complete")
.with(
body: { model: "claude-v1", prompt: prompt, max_tokens_to_sample: 2000 }.merge(
body: { model: "claude-2", prompt: prompt, max_tokens_to_sample: 2000 }.merge(
req_opts,
).to_json,
)
@ -32,7 +32,7 @@ class AnthropicCompletionStubs
stop_reason: finish_reason,
truncated: false,
log_id: "12b029451c6d18094d868bc04ce83f63",
model: "claude-v1",
model: "claude-2",
exception: nil,
}.to_json
end
@ -41,18 +41,17 @@ class AnthropicCompletionStubs
chunks =
deltas.each_with_index.map do |_, index|
if index == (deltas.length - 1)
stream_line(deltas.join(""), finish_reason: "stop_sequence")
stream_line(deltas[index], finish_reason: "stop_sequence")
else
stream_line(deltas[0..index].join(""))
stream_line(deltas[index])
end
end
chunks << "[DONE]"
chunks = chunks.join("\n\n")
WebMock
.stub_request(:post, "https://api.anthropic.com/v1/complete")
.with(body: { model: model || "claude-v1", prompt: prompt }.merge(req_opts).to_json)
.with(body: { model: model || "claude-2", prompt: prompt }.merge(req_opts).to_json)
.to_return(status: 200, body: chunks)
end
end