FEATURE: port to use claude-2 for chat bot (#114)
Claude 1 costs the same as Claude 2 but performs worse, so use Claude 2 everywhere ... This also fixes streaming so that it uses the far more efficient streaming protocol.
This commit is contained in:
parent
2031388f9c
commit
4b0c077ce5
|
@ -26,7 +26,7 @@ en:
|
||||||
gpt-4: "GPT-4"
|
gpt-4: "GPT-4"
|
||||||
gpt-3:
|
gpt-3:
|
||||||
5-turbo: "GPT-3.5"
|
5-turbo: "GPT-3.5"
|
||||||
claude-v1: "Claude V1"
|
claude-2: "Claude 2"
|
||||||
|
|
||||||
|
|
||||||
review:
|
review:
|
||||||
|
|
|
@ -181,7 +181,7 @@ plugins:
|
||||||
choices:
|
choices:
|
||||||
- gpt-3.5-turbo
|
- gpt-3.5-turbo
|
||||||
- gpt-4
|
- gpt-4
|
||||||
- claude-v1
|
- claude-2
|
||||||
ai_bot_enabled_chat_commands:
|
ai_bot_enabled_chat_commands:
|
||||||
type: list
|
type: list
|
||||||
default: "categories|google|image|search|tags|time"
|
default: "categories|google|image|search|tags|time"
|
||||||
|
|
|
@ -4,7 +4,7 @@ module DiscourseAi
|
||||||
module AiBot
|
module AiBot
|
||||||
class AnthropicBot < Bot
|
class AnthropicBot < Bot
|
||||||
def self.can_reply_as?(bot_user)
|
def self.can_reply_as?(bot_user)
|
||||||
bot_user.id == DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID
|
bot_user.id == DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID
|
||||||
end
|
end
|
||||||
|
|
||||||
def bot_prompt_with_topic_context(post)
|
def bot_prompt_with_topic_context(post)
|
||||||
|
@ -12,7 +12,7 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
def prompt_limit
|
def prompt_limit
|
||||||
7500 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
|
50_000 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
|
||||||
end
|
end
|
||||||
|
|
||||||
def title_prompt(post)
|
def title_prompt(post)
|
||||||
|
@ -20,14 +20,7 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_delta(partial, context)
|
def get_delta(partial, context)
|
||||||
context[:pos] ||= 0
|
partial[:completion]
|
||||||
|
|
||||||
full = partial[:completion]
|
|
||||||
delta = full[context[:pos]..-1]
|
|
||||||
|
|
||||||
context[:pos] = full.length
|
|
||||||
|
|
||||||
delta
|
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
@ -45,7 +38,7 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
def model_for
|
def model_for
|
||||||
"claude-v1.3"
|
"claude-2"
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_updated_title(prompt)
|
def get_updated_title(prompt)
|
||||||
|
|
|
@ -5,12 +5,8 @@ module DiscourseAi
|
||||||
class EntryPoint
|
class EntryPoint
|
||||||
GPT4_ID = -110
|
GPT4_ID = -110
|
||||||
GPT3_5_TURBO_ID = -111
|
GPT3_5_TURBO_ID = -111
|
||||||
CLAUDE_V1_ID = -112
|
CLAUDE_V2_ID = -112
|
||||||
BOTS = [
|
BOTS = [[GPT4_ID, "gpt4_bot"], [GPT3_5_TURBO_ID, "gpt3.5_bot"], [CLAUDE_V2_ID, "claude_bot"]]
|
||||||
[GPT4_ID, "gpt4_bot"],
|
|
||||||
[GPT3_5_TURBO_ID, "gpt3.5_bot"],
|
|
||||||
[CLAUDE_V1_ID, "claude_v1_bot"],
|
|
||||||
]
|
|
||||||
|
|
||||||
def self.map_bot_model_to_user_id(model_name)
|
def self.map_bot_model_to_user_id(model_name)
|
||||||
case model_name
|
case model_name
|
||||||
|
@ -18,8 +14,8 @@ module DiscourseAi
|
||||||
GPT3_5_TURBO_ID
|
GPT3_5_TURBO_ID
|
||||||
in "gpt-4"
|
in "gpt-4"
|
||||||
GPT4_ID
|
GPT4_ID
|
||||||
in "claude-v1"
|
in "claude-2"
|
||||||
CLAUDE_V1_ID
|
CLAUDE_V2_ID
|
||||||
else
|
else
|
||||||
nil
|
nil
|
||||||
end
|
end
|
||||||
|
|
|
@ -20,8 +20,6 @@ module DiscourseAi
|
||||||
Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
|
Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
|
||||||
Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
|
Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
|
||||||
Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
|
Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
|
||||||
Models::Anthropic.new("claude-v1", max_tokens: 9000),
|
|
||||||
Models::Anthropic.new("claude-v1-100k", max_tokens: 100_000),
|
|
||||||
Models::Anthropic.new("claude-2", max_tokens: 100_000),
|
Models::Anthropic.new("claude-2", max_tokens: 100_000),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -16,8 +16,9 @@ module ::DiscourseAi
|
||||||
)
|
)
|
||||||
url = URI("https://api.anthropic.com/v1/complete")
|
url = URI("https://api.anthropic.com/v1/complete")
|
||||||
headers = {
|
headers = {
|
||||||
|
"anthropic-version" => "2023-06-01",
|
||||||
"x-api-key" => SiteSetting.ai_anthropic_api_key,
|
"x-api-key" => SiteSetting.ai_anthropic_api_key,
|
||||||
"Content-Type" => "application/json",
|
"content-type" => "application/json",
|
||||||
}
|
}
|
||||||
|
|
||||||
payload = { model: model, prompt: prompt }
|
payload = { model: model, prompt: prompt }
|
||||||
|
@ -85,17 +86,18 @@ module ::DiscourseAi
|
||||||
.split("\n")
|
.split("\n")
|
||||||
.each do |line|
|
.each do |line|
|
||||||
data = line.split("data: ", 2)[1]
|
data = line.split("data: ", 2)[1]
|
||||||
next if !data || data.squish == "[DONE]"
|
next if !data
|
||||||
|
|
||||||
if !cancelled
|
if !cancelled
|
||||||
begin
|
begin
|
||||||
# partial contains the entire payload till now
|
|
||||||
partial = JSON.parse(data, symbolize_names: true)
|
partial = JSON.parse(data, symbolize_names: true)
|
||||||
response_data = partial[:completion].to_s
|
response_data << partial[:completion].to_s
|
||||||
|
|
||||||
yield partial, cancel
|
# ping has no data... do not yield it
|
||||||
|
yield partial, cancel if partial[:completion]
|
||||||
rescue JSON::ParserError
|
rescue JSON::ParserError
|
||||||
nil
|
nil
|
||||||
|
# TODO: carry any leftover partial chunk over to the next read
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::AiBot::AnthropicBot do
|
||||||
reply << subject.get_delta({ completion: "Hello " }, context)
|
reply << subject.get_delta({ completion: "Hello " }, context)
|
||||||
expect(reply).to eq("Hello ")
|
expect(reply).to eq("Hello ")
|
||||||
|
|
||||||
reply << subject.get_delta({ completion: "Hello world" }, context)
|
reply << subject.get_delta({ completion: "world" }, context)
|
||||||
expect(reply).to eq("Hello world")
|
expect(reply).to eq("Hello world")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -57,7 +57,7 @@ RSpec.describe DiscourseAi::AiBot::EntryPoint do
|
||||||
end
|
end
|
||||||
|
|
||||||
it "includes the bot's user_id" do
|
it "includes the bot's user_id" do
|
||||||
claude_bot = User.find(described_class::CLAUDE_V1_ID)
|
claude_bot = User.find(described_class::CLAUDE_V2_ID)
|
||||||
claude_post_attrs = post_args.merge(target_usernames: [claude_bot.username].join(","))
|
claude_post_attrs = post_args.merge(target_usernames: [claude_bot.username].join(","))
|
||||||
|
|
||||||
expect { PostCreator.create!(admin, claude_post_attrs) }.to change(
|
expect { PostCreator.create!(admin, claude_post_attrs) }.to change(
|
||||||
|
|
|
@ -78,12 +78,12 @@ RSpec.describe Jobs::CreateAiReply do
|
||||||
let(:deltas) { claude_response.split(" ").map { |w| "#{w} " } }
|
let(:deltas) { claude_response.split(" ").map { |w| "#{w} " } }
|
||||||
|
|
||||||
before do
|
before do
|
||||||
bot_user = User.find(DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID)
|
bot_user = User.find(DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID)
|
||||||
|
|
||||||
AnthropicCompletionStubs.stub_streamed_response(
|
AnthropicCompletionStubs.stub_streamed_response(
|
||||||
DiscourseAi::AiBot::AnthropicBot.new(bot_user).bot_prompt_with_topic_context(post),
|
DiscourseAi::AiBot::AnthropicBot.new(bot_user).bot_prompt_with_topic_context(post),
|
||||||
deltas,
|
deltas,
|
||||||
model: "claude-v1.3",
|
model: "claude-2",
|
||||||
req_opts: {
|
req_opts: {
|
||||||
max_tokens_to_sample: 3000,
|
max_tokens_to_sample: 3000,
|
||||||
temperature: 0.4,
|
temperature: 0.4,
|
||||||
|
@ -95,7 +95,7 @@ RSpec.describe Jobs::CreateAiReply do
|
||||||
it "adds a reply from the Claude bot" do
|
it "adds a reply from the Claude bot" do
|
||||||
subject.execute(
|
subject.execute(
|
||||||
post_id: topic.first_post.id,
|
post_id: topic.first_post.id,
|
||||||
bot_user_id: DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID,
|
bot_user_id: DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID,
|
||||||
)
|
)
|
||||||
|
|
||||||
expect(topic.posts.last.raw).to eq(expected_response)
|
expect(topic.posts.last.raw).to eq(expected_response)
|
||||||
|
|
|
@ -5,7 +5,7 @@ require_relative "../../../../support/anthropic_completion_stubs"
|
||||||
RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
|
RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
|
||||||
subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
|
subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
|
||||||
|
|
||||||
let(:model_name) { "claude-v1" }
|
let(:model_name) { "claude-2" }
|
||||||
let(:max_tokens) { 720 }
|
let(:max_tokens) { 720 }
|
||||||
|
|
||||||
let(:content) do
|
let(:content) do
|
||||||
|
|
|
@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
|
||||||
completions =
|
completions =
|
||||||
DiscourseAi::Inference::AnthropicCompletions.perform!(
|
DiscourseAi::Inference::AnthropicCompletions.perform!(
|
||||||
prompt,
|
prompt,
|
||||||
"claude-v1",
|
"claude-2",
|
||||||
temperature: req_opts[:temperature],
|
temperature: req_opts[:temperature],
|
||||||
max_tokens: req_opts[:max_tokens_to_sample],
|
max_tokens: req_opts[:max_tokens_to_sample],
|
||||||
user_id: user_id,
|
user_id: user_id,
|
||||||
|
@ -27,7 +27,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
|
||||||
expect(AiApiAuditLog.count).to eq(1)
|
expect(AiApiAuditLog.count).to eq(1)
|
||||||
log = AiApiAuditLog.first
|
log = AiApiAuditLog.first
|
||||||
|
|
||||||
request_body = { model: "claude-v1", prompt: prompt }.merge(req_opts).to_json
|
request_body = { model: "claude-2", prompt: prompt }.merge(req_opts).to_json
|
||||||
response_body = AnthropicCompletionStubs.response(response_text).to_json
|
response_body = AnthropicCompletionStubs.response(response_text).to_json
|
||||||
|
|
||||||
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
|
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
|
||||||
|
@ -47,11 +47,11 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
|
||||||
|
|
||||||
DiscourseAi::Inference::AnthropicCompletions.perform!(
|
DiscourseAi::Inference::AnthropicCompletions.perform!(
|
||||||
prompt,
|
prompt,
|
||||||
"claude-v1",
|
"claude-2",
|
||||||
max_tokens: req_opts[:max_tokens_to_sample],
|
max_tokens: req_opts[:max_tokens_to_sample],
|
||||||
) do |partial, cancel|
|
) do |partial, cancel|
|
||||||
data = partial[:completion]
|
data = partial[:completion]
|
||||||
content = data if data
|
content << data if data
|
||||||
cancel.call if content.split(" ").length == 2
|
cancel.call if content.split(" ").length == 2
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
|
||||||
expect(AiApiAuditLog.count).to eq(1)
|
expect(AiApiAuditLog.count).to eq(1)
|
||||||
log = AiApiAuditLog.first
|
log = AiApiAuditLog.first
|
||||||
|
|
||||||
request_body = { model: "claude-v1", prompt: prompt }.merge(req_opts).to_json
|
request_body = { model: "claude-2", prompt: prompt }.merge(req_opts).to_json
|
||||||
|
|
||||||
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
|
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
|
||||||
expect(log.request_tokens).to eq(6)
|
expect(log.request_tokens).to eq(6)
|
||||||
|
|
|
@ -9,7 +9,7 @@ class AnthropicCompletionStubs
|
||||||
stop_reason: "stop_sequence",
|
stop_reason: "stop_sequence",
|
||||||
truncated: false,
|
truncated: false,
|
||||||
log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
|
log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
|
||||||
model: "claude-v1",
|
model: "claude-2",
|
||||||
exception: nil,
|
exception: nil,
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
@ -18,7 +18,7 @@ class AnthropicCompletionStubs
|
||||||
WebMock
|
WebMock
|
||||||
.stub_request(:post, "https://api.anthropic.com/v1/complete")
|
.stub_request(:post, "https://api.anthropic.com/v1/complete")
|
||||||
.with(
|
.with(
|
||||||
body: { model: "claude-v1", prompt: prompt, max_tokens_to_sample: 2000 }.merge(
|
body: { model: "claude-2", prompt: prompt, max_tokens_to_sample: 2000 }.merge(
|
||||||
req_opts,
|
req_opts,
|
||||||
).to_json,
|
).to_json,
|
||||||
)
|
)
|
||||||
|
@ -32,7 +32,7 @@ class AnthropicCompletionStubs
|
||||||
stop_reason: finish_reason,
|
stop_reason: finish_reason,
|
||||||
truncated: false,
|
truncated: false,
|
||||||
log_id: "12b029451c6d18094d868bc04ce83f63",
|
log_id: "12b029451c6d18094d868bc04ce83f63",
|
||||||
model: "claude-v1",
|
model: "claude-2",
|
||||||
exception: nil,
|
exception: nil,
|
||||||
}.to_json
|
}.to_json
|
||||||
end
|
end
|
||||||
|
@ -41,18 +41,17 @@ class AnthropicCompletionStubs
|
||||||
chunks =
|
chunks =
|
||||||
deltas.each_with_index.map do |_, index|
|
deltas.each_with_index.map do |_, index|
|
||||||
if index == (deltas.length - 1)
|
if index == (deltas.length - 1)
|
||||||
stream_line(deltas.join(""), finish_reason: "stop_sequence")
|
stream_line(deltas[index], finish_reason: "stop_sequence")
|
||||||
else
|
else
|
||||||
stream_line(deltas[0..index].join(""))
|
stream_line(deltas[index])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
chunks << "[DONE]"
|
|
||||||
chunks = chunks.join("\n\n")
|
chunks = chunks.join("\n\n")
|
||||||
|
|
||||||
WebMock
|
WebMock
|
||||||
.stub_request(:post, "https://api.anthropic.com/v1/complete")
|
.stub_request(:post, "https://api.anthropic.com/v1/complete")
|
||||||
.with(body: { model: model || "claude-v1", prompt: prompt }.merge(req_opts).to_json)
|
.with(body: { model: model || "claude-2", prompt: prompt }.merge(req_opts).to_json)
|
||||||
.to_return(status: 200, body: chunks)
|
.to_return(status: 200, body: chunks)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue