FEATURE: port to use claude-2 for chat bot (#114)
Claude 1 costs the same as Claude 2 but is less capable, so use Claude 2 everywhere ... This also fixes streaming so that it uses the far more efficient streaming protocol.
This commit is contained in:
parent
2031388f9c
commit
4b0c077ce5
|
@ -13,7 +13,7 @@ en:
|
|||
reviewables:
|
||||
model_used: "Model used:"
|
||||
accuracy: "Accuracy:"
|
||||
|
||||
|
||||
embeddings:
|
||||
semantic_search: "Topics (Semantic)"
|
||||
|
||||
|
@ -26,7 +26,7 @@ en:
|
|||
gpt-4: "GPT-4"
|
||||
gpt-3:
|
||||
5-turbo: "GPT-3.5"
|
||||
claude-v1: "Claude V1"
|
||||
claude-2: "Claude 2"
|
||||
|
||||
|
||||
review:
|
||||
|
|
|
@ -181,7 +181,7 @@ plugins:
|
|||
choices:
|
||||
- gpt-3.5-turbo
|
||||
- gpt-4
|
||||
- claude-v1
|
||||
- claude-2
|
||||
ai_bot_enabled_chat_commands:
|
||||
type: list
|
||||
default: "categories|google|image|search|tags|time"
|
||||
|
|
|
@ -4,7 +4,7 @@ module DiscourseAi
|
|||
module AiBot
|
||||
class AnthropicBot < Bot
|
||||
def self.can_reply_as?(bot_user)
|
||||
bot_user.id == DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID
|
||||
bot_user.id == DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID
|
||||
end
|
||||
|
||||
def bot_prompt_with_topic_context(post)
|
||||
|
@ -12,7 +12,7 @@ module DiscourseAi
|
|||
end
|
||||
|
||||
def prompt_limit
|
||||
7500 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
|
||||
50_000 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
|
||||
end
|
||||
|
||||
def title_prompt(post)
|
||||
|
@ -20,14 +20,7 @@ module DiscourseAi
|
|||
end
|
||||
|
||||
def get_delta(partial, context)
|
||||
context[:pos] ||= 0
|
||||
|
||||
full = partial[:completion]
|
||||
delta = full[context[:pos]..-1]
|
||||
|
||||
context[:pos] = full.length
|
||||
|
||||
delta
|
||||
partial[:completion]
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -45,7 +38,7 @@ module DiscourseAi
|
|||
end
|
||||
|
||||
def model_for
|
||||
"claude-v1.3"
|
||||
"claude-2"
|
||||
end
|
||||
|
||||
def get_updated_title(prompt)
|
||||
|
|
|
@ -5,12 +5,8 @@ module DiscourseAi
|
|||
class EntryPoint
|
||||
GPT4_ID = -110
|
||||
GPT3_5_TURBO_ID = -111
|
||||
CLAUDE_V1_ID = -112
|
||||
BOTS = [
|
||||
[GPT4_ID, "gpt4_bot"],
|
||||
[GPT3_5_TURBO_ID, "gpt3.5_bot"],
|
||||
[CLAUDE_V1_ID, "claude_v1_bot"],
|
||||
]
|
||||
CLAUDE_V2_ID = -112
|
||||
BOTS = [[GPT4_ID, "gpt4_bot"], [GPT3_5_TURBO_ID, "gpt3.5_bot"], [CLAUDE_V2_ID, "claude_bot"]]
|
||||
|
||||
def self.map_bot_model_to_user_id(model_name)
|
||||
case model_name
|
||||
|
@ -18,8 +14,8 @@ module DiscourseAi
|
|||
GPT3_5_TURBO_ID
|
||||
in "gpt-4"
|
||||
GPT4_ID
|
||||
in "claude-v1"
|
||||
CLAUDE_V1_ID
|
||||
in "claude-2"
|
||||
CLAUDE_V2_ID
|
||||
else
|
||||
nil
|
||||
end
|
||||
|
|
|
@ -20,8 +20,6 @@ module DiscourseAi
|
|||
Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
|
||||
Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
|
||||
Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
|
||||
Models::Anthropic.new("claude-v1", max_tokens: 9000),
|
||||
Models::Anthropic.new("claude-v1-100k", max_tokens: 100_000),
|
||||
Models::Anthropic.new("claude-2", max_tokens: 100_000),
|
||||
]
|
||||
|
||||
|
|
|
@ -16,8 +16,9 @@ module ::DiscourseAi
|
|||
)
|
||||
url = URI("https://api.anthropic.com/v1/complete")
|
||||
headers = {
|
||||
"anthropic-version" => "2023-06-01",
|
||||
"x-api-key" => SiteSetting.ai_anthropic_api_key,
|
||||
"Content-Type" => "application/json",
|
||||
"content-type" => "application/json",
|
||||
}
|
||||
|
||||
payload = { model: model, prompt: prompt }
|
||||
|
@ -85,17 +86,18 @@ module ::DiscourseAi
|
|||
.split("\n")
|
||||
.each do |line|
|
||||
data = line.split("data: ", 2)[1]
|
||||
next if !data || data.squish == "[DONE]"
|
||||
next if !data
|
||||
|
||||
if !cancelled
|
||||
begin
|
||||
# partial contains the entire payload till now
|
||||
partial = JSON.parse(data, symbolize_names: true)
|
||||
response_data = partial[:completion].to_s
|
||||
response_data << partial[:completion].to_s
|
||||
|
||||
yield partial, cancel
|
||||
# ping events carry no completion data... do not yield them
|
||||
yield partial, cancel if partial[:completion]
|
||||
rescue JSON::ParserError
|
||||
nil
|
||||
# TODO leftover chunk carry over to next
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::AiBot::AnthropicBot do
|
|||
reply << subject.get_delta({ completion: "Hello " }, context)
|
||||
expect(reply).to eq("Hello ")
|
||||
|
||||
reply << subject.get_delta({ completion: "Hello world" }, context)
|
||||
reply << subject.get_delta({ completion: "world" }, context)
|
||||
expect(reply).to eq("Hello world")
|
||||
end
|
||||
end
|
||||
|
|
|
@ -57,7 +57,7 @@ RSpec.describe DiscourseAi::AiBot::EntryPoint do
|
|||
end
|
||||
|
||||
it "includes the bot's user_id" do
|
||||
claude_bot = User.find(described_class::CLAUDE_V1_ID)
|
||||
claude_bot = User.find(described_class::CLAUDE_V2_ID)
|
||||
claude_post_attrs = post_args.merge(target_usernames: [claude_bot.username].join(","))
|
||||
|
||||
expect { PostCreator.create!(admin, claude_post_attrs) }.to change(
|
||||
|
|
|
@ -78,12 +78,12 @@ RSpec.describe Jobs::CreateAiReply do
|
|||
let(:deltas) { claude_response.split(" ").map { |w| "#{w} " } }
|
||||
|
||||
before do
|
||||
bot_user = User.find(DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID)
|
||||
bot_user = User.find(DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID)
|
||||
|
||||
AnthropicCompletionStubs.stub_streamed_response(
|
||||
DiscourseAi::AiBot::AnthropicBot.new(bot_user).bot_prompt_with_topic_context(post),
|
||||
deltas,
|
||||
model: "claude-v1.3",
|
||||
model: "claude-2",
|
||||
req_opts: {
|
||||
max_tokens_to_sample: 3000,
|
||||
temperature: 0.4,
|
||||
|
@ -95,7 +95,7 @@ RSpec.describe Jobs::CreateAiReply do
|
|||
it "adds a reply from the Claude bot" do
|
||||
subject.execute(
|
||||
post_id: topic.first_post.id,
|
||||
bot_user_id: DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID,
|
||||
bot_user_id: DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID,
|
||||
)
|
||||
|
||||
expect(topic.posts.last.raw).to eq(expected_response)
|
||||
|
|
|
@ -5,7 +5,7 @@ require_relative "../../../../support/anthropic_completion_stubs"
|
|||
RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
|
||||
subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
|
||||
|
||||
let(:model_name) { "claude-v1" }
|
||||
let(:model_name) { "claude-2" }
|
||||
let(:max_tokens) { 720 }
|
||||
|
||||
let(:content) do
|
||||
|
|
|
@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
|
|||
completions =
|
||||
DiscourseAi::Inference::AnthropicCompletions.perform!(
|
||||
prompt,
|
||||
"claude-v1",
|
||||
"claude-2",
|
||||
temperature: req_opts[:temperature],
|
||||
max_tokens: req_opts[:max_tokens_to_sample],
|
||||
user_id: user_id,
|
||||
|
@ -27,7 +27,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
|
|||
expect(AiApiAuditLog.count).to eq(1)
|
||||
log = AiApiAuditLog.first
|
||||
|
||||
request_body = { model: "claude-v1", prompt: prompt }.merge(req_opts).to_json
|
||||
request_body = { model: "claude-2", prompt: prompt }.merge(req_opts).to_json
|
||||
response_body = AnthropicCompletionStubs.response(response_text).to_json
|
||||
|
||||
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
|
||||
|
@ -47,11 +47,11 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
|
|||
|
||||
DiscourseAi::Inference::AnthropicCompletions.perform!(
|
||||
prompt,
|
||||
"claude-v1",
|
||||
"claude-2",
|
||||
max_tokens: req_opts[:max_tokens_to_sample],
|
||||
) do |partial, cancel|
|
||||
data = partial[:completion]
|
||||
content = data if data
|
||||
content << data if data
|
||||
cancel.call if content.split(" ").length == 2
|
||||
end
|
||||
|
||||
|
@ -60,7 +60,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
|
|||
expect(AiApiAuditLog.count).to eq(1)
|
||||
log = AiApiAuditLog.first
|
||||
|
||||
request_body = { model: "claude-v1", prompt: prompt }.merge(req_opts).to_json
|
||||
request_body = { model: "claude-2", prompt: prompt }.merge(req_opts).to_json
|
||||
|
||||
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
|
||||
expect(log.request_tokens).to eq(6)
|
||||
|
|
|
@ -9,7 +9,7 @@ class AnthropicCompletionStubs
|
|||
stop_reason: "stop_sequence",
|
||||
truncated: false,
|
||||
log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
|
||||
model: "claude-v1",
|
||||
model: "claude-2",
|
||||
exception: nil,
|
||||
}
|
||||
end
|
||||
|
@ -18,7 +18,7 @@ class AnthropicCompletionStubs
|
|||
WebMock
|
||||
.stub_request(:post, "https://api.anthropic.com/v1/complete")
|
||||
.with(
|
||||
body: { model: "claude-v1", prompt: prompt, max_tokens_to_sample: 2000 }.merge(
|
||||
body: { model: "claude-2", prompt: prompt, max_tokens_to_sample: 2000 }.merge(
|
||||
req_opts,
|
||||
).to_json,
|
||||
)
|
||||
|
@ -32,7 +32,7 @@ class AnthropicCompletionStubs
|
|||
stop_reason: finish_reason,
|
||||
truncated: false,
|
||||
log_id: "12b029451c6d18094d868bc04ce83f63",
|
||||
model: "claude-v1",
|
||||
model: "claude-2",
|
||||
exception: nil,
|
||||
}.to_json
|
||||
end
|
||||
|
@ -41,18 +41,17 @@ class AnthropicCompletionStubs
|
|||
chunks =
|
||||
deltas.each_with_index.map do |_, index|
|
||||
if index == (deltas.length - 1)
|
||||
stream_line(deltas.join(""), finish_reason: "stop_sequence")
|
||||
stream_line(deltas[index], finish_reason: "stop_sequence")
|
||||
else
|
||||
stream_line(deltas[0..index].join(""))
|
||||
stream_line(deltas[index])
|
||||
end
|
||||
end
|
||||
|
||||
chunks << "[DONE]"
|
||||
chunks = chunks.join("\n\n")
|
||||
|
||||
WebMock
|
||||
.stub_request(:post, "https://api.anthropic.com/v1/complete")
|
||||
.with(body: { model: model || "claude-v1", prompt: prompt }.merge(req_opts).to_json)
|
||||
.with(body: { model: model || "claude-2", prompt: prompt }.merge(req_opts).to_json)
|
||||
.to_return(status: 200, body: chunks)
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue