From abde82c1f39e720d9d3e7abdcf48a2a59d2361e7 Mon Sep 17 00:00:00 2001
From: Roman Rizzi <rizziromanalejandro@gmail.com>
Date: Tue, 9 Jan 2024 14:10:20 -0300
Subject: [PATCH] FIX: Use claude-2.1 to enable system prompts (#411)

---
 lib/completions/dialects/claude.rb                 | 7 +++++--
 lib/completions/endpoints/anthropic.rb             | 2 +-
 lib/completions/endpoints/aws_bedrock.rb           | 5 ++++-
 lib/summarization/entry_point.rb                   | 2 +-
 spec/lib/completions/dialects/claude_spec.rb       | 8 ++++----
 spec/lib/completions/endpoints/aws_bedrock_spec.rb | 4 ++--
 6 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/lib/completions/dialects/claude.rb b/lib/completions/dialects/claude.rb
index e47b1498..71997056 100644
--- a/lib/completions/dialects/claude.rb
+++ b/lib/completions/dialects/claude.rb
@@ -24,7 +24,10 @@ module DiscourseAi
 
           claude_prompt << conversation_context if prompt[:conversation_context]
 
-          claude_prompt << "#{prompt[:input]}\n"
+          if uses_system_message? && (prompt[:input] || prompt[:post_insts])
+            claude_prompt << "Human: "
+          end
+          claude_prompt << "#{prompt[:input]}\n" if prompt[:input]
 
           claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts]
 
@@ -35,7 +38,7 @@ module DiscourseAi
         end
 
         def max_prompt_tokens
-          50_000
+          100_000 # Claude-2.1 has a 200k context window.
         end
 
         def conversation_context
diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
index e4d26a8e..5484365b 100644
--- a/lib/completions/endpoints/anthropic.rb
+++ b/lib/completions/endpoints/anthropic.rb
@@ -22,7 +22,7 @@ module DiscourseAi
 
        def default_options
          {
-           model: model,
+           model: model == "claude-2" ? "claude-2.1" : model,
            max_tokens_to_sample: 3_000,
            stop_sequences: ["\n\nHuman:", "</function_calls>"],
          }
diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb
index 35c388c4..03782eb8 100644
--- a/lib/completions/endpoints/aws_bedrock.rb
+++ b/lib/completions/endpoints/aws_bedrock.rb
@@ -37,9 +37,12 @@ module DiscourseAi
 
        def model_uri
          # Bedrock uses slightly different names
+         # See: https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids-arns.html
          bedrock_model_id = model.split("-")
          bedrock_model_id[-1] = "v#{bedrock_model_id.last}"
-         bedrock_model_id = bedrock_model_id.join("-")
+         bedrock_model_id = +(bedrock_model_id.join("-"))
+
+         bedrock_model_id << ":1" if model == "claude-2" # For claude-2.1
 
          api_url =
            "https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{bedrock_model_id}/invoke"
diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb
index 87d2a5b1..3f4176f2 100644
--- a/lib/summarization/entry_point.rb
+++ b/lib/summarization/entry_point.rb
@@ -10,7 +10,7 @@ module DiscourseAi
           Models::OpenAi.new("gpt-4-1106-preview", max_tokens: 100_000),
           Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
           Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
-          Models::Anthropic.new("claude-2", max_tokens: 100_000),
+          Models::Anthropic.new("claude-2", max_tokens: 200_000),
           Models::Anthropic.new("claude-instant-1", max_tokens: 100_000),
           Models::Llama2.new("Llama2-chat-hf", max_tokens: SiteSetting.ai_hugging_face_token_limit),
           Models::Llama2FineTunedOrcaStyle.new(
diff --git a/spec/lib/completions/dialects/claude_spec.rb b/spec/lib/completions/dialects/claude_spec.rb
index 453db8f3..2d7a8162 100644
--- a/spec/lib/completions/dialects/claude_spec.rb
+++ b/spec/lib/completions/dialects/claude_spec.rb
@@ -49,7 +49,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Claude do
     it "translates a prompt written in our generic format to Claude's format" do
       anthropic_version = <<~TEXT
         #{prompt[:insts]}
-        #{prompt[:input]}
+        Human: #{prompt[:input]}
 
         #{prompt[:post_insts]}
 
@@ -74,7 +74,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Claude do
 
         H: #{prompt[:examples][0][0]}
         A: #{prompt[:examples][0][1]}
-        #{prompt[:input]}
+        Human: #{prompt[:input]}
 
         #{prompt[:post_insts]}
 
@@ -94,7 +94,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Claude do
         #{DiscourseAi::Completions::Dialects::Claude.tool_preamble}
         <tools>
         #{dialect.tools}</tools>
-        #{prompt[:input]}
+        Human: #{prompt[:input]}
 
         #{prompt[:post_insts]}
 
@@ -139,7 +139,7 @@ RSpec.describe DiscourseAi::Completions::Dialects::Claude do
     end
 
     it "trims content if it's getting too long" do
-      context.last[:content] = context.last[:content] * 10_000
+      context.last[:content] = context.last[:content] * 20_000
       prompt[:conversation_context] = context
 
       translated_context = dialect.conversation_context
diff --git a/spec/lib/completions/endpoints/aws_bedrock_spec.rb b/spec/lib/completions/endpoints/aws_bedrock_spec.rb
index 65999393..123ab4ae 100644
--- a/spec/lib/completions/endpoints/aws_bedrock_spec.rb
+++ b/spec/lib/completions/endpoints/aws_bedrock_spec.rb
@@ -8,7 +8,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
   subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) }
 
   let(:model_name) { "claude-2" }
-  let(:bedrock_name) { "claude-v2" }
+  let(:bedrock_name) { "claude-v2:1" }
   let(:generic_prompt) { { insts: "write 3 words" } }
   let(:dialect) { DiscourseAi::Completions::Dialects::Claude.new(generic_prompt, model_name) }
   let(:prompt) { dialect.translate }
@@ -62,7 +62,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do
               stop_reason: finish_reason,
               truncated: false,
               log_id: "12b029451c6d18094d868bc04ce83f63",
-              model: "claude-2",
+              model: "claude-2.1",
               exception: nil,
             }.to_json,
           ),