FEATURE: Gemini 1.5 pro support and Claude Opus bedrock support (#580)

- Updated AI Bot to support only Gemini 1.5 (it previously supported 1.0); 1.0 was removed because it is not appropriate for Bot usage
- Summaries and automation can now lean on Gemini 1.5 pro
- Amazon added support for Claude 3 Opus; added internal support for it on Bedrock
This commit is contained in:
Sam 2024-04-17 15:37:19 +10:00 committed by GitHub
parent a5e4ab2825
commit 50be66ee63
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 41 additions and 16 deletions

View File

@ -12,6 +12,7 @@ en:
gpt_3_5_turbo: GPT 3.5 Turbo
claude_2: Claude 2
gemini_pro: Gemini Pro
gemini_1_5_pro: Gemini 1.5 Pro
claude_3_opus: Claude 3 Opus
claude_3_sonnet: Claude 3 Sonnet
claude_3_haiku: Claude 3 Haiku
@ -296,7 +297,8 @@ en:
gpt-3:
5-turbo: "GPT-3.5"
claude-2: "Claude 2"
gemini-pro: "Gemini"
gemini-1:
5-pro: "Gemini"
mixtral-8x7B-Instruct-V0:
"1": "Mixtral-8x7B V0.1"
sentiments:

View File

@ -339,7 +339,7 @@ discourse_ai:
- gpt-4
- gpt-4-turbo
- claude-2
- gemini-pro
- gemini-1.5-pro
- mixtral-8x7B-Instruct-V0.1
- claude-3-opus
- claude-3-sonnet

View File

@ -177,8 +177,11 @@ module DiscourseAi
when DiscourseAi::AiBot::EntryPoint::FAKE_ID
"fake:fake"
when DiscourseAi::AiBot::EntryPoint::CLAUDE_3_OPUS_ID
# no bedrock support yet 18-03
if DiscourseAi::Completions::Endpoints::AwsBedrock.correctly_configured?("claude-3-opus")
"aws_bedrock:claude-3-opus"
else
"anthropic:claude-3-opus"
end
when DiscourseAi::AiBot::EntryPoint::COHERE_COMMAND_R_PLUS
"cohere:command-r-plus"
when DiscourseAi::AiBot::EntryPoint::CLAUDE_3_SONNET_ID

View File

@ -25,7 +25,7 @@ module DiscourseAi
[CLAUDE_V2_ID, "claude_bot", "claude-2"],
[GPT4_TURBO_ID, "gpt4t_bot", "gpt-4-turbo"],
[MIXTRAL_ID, "mixtral_bot", "mixtral-8x7B-Instruct-V0.1"],
[GEMINI_ID, "gemini_bot", "gemini-pro"],
[GEMINI_ID, "gemini_bot", "gemini-1.5-pro"],
[FAKE_ID, "fake_bot", "fake"],
[CLAUDE_3_OPUS_ID, "claude_3_opus_bot", "claude-3-opus"],
[CLAUDE_3_SONNET_ID, "claude_3_sonnet_bot", "claude-3-sonnet"],
@ -59,7 +59,7 @@ module DiscourseAi
CLAUDE_V2_ID
in "mixtral-8x7B-Instruct-V0.1"
MIXTRAL_ID
in "gemini-pro"
in "gemini-1.5-pro"
GEMINI_ID
in "fake"
FAKE_ID

View File

@ -7,6 +7,7 @@ module DiscourseAi
{ id: "gpt-4", name: "discourse_automation.ai_models.gpt_4" },
{ id: "gpt-3.5-turbo", name: "discourse_automation.ai_models.gpt_3_5_turbo" },
{ id: "gemini-pro", name: "discourse_automation.ai_models.gemini_pro" },
{ id: "gemini-1.5-pro", name: "discourse_automation.ai_models.gemini_1_5_pro" },
{ id: "claude-2", name: "discourse_automation.ai_models.claude_2" },
{ id: "claude-3-sonnet", name: "discourse_automation.ai_models.claude_3_sonnet" },
{ id: "claude-3-opus", name: "discourse_automation.ai_models.claude_3_opus" },
@ -24,7 +25,7 @@ module DiscourseAi
]
def self.translate_model(model)
return "google:gemini-pro" if model == "gemini-pro"
return "google:#{model}" if model.start_with? "gemini"
return "open_ai:#{model}" if model.start_with? "gpt"
return "cohere:#{model}" if model.start_with? "command"

View File

@ -6,7 +6,7 @@ module DiscourseAi
class Gemini < Dialect
class << self
# Reports whether this dialect handles the given model name, by checking
# membership in a fixed list of Gemini model names.
#
# NOTE(review): two near-identical membership checks appear below. The hunk
# header (-6,7 +6,7) suggests the first is the pre-change line and the second
# is its replacement (adds "gemini-1.5-pro") — confirm against the committed file.
def can_translate?(model_name)
%w[gemini-pro].include?(model_name)
%w[gemini-pro gemini-1.5-pro].include?(model_name)
end
def tokenizer
@ -97,8 +97,13 @@ module DiscourseAi
end
# Maximum number of prompt tokens allowed for the current model.
def max_prompt_tokens
  # Gemini 1.5 Pro technically accepts 1 million tokens; the cap is kept
  # deliberately conservative.
  return 800_000 if model_name == "gemini-1.5-pro"

  # Every other model: 50% of model tokens.
  16_384
end
protected

View File

@ -9,7 +9,9 @@ module DiscourseAi
class << self
# Reports whether this endpoint serves the given endpoint/model pair: the
# endpoint name must be "aws_bedrock" and the model must be in the list of
# supported Claude model names.
#
# NOTE(review): the model list appears twice. The hunk header (-9,7 +9,9)
# suggests the single-line list is the pre-change version and the multi-line
# list (which adds "claude-3-opus") is its replacement — confirm against the
# committed file.
def can_contact?(endpoint_name, model_name)
endpoint_name == "aws_bedrock" &&
%w[claude-instant-1 claude-2 claude-3-haiku claude-3-sonnet].include?(model_name)
%w[claude-instant-1 claude-2 claude-3-haiku claude-3-sonnet claude-3-opus].include?(
model_name,
)
end
def dependant_setting_names
@ -67,6 +69,8 @@ module DiscourseAi
"anthropic.claude-3-sonnet-20240229-v1:0"
when "claude-instant-1"
"anthropic.claude-instant-v1"
when "claude-3-opus"
"anthropic.claude-3-opus-20240229-v1:0"
end
api_url =

View File

@ -7,7 +7,7 @@ module DiscourseAi
class << self
# Reports whether this endpoint serves the given endpoint/model pair: returns
# false unless the endpoint name is "google", otherwise checks the model
# against a fixed list of Gemini model names.
#
# NOTE(review): two near-identical membership checks appear below. The hunk
# header (-7,7 +7,7) suggests the first is the pre-change line and the second
# is its replacement (adds "gemini-1.5-pro") — confirm against the committed file.
def can_contact?(endpoint_name, model_name)
return false unless endpoint_name == "google"
%w[gemini-pro].include?(model_name)
%w[gemini-pro gemini-1.5-pro].include?(model_name)
end
def dependant_setting_names
@ -52,8 +52,9 @@ module DiscourseAi
private
# Builds the URI for the Gemini API call. The action is "streamGenerateContent"
# when @streaming_mode is truthy and "generateContent" otherwise; the API key
# from SiteSetting.ai_gemini_api_key is passed as the `key` query parameter.
# The model name "gemini-1.5-pro" is mapped to "gemini-1.5-pro-latest"; any
# other name is used unchanged.
#
# NOTE(review): two URL strings follow `url =`. The hunk header (-52,8 +52,9)
# suggests the one interpolating `model` is the pre-change line and the one
# interpolating `mapped_model` is its replacement — confirm against the
# committed file.
def model_uri
mapped_model = model == "gemini-1.5-pro" ? "gemini-1.5-pro-latest" : model
url =
"https://generativelanguage.googleapis.com/v1beta/models/#{model}:#{@streaming_mode ? "streamGenerateContent" : "generateContent"}?key=#{SiteSetting.ai_gemini_api_key}"
"https://generativelanguage.googleapis.com/v1beta/models/#{mapped_model}:#{@streaming_mode ? "streamGenerateContent" : "generateContent"}?key=#{SiteSetting.ai_gemini_api_key}"
URI(url)
end

View File

@ -23,7 +23,13 @@ module DiscourseAi
# However, since they use the same URL/key settings, there's no reason to duplicate them.
@models_by_provider ||=
{
aws_bedrock: %w[claude-instant-1 claude-2 claude-3-haiku claude-3-sonnet],
aws_bedrock: %w[
claude-instant-1
claude-2
claude-3-haiku
claude-3-sonnet
claude-3-opus
],
anthropic: %w[claude-instant-1 claude-2 claude-3-haiku claude-3-sonnet claude-3-opus],
vllm: %w[
mistralai/Mixtral-8x7B-Instruct-v0.1
@ -50,7 +56,7 @@ module DiscourseAi
gpt-4-turbo
gpt-4-vision-preview
],
google: %w[gemini-pro],
google: %w[gemini-pro gemini-1.5-pro],
}.tap { |h| h[:fake] = ["fake"] if Rails.env.test? || Rails.env.development? }
end

View File

@ -19,6 +19,7 @@ module DiscourseAi
max_tokens: SiteSetting.ai_hugging_face_token_limit,
),
Models::Gemini.new("google:gemini-pro", max_tokens: 32_768),
Models::Gemini.new("google:gemini-1.5-pro", max_tokens: 800_000),
]
claude_prov = "anthropic"
@ -40,8 +41,10 @@ module DiscourseAi
max_tokens: 200_000,
)
# no opus yet for AWS bedrock
foldable_models << Models::Anthropic.new("anthropic:claude-3-opus", max_tokens: 200_000)
foldable_models << Models::Anthropic.new(
"#{claude_prov}:claude-3-opus",
max_tokens: 200_000,
)
mixtral_prov = "hugging_face"
if DiscourseAi::Completions::Endpoints::Vllm.correctly_configured?(