From 50be66ee63145c9a25ecb998e51afdf5e2df3af1 Mon Sep 17 00:00:00 2001 From: Sam Date: Wed, 17 Apr 2024 15:37:19 +1000 Subject: [PATCH] FEATURE: Gemini 1.5 pro support and Claude Opus bedrock support (#580) - Updated AI Bot to only support Gemini 1.5 (used to support 1.0) - 1.0 was removed because it is not appropriate for Bot usage - Summaries and automation can now lean on Gemini 1.5 Pro - Amazon added support for Claude 3 Opus; added internal support for it on Bedrock --- config/locales/client.en.yml | 4 +++- config/settings.yml | 2 +- lib/ai_bot/bot.rb | 7 +++++-- lib/ai_bot/entry_point.rb | 4 ++-- lib/automation.rb | 3 ++- lib/completions/dialects/gemini.rb | 9 +++++++-- lib/completions/endpoints/aws_bedrock.rb | 6 +++++- lib/completions/endpoints/gemini.rb | 5 +++-- lib/completions/llm.rb | 10 ++++++++-- lib/summarization/entry_point.rb | 7 +++++-- 10 files changed, 41 insertions(+), 16 deletions(-) diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml index ed7c2bee..c9b94792 100644 --- a/config/locales/client.en.yml +++ b/config/locales/client.en.yml @@ -12,6 +12,7 @@ en: gpt_3_5_turbo: GPT 3.5 Turbo claude_2: Claude 2 gemini_pro: Gemini Pro + gemini_1_5_pro: Gemini 1.5 Pro claude_3_opus: Claude 3 Opus claude_3_sonnet: Claude 3 Sonnet claude_3_haiku: Claude 3 Haiku @@ -296,7 +297,8 @@ en: gpt-3: 5-turbo: "GPT-3.5" claude-2: "Claude 2" - gemini-pro: "Gemini" + gemini-1: + 5-pro: "Gemini" mixtral-8x7B-Instruct-V0: "1": "Mixtral-8x7B V0.1" sentiments: diff --git a/config/settings.yml b/config/settings.yml index fc3a024f..0f86f47e 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -339,7 +339,7 @@ discourse_ai: - gpt-4 - gpt-4-turbo - claude-2 - - gemini-pro + - gemini-1.5-pro - mixtral-8x7B-Instruct-V0.1 - claude-3-opus - claude-3-sonnet diff --git a/lib/ai_bot/bot.rb b/lib/ai_bot/bot.rb index 11dd55f9..b9efb83d 100644 --- a/lib/ai_bot/bot.rb +++ b/lib/ai_bot/bot.rb @@ -177,8 +177,11 @@ module DiscourseAi when 
DiscourseAi::AiBot::EntryPoint::FAKE_ID "fake:fake" when DiscourseAi::AiBot::EntryPoint::CLAUDE_3_OPUS_ID - # no bedrock support yet 18-03 - "anthropic:claude-3-opus" + if DiscourseAi::Completions::Endpoints::AwsBedrock.correctly_configured?("claude-3-opus") + "aws_bedrock:claude-3-opus" + else + "anthropic:claude-3-opus" + end when DiscourseAi::AiBot::EntryPoint::COHERE_COMMAND_R_PLUS "cohere:command-r-plus" when DiscourseAi::AiBot::EntryPoint::CLAUDE_3_SONNET_ID diff --git a/lib/ai_bot/entry_point.rb b/lib/ai_bot/entry_point.rb index 6774a6f3..0a76a41e 100644 --- a/lib/ai_bot/entry_point.rb +++ b/lib/ai_bot/entry_point.rb @@ -25,7 +25,7 @@ module DiscourseAi [CLAUDE_V2_ID, "claude_bot", "claude-2"], [GPT4_TURBO_ID, "gpt4t_bot", "gpt-4-turbo"], [MIXTRAL_ID, "mixtral_bot", "mixtral-8x7B-Instruct-V0.1"], - [GEMINI_ID, "gemini_bot", "gemini-pro"], + [GEMINI_ID, "gemini_bot", "gemini-1.5-pro"], [FAKE_ID, "fake_bot", "fake"], [CLAUDE_3_OPUS_ID, "claude_3_opus_bot", "claude-3-opus"], [CLAUDE_3_SONNET_ID, "claude_3_sonnet_bot", "claude-3-sonnet"], @@ -59,7 +59,7 @@ module DiscourseAi CLAUDE_V2_ID in "mixtral-8x7B-Instruct-V0.1" MIXTRAL_ID - in "gemini-pro" + in "gemini-1.5-pro" GEMINI_ID in "fake" FAKE_ID diff --git a/lib/automation.rb b/lib/automation.rb index eb561e0d..d1604fa6 100644 --- a/lib/automation.rb +++ b/lib/automation.rb @@ -7,6 +7,7 @@ module DiscourseAi { id: "gpt-4", name: "discourse_automation.ai_models.gpt_4" }, { id: "gpt-3.5-turbo", name: "discourse_automation.ai_models.gpt_3_5_turbo" }, { id: "gemini-pro", name: "discourse_automation.ai_models.gemini_pro" }, + { id: "gemini-1.5-pro", name: "discourse_automation.ai_models.gemini_1_5_pro" }, { id: "claude-2", name: "discourse_automation.ai_models.claude_2" }, { id: "claude-3-sonnet", name: "discourse_automation.ai_models.claude_3_sonnet" }, { id: "claude-3-opus", name: "discourse_automation.ai_models.claude_3_opus" }, @@ -24,7 +25,7 @@ module DiscourseAi ] def self.translate_model(model) - return 
"google:gemini-pro" if model == "gemini-pro" + return "google:#{model}" if model.start_with? "gemini" return "open_ai:#{model}" if model.start_with? "gpt" return "cohere:#{model}" if model.start_with? "command" diff --git a/lib/completions/dialects/gemini.rb b/lib/completions/dialects/gemini.rb index c052a5d2..a425d4f5 100644 --- a/lib/completions/dialects/gemini.rb +++ b/lib/completions/dialects/gemini.rb @@ -6,7 +6,7 @@ module DiscourseAi class Gemini < Dialect class << self def can_translate?(model_name) - %w[gemini-pro].include?(model_name) + %w[gemini-pro gemini-1.5-pro].include?(model_name) end def tokenizer @@ -97,7 +97,12 @@ module DiscourseAi end def max_prompt_tokens - 16_384 # 50% of model tokens + if model_name == "gemini-1.5-pro" + # technically we support 1 million tokens, but we're being conservative + 800_000 + else + 16_384 # 50% of model tokens + end end protected diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb index d62cfe41..5f17cad1 100644 --- a/lib/completions/endpoints/aws_bedrock.rb +++ b/lib/completions/endpoints/aws_bedrock.rb @@ -9,7 +9,9 @@ module DiscourseAi class << self def can_contact?(endpoint_name, model_name) endpoint_name == "aws_bedrock" && - %w[claude-instant-1 claude-2 claude-3-haiku claude-3-sonnet].include?(model_name) + %w[claude-instant-1 claude-2 claude-3-haiku claude-3-sonnet claude-3-opus].include?( + model_name, + ) end def dependant_setting_names @@ -67,6 +69,8 @@ module DiscourseAi "anthropic.claude-3-sonnet-20240229-v1:0" when "claude-instant-1" "anthropic.claude-instant-v1" + when "claude-3-opus" + "anthropic.claude-3-opus-20240229-v1:0" end api_url = diff --git a/lib/completions/endpoints/gemini.rb b/lib/completions/endpoints/gemini.rb index 86e189a9..dae4b483 100644 --- a/lib/completions/endpoints/gemini.rb +++ b/lib/completions/endpoints/gemini.rb @@ -7,7 +7,7 @@ module DiscourseAi class << self def can_contact?(endpoint_name, model_name) return false unless 
endpoint_name == "google" - %w[gemini-pro].include?(model_name) + %w[gemini-pro gemini-1.5-pro].include?(model_name) end def dependant_setting_names @@ -52,8 +52,9 @@ module DiscourseAi private def model_uri + mapped_model = model == "gemini-1.5-pro" ? "gemini-1.5-pro-latest" : model url = - "https://generativelanguage.googleapis.com/v1beta/models/#{model}:#{@streaming_mode ? "streamGenerateContent" : "generateContent"}?key=#{SiteSetting.ai_gemini_api_key}" + "https://generativelanguage.googleapis.com/v1beta/models/#{mapped_model}:#{@streaming_mode ? "streamGenerateContent" : "generateContent"}?key=#{SiteSetting.ai_gemini_api_key}" URI(url) end diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb index 3e10109e..8c1d0ef8 100644 --- a/lib/completions/llm.rb +++ b/lib/completions/llm.rb @@ -23,7 +23,13 @@ module DiscourseAi # However, since they use the same URL/key settings, there's no reason to duplicate them. @models_by_provider ||= { - aws_bedrock: %w[claude-instant-1 claude-2 claude-3-haiku claude-3-sonnet], + aws_bedrock: %w[ + claude-instant-1 + claude-2 + claude-3-haiku + claude-3-sonnet + claude-3-opus + ], anthropic: %w[claude-instant-1 claude-2 claude-3-haiku claude-3-sonnet claude-3-opus], vllm: %w[ mistralai/Mixtral-8x7B-Instruct-v0.1 @@ -50,7 +56,7 @@ module DiscourseAi gpt-4-turbo gpt-4-vision-preview ], - google: %w[gemini-pro], + google: %w[gemini-pro gemini-1.5-pro], }.tap { |h| h[:fake] = ["fake"] if Rails.env.test? || Rails.env.development? 
} end diff --git a/lib/summarization/entry_point.rb b/lib/summarization/entry_point.rb index 480d6bdb..bb875df0 100644 --- a/lib/summarization/entry_point.rb +++ b/lib/summarization/entry_point.rb @@ -19,6 +19,7 @@ module DiscourseAi max_tokens: SiteSetting.ai_hugging_face_token_limit, ), Models::Gemini.new("google:gemini-pro", max_tokens: 32_768), + Models::Gemini.new("google:gemini-1.5-pro", max_tokens: 800_000), ] claude_prov = "anthropic" @@ -40,8 +41,10 @@ module DiscourseAi max_tokens: 200_000, ) - # no opus yet for AWS bedrock - foldable_models << Models::Anthropic.new("anthropic:claude-3-opus", max_tokens: 200_000) + foldable_models << Models::Anthropic.new( + "#{claude_prov}:claude-3-opus", + max_tokens: 200_000, + ) mixtral_prov = "hugging_face" if DiscourseAi::Completions::Endpoints::Vllm.correctly_configured?(