FEATURE: GPT4o support and better auditing (#618)
- Introduce new support for GPT4o (automation / bot / summary / helper) - Properly account for token counts on OpenAI models - Track feature that was used when generating AI completions - Remove custom llm support for summarization as we need better interfaces to control registration and de-registration
This commit is contained in:
parent
8b00c47087
commit
8eee6893d6
|
@ -22,12 +22,13 @@ end
|
||||||
# id :bigint not null, primary key
|
# id :bigint not null, primary key
|
||||||
# provider_id :integer not null
|
# provider_id :integer not null
|
||||||
# user_id :integer
|
# user_id :integer
|
||||||
# topic_id :integer
|
|
||||||
# post_id :integer
|
|
||||||
# request_tokens :integer
|
# request_tokens :integer
|
||||||
# response_tokens :integer
|
# response_tokens :integer
|
||||||
# raw_request_payload :string
|
# raw_request_payload :string
|
||||||
# raw_response_payload :string
|
# raw_response_payload :string
|
||||||
# created_at :datetime not null
|
# created_at :datetime not null
|
||||||
# updated_at :datetime not null
|
# updated_at :datetime not null
|
||||||
|
# topic_id :integer
|
||||||
|
# post_id :integer
|
||||||
|
# feature_name :string(255)
|
||||||
#
|
#
|
||||||
|
|
|
@ -20,6 +20,7 @@ en:
|
||||||
mistral_7b_instruct_v0_2: Mistral 7B Instruct V0.2
|
mistral_7b_instruct_v0_2: Mistral 7B Instruct V0.2
|
||||||
command_r: Cohere Command R
|
command_r: Cohere Command R
|
||||||
command_r_plus: Cohere Command R+
|
command_r_plus: Cohere Command R+
|
||||||
|
gpt_4o: GPT 4 Omni
|
||||||
scriptables:
|
scriptables:
|
||||||
llm_report:
|
llm_report:
|
||||||
fields:
|
fields:
|
||||||
|
@ -328,6 +329,7 @@ en:
|
||||||
cohere-command-r-plus: "Cohere Command R Plus"
|
cohere-command-r-plus: "Cohere Command R Plus"
|
||||||
gpt-4: "GPT-4"
|
gpt-4: "GPT-4"
|
||||||
gpt-4-turbo: "GPT-4 Turbo"
|
gpt-4-turbo: "GPT-4 Turbo"
|
||||||
|
gpt-4o: "GPT-4 Omni"
|
||||||
gpt-3:
|
gpt-3:
|
||||||
5-turbo: "GPT-3.5"
|
5-turbo: "GPT-3.5"
|
||||||
claude-2: "Claude 2"
|
claude-2: "Claude 2"
|
||||||
|
|
|
@ -215,6 +215,7 @@ pl_PL:
|
||||||
bot_names:
|
bot_names:
|
||||||
gpt-4: "GPT-4"
|
gpt-4: "GPT-4"
|
||||||
gpt-4-turbo: "GPT-4 Turbo"
|
gpt-4-turbo: "GPT-4 Turbo"
|
||||||
|
gpt-4o: "GPT-4 Omni"
|
||||||
gpt-3:
|
gpt-3:
|
||||||
5-turbo: "GPT-3.5"
|
5-turbo: "GPT-3.5"
|
||||||
claude-2: "Claude 2"
|
claude-2: "Claude 2"
|
||||||
|
|
|
@ -43,6 +43,7 @@ en:
|
||||||
ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (for Azure support)"
|
ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (for Azure support)"
|
||||||
ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
|
ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
|
||||||
ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
|
ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
|
||||||
|
ai_openai_gpt4o_url: "Custom URL used for GPT 4 Omni chat completions. (for Azure support)"
|
||||||
ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
|
ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
|
||||||
ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 Turbo chat completions. (for Azure support)"
|
ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 Turbo chat completions. (for Azure support)"
|
||||||
ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"
|
ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"
|
||||||
|
|
|
@ -98,6 +98,7 @@ discourse_ai:
|
||||||
|
|
||||||
ai_openai_gpt35_url: "https://api.openai.com/v1/chat/completions"
|
ai_openai_gpt35_url: "https://api.openai.com/v1/chat/completions"
|
||||||
ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
|
ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
|
||||||
|
ai_openai_gpt4o_url: "https://api.openai.com/v1/chat/completions"
|
||||||
ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
|
ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
|
||||||
ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
|
ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
|
||||||
ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions"
|
ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions"
|
||||||
|
@ -343,6 +344,7 @@ discourse_ai:
|
||||||
- gpt-3.5-turbo
|
- gpt-3.5-turbo
|
||||||
- gpt-4
|
- gpt-4
|
||||||
- gpt-4-turbo
|
- gpt-4-turbo
|
||||||
|
- gpt-4o
|
||||||
- claude-2
|
- claude-2
|
||||||
- gemini-1.5-pro
|
- gemini-1.5-pro
|
||||||
- mixtral-8x7B-Instruct-V0.1
|
- mixtral-8x7B-Instruct-V0.1
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
class AddFeatureNameToAiApiAuditLog < ActiveRecord::Migration[7.0]
|
||||||
|
def change
|
||||||
|
add_column :ai_api_audit_logs, :feature_name, :string, limit: 255
|
||||||
|
end
|
||||||
|
end
|
|
@ -52,7 +52,7 @@ class ExplicitProviderBackwardsCompat < ActiveRecord::Migration[7.0]
|
||||||
end
|
end
|
||||||
|
|
||||||
def append_provider(value)
|
def append_provider(value)
|
||||||
open_ai_models = %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k gpt-4-turbo]
|
open_ai_models = %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k gpt-4-turbo gpt-4o]
|
||||||
return "open_ai:#{value}" if open_ai_models.include?(value)
|
return "open_ai:#{value}" if open_ai_models.include?(value)
|
||||||
return "google:#{value}" if value == "gemini-pro"
|
return "google:#{value}" if value == "gemini-pro"
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@ module DiscourseAi
|
||||||
|
|
||||||
DiscourseAi::Completions::Llm
|
DiscourseAi::Completions::Llm
|
||||||
.proxy(model)
|
.proxy(model)
|
||||||
.generate(title_prompt, user: post.user)
|
.generate(title_prompt, user: post.user, feature_name: "bot_title")
|
||||||
.strip
|
.strip
|
||||||
.split("\n")
|
.split("\n")
|
||||||
.last
|
.last
|
||||||
|
@ -67,7 +67,7 @@ module DiscourseAi
|
||||||
tool_found = false
|
tool_found = false
|
||||||
|
|
||||||
result =
|
result =
|
||||||
llm.generate(prompt, **llm_kwargs) do |partial, cancel|
|
llm.generate(prompt, feature_name: "bot", **llm_kwargs) do |partial, cancel|
|
||||||
tools = persona.find_tools(partial, bot_user: user, llm: llm, context: context)
|
tools = persona.find_tools(partial, bot_user: user, llm: llm, context: context)
|
||||||
|
|
||||||
if (tools.present?)
|
if (tools.present?)
|
||||||
|
@ -162,6 +162,8 @@ module DiscourseAi
|
||||||
"open_ai:gpt-4"
|
"open_ai:gpt-4"
|
||||||
when DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
|
when DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
|
||||||
"open_ai:gpt-4-turbo"
|
"open_ai:gpt-4-turbo"
|
||||||
|
when DiscourseAi::AiBot::EntryPoint::GPT4O_ID
|
||||||
|
"open_ai:gpt-4o"
|
||||||
when DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
|
when DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
|
||||||
"open_ai:gpt-3.5-turbo-16k"
|
"open_ai:gpt-3.5-turbo-16k"
|
||||||
when DiscourseAi::AiBot::EntryPoint::MIXTRAL_ID
|
when DiscourseAi::AiBot::EntryPoint::MIXTRAL_ID
|
||||||
|
|
|
@ -18,6 +18,7 @@ module DiscourseAi
|
||||||
CLAUDE_3_SONNET_ID = -118
|
CLAUDE_3_SONNET_ID = -118
|
||||||
CLAUDE_3_HAIKU_ID = -119
|
CLAUDE_3_HAIKU_ID = -119
|
||||||
COHERE_COMMAND_R_PLUS = -120
|
COHERE_COMMAND_R_PLUS = -120
|
||||||
|
GPT4O_ID = -121
|
||||||
|
|
||||||
BOTS = [
|
BOTS = [
|
||||||
[GPT4_ID, "gpt4_bot", "gpt-4"],
|
[GPT4_ID, "gpt4_bot", "gpt-4"],
|
||||||
|
@ -31,6 +32,7 @@ module DiscourseAi
|
||||||
[CLAUDE_3_SONNET_ID, "claude_3_sonnet_bot", "claude-3-sonnet"],
|
[CLAUDE_3_SONNET_ID, "claude_3_sonnet_bot", "claude-3-sonnet"],
|
||||||
[CLAUDE_3_HAIKU_ID, "claude_3_haiku_bot", "claude-3-haiku"],
|
[CLAUDE_3_HAIKU_ID, "claude_3_haiku_bot", "claude-3-haiku"],
|
||||||
[COHERE_COMMAND_R_PLUS, "cohere_command_bot", "cohere-command-r-plus"],
|
[COHERE_COMMAND_R_PLUS, "cohere_command_bot", "cohere-command-r-plus"],
|
||||||
|
[GPT4O_ID, "gpt4o_bot", "gpt-4o"],
|
||||||
]
|
]
|
||||||
|
|
||||||
BOT_USER_IDS = BOTS.map(&:first)
|
BOT_USER_IDS = BOTS.map(&:first)
|
||||||
|
@ -49,6 +51,8 @@ module DiscourseAi
|
||||||
|
|
||||||
def self.map_bot_model_to_user_id(model_name)
|
def self.map_bot_model_to_user_id(model_name)
|
||||||
case model_name
|
case model_name
|
||||||
|
in "gpt-4o"
|
||||||
|
GPT4O_ID
|
||||||
in "gpt-4-turbo"
|
in "gpt-4-turbo"
|
||||||
GPT4_TURBO_ID
|
GPT4_TURBO_ID
|
||||||
in "gpt-3.5-turbo"
|
in "gpt-3.5-turbo"
|
||||||
|
|
|
@ -17,7 +17,7 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
def consolidate_question
|
def consolidate_question
|
||||||
@llm.generate(revised_prompt, user: @user)
|
@llm.generate(revised_prompt, user: @user, feature_name: "question_consolidator")
|
||||||
end
|
end
|
||||||
|
|
||||||
def revised_prompt
|
def revised_prompt
|
||||||
|
|
|
@ -135,7 +135,14 @@ module DiscourseAi
|
||||||
|
|
||||||
prompt = section_prompt(topic, section, guidance)
|
prompt = section_prompt(topic, section, guidance)
|
||||||
|
|
||||||
summary = llm.generate(prompt, temperature: 0.6, max_tokens: 400, user: bot_user)
|
summary =
|
||||||
|
llm.generate(
|
||||||
|
prompt,
|
||||||
|
temperature: 0.6,
|
||||||
|
max_tokens: 400,
|
||||||
|
user: bot_user,
|
||||||
|
feature_name: "summarize_tool",
|
||||||
|
)
|
||||||
|
|
||||||
summaries << summary
|
summaries << summary
|
||||||
end
|
end
|
||||||
|
@ -150,7 +157,13 @@ module DiscourseAi
|
||||||
"concatenated the disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}}",
|
"concatenated the disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}}",
|
||||||
}
|
}
|
||||||
|
|
||||||
llm.generate(concatenation_prompt, temperature: 0.6, max_tokens: 500, user: bot_user)
|
llm.generate(
|
||||||
|
concatenation_prompt,
|
||||||
|
temperature: 0.6,
|
||||||
|
max_tokens: 500,
|
||||||
|
user: bot_user,
|
||||||
|
feature_name: "summarize_tool",
|
||||||
|
)
|
||||||
else
|
else
|
||||||
summaries.first
|
summaries.first
|
||||||
end
|
end
|
||||||
|
|
|
@ -85,6 +85,7 @@ module DiscourseAi
|
||||||
user: user,
|
user: user,
|
||||||
temperature: completion_prompt.temperature,
|
temperature: completion_prompt.temperature,
|
||||||
stop_sequences: completion_prompt.stop_sequences,
|
stop_sequences: completion_prompt.stop_sequences,
|
||||||
|
feature_name: "ai_helper",
|
||||||
&block
|
&block
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
@ -163,6 +164,7 @@ module DiscourseAi
|
||||||
prompt,
|
prompt,
|
||||||
user: Discourse.system_user,
|
user: Discourse.system_user,
|
||||||
max_tokens: 1024,
|
max_tokens: 1024,
|
||||||
|
feature_name: "image_caption",
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -32,6 +32,7 @@ module DiscourseAi
|
||||||
prompt,
|
prompt,
|
||||||
user: Discourse.system_user,
|
user: Discourse.system_user,
|
||||||
stop_sequences: ["</input>"],
|
stop_sequences: ["</input>"],
|
||||||
|
feature_name: "chat_thread_title",
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -68,6 +68,7 @@ module DiscourseAi
|
||||||
DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model).generate(
|
DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model).generate(
|
||||||
prompt,
|
prompt,
|
||||||
user: user,
|
user: user,
|
||||||
|
feature_name: "illustrate_post",
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
module DiscourseAi
|
module DiscourseAi
|
||||||
module Automation
|
module Automation
|
||||||
AVAILABLE_MODELS = [
|
AVAILABLE_MODELS = [
|
||||||
|
{ id: "gpt-4o", name: "discourse_automation.ai_models.gpt_4o" },
|
||||||
{ id: "gpt-4-turbo", name: "discourse_automation.ai_models.gpt_4_turbo" },
|
{ id: "gpt-4-turbo", name: "discourse_automation.ai_models.gpt_4_turbo" },
|
||||||
{ id: "gpt-4", name: "discourse_automation.ai_models.gpt_4" },
|
{ id: "gpt-4", name: "discourse_automation.ai_models.gpt_4" },
|
||||||
{ id: "gpt-3.5-turbo", name: "discourse_automation.ai_models.gpt_3_5_turbo" },
|
{ id: "gpt-3.5-turbo", name: "discourse_automation.ai_models.gpt_3_5_turbo" },
|
||||||
|
|
|
@ -41,6 +41,7 @@ module DiscourseAi
|
||||||
temperature: 0,
|
temperature: 0,
|
||||||
max_tokens: llm.tokenizer.tokenize(search_for_text).length * 2 + 10,
|
max_tokens: llm.tokenizer.tokenize(search_for_text).length * 2 + 10,
|
||||||
user: Discourse.system_user,
|
user: Discourse.system_user,
|
||||||
|
feature_name: "llm_triage",
|
||||||
)
|
)
|
||||||
|
|
||||||
if result.present? && result.strip.downcase.include?(search_for_text)
|
if result.present? && result.strip.downcase.include?(search_for_text)
|
||||||
|
|
|
@ -154,6 +154,7 @@ Follow the provided writing composition instructions carefully and precisely ste
|
||||||
temperature: @temperature,
|
temperature: @temperature,
|
||||||
top_p: @top_p,
|
top_p: @top_p,
|
||||||
user: Discourse.system_user,
|
user: Discourse.system_user,
|
||||||
|
feature_name: "ai_report",
|
||||||
) do |response|
|
) do |response|
|
||||||
print response if Rails.env.development? && @debug_mode
|
print response if Rails.env.development? && @debug_mode
|
||||||
result << response
|
result << response
|
||||||
|
|
|
@ -83,7 +83,8 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
def inline_images(content, message)
|
def inline_images(content, message)
|
||||||
if model_name.include?("gpt-4-vision") || model_name == "gpt-4-turbo"
|
if model_name.include?("gpt-4-vision") || model_name == "gpt-4-turbo" ||
|
||||||
|
model_name == "gpt-4o"
|
||||||
content = message[:content]
|
content = message[:content]
|
||||||
encoded_uploads = prompt.encoded_uploads(message)
|
encoded_uploads = prompt.encoded_uploads(message)
|
||||||
if encoded_uploads.present?
|
if encoded_uploads.present?
|
||||||
|
@ -125,6 +126,8 @@ module DiscourseAi
|
||||||
32_768
|
32_768
|
||||||
when "gpt-4-turbo"
|
when "gpt-4-turbo"
|
||||||
131_072
|
131_072
|
||||||
|
when "gpt-4o"
|
||||||
|
131_072
|
||||||
else
|
else
|
||||||
8192
|
8192
|
||||||
end
|
end
|
||||||
|
|
|
@ -73,7 +73,7 @@ module DiscourseAi
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
|
|
||||||
def perform_completion!(dialect, user, model_params = {}, &blk)
|
def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
|
||||||
allow_tools = dialect.prompt.has_tools?
|
allow_tools = dialect.prompt.has_tools?
|
||||||
model_params = normalize_model_params(model_params)
|
model_params = normalize_model_params(model_params)
|
||||||
|
|
||||||
|
@ -114,6 +114,7 @@ module DiscourseAi
|
||||||
request_tokens: prompt_size(prompt),
|
request_tokens: prompt_size(prompt),
|
||||||
topic_id: dialect.prompt.topic_id,
|
topic_id: dialect.prompt.topic_id,
|
||||||
post_id: dialect.prompt.post_id,
|
post_id: dialect.prompt.post_id,
|
||||||
|
feature_name: feature_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
if !@streaming_mode
|
if !@streaming_mode
|
||||||
|
|
|
@ -23,7 +23,7 @@ module DiscourseAi
|
||||||
|
|
||||||
attr_reader :responses, :completions, :prompt
|
attr_reader :responses, :completions, :prompt
|
||||||
|
|
||||||
def perform_completion!(prompt, _user, _model_params)
|
def perform_completion!(prompt, _user, _model_params, feature_name: nil)
|
||||||
@prompt = prompt
|
@prompt = prompt
|
||||||
response = responses[completions]
|
response = responses[completions]
|
||||||
if response.nil?
|
if response.nil?
|
||||||
|
|
|
@ -110,7 +110,7 @@ module DiscourseAi
|
||||||
@last_call = params
|
@last_call = params
|
||||||
end
|
end
|
||||||
|
|
||||||
def perform_completion!(dialect, user, model_params = {})
|
def perform_completion!(dialect, user, model_params = {}, feature_name: nil)
|
||||||
self.class.last_call = { dialect: dialect, user: user, model_params: model_params }
|
self.class.last_call = { dialect: dialect, user: user, model_params: model_params }
|
||||||
|
|
||||||
content = self.class.fake_content
|
content = self.class.fake_content
|
||||||
|
|
|
@ -12,6 +12,7 @@ module DiscourseAi
|
||||||
def dependant_setting_names
|
def dependant_setting_names
|
||||||
%w[
|
%w[
|
||||||
ai_openai_api_key
|
ai_openai_api_key
|
||||||
|
ai_openai_gpt4o_url
|
||||||
ai_openai_gpt4_32k_url
|
ai_openai_gpt4_32k_url
|
||||||
ai_openai_gpt4_turbo_url
|
ai_openai_gpt4_turbo_url
|
||||||
ai_openai_gpt4_url
|
ai_openai_gpt4_url
|
||||||
|
@ -33,6 +34,8 @@ module DiscourseAi
|
||||||
else
|
else
|
||||||
if model.include?("1106") || model.include?("turbo")
|
if model.include?("1106") || model.include?("turbo")
|
||||||
SiteSetting.ai_openai_gpt4_turbo_url
|
SiteSetting.ai_openai_gpt4_turbo_url
|
||||||
|
elsif model.include?("gpt-4o")
|
||||||
|
SiteSetting.ai_openai_gpt4o_url
|
||||||
else
|
else
|
||||||
SiteSetting.ai_openai_gpt4_url
|
SiteSetting.ai_openai_gpt4_url
|
||||||
end
|
end
|
||||||
|
@ -98,35 +101,47 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
def prepare_payload(prompt, model_params, dialect)
|
def prepare_payload(prompt, model_params, dialect)
|
||||||
default_options
|
payload = default_options.merge(model_params).merge(messages: prompt)
|
||||||
.merge(model_params)
|
|
||||||
.merge(messages: prompt)
|
if @streaming_mode
|
||||||
.tap do |payload|
|
payload[:stream] = true
|
||||||
payload[:stream] = true if @streaming_mode
|
payload[:stream_options] = { include_usage: true }
|
||||||
payload[:tools] = dialect.tools if dialect.tools.present?
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
payload[:tools] = dialect.tools if dialect.tools.present?
|
||||||
|
payload
|
||||||
end
|
end
|
||||||
|
|
||||||
def prepare_request(payload)
|
def prepare_request(payload)
|
||||||
headers =
|
headers = { "Content-Type" => "application/json" }
|
||||||
{ "Content-Type" => "application/json" }.tap do |h|
|
|
||||||
if model_uri.host.include?("azure")
|
if model_uri.host.include?("azure")
|
||||||
h["api-key"] = SiteSetting.ai_openai_api_key
|
headers["api-key"] = SiteSetting.ai_openai_api_key
|
||||||
else
|
else
|
||||||
h["Authorization"] = "Bearer #{SiteSetting.ai_openai_api_key}"
|
headers["Authorization"] = "Bearer #{SiteSetting.ai_openai_api_key}"
|
||||||
end
|
end
|
||||||
|
|
||||||
if SiteSetting.ai_openai_organization.present?
|
if SiteSetting.ai_openai_organization.present?
|
||||||
h["OpenAI-Organization"] = SiteSetting.ai_openai_organization
|
headers["OpenAI-Organization"] = SiteSetting.ai_openai_organization
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
|
Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def final_log_update(log)
|
||||||
|
log.request_tokens = @prompt_tokens if @prompt_tokens
|
||||||
|
log.response_tokens = @completion_tokens if @completion_tokens
|
||||||
|
end
|
||||||
|
|
||||||
def extract_completion_from(response_raw)
|
def extract_completion_from(response_raw)
|
||||||
parsed = JSON.parse(response_raw, symbolize_names: true).dig(:choices, 0)
|
json = JSON.parse(response_raw, symbolize_names: true)
|
||||||
# half a line sent here
|
|
||||||
|
if @streaming_mode
|
||||||
|
@prompt_tokens ||= json.dig(:usage, :prompt_tokens)
|
||||||
|
@completion_tokens ||= json.dig(:usage, :completion_tokens)
|
||||||
|
end
|
||||||
|
|
||||||
|
parsed = json.dig(:choices, 0)
|
||||||
return if !parsed
|
return if !parsed
|
||||||
|
|
||||||
response_h = @streaming_mode ? parsed.dig(:delta) : parsed.dig(:message)
|
response_h = @streaming_mode ? parsed.dig(:delta) : parsed.dig(:message)
|
||||||
|
|
|
@ -54,6 +54,7 @@ module DiscourseAi
|
||||||
gpt-4-32k
|
gpt-4-32k
|
||||||
gpt-4-turbo
|
gpt-4-turbo
|
||||||
gpt-4-vision-preview
|
gpt-4-vision-preview
|
||||||
|
gpt-4o
|
||||||
],
|
],
|
||||||
google: %w[gemini-pro gemini-1.5-pro],
|
google: %w[gemini-pro gemini-1.5-pro],
|
||||||
}.tap do |h|
|
}.tap do |h|
|
||||||
|
@ -106,12 +107,6 @@ module DiscourseAi
|
||||||
dialect_klass =
|
dialect_klass =
|
||||||
DiscourseAi::Completions::Dialects::Dialect.dialect_for(model_name_without_prov)
|
DiscourseAi::Completions::Dialects::Dialect.dialect_for(model_name_without_prov)
|
||||||
|
|
||||||
if is_custom_model
|
|
||||||
tokenizer = llm_model.tokenizer_class
|
|
||||||
else
|
|
||||||
tokenizer = dialect_klass.tokenizer
|
|
||||||
end
|
|
||||||
|
|
||||||
if @canned_response
|
if @canned_response
|
||||||
if @canned_llm && @canned_llm != model_name
|
if @canned_llm && @canned_llm != model_name
|
||||||
raise "Invalid call LLM call, expected #{@canned_llm} but got #{model_name}"
|
raise "Invalid call LLM call, expected #{@canned_llm} but got #{model_name}"
|
||||||
|
@ -164,6 +159,7 @@ module DiscourseAi
|
||||||
max_tokens: nil,
|
max_tokens: nil,
|
||||||
stop_sequences: nil,
|
stop_sequences: nil,
|
||||||
user:,
|
user:,
|
||||||
|
feature_name: nil,
|
||||||
&partial_read_blk
|
&partial_read_blk
|
||||||
)
|
)
|
||||||
self.class.record_prompt(prompt)
|
self.class.record_prompt(prompt)
|
||||||
|
@ -196,7 +192,13 @@ module DiscourseAi
|
||||||
model_name,
|
model_name,
|
||||||
opts: model_params.merge(max_prompt_tokens: @max_prompt_tokens),
|
opts: model_params.merge(max_prompt_tokens: @max_prompt_tokens),
|
||||||
)
|
)
|
||||||
gateway.perform_completion!(dialect, user, model_params, &partial_read_blk)
|
gateway.perform_completion!(
|
||||||
|
dialect,
|
||||||
|
user,
|
||||||
|
model_params,
|
||||||
|
feature_name: feature_name,
|
||||||
|
&partial_read_blk
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
def max_prompt_tokens
|
def max_prompt_tokens
|
||||||
|
|
|
@ -69,7 +69,7 @@ module DiscourseAi
|
||||||
def can_talk_to_model?(model_name)
|
def can_talk_to_model?(model_name)
|
||||||
DiscourseAi::Completions::Llm
|
DiscourseAi::Completions::Llm
|
||||||
.proxy(model_name)
|
.proxy(model_name)
|
||||||
.generate("How much is 1 + 1?", user: nil)
|
.generate("How much is 1 + 1?", user: nil, feature_name: "llm_validator")
|
||||||
.present?
|
.present?
|
||||||
rescue StandardError
|
rescue StandardError
|
||||||
false
|
false
|
||||||
|
|
|
@ -169,7 +169,7 @@ module DiscourseAi
|
||||||
llm_response =
|
llm_response =
|
||||||
DiscourseAi::Completions::Llm.proxy(
|
DiscourseAi::Completions::Llm.proxy(
|
||||||
SiteSetting.ai_embeddings_semantic_search_hyde_model,
|
SiteSetting.ai_embeddings_semantic_search_hyde_model,
|
||||||
).generate(prompt, user: @guardian.user)
|
).generate(prompt, user: @guardian.user, feature_name: "semantic_search_hyde")
|
||||||
|
|
||||||
Nokogiri::HTML5.fragment(llm_response).at("ai")&.text&.presence || llm_response
|
Nokogiri::HTML5.fragment(llm_response).at("ai")&.text&.presence || llm_response
|
||||||
end
|
end
|
||||||
|
|
|
@ -8,6 +8,7 @@ module DiscourseAi
|
||||||
Models::OpenAi.new("open_ai:gpt-4", max_tokens: 8192),
|
Models::OpenAi.new("open_ai:gpt-4", max_tokens: 8192),
|
||||||
Models::OpenAi.new("open_ai:gpt-4-32k", max_tokens: 32_768),
|
Models::OpenAi.new("open_ai:gpt-4-32k", max_tokens: 32_768),
|
||||||
Models::OpenAi.new("open_ai:gpt-4-turbo", max_tokens: 100_000),
|
Models::OpenAi.new("open_ai:gpt-4-turbo", max_tokens: 100_000),
|
||||||
|
Models::OpenAi.new("open_ai:gpt-4o", max_tokens: 100_000),
|
||||||
Models::OpenAi.new("open_ai:gpt-3.5-turbo", max_tokens: 4096),
|
Models::OpenAi.new("open_ai:gpt-3.5-turbo", max_tokens: 4096),
|
||||||
Models::OpenAi.new("open_ai:gpt-3.5-turbo-16k", max_tokens: 16_384),
|
Models::OpenAi.new("open_ai:gpt-3.5-turbo-16k", max_tokens: 16_384),
|
||||||
Models::Gemini.new("google:gemini-pro", max_tokens: 32_768),
|
Models::Gemini.new("google:gemini-pro", max_tokens: 32_768),
|
||||||
|
@ -50,24 +51,31 @@ module DiscourseAi
|
||||||
max_tokens: 32_000,
|
max_tokens: 32_000,
|
||||||
)
|
)
|
||||||
|
|
||||||
LlmModel.all.each do |model|
|
# TODO: Roman, we need to de-register custom LLMs on destroy from summarization
|
||||||
foldable_models << Models::CustomLlm.new(
|
# strategy and clear cache
|
||||||
"custom:#{model.id}",
|
# it may be better to pull all of this code into Discourse AI cause as it stands
|
||||||
max_tokens: model.max_prompt_tokens,
|
# the coupling is making it really hard to reason about summarization
|
||||||
)
|
#
|
||||||
end
|
# Auto registration and de-registration needs to be tested
|
||||||
|
|
||||||
|
#LlmModel.all.each do |model|
|
||||||
|
# foldable_models << Models::CustomLlm.new(
|
||||||
|
# "custom:#{model.id}",
|
||||||
|
# max_tokens: model.max_prompt_tokens,
|
||||||
|
# )
|
||||||
|
#end
|
||||||
|
|
||||||
foldable_models.each do |model|
|
foldable_models.each do |model|
|
||||||
plugin.register_summarization_strategy(Strategies::FoldContent.new(model))
|
plugin.register_summarization_strategy(Strategies::FoldContent.new(model))
|
||||||
end
|
end
|
||||||
|
|
||||||
plugin.add_model_callback(LlmModel, :after_create) do
|
#plugin.add_model_callback(LlmModel, :after_create) do
|
||||||
new_model = Models::CustomLlm.new("custom:#{self.id}", max_tokens: self.max_prompt_tokens)
|
# new_model = Models::CustomLlm.new("custom:#{self.id}", max_tokens: self.max_prompt_tokens)
|
||||||
|
|
||||||
if ::Summarization::Base.find_strategy("custom:#{self.id}").nil?
|
# if ::Summarization::Base.find_strategy("custom:#{self.id}").nil?
|
||||||
plugin.register_summarization_strategy(Strategies::FoldContent.new(new_model))
|
# plugin.register_summarization_strategy(Strategies::FoldContent.new(new_model))
|
||||||
end
|
# end
|
||||||
end
|
#end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -99,14 +99,19 @@ module DiscourseAi
|
||||||
def summarize_single(llm, text, user, opts, &on_partial_blk)
|
def summarize_single(llm, text, user, opts, &on_partial_blk)
|
||||||
prompt = summarization_prompt(text, opts)
|
prompt = summarization_prompt(text, opts)
|
||||||
|
|
||||||
llm.generate(prompt, user: user, &on_partial_blk)
|
llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
|
||||||
end
|
end
|
||||||
|
|
||||||
def summarize_in_chunks(llm, chunks, user, opts)
|
def summarize_in_chunks(llm, chunks, user, opts)
|
||||||
chunks.map do |chunk|
|
chunks.map do |chunk|
|
||||||
prompt = summarization_prompt(chunk[:summary], opts)
|
prompt = summarization_prompt(chunk[:summary], opts)
|
||||||
|
|
||||||
chunk[:summary] = llm.generate(prompt, user: user, max_tokens: 300)
|
chunk[:summary] = llm.generate(
|
||||||
|
prompt,
|
||||||
|
user: user,
|
||||||
|
max_tokens: 300,
|
||||||
|
feature_name: "summarize",
|
||||||
|
)
|
||||||
chunk
|
chunk
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -268,7 +268,9 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
|
||||||
).to_return(status: 200, body: body)
|
).to_return(status: 200, body: body)
|
||||||
|
|
||||||
result = +""
|
result = +""
|
||||||
llm.generate(prompt, user: Discourse.system_user) { |partial, cancel| result << partial }
|
llm.generate(prompt, user: Discourse.system_user, feature_name: "testing") do |partial, cancel|
|
||||||
|
result << partial
|
||||||
|
end
|
||||||
|
|
||||||
expect(result).to eq("Hello!")
|
expect(result).to eq("Hello!")
|
||||||
|
|
||||||
|
@ -285,6 +287,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
|
||||||
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
|
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
|
||||||
expect(log.request_tokens).to eq(25)
|
expect(log.request_tokens).to eq(25)
|
||||||
expect(log.response_tokens).to eq(15)
|
expect(log.response_tokens).to eq(15)
|
||||||
|
expect(log.feature_name).to eq("testing")
|
||||||
end
|
end
|
||||||
|
|
||||||
it "can return multiple function calls" do
|
it "can return multiple function calls" do
|
||||||
|
|
|
@ -135,7 +135,10 @@ class OpenAiMock < EndpointMock
|
||||||
.default_options
|
.default_options
|
||||||
.merge(messages: prompt)
|
.merge(messages: prompt)
|
||||||
.tap do |b|
|
.tap do |b|
|
||||||
b[:stream] = true if stream
|
if stream
|
||||||
|
b[:stream] = true
|
||||||
|
b[:stream_options] = { include_usage: true }
|
||||||
|
end
|
||||||
b[:tools] = [tool_payload] if tool_call
|
b[:tools] = [tool_payload] if tool_call
|
||||||
end
|
end
|
||||||
.to_json
|
.to_json
|
||||||
|
@ -431,6 +434,36 @@ TEXT
|
||||||
expect(content).to eq(expected)
|
expect(content).to eq(expected)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "uses proper token accounting" do
|
||||||
|
response = <<~TEXT.strip
|
||||||
|
data: {"id":"chatcmpl-9OZidiHncpBhhNMcqCus9XiJ3TkqR","object":"chat.completion.chunk","created":1715644203,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_729ea513f7","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":null}|
|
||||||
|
|
||||||
|
data: {"id":"chatcmpl-9OZidiHncpBhhNMcqCus9XiJ3TkqR","object":"chat.completion.chunk","created":1715644203,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_729ea513f7","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null}|
|
||||||
|
|
||||||
|
data: {"id":"chatcmpl-9OZidiHncpBhhNMcqCus9XiJ3TkqR","object":"chat.completion.chunk","created":1715644203,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_729ea513f7","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null}|
|
||||||
|
|
||||||
|
data: {"id":"chatcmpl-9OZidiHncpBhhNMcqCus9XiJ3TkqR","object":"chat.completion.chunk","created":1715644203,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_729ea513f7","choices":[],"usage":{"prompt_tokens":20,"completion_tokens":9,"total_tokens":29}}|
|
||||||
|
|
||||||
|
data: [DONE]
|
||||||
|
TEXT
|
||||||
|
|
||||||
|
chunks = response.split("|")
|
||||||
|
open_ai_mock.with_chunk_array_support do
|
||||||
|
open_ai_mock.stub_raw(chunks)
|
||||||
|
partials = []
|
||||||
|
|
||||||
|
dialect = compliance.dialect(prompt: compliance.generic_prompt)
|
||||||
|
endpoint.perform_completion!(dialect, user) { |partial| partials << partial }
|
||||||
|
|
||||||
|
expect(partials).to eq(["Hello"])
|
||||||
|
|
||||||
|
log = AiApiAuditLog.order("id desc").first
|
||||||
|
|
||||||
|
expect(log.request_tokens).to eq(20)
|
||||||
|
expect(log.response_tokens).to eq(9)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
it "properly handles spaces in tools payload" do
|
it "properly handles spaces in tools payload" do
|
||||||
raw_data = <<~TEXT.strip
|
raw_data = <<~TEXT.strip
|
||||||
data: {"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"func_id","type":"function","function":{"name":"go|ogle","arg|uments":""}}]}}]}
|
data: {"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"func_id","type":"function","function":{"name":"go|ogle","arg|uments":""}}]}}]}
|
||||||
|
|
Loading…
Reference in New Issue