FEATURE: GPT4o support and better auditing (#618)

- Introduce new support for GPT4o (automation / bot / summary / helper)
- Properly account for token counts on OpenAI models
- Track the feature that was used when generating AI completions
- Remove custom LLM support for summarization, as we need better interfaces to control registration and de-registration
This commit is contained in:
Sam 2024-05-14 13:28:46 +10:00 committed by GitHub
parent 8b00c47087
commit 8eee6893d6
29 changed files with 167 additions and 57 deletions
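
The common thread in this commit: callers tag each completion with the feature that requested it, and the tag is persisted on the audit row. A minimal sketch of the caller side, assuming the Llm.proxy(...).generate API as it appears in the diffs below (model name and prompt here are illustrative):

    # Illustrative caller: feature_name is forwarded by Llm#generate down to
    # perform_completion!, which writes it to the AiApiAuditLog row it creates.
    result =
      DiscourseAi::Completions::Llm
        .proxy("open_ai:gpt-4o")
        .generate("Is this post spam?", user: Discourse.system_user, feature_name: "llm_triage")

    # The most recent audit row now carries the tag alongside the token counts.
    AiApiAuditLog.order(:id).last.feature_name # => "llm_triage"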

View File

@@ -22,12 +22,13 @@ end
 #  id                   :bigint           not null, primary key
 #  provider_id          :integer          not null
 #  user_id              :integer
-#  topic_id             :integer
-#  post_id              :integer
 #  request_tokens       :integer
 #  response_tokens      :integer
 #  raw_request_payload  :string
 #  raw_response_payload :string
 #  created_at           :datetime         not null
 #  updated_at           :datetime         not null
+#  topic_id             :integer
+#  post_id              :integer
+#  feature_name         :string(255)
 #

View File

@@ -20,6 +20,7 @@ en:
       mistral_7b_instruct_v0_2: Mistral 7B Instruct V0.2
       command_r: Cohere Command R
       command_r_plus: Cohere Command R+
+      gpt_4o: GPT 4 Omni
     scriptables:
       llm_report:
         fields:
@@ -328,6 +329,7 @@ en:
       cohere-command-r-plus: "Cohere Command R Plus"
       gpt-4: "GPT-4"
       gpt-4-turbo: "GPT-4 Turbo"
+      gpt-4o: "GPT-4 Omni"
       gpt-3:
         5-turbo: "GPT-3.5"
       claude-2: "Claude 2"

View File

@@ -215,6 +215,7 @@ pl_PL:
     bot_names:
       gpt-4: "GPT-4"
       gpt-4-turbo: "GPT-4 Turbo"
+      gpt-4o: "GPT-4 Omni"
       gpt-3:
         5-turbo: "GPT-3.5"
       claude-2: "Claude 2"

View File

@@ -43,6 +43,7 @@ en:
     ai_openai_gpt35_url: "Custom URL used for GPT 3.5 chat completions. (for Azure support)"
     ai_openai_gpt35_16k_url: "Custom URL used for GPT 3.5 16k chat completions. (for Azure support)"
     ai_openai_gpt4_url: "Custom URL used for GPT 4 chat completions. (for Azure support)"
+    ai_openai_gpt4o_url: "Custom URL used for GPT 4 Omni chat completions. (for Azure support)"
     ai_openai_gpt4_32k_url: "Custom URL used for GPT 4 32k chat completions. (for Azure support)"
     ai_openai_gpt4_turbo_url: "Custom URL used for GPT 4 Turbo chat completions. (for Azure support)"
     ai_openai_dall_e_3_url: "Custom URL used for DALL-E 3 image generation. (for Azure support)"

View File

@@ -98,6 +98,7 @@ discourse_ai:
   ai_openai_gpt35_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt35_16k_url: "https://api.openai.com/v1/chat/completions"
+  ai_openai_gpt4o_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_32k_url: "https://api.openai.com/v1/chat/completions"
   ai_openai_gpt4_turbo_url: "https://api.openai.com/v1/chat/completions"
@@ -343,6 +344,7 @@ discourse_ai:
       - gpt-3.5-turbo
       - gpt-4
       - gpt-4-turbo
+      - gpt-4o
      - claude-2
      - gemini-1.5-pro
      - mixtral-8x7B-Instruct-V0.1

View File

@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+class AddFeatureNameToAiApiAuditLog < ActiveRecord::Migration[7.0]
+  def change
+    add_column :ai_api_audit_logs, :feature_name, :string, limit: 255
+  end
+end
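
Illustrative only: with this column in place, per-feature usage can be aggregated straight from the audit table using standard ActiveRecord (no new API is assumed), e.g. from a Rails console:

    # Total request tokens attributed to the summarize feature.
    AiApiAuditLog.where(feature_name: "summarize").sum(:request_tokens)

    # Completion counts broken down by feature.
    AiApiAuditLog.group(:feature_name).count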

View File

@@ -52,7 +52,7 @@ class ExplicitProviderBackwardsCompat < ActiveRecord::Migration[7.0]
   end

   def append_provider(value)
-    open_ai_models = %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k gpt-4-turbo]
+    open_ai_models = %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k gpt-4-turbo gpt-4o]
     return "open_ai:#{value}" if open_ai_models.include?(value)
     return "google:#{value}" if value == "gemini-pro"

View File

@@ -43,7 +43,7 @@ module DiscourseAi
         DiscourseAi::Completions::Llm
           .proxy(model)
-          .generate(title_prompt, user: post.user)
+          .generate(title_prompt, user: post.user, feature_name: "bot_title")
           .strip
           .split("\n")
           .last
@@ -67,7 +67,7 @@ module DiscourseAi
       tool_found = false

       result =
-        llm.generate(prompt, **llm_kwargs) do |partial, cancel|
+        llm.generate(prompt, feature_name: "bot", **llm_kwargs) do |partial, cancel|
           tools = persona.find_tools(partial, bot_user: user, llm: llm, context: context)

           if (tools.present?)
@@ -162,6 +162,8 @@ module DiscourseAi
           "open_ai:gpt-4"
         when DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
           "open_ai:gpt-4-turbo"
+        when DiscourseAi::AiBot::EntryPoint::GPT4O_ID
+          "open_ai:gpt-4o"
         when DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
           "open_ai:gpt-3.5-turbo-16k"
         when DiscourseAi::AiBot::EntryPoint::MIXTRAL_ID

View File

@@ -18,6 +18,7 @@ module DiscourseAi
      CLAUDE_3_SONNET_ID = -118
      CLAUDE_3_HAIKU_ID = -119
      COHERE_COMMAND_R_PLUS = -120
+      GPT4O_ID = -121

      BOTS = [
        [GPT4_ID, "gpt4_bot", "gpt-4"],
@@ -31,6 +32,7 @@ module DiscourseAi
        [CLAUDE_3_SONNET_ID, "claude_3_sonnet_bot", "claude-3-sonnet"],
        [CLAUDE_3_HAIKU_ID, "claude_3_haiku_bot", "claude-3-haiku"],
        [COHERE_COMMAND_R_PLUS, "cohere_command_bot", "cohere-command-r-plus"],
+        [GPT4O_ID, "gpt4o_bot", "gpt-4o"],
      ]

      BOT_USER_IDS = BOTS.map(&:first)
@@ -49,6 +51,8 @@ module DiscourseAi
      def self.map_bot_model_to_user_id(model_name)
        case model_name
+        in "gpt-4o"
+          GPT4O_ID
        in "gpt-4-turbo"
          GPT4_TURBO_ID
        in "gpt-3.5-turbo"

View File

@@ -17,7 +17,7 @@ module DiscourseAi
      end

      def consolidate_question
-        @llm.generate(revised_prompt, user: @user)
+        @llm.generate(revised_prompt, user: @user, feature_name: "question_consolidator")
      end

      def revised_prompt

View File

@@ -135,7 +135,14 @@ module DiscourseAi
            prompt = section_prompt(topic, section, guidance)

-            summary = llm.generate(prompt, temperature: 0.6, max_tokens: 400, user: bot_user)
+            summary =
+              llm.generate(
+                prompt,
+                temperature: 0.6,
+                max_tokens: 400,
+                user: bot_user,
+                feature_name: "summarize_tool",
+              )

            summaries << summary
          end
@@ -150,7 +157,13 @@ module DiscourseAi
              "concatenated the disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}}",
          }

-          llm.generate(concatenation_prompt, temperature: 0.6, max_tokens: 500, user: bot_user)
+          llm.generate(
+            concatenation_prompt,
+            temperature: 0.6,
+            max_tokens: 500,
+            user: bot_user,
+            feature_name: "summarize_tool",
+          )
        else
          summaries.first
        end

View File

@@ -85,6 +85,7 @@ module DiscourseAi
          user: user,
          temperature: completion_prompt.temperature,
          stop_sequences: completion_prompt.stop_sequences,
+          feature_name: "ai_helper",
          &block
        )
      end
@@ -163,6 +164,7 @@ module DiscourseAi
          prompt,
          user: Discourse.system_user,
          max_tokens: 1024,
+          feature_name: "image_caption",
        )
      end
    end

View File

@@ -32,6 +32,7 @@ module DiscourseAi
          prompt,
          user: Discourse.system_user,
          stop_sequences: ["</input>"],
+          feature_name: "chat_thread_title",
        )
      end

View File

@@ -68,6 +68,7 @@ module DiscourseAi
        DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_model).generate(
          prompt,
          user: user,
+          feature_name: "illustrate_post",
        )
      end
    end

View File

@@ -3,6 +3,7 @@
 module DiscourseAi
   module Automation
     AVAILABLE_MODELS = [
+      { id: "gpt-4o", name: "discourse_automation.ai_models.gpt_4o" },
       { id: "gpt-4-turbo", name: "discourse_automation.ai_models.gpt_4_turbo" },
       { id: "gpt-4", name: "discourse_automation.ai_models.gpt_4" },
       { id: "gpt-3.5-turbo", name: "discourse_automation.ai_models.gpt_3_5_turbo" },

View File

@@ -41,6 +41,7 @@ module DiscourseAi
          temperature: 0,
          max_tokens: llm.tokenizer.tokenize(search_for_text).length * 2 + 10,
          user: Discourse.system_user,
+          feature_name: "llm_triage",
        )

      if result.present? && result.strip.downcase.include?(search_for_text)

View File

@@ -154,6 +154,7 @@ Follow the provided writing composition instructions carefully and precisely ste
          temperature: @temperature,
          top_p: @top_p,
          user: Discourse.system_user,
+          feature_name: "ai_report",
        ) do |response|
          print response if Rails.env.development? && @debug_mode
          result << response

View File

@@ -83,7 +83,8 @@ module DiscourseAi
        end

        def inline_images(content, message)
-          if model_name.include?("gpt-4-vision") || model_name == "gpt-4-turbo"
+          if model_name.include?("gpt-4-vision") || model_name == "gpt-4-turbo" ||
+               model_name == "gpt-4o"
            content = message[:content]
            encoded_uploads = prompt.encoded_uploads(message)
            if encoded_uploads.present?
@@ -125,6 +126,8 @@ module DiscourseAi
            32_768
          when "gpt-4-turbo"
            131_072
+          when "gpt-4o"
+            131_072
          else
            8192
          end

View File

@@ -73,7 +73,7 @@ module DiscourseAi
          true
        end

-        def perform_completion!(dialect, user, model_params = {}, &blk)
+        def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
          allow_tools = dialect.prompt.has_tools?
          model_params = normalize_model_params(model_params)
@@ -114,6 +114,7 @@ module DiscourseAi
              request_tokens: prompt_size(prompt),
              topic_id: dialect.prompt.topic_id,
              post_id: dialect.prompt.post_id,
+              feature_name: feature_name,
            )

          if !@streaming_mode
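
Because perform_completion! gained a keyword argument, anything overriding it (including the test doubles further down) must accept it. A minimal sketch of a conforming override, with a hypothetical subclass name:

    # Hypothetical endpoint subclass: accepting feature_name: keeps the override
    # compatible with callers that now always pass the keyword.
    class MyEndpoint < DiscourseAi::Completions::Endpoints::Base
      def perform_completion!(dialect, user, model_params = {}, feature_name: nil, &blk)
        super # forwards all arguments, including feature_name, unchanged
      end
    end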

View File

@@ -23,7 +23,7 @@ module DiscourseAi
        attr_reader :responses, :completions, :prompt

-        def perform_completion!(prompt, _user, _model_params)
+        def perform_completion!(prompt, _user, _model_params, feature_name: nil)
          @prompt = prompt
          response = responses[completions]

          if response.nil?

View File

@@ -110,7 +110,7 @@ module DiscourseAi
          @last_call = params
        end

-        def perform_completion!(dialect, user, model_params = {})
+        def perform_completion!(dialect, user, model_params = {}, feature_name: nil)
          self.class.last_call = { dialect: dialect, user: user, model_params: model_params }

          content = self.class.fake_content

View File

@@ -12,6 +12,7 @@ module DiscourseAi
        def dependant_setting_names
          %w[
            ai_openai_api_key
+            ai_openai_gpt4o_url
            ai_openai_gpt4_32k_url
            ai_openai_gpt4_turbo_url
            ai_openai_gpt4_url
@@ -33,6 +34,8 @@ module DiscourseAi
          else
            if model.include?("1106") || model.include?("turbo")
              SiteSetting.ai_openai_gpt4_turbo_url
+            elsif model.include?("gpt-4o")
+              SiteSetting.ai_openai_gpt4o_url
            else
              SiteSetting.ai_openai_gpt4_url
            end
@@ -98,35 +101,47 @@
        end

        def prepare_payload(prompt, model_params, dialect)
-          default_options
-            .merge(model_params)
-            .merge(messages: prompt)
-            .tap do |payload|
-              payload[:stream] = true if @streaming_mode
-              payload[:tools] = dialect.tools if dialect.tools.present?
-            end
+          payload = default_options.merge(model_params).merge(messages: prompt)
+
+          if @streaming_mode
+            payload[:stream] = true
+            payload[:stream_options] = { include_usage: true }
+          end
+
+          payload[:tools] = dialect.tools if dialect.tools.present?
+
+          payload
        end

        def prepare_request(payload)
-          headers =
-            { "Content-Type" => "application/json" }.tap do |h|
-              if model_uri.host.include?("azure")
-                h["api-key"] = SiteSetting.ai_openai_api_key
-              else
-                h["Authorization"] = "Bearer #{SiteSetting.ai_openai_api_key}"
-              end
-
-              if SiteSetting.ai_openai_organization.present?
-                h["OpenAI-Organization"] = SiteSetting.ai_openai_organization
-              end
-            end
+          headers = { "Content-Type" => "application/json" }
+
+          if model_uri.host.include?("azure")
+            headers["api-key"] = SiteSetting.ai_openai_api_key
+          else
+            headers["Authorization"] = "Bearer #{SiteSetting.ai_openai_api_key}"
+          end
+
+          if SiteSetting.ai_openai_organization.present?
+            headers["OpenAI-Organization"] = SiteSetting.ai_openai_organization
+          end

          Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
        end

+        def final_log_update(log)
+          log.request_tokens = @prompt_tokens if @prompt_tokens
+          log.response_tokens = @completion_tokens if @completion_tokens
+        end
+
        def extract_completion_from(response_raw)
-          parsed = JSON.parse(response_raw, symbolize_names: true).dig(:choices, 0)
+          json = JSON.parse(response_raw, symbolize_names: true)
+          # half a line sent here
+          if @streaming_mode
+            @prompt_tokens ||= json.dig(:usage, :prompt_tokens)
+            @completion_tokens ||= json.dig(:usage, :completion_tokens)
+          end
+
+          parsed = json.dig(:choices, 0)
          return if !parsed

          response_h = @streaming_mode ? parsed.dig(:delta) : parsed.dig(:message)
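
For context, a minimal standalone sketch (hypothetical chunk data, standard library only) of the usage extraction that include_usage enables: OpenAI appends a final streamed chunk whose choices array is empty and whose usage field carries the authoritative token counts.

    require "json"

    # The last SSE chunk has no choices, only usage totals.
    final_chunk = '{"choices":[],"usage":{"prompt_tokens":20,"completion_tokens":9,"total_tokens":29}}'
    json = JSON.parse(final_chunk, symbolize_names: true)
    json.dig(:usage, :prompt_tokens)     # => 20
    json.dig(:usage, :completion_tokens) # => 9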

View File

@@ -54,6 +54,7 @@ module DiscourseAi
            gpt-4-32k
            gpt-4-turbo
            gpt-4-vision-preview
+            gpt-4o
          ],
          google: %w[gemini-pro gemini-1.5-pro],
        }.tap do |h|
@@ -106,12 +107,6 @@ module DiscourseAi
        dialect_klass =
          DiscourseAi::Completions::Dialects::Dialect.dialect_for(model_name_without_prov)

-        if is_custom_model
-          tokenizer = llm_model.tokenizer_class
-        else
-          tokenizer = dialect_klass.tokenizer
-        end
-
        if @canned_response
          if @canned_llm && @canned_llm != model_name
            raise "Invalid call LLM call, expected #{@canned_llm} but got #{model_name}"
@@ -164,6 +159,7 @@ module DiscourseAi
        max_tokens: nil,
        stop_sequences: nil,
        user:,
+        feature_name: nil,
        &partial_read_blk
      )
        self.class.record_prompt(prompt)
@@ -196,7 +192,13 @@ module DiscourseAi
          model_name,
          opts: model_params.merge(max_prompt_tokens: @max_prompt_tokens),
        )
-        gateway.perform_completion!(dialect, user, model_params, &partial_read_blk)
+        gateway.perform_completion!(
+          dialect,
+          user,
+          model_params,
+          feature_name: feature_name,
+          &partial_read_blk
+        )
      end

      def max_prompt_tokens

View File

@@ -69,7 +69,7 @@ module DiscourseAi
      def can_talk_to_model?(model_name)
        DiscourseAi::Completions::Llm
          .proxy(model_name)
-          .generate("How much is 1 + 1?", user: nil)
+          .generate("How much is 1 + 1?", user: nil, feature_name: "llm_validator")
          .present?
      rescue StandardError
        false

View File

@@ -169,7 +169,7 @@ module DiscourseAi
        llm_response =
          DiscourseAi::Completions::Llm.proxy(
            SiteSetting.ai_embeddings_semantic_search_hyde_model,
-          ).generate(prompt, user: @guardian.user)
+          ).generate(prompt, user: @guardian.user, feature_name: "semantic_search_hyde")

        Nokogiri::HTML5.fragment(llm_response).at("ai")&.text&.presence || llm_response
      end

View File

@@ -8,6 +8,7 @@ module DiscourseAi
          Models::OpenAi.new("open_ai:gpt-4", max_tokens: 8192),
          Models::OpenAi.new("open_ai:gpt-4-32k", max_tokens: 32_768),
          Models::OpenAi.new("open_ai:gpt-4-turbo", max_tokens: 100_000),
+          Models::OpenAi.new("open_ai:gpt-4o", max_tokens: 100_000),
          Models::OpenAi.new("open_ai:gpt-3.5-turbo", max_tokens: 4096),
          Models::OpenAi.new("open_ai:gpt-3.5-turbo-16k", max_tokens: 16_384),
          Models::Gemini.new("google:gemini-pro", max_tokens: 32_768),
@@ -50,24 +51,31 @@
            max_tokens: 32_000,
          )

-          LlmModel.all.each do |model|
-            foldable_models << Models::CustomLlm.new(
-              "custom:#{model.id}",
-              max_tokens: model.max_prompt_tokens,
-            )
-          end
+          # TODO: Roman, we need to de-register custom LLMs on destroy from summarization
+          # strategy and clear cache
+          # it may be better to pull all of this code into Discourse AI cause as it stands
+          # the coupling is making it really hard to reason about summarization
+          #
+          # Auto registration and de-registration needs to be tested
+          #LlmModel.all.each do |model|
+          #  foldable_models << Models::CustomLlm.new(
+          #    "custom:#{model.id}",
+          #    max_tokens: model.max_prompt_tokens,
+          #  )
+          #end

          foldable_models.each do |model|
            plugin.register_summarization_strategy(Strategies::FoldContent.new(model))
          end

-          plugin.add_model_callback(LlmModel, :after_create) do
-            new_model = Models::CustomLlm.new("custom:#{self.id}", max_tokens: self.max_prompt_tokens)
-            if ::Summarization::Base.find_strategy("custom:#{self.id}").nil?
-              plugin.register_summarization_strategy(Strategies::FoldContent.new(new_model))
-            end
-          end
+          #plugin.add_model_callback(LlmModel, :after_create) do
+          #  new_model = Models::CustomLlm.new("custom:#{self.id}", max_tokens: self.max_prompt_tokens)
+          #  if ::Summarization::Base.find_strategy("custom:#{self.id}").nil?
+          #    plugin.register_summarization_strategy(Strategies::FoldContent.new(new_model))
+          #  end
+          #end
        end
      end
    end

View File

@@ -99,14 +99,19 @@ module DiscourseAi
      def summarize_single(llm, text, user, opts, &on_partial_blk)
        prompt = summarization_prompt(text, opts)

-        llm.generate(prompt, user: user, &on_partial_blk)
+        llm.generate(prompt, user: user, feature_name: "summarize", &on_partial_blk)
      end

      def summarize_in_chunks(llm, chunks, user, opts)
        chunks.map do |chunk|
          prompt = summarization_prompt(chunk[:summary], opts)

-          chunk[:summary] = llm.generate(prompt, user: user, max_tokens: 300)
+          chunk[:summary] = llm.generate(
+            prompt,
+            user: user,
+            max_tokens: 300,
+            feature_name: "summarize",
+          )
          chunk
        end
      end

View File

@@ -268,7 +268,9 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
      ).to_return(status: 200, body: body)

      result = +""
-      llm.generate(prompt, user: Discourse.system_user) { |partial, cancel| result << partial }
+      llm.generate(prompt, user: Discourse.system_user, feature_name: "testing") do |partial, cancel|
+        result << partial
+      end

      expect(result).to eq("Hello!")
@@ -285,6 +287,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
      expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
      expect(log.request_tokens).to eq(25)
      expect(log.response_tokens).to eq(15)
+      expect(log.feature_name).to eq("testing")
    end

    it "can return multiple function calls" do

View File

@@ -135,7 +135,10 @@ class OpenAiMock < EndpointMock
      .default_options
      .merge(messages: prompt)
      .tap do |b|
-        b[:stream] = true if stream
+        if stream
+          b[:stream] = true
+          b[:stream_options] = { include_usage: true }
+        end
        b[:tools] = [tool_payload] if tool_call
      end
      .to_json
@@ -431,6 +434,36 @@ TEXT
      expect(content).to eq(expected)
    end

+    it "uses proper token accounting" do
+      response = <<~TEXT.strip
+        data: {"id":"chatcmpl-9OZidiHncpBhhNMcqCus9XiJ3TkqR","object":"chat.completion.chunk","created":1715644203,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_729ea513f7","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":null}|
+
+        data: {"id":"chatcmpl-9OZidiHncpBhhNMcqCus9XiJ3TkqR","object":"chat.completion.chunk","created":1715644203,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_729ea513f7","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null}|
+
+        data: {"id":"chatcmpl-9OZidiHncpBhhNMcqCus9XiJ3TkqR","object":"chat.completion.chunk","created":1715644203,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_729ea513f7","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null}|
+
+        data: {"id":"chatcmpl-9OZidiHncpBhhNMcqCus9XiJ3TkqR","object":"chat.completion.chunk","created":1715644203,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_729ea513f7","choices":[],"usage":{"prompt_tokens":20,"completion_tokens":9,"total_tokens":29}}|
+
+        data: [DONE]
+      TEXT
+
+      chunks = response.split("|")
+
+      open_ai_mock.with_chunk_array_support do
+        open_ai_mock.stub_raw(chunks)
+        partials = []
+
+        dialect = compliance.dialect(prompt: compliance.generic_prompt)
+        endpoint.perform_completion!(dialect, user) { |partial| partials << partial }
+
+        expect(partials).to eq(["Hello"])
+
+        log = AiApiAuditLog.order("id desc").first
+
+        expect(log.request_tokens).to eq(20)
+        expect(log.response_tokens).to eq(9)
+      end
+    end
+
    it "properly handles spaces in tools payload" do
      raw_data = <<~TEXT.strip
        data: {"choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"func_id","type":"function","function":{"name":"go|ogle","arg|uments":""}}]}}]}