FEATURE: Add periodic problem checks for each LLM in use (#1020)

This feature adds a periodic problem check that tests whether the LLMs currently in use are still operational. If one is not, the affected LLM is surfaced to the admin so they can easily update its configuration.
Keegan George 2024-12-17 08:00:05 +09:00 committed by GitHub
parent 24b107881a
commit 90ce942108
5 changed files with 136 additions and 0 deletions


@@ -13,6 +13,11 @@ class LlmModel < ActiveRecord::Base
  validates_presence_of :name, :api_key
  validates :max_prompt_tokens, numericality: { greater_than: 0 }
  validate :required_provider_params

  scope :in_use,
        -> do
          model_ids = DiscourseAi::Configuration::LlmEnumerator.global_usage.keys
          where(id: model_ids)
        end

  def self.provider_params
    {
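
The new in_use scope limits the check to models that some site feature actually references (via LlmEnumerator.global_usage). A rough sketch of how it could be poked at from a Rails console — the sample output is illustrative, not from the commit:

  # Models referenced by at least one feature (summarization, AI bot, etc.)
  LlmModel.in_use.map { |m| [m.id, m.display_name] }
  # => [[1, "GPT-4 Turbo"], [7, "Claude 3.5 Sonnet"]]  (illustrative)

  # Configured models that nothing references are skipped by the status check.
  LlmModel.where.not(id: LlmModel.in_use.select(:id)).count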


@@ -0,0 +1,58 @@
# frozen_string_literal: true

class ProblemCheck::AiLlmStatus < ProblemCheck
  self.priority = "high"
  self.perform_every = 6.hours

  def call
    llm_errors
  end

  def base_path
    Discourse.base_path
  end

  private

  def llm_errors
    return [] if !SiteSetting.discourse_ai_enabled
    LlmModel.in_use.find_each.filter_map do |model|
      try_validate(model) { validator.run_test(model) }
    end
  end

  def try_validate(model, &blk)
    begin
      blk.call
      nil
    rescue => e
      error_message = parse_error_message(e.message)
      message =
        "#{I18n.t("dashboard.problem.ai_llm_status", { base_path: base_path, model_name: model.display_name, model_id: model.id })}"

      Problem.new(
        message,
        priority: "high",
        identifier: "ai_llm_status",
        target: model.id,
        details: {
          model_id: model.id,
          model_name: model.display_name,
          error: error_message,
        },
      )
    end
  end

  def validator
    @validator ||= DiscourseAi::Configuration::LlmValidator.new
  end

  def parse_error_message(message)
    begin
      JSON.parse(message)["message"]
    rescue JSON::ParserError
      message.to_s
    end
  end
end
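
The scheduler runs this check every six hours (per perform_every), but it can also be exercised directly; a minimal sketch, assuming a Rails console on a site with discourse_ai_enabled:

  check = ProblemCheck::AiLlmStatus.new
  problems = check.call # => [] when every in-use LLM passes the validator's test

  # Each entry is the Problem built in try_validate above, carrying both the
  # admin-facing message and machine-readable details about the failing model.
  problems.each do |problem|
    puts problem.details[:model_name]
    puts problem.details[:error]
  end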


@@ -453,3 +453,6 @@ en:
      no_default_llm: The persona must have a default_llm defined.
      user_not_allowed: The user is not allowed to participate in the topic.
      prompt_message_length: The message %{idx} is over the 1000 character limit.
  dashboard:
    problem:
      ai_llm_status: "The LLM model: %{model_name} is encountering issues. Please check the <a href='%{base_path}/admin/plugins/discourse-ai/ai-llms/%{model_id}'>model's configuration page</a>."
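
The translation interpolates the model's name and a direct link to its admin configuration page; a quick sketch of how it renders, with illustrative values:

  I18n.t(
    "dashboard.problem.ai_llm_status",
    base_path: "",             # a subfolder install would pass e.g. "/forum"
    model_name: "GPT-4 Turbo", # illustrative
    model_id: 1,
  )
  # => "The LLM model: GPT-4 Turbo is encountering issues. Please check the
  #     <a href='/admin/plugins/discourse-ai/ai-llms/1'>model's configuration page</a>."

Because the href is built from base_path, the link stays correct on subfolder installs.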


@@ -75,6 +75,8 @@ after_initialize do
    DiscourseAi::AiModeration::EntryPoint.new,
  ].each { |a_module| a_module.inject_into(self) }

  register_problem_check ProblemCheck::AiLlmStatus

  register_reviewable_type ReviewableAiChatMessage
  register_reviewable_type ReviewableAiPost


@@ -0,0 +1,68 @@
# frozen_string_literal: true

require "rails_helper"

RSpec.describe ProblemCheck::AiLlmStatus do
  subject(:check) { described_class.new }

  fab!(:llm_model)

  let(:post_url) { "https://api.openai.com/v1/chat/completions" }

  let(:success_response) do
    {
      model: "gpt-4-turbo",
      usage: {
        max_prompt_tokens: 131_072,
      },
      choices: [
        { message: { role: "assistant", content: "test" }, finish_reason: "stop", index: 0 },
      ],
    }.to_json
  end

  let(:error_response) do
    { message: "API key error! Please check you have supplied the correct API key." }.to_json
  end

  before do
    stub_request(:post, post_url).to_return(status: 200, body: success_response, headers: {})
    SiteSetting.ai_summarization_model = "custom:#{llm_model.id}"
    SiteSetting.ai_summarization_enabled = true
  end

  describe "#call" do
    it "does nothing if discourse-ai plugin disabled" do
      SiteSetting.discourse_ai_enabled = false
      expect(check).to be_chill_about_it
    end

    context "with discourse-ai plugin enabled for the site" do
      before { SiteSetting.discourse_ai_enabled = true }

      it "returns a problem with an LLM model" do
        stub_request(:post, post_url).to_return(status: 403, body: error_response, headers: {})
        message =
          "#{I18n.t("dashboard.problem.ai_llm_status", { base_path: Discourse.base_path, model_name: llm_model.display_name, model_id: llm_model.id })}"

        expect(described_class.new.call).to contain_exactly(
          have_attributes(
            identifier: "ai_llm_status",
            target: llm_model.id,
            priority: "high",
            message: message,
            details: {
              model_id: llm_model.id,
              model_name: llm_model.display_name,
              error: JSON.parse(error_response)["message"],
            },
          ),
        )
      end

      it "does not return a problem if the LLM models are working" do
        stub_request(:post, post_url).to_return(status: 200, body: success_response, headers: {})
        expect(check).to be_chill_about_it
      end
    end
  end
end