From 90ce9421082e53af844530dedf85f3dd7e60d5ba Mon Sep 17 00:00:00 2001
From: Keegan George
Date: Tue, 17 Dec 2024 08:00:05 +0900
Subject: [PATCH] FEATURE: Add periodic problem checks for each LLM in use (#1020)

This feature adds a periodic problem check that looks for issues with the
LLMs currently in use. On a set interval, we run a test against each in-use
LLM to confirm it is still operational. If it is not, the problematic LLM is
surfaced to the admin so they can easily update its configuration.
---
 app/models/llm_model.rb                     |  5 ++
 app/services/problem_check/ai_llm_status.rb | 58 ++++++++++++++++
 config/locales/server.en.yml                |  3 +
 plugin.rb                                   |  2 +
 .../problem_check/ai_llm_status_spec.rb     | 68 +++++++++++++++++++
 5 files changed, 136 insertions(+)
 create mode 100644 app/services/problem_check/ai_llm_status.rb
 create mode 100644 spec/services/problem_check/ai_llm_status_spec.rb

diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb
index a78f876c..9a0ad7eb 100644
--- a/app/models/llm_model.rb
+++ b/app/models/llm_model.rb
@@ -13,6 +13,11 @@ class LlmModel < ActiveRecord::Base
   validates_presence_of :name, :api_key
   validates :max_prompt_tokens, numericality: { greater_than: 0 }
   validate :required_provider_params
+  scope :in_use,
+        -> do
+          model_ids = DiscourseAi::Configuration::LlmEnumerator.global_usage.keys
+          where(id: model_ids)
+        end
 
   def self.provider_params
     {
diff --git a/app/services/problem_check/ai_llm_status.rb b/app/services/problem_check/ai_llm_status.rb
new file mode 100644
index 00000000..f0f01cc4
--- /dev/null
+++ b/app/services/problem_check/ai_llm_status.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+class ProblemCheck::AiLlmStatus < ProblemCheck
+  self.priority = "high"
+  self.perform_every = 6.hours
+
+  def call
+    llm_errors
+  end
+
+  def base_path
+    Discourse.base_path
+  end
+
+  private
+
+  def llm_errors
+    return [] if !SiteSetting.discourse_ai_enabled
+    LlmModel.in_use.find_each.filter_map do |model|
+      try_validate(model) { validator.run_test(model) }
+    end
+  end
+
+  def try_validate(model, &blk)
+    begin
+      blk.call
+      nil
+    rescue => e
+      error_message = parse_error_message(e.message)
+      message =
+        "#{I18n.t("dashboard.problem.ai_llm_status", { base_path: base_path, model_name: model.display_name, model_id: model.id })}"
+
+      Problem.new(
+        message,
+        priority: "high",
+        identifier: "ai_llm_status",
+        target: model.id,
+        details: {
+          model_id: model.id,
+          model_name: model.display_name,
+          error: error_message,
+        },
+      )
+    end
+  end
+
+  def validator
+    @validator ||= DiscourseAi::Configuration::LlmValidator.new
+  end
+
+  def parse_error_message(message)
+    begin
+      JSON.parse(message)["message"]
+    rescue JSON::ParserError
+      message.to_s
+    end
+  end
+end
diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index f5dc0d3f..df5ef2da 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -453,3 +453,6 @@ en:
       no_default_llm: The persona must have a default_llm defined.
       user_not_allowed: The user is not allowed to participate in the topic.
       prompt_message_length: The message %{idx} is over the 1000 character limit.
+  dashboard:
+    problem:
+      ai_llm_status: "The LLM model: %{model_name} is encountering issues. Please check the model's configuration page."
diff --git a/plugin.rb b/plugin.rb
index aac3eee3..ea3c719a 100644
--- a/plugin.rb
+++ b/plugin.rb
@@ -75,6 +75,8 @@ after_initialize do
     DiscourseAi::AiModeration::EntryPoint.new,
   ].each { |a_module| a_module.inject_into(self) }
 
+  register_problem_check ProblemCheck::AiLlmStatus
+
   register_reviewable_type ReviewableAiChatMessage
   register_reviewable_type ReviewableAiPost
 
diff --git a/spec/services/problem_check/ai_llm_status_spec.rb b/spec/services/problem_check/ai_llm_status_spec.rb
new file mode 100644
index 00000000..e0f01a81
--- /dev/null
+++ b/spec/services/problem_check/ai_llm_status_spec.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+require "rails_helper"
+
+RSpec.describe ProblemCheck::AiLlmStatus do
+  subject(:check) { described_class.new }
+
+  fab!(:llm_model)
+
+  let(:post_url) { "https://api.openai.com/v1/chat/completions" }
+  let(:success_response) do
+    {
+      model: "gpt-4-turbo",
+      usage: {
+        max_prompt_tokens: 131_072,
+      },
+      choices: [
+        { message: { role: "assistant", content: "test" }, finish_reason: "stop", index: 0 },
+      ],
+    }.to_json
+  end
+
+  let(:error_response) do
+    { message: "API key error! Please check you have supplied the correct API key." }.to_json
+  end
+
+  before do
+    stub_request(:post, post_url).to_return(status: 200, body: success_response, headers: {})
+    SiteSetting.ai_summarization_model = "custom:#{llm_model.id}"
+    SiteSetting.ai_summarization_enabled = true
+  end
+
+  describe "#call" do
+    it "does nothing if discourse-ai plugin disabled" do
+      SiteSetting.discourse_ai_enabled = false
+      expect(check).to be_chill_about_it
+    end
+
+    context "with discourse-ai plugin enabled for the site" do
+      before { SiteSetting.discourse_ai_enabled = true }
+
+      it "returns a problem with an LLM model" do
+        stub_request(:post, post_url).to_return(status: 403, body: error_response, headers: {})
+        message =
+          "#{I18n.t("dashboard.problem.ai_llm_status", { base_path: Discourse.base_path, model_name: llm_model.display_name, model_id: llm_model.id })}"
+
+        expect(described_class.new.call).to contain_exactly(
+          have_attributes(
+            identifier: "ai_llm_status",
+            target: llm_model.id,
+            priority: "high",
+            message: message,
+            details: {
+              model_id: llm_model.id,
+              model_name: llm_model.display_name,
+              error: JSON.parse(error_response)["message"],
+            },
+          ),
+        )
+      end
+
+      it "does not return a problem if the LLM models are working" do
+        stub_request(:post, post_url).to_return(status: 200, body: success_response, headers: {})
+        expect(check).to be_chill_about_it
+      end
+    end
+  end
+end
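Usage note (not part of the patch above): a minimal sketch of how the new check might be exercised by hand from a Rails console once the patch is applied. It relies only on pieces that appear in the diff (ProblemCheck::AiLlmStatus, the LlmModel.in_use scope, and the Problem attributes asserted in the spec); the console session itself is illustrative and assumes at least one LLM is configured and in use.

# Hypothetical Rails console session; assumes SiteSetting.discourse_ai_enabled
# is on, otherwise #call short-circuits and returns an empty array.
check = ProblemCheck::AiLlmStatus.new

# The in_use scope added to LlmModel narrows the query to models that the
# LlmEnumerator reports as being used by some plugin feature.
LlmModel.in_use.map { |model| [model.id, model.display_name] }

# #call returns an empty array when every in-use LLM passes the validator's
# run_test, or one Problem per failing model otherwise.
check.call.each do |problem|
  puts "#{problem.identifier} (model #{problem.target}): #{problem.message}"
end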