DEV: Parallel scheduled admin checks (#24190)

This PR does some preparatory refactoring of scheduled admin checks in order for us to be able to do custom retry strategies for some of them.

Instead of running all checks in sequence inside a single, scheduled job, the scheduled job spawns one new job per check.

In order to be concurrency-safe, we need to change the existing Redis data structure from a string (of serialized JSON) to a list of strings (of serialized JSON).
This commit is contained in:
Ted Johansson 2023-11-03 09:05:29 +08:00 committed by GitHub
parent 4346512f88
commit 47e58357b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 173 additions and 120 deletions

View File

@ -0,0 +1,15 @@
# frozen_string_literal: true
module Jobs
# This job runs a singular scheduled admin check. It is scheduled by
# the ProblemChecks (plural) scheduled job.
class ProblemCheck < ::Jobs::Base
sidekiq_options retry: false
def execute(args)
identifier = args[:check_identifier]
AdminDashboardData.execute_scheduled_check(identifier.to_sym)
end
end
end

View File

@ -5,6 +5,8 @@ module Jobs
# on a regular basis. To add a problem check for this scheduled job run
# call AdminDashboardData.add_scheduled_problem_check
class ProblemChecks < ::Jobs::Scheduled
sidekiq_options retry: false
every 10.minutes
def execute(_args)

View File

@ -35,7 +35,7 @@ class AdminDashboardData
GLOBAL_REPORTS ||= []
PROBLEM_MESSAGE_PREFIX = "admin-problem:"
SCHEDULED_PROBLEM_STORAGE_KEY = "admin-found-scheduled-problems"
SCHEDULED_PROBLEM_STORAGE_KEY = "admin-found-scheduled-problems-list"
def initialize(opts = {})
@opts = opts
@ -89,12 +89,11 @@ class AdminDashboardData
if problem.identifier.present?
return if problems.find { |p| p.identifier == problem.identifier }
end
problems << problem
set_found_scheduled_check_problems(problems)
set_found_scheduled_check_problem(problem)
end
def self.set_found_scheduled_check_problems(problems)
Discourse.redis.setex(SCHEDULED_PROBLEM_STORAGE_KEY, 300, JSON.dump(problems.map(&:to_h)))
def self.set_found_scheduled_check_problem(problem)
Discourse.redis.rpush(SCHEDULED_PROBLEM_STORAGE_KEY, JSON.dump(problem.to_h))
end
def self.clear_found_scheduled_check_problems
@ -103,21 +102,25 @@ class AdminDashboardData
def self.clear_found_problem(identifier)
problems = load_found_scheduled_check_problems
problems.reject! { |p| p.identifier == identifier }
set_found_scheduled_check_problems(problems)
problem = problems.find { |p| p.identifier == identifier }
Discourse.redis.lrem(SCHEDULED_PROBLEM_STORAGE_KEY, 1, JSON.dump(problem.to_h))
end
def self.load_found_scheduled_check_problems
found_problems_json = Discourse.redis.get(SCHEDULED_PROBLEM_STORAGE_KEY)
return [] if found_problems_json.blank?
begin
JSON.parse(found_problems_json).map { |problem| Problem.from_h(problem) }
rescue JSON::ParserError => err
Discourse.warn_exception(
err,
message: "Error parsing found problem JSON in admin dashboard: #{found_problems_json}",
)
[]
found_problems = Discourse.redis.lrange(SCHEDULED_PROBLEM_STORAGE_KEY, 0, -1)
return [] if found_problems.blank?
found_problems.filter_map do |problem|
begin
Problem.from_h(JSON.parse(problem))
rescue JSON::ParserError => err
Discourse.warn_exception(
err,
message: "Error parsing found problem JSON in admin dashboard: #{problem}",
)
nil
end
end
end
@ -145,28 +148,29 @@ class AdminDashboardData
end
def self.execute_scheduled_checks
found_problems = []
problem_scheduled_check_blocks.each do |check_identifier, blk|
problems = nil
problem_scheduled_check_blocks.keys.each do |check_identifier|
Jobs.enqueue(:problem_check, check_identifier: check_identifier.to_s)
end
end
begin
problems = instance_exec(&blk)
rescue StandardError => err
Discourse.warn_exception(
err,
message: "A scheduled admin dashboard problem check (#{check_identifier}) errored.",
)
# we don't want to hold up other checks because this one errored
next
def self.execute_scheduled_check(identifier)
check = problem_scheduled_check_blocks[identifier]
problems = instance_exec(&check)
Array
.wrap(problems)
.compact
.each do |problem|
next if !problem.is_a?(Problem)
add_found_scheduled_check_problem(problem)
end
found_problems += Array.wrap(problems)
end
found_problems.compact.each do |problem|
next if !problem.is_a?(Problem)
add_found_scheduled_check_problem(problem)
end
rescue StandardError => err
Discourse.warn_exception(
err,
message: "A scheduled admin dashboard problem check (#{identifier}) errored.",
)
end
##

View File

@ -0,0 +1,68 @@
# frozen_string_literal: true
RSpec.describe Jobs::ProblemCheck do
after do
Discourse.redis.flushdb
AdminDashboardData.reset_problem_checks
end
it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
AdminDashboardData::Problem.new("big problem")
end
described_class.new.execute(check_identifier: :test_identifier)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.count).to eq(1)
expect(problems.first).to be_a(AdminDashboardData::Problem)
expect(problems.first.to_s).to eq("big problem")
end
it "can handle the problem check returning multiple problems" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
[
AdminDashboardData::Problem.new("big problem"),
AdminDashboardData::Problem.new(
"yuge problem",
priority: "high",
identifier: "config_is_a_mess",
),
]
end
described_class.new.execute(check_identifier: :test_identifier)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.map(&:to_s)).to match_array(["big problem", "yuge problem"])
end
it "does not add the same problem twice if the identifier already exists" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
[
AdminDashboardData::Problem.new(
"yuge problem",
priority: "high",
identifier: "config_is_a_mess",
),
AdminDashboardData::Problem.new(
"nasty problem",
priority: "high",
identifier: "config_is_a_mess",
),
]
end
described_class.new.execute(check_identifier: :test_identifier)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.map(&:to_s)).to match_array(["yuge problem"])
end
it "handles errors from a troublesome check" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
raise StandardError.new("something went wrong")
AdminDashboardData::Problem.new("polling issue")
end
described_class.new.execute(check_identifier: :test_identifier)
expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(0)
end
end

View File

@ -1,63 +1,16 @@
# frozen_string_literal: true
RSpec.describe Jobs::ProblemChecks do
before { Jobs.run_immediately! }
after do
Discourse.redis.flushdb
AdminDashboardData.reset_problem_checks
end
it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
AdminDashboardData::Problem.new("big problem")
end
described_class.new.execute(nil)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.count).to eq(1)
expect(problems.first).to be_a(AdminDashboardData::Problem)
expect(problems.first.to_s).to eq("big problem")
end
it "can handle the problem check returning multiple problems" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
[
AdminDashboardData::Problem.new("big problem"),
AdminDashboardData::Problem.new(
"yuge problem",
priority: "high",
identifier: "config_is_a_mess",
),
]
end
described_class.new.execute(nil)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.map(&:to_s)).to match_array(["big problem", "yuge problem"])
end
it "does not add the same problem twice if the identifier already exists" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
[
AdminDashboardData::Problem.new(
"yuge problem",
priority: "high",
identifier: "config_is_a_mess",
),
AdminDashboardData::Problem.new(
"nasty problem",
priority: "high",
identifier: "config_is_a_mess",
),
]
end
described_class.new.execute(nil)
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.map(&:to_s)).to match_array(["yuge problem"])
end
it "starts with a blank slate every time the checks are run to avoid duplicate problems and to clear no longer firing problems" do
problem_should_fire = true
AdminDashboardData.reset_problem_checks
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
if problem_should_fire
problem_should_fire = false
@ -70,17 +23,4 @@ RSpec.describe Jobs::ProblemChecks do
described_class.new.execute(nil)
expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(0)
end
it "handles errors from a troublesome check and proceeds with the rest" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
raise StandardError.new("something went wrong")
AdminDashboardData::Problem.new("polling issue")
end
AdminDashboardData.add_scheduled_problem_check(:test_identifier_2) do
AdminDashboardData::Problem.new("yuge problem", priority: "high")
end
described_class.new.execute(nil)
expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(1)
end
end

View File

@ -48,25 +48,6 @@ RSpec.describe AdminDashboardData do
end
describe "adding scheduled checks" do
it "adds the passed block to the scheduled checks" do
called = false
AdminDashboardData.add_scheduled_problem_check(:test_identifier) { called = true }
AdminDashboardData.execute_scheduled_checks
expect(called).to eq(true)
end
it "adds a found problem from a scheduled check" do
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
AdminDashboardData::Problem.new("test problem")
end
AdminDashboardData.execute_scheduled_checks
problems = AdminDashboardData.load_found_scheduled_check_problems
expect(problems.first).to be_a(AdminDashboardData::Problem)
expect(problems.first.message).to eq("test problem")
end
it "does not add duplicate problems with the same identifier" do
prob1 = AdminDashboardData::Problem.new("test problem", identifier: "test")
prob2 = AdminDashboardData::Problem.new("test problem 2", identifier: "test")
@ -78,7 +59,7 @@ RSpec.describe AdminDashboardData do
end
it "does not error when loading malformed problems saved in redis" do
Discourse.redis.set(AdminDashboardData::SCHEDULED_PROBLEM_STORAGE_KEY, "{ 'badjson")
Discourse.redis.rpush(AdminDashboardData::SCHEDULED_PROBLEM_STORAGE_KEY, "{ 'badjson")
expect(AdminDashboardData.load_found_scheduled_check_problems).to eq([])
end
@ -101,6 +82,49 @@ RSpec.describe AdminDashboardData do
include_examples "stats cacheable"
end
describe ".execute_scheduled_checks" do
let(:blk) { -> {} }
before { AdminDashboardData.add_scheduled_problem_check(:foo, &blk) }
after { AdminDashboardData.reset_problem_checks }
it do
expect_enqueued_with(job: :problem_check, args: { check_identifier: :foo }) do
AdminDashboardData.execute_scheduled_checks
end
end
end
describe ".execute_scheduled_check" do
context "when problems are found" do
let(:blk) { -> { self::Problem.new("Problem") } }
before do
AdminDashboardData.add_scheduled_problem_check(:foo, &blk)
AdminDashboardData.expects(:add_found_scheduled_check_problem).once
end
after { AdminDashboardData.reset_problem_checks }
it do
expect(described_class.execute_scheduled_check(:foo)).to all(be_a(described_class::Problem))
end
end
context "when check errors out" do
let(:blk) { -> { raise StandardError } }
before do
AdminDashboardData.add_scheduled_problem_check(:foo, &blk)
Discourse.expects(:warn_exception).once
end
after { AdminDashboardData.reset_problem_checks }
it { expect(described_class.execute_scheduled_check(:foo)).to eq(nil) }
end
end
describe "#problem_message_check" do
let(:key) { AdminDashboardData.problem_messages.first }