DEV: Parallel scheduled admin checks (#24190)
This PR does some preparatory refactoring of scheduled admin checks in order for us to be able to do custom retry strategies for some of them. Instead of running all checks in sequence inside a single, scheduled job, the scheduled job spawns one new job per check. In order to be concurrency-safe, we need to change the existing Redis data structure from a string (of serialized JSON) to a list of strings (of serialized JSON).
This commit is contained in:
parent
4346512f88
commit
47e58357b6
|
@ -0,0 +1,15 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Jobs
|
||||
# This job runs a singular scheduled admin check. It is scheduled by
|
||||
# the ProblemChecks (plural) scheduled job.
|
||||
class ProblemCheck < ::Jobs::Base
|
||||
sidekiq_options retry: false
|
||||
|
||||
def execute(args)
|
||||
identifier = args[:check_identifier]
|
||||
|
||||
AdminDashboardData.execute_scheduled_check(identifier.to_sym)
|
||||
end
|
||||
end
|
||||
end
|
|
@ -5,6 +5,8 @@ module Jobs
|
|||
# on a regular basis. To add a problem check for this scheduled job run
|
||||
# call AdminDashboardData.add_scheduled_problem_check
|
||||
class ProblemChecks < ::Jobs::Scheduled
|
||||
sidekiq_options retry: false
|
||||
|
||||
every 10.minutes
|
||||
|
||||
def execute(_args)
|
||||
|
|
|
@ -35,7 +35,7 @@ class AdminDashboardData
|
|||
GLOBAL_REPORTS ||= []
|
||||
|
||||
PROBLEM_MESSAGE_PREFIX = "admin-problem:"
|
||||
SCHEDULED_PROBLEM_STORAGE_KEY = "admin-found-scheduled-problems"
|
||||
SCHEDULED_PROBLEM_STORAGE_KEY = "admin-found-scheduled-problems-list"
|
||||
|
||||
def initialize(opts = {})
|
||||
@opts = opts
|
||||
|
@ -89,12 +89,11 @@ class AdminDashboardData
|
|||
if problem.identifier.present?
|
||||
return if problems.find { |p| p.identifier == problem.identifier }
|
||||
end
|
||||
problems << problem
|
||||
set_found_scheduled_check_problems(problems)
|
||||
set_found_scheduled_check_problem(problem)
|
||||
end
|
||||
|
||||
def self.set_found_scheduled_check_problems(problems)
|
||||
Discourse.redis.setex(SCHEDULED_PROBLEM_STORAGE_KEY, 300, JSON.dump(problems.map(&:to_h)))
|
||||
def self.set_found_scheduled_check_problem(problem)
|
||||
Discourse.redis.rpush(SCHEDULED_PROBLEM_STORAGE_KEY, JSON.dump(problem.to_h))
|
||||
end
|
||||
|
||||
def self.clear_found_scheduled_check_problems
|
||||
|
@ -103,21 +102,25 @@ class AdminDashboardData
|
|||
|
||||
def self.clear_found_problem(identifier)
|
||||
problems = load_found_scheduled_check_problems
|
||||
problems.reject! { |p| p.identifier == identifier }
|
||||
set_found_scheduled_check_problems(problems)
|
||||
problem = problems.find { |p| p.identifier == identifier }
|
||||
Discourse.redis.lrem(SCHEDULED_PROBLEM_STORAGE_KEY, 1, JSON.dump(problem.to_h))
|
||||
end
|
||||
|
||||
def self.load_found_scheduled_check_problems
|
||||
found_problems_json = Discourse.redis.get(SCHEDULED_PROBLEM_STORAGE_KEY)
|
||||
return [] if found_problems_json.blank?
|
||||
begin
|
||||
JSON.parse(found_problems_json).map { |problem| Problem.from_h(problem) }
|
||||
rescue JSON::ParserError => err
|
||||
Discourse.warn_exception(
|
||||
err,
|
||||
message: "Error parsing found problem JSON in admin dashboard: #{found_problems_json}",
|
||||
)
|
||||
[]
|
||||
found_problems = Discourse.redis.lrange(SCHEDULED_PROBLEM_STORAGE_KEY, 0, -1)
|
||||
|
||||
return [] if found_problems.blank?
|
||||
|
||||
found_problems.filter_map do |problem|
|
||||
begin
|
||||
Problem.from_h(JSON.parse(problem))
|
||||
rescue JSON::ParserError => err
|
||||
Discourse.warn_exception(
|
||||
err,
|
||||
message: "Error parsing found problem JSON in admin dashboard: #{problem}",
|
||||
)
|
||||
nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -145,28 +148,29 @@ class AdminDashboardData
|
|||
end
|
||||
|
||||
def self.execute_scheduled_checks
|
||||
found_problems = []
|
||||
problem_scheduled_check_blocks.each do |check_identifier, blk|
|
||||
problems = nil
|
||||
problem_scheduled_check_blocks.keys.each do |check_identifier|
|
||||
Jobs.enqueue(:problem_check, check_identifier: check_identifier.to_s)
|
||||
end
|
||||
end
|
||||
|
||||
begin
|
||||
problems = instance_exec(&blk)
|
||||
rescue StandardError => err
|
||||
Discourse.warn_exception(
|
||||
err,
|
||||
message: "A scheduled admin dashboard problem check (#{check_identifier}) errored.",
|
||||
)
|
||||
# we don't want to hold up other checks because this one errored
|
||||
next
|
||||
def self.execute_scheduled_check(identifier)
|
||||
check = problem_scheduled_check_blocks[identifier]
|
||||
|
||||
problems = instance_exec(&check)
|
||||
|
||||
Array
|
||||
.wrap(problems)
|
||||
.compact
|
||||
.each do |problem|
|
||||
next if !problem.is_a?(Problem)
|
||||
|
||||
add_found_scheduled_check_problem(problem)
|
||||
end
|
||||
|
||||
found_problems += Array.wrap(problems)
|
||||
end
|
||||
|
||||
found_problems.compact.each do |problem|
|
||||
next if !problem.is_a?(Problem)
|
||||
add_found_scheduled_check_problem(problem)
|
||||
end
|
||||
rescue StandardError => err
|
||||
Discourse.warn_exception(
|
||||
err,
|
||||
message: "A scheduled admin dashboard problem check (#{identifier}) errored.",
|
||||
)
|
||||
end
|
||||
|
||||
##
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
RSpec.describe Jobs::ProblemCheck do
|
||||
after do
|
||||
Discourse.redis.flushdb
|
||||
AdminDashboardData.reset_problem_checks
|
||||
end
|
||||
|
||||
it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
AdminDashboardData::Problem.new("big problem")
|
||||
end
|
||||
|
||||
described_class.new.execute(check_identifier: :test_identifier)
|
||||
problems = AdminDashboardData.load_found_scheduled_check_problems
|
||||
expect(problems.count).to eq(1)
|
||||
expect(problems.first).to be_a(AdminDashboardData::Problem)
|
||||
expect(problems.first.to_s).to eq("big problem")
|
||||
end
|
||||
|
||||
it "can handle the problem check returning multiple problems" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
[
|
||||
AdminDashboardData::Problem.new("big problem"),
|
||||
AdminDashboardData::Problem.new(
|
||||
"yuge problem",
|
||||
priority: "high",
|
||||
identifier: "config_is_a_mess",
|
||||
),
|
||||
]
|
||||
end
|
||||
|
||||
described_class.new.execute(check_identifier: :test_identifier)
|
||||
problems = AdminDashboardData.load_found_scheduled_check_problems
|
||||
expect(problems.map(&:to_s)).to match_array(["big problem", "yuge problem"])
|
||||
end
|
||||
|
||||
it "does not add the same problem twice if the identifier already exists" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
[
|
||||
AdminDashboardData::Problem.new(
|
||||
"yuge problem",
|
||||
priority: "high",
|
||||
identifier: "config_is_a_mess",
|
||||
),
|
||||
AdminDashboardData::Problem.new(
|
||||
"nasty problem",
|
||||
priority: "high",
|
||||
identifier: "config_is_a_mess",
|
||||
),
|
||||
]
|
||||
end
|
||||
|
||||
described_class.new.execute(check_identifier: :test_identifier)
|
||||
problems = AdminDashboardData.load_found_scheduled_check_problems
|
||||
expect(problems.map(&:to_s)).to match_array(["yuge problem"])
|
||||
end
|
||||
|
||||
it "handles errors from a troublesome check" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
raise StandardError.new("something went wrong")
|
||||
AdminDashboardData::Problem.new("polling issue")
|
||||
end
|
||||
|
||||
described_class.new.execute(check_identifier: :test_identifier)
|
||||
expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(0)
|
||||
end
|
||||
end
|
|
@ -1,63 +1,16 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
RSpec.describe Jobs::ProblemChecks do
|
||||
before { Jobs.run_immediately! }
|
||||
|
||||
after do
|
||||
Discourse.redis.flushdb
|
||||
AdminDashboardData.reset_problem_checks
|
||||
end
|
||||
|
||||
it "runs the scheduled problem check that has been added and adds the messages to the load_found_scheduled_check_problems array" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
AdminDashboardData::Problem.new("big problem")
|
||||
end
|
||||
|
||||
described_class.new.execute(nil)
|
||||
problems = AdminDashboardData.load_found_scheduled_check_problems
|
||||
expect(problems.count).to eq(1)
|
||||
expect(problems.first).to be_a(AdminDashboardData::Problem)
|
||||
expect(problems.first.to_s).to eq("big problem")
|
||||
end
|
||||
|
||||
it "can handle the problem check returning multiple problems" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
[
|
||||
AdminDashboardData::Problem.new("big problem"),
|
||||
AdminDashboardData::Problem.new(
|
||||
"yuge problem",
|
||||
priority: "high",
|
||||
identifier: "config_is_a_mess",
|
||||
),
|
||||
]
|
||||
end
|
||||
|
||||
described_class.new.execute(nil)
|
||||
problems = AdminDashboardData.load_found_scheduled_check_problems
|
||||
expect(problems.map(&:to_s)).to match_array(["big problem", "yuge problem"])
|
||||
end
|
||||
|
||||
it "does not add the same problem twice if the identifier already exists" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
[
|
||||
AdminDashboardData::Problem.new(
|
||||
"yuge problem",
|
||||
priority: "high",
|
||||
identifier: "config_is_a_mess",
|
||||
),
|
||||
AdminDashboardData::Problem.new(
|
||||
"nasty problem",
|
||||
priority: "high",
|
||||
identifier: "config_is_a_mess",
|
||||
),
|
||||
]
|
||||
end
|
||||
|
||||
described_class.new.execute(nil)
|
||||
problems = AdminDashboardData.load_found_scheduled_check_problems
|
||||
expect(problems.map(&:to_s)).to match_array(["yuge problem"])
|
||||
end
|
||||
|
||||
it "starts with a blank slate every time the checks are run to avoid duplicate problems and to clear no longer firing problems" do
|
||||
problem_should_fire = true
|
||||
AdminDashboardData.reset_problem_checks
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
if problem_should_fire
|
||||
problem_should_fire = false
|
||||
|
@ -70,17 +23,4 @@ RSpec.describe Jobs::ProblemChecks do
|
|||
described_class.new.execute(nil)
|
||||
expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(0)
|
||||
end
|
||||
|
||||
it "handles errors from a troublesome check and proceeds with the rest" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
raise StandardError.new("something went wrong")
|
||||
AdminDashboardData::Problem.new("polling issue")
|
||||
end
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier_2) do
|
||||
AdminDashboardData::Problem.new("yuge problem", priority: "high")
|
||||
end
|
||||
|
||||
described_class.new.execute(nil)
|
||||
expect(AdminDashboardData.load_found_scheduled_check_problems.count).to eq(1)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -48,25 +48,6 @@ RSpec.describe AdminDashboardData do
|
|||
end
|
||||
|
||||
describe "adding scheduled checks" do
|
||||
it "adds the passed block to the scheduled checks" do
|
||||
called = false
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) { called = true }
|
||||
|
||||
AdminDashboardData.execute_scheduled_checks
|
||||
expect(called).to eq(true)
|
||||
end
|
||||
|
||||
it "adds a found problem from a scheduled check" do
|
||||
AdminDashboardData.add_scheduled_problem_check(:test_identifier) do
|
||||
AdminDashboardData::Problem.new("test problem")
|
||||
end
|
||||
|
||||
AdminDashboardData.execute_scheduled_checks
|
||||
problems = AdminDashboardData.load_found_scheduled_check_problems
|
||||
expect(problems.first).to be_a(AdminDashboardData::Problem)
|
||||
expect(problems.first.message).to eq("test problem")
|
||||
end
|
||||
|
||||
it "does not add duplicate problems with the same identifier" do
|
||||
prob1 = AdminDashboardData::Problem.new("test problem", identifier: "test")
|
||||
prob2 = AdminDashboardData::Problem.new("test problem 2", identifier: "test")
|
||||
|
@ -78,7 +59,7 @@ RSpec.describe AdminDashboardData do
|
|||
end
|
||||
|
||||
it "does not error when loading malformed problems saved in redis" do
|
||||
Discourse.redis.set(AdminDashboardData::SCHEDULED_PROBLEM_STORAGE_KEY, "{ 'badjson")
|
||||
Discourse.redis.rpush(AdminDashboardData::SCHEDULED_PROBLEM_STORAGE_KEY, "{ 'badjson")
|
||||
expect(AdminDashboardData.load_found_scheduled_check_problems).to eq([])
|
||||
end
|
||||
|
||||
|
@ -101,6 +82,49 @@ RSpec.describe AdminDashboardData do
|
|||
include_examples "stats cacheable"
|
||||
end
|
||||
|
||||
describe ".execute_scheduled_checks" do
|
||||
let(:blk) { -> {} }
|
||||
|
||||
before { AdminDashboardData.add_scheduled_problem_check(:foo, &blk) }
|
||||
after { AdminDashboardData.reset_problem_checks }
|
||||
|
||||
it do
|
||||
expect_enqueued_with(job: :problem_check, args: { check_identifier: :foo }) do
|
||||
AdminDashboardData.execute_scheduled_checks
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe ".execute_scheduled_check" do
|
||||
context "when problems are found" do
|
||||
let(:blk) { -> { self::Problem.new("Problem") } }
|
||||
|
||||
before do
|
||||
AdminDashboardData.add_scheduled_problem_check(:foo, &blk)
|
||||
AdminDashboardData.expects(:add_found_scheduled_check_problem).once
|
||||
end
|
||||
|
||||
after { AdminDashboardData.reset_problem_checks }
|
||||
|
||||
it do
|
||||
expect(described_class.execute_scheduled_check(:foo)).to all(be_a(described_class::Problem))
|
||||
end
|
||||
end
|
||||
|
||||
context "when check errors out" do
|
||||
let(:blk) { -> { raise StandardError } }
|
||||
|
||||
before do
|
||||
AdminDashboardData.add_scheduled_problem_check(:foo, &blk)
|
||||
Discourse.expects(:warn_exception).once
|
||||
end
|
||||
|
||||
after { AdminDashboardData.reset_problem_checks }
|
||||
|
||||
it { expect(described_class.execute_scheduled_check(:foo)).to eq(nil) }
|
||||
end
|
||||
end
|
||||
|
||||
describe "#problem_message_check" do
|
||||
let(:key) { AdminDashboardData.problem_messages.first }
|
||||
|
||||
|
|
Loading…
Reference in New Issue