FIX: properly log all internal job failures (#17805)
Our internal implementation of #perform on jobs performs remapping. This happens cause we do "exception aggregation". Scheduled jobs run on every site in the multisite cluster, and we report one error per site that failed. During this aggregation we reshape the context from the original object shape returned by mini_scheduler The new integration test ensures this interface will remain stable even if decoupled parts of the code change shapes. Co-authored-by: Alan Guo Xiang Tan <gxtan1990@gmail.com>
This commit is contained in:
parent
0df1c4eab2
commit
4967541275
|
@ -177,6 +177,7 @@ module Discourse
|
||||||
|
|
||||||
job = context[:job]
|
job = context[:job]
|
||||||
|
|
||||||
|
# mini_scheduler direct reporting
|
||||||
if Hash === job
|
if Hash === job
|
||||||
job_class = job["class"]
|
job_class = job["class"]
|
||||||
if job_class
|
if job_class
|
||||||
|
@ -184,6 +185,11 @@ module Discourse
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# internal reporting
|
||||||
|
if job.class == Class && ::Jobs::Base > job
|
||||||
|
job_exception_stats[job] += 1
|
||||||
|
end
|
||||||
|
|
||||||
cm = RailsMultisite::ConnectionManagement
|
cm = RailsMultisite::ConnectionManagement
|
||||||
parent_logger.handle_exception(ex, {
|
parent_logger.handle_exception(ex, {
|
||||||
current_db: cm.current_db,
|
current_db: cm.current_db,
|
||||||
|
|
|
@ -10,12 +10,15 @@ RSpec.describe ::Jobs::Base do
|
||||||
end
|
end
|
||||||
|
|
||||||
class BadJob < ::Jobs::Base
|
class BadJob < ::Jobs::Base
|
||||||
|
class BadJobError < StandardError
|
||||||
|
end
|
||||||
|
|
||||||
attr_accessor :fail_count
|
attr_accessor :fail_count
|
||||||
|
|
||||||
def execute(args)
|
def execute(args)
|
||||||
@fail_count ||= 0
|
@fail_count ||= 0
|
||||||
@fail_count += 1
|
@fail_count += 1
|
||||||
raise StandardError
|
raise BadJobError
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -35,6 +38,29 @@ RSpec.describe ::Jobs::Base do
|
||||||
expect(bad.fail_count).to eq(3)
|
expect(bad.fail_count).to eq(3)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe '#perform' do
|
||||||
|
context 'when a job raises an error' do
|
||||||
|
before do
|
||||||
|
Discourse.reset_job_exception_stats!
|
||||||
|
end
|
||||||
|
|
||||||
|
after do
|
||||||
|
Discourse.reset_job_exception_stats!
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'collects stats for failing jobs in Discourse.job_exception_stats' do
|
||||||
|
bad = BadJob.new
|
||||||
|
3.times do
|
||||||
|
# During test env handle_job_exception errors out
|
||||||
|
# in production this is suppressed
|
||||||
|
expect { bad.perform({}) }.to raise_error(BadJob::BadJobError)
|
||||||
|
end
|
||||||
|
|
||||||
|
expect(Discourse.job_exception_stats).to eq({ BadJob => 3 })
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
it 'delegates the process call to execute' do
|
it 'delegates the process call to execute' do
|
||||||
::Jobs::Base.any_instance.expects(:execute).with('hello' => 'world')
|
::Jobs::Base.any_instance.expects(:execute).with('hello' => 'world')
|
||||||
::Jobs::Base.new.perform('hello' => 'world', 'sync_exec' => true)
|
::Jobs::Base.new.perform('hello' => 'world', 'sync_exec' => true)
|
||||||
|
|
Loading…
Reference in New Issue