diff --git a/app/controllers/topics_controller.rb b/app/controllers/topics_controller.rb index 0949e9d484d..83de9366956 100644 --- a/app/controllers/topics_controller.rb +++ b/app/controllers/topics_controller.rb @@ -385,7 +385,7 @@ class TopicsController < ApplicationController user_id = (current_user.id if current_user) track_visit = should_track_visit_to_topic? - Scheduler::Defer.later do + Scheduler::Defer.later "Track Visit" do View.create_for_parent(Topic, topic_id, ip, user_id) if track_visit TopicUser.track_visit! topic_id, user_id diff --git a/app/jobs/base.rb b/app/jobs/base.rb index d75588fd788..db6ed11ef35 100644 --- a/app/jobs/base.rb +++ b/app/jobs/base.rb @@ -55,6 +55,23 @@ module Jobs true end + # Construct an error context object for Discourse.handle_exception + # Subclasses are encouraged to use this! + # + # `opts` is the arguments passed to execute(). + # `code_desc` is a short string describing what the code was doing (optional). + # `extra` is for any other context you logged. + # Note that, when building your `extra`, that :opts, :job, and :code are used by this method, + # and :current_db and :current_hostname are used by handle_exception. + def error_context(opts, code_desc = nil, extra = {}) + ctx = {} + ctx[:opts] = opts + ctx[:job] = self.class + ctx[:code] = code_desc if code_desc + ctx.merge!(extra) if extra != nil + ctx + end + def self.delayed_perform(opts={}) self.new.perform(opts) end @@ -75,6 +92,7 @@ module Jobs end def perform(*args) + total_db_time = 0 ensure_db_instrumented opts = args.extract_options!.with_indifferent_access @@ -88,7 +106,12 @@ module Jobs if opts.has_key?(:current_site_id) && opts[:current_site_id] != RailsMultisite::ConnectionManagement.current_db raise ArgumentError.new("You can't connect to another database when executing a job synchronously.") else - return execute(opts) + begin + retval = execute(opts) + rescue => exc + Discourse.handle_exception(exc, error_context(opts)) + end + return retval end end @@ -100,11 +123,10 @@ module Jobs RailsMultisite::ConnectionManagement.all_dbs end - total_db_time = 0 exceptions = [] dbs.each do |db| begin - thread_exception = nil + thread_exception = {} # NOTE: This looks odd, in fact it looks crazy but there is a reason # A bug in therubyracer means that under certain conditions running in a fiber # can cause the whole v8 context to corrupt so much that it will hang sidekiq @@ -128,9 +150,15 @@ module Jobs begin RailsMultisite::ConnectionManagement.establish_connection(db: db) I18n.locale = SiteSetting.default_locale - execute(opts) + begin + execute(opts) + rescue => e + thread_exception[:ex] = e + end rescue => e - thread_exception = e + thread_exception[:ex] = e + thread_exception[:code] = "establishing database connection to #{db}" + thread_exception[:other] = { problem_db: db } ensure ActiveRecord::Base.connection_handler.clear_active_connections! total_db_time += Instrumenter.stats.duration_ms @@ -138,17 +166,19 @@ module Jobs end t.join - exceptions << thread_exception if thread_exception + exceptions << thread_exception unless thread_exception.empty? end end if exceptions.length > 0 - exceptions[1..-1].each do |exception| - Discourse.handle_exception(exception, opts) + exceptions.each do |exception_hash| + Discourse.handle_exception(exception_hash[:ex], + error_context(opts, exception_hash[:code], exception_hash[:other])) end - raise exceptions[0] + raise HandledExceptionWrapper.new exceptions[0][:ex] end + nil ensure ActiveRecord::Base.connection_handler.clear_active_connections! @db_duration = total_db_time @@ -156,6 +186,14 @@ module Jobs end + class HandledExceptionWrapper < Exception + attr_accessor :wrapped + def initialize(ex) + super("Wrapped #{ex.class}: #{ex.message}") + @wrapped = ex + end + end + class Scheduled < Base extend Scheduler::Schedule end diff --git a/app/jobs/scheduled/periodical_updates.rb b/app/jobs/scheduled/periodical_updates.rb index b43f1eb81da..27aaac4d433 100644 --- a/app/jobs/scheduled/periodical_updates.rb +++ b/app/jobs/scheduled/periodical_updates.rb @@ -29,7 +29,10 @@ module Jobs # Forces rebake of old posts where needed, as long as no system avatars need updating unless UserAvatar.where("last_gravatar_download_attempt IS NULL").limit(1).first - Post.rebake_old(250) + problems = Post.rebake_old(250) + problems.each do |hash| + Discourse.handle_exception(hash[:ex], error_context(args, "Rebaking post id #{hash[:post].id}", post_id: hash[:post].id)) + end end end diff --git a/app/jobs/scheduled/poll_mailbox.rb b/app/jobs/scheduled/poll_mailbox.rb index 564d20c6c58..05c5da4eca5 100644 --- a/app/jobs/scheduled/poll_mailbox.rb +++ b/app/jobs/scheduled/poll_mailbox.rb @@ -13,6 +13,7 @@ module Jobs include Email::BuildEmailHelper def execute(args) + @args = args if SiteSetting.pop3s_polling_enabled? poll_pop3s end @@ -47,7 +48,7 @@ module Jobs client_message = RejectionMailer.send_rejection(message.from, message.body, message.to, message_template) Email::Sender.new(client_message, message_template).send else - Discourse.handle_exception(e, { code: "unknown error for incoming email", mail: mail_string} ) + Discourse.handle_exception(e, error_context(@args, "unknown error when processing incoming email", mail: mail_string)) end ensure mail.delete @@ -70,7 +71,7 @@ module Jobs pop.finish end rescue Net::POPAuthenticationError => e - Discourse.handle_exception(e, { code: "signing in for incoming email" } ) + Discourse.handle_exception(e, error_context(@args, "Signing in to poll incoming email")) end end diff --git a/app/models/post.rb b/app/models/post.rb index 1c5a9f185d4..6f8389d9f24 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -319,14 +319,16 @@ class Post < ActiveRecord::Base end def self.rebake_old(limit) + problems = [] Post.where('baked_version IS NULL OR baked_version < ?', BAKED_VERSION) .limit(limit).each do |p| begin p.rebake! rescue => e - Discourse.handle_exception(e) + problems << {post: p, ex: e} end end + problems end def rebake!(opts={}) diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb index 86103246183..df5662f302a 100644 --- a/config/initializers/sidekiq.rb +++ b/config/initializers/sidekiq.rb @@ -33,7 +33,7 @@ if Sidekiq.server? manager.tick rescue => e # the show must go on - Discourse.handle_exception(e) + Discourse.handle_exception(e, {code: "Ticking scheduling manager"}) end sleep 1 end @@ -43,7 +43,7 @@ end Sidekiq.logger.level = Logger::WARN -class SidekiqLogsterReporter +class SidekiqLogsterReporter < Sidekiq::ExceptionHandler::Logger def call(ex, context = {}) # Pass context to Logster fake_env = {} @@ -65,6 +65,7 @@ class SidekiqLogsterReporter end end +Sidekiq.error_handlers.clear Sidekiq.error_handlers << SidekiqLogsterReporter.new diff --git a/lib/auth/default_current_user_provider.rb b/lib/auth/default_current_user_provider.rb index 716dff9adf2..4b71c9682b4 100644 --- a/lib/auth/default_current_user_provider.rb +++ b/lib/auth/default_current_user_provider.rb @@ -34,7 +34,7 @@ class Auth::DefaultCurrentUserProvider if current_user && should_update_last_seen? u = current_user - Scheduler::Defer.later do + Scheduler::Defer.later "Updating Last Seen" do u.update_last_seen! u.update_ip_address!(request.ip) end diff --git a/lib/discourse.rb b/lib/discourse.rb index e8a8b772ee6..5184dacbb41 100644 --- a/lib/discourse.rb +++ b/lib/discourse.rb @@ -9,8 +9,11 @@ module Discourse extend Sidekiq::ExceptionHandler end - # Responsible for exceptions in Sidekiq jobs - not requests! - def self.handle_exception(ex, context = nil, parent_logger = nil) + # Log an exception. + # + # If your code is in a scheduled job, it is recommended to use the error_context() function to pass the context. + # See app/jobs/base.rb for the error_context function. + def self.handle_exception(ex, context = {}, parent_logger = nil) context ||= {} parent_logger ||= SidekiqExceptionHandler diff --git a/lib/scheduler/defer.rb b/lib/scheduler/defer.rb index 5c27c56b53a..d9587302d0a 100644 --- a/lib/scheduler/defer.rb +++ b/lib/scheduler/defer.rb @@ -14,10 +14,10 @@ module Scheduler @async = val end - def later(&blk) + def later(desc = nil, &blk) if @async start_thread unless @thread.alive? - @queue << [RailsMultisite::ConnectionManagement.current_db, blk] + @queue << [RailsMultisite::ConnectionManagement.current_db, blk, desc] else blk.call end @@ -46,11 +46,15 @@ module Scheduler end def do_work - db, job = @queue.deq - RailsMultisite::ConnectionManagement.establish_connection(db: db) - job.call + db, job, desc = @queue.deq + begin + RailsMultisite::ConnectionManagement.establish_connection(db: db) + job.call + rescue => ex + Discourse.handle_exception(ex, {code: "Running deferred code '#{desc}'"}) + end rescue => ex - Discourse.handle_exception(ex) + Discourse.handle_exception(ex, {code: "Processing deferred code queue"}) ensure ActiveRecord::Base.connection_handler.clear_active_connections! end diff --git a/lib/scheduler/manager.rb b/lib/scheduler/manager.rb index 3d48eb101ec..f77a83b5cb5 100644 --- a/lib/scheduler/manager.rb +++ b/lib/scheduler/manager.rb @@ -42,13 +42,13 @@ module Scheduler def keep_alive @manager.keep_alive rescue => ex - Discourse.handle_exception(ex) + Discourse.handle_exception(ex, {code: "Scheduling manager keep-alive"}) end def reschedule_orphans @manager.reschedule_orphans! rescue => ex - Discourse.handle_exception(ex) + Discourse.handle_exception(ex, {code: "Scheduling manager orphan rescheduler"}) end def process_queue @@ -62,8 +62,11 @@ module Scheduler info.prev_result = "RUNNING" @mutex.synchronize { info.write! } klass.new.perform + rescue Jobs::HandledExceptionWrapper + # Discourse.handle_exception was already called + failed = true rescue => e - Discourse.handle_exception(e) + Discourse.handle_exception(e, {code: "Running a scheduled job", job: klass}) failed = true end duration = ((Time.now.to_f - start) * 1000).to_i @@ -74,7 +77,7 @@ module Scheduler @mutex.synchronize { info.write! } end rescue => ex - Discourse.handle_exception(ex) + Discourse.handle_exception(ex, {code: "Processing scheduled job queue"}) ensure @running = false end diff --git a/spec/components/discourse_spec.rb b/spec/components/discourse_spec.rb index c693b4aa50a..819732eb3ad 100644 --- a/spec/components/discourse_spec.rb +++ b/spec/components/discourse_spec.rb @@ -117,22 +117,37 @@ describe Discourse do end context "#handle_exception" do - class TempLogger + + class TempSidekiqLogger < Sidekiq::ExceptionHandler::Logger attr_accessor :exception, :context - def handle_exception(exception, context) - self.exception = exception - self.context = context + def call(ex, ctx) + self.exception = ex + self.context = ctx end end + + let!(:logger) { TempSidekiqLogger.new } + + before do + Sidekiq.error_handlers.clear + Sidekiq.error_handlers << logger + end it "should not fail when called" do - logger = TempLogger.new exception = StandardError.new - Discourse.handle_exception(exception, nil, logger) + Discourse.handle_exception(exception, nil, nil) logger.exception.should == exception logger.context.keys.should == [:current_db, :current_hostname] end + + it "correctly passes extra context" do + exception = StandardError.new + + Discourse.handle_exception(exception, {code: "Doing a test", post_id: 31}, nil) + logger.exception.should == exception + logger.context.keys.sort.should == [:current_db, :current_hostname, :code, :post_id].sort + end end end diff --git a/spec/jobs/jobs_base_spec.rb b/spec/jobs/jobs_base_spec.rb index f6f59d30114..3fceba53e6c 100644 --- a/spec/jobs/jobs_base_spec.rb +++ b/spec/jobs/jobs_base_spec.rb @@ -27,13 +27,13 @@ describe Jobs::Base do end it 'handles errors in multisite' do - RailsMultisite::ConnectionManagement.expects(:all_dbs).returns(['default','default']) - # just stub so logs are not noisy - Discourse.expects(:handle_exception).returns(nil) + RailsMultisite::ConnectionManagement.expects(:all_dbs).returns(['default','default','default']) + # one exception per database + Discourse.expects(:handle_exception).times(3) bad = BadJob.new expect{bad.perform({})}.to raise_error - bad.fail_count.should == 2 + bad.fail_count.should == 3 end it 'delegates the process call to execute' do