2014-01-30 00:21:38 -05:00
|
|
|
# Initially we used sidetiq, which caused the following problems:
|
|
|
|
#
|
|
|
|
# 1. No mechanism to add "randomisation" into job execution
|
|
|
|
# 2. No stats about previous runs or failures
|
|
|
|
# 3. Dependency on ice_cube gem causes runaway CPU
|
|
|
|
|
2014-04-11 01:43:33 -04:00
|
|
|
require_dependency 'distributed_mutex'
|
|
|
|
|
2014-01-30 00:21:38 -05:00
|
|
|
module Scheduler
|
|
|
|
class Manager
|
|
|
|
attr_accessor :random_ratio, :redis
|
|
|
|
|
|
|
|
|
|
|
|
class Runner
|
|
|
|
# Sets up the runner for +manager+ and starts its three background threads:
# an orphan rescheduler, a keep-alive heartbeat and the queue worker.
#
# @param manager [Scheduler::Manager] manager whose jobs this runner executes
def initialize(manager)
  @mutex = Mutex.new
  @queue = Queue.new
  @manager = manager

  # Once a minute, ask the manager to reschedule orphaned jobs.
  @reschedule_orphans_thread = Thread.new do
    loop do
      sleep 1.minute
      @mutex.synchronize { reschedule_orphans }
    end
  end

  # Refresh the manager's keep-alive twice per keep-alive window.
  @keep_alive_thread = Thread.new do
    loop do
      @mutex.synchronize { keep_alive }
      sleep(@manager.keep_alive_duration / 2)
    end
  end

  # Worker: run queued jobs one at a time, forever.
  @thread = Thread.new do
    loop { process_queue }
  end
end
|
|
|
|
|
|
|
|
# Forwards the heartbeat to the manager. Failures are reported through
# Discourse.handle_exception instead of propagating to the calling thread.
def keep_alive
  @manager.keep_alive
rescue => error
  context = {message: "Scheduling manager keep-alive"}
  Discourse.handle_exception(error, context)
end
|
|
|
|
|
|
|
|
# Asks the manager to reschedule orphaned jobs. Failures are reported through
# Discourse.handle_exception instead of propagating to the calling thread.
def reschedule_orphans
  @manager.reschedule_orphans!
rescue => error
  context = {message: "Scheduling manager orphan rescheduler"}
  Discourse.handle_exception(error, context)
end
|
|
|
|
|
|
|
|
# Takes the next job class off the queue (blocking until one is available),
# runs it, and records the outcome on the job's schedule info: result
# ("RUNNING" while executing, then "OK" or "FAILED"), duration in
# milliseconds, and a cleared owner.
#
# Errors raised outside the job itself are reported through
# Discourse.handle_exception; @running is always reset on the way out.
def process_queue
  job_class = @queue.deq
  # hack alert, I need to both deq and set @running atomically.
  @running = true
  started_at = Time.now.to_f
  info = @mutex.synchronize { @manager.schedule_info(job_class) }

  job_failed =
    begin
      info.prev_result = "RUNNING"
      @mutex.synchronize { info.write! }
      job_class.new.perform
      false
    rescue Jobs::HandledExceptionWrapper
      # Discourse.handle_exception was already called, and we don't have any extra info to give
      true
    rescue => e
      Discourse.handle_exception(e, {message: "Running a scheduled job", job: job_class})
      true
    end

  info.prev_duration = ((Time.now.to_f - started_at) * 1000).to_i
  info.prev_result = job_failed ? "FAILED" : "OK"
  info.current_owner = nil
  # Persisting the final state is retried a few times.
  attempts(3) do
    @mutex.synchronize { info.write! }
  end
rescue => ex
  Discourse.handle_exception(ex, {message: "Processing scheduled job queue"})
ensure
  @running = false
end
|
|
|
|
|
|
|
|
# Kills the worker, keep-alive and orphan-rescheduler threads (in that order)
# while holding the runner mutex.
def stop!
  @mutex.synchronize do
    workers = [@thread, @keep_alive_thread, @reschedule_orphans_thread]
    workers.each(&:kill)
    workers.last
  end
end
|
|
|
|
|
|
|
|
# Appends a job class to the work queue consumed by process_queue.
#
# @param klass [Class] job class to run
def enq(klass)
  @queue.push(klass)
end
|
|
|
|
|
|
|
|
# Polls until the queue is empty (or a consumer is waiting on it) and no job
# is marked as running.
def wait_till_done
  sleep 0.001 until @queue.empty? || @queue.num_waiting > 0
  # this is a hack, but is only used for test anyway
  sleep 0.001
  sleep 0.001 while @running
end
|
2014-02-11 21:32:34 -05:00
|
|
|
|
|
|
|
# Runs the given block up to +n+ times, stopping at the first run that does
# not raise. A random delay of up to one second is inserted between failed
# attempts. Errors are swallowed: this is deliberately best-effort.
#
# @param n [Integer] maximum number of attempts
# @yield the operation to try
# @return [nil, Integer] nil once the block succeeds, +n+ if every attempt failed
def attempts(n)
  n.times do |attempt|
    begin
      yield
      break
    rescue
      # No retry follows the final attempt, so waiting after it would only
      # stall the caller.
      sleep Random.rand unless attempt == n - 1
    end
  end
end
|
|
|
|
|
2014-01-30 00:21:38 -05:00
|
|
|
end
|
|
|
|
|
2014-02-05 18:14:41 -05:00
|
|
|
# Builds a manager with skip_runner set, i.e. one that starts no Runner and
# is not registered as the current manager.
#
# @param redis [Object, nil] Redis connection handed to the constructor
def self.without_runner(redis=nil)
  new(redis, true)
end
|
|
|
|
|
|
|
|
# @param redis [Object, nil] Redis connection to use; falls back to the
#   global $redis when nil
# @param skip_runner [Boolean] when true, no Runner is started and this
#   manager is not registered as Manager.current
def initialize(redis = nil, skip_runner = false)
  # An explicitly supplied connection wins; $redis is only the fallback.
  @redis = redis || $redis
  @random_ratio = 0.1
  # Assign all state before the Runner spawns threads that use this object.
  @manager_id = SecureRandom.hex

  unless skip_runner
    @runner = Runner.new(self)
    self.class.current = self
  end
end
|
|
|
|
|
2014-02-05 18:14:41 -05:00
|
|
|
# Class-level reader/writer pair for the manager registered as current
# (stored in the class-level @current).
class << self
  attr_accessor :current
end
|
|
|
|
|
2014-01-30 00:21:38 -05:00
|
|
|
# Builds the ScheduleInfo for +klass+, bound to this manager.
#
# @param klass [Class] scheduled job class
# @return [ScheduleInfo]
def schedule_info(klass)
  owner = self
  ScheduleInfo.new(klass, owner)
end
|
|
|
|
|
|
|
|
# Returns the next_run value recorded on the schedule info of +klass+.
#
# @param klass [Class] scheduled job class
def next_run(klass)
  info = schedule_info(klass)
  info.next_run
end
|
|
|
|
|
|
|
|
# Calls schedule! on the schedule info of +klass+ while holding the
# scheduler lock.
#
# @param klass [Class] scheduled job class
def ensure_schedule!(klass)
  lock { schedule_info(klass).schedule! }
end
|
|
|
|
|
|
|
|
# Calls del! on the schedule info of +klass+ while holding the scheduler lock.
#
# @param klass [Class] scheduled job class
def remove(klass)
  lock { schedule_info(klass).del! }
end
|
|
|
|
|
2014-02-11 21:32:34 -05:00
|
|
|
# Under the scheduler lock, walks every entry in the queue and marks jobs as
# 'ORPHAN' (due to run immediately) when they are QUEUED or RUNNING but have
# no owner, or their owner's key is no longer present in Redis.
def reschedule_orphans!
  lock do
    queued_keys = redis.zrange(Manager.queue_key, 0, -1)
    queued_keys.each do |key|
      klass = get_klass(key)
      next unless klass

      info = schedule_info(klass)
      next unless %w[QUEUED RUNNING].include?(info.prev_result)
      # The owner's key still exists in Redis: the job is not orphaned.
      next if !info.current_owner.blank? && redis.get(info.current_owner)

      info.prev_result = 'ORPHAN'
      info.next_run = Time.now.to_i
      info.write!
    end
  end
end
|
|
|
|
|
|
|
|
# Resolves a queue entry back to its job class.
#
# @param name [String] constant name stored in the queue
# @return [Class, nil] the constant, or nil when resolving it raises NameError
def get_klass(name)
  name.constantize
rescue NameError
  # Unknown constant: let the caller decide what to do with the entry.
  nil
end
|
|
|
|
|
2014-01-30 00:21:38 -05:00
|
|
|
# Under the scheduler lock, looks at the first entry of the queue and, if it
# is due, marks it QUEUED, reschedules it and hands it to the runner.
# Entries whose class can no longer be resolved are removed from the queue.
def tick
  lock do
    (key, due), _ = redis.zrange(Manager.queue_key, 0, 0, withscores: true)
    return unless key
    # Nothing to do until the earliest entry is due.
    next if due.to_i > Time.now.to_i

    klass = get_klass(key)
    unless klass
      # corrupt key, nuke it (renamed job or something)
      redis.zrem(Manager.queue_key, key)
      return
    end

    info = schedule_info(klass)
    info.prev_run = Time.now.to_i
    info.prev_result = "QUEUED"
    info.prev_duration = -1
    info.next_run = nil
    info.current_owner = identity_key
    info.schedule!
    @runner.enq(klass)
  end
end
|
|
|
|
|
|
|
|
# Runs one tick, then waits for the runner to report it is done.
def blocking_tick
  tick
  runner = @runner
  runner.wait_till_done
end
|
|
|
|
|
|
|
|
# Stops the runner threads and clears the current-manager registration.
#
# A manager created without a runner (skip_runner) has nothing to stop and
# was never registered as current, so this is a no-op for it instead of a
# NoMethodError on nil.
def stop!
  return unless @runner

  @runner.stop!
  self.class.current = nil
end
|
|
|
|
|
2014-02-11 21:32:34 -05:00
|
|
|
# Expiry, in seconds, passed to Redis SETEX for this manager's identity key
# by keep_alive.
def keep_alive_duration
  60 # seconds
end
|
|
|
|
|
|
|
|
# Writes this manager's identity key to Redis with an empty value and an
# expiry of keep_alive_duration seconds.
def keep_alive
  redis.setex(identity_key, keep_alive_duration, "")
end
|
2014-01-30 00:21:38 -05:00
|
|
|
|
|
|
|
# Runs the given block inside a DistributedMutex keyed by Manager.lock_key.
#
# @yield the critical section
def lock
  mutex = DistributedMutex.new(Manager.lock_key)
  mutex.synchronize { yield }
end
|
|
|
|
|
2014-02-11 21:32:34 -05:00
|
|
|
|
2014-02-05 18:14:41 -05:00
|
|
|
# Collects every live Scheduler::Schedule object that reports scheduled?,
# keeping only the first one seen for each name.
#
# @return [Array] the discovered schedules
def self.discover_schedules
  # hack for development reloader is crazytown
  # multiple classes with same name can be in
  # object space
  seen_names = Set.new
  found = []
  ObjectSpace.each_object(Scheduler::Schedule) do |candidate|
    next unless candidate.scheduled?
    # Set#add? returns nil when the name was already recorded.
    found << candidate if seen_names.add?(candidate.to_s)
  end
  found
end
|
|
|
|
|
2014-02-11 21:32:34 -05:00
|
|
|
# Guards the class-level counter used by seq.
@mutex = Mutex.new

# Returns the next value of a class-level counter, starting at 1. The
# increment happens under the class-level mutex.
def self.seq
  @mutex.synchronize { @i = (@i || 0) + 1 }
end
|
|
|
|
|
|
|
|
# Redis key identifying this manager instance, built from the host name, the
# process id and a per-class sequence number. Memoized after the first call.
#
# @return [String]
def identity_key
  # `hostname` output ends with a newline; strip it so the key does not
  # contain embedded whitespace.
  @identity_key ||= "_scheduler_#{`hostname`.strip}:#{Process.pid}:#{self.class.seq}"
end
|
|
|
|
|
2014-01-30 00:21:38 -05:00
|
|
|
# Name of the distributed lock used by Manager#lock.
#
# @return [String]
def self.lock_key
  name = "_scheduler_lock_"
  name
end
|
|
|
|
|
|
|
|
# Redis key of the sorted set that holds the job queue.
#
# @return [String]
def self.queue_key
  name = "_scheduler_queue_"
  name
end
|
|
|
|
|
|
|
|
# Builds the "_scheduler_"-prefixed key for a job class.
#
# @param klass [Class, #to_s] job class
# @return [String]
def self.schedule_key(klass)
  key = "_scheduler_#{klass}"
  key
end
|
|
|
|
end
|
|
|
|
end
|