DEV: Add `DISCOURSE_DUMP_BACKTRACES_ON_UNICORN_WORKER_TIMEOUT` env (#27199)
This commit adds a `DISCOURSE_DUMP_BACKTRACES_ON_UNICORN_WORKER_TIMEOUT` environment variable that, when set, dumps the backtraces of all threads of a Unicorn worker 2 seconds before the worker times out. In development, the backtraces are written to `STDOUT`; in production, they are written to `unicorn.stdout.log`. Dumping every thread's backtrace makes it easier to identify why a Unicorn worker timed out.
This commit is contained in:
parent
3a91a92563
commit
6cafe59c76
|
@ -268,4 +268,9 @@ end
|
|||
after_fork do |server, worker|
  DiscourseEvent.trigger(:web_fork_started)
  Discourse.after_fork

  # On USR2, print a timestamped header followed by the backtrace of every
  # live thread in this process; threads are separated by a blank line.
  Signal.trap("USR2") do
    timestamp = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.%6N")

    # Thread#backtrace may be nil, in which case that thread contributes an
    # empty entry.
    backtraces = Thread.list.map { |thread| Array(thread.backtrace).join("\n") }.join("\n\n")

    puts <<~MSG
      [#{timestamp} ##{Process.pid}] Received USR2 signal, dumping backtrace for all threads
      #{backtraces}
    MSG
  end
end
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
if ENV["DISCOURSE_DUMP_BACKTRACES_ON_UNICORN_WORKER_TIMEOUT"] && defined?(Unicorn::HttpServer)
|
||||
# Replacement for Unicorn::HttpServer#murder_lazy_workers that asks a worker
# to dump its thread backtraces (via USR2) shortly before it is killed for
# exceeding the timeout. Everything outside the MONKEY PATCH markers is kept
# identical to the upstream method so the patch stays easy to diff.
module UnicornHTTPServerPatch
  # Original source: https://github.com/defunkt/unicorn/blob/6c9c442fb6aa12fd871237bc2bb5aec56c5b3538/lib/unicorn/http_server.rb#L477-L496
  #
  # Checks every tracked worker against @timeout:
  # * a worker with fewer than 2 seconds left is sent USR2 once per tick value
  #   (i.e. once per stuck request), so repeated passes do not trigger
  #   duplicate dumps;
  # * a worker past the timeout is sent KILL.
  #
  # Returns the number of seconds until the next check: the smallest remaining
  # time across workers, capped at @timeout - 1 and never less than 1.
  def murder_lazy_workers
    next_sleep = @timeout - 1
    now = time_now.to_i

    # START MONKEY PATCH
    # PID => tick for which USR2 has already been sent. Entries for PIDs that
    # are no longer tracked are dropped so a reused PID starts fresh.
    @backtrace_dump_ticks ||= {}
    @backtrace_dump_ticks.delete_if { |pid, _| !@workers.key?(pid) }
    # END MONKEY PATCH

    @workers.dup.each_pair do |wpid, worker|
      tick = worker.tick
      0 == tick and next # skip workers that haven't processed any clients
      diff = now - tick
      tmp = @timeout - diff

      # START MONKEY PATCH
      # Signal only the first time this (PID, tick) pair enters the window;
      # the tick changes when the worker moves on to other work.
      if tmp < 2 && @backtrace_dump_ticks[wpid] != tick
        @backtrace_dump_ticks[wpid] = tick
        logger.error "worker=#{worker.nr} PID:#{wpid} running too long " \
                       "(#{diff}s), sending USR2 to dump thread backtraces"
        kill_worker(:USR2, wpid)
      end
      # END MONKEY PATCH

      if tmp >= 0
        next_sleep > tmp and next_sleep = tmp
        next
      end
      next_sleep = 0
      logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \
                     "(#{diff}s > #{@timeout}s), killing"

      kill_worker(:KILL, wpid) # take no prisoners for timeout violations
    end
    next_sleep <= 0 ? 1 : next_sleep
  end
end
|
||||
|
||||
Unicorn::HttpServer.prepend(UnicornHTTPServerPatch)
|
||||
end
|
Loading…
Reference in New Issue