DEV: Add `DISCOURSE_DUMP_BACKTRACES_ON_UNICORN_WORKER_TIMEOUT` env (#27199)
This commit adds a `DISCOURSE_DUMP_BACKTRACES_ON_UNICORN_WORKER_TIMEOUT` environment variable that, when set, dumps the backtraces of all threads of a Unicorn worker 2 seconds before the worker times out. In development, the backtraces are dumped to `STDOUT`; in production, they are dumped to `unicorn.stdout.log`. Dumping all of the backtraces makes it easier to identify why a Unicorn worker is timing out.
This commit is contained in:
parent
3a91a92563
commit
6cafe59c76
|
@ -268,4 +268,9 @@ end
|
||||||
after_fork do |server, worker|
  DiscourseEvent.trigger(:web_fork_started)
  Discourse.after_fork

  # On USR2, print one message containing a timestamped header followed by the
  # backtrace of every thread in this process (threads separated by a blank
  # line). A thread whose backtrace is nil contributes an empty entry.
  Signal.trap("USR2") do
    timestamp = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.%6N")
    pid = Process.pid
    backtraces = Thread.list.map { |thread| thread.backtrace&.join("\n").to_s }.join("\n\n")

    puts <<~MSG
      [#{timestamp} ##{pid}] Received USR2 signal, dumping backtrace for all threads
      #{backtraces}
    MSG
  end
end
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
if ENV["DISCOURSE_DUMP_BACKTRACES_ON_UNICORN_WORKER_TIMEOUT"] && defined?(Unicorn::HttpServer)
  # Prepended onto Unicorn::HttpServer to override `murder_lazy_workers`.
  #
  # In addition to the upstream behaviour (SIGKILL any worker whose last tick
  # is older than @timeout), a worker that has fewer than 2 seconds left is
  # sent USR2 so that it can dump the backtraces of its threads before it is
  # killed. The worker is expected to have a USR2 handler installed for that
  # purpose.
  module UnicornHTTPServerPatch
    # Original source: https://github.com/defunkt/unicorn/blob/6c9c442fb6aa12fd871237bc2bb5aec56c5b3538/lib/unicorn/http_server.rb#L477-L496
    #
    # Code outside the MONKEY PATCH markers is kept identical to upstream so
    # the override can be diffed against newer Unicorn releases.
    #
    # @return [Integer] the smallest remaining time among the inspected
    #   workers, capped at `@timeout - 1`, and never less than 1
    def murder_lazy_workers
      next_sleep = @timeout - 1
      now = time_now.to_i

      # START MONKEY PATCH
      # Maps worker PID => the tick value for which USR2 has already been
      # sent. Without this, USR2 would be re-sent (and the error re-logged) on
      # every pass while the worker stays stuck, including the pass that
      # SIGKILLs it. Keying on the tick lets a worker that recovered and later
      # gets stuck again trigger a fresh dump.
      @discourse_backtrace_dump_ticks ||= {}
      # Forget PIDs that are no longer tracked so the hash cannot grow forever.
      @discourse_backtrace_dump_ticks.keep_if { |pid, _tick| @workers.key?(pid) }
      # END MONKEY PATCH

      @workers.dup.each_pair do |wpid, worker|
        tick = worker.tick
        0 == tick and next # skip workers that haven't processed any clients
        diff = now - tick
        tmp = @timeout - diff

        # START MONKEY PATCH
        if tmp < 2 && @discourse_backtrace_dump_ticks[wpid] != tick
          @discourse_backtrace_dump_ticks[wpid] = tick
          logger.error "worker=#{worker.nr} PID:#{wpid} running too long " \
                         "(#{diff}s), sending USR2 to dump thread backtraces"
          kill_worker(:USR2, wpid)
        end
        # END MONKEY PATCH

        if tmp >= 0
          next_sleep > tmp and next_sleep = tmp
          next
        end
        next_sleep = 0
        logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \
                       "(#{diff}s > #{@timeout}s), killing"

        kill_worker(:KILL, wpid) # take no prisoners for timeout violations
      end
      next_sleep <= 0 ? 1 : next_sleep
    end
  end

  Unicorn::HttpServer.prepend(UnicornHTTPServerPatch)
end
|
Loading…
Reference in New Issue