HBASE-8979 JVMClusterUtil will join forever on a stuck regionserver <--- zombie maker

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1504527 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2013-07-18 17:01:44 +00:00
parent c4f38c18f4
commit 508cb1bd98
1 changed file with 9 additions and 5 deletions

View File

@@ -251,16 +251,17 @@ public class JVMClusterUtil {
} }
boolean wasInterrupted = false; boolean wasInterrupted = false;
final long maxTime = System.currentTimeMillis() + 120 * 1000; final long maxTime = System.currentTimeMillis() + 30 * 1000;
if (regionservers != null) { if (regionservers != null) {
// first try nicely. // first try nicely.
for (RegionServerThread t : regionservers) { for (RegionServerThread t : regionservers) {
t.getRegionServer().stop("Shutdown requested"); t.getRegionServer().stop("Shutdown requested");
} }
for (RegionServerThread t : regionservers) { for (RegionServerThread t : regionservers) {
if (t.isAlive() && !wasInterrupted && System.currentTimeMillis() < maxTime) { long now = System.currentTimeMillis();
if (t.isAlive() && !wasInterrupted && now < maxTime) {
try { try {
t.join(maxTime); t.join(maxTime - now);
} catch (InterruptedException e) { } catch (InterruptedException e) {
LOG.info("Got InterruptedException on shutdown - " + LOG.info("Got InterruptedException on shutdown - " +
"not waiting anymore on region server ends", e); "not waiting anymore on region server ends", e);
@@ -270,17 +271,20 @@ public class JVMClusterUtil {
} }
// Let's try to interrupt the remaining threads if any. // Let's try to interrupt the remaining threads if any.
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 100; ++i) {
boolean atLeastOneLiveServer = false;
for (RegionServerThread t : regionservers) { for (RegionServerThread t : regionservers) {
if (t.isAlive()) { if (t.isAlive()) {
atLeastOneLiveServer = true;
try { try {
LOG.warn("RegionServerThreads remaining, give one more chance before interrupting"); LOG.warn("RegionServerThreads remaining, give one more chance before interrupting");
t.join(10); t.join(1000);
} catch (InterruptedException e) { } catch (InterruptedException e) {
wasInterrupted = true; wasInterrupted = true;
} }
} }
} }
if (!atLeastOneLiveServer) break;
for (RegionServerThread t : regionservers) { for (RegionServerThread t : regionservers) {
if (t.isAlive()) { if (t.isAlive()) {
LOG.warn("RegionServerThreads taking too long to stop, interrupting"); LOG.warn("RegionServerThreads taking too long to stop, interrupting");