diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 57f5a48a8a9..54334a122b3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.YouAreDeadException; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; @@ -62,6 +63,7 @@ import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.ServerInfo; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.regionserver.RegionOpeningState; +import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Triple; import org.apache.hadoop.hbase.util.Pair; @@ -844,6 +846,11 @@ public class ServerManager { RetryCounter retryCounter = pingRetryCounterFactory.create(); while (retryCounter.shouldRetry()) { + synchronized (this.onlineServers) { + if (this.deadservers.isDeadServer(server)) { + return false; + } + } try { AdminService.BlockingInterface admin = getRsAdmin(server); if (admin != null) { @@ -851,13 +858,21 @@ public class ServerManager { return info != null && info.hasServerName() && server.getStartcode() == info.getServerName().getStartCode(); } + } catch (RegionServerStoppedException | ServerNotRunningYetException e) { + if (LOG.isDebugEnabled()) { + LOG.debug("Couldn't reach " + server, e); + } + break; } catch (IOException ioe) { - LOG.debug("Couldn't reach " + server + ", try=" + retryCounter.getAttemptTimes() - + " of " + retryCounter.getMaxAttempts(), ioe); + if (LOG.isDebugEnabled()) { + LOG.debug("Couldn't reach " + server + ", try=" + retryCounter.getAttemptTimes() + " of " + + retryCounter.getMaxAttempts(), ioe); + } try { retryCounter.sleepUntilNextRetry(); } catch(InterruptedException ie) { Thread.currentThread().interrupt(); + break; } } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java index 35ba761f053..ce4170a03f4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java @@ -167,6 +167,7 @@ public class TestDistributedLogSplitting { conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1); conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0); // no load balancing conf.setInt("hbase.regionserver.wal.max.splitters", 3); + conf.setInt("hbase.master.maximum.ping.server.attempts", 3); TEST_UTIL.shutdownMiniHBaseCluster(); TEST_UTIL = new HBaseTestingUtility(conf); TEST_UTIL.setDFSCluster(dfsCluster);