From c9dcc9a065ec124094aa0361e27ed9330a106d5c Mon Sep 17 00:00:00 2001 From: Andrew Purtell Date: Thu, 11 Oct 2018 15:28:36 -0700 Subject: [PATCH] HBASE-21266 Not running balancer because processing dead regionservers, but empty dead rs list --- .../hadoop/hbase/master/DeadServer.java | 81 ++++++++++++++----- .../hadoop/hbase/master/TestDeadServer.java | 4 +- .../TestEndToEndSplitTransaction.java | 6 +- 3 files changed, 68 insertions(+), 23 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java index 116d24e7215..4183201c614 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java @@ -25,6 +25,8 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; +import com.google.common.base.Preconditions; + import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -36,6 +38,7 @@ import java.util.List; import java.util.Map; import java.util.Set; + /** * Class to hold dead servers list and utility querying dead server list. * On znode expiration, servers are added here. @@ -54,14 +57,9 @@ public class DeadServer { private final Map deadServers = new HashMap<>(); /** - * Number of dead servers currently being processed + * Set of dead servers currently being processed */ - private int numProcessing = 0; - - /** - * Whether a dead server is being processed currently. - */ - private volatile boolean processing = false; + private final Set processingServers = new HashSet(); /** * A dead server that comes back alive has a different start code. The new start code should be @@ -76,7 +74,13 @@ public class DeadServer { while (it.hasNext()) { ServerName sn = it.next(); if (ServerName.isSameAddress(sn, newServerName)) { + // remove from deadServers it.remove(); + // remove from processingServers + boolean removed = processingServers.remove(sn); + if (removed) { + LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size()); + } return true; } } @@ -92,6 +96,14 @@ public class DeadServer { return deadServers.containsKey(serverName); } + /** + * @param serverName server name. + * @return true if this server is on the processing servers list false otherwise + */ + public synchronized boolean isProcessingServer(final ServerName serverName) { + return processingServers.contains(serverName); + } + /** * Checks if there are currently any dead servers being processed by the * master. Returns true if at least one region server is currently being @@ -99,7 +111,9 @@ public class DeadServer { * * @return true if any RS are being processed as dead */ - public synchronized boolean areDeadServersInProgress() { return processing; } + public synchronized boolean areDeadServersInProgress() { + return !processingServers.isEmpty(); + } public synchronized Set copyServerNames() { Set clone = new HashSet<>(deadServers.size()); @@ -112,10 +126,13 @@ public class DeadServer { * @param sn the server name */ public synchronized void add(ServerName sn) { - processing = true; if (!deadServers.containsKey(sn)){ deadServers.put(sn, EnvironmentEdgeManager.currentTime()); } + boolean added = processingServers.add(sn); + if (LOG.isDebugEnabled() && added) { + LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size()); + } } /** @@ -123,18 +140,27 @@ public class DeadServer { * @param sn ServerName for the dead server. */ public synchronized void notifyServer(ServerName sn) { - if (LOG.isTraceEnabled()) { LOG.trace("Started processing " + sn); } - processing = true; - numProcessing++; + boolean added = processingServers.add(sn); + if (LOG.isDebugEnabled()) { + if (added) { + LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size()); + } + LOG.debug("Started processing " + sn + "; numProcessing=" + processingServers.size()); + } } + /** + * Complete processing for this dead server. + * @param sn ServerName for the dead server. + */ public synchronized void finish(ServerName sn) { - numProcessing--; - if (LOG.isTraceEnabled()) LOG.trace("Finished " + sn + "; numProcessing=" + numProcessing); - - assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative"; - - if (numProcessing == 0) { processing = false; } + boolean removed = processingServers.remove(sn); + if (LOG.isDebugEnabled()) { + LOG.debug("Finished processing " + sn + "; numProcessing=" + processingServers.size()); + if (removed) { + LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size()); + } + } } public synchronized int size() { @@ -150,19 +176,33 @@ public class DeadServer { while (it.hasNext()) { ServerName sn = it.next(); if (ServerName.isSameAddress(sn, newServerName)) { + // remove from deadServers it.remove(); + // remove from processingServers + boolean removed = processingServers.remove(sn); + if (removed) { + LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size()); + } } } } @Override public synchronized String toString() { + // Display unified set of servers from both maps + Set servers = new HashSet(); + servers.addAll(deadServers.keySet()); + servers.addAll(processingServers); StringBuilder sb = new StringBuilder(); - for (ServerName sn : deadServers.keySet()) { + for (ServerName sn : servers) { if (sb.length() > 0) { sb.append(", "); } sb.append(sn.toString()); + // Star entries that are being processed + if (processingServers.contains(sn)) { + sb.append("*"); + } } return sb.toString(); } @@ -211,6 +251,9 @@ public class DeadServer { */ public synchronized boolean removeDeadServer(final ServerName deadServerName) { + Preconditions.checkState(!processingServers.contains(deadServerName), + "Asked to remove server still in processingServers set " + deadServerName + + " (numProcessing=" + processingServers.size() + ")"); if (deadServers.remove(deadServerName) == null) { return false; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java index 4e852f8cd56..73ff789dfdc 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java @@ -123,7 +123,6 @@ public class TestDeadServer { DeadServer d = new DeadServer(); - d.add(hostname123); mee.incValue(1); d.add(hostname1234); @@ -164,14 +163,17 @@ public class TestDeadServer { d.add(hostname1234); Assert.assertEquals(2, d.size()); + d.finish(hostname123); d.removeDeadServer(hostname123); Assert.assertEquals(1, d.size()); + d.finish(hostname1234); d.removeDeadServer(hostname1234); Assert.assertTrue(d.isEmpty()); d.add(hostname1234); Assert.assertFalse(d.removeDeadServer(hostname123_2)); Assert.assertEquals(1, d.size()); + d.finish(hostname1234); Assert.assertTrue(d.removeDeadServer(hostname1234)); Assert.assertTrue(d.isEmpty()); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java index 85e9d3098d5..1418d6e74fb 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java @@ -296,7 +296,7 @@ public class TestEndToEndSplitTransaction { Throwable ex; RegionChecker(Configuration conf, Stoppable stopper, TableName tableName) throws IOException { - super("RegionChecker", stopper, 10); + super("RegionChecker", stopper, 100); this.conf = conf; this.tableName = tableName; @@ -509,7 +509,7 @@ public class TestEndToEndSplitTransaction { log("found region in META: " + hri.getRegionNameAsString()); break; } - Threads.sleep(10); + Threads.sleep(100); } } @@ -532,7 +532,7 @@ public class TestEndToEndSplitTransaction { } catch (IOException ex) { // wait some more } - Threads.sleep(10); + Threads.sleep(100); } } }