From c3d25923a2082a000ca94f53a77e3719fde2991a Mon Sep 17 00:00:00 2001 From: Jonathan Gray Date: Mon, 29 Nov 2010 21:01:52 +0000 Subject: [PATCH] HBASE-3282 Need to retain DeadServers to ensure we don't allow previously expired RS instances to rejoin cluster git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1040291 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + .../hadoop/hbase/master/DeadServer.java | 46 +++++++++++++++++-- .../apache/hadoop/hbase/master/HMaster.java | 2 +- .../hadoop/hbase/master/ServerManager.java | 12 ++++- .../master/handler/ServerShutdownHandler.java | 2 +- .../hadoop/hbase/master/TestDeadServer.java | 22 ++++++++- .../hbase/master/TestRollingRestart.java | 6 +-- 7 files changed, 80 insertions(+), 12 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index a84cf590523..1d274990a13 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -7,6 +7,8 @@ Release 0.91.0 - Unreleased BUG FIXES HBASE-3280 YouAreDeadException being swallowed in HRS getMaster + HBASE-3282 Need to retain DeadServers to ensure we don't allow + previously expired RS instances to rejoin cluster IMPROVEMENTS HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via diff --git a/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java index 9085355c6df..efcbb9929a4 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java +++ b/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java @@ -22,6 +22,8 @@ package org.apache.hadoop.hbase.master; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.Set; import org.apache.commons.lang.NotImplementedException; @@ -40,6 +42,20 @@ public class DeadServer implements Set { */ private final Set deadServers = new HashSet(); + /** Linked list of dead servers used to bound size of dead server set */ + private final List deadServerList = new LinkedList(); + + /** Maximum number of dead servers to keep track of */ + private final int maxDeadServers; + + /** Number of dead servers currently being processed */ + private int numProcessing; + + public DeadServer(int maxDeadServers) { + super(); + this.maxDeadServers = maxDeadServers; + this.numProcessing = 0; + } /** * @param serverName @@ -61,12 +77,36 @@ public class DeadServer implements Set { return HServerInfo.isServer(this, serverName, hostAndPortOnly); } + /** + * Checks if there are currently any dead servers being processed by the + * master. Returns true if at least one region server is currently being + * processed as dead. + * @return true if any RS are being processed as dead + */ + public boolean areDeadServersInProgress() { + return numProcessing != 0; + } + public synchronized Set clone() { Set clone = new HashSet(this.deadServers.size()); clone.addAll(this.deadServers); return clone; } + public synchronized boolean add(String e) { + this.numProcessing++; + // Check to see if we are at capacity for dead servers + if (deadServerList.size() == this.maxDeadServers) { + deadServers.remove(deadServerList.remove(0)); + } + deadServerList.add(e); + return deadServers.add(e); + } + + public synchronized void finish(String e) { + this.numProcessing--; + } + public synchronized int size() { return deadServers.size(); } @@ -91,12 +131,8 @@ public class DeadServer implements Set { return deadServers.toArray(a); } - public synchronized boolean add(String e) { - return deadServers.add(e); - } - public synchronized boolean remove(Object o) { - return deadServers.remove(o); + throw new UnsupportedOperationException(); } public synchronized boolean containsAll(Collection c) { diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index fe3364242e1..7da8a97a27a 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -664,7 +664,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 256)); return false; } - if (!this.serverManager.getDeadServers().isEmpty()) { + if (!this.serverManager.areDeadServersInProgress()) { LOG.debug("Not running balancer because dead regionserver processing"); } Map> assignments = diff --git a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index f3587f0dd8e..bf8f3acfcaf 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -87,7 +87,7 @@ public class ServerManager { // Reporting to track master metrics. private final MasterMetrics metrics; - final DeadServer deadservers = new DeadServer(); + private final DeadServer deadservers; private final long maxSkew; @@ -104,6 +104,8 @@ public class ServerManager { this.metrics = metrics; Configuration c = master.getConfiguration(); maxSkew = c.getLong("hbase.master.maxclockskew", 30000); + this.deadservers = + new DeadServer(c.getInt("hbase.master.maxdeadservers", 100)); } /** @@ -399,6 +401,14 @@ public class ServerManager { return this.deadservers.clone(); } + /** + * Checks if any dead servers are currently in progress. + * @return true if any RS are being processed as dead, false if not + */ + public boolean areDeadServersInProgress() { + return this.deadservers.areDeadServersInProgress(); + } + /** * @param hsa * @return The HServerInfo whose HServerAddress is hsa or null diff --git a/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java index 8c809d6b1a0..65a9f8b69d3 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java +++ b/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java @@ -147,7 +147,7 @@ public class ServerShutdownHandler extends EventHandler { this.services.getAssignmentManager().assign(e.getKey(), true); } } - this.deadServers.remove(serverName); + this.deadServers.finish(serverName); LOG.info("Finished processing of shutdown of " + serverName); } diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java b/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java index 1a56b2b2ccc..252159c0ba4 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java @@ -25,7 +25,7 @@ import org.junit.Test; public class TestDeadServer { @Test public void testIsDead() { - DeadServer ds = new DeadServer(); + DeadServer ds = new DeadServer(2); final String hostname123 = "127.0.0.1,123,3"; assertFalse(ds.isDeadServer(hostname123, false)); assertFalse(ds.isDeadServer(hostname123, true)); @@ -34,5 +34,25 @@ public class TestDeadServer { assertFalse(ds.isDeadServer("127.0.0.1:1", true)); assertFalse(ds.isDeadServer("127.0.0.1:1234", true)); assertTrue(ds.isDeadServer("127.0.0.1:123", true)); + assertTrue(ds.areDeadServersInProgress()); + ds.finish(hostname123); + assertFalse(ds.areDeadServersInProgress()); + final String hostname1234 = "127.0.0.2,1234,4"; + ds.add(hostname1234); + assertTrue(ds.isDeadServer(hostname123, false)); + assertTrue(ds.isDeadServer(hostname1234, false)); + assertTrue(ds.areDeadServersInProgress()); + ds.finish(hostname1234); + assertFalse(ds.areDeadServersInProgress()); + final String hostname12345 = "127.0.0.2,12345,4"; + ds.add(hostname12345); + // hostname123 should now be evicted + assertFalse(ds.isDeadServer(hostname123, false)); + // but others should still be dead + assertTrue(ds.isDeadServer(hostname1234, false)); + assertTrue(ds.isDeadServer(hostname12345, false)); + assertTrue(ds.areDeadServersInProgress()); + ds.finish(hostname12345); + assertFalse(ds.areDeadServersInProgress()); } } \ No newline at end of file diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java b/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java index d219ec3260d..6089ae63b49 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java @@ -305,14 +305,14 @@ public class TestRollingRestart { String serverName) throws InterruptedException { ServerManager sm = activeMaster.getMaster().getServerManager(); // First wait for it to be in dead list - while (!sm.deadservers.isDeadServer(serverName)) { + while (!sm.getDeadServers().contains(serverName)) { log("Waiting for [" + serverName + "] to be listed as dead in master"); Thread.sleep(1); } log("Server [" + serverName + "] marked as dead, waiting for it to " + "finish dead processing"); - while (sm.deadservers.isDeadServer(serverName)) { - log("Server [" + serverName + "] still marked as dead, waiting"); + while (sm.areDeadServersInProgress()) { + log("Server [" + serverName + "] still being processed, waiting"); Thread.sleep(100); } log("Server [" + serverName + "] done with server shutdown processing");