HBASE-3282 Need to retain DeadServers to ensure we don't allow previously expired RS instances to rejoin cluster

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1040291 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Gray 2010-11-29 21:01:52 +00:00
parent eb3beb9c24
commit c3d25923a2
7 changed files with 80 additions and 12 deletions

View File

@ -7,6 +7,8 @@ Release 0.91.0 - Unreleased
BUG FIXES
HBASE-3280 YouAreDeadException being swallowed in HRS getMaster
HBASE-3282 Need to retain DeadServers to ensure we don't allow
previously expired RS instances to rejoin cluster
IMPROVEMENTS
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via

View File

@ -22,6 +22,8 @@ package org.apache.hadoop.hbase.master;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.NotImplementedException;
@ -40,6 +42,20 @@ public class DeadServer implements Set<String> {
*/
private final Set<String> deadServers = new HashSet<String>();
/** Linked list of dead servers used to bound size of dead server set */
private final List<String> deadServerList = new LinkedList<String>();
/** Maximum number of dead servers to keep track of */
private final int maxDeadServers;
/** Number of dead servers currently being processed */
private int numProcessing;
public DeadServer(int maxDeadServers) {
super();
this.maxDeadServers = maxDeadServers;
this.numProcessing = 0;
}
/**
* @param serverName
@ -61,12 +77,36 @@ public class DeadServer implements Set<String> {
return HServerInfo.isServer(this, serverName, hostAndPortOnly);
}
/**
* Checks if there are currently any dead servers being processed by the
* master. Returns true if at least one region server is currently being
* processed as dead.
* @return true if any RS are being processed as dead
*/
public boolean areDeadServersInProgress() {
return numProcessing != 0;
}
public synchronized Set<String> clone() {
Set<String> clone = new HashSet<String>(this.deadServers.size());
clone.addAll(this.deadServers);
return clone;
}
public synchronized boolean add(String e) {
this.numProcessing++;
// Check to see if we are at capacity for dead servers
if (deadServerList.size() == this.maxDeadServers) {
deadServers.remove(deadServerList.remove(0));
}
deadServerList.add(e);
return deadServers.add(e);
}
public synchronized void finish(String e) {
this.numProcessing--;
}
public synchronized int size() {
return deadServers.size();
}
@ -91,12 +131,8 @@ public class DeadServer implements Set<String> {
return deadServers.toArray(a);
}
public synchronized boolean add(String e) {
return deadServers.add(e);
}
public synchronized boolean remove(Object o) {
return deadServers.remove(o);
throw new UnsupportedOperationException();
}
public synchronized boolean containsAll(Collection<?> c) {

View File

@ -664,7 +664,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 256));
return false;
}
if (!this.serverManager.getDeadServers().isEmpty()) {
if (!this.serverManager.areDeadServersInProgress()) {
LOG.debug("Not running balancer because dead regionserver processing");
}
Map<HServerInfo, List<HRegionInfo>> assignments =

View File

@ -87,7 +87,7 @@ public class ServerManager {
// Reporting to track master metrics.
private final MasterMetrics metrics;
final DeadServer deadservers = new DeadServer();
private final DeadServer deadservers;
private final long maxSkew;
@ -104,6 +104,8 @@ public class ServerManager {
this.metrics = metrics;
Configuration c = master.getConfiguration();
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
this.deadservers =
new DeadServer(c.getInt("hbase.master.maxdeadservers", 100));
}
/**
@ -399,6 +401,14 @@ public class ServerManager {
return this.deadservers.clone();
}
/**
* Checks if any dead servers are currently in progress.
* @return true if any RS are being processed as dead, false if not
*/
public boolean areDeadServersInProgress() {
return this.deadservers.areDeadServersInProgress();
}
/**
* @param hsa
* @return The HServerInfo whose HServerAddress is <code>hsa</code> or null

View File

@ -147,7 +147,7 @@ public class ServerShutdownHandler extends EventHandler {
this.services.getAssignmentManager().assign(e.getKey(), true);
}
}
this.deadServers.remove(serverName);
this.deadServers.finish(serverName);
LOG.info("Finished processing of shutdown of " + serverName);
}

View File

@ -25,7 +25,7 @@ import org.junit.Test;
public class TestDeadServer {
@Test public void testIsDead() {
DeadServer ds = new DeadServer();
DeadServer ds = new DeadServer(2);
final String hostname123 = "127.0.0.1,123,3";
assertFalse(ds.isDeadServer(hostname123, false));
assertFalse(ds.isDeadServer(hostname123, true));
@ -34,5 +34,25 @@ public class TestDeadServer {
assertFalse(ds.isDeadServer("127.0.0.1:1", true));
assertFalse(ds.isDeadServer("127.0.0.1:1234", true));
assertTrue(ds.isDeadServer("127.0.0.1:123", true));
assertTrue(ds.areDeadServersInProgress());
ds.finish(hostname123);
assertFalse(ds.areDeadServersInProgress());
final String hostname1234 = "127.0.0.2,1234,4";
ds.add(hostname1234);
assertTrue(ds.isDeadServer(hostname123, false));
assertTrue(ds.isDeadServer(hostname1234, false));
assertTrue(ds.areDeadServersInProgress());
ds.finish(hostname1234);
assertFalse(ds.areDeadServersInProgress());
final String hostname12345 = "127.0.0.2,12345,4";
ds.add(hostname12345);
// hostname123 should now be evicted
assertFalse(ds.isDeadServer(hostname123, false));
// but others should still be dead
assertTrue(ds.isDeadServer(hostname1234, false));
assertTrue(ds.isDeadServer(hostname12345, false));
assertTrue(ds.areDeadServersInProgress());
ds.finish(hostname12345);
assertFalse(ds.areDeadServersInProgress());
}
}

View File

@ -305,14 +305,14 @@ public class TestRollingRestart {
String serverName) throws InterruptedException {
ServerManager sm = activeMaster.getMaster().getServerManager();
// First wait for it to be in dead list
while (!sm.deadservers.isDeadServer(serverName)) {
while (!sm.getDeadServers().contains(serverName)) {
log("Waiting for [" + serverName + "] to be listed as dead in master");
Thread.sleep(1);
}
log("Server [" + serverName + "] marked as dead, waiting for it to " +
"finish dead processing");
while (sm.deadservers.isDeadServer(serverName)) {
log("Server [" + serverName + "] still marked as dead, waiting");
while (sm.areDeadServersInProgress()) {
log("Server [" + serverName + "] still being processed, waiting");
Thread.sleep(100);
}
log("Server [" + serverName + "] done with server shutdown processing");