HBASE-3282 Need to retain DeadServers to ensure we don't allow previously expired RS instances to rejoin cluster
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1040291 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
eb3beb9c24
commit
c3d25923a2
|
@ -7,6 +7,8 @@ Release 0.91.0 - Unreleased
|
|||
|
||||
BUG FIXES
|
||||
HBASE-3280 YouAreDeadException being swallowed in HRS getMaster
|
||||
HBASE-3282 Need to retain DeadServers to ensure we don't allow
|
||||
previously expired RS instances to rejoin cluster
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via
|
||||
|
|
|
@ -22,6 +22,8 @@ package org.apache.hadoop.hbase.master;
|
|||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang.NotImplementedException;
|
||||
|
@ -40,6 +42,20 @@ public class DeadServer implements Set<String> {
|
|||
*/
|
||||
private final Set<String> deadServers = new HashSet<String>();
|
||||
|
||||
/** Linked list of dead servers used to bound size of dead server set */
|
||||
private final List<String> deadServerList = new LinkedList<String>();
|
||||
|
||||
/** Maximum number of dead servers to keep track of */
|
||||
private final int maxDeadServers;
|
||||
|
||||
/** Number of dead servers currently being processed */
|
||||
private int numProcessing;
|
||||
|
||||
/**
 * Creates an empty dead-server tracker.
 *
 * @param maxDeadServers value stored as the capacity bound that
 *   {@code add} compares the dead server list size against
 */
public DeadServer(int maxDeadServers) {
  // Nothing is being processed when the tracker is first created.
  this.numProcessing = 0;
  this.maxDeadServers = maxDeadServers;
}
|
||||
|
||||
/**
|
||||
* @param serverName
|
||||
|
@ -61,12 +77,36 @@ public class DeadServer implements Set<String> {
|
|||
return HServerInfo.isServer(this, serverName, hostAndPortOnly);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if there are currently any dead servers being processed by the
|
||||
* master. Returns true if at least one region server is currently being
|
||||
* processed as dead.
|
||||
* @return true if any RS are being processed as dead
|
||||
*/
|
||||
public boolean areDeadServersInProgress() {
|
||||
return numProcessing != 0;
|
||||
}
|
||||
|
||||
public synchronized Set<String> clone() {
|
||||
Set<String> clone = new HashSet<String>(this.deadServers.size());
|
||||
clone.addAll(this.deadServers);
|
||||
return clone;
|
||||
}
|
||||
|
||||
public synchronized boolean add(String e) {
|
||||
this.numProcessing++;
|
||||
// Check to see if we are at capacity for dead servers
|
||||
if (deadServerList.size() == this.maxDeadServers) {
|
||||
deadServers.remove(deadServerList.remove(0));
|
||||
}
|
||||
deadServerList.add(e);
|
||||
return deadServers.add(e);
|
||||
}
|
||||
|
||||
public synchronized void finish(String e) {
|
||||
this.numProcessing--;
|
||||
}
|
||||
|
||||
public synchronized int size() {
|
||||
return deadServers.size();
|
||||
}
|
||||
|
@ -91,12 +131,8 @@ public class DeadServer implements Set<String> {
|
|||
return deadServers.toArray(a);
|
||||
}
|
||||
|
||||
public synchronized boolean add(String e) {
|
||||
return deadServers.add(e);
|
||||
}
|
||||
|
||||
public synchronized boolean remove(Object o) {
|
||||
return deadServers.remove(o);
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public synchronized boolean containsAll(Collection<?> c) {
|
||||
|
|
|
@ -664,7 +664,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||
abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 256));
|
||||
return false;
|
||||
}
|
||||
if (!this.serverManager.getDeadServers().isEmpty()) {
|
||||
if (!this.serverManager.areDeadServersInProgress()) {
|
||||
LOG.debug("Not running balancer because dead regionserver processing");
|
||||
}
|
||||
Map<HServerInfo, List<HRegionInfo>> assignments =
|
||||
|
|
|
@ -87,7 +87,7 @@ public class ServerManager {
|
|||
// Reporting to track master metrics.
|
||||
private final MasterMetrics metrics;
|
||||
|
||||
final DeadServer deadservers = new DeadServer();
|
||||
private final DeadServer deadservers;
|
||||
|
||||
private final long maxSkew;
|
||||
|
||||
|
@ -104,6 +104,8 @@ public class ServerManager {
|
|||
this.metrics = metrics;
|
||||
Configuration c = master.getConfiguration();
|
||||
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
|
||||
this.deadservers =
|
||||
new DeadServer(c.getInt("hbase.master.maxdeadservers", 100));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -399,6 +401,14 @@ public class ServerManager {
|
|||
return this.deadservers.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if any dead servers are currently in progress.
|
||||
* @return true if any RS are being processed as dead, false if not
|
||||
*/
|
||||
public boolean areDeadServersInProgress() {
|
||||
return this.deadservers.areDeadServersInProgress();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param hsa
|
||||
* @return The HServerInfo whose HServerAddress is <code>hsa</code> or null
|
||||
|
|
|
@ -147,7 +147,7 @@ public class ServerShutdownHandler extends EventHandler {
|
|||
this.services.getAssignmentManager().assign(e.getKey(), true);
|
||||
}
|
||||
}
|
||||
this.deadServers.remove(serverName);
|
||||
this.deadServers.finish(serverName);
|
||||
LOG.info("Finished processing of shutdown of " + serverName);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.junit.Test;
|
|||
|
||||
public class TestDeadServer {
|
||||
@Test public void testIsDead() {
|
||||
DeadServer ds = new DeadServer();
|
||||
DeadServer ds = new DeadServer(2);
|
||||
final String hostname123 = "127.0.0.1,123,3";
|
||||
assertFalse(ds.isDeadServer(hostname123, false));
|
||||
assertFalse(ds.isDeadServer(hostname123, true));
|
||||
|
@ -34,5 +34,25 @@ public class TestDeadServer {
|
|||
assertFalse(ds.isDeadServer("127.0.0.1:1", true));
|
||||
assertFalse(ds.isDeadServer("127.0.0.1:1234", true));
|
||||
assertTrue(ds.isDeadServer("127.0.0.1:123", true));
|
||||
assertTrue(ds.areDeadServersInProgress());
|
||||
ds.finish(hostname123);
|
||||
assertFalse(ds.areDeadServersInProgress());
|
||||
final String hostname1234 = "127.0.0.2,1234,4";
|
||||
ds.add(hostname1234);
|
||||
assertTrue(ds.isDeadServer(hostname123, false));
|
||||
assertTrue(ds.isDeadServer(hostname1234, false));
|
||||
assertTrue(ds.areDeadServersInProgress());
|
||||
ds.finish(hostname1234);
|
||||
assertFalse(ds.areDeadServersInProgress());
|
||||
final String hostname12345 = "127.0.0.2,12345,4";
|
||||
ds.add(hostname12345);
|
||||
// hostname123 should now be evicted
|
||||
assertFalse(ds.isDeadServer(hostname123, false));
|
||||
// but others should still be dead
|
||||
assertTrue(ds.isDeadServer(hostname1234, false));
|
||||
assertTrue(ds.isDeadServer(hostname12345, false));
|
||||
assertTrue(ds.areDeadServersInProgress());
|
||||
ds.finish(hostname12345);
|
||||
assertFalse(ds.areDeadServersInProgress());
|
||||
}
|
||||
}
|
|
@ -305,14 +305,14 @@ public class TestRollingRestart {
|
|||
String serverName) throws InterruptedException {
|
||||
ServerManager sm = activeMaster.getMaster().getServerManager();
|
||||
// First wait for it to be in dead list
|
||||
while (!sm.deadservers.isDeadServer(serverName)) {
|
||||
while (!sm.getDeadServers().contains(serverName)) {
|
||||
log("Waiting for [" + serverName + "] to be listed as dead in master");
|
||||
Thread.sleep(1);
|
||||
}
|
||||
log("Server [" + serverName + "] marked as dead, waiting for it to " +
|
||||
"finish dead processing");
|
||||
while (sm.deadservers.isDeadServer(serverName)) {
|
||||
log("Server [" + serverName + "] still marked as dead, waiting");
|
||||
while (sm.areDeadServersInProgress()) {
|
||||
log("Server [" + serverName + "] still being processed, waiting");
|
||||
Thread.sleep(100);
|
||||
}
|
||||
log("Server [" + serverName + "] done with server shutdown processing");
|
||||
|
|
Loading…
Reference in New Issue