HBASE-3282 Need to retain DeadServers to ensure we don't allow previously expired RS instances to rejoin cluster
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1040291 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
eb3beb9c24
commit
c3d25923a2
|
@ -7,6 +7,8 @@ Release 0.91.0 - Unreleased
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
HBASE-3280 YouAreDeadException being swallowed in HRS getMaster
|
HBASE-3280 YouAreDeadException being swallowed in HRS getMaster
|
||||||
|
HBASE-3282 Need to retain DeadServers to ensure we don't allow
|
||||||
|
previously expired RS instances to rejoin cluster
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via
|
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via
|
||||||
|
|
|
@ -22,6 +22,8 @@ package org.apache.hadoop.hbase.master;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.lang.NotImplementedException;
|
import org.apache.commons.lang.NotImplementedException;
|
||||||
|
@ -40,6 +42,20 @@ public class DeadServer implements Set<String> {
|
||||||
*/
|
*/
|
||||||
private final Set<String> deadServers = new HashSet<String>();
|
private final Set<String> deadServers = new HashSet<String>();
|
||||||
|
|
||||||
|
/** Linked list of dead servers used to bound size of dead server set */
|
||||||
|
private final List<String> deadServerList = new LinkedList<String>();
|
||||||
|
|
||||||
|
/** Maximum number of dead servers to keep track of */
|
||||||
|
private final int maxDeadServers;
|
||||||
|
|
||||||
|
/** Number of dead servers currently being processed */
|
||||||
|
private int numProcessing;
|
||||||
|
|
||||||
|
public DeadServer(int maxDeadServers) {
|
||||||
|
super();
|
||||||
|
this.maxDeadServers = maxDeadServers;
|
||||||
|
this.numProcessing = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param serverName
|
* @param serverName
|
||||||
|
@ -61,12 +77,36 @@ public class DeadServer implements Set<String> {
|
||||||
return HServerInfo.isServer(this, serverName, hostAndPortOnly);
|
return HServerInfo.isServer(this, serverName, hostAndPortOnly);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if there are currently any dead servers being processed by the
|
||||||
|
* master. Returns true if at least one region server is currently being
|
||||||
|
* processed as dead.
|
||||||
|
* @return true if any RS are being processed as dead
|
||||||
|
*/
|
||||||
|
public boolean areDeadServersInProgress() {
|
||||||
|
return numProcessing != 0;
|
||||||
|
}
|
||||||
|
|
||||||
public synchronized Set<String> clone() {
|
public synchronized Set<String> clone() {
|
||||||
Set<String> clone = new HashSet<String>(this.deadServers.size());
|
Set<String> clone = new HashSet<String>(this.deadServers.size());
|
||||||
clone.addAll(this.deadServers);
|
clone.addAll(this.deadServers);
|
||||||
return clone;
|
return clone;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public synchronized boolean add(String e) {
|
||||||
|
this.numProcessing++;
|
||||||
|
// Check to see if we are at capacity for dead servers
|
||||||
|
if (deadServerList.size() == this.maxDeadServers) {
|
||||||
|
deadServers.remove(deadServerList.remove(0));
|
||||||
|
}
|
||||||
|
deadServerList.add(e);
|
||||||
|
return deadServers.add(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void finish(String e) {
|
||||||
|
this.numProcessing--;
|
||||||
|
}
|
||||||
|
|
||||||
public synchronized int size() {
|
public synchronized int size() {
|
||||||
return deadServers.size();
|
return deadServers.size();
|
||||||
}
|
}
|
||||||
|
@ -91,12 +131,8 @@ public class DeadServer implements Set<String> {
|
||||||
return deadServers.toArray(a);
|
return deadServers.toArray(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized boolean add(String e) {
|
|
||||||
return deadServers.add(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized boolean remove(Object o) {
|
public synchronized boolean remove(Object o) {
|
||||||
return deadServers.remove(o);
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized boolean containsAll(Collection<?> c) {
|
public synchronized boolean containsAll(Collection<?> c) {
|
||||||
|
|
|
@ -664,7 +664,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
||||||
abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 256));
|
abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 256));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!this.serverManager.getDeadServers().isEmpty()) {
|
if (!this.serverManager.areDeadServersInProgress()) {
|
||||||
LOG.debug("Not running balancer because dead regionserver processing");
|
LOG.debug("Not running balancer because dead regionserver processing");
|
||||||
}
|
}
|
||||||
Map<HServerInfo, List<HRegionInfo>> assignments =
|
Map<HServerInfo, List<HRegionInfo>> assignments =
|
||||||
|
|
|
@ -87,7 +87,7 @@ public class ServerManager {
|
||||||
// Reporting to track master metrics.
|
// Reporting to track master metrics.
|
||||||
private final MasterMetrics metrics;
|
private final MasterMetrics metrics;
|
||||||
|
|
||||||
final DeadServer deadservers = new DeadServer();
|
private final DeadServer deadservers;
|
||||||
|
|
||||||
private final long maxSkew;
|
private final long maxSkew;
|
||||||
|
|
||||||
|
@ -104,6 +104,8 @@ public class ServerManager {
|
||||||
this.metrics = metrics;
|
this.metrics = metrics;
|
||||||
Configuration c = master.getConfiguration();
|
Configuration c = master.getConfiguration();
|
||||||
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
|
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
|
||||||
|
this.deadservers =
|
||||||
|
new DeadServer(c.getInt("hbase.master.maxdeadservers", 100));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -399,6 +401,14 @@ public class ServerManager {
|
||||||
return this.deadservers.clone();
|
return this.deadservers.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if any dead servers are currently in progress.
|
||||||
|
* @return true if any RS are being processed as dead, false if not
|
||||||
|
*/
|
||||||
|
public boolean areDeadServersInProgress() {
|
||||||
|
return this.deadservers.areDeadServersInProgress();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param hsa
|
* @param hsa
|
||||||
* @return The HServerInfo whose HServerAddress is <code>hsa</code> or null
|
* @return The HServerInfo whose HServerAddress is <code>hsa</code> or null
|
||||||
|
|
|
@ -147,7 +147,7 @@ public class ServerShutdownHandler extends EventHandler {
|
||||||
this.services.getAssignmentManager().assign(e.getKey(), true);
|
this.services.getAssignmentManager().assign(e.getKey(), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.deadServers.remove(serverName);
|
this.deadServers.finish(serverName);
|
||||||
LOG.info("Finished processing of shutdown of " + serverName);
|
LOG.info("Finished processing of shutdown of " + serverName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ import org.junit.Test;
|
||||||
|
|
||||||
public class TestDeadServer {
|
public class TestDeadServer {
|
||||||
@Test public void testIsDead() {
|
@Test public void testIsDead() {
|
||||||
DeadServer ds = new DeadServer();
|
DeadServer ds = new DeadServer(2);
|
||||||
final String hostname123 = "127.0.0.1,123,3";
|
final String hostname123 = "127.0.0.1,123,3";
|
||||||
assertFalse(ds.isDeadServer(hostname123, false));
|
assertFalse(ds.isDeadServer(hostname123, false));
|
||||||
assertFalse(ds.isDeadServer(hostname123, true));
|
assertFalse(ds.isDeadServer(hostname123, true));
|
||||||
|
@ -34,5 +34,25 @@ public class TestDeadServer {
|
||||||
assertFalse(ds.isDeadServer("127.0.0.1:1", true));
|
assertFalse(ds.isDeadServer("127.0.0.1:1", true));
|
||||||
assertFalse(ds.isDeadServer("127.0.0.1:1234", true));
|
assertFalse(ds.isDeadServer("127.0.0.1:1234", true));
|
||||||
assertTrue(ds.isDeadServer("127.0.0.1:123", true));
|
assertTrue(ds.isDeadServer("127.0.0.1:123", true));
|
||||||
|
assertTrue(ds.areDeadServersInProgress());
|
||||||
|
ds.finish(hostname123);
|
||||||
|
assertFalse(ds.areDeadServersInProgress());
|
||||||
|
final String hostname1234 = "127.0.0.2,1234,4";
|
||||||
|
ds.add(hostname1234);
|
||||||
|
assertTrue(ds.isDeadServer(hostname123, false));
|
||||||
|
assertTrue(ds.isDeadServer(hostname1234, false));
|
||||||
|
assertTrue(ds.areDeadServersInProgress());
|
||||||
|
ds.finish(hostname1234);
|
||||||
|
assertFalse(ds.areDeadServersInProgress());
|
||||||
|
final String hostname12345 = "127.0.0.2,12345,4";
|
||||||
|
ds.add(hostname12345);
|
||||||
|
// hostname123 should now be evicted
|
||||||
|
assertFalse(ds.isDeadServer(hostname123, false));
|
||||||
|
// but others should still be dead
|
||||||
|
assertTrue(ds.isDeadServer(hostname1234, false));
|
||||||
|
assertTrue(ds.isDeadServer(hostname12345, false));
|
||||||
|
assertTrue(ds.areDeadServersInProgress());
|
||||||
|
ds.finish(hostname12345);
|
||||||
|
assertFalse(ds.areDeadServersInProgress());
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -305,14 +305,14 @@ public class TestRollingRestart {
|
||||||
String serverName) throws InterruptedException {
|
String serverName) throws InterruptedException {
|
||||||
ServerManager sm = activeMaster.getMaster().getServerManager();
|
ServerManager sm = activeMaster.getMaster().getServerManager();
|
||||||
// First wait for it to be in dead list
|
// First wait for it to be in dead list
|
||||||
while (!sm.deadservers.isDeadServer(serverName)) {
|
while (!sm.getDeadServers().contains(serverName)) {
|
||||||
log("Waiting for [" + serverName + "] to be listed as dead in master");
|
log("Waiting for [" + serverName + "] to be listed as dead in master");
|
||||||
Thread.sleep(1);
|
Thread.sleep(1);
|
||||||
}
|
}
|
||||||
log("Server [" + serverName + "] marked as dead, waiting for it to " +
|
log("Server [" + serverName + "] marked as dead, waiting for it to " +
|
||||||
"finish dead processing");
|
"finish dead processing");
|
||||||
while (sm.deadservers.isDeadServer(serverName)) {
|
while (sm.areDeadServersInProgress()) {
|
||||||
log("Server [" + serverName + "] still marked as dead, waiting");
|
log("Server [" + serverName + "] still being processed, waiting");
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
}
|
}
|
||||||
log("Server [" + serverName + "] done with server shutdown processing");
|
log("Server [" + serverName + "] done with server shutdown processing");
|
||||||
|
|
Loading…
Reference in New Issue