HBASE-7407 TestMasterFailover under tests some cases and over tests some others

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1445074 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
nkeywal 2013-02-12 09:38:57 +00:00
parent e6ef5ce981
commit d2fb5a546f
8 changed files with 373 additions and 289 deletions

View File

@ -52,4 +52,8 @@ public class DoNotRetryIOException extends HBaseIOException {
public DoNotRetryIOException(String message, Throwable cause) { public DoNotRetryIOException(String message, Throwable cause) {
super(message, cause); super(message, cause);
} }
public DoNotRetryIOException(Throwable cause) {
super(cause);
}
} }

View File

@ -18,7 +18,6 @@
*/ */
package org.apache.hadoop.hbase; package org.apache.hadoop.hbase;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
@ -26,13 +25,21 @@ import org.apache.hadoop.classification.InterfaceStability;
* This exception is thrown by the master when a region server was shut down and * This exception is thrown by the master when a region server was shut down and
* restarted so fast that the master still hasn't processed the server shutdown * restarted so fast that the master still hasn't processed the server shutdown
* of the first instance, or when master is initializing and client call admin * of the first instance, or when master is initializing and client call admin
* operations * operations, or when an operation is performed on a region server that is still starting.
*/ */
@SuppressWarnings("serial") @SuppressWarnings("serial")
@InterfaceAudience.Public @InterfaceAudience.Public
@InterfaceStability.Stable @InterfaceStability.Stable
public class PleaseHoldException extends IOException { public class PleaseHoldException extends HBaseIOException {
public PleaseHoldException(String message) { public PleaseHoldException(String message) {
super(message); super(message);
} }
public PleaseHoldException(String message, Throwable cause) {
super(message, cause);
}
public PleaseHoldException(Throwable cause) {
super(cause);
}
} }

View File

@ -143,7 +143,8 @@ public abstract class EventHandler implements Runnable, Comparable<Runnable> {
// Master controlled events to be executed on the master // Master controlled events to be executed on the master
M_SERVER_SHUTDOWN (70, ExecutorType.MASTER_SERVER_OPERATIONS), // Master is processing shutdown of a RS M_SERVER_SHUTDOWN (70, ExecutorType.MASTER_SERVER_OPERATIONS), // Master is processing shutdown of a RS
M_META_SERVER_SHUTDOWN (72, ExecutorType.MASTER_META_SERVER_OPERATIONS); // Master is processing shutdown of RS hosting a meta region (-ROOT- or .META.). M_META_SERVER_SHUTDOWN (72, ExecutorType.MASTER_META_SERVER_OPERATIONS), // Master is processing shutdown of RS hosting a meta region (-ROOT- or .META.).
M_MASTER_RECOVERY (73, ExecutorType.MASTER_SERVER_OPERATIONS); // Master is processing recovery of regions found in ZK RIT
private final int code; private final int code;
private final ExecutorService.ExecutorType executor; private final ExecutorService.ExecutorType executor;

View File

@ -68,6 +68,7 @@ import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException; import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException;
import org.apache.hadoop.hbase.regionserver.RegionOpeningState; import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException; import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.KeyLocker; import org.apache.hadoop.hbase.util.KeyLocker;
import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.Threads;
@ -410,7 +411,7 @@ public class AssignmentManager extends ZooKeeperListener {
LOG.info("Found regions out on cluster or in RIT; failover"); LOG.info("Found regions out on cluster or in RIT; failover");
// Process list of dead servers and regions in RIT. // Process list of dead servers and regions in RIT.
// See HBASE-4580 for more information. // See HBASE-4580 for more information.
processDeadServersAndRecoverLostRegions(deadServers, nodes); processDeadServersAndRecoverLostRegions(deadServers);
} else { } else {
// Fresh cluster startup. // Fresh cluster startup.
LOG.info("Clean cluster startup. Assigning userregions"); LOG.info("Clean cluster startup. Assigning userregions");
@ -491,13 +492,14 @@ public class AssignmentManager extends ZooKeeperListener {
*/ */
void processRegionsInTransition( void processRegionsInTransition(
final RegionTransition rt, final HRegionInfo regionInfo, final RegionTransition rt, final HRegionInfo regionInfo,
int expectedVersion) throws KeeperException { final int expectedVersion) throws KeeperException {
EventType et = rt.getEventType(); EventType et = rt.getEventType();
// Get ServerName. Could not be null. // Get ServerName. Could not be null.
ServerName sn = rt.getServerName(); final ServerName sn = rt.getServerName();
String encodedRegionName = regionInfo.getEncodedName(); String encodedRegionName = regionInfo.getEncodedName();
LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + et); LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + et);
if (regionStates.isRegionInTransition(encodedRegionName)) { if (regionStates.isRegionInTransition(encodedRegionName)) {
// Just return // Just return
return; return;
@ -511,9 +513,22 @@ public class AssignmentManager extends ZooKeeperListener {
// will get it reassigned if appropriate // will get it reassigned if appropriate
forceOffline(regionInfo, rt); forceOffline(regionInfo, rt);
} else { } else {
// Just insert region into RIT. // Insert into RIT & resend the query to the region server: may be the previous master
// If this never updates the timeout will trigger new assignment // died before sending the query the first time.
regionStates.updateRegionState(rt, RegionState.State.CLOSING); regionStates.updateRegionState(rt, RegionState.State.CLOSING);
final RegionState rs = regionStates.getRegionState(regionInfo);
this.executorService.submit(
new EventHandler(server, EventType.M_MASTER_RECOVERY) {
@Override
public void process() throws IOException {
ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
try {
unassign(regionInfo, rs, expectedVersion, sn, true);
} finally {
lock.unlock();
}
}
});
} }
break; break;
@ -530,28 +545,29 @@ public class AssignmentManager extends ZooKeeperListener {
// Region is offline, insert into RIT and handle it like a closed // Region is offline, insert into RIT and handle it like a closed
addToRITandCallClose(regionInfo, RegionState.State.OFFLINE, rt); addToRITandCallClose(regionInfo, RegionState.State.OFFLINE, rt);
} else { } else {
// Just insert region into RIT. // Insert in RIT and resend to the regionserver
// If this never updates the timeout will trigger new assignment
regionStates.updateRegionState(rt, RegionState.State.PENDING_OPEN); regionStates.updateRegionState(rt, RegionState.State.PENDING_OPEN);
final RegionState rs = regionStates.getRegionState(regionInfo);
this.executorService.submit(
new EventHandler(server, EventType.M_MASTER_RECOVERY) {
@Override
public void process() throws IOException {
ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
try {
assign(rs, false, false);
} finally {
lock.unlock();
}
}
});
} }
break; break;
case RS_ZK_REGION_OPENING: case RS_ZK_REGION_OPENING:
if (!serverManager.isServerOnline(sn)) {
forceOffline(regionInfo, rt);
} else {
regionStates.updateRegionState(rt, RegionState.State.OPENING); regionStates.updateRegionState(rt, RegionState.State.OPENING);
if (regionInfo.isMetaTable() || !serverManager.isServerOnline(sn)) {
// If ROOT or .META. table is waiting for timeout monitor to assign
// it may take lot of time when the assignment.timeout.period is
// the default value which may be very long. We will not be able
// to serve any request during this time.
// So we will assign the ROOT and .META. region immediately.
// For a user region, if the server is not online, it takes
// some time for timeout monitor to kick in. We know the region
// won't open. So we will assign the opening
// region immediately too.
//
// Otherwise, just insert region into RIT. If the state never
// updates, the timeout will trigger new assignment
processOpeningState(regionInfo);
} }
break; break;
@ -560,18 +576,38 @@ public class AssignmentManager extends ZooKeeperListener {
forceOffline(regionInfo, rt); forceOffline(regionInfo, rt);
} else { } else {
// Region is opened, insert into RIT and handle it // Region is opened, insert into RIT and handle it
// This could be done asynchronously, we would need then to acquire the lock in the
// handler.
regionStates.updateRegionState(rt, RegionState.State.OPEN); regionStates.updateRegionState(rt, RegionState.State.OPEN);
new OpenedRegionHandler(server, this, regionInfo, sn, expectedVersion).process(); new OpenedRegionHandler(server, this, regionInfo, sn, expectedVersion).process();
} }
break; break;
case RS_ZK_REGION_SPLITTING: case RS_ZK_REGION_SPLITTING:
LOG.debug("Processed region in state : " + et); if (!serverManager.isServerOnline(sn)) {
// The regionserver started the split, but died before updating the status.
// It means (hopefully) that the split was not finished
// TBD - to study. In the meantime, do nothing as in the past.
LOG.warn("Processed region " + regionInfo.getEncodedName() + " in state : " + et +
" on a dead regionserver: " + sn + " doing nothing");
} else {
LOG.info("Processed region " + regionInfo.getEncodedName() + " in state : " +
et + " nothing to do.");
// We don't do anything. The way the code is written in RS_ZK_REGION_SPLIT management,
// it adds the RS_ZK_REGION_SPLITTING state if needed. So we don't have to do it here.
}
break; break;
case RS_ZK_REGION_SPLIT: case RS_ZK_REGION_SPLIT:
LOG.debug("Processed region in state : " + et); if (!serverManager.isServerOnline(sn)) {
forceOffline(regionInfo, rt);
} else {
LOG.info("Processed region " + regionInfo.getEncodedName() + " in state : " +
et + " nothing to do.");
// We don't do anything. The regionserver is supposed to update the znode
// multiple times so if it's still up we will receive an update soon.
}
break; break;
default: default:
throw new IllegalStateException("Received region in state :" + et + " is not valid"); throw new IllegalStateException("Received region in state :" + et + " is not valid.");
} }
} }
@ -711,7 +747,7 @@ public class AssignmentManager extends ZooKeeperListener {
} }
// Check it has daughters. // Check it has daughters.
byte [] payload = rt.getPayload(); byte [] payload = rt.getPayload();
List<HRegionInfo> daughters = null; List<HRegionInfo> daughters;
try { try {
daughters = HRegionInfo.parseDelimitedFrom(payload, 0, payload.length); daughters = HRegionInfo.parseDelimitedFrom(payload, 0, payload.length);
} catch (IOException e) { } catch (IOException e) {
@ -851,7 +887,7 @@ public class AssignmentManager extends ZooKeeperListener {
*/ */
private boolean convertPendingCloseToSplitting(final RegionState rs) { private boolean convertPendingCloseToSplitting(final RegionState rs) {
if (!rs.isPendingClose()) return false; if (!rs.isPendingClose()) return false;
LOG.debug("Converting PENDING_CLOSE to SPLITING; rs=" + rs); LOG.debug("Converting PENDING_CLOSE to SPLITTING; rs=" + rs);
regionStates.updateRegionState( regionStates.updateRegionState(
rs.getRegion(), RegionState.State.SPLITTING); rs.getRegion(), RegionState.State.SPLITTING);
// Clean up existing state. Clear from region plans seems all we // Clean up existing state. Clear from region plans seems all we
@ -874,7 +910,7 @@ public class AssignmentManager extends ZooKeeperListener {
RegionState regionState = regionStates.getRegionTransitionState(encodedName); RegionState regionState = regionStates.getRegionTransitionState(encodedName);
switch (rt.getEventType()) { switch (rt.getEventType()) {
case M_ZK_REGION_OFFLINE: case M_ZK_REGION_OFFLINE:
HRegionInfo regionInfo = null; HRegionInfo regionInfo;
if (regionState != null) { if (regionState != null) {
regionInfo = regionState.getRegion(); regionInfo = regionState.getRegion();
} else { } else {
@ -1367,7 +1403,7 @@ public class AssignmentManager extends ZooKeeperListener {
HRegionInfo region = state.getRegion(); HRegionInfo region = state.getRegion();
String encodedRegionName = region.getEncodedName(); String encodedRegionName = region.getEncodedName();
Integer nodeVersion = offlineNodesVersions.get(encodedRegionName); Integer nodeVersion = offlineNodesVersions.get(encodedRegionName);
if (nodeVersion == null || nodeVersion.intValue() == -1) { if (nodeVersion == null || nodeVersion == -1) {
LOG.warn("failed to offline in zookeeper: " + region); LOG.warn("failed to offline in zookeeper: " + region);
failedToOpenRegions.add(region); // assign individually later failedToOpenRegions.add(region); // assign individually later
Lock lock = locks.remove(encodedRegionName); Lock lock = locks.remove(encodedRegionName);
@ -1572,12 +1608,13 @@ public class AssignmentManager extends ZooKeeperListener {
RegionPlan plan = null; RegionPlan plan = null;
long maxRegionServerStartupWaitTime = -1; long maxRegionServerStartupWaitTime = -1;
HRegionInfo region = state.getRegion(); HRegionInfo region = state.getRegion();
RegionOpeningState regionOpenState;
for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) { for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
if (plan == null) { // Get a server for the region at first if (plan == null) { // Get a server for the region at first
plan = getRegionPlan(region, forceNewPlan); plan = getRegionPlan(region, forceNewPlan);
} }
if (plan == null) { if (plan == null) {
LOG.debug("Unable to determine a plan to assign " + region); LOG.warn("Unable to determine a plan to assign " + region);
this.timeoutMonitor.setAllRegionServersOffline(true); this.timeoutMonitor.setAllRegionServersOffline(true);
return; // Should get reassigned later when RIT times out. return; // Should get reassigned later when RIT times out.
} }
@ -1609,52 +1646,73 @@ public class AssignmentManager extends ZooKeeperListener {
LOG.debug("Server stopped; skipping assign of " + region); LOG.debug("Server stopped; skipping assign of " + region);
return; return;
} }
try {
LOG.info("Assigning region " + region.getRegionNameAsString() + LOG.info("Assigning region " + region.getRegionNameAsString() +
" to " + plan.getDestination().toString()); " to " + plan.getDestination().toString());
// Transition RegionState to PENDING_OPEN // Transition RegionState to PENDING_OPEN
currentState = regionStates.updateRegionState(region, currentState = regionStates.updateRegionState(region,
RegionState.State.PENDING_OPEN, plan.getDestination()); RegionState.State.PENDING_OPEN, plan.getDestination());
// Send OPEN RPC. This can fail if the server on other end is is not up.
// Pass the version that was obtained while setting the node to OFFLINE. boolean needNewPlan;
RegionOpeningState regionOpenState = serverManager.sendRegionOpen(plan final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() +
.getDestination(), region, versionOfOfflineNode); " to " + plan.getDestination();
try {
regionOpenState = serverManager.sendRegionOpen(
plan.getDestination(), region, versionOfOfflineNode);
if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
// Failed opening this region, looping again on a new server.
needNewPlan = true;
LOG.warn(assignMsg + ", regionserver says 'FAILED_OPENING', " +
" trying to assign elsewhere instead; " +
"try=" + i + " of " + this.maximumAttempts);
} else {
// we're done
if (regionOpenState == RegionOpeningState.ALREADY_OPENED) { if (regionOpenState == RegionOpeningState.ALREADY_OPENED) {
processAlreadyOpenedRegion(region, plan.getDestination()); processAlreadyOpenedRegion(region, plan.getDestination());
} else if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
// Failed opening this region
throw new Exception("Get regionOpeningState=" + regionOpenState);
} }
break; return;
}
} catch (Throwable t) { } catch (Throwable t) {
if (t instanceof RemoteException) { if (t instanceof RemoteException) {
t = ((RemoteException) t).unwrapRemoteException(); t = ((RemoteException) t).unwrapRemoteException();
} }
boolean regionAlreadyInTransitionException = false;
boolean serverNotRunningYet = false; // Should we wait a little before retrying? If the server is starting it's yes.
boolean socketTimedOut = false; // If the region is already in transition, it's yes as well: we want to be sure that
if (t instanceof RegionAlreadyInTransitionException) { // the region will get opened but we don't want a double assignment.
regionAlreadyInTransitionException = true; boolean hold = (t instanceof RegionAlreadyInTransitionException ||
if (LOG.isDebugEnabled()) { t instanceof ServerNotRunningYetException);
LOG.debug("Failed assignment in: " + plan.getDestination() + " due to "
+ t.getMessage()); // In case socket is timed out and the region server is still online,
} // the openRegion RPC could have been accepted by the server and
} else if (t instanceof ServerNotRunningYetException) { // just the response didn't go through. So we will retry to
// open the region on the same server to avoid possible
// double assignment.
boolean retry = !hold && (t instanceof java.net.SocketTimeoutException
&& this.serverManager.isServerOnline(plan.getDestination()));
if (hold) {
LOG.warn(assignMsg + ", waiting a little before trying on the same region server " +
"try=" + i + " of " + this.maximumAttempts, t);
if (maxRegionServerStartupWaitTime < 0) { if (maxRegionServerStartupWaitTime < 0) {
maxRegionServerStartupWaitTime = System.currentTimeMillis() + maxRegionServerStartupWaitTime = EnvironmentEdgeManager.currentTimeMillis() +
this.server.getConfiguration(). this.server.getConfiguration().
getLong("hbase.regionserver.rpc.startup.waittime", 60000); getLong("hbase.regionserver.rpc.startup.waittime", 60000);
} }
try { try {
long now = System.currentTimeMillis(); long now = EnvironmentEdgeManager.currentTimeMillis();
if (now < maxRegionServerStartupWaitTime) { if (now < maxRegionServerStartupWaitTime) {
LOG.debug("Server is not yet up; waiting up to " + LOG.debug("Server is not yet up; waiting up to " +
(maxRegionServerStartupWaitTime - now) + "ms", t); (maxRegionServerStartupWaitTime - now) + "ms", t);
serverNotRunningYet = true;
Thread.sleep(100); Thread.sleep(100);
i--; // reset the try count i--; // reset the try count
needNewPlan = false;
} else { } else {
LOG.debug("Server is not up for a while; try a new one", t); LOG.debug("Server is not up for a while; try a new one", t);
needNewPlan = true;
} }
} catch (InterruptedException ie) { } catch (InterruptedException ie) {
LOG.warn("Failed to assign " LOG.warn("Failed to assign "
@ -1662,34 +1720,17 @@ public class AssignmentManager extends ZooKeeperListener {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
return; return;
} }
} else if (t instanceof java.net.SocketTimeoutException } else if (retry) {
&& this.serverManager.isServerOnline(plan.getDestination())) { needNewPlan = false;
// In case socket is timed out and the region server is still online, LOG.warn(assignMsg + ", trying to assign to the same region server " +
// the openRegion RPC could have been accepted by the server and "try=" + i + " of " + this.maximumAttempts, t);
// just the response didn't go through. So we will retry to } else {
// open the region on the same server to avoid possible needNewPlan = true;
// double assignment. LOG.warn(assignMsg + ", trying to assign elsewhere instead;" +
socketTimedOut = true; " try=" + i + " of " + this.maximumAttempts, t);
if (LOG.isDebugEnabled()) {
LOG.debug("Call openRegion() to " + plan.getDestination()
+ " has timed out when trying to assign "
+ region.getRegionNameAsString()
+ ", but the region might already be opened on "
+ plan.getDestination() + ".", t);
} }
} }
LOG.warn("Failed assignment of "
+ region.getRegionNameAsString()
+ " to "
+ plan.getDestination()
+ ", trying to assign "
+ (regionAlreadyInTransitionException || serverNotRunningYet || socketTimedOut
? "to the same region server because of RegionAlreadyInTransitionException"
+ "/ServerNotRunningYetException/SocketTimeoutException;"
: "elsewhere instead; ")
+ "try=" + i + " of " + this.maximumAttempts, t);
if (i == this.maximumAttempts) { if (i == this.maximumAttempts) {
// Don't reset the region state or get a new plan any more. // Don't reset the region state or get a new plan any more.
// This is the last try. // This is the last try.
@ -1699,28 +1740,25 @@ public class AssignmentManager extends ZooKeeperListener {
// If region opened on destination of present plan, reassigning to new // If region opened on destination of present plan, reassigning to new
// RS may cause double assignments. In case of RegionAlreadyInTransitionException // RS may cause double assignments. In case of RegionAlreadyInTransitionException
// reassigning to same RS. // reassigning to same RS.
RegionPlan newPlan = plan; if (needNewPlan) {
if (!(regionAlreadyInTransitionException
|| serverNotRunningYet || socketTimedOut)) {
// Force a new plan and reassign. Will return null if no servers. // Force a new plan and reassign. Will return null if no servers.
// The new plan could be the same as the existing plan since we don't // The new plan could be the same as the existing plan since we don't
// exclude the server of the original plan, which should not be // exclude the server of the original plan, which should not be
// excluded since it could be the only server up now. // excluded since it could be the only server up now.
newPlan = getRegionPlan(region, true); RegionPlan newPlan = getRegionPlan(region, true);
}
if (newPlan == null) { if (newPlan == null) {
this.timeoutMonitor.setAllRegionServersOffline(true); this.timeoutMonitor.setAllRegionServersOffline(true);
LOG.warn("Unable to find a viable location to assign region " + LOG.warn("Unable to find a viable location to assign region " +
region.getRegionNameAsString()); region.getRegionNameAsString());
return; return;
} }
if (plan != newPlan
&& !plan.getDestination().equals(newPlan.getDestination())) { if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
// Clean out plan we failed execute and one that doesn't look like it'll // Clean out plan we failed execute and one that doesn't look like it'll
// succeed anyways; we need a new plan! // succeed anyways; we need a new plan!
// Transition back to OFFLINE // Transition back to OFFLINE
currentState = regionStates.updateRegionState( currentState = regionStates.updateRegionState(region, RegionState.State.OFFLINE);
region, RegionState.State.OFFLINE);
versionOfOfflineNode = -1; versionOfOfflineNode = -1;
plan = newPlan; plan = newPlan;
} }
@ -1778,7 +1816,7 @@ public class AssignmentManager extends ZooKeeperListener {
} }
regionStates.updateRegionState(state.getRegion(), regionStates.updateRegionState(state.getRegion(),
RegionState.State.OFFLINE); RegionState.State.OFFLINE);
int versionOfOfflineNode = -1; int versionOfOfflineNode;
try { try {
// get the version after setting the znode to OFFLINE // get the version after setting the znode to OFFLINE
versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(watcher, versionOfOfflineNode = ZKAssign.createOrForceNodeOffline(watcher,
@ -1829,7 +1867,7 @@ public class AssignmentManager extends ZooKeeperListener {
RegionPlan randomPlan = null; RegionPlan randomPlan = null;
boolean newPlan = false; boolean newPlan = false;
RegionPlan existingPlan = null; RegionPlan existingPlan;
synchronized (this.regionPlans) { synchronized (this.regionPlans) {
existingPlan = this.regionPlans.get(encodedName); existingPlan = this.regionPlans.get(encodedName);
@ -2034,7 +2072,6 @@ public class AssignmentManager extends ZooKeeperListener {
server.abort( server.abort(
"Unexpected ZK exception deleting node CLOSING/CLOSED for the region " "Unexpected ZK exception deleting node CLOSING/CLOSED for the region "
+ encodedName, ke); + encodedName, ke);
return;
} }
} }
@ -2410,16 +2447,15 @@ public class AssignmentManager extends ZooKeeperListener {
* that were in RIT. * that were in RIT.
* <p> * <p>
* *
*
* @param deadServers * @param deadServers
* The list of dead servers which failed while there was no active * The list of dead servers which failed while there was no active
* master. Can be null. * master. Can be null.
* @param nodes
* The regions in RIT
* @throws IOException * @throws IOException
* @throws KeeperException * @throws KeeperException
*/ */
private void processDeadServersAndRecoverLostRegions( private void processDeadServersAndRecoverLostRegions(
Map<ServerName, List<HRegionInfo>> deadServers, List<String> nodes) Map<ServerName, List<HRegionInfo>> deadServers)
throws IOException, KeeperException { throws IOException, KeeperException {
if (deadServers != null) { if (deadServers != null) {
for (Map.Entry<ServerName, List<HRegionInfo>> server: deadServers.entrySet()) { for (Map.Entry<ServerName, List<HRegionInfo>> server: deadServers.entrySet()) {
@ -2429,7 +2465,7 @@ public class AssignmentManager extends ZooKeeperListener {
} }
} }
} }
nodes = ZKUtil.listChildrenAndWatchForNewChildren( List<String> nodes = ZKUtil.listChildrenAndWatchForNewChildren(
this.watcher, this.watcher.assignmentZNode); this.watcher, this.watcher.assignmentZNode);
if (!nodes.isEmpty()) { if (!nodes.isEmpty()) {
for (String encodedRegionName : nodes) { for (String encodedRegionName : nodes) {
@ -2672,12 +2708,9 @@ public class AssignmentManager extends ZooKeeperListener {
invokeAssign(regionInfo); invokeAssign(regionInfo);
} catch (KeeperException ke) { } catch (KeeperException ke) {
LOG.error("Unexpected ZK exception timing out CLOSING region", ke); LOG.error("Unexpected ZK exception timing out CLOSING region", ke);
return;
} catch (DeserializationException e) { } catch (DeserializationException e) {
LOG.error("Unexpected exception parsing CLOSING region", e); LOG.error("Unexpected exception parsing CLOSING region", e);
return;
} }
return;
} }
void invokeAssign(HRegionInfo regionInfo) { void invokeAssign(HRegionInfo regionInfo) {

View File

@ -1408,7 +1408,7 @@ Server {
* @param b If false, the catalog janitor won't do anything. * @param b If false, the catalog janitor won't do anything.
*/ */
public void setCatalogJanitorEnabled(final boolean b) { public void setCatalogJanitorEnabled(final boolean b) {
((CatalogJanitor)this.catalogJanitorChore).setEnabled(b); this.catalogJanitorChore.setEnabled(b);
} }
@Override @Override
@ -1908,7 +1908,7 @@ Server {
public String[] getCoprocessors() { public String[] getCoprocessors() {
Set<String> masterCoprocessors = Set<String> masterCoprocessors =
getCoprocessorHost().getCoprocessors(); getCoprocessorHost().getCoprocessors();
return masterCoprocessors.toArray(new String[0]); return masterCoprocessors.toArray(new String[masterCoprocessors.size()]);
} }
@Override @Override

View File

@ -186,6 +186,8 @@ public class TestAssignmentManager {
@Test(timeout = 5000) @Test(timeout = 5000)
public void testBalanceOnMasterFailoverScenarioWithOpenedNode() public void testBalanceOnMasterFailoverScenarioWithOpenedNode()
throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException { throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, 0, null, true)).
thenReturn(true);
AssignmentManagerWithExtrasForTesting am = AssignmentManagerWithExtrasForTesting am =
setUpMockedAssignmentManager(this.server, this.serverManager); setUpMockedAssignmentManager(this.server, this.serverManager);
try { try {
@ -232,6 +234,8 @@ public class TestAssignmentManager {
@Test(timeout = 5000) @Test(timeout = 5000)
public void testBalanceOnMasterFailoverScenarioWithClosedNode() public void testBalanceOnMasterFailoverScenarioWithClosedNode()
throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException { throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, 0, null, true)).
thenReturn(true);
AssignmentManagerWithExtrasForTesting am = AssignmentManagerWithExtrasForTesting am =
setUpMockedAssignmentManager(this.server, this.serverManager); setUpMockedAssignmentManager(this.server, this.serverManager);
try { try {
@ -279,6 +283,8 @@ public class TestAssignmentManager {
@Test(timeout = 5000) @Test(timeout = 5000)
public void testBalanceOnMasterFailoverScenarioWithOfflineNode() public void testBalanceOnMasterFailoverScenarioWithOfflineNode()
throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException { throws IOException, KeeperException, InterruptedException, ServiceException, DeserializationException {
Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, 0, null, true)).
thenReturn(true);
AssignmentManagerWithExtrasForTesting am = AssignmentManagerWithExtrasForTesting am =
setUpMockedAssignmentManager(this.server, this.serverManager); setUpMockedAssignmentManager(this.server, this.serverManager);
try { try {
@ -574,7 +580,7 @@ public class TestAssignmentManager {
ClientProtocol implementation = Mockito.mock(ClientProtocol.class); ClientProtocol implementation = Mockito.mock(ClientProtocol.class);
// Get a meta row result that has region up on SERVERNAME_A // Get a meta row result that has region up on SERVERNAME_A
Result r = null; Result r;
if (splitRegion) { if (splitRegion) {
r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A); r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A);
} else { } else {
@ -942,6 +948,30 @@ public class TestAssignmentManager {
} }
} }
/**
* Scenario:<ul>
* <li> master starts a close, and creates a znode</li>
* <li> it fails just at this moment, before contacting the RS</li>
* <li> while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
* we don't know, and we have an exception.</li>
* <li> the master must handle this nicely and reassign.
* </ul>
*/
@Test
public void testClosingFailureDuringRecovery() throws Exception {
AssignmentManagerWithExtrasForTesting am =
setUpMockedAssignmentManager(this.server, this.serverManager);
ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A);
am.getRegionStates().createRegionState(REGIONINFO);
assertFalse( am.getRegionStates().isRegionsInTransition() );
am.processRegionInTransition(REGIONINFO.getEncodedName(), REGIONINFO);
assertTrue( am.getRegionStates().isRegionsInTransition() );
}
/** /**
* Creates a new ephemeral node in the SPLITTING state for the specified region. * Creates a new ephemeral node in the SPLITTING state for the specified region.
* Create it ephemeral in case regionserver dies mid-split. * Create it ephemeral in case regionserver dies mid-split.
@ -1138,7 +1168,7 @@ public class TestAssignmentManager {
} catch (InterruptedException e) { } catch (InterruptedException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
}; }
}; };
t.start(); t.start();
while (!t.isAlive()) Threads.sleep(1); while (!t.isAlive()) Threads.sleep(1);

View File

@ -43,7 +43,6 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.LargeTests; import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.RegionTransition; import org.apache.hadoop.hbase.RegionTransition;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
@ -154,11 +153,6 @@ public class TestMasterFailover {
// Create config to use for this cluster // Create config to use for this cluster
Configuration conf = HBaseConfiguration.create(); Configuration conf = HBaseConfiguration.create();
// Need to drop the timeout much lower
conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 3);
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 3);
// Start the cluster // Start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
@ -278,6 +272,8 @@ public class TestMasterFailover {
*/ */
// Region that should be assigned but is not and is in ZK as OFFLINE // Region that should be assigned but is not and is in ZK as OFFLINE
// Cause: This can happen if the master crashed after creating the znode but before sending the
// request to the region server
HRegionInfo region = enabledRegions.remove(0); HRegionInfo region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region); regionsThatShouldBeOnline.add(region);
ZKAssign.createNodeOffline(zkw, region, serverName); ZKAssign.createNodeOffline(zkw, region, serverName);
@ -285,6 +281,7 @@ public class TestMasterFailover {
/* /*
* ZK = CLOSING * ZK = CLOSING
*/ */
// Cause: Same as offline.
regionsThatShouldBeOnline.add(closingRegion); regionsThatShouldBeOnline.add(closingRegion);
ZKAssign.createNodeClosing(zkw, closingRegion, serverName); ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
@ -293,6 +290,7 @@ public class TestMasterFailover {
*/ */
// Region of enabled table closed but not ack // Region of enabled table closed but not ack
//Cause: Master was down while the region server updated the ZK status.
region = enabledRegions.remove(0); region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region); regionsThatShouldBeOnline.add(region);
int version = ZKAssign.createNodeClosing(zkw, region, serverName); int version = ZKAssign.createNodeClosing(zkw, region, serverName);
@ -304,21 +302,12 @@ public class TestMasterFailover {
version = ZKAssign.createNodeClosing(zkw, region, serverName); version = ZKAssign.createNodeClosing(zkw, region, serverName);
ZKAssign.transitionNodeClosed(zkw, region, serverName, version); ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
/*
* ZK = OPENING
*/
// RS was opening a region of enabled table but never finishes
region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region);
ZKAssign.createNodeOffline(zkw, region, serverName);
ZKAssign.transitionNodeOpening(zkw, region, serverName);
/* /*
* ZK = OPENED * ZK = OPENED
*/ */
// Region of enabled table was opened on RS // Region of enabled table was opened on RS
// Cause: as offline
region = enabledRegions.remove(0); region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region); regionsThatShouldBeOnline.add(region);
ZKAssign.createNodeOffline(zkw, region, serverName); ZKAssign.createNodeOffline(zkw, region, serverName);
@ -333,6 +322,7 @@ public class TestMasterFailover {
} }
// Region of disable table was opened on RS // Region of disable table was opened on RS
// Cause: Master failed while updating the status for this region server.
region = disabledRegions.remove(0); region = disabledRegions.remove(0);
regionsThatShouldBeOffline.add(region); regionsThatShouldBeOffline.add(region);
ZKAssign.createNodeOffline(zkw, region, serverName); ZKAssign.createNodeOffline(zkw, region, serverName);
@ -457,9 +447,7 @@ public class TestMasterFailover {
// Create and start the cluster // Create and start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
Configuration conf = TEST_UTIL.getConfiguration(); Configuration conf = TEST_UTIL.getConfiguration();
// Need to drop the timeout much lower
conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1); conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2); conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
@ -771,25 +759,6 @@ public class TestMasterFailover {
assertTrue(cluster.waitForActiveAndReadyMaster()); assertTrue(cluster.waitForActiveAndReadyMaster());
log("Master is ready"); log("Master is ready");
// Let's add some weird states to master in-memory state
// After HBASE-3181, we need to have some ZK state if we're PENDING_OPEN
// b/c it is impossible for us to get into this state w/o a zk node
// this is not true of PENDING_CLOSE
// PENDING_OPEN and enabled
region = enabledRegions.remove(0);
regionsThatShouldBeOnline.add(region);
master.getAssignmentManager().getRegionStates().updateRegionState(
region, RegionState.State.PENDING_OPEN);
ZKAssign.createNodeOffline(zkw, region, master.getServerName());
// PENDING_OPEN and disabled
region = disabledRegions.remove(0);
regionsThatShouldBeOffline.add(region);
master.getAssignmentManager().getRegionStates().updateRegionState(
region, RegionState.State.PENDING_OPEN);
ZKAssign.createNodeOffline(zkw, region, master.getServerName());
// Failover should be completed, now wait for no RIT // Failover should be completed, now wait for no RIT
log("Waiting for no more RIT"); log("Waiting for no more RIT");
ZKAssign.blockUntilNoRIT(zkw); ZKAssign.blockUntilNoRIT(zkw);
@ -863,8 +832,6 @@ public class TestMasterFailover {
// Start the cluster // Start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
Configuration conf = TEST_UTIL.getConfiguration(); Configuration conf = TEST_UTIL.getConfiguration();
conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 8000);
conf.setInt("hbase.master.info.port", -1); conf.setInt("hbase.master.info.port", -1);
TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
@ -1016,84 +983,5 @@ public class TestMasterFailover {
// Stop the cluster // Stop the cluster
TEST_UTIL.shutdownMiniCluster(); TEST_UTIL.shutdownMiniCluster();
} }
/**
* return the index of the active master in the cluster
* @throws MasterNotRunningException if no active master found
*/
private int getActiveMasterIndex(MiniHBaseCluster cluster) throws MasterNotRunningException {
// get all the master threads
List<MasterThread> masterThreads = cluster.getMasterThreads();
for (int i = 0; i < masterThreads.size(); i++) {
if (masterThreads.get(i).getMaster().isActiveMaster()) {
return i;
}
}
throw new MasterNotRunningException();
}
/**
* Kill the master and wait for a new active master to show up
* @param cluster
* @return the new active master
* @throws InterruptedException
* @throws IOException
*/
private HMaster killActiveAndWaitForNewActive(MiniHBaseCluster cluster)
throws InterruptedException, IOException {
int activeIndex = getActiveMasterIndex(cluster);
HMaster active = cluster.getMaster();
cluster.stopMaster(activeIndex);
cluster.waitOnMaster(activeIndex);
assertTrue(cluster.waitForActiveAndReadyMaster());
// double check this is actually a new master
HMaster newActive = cluster.getMaster();
assertFalse(active == newActive);
return newActive;
}
/**
* Test that if the master fails, the load balancer maintains its
* state (running or not) when the next master takes over
* @throws Exception
*/
@Test (timeout=240000)
public void testMasterFailoverBalancerPersistence() throws Exception {
final int NUM_MASTERS = 3;
final int NUM_RS = 1;
// Start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
assertTrue(cluster.waitForActiveAndReadyMaster());
HMaster active = cluster.getMaster();
// check that the balancer is on by default for the active master
ClusterStatus clusterStatus = active.getClusterStatus();
assertTrue(clusterStatus.isBalancerOn());
active = killActiveAndWaitForNewActive(cluster);
// ensure the load balancer is still running on new master
clusterStatus = active.getClusterStatus();
assertTrue(clusterStatus.isBalancerOn());
// turn off the load balancer
active.balanceSwitch(false);
// once more, kill active master and wait for new active master to show up
active = killActiveAndWaitForNewActive(cluster);
// ensure the load balancer is not running on the new master
clusterStatus = active.getClusterStatus();
assertFalse(clusterStatus.isBalancerOn());
// Stop the cluster
TEST_UTIL.shutdownMiniCluster();
}
} }

View File

@ -0,0 +1,121 @@
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import java.io.IOException;
import java.util.List;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@Category(LargeTests.class)
public class TestMasterFailoverBalancerPersistence {
/**
* Test that if the master fails, the load balancer maintains its
* state (running or not) when the next master takes over
*
* @throws Exception
*/
@Test(timeout = 240000)
public void testMasterFailoverBalancerPersistence() throws Exception {
final int NUM_MASTERS = 3;
final int NUM_RS = 1;
// Start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
assertTrue(cluster.waitForActiveAndReadyMaster());
HMaster active = cluster.getMaster();
// check that the balancer is on by default for the active master
ClusterStatus clusterStatus = active.getClusterStatus();
assertTrue(clusterStatus.isBalancerOn());
active = killActiveAndWaitForNewActive(cluster);
// ensure the load balancer is still running on new master
clusterStatus = active.getClusterStatus();
assertTrue(clusterStatus.isBalancerOn());
// turn off the load balancer
active.balanceSwitch(false);
// once more, kill active master and wait for new active master to show up
active = killActiveAndWaitForNewActive(cluster);
// ensure the load balancer is not running on the new master
clusterStatus = active.getClusterStatus();
assertFalse(clusterStatus.isBalancerOn());
// Stop the cluster
TEST_UTIL.shutdownMiniCluster();
}
/**
* Kill the master and wait for a new active master to show up
*
* @param cluster
* @return the new active master
* @throws InterruptedException
* @throws java.io.IOException
*/
private HMaster killActiveAndWaitForNewActive(MiniHBaseCluster cluster)
throws InterruptedException, IOException {
int activeIndex = getActiveMasterIndex(cluster);
HMaster active = cluster.getMaster();
cluster.stopMaster(activeIndex);
cluster.waitOnMaster(activeIndex);
assertTrue(cluster.waitForActiveAndReadyMaster());
// double check this is actually a new master
HMaster newActive = cluster.getMaster();
assertFalse(active == newActive);
return newActive;
}
/**
* return the index of the active master in the cluster
*
* @throws org.apache.hadoop.hbase.MasterNotRunningException
* if no active master found
*/
private int getActiveMasterIndex(MiniHBaseCluster cluster) throws MasterNotRunningException {
// get all the master threads
List<JVMClusterUtil.MasterThread> masterThreads = cluster.getMasterThreads();
for (int i = 0; i < masterThreads.size(); i++) {
if (masterThreads.get(i).getMaster().isActiveMaster()) {
return i;
}
}
throw new MasterNotRunningException();
}
}