HBASE-8137 Add failed to open/close region state
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1459384 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
66d90f865e
commit
e38dcba6c3
|
@ -44,7 +44,9 @@ public class RegionState implements org.apache.hadoop.io.Writable {
|
|||
CLOSING, // server has begun to close but not yet done
|
||||
CLOSED, // server closed region and updated meta
|
||||
SPLITTING, // server started split of a region
|
||||
SPLIT // server completed split of a region
|
||||
SPLIT, // server completed split of a region
|
||||
FAILED_OPEN, // failed to open, and won't retry any more
|
||||
FAILED_CLOSE // failed to close, and won't retry any more
|
||||
}
|
||||
|
||||
// Many threads can update the state at the stamp at the same time
|
||||
|
@ -126,6 +128,14 @@ public class RegionState implements org.apache.hadoop.io.Writable {
|
|||
return state == State.SPLIT;
|
||||
}
|
||||
|
||||
public boolean isFailedOpen() {
|
||||
return state == State.FAILED_OPEN;
|
||||
}
|
||||
|
||||
public boolean isFailedClose() {
|
||||
return state == State.FAILED_CLOSE;
|
||||
}
|
||||
|
||||
public boolean isPendingOpenOrOpeningOnServer(final ServerName sn) {
|
||||
return isOnServer(sn) && (isPendingOpen() || isOpening());
|
||||
}
|
||||
|
@ -195,6 +205,12 @@ public class RegionState implements org.apache.hadoop.io.Writable {
|
|||
case SPLIT:
|
||||
rs = ClusterStatusProtos.RegionState.State.SPLIT;
|
||||
break;
|
||||
case FAILED_OPEN:
|
||||
rs = ClusterStatusProtos.RegionState.State.FAILED_OPEN;
|
||||
break;
|
||||
case FAILED_CLOSE:
|
||||
rs = ClusterStatusProtos.RegionState.State.FAILED_CLOSE;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("");
|
||||
}
|
||||
|
@ -239,6 +255,12 @@ public class RegionState implements org.apache.hadoop.io.Writable {
|
|||
case SPLIT:
|
||||
state = State.SPLIT;
|
||||
break;
|
||||
case FAILED_OPEN:
|
||||
state = State.FAILED_OPEN;
|
||||
break;
|
||||
case FAILED_CLOSE:
|
||||
state = State.FAILED_CLOSE;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("");
|
||||
}
|
||||
|
|
|
@ -63,6 +63,8 @@ public final class ClusterStatusProtos {
|
|||
CLOSED(6, 6),
|
||||
SPLITTING(7, 7),
|
||||
SPLIT(8, 8),
|
||||
FAILED_OPEN(9, 9),
|
||||
FAILED_CLOSE(10, 10),
|
||||
;
|
||||
|
||||
public static final int OFFLINE_VALUE = 0;
|
||||
|
@ -74,6 +76,8 @@ public final class ClusterStatusProtos {
|
|||
public static final int CLOSED_VALUE = 6;
|
||||
public static final int SPLITTING_VALUE = 7;
|
||||
public static final int SPLIT_VALUE = 8;
|
||||
public static final int FAILED_OPEN_VALUE = 9;
|
||||
public static final int FAILED_CLOSE_VALUE = 10;
|
||||
|
||||
|
||||
public final int getNumber() { return value; }
|
||||
|
@ -89,6 +93,8 @@ public final class ClusterStatusProtos {
|
|||
case 6: return CLOSED;
|
||||
case 7: return SPLITTING;
|
||||
case 8: return SPLIT;
|
||||
case 9: return FAILED_OPEN;
|
||||
case 10: return FAILED_CLOSE;
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
|
@ -119,7 +125,7 @@ public final class ClusterStatusProtos {
|
|||
}
|
||||
|
||||
private static final State[] VALUES = {
|
||||
OFFLINE, PENDING_OPEN, OPENING, OPEN, PENDING_CLOSE, CLOSING, CLOSED, SPLITTING, SPLIT,
|
||||
OFFLINE, PENDING_OPEN, OPENING, OPEN, PENDING_CLOSE, CLOSING, CLOSED, SPLITTING, SPLIT, FAILED_OPEN, FAILED_CLOSE,
|
||||
};
|
||||
|
||||
public static State valueOf(
|
||||
|
@ -4350,28 +4356,28 @@ public final class ClusterStatusProtos {
|
|||
static {
|
||||
java.lang.String[] descriptorData = {
|
||||
"\n\023ClusterStatus.proto\032\013hbase.proto\032\017Clus" +
|
||||
"terId.proto\032\010FS.proto\"\346\001\n\013RegionState\022\037\n" +
|
||||
"terId.proto\032\010FS.proto\"\211\002\n\013RegionState\022\037\n" +
|
||||
"\nregionInfo\030\001 \002(\0132\013.RegionInfo\022!\n\005state\030" +
|
||||
"\002 \002(\0162\022.RegionState.State\022\r\n\005stamp\030\003 \001(\004" +
|
||||
"\"\203\001\n\005State\022\013\n\007OFFLINE\020\000\022\020\n\014PENDING_OPEN\020" +
|
||||
"\"\246\001\n\005State\022\013\n\007OFFLINE\020\000\022\020\n\014PENDING_OPEN\020" +
|
||||
"\001\022\013\n\007OPENING\020\002\022\010\n\004OPEN\020\003\022\021\n\rPENDING_CLOS" +
|
||||
"E\020\004\022\013\n\007CLOSING\020\005\022\n\n\006CLOSED\020\006\022\r\n\tSPLITTIN" +
|
||||
"G\020\007\022\t\n\005SPLIT\020\010\"W\n\022RegionInTransition\022\036\n\004" +
|
||||
"spec\030\001 \002(\0132\020.RegionSpecifier\022!\n\013regionSt" +
|
||||
"ate\030\002 \002(\0132\014.RegionState\"N\n\016LiveServerInf",
|
||||
"o\022\033\n\006server\030\001 \002(\0132\013.ServerName\022\037\n\nserver" +
|
||||
"Load\030\002 \002(\0132\013.ServerLoad\"\327\002\n\rClusterStatu" +
|
||||
"s\022.\n\014hbaseVersion\030\001 \001(\0132\030.HBaseVersionFi" +
|
||||
"leContent\022$\n\013liveServers\030\002 \003(\0132\017.LiveSer" +
|
||||
"verInfo\022 \n\013deadServers\030\003 \003(\0132\013.ServerNam" +
|
||||
"e\0220\n\023regionsInTransition\030\004 \003(\0132\023.RegionI" +
|
||||
"nTransition\022\035\n\tclusterId\030\005 \001(\0132\n.Cluster" +
|
||||
"Id\022(\n\022masterCoprocessors\030\006 \003(\0132\014.Coproce" +
|
||||
"ssor\022\033\n\006master\030\007 \001(\0132\013.ServerName\022\"\n\rbac" +
|
||||
"kupMasters\030\010 \003(\0132\013.ServerName\022\022\n\nbalance",
|
||||
"rOn\030\t \001(\010BF\n*org.apache.hadoop.hbase.pro" +
|
||||
"tobuf.generatedB\023ClusterStatusProtosH\001\240\001" +
|
||||
"\001"
|
||||
"G\020\007\022\t\n\005SPLIT\020\010\022\017\n\013FAILED_OPEN\020\t\022\020\n\014FAILE" +
|
||||
"D_CLOSE\020\n\"W\n\022RegionInTransition\022\036\n\004spec\030" +
|
||||
"\001 \002(\0132\020.RegionSpecifier\022!\n\013regionState\030\002",
|
||||
" \002(\0132\014.RegionState\"N\n\016LiveServerInfo\022\033\n\006" +
|
||||
"server\030\001 \002(\0132\013.ServerName\022\037\n\nserverLoad\030" +
|
||||
"\002 \002(\0132\013.ServerLoad\"\327\002\n\rClusterStatus\022.\n\014" +
|
||||
"hbaseVersion\030\001 \001(\0132\030.HBaseVersionFileCon" +
|
||||
"tent\022$\n\013liveServers\030\002 \003(\0132\017.LiveServerIn" +
|
||||
"fo\022 \n\013deadServers\030\003 \003(\0132\013.ServerName\0220\n\023" +
|
||||
"regionsInTransition\030\004 \003(\0132\023.RegionInTran" +
|
||||
"sition\022\035\n\tclusterId\030\005 \001(\0132\n.ClusterId\022(\n" +
|
||||
"\022masterCoprocessors\030\006 \003(\0132\014.Coprocessor\022" +
|
||||
"\033\n\006master\030\007 \001(\0132\013.ServerName\022\"\n\rbackupMa",
|
||||
"sters\030\010 \003(\0132\013.ServerName\022\022\n\nbalancerOn\030\t" +
|
||||
" \001(\010BF\n*org.apache.hadoop.hbase.protobuf" +
|
||||
".generatedB\023ClusterStatusProtosH\001\240\001\001"
|
||||
};
|
||||
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
|
||||
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
|
||||
|
|
|
@ -41,6 +41,8 @@ message RegionState {
|
|||
CLOSED = 6; // server closed region and updated meta
|
||||
SPLITTING = 7; // server started split of a region
|
||||
SPLIT = 8; // server completed split of a region
|
||||
FAILED_OPEN = 9; // failed to open, and won't retry any more
|
||||
FAILED_CLOSE = 10; // failed to close, and won't retry any more
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1529,20 +1529,19 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
private void unassign(final HRegionInfo region,
|
||||
final RegionState state, final int versionOfClosingNode,
|
||||
final ServerName dest, final boolean transitionInZK) {
|
||||
// Send CLOSE RPC
|
||||
ServerName server = state.getServerName();
|
||||
// ClosedRegionHandler can remove the server from this.regions
|
||||
if (!serverManager.isServerOnline(server)) {
|
||||
if (transitionInZK) {
|
||||
// Delete the node; if no node exists, there is nothing to do.
|
||||
deleteClosingOrClosedNode(region);
|
||||
}
|
||||
regionOffline(region);
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 1; i <= this.maximumAttempts; i++) {
|
||||
// ClosedRegionHandler can remove the server from this.regions
|
||||
if (!serverManager.isServerOnline(server)) {
|
||||
if (transitionInZK) {
|
||||
// Delete the node; if no node exists, there is nothing to do.
|
||||
deleteClosingOrClosedNode(region);
|
||||
}
|
||||
regionOffline(region);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
// Send CLOSE RPC
|
||||
if (serverManager.sendRegionClose(server, region,
|
||||
versionOfClosingNode, dest, transitionInZK)) {
|
||||
LOG.debug("Sent CLOSE to " + server + " for region " +
|
||||
|
@ -1557,7 +1556,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
if (t instanceof RemoteException) {
|
||||
t = ((RemoteException)t).unwrapRemoteException();
|
||||
}
|
||||
if (t instanceof NotServingRegionException) {
|
||||
if (t instanceof NotServingRegionException
|
||||
|| t instanceof RegionServerStoppedException) {
|
||||
if (transitionInZK) {
|
||||
deleteClosingOrClosedNode(region);
|
||||
}
|
||||
|
@ -1574,6 +1574,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
// Presume retry or server will expire.
|
||||
}
|
||||
}
|
||||
// Run out of attempts
|
||||
if (!tomActivated) {
|
||||
regionStates.updateRegionState(region, RegionState.State.FAILED_CLOSE);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1597,13 +1601,15 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
}
|
||||
case CLOSING:
|
||||
case PENDING_CLOSE:
|
||||
case FAILED_CLOSE:
|
||||
unassign(region, state, -1, null, false);
|
||||
state = regionStates.getRegionState(region);
|
||||
if (state.isOffline()) break;
|
||||
case FAILED_OPEN:
|
||||
case CLOSED:
|
||||
if (!state.isOffline()) {
|
||||
LOG.debug("Forcing OFFLINE; was=" + state);
|
||||
state = regionStates.updateRegionState(
|
||||
region, RegionState.State.OFFLINE);
|
||||
}
|
||||
LOG.debug("Forcing OFFLINE; was=" + state);
|
||||
state = regionStates.updateRegionState(
|
||||
region, RegionState.State.OFFLINE);
|
||||
case OFFLINE:
|
||||
break;
|
||||
default:
|
||||
|
@ -1637,6 +1643,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
LOG.warn("Unable to determine a plan to assign " + region);
|
||||
if (tomActivated){
|
||||
this.timeoutMonitor.setAllRegionServersOffline(true);
|
||||
} else {
|
||||
regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -1662,6 +1670,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
}
|
||||
}
|
||||
if (setOfflineInZK && versionOfOfflineNode == -1) {
|
||||
LOG.warn("Unable to set offline in ZooKeeper to assign " + region);
|
||||
if (!tomActivated) {
|
||||
regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (this.server.isStopped()) {
|
||||
|
@ -1740,6 +1752,9 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
LOG.warn("Failed to assign "
|
||||
+ region.getRegionNameAsString() + " since interrupted", ie);
|
||||
Thread.currentThread().interrupt();
|
||||
if (!tomActivated) {
|
||||
regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
|
||||
}
|
||||
return;
|
||||
}
|
||||
} else if (retry) {
|
||||
|
@ -1772,6 +1787,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
if (newPlan == null) {
|
||||
if (tomActivated) {
|
||||
this.timeoutMonitor.setAllRegionServersOffline(true);
|
||||
} else {
|
||||
regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
|
||||
}
|
||||
LOG.warn("Unable to find a viable location to assign region " +
|
||||
region.getRegionNameAsString());
|
||||
|
@ -1788,6 +1805,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Run out of attempts
|
||||
if (!tomActivated) {
|
||||
regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
|
||||
}
|
||||
}
|
||||
|
||||
private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
|
||||
|
@ -1914,6 +1935,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
}
|
||||
|
||||
if (newPlan) {
|
||||
if (randomPlan.getDestination() == null) {
|
||||
LOG.warn("Can't find a destination for region" + encodedName);
|
||||
return null;
|
||||
}
|
||||
LOG.debug("No previous transition plan was found (or we are ignoring " +
|
||||
"an existing plan) for " + region.getRegionNameAsString() +
|
||||
" so generated a random one; " + randomPlan + "; " +
|
||||
|
@ -2052,10 +2077,18 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
return;
|
||||
}
|
||||
state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
|
||||
} else if (force && (state.isPendingClose() || state.isClosing())) {
|
||||
} else if (state.isFailedOpen()) {
|
||||
// The region is not open yet
|
||||
regionOffline(region);
|
||||
return;
|
||||
} else if (force && (state.isPendingClose()
|
||||
|| state.isClosing() || state.isFailedClose())) {
|
||||
LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() +
|
||||
" which is already " + state.getState() +
|
||||
" but forcing to send a CLOSE RPC again ");
|
||||
if (state.isFailedClose()) {
|
||||
state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
|
||||
}
|
||||
state.updateTimestampToNow();
|
||||
} else {
|
||||
LOG.debug("Attempting to unassign region " +
|
||||
|
@ -2134,15 +2167,20 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
* @param regionInfo region to wait on assignment for
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
public void waitForAssignment(HRegionInfo regionInfo)
|
||||
public boolean waitForAssignment(HRegionInfo regionInfo)
|
||||
throws InterruptedException {
|
||||
while(!this.server.isStopped() &&
|
||||
!regionStates.isRegionAssigned(regionInfo)) {
|
||||
while (!regionStates.isRegionAssigned(regionInfo)) {
|
||||
if (regionStates.isRegionFailedToOpen(regionInfo)
|
||||
|| this.server.isStopped()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// We should receive a notification, but it's
|
||||
// better to have a timeout to recheck the condition here:
|
||||
// it lowers the impact of a race condition if any
|
||||
regionStates.waitForUpdate(100);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2690,6 +2728,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
|
||||
case SPLIT:
|
||||
case SPLITTING:
|
||||
case FAILED_OPEN:
|
||||
case FAILED_CLOSE:
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
@ -92,7 +92,6 @@ import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
|
|||
import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
|
||||
|
|
|
@ -132,6 +132,24 @@ public class RegionStates {
|
|||
return regionAssignments.containsKey(hri);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if specified region failed to open.
|
||||
*/
|
||||
public synchronized boolean isRegionFailedToOpen(final HRegionInfo hri) {
|
||||
RegionState regionState = getRegionTransitionState(hri);
|
||||
State state = regionState != null ? regionState.getState() : null;
|
||||
return state == State.FAILED_OPEN;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if specified region failed to close.
|
||||
*/
|
||||
public synchronized boolean isRegionFailedToClose(final HRegionInfo hri) {
|
||||
RegionState regionState = getRegionTransitionState(hri);
|
||||
State state = regionState != null ? regionState.getState() : null;
|
||||
return state == State.FAILED_CLOSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for the state map to be updated by assignment manager.
|
||||
*/
|
||||
|
|
|
@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.master.HMaster;
|
|||
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
|
||||
import org.apache.hadoop.hbase.master.MasterFileSystem;
|
||||
import org.apache.hadoop.hbase.master.MasterServices;
|
||||
import org.apache.hadoop.hbase.master.RegionStates;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Threads;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
|
@ -66,17 +67,21 @@ public class DeleteTableHandler extends TableEventHandler {
|
|||
|
||||
// 1. Wait because of region in transition
|
||||
AssignmentManager am = this.masterServices.getAssignmentManager();
|
||||
RegionStates states = am.getRegionStates();
|
||||
long waitTime = server.getConfiguration().
|
||||
getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
|
||||
for (HRegionInfo region : regions) {
|
||||
long done = System.currentTimeMillis() + waitTime;
|
||||
while (System.currentTimeMillis() < done) {
|
||||
if (!am.getRegionStates().isRegionInTransition(region)) break;
|
||||
if (states.isRegionFailedToOpen(region)) {
|
||||
am.regionOffline(region);
|
||||
}
|
||||
if (!states.isRegionInTransition(region)) break;
|
||||
Threads.sleep(waitingTimeForEvents);
|
||||
LOG.debug("Waiting on region to clear regions in transition; "
|
||||
+ am.getRegionStates().getRegionTransitionState(region));
|
||||
}
|
||||
if (am.getRegionStates().isRegionInTransition(region)) {
|
||||
if (states.isRegionInTransition(region)) {
|
||||
throw new IOException("Waited hbase.master.wait.on.region (" +
|
||||
waitTime + "ms) for region to leave region " +
|
||||
region.getRegionNameAsString() + " in transitions");
|
||||
|
|
|
@ -202,11 +202,12 @@ public class DisableTableHandler extends EventHandler {
|
|||
protected void populatePool(ExecutorService pool) {
|
||||
RegionStates regionStates = assignmentManager.getRegionStates();
|
||||
for (HRegionInfo region: regions) {
|
||||
if (regionStates.isRegionInTransition(region)) continue;
|
||||
if (regionStates.isRegionInTransition(region)
|
||||
&& !regionStates.isRegionFailedToClose(region)) continue;
|
||||
final HRegionInfo hri = region;
|
||||
pool.execute(Trace.wrap(new Runnable() {
|
||||
public void run() {
|
||||
assignmentManager.unassign(hri);
|
||||
assignmentManager.unassign(hri, true);
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.hadoop.hbase.HRegionInfo;
|
|||
import org.apache.hadoop.hbase.RegionTransition;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.executor.EventType;
|
||||
import org.apache.hadoop.hbase.master.RegionState.State;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||
|
@ -34,6 +35,22 @@ import org.apache.zookeeper.KeeperException;
|
|||
*/
|
||||
public class Mocking {
|
||||
|
||||
static void waitForRegionFailedToCloseAndSetToPendingClose(
|
||||
AssignmentManager am, HRegionInfo hri) throws InterruptedException {
|
||||
// Since the region server is fake, sendRegionClose will fail, and closing
|
||||
// the region will fail. For testing purposes, move it back to pending close.
|
||||
boolean wait = true;
|
||||
while (wait) {
|
||||
RegionState state = am.getRegionStates().getRegionState(hri);
|
||||
if (state != null && state.isFailedClose()){
|
||||
am.getRegionStates().updateRegionState(hri, State.PENDING_CLOSE);
|
||||
wait = false;
|
||||
} else {
|
||||
Thread.sleep(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void waitForRegionPendingOpenInRIT(AssignmentManager am, String encodedName)
|
||||
throws InterruptedException {
|
||||
// We used to do a check like this:
|
||||
|
@ -53,7 +70,6 @@ public class Mocking {
|
|||
Thread.sleep(1);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -201,6 +201,8 @@ public class TestAssignmentManager {
|
|||
// let's assume it is going to open on server b:
|
||||
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
|
||||
|
||||
Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
|
||||
|
||||
// Now fake the region closing successfully over on the regionserver; the
|
||||
// regionserver will have set the region in CLOSED state. This will
|
||||
// trigger callback into AM. The below zk close call is from the RS close
|
||||
|
@ -249,6 +251,8 @@ public class TestAssignmentManager {
|
|||
// let's assume it is going to open on server b:
|
||||
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
|
||||
|
||||
Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
|
||||
|
||||
// Now fake the region closing successfully over on the regionserver; the
|
||||
// regionserver will have set the region in CLOSED state. This will
|
||||
// trigger callback into AM. The below zk close call is from the RS close
|
||||
|
@ -298,6 +302,8 @@ public class TestAssignmentManager {
|
|||
// let's assume it is going to open on server b:
|
||||
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
|
||||
|
||||
Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
|
||||
|
||||
// Now fake the region closing successfully over on the regionserver; the
|
||||
// regionserver will have set the region in CLOSED state. This will
|
||||
// trigger callback into AM. The below zk close call is from the RS close
|
||||
|
@ -341,7 +347,6 @@ public class TestAssignmentManager {
|
|||
am.balance(new RegionPlan(hri, from, to));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tests AssignmentManager balance function. Runs a balance moving a region
|
||||
* from one server to another mocking regionserver responding over zk.
|
||||
|
@ -375,6 +380,11 @@ public class TestAssignmentManager {
|
|||
RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
|
||||
am.balance(plan);
|
||||
|
||||
// The close must have failed since the server is fake
|
||||
assertTrue(am.getRegionStates().isRegionFailedToClose(REGIONINFO));
|
||||
// Move it back to pending_close
|
||||
am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
|
||||
|
||||
// Now fake the region closing successfully over on the regionserver; the
|
||||
// regionserver will have set the region in CLOSED state. This will
|
||||
// trigger callback into AM. The below zk close call is from the RS close
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
|
@ -34,6 +36,12 @@ import org.apache.hadoop.hbase.ServerName;
|
|||
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
|
||||
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
|
||||
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
|
||||
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
|
||||
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
|
||||
import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.junit.AfterClass;
|
||||
|
@ -53,6 +61,12 @@ public class TestAssignmentManagerOnCluster {
|
|||
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
// Using the test load balancer to control region plans
|
||||
conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
|
||||
TestLoadBalancer.class, LoadBalancer.class);
|
||||
conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
|
||||
TestRegionObserver.class, RegionObserver.class);
|
||||
|
||||
TEST_UTIL.startMiniCluster(3);
|
||||
admin = TEST_UTIL.getHBaseAdmin();
|
||||
}
|
||||
|
@ -191,4 +205,110 @@ public class TestAssignmentManagerOnCluster {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests the case where closing a region fails
|
||||
*/
|
||||
@Test
|
||||
public void testCloseFailed() throws Exception {
|
||||
String table = "testCloseFailed";
|
||||
try {
|
||||
HTableDescriptor desc = new HTableDescriptor(table);
|
||||
desc.addFamily(new HColumnDescriptor(FAMILY));
|
||||
admin.createTable(desc);
|
||||
|
||||
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
HRegionInfo hri = new HRegionInfo(
|
||||
desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
|
||||
MetaEditor.addRegionToMeta(meta, hri);
|
||||
|
||||
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||
master.assignRegion(hri);
|
||||
AssignmentManager am = master.getAssignmentManager();
|
||||
assertTrue(am.waitForAssignment(hri));
|
||||
|
||||
TestRegionObserver.enabled = true;
|
||||
am.unassign(hri);
|
||||
RegionState state = am.getRegionStates().getRegionState(hri);
|
||||
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
|
||||
|
||||
TestRegionObserver.enabled = false;
|
||||
am.unassign(hri, true);
|
||||
state = am.getRegionStates().getRegionState(hri);
|
||||
assertTrue(RegionState.State.FAILED_CLOSE != state.getState());
|
||||
|
||||
am.assign(hri, true, true);
|
||||
assertTrue(am.waitForAssignment(hri));
|
||||
|
||||
ServerName serverName = master.getAssignmentManager().
|
||||
getRegionStates().getRegionServerOfRegion(hri);
|
||||
TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
|
||||
} finally {
|
||||
TestRegionObserver.enabled = false;
|
||||
TEST_UTIL.deleteTable(Bytes.toBytes(table));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests the case where opening a region fails
|
||||
*/
|
||||
@Test
|
||||
public void testOpenFailed() throws Exception {
|
||||
String table = "testOpenFailed";
|
||||
try {
|
||||
HTableDescriptor desc = new HTableDescriptor(table);
|
||||
desc.addFamily(new HColumnDescriptor(FAMILY));
|
||||
admin.createTable(desc);
|
||||
|
||||
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
HRegionInfo hri = new HRegionInfo(
|
||||
desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
|
||||
MetaEditor.addRegionToMeta(meta, hri);
|
||||
|
||||
TestLoadBalancer.controledRegion = hri.getEncodedName();
|
||||
|
||||
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||
master.assignRegion(hri);
|
||||
AssignmentManager am = master.getAssignmentManager();
|
||||
assertFalse(am.waitForAssignment(hri));
|
||||
|
||||
RegionState state = am.getRegionStates().getRegionState(hri);
|
||||
assertEquals(RegionState.State.FAILED_OPEN, state.getState());
|
||||
|
||||
TestLoadBalancer.controledRegion = null;
|
||||
master.assignRegion(hri);
|
||||
assertTrue(am.waitForAssignment(hri));
|
||||
|
||||
ServerName serverName = master.getAssignmentManager().
|
||||
getRegionStates().getRegionServerOfRegion(hri);
|
||||
TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
|
||||
} finally {
|
||||
TestLoadBalancer.controledRegion = null;
|
||||
TEST_UTIL.deleteTable(Bytes.toBytes(table));
|
||||
}
|
||||
}
|
||||
|
||||
static class TestLoadBalancer extends StochasticLoadBalancer {
|
||||
// For this region, if specified, always assign to nowhere
|
||||
static volatile String controledRegion = null;
|
||||
|
||||
@Override
|
||||
public ServerName randomAssignment(HRegionInfo regionInfo,
|
||||
List<ServerName> servers) {
|
||||
if (regionInfo.getEncodedName().equals(controledRegion)) {
|
||||
return null;
|
||||
}
|
||||
return super.randomAssignment(regionInfo, servers);
|
||||
}
|
||||
}
|
||||
|
||||
public static class TestRegionObserver extends BaseRegionObserver {
|
||||
// If enabled, fail all preClose calls
|
||||
static volatile boolean enabled = false;
|
||||
|
||||
@Override
|
||||
public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
|
||||
boolean abortRequested) throws IOException {
|
||||
if (enabled) throw new IOException("fail preClose from coprocessor");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue