HBASE-8137 Add failed to open/close region state
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1459384 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
66d90f865e
commit
e38dcba6c3
|
@ -44,7 +44,9 @@ public class RegionState implements org.apache.hadoop.io.Writable {
|
||||||
CLOSING, // server has begun to close but not yet done
|
CLOSING, // server has begun to close but not yet done
|
||||||
CLOSED, // server closed region and updated meta
|
CLOSED, // server closed region and updated meta
|
||||||
SPLITTING, // server started split of a region
|
SPLITTING, // server started split of a region
|
||||||
SPLIT // server completed split of a region
|
SPLIT, // server completed split of a region
|
||||||
|
FAILED_OPEN, // failed to open, and won't retry any more
|
||||||
|
FAILED_CLOSE // failed to close, and won't retry any more
|
||||||
}
|
}
|
||||||
|
|
||||||
// Many threads can update the state at the stamp at the same time
|
// Many threads can update the state at the stamp at the same time
|
||||||
|
@ -126,6 +128,14 @@ public class RegionState implements org.apache.hadoop.io.Writable {
|
||||||
return state == State.SPLIT;
|
return state == State.SPLIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isFailedOpen() {
|
||||||
|
return state == State.FAILED_OPEN;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isFailedClose() {
|
||||||
|
return state == State.FAILED_CLOSE;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isPendingOpenOrOpeningOnServer(final ServerName sn) {
|
public boolean isPendingOpenOrOpeningOnServer(final ServerName sn) {
|
||||||
return isOnServer(sn) && (isPendingOpen() || isOpening());
|
return isOnServer(sn) && (isPendingOpen() || isOpening());
|
||||||
}
|
}
|
||||||
|
@ -195,6 +205,12 @@ public class RegionState implements org.apache.hadoop.io.Writable {
|
||||||
case SPLIT:
|
case SPLIT:
|
||||||
rs = ClusterStatusProtos.RegionState.State.SPLIT;
|
rs = ClusterStatusProtos.RegionState.State.SPLIT;
|
||||||
break;
|
break;
|
||||||
|
case FAILED_OPEN:
|
||||||
|
rs = ClusterStatusProtos.RegionState.State.FAILED_OPEN;
|
||||||
|
break;
|
||||||
|
case FAILED_CLOSE:
|
||||||
|
rs = ClusterStatusProtos.RegionState.State.FAILED_CLOSE;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException("");
|
throw new IllegalStateException("");
|
||||||
}
|
}
|
||||||
|
@ -239,6 +255,12 @@ public class RegionState implements org.apache.hadoop.io.Writable {
|
||||||
case SPLIT:
|
case SPLIT:
|
||||||
state = State.SPLIT;
|
state = State.SPLIT;
|
||||||
break;
|
break;
|
||||||
|
case FAILED_OPEN:
|
||||||
|
state = State.FAILED_OPEN;
|
||||||
|
break;
|
||||||
|
case FAILED_CLOSE:
|
||||||
|
state = State.FAILED_CLOSE;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException("");
|
throw new IllegalStateException("");
|
||||||
}
|
}
|
||||||
|
|
|
@ -63,6 +63,8 @@ public final class ClusterStatusProtos {
|
||||||
CLOSED(6, 6),
|
CLOSED(6, 6),
|
||||||
SPLITTING(7, 7),
|
SPLITTING(7, 7),
|
||||||
SPLIT(8, 8),
|
SPLIT(8, 8),
|
||||||
|
FAILED_OPEN(9, 9),
|
||||||
|
FAILED_CLOSE(10, 10),
|
||||||
;
|
;
|
||||||
|
|
||||||
public static final int OFFLINE_VALUE = 0;
|
public static final int OFFLINE_VALUE = 0;
|
||||||
|
@ -74,6 +76,8 @@ public final class ClusterStatusProtos {
|
||||||
public static final int CLOSED_VALUE = 6;
|
public static final int CLOSED_VALUE = 6;
|
||||||
public static final int SPLITTING_VALUE = 7;
|
public static final int SPLITTING_VALUE = 7;
|
||||||
public static final int SPLIT_VALUE = 8;
|
public static final int SPLIT_VALUE = 8;
|
||||||
|
public static final int FAILED_OPEN_VALUE = 9;
|
||||||
|
public static final int FAILED_CLOSE_VALUE = 10;
|
||||||
|
|
||||||
|
|
||||||
public final int getNumber() { return value; }
|
public final int getNumber() { return value; }
|
||||||
|
@ -89,6 +93,8 @@ public final class ClusterStatusProtos {
|
||||||
case 6: return CLOSED;
|
case 6: return CLOSED;
|
||||||
case 7: return SPLITTING;
|
case 7: return SPLITTING;
|
||||||
case 8: return SPLIT;
|
case 8: return SPLIT;
|
||||||
|
case 9: return FAILED_OPEN;
|
||||||
|
case 10: return FAILED_CLOSE;
|
||||||
default: return null;
|
default: return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -119,7 +125,7 @@ public final class ClusterStatusProtos {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final State[] VALUES = {
|
private static final State[] VALUES = {
|
||||||
OFFLINE, PENDING_OPEN, OPENING, OPEN, PENDING_CLOSE, CLOSING, CLOSED, SPLITTING, SPLIT,
|
OFFLINE, PENDING_OPEN, OPENING, OPEN, PENDING_CLOSE, CLOSING, CLOSED, SPLITTING, SPLIT, FAILED_OPEN, FAILED_CLOSE,
|
||||||
};
|
};
|
||||||
|
|
||||||
public static State valueOf(
|
public static State valueOf(
|
||||||
|
@ -4350,28 +4356,28 @@ public final class ClusterStatusProtos {
|
||||||
static {
|
static {
|
||||||
java.lang.String[] descriptorData = {
|
java.lang.String[] descriptorData = {
|
||||||
"\n\023ClusterStatus.proto\032\013hbase.proto\032\017Clus" +
|
"\n\023ClusterStatus.proto\032\013hbase.proto\032\017Clus" +
|
||||||
"terId.proto\032\010FS.proto\"\346\001\n\013RegionState\022\037\n" +
|
"terId.proto\032\010FS.proto\"\211\002\n\013RegionState\022\037\n" +
|
||||||
"\nregionInfo\030\001 \002(\0132\013.RegionInfo\022!\n\005state\030" +
|
"\nregionInfo\030\001 \002(\0132\013.RegionInfo\022!\n\005state\030" +
|
||||||
"\002 \002(\0162\022.RegionState.State\022\r\n\005stamp\030\003 \001(\004" +
|
"\002 \002(\0162\022.RegionState.State\022\r\n\005stamp\030\003 \001(\004" +
|
||||||
"\"\203\001\n\005State\022\013\n\007OFFLINE\020\000\022\020\n\014PENDING_OPEN\020" +
|
"\"\246\001\n\005State\022\013\n\007OFFLINE\020\000\022\020\n\014PENDING_OPEN\020" +
|
||||||
"\001\022\013\n\007OPENING\020\002\022\010\n\004OPEN\020\003\022\021\n\rPENDING_CLOS" +
|
"\001\022\013\n\007OPENING\020\002\022\010\n\004OPEN\020\003\022\021\n\rPENDING_CLOS" +
|
||||||
"E\020\004\022\013\n\007CLOSING\020\005\022\n\n\006CLOSED\020\006\022\r\n\tSPLITTIN" +
|
"E\020\004\022\013\n\007CLOSING\020\005\022\n\n\006CLOSED\020\006\022\r\n\tSPLITTIN" +
|
||||||
"G\020\007\022\t\n\005SPLIT\020\010\"W\n\022RegionInTransition\022\036\n\004" +
|
"G\020\007\022\t\n\005SPLIT\020\010\022\017\n\013FAILED_OPEN\020\t\022\020\n\014FAILE" +
|
||||||
"spec\030\001 \002(\0132\020.RegionSpecifier\022!\n\013regionSt" +
|
"D_CLOSE\020\n\"W\n\022RegionInTransition\022\036\n\004spec\030" +
|
||||||
"ate\030\002 \002(\0132\014.RegionState\"N\n\016LiveServerInf",
|
"\001 \002(\0132\020.RegionSpecifier\022!\n\013regionState\030\002",
|
||||||
"o\022\033\n\006server\030\001 \002(\0132\013.ServerName\022\037\n\nserver" +
|
" \002(\0132\014.RegionState\"N\n\016LiveServerInfo\022\033\n\006" +
|
||||||
"Load\030\002 \002(\0132\013.ServerLoad\"\327\002\n\rClusterStatu" +
|
"server\030\001 \002(\0132\013.ServerName\022\037\n\nserverLoad\030" +
|
||||||
"s\022.\n\014hbaseVersion\030\001 \001(\0132\030.HBaseVersionFi" +
|
"\002 \002(\0132\013.ServerLoad\"\327\002\n\rClusterStatus\022.\n\014" +
|
||||||
"leContent\022$\n\013liveServers\030\002 \003(\0132\017.LiveSer" +
|
"hbaseVersion\030\001 \001(\0132\030.HBaseVersionFileCon" +
|
||||||
"verInfo\022 \n\013deadServers\030\003 \003(\0132\013.ServerNam" +
|
"tent\022$\n\013liveServers\030\002 \003(\0132\017.LiveServerIn" +
|
||||||
"e\0220\n\023regionsInTransition\030\004 \003(\0132\023.RegionI" +
|
"fo\022 \n\013deadServers\030\003 \003(\0132\013.ServerName\0220\n\023" +
|
||||||
"nTransition\022\035\n\tclusterId\030\005 \001(\0132\n.Cluster" +
|
"regionsInTransition\030\004 \003(\0132\023.RegionInTran" +
|
||||||
"Id\022(\n\022masterCoprocessors\030\006 \003(\0132\014.Coproce" +
|
"sition\022\035\n\tclusterId\030\005 \001(\0132\n.ClusterId\022(\n" +
|
||||||
"ssor\022\033\n\006master\030\007 \001(\0132\013.ServerName\022\"\n\rbac" +
|
"\022masterCoprocessors\030\006 \003(\0132\014.Coprocessor\022" +
|
||||||
"kupMasters\030\010 \003(\0132\013.ServerName\022\022\n\nbalance",
|
"\033\n\006master\030\007 \001(\0132\013.ServerName\022\"\n\rbackupMa",
|
||||||
"rOn\030\t \001(\010BF\n*org.apache.hadoop.hbase.pro" +
|
"sters\030\010 \003(\0132\013.ServerName\022\022\n\nbalancerOn\030\t" +
|
||||||
"tobuf.generatedB\023ClusterStatusProtosH\001\240\001" +
|
" \001(\010BF\n*org.apache.hadoop.hbase.protobuf" +
|
||||||
"\001"
|
".generatedB\023ClusterStatusProtosH\001\240\001\001"
|
||||||
};
|
};
|
||||||
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
|
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
|
||||||
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
|
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
|
||||||
|
|
|
@ -41,6 +41,8 @@ message RegionState {
|
||||||
CLOSED = 6; // server closed region and updated meta
|
CLOSED = 6; // server closed region and updated meta
|
||||||
SPLITTING = 7; // server started split of a region
|
SPLITTING = 7; // server started split of a region
|
||||||
SPLIT = 8; // server completed split of a region
|
SPLIT = 8; // server completed split of a region
|
||||||
|
FAILED_OPEN = 9; // failed to open, and won't retry any more
|
||||||
|
FAILED_CLOSE = 10; // failed to close, and won't retry any more
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1529,20 +1529,19 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
private void unassign(final HRegionInfo region,
|
private void unassign(final HRegionInfo region,
|
||||||
final RegionState state, final int versionOfClosingNode,
|
final RegionState state, final int versionOfClosingNode,
|
||||||
final ServerName dest, final boolean transitionInZK) {
|
final ServerName dest, final boolean transitionInZK) {
|
||||||
// Send CLOSE RPC
|
|
||||||
ServerName server = state.getServerName();
|
ServerName server = state.getServerName();
|
||||||
// ClosedRegionHandler can remove the server from this.regions
|
|
||||||
if (!serverManager.isServerOnline(server)) {
|
|
||||||
if (transitionInZK) {
|
|
||||||
// Delete the node; if no node exists, there is nothing to do.
|
|
||||||
deleteClosingOrClosedNode(region);
|
|
||||||
}
|
|
||||||
regionOffline(region);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 1; i <= this.maximumAttempts; i++) {
|
for (int i = 1; i <= this.maximumAttempts; i++) {
|
||||||
|
// ClosedRegionHandler can remove the server from this.regions
|
||||||
|
if (!serverManager.isServerOnline(server)) {
|
||||||
|
if (transitionInZK) {
|
||||||
|
// Delete the node; if no node exists, there is nothing to do.
|
||||||
|
deleteClosingOrClosedNode(region);
|
||||||
|
}
|
||||||
|
regionOffline(region);
|
||||||
|
return;
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
|
// Send CLOSE RPC
|
||||||
if (serverManager.sendRegionClose(server, region,
|
if (serverManager.sendRegionClose(server, region,
|
||||||
versionOfClosingNode, dest, transitionInZK)) {
|
versionOfClosingNode, dest, transitionInZK)) {
|
||||||
LOG.debug("Sent CLOSE to " + server + " for region " +
|
LOG.debug("Sent CLOSE to " + server + " for region " +
|
||||||
|
@ -1557,7 +1556,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
if (t instanceof RemoteException) {
|
if (t instanceof RemoteException) {
|
||||||
t = ((RemoteException)t).unwrapRemoteException();
|
t = ((RemoteException)t).unwrapRemoteException();
|
||||||
}
|
}
|
||||||
if (t instanceof NotServingRegionException) {
|
if (t instanceof NotServingRegionException
|
||||||
|
|| t instanceof RegionServerStoppedException) {
|
||||||
if (transitionInZK) {
|
if (transitionInZK) {
|
||||||
deleteClosingOrClosedNode(region);
|
deleteClosingOrClosedNode(region);
|
||||||
}
|
}
|
||||||
|
@ -1574,6 +1574,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
// Presume retry or server will expire.
|
// Presume retry or server will expire.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Run out of attempts
|
||||||
|
if (!tomActivated) {
|
||||||
|
regionStates.updateRegionState(region, RegionState.State.FAILED_CLOSE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1597,13 +1601,15 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
}
|
}
|
||||||
case CLOSING:
|
case CLOSING:
|
||||||
case PENDING_CLOSE:
|
case PENDING_CLOSE:
|
||||||
|
case FAILED_CLOSE:
|
||||||
unassign(region, state, -1, null, false);
|
unassign(region, state, -1, null, false);
|
||||||
|
state = regionStates.getRegionState(region);
|
||||||
|
if (state.isOffline()) break;
|
||||||
|
case FAILED_OPEN:
|
||||||
case CLOSED:
|
case CLOSED:
|
||||||
if (!state.isOffline()) {
|
LOG.debug("Forcing OFFLINE; was=" + state);
|
||||||
LOG.debug("Forcing OFFLINE; was=" + state);
|
state = regionStates.updateRegionState(
|
||||||
state = regionStates.updateRegionState(
|
region, RegionState.State.OFFLINE);
|
||||||
region, RegionState.State.OFFLINE);
|
|
||||||
}
|
|
||||||
case OFFLINE:
|
case OFFLINE:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -1637,6 +1643,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
LOG.warn("Unable to determine a plan to assign " + region);
|
LOG.warn("Unable to determine a plan to assign " + region);
|
||||||
if (tomActivated){
|
if (tomActivated){
|
||||||
this.timeoutMonitor.setAllRegionServersOffline(true);
|
this.timeoutMonitor.setAllRegionServersOffline(true);
|
||||||
|
} else {
|
||||||
|
regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1662,6 +1670,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (setOfflineInZK && versionOfOfflineNode == -1) {
|
if (setOfflineInZK && versionOfOfflineNode == -1) {
|
||||||
|
LOG.warn("Unable to set offline in ZooKeeper to assign " + region);
|
||||||
|
if (!tomActivated) {
|
||||||
|
regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (this.server.isStopped()) {
|
if (this.server.isStopped()) {
|
||||||
|
@ -1740,6 +1752,9 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
LOG.warn("Failed to assign "
|
LOG.warn("Failed to assign "
|
||||||
+ region.getRegionNameAsString() + " since interrupted", ie);
|
+ region.getRegionNameAsString() + " since interrupted", ie);
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
|
if (!tomActivated) {
|
||||||
|
regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else if (retry) {
|
} else if (retry) {
|
||||||
|
@ -1772,6 +1787,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
if (newPlan == null) {
|
if (newPlan == null) {
|
||||||
if (tomActivated) {
|
if (tomActivated) {
|
||||||
this.timeoutMonitor.setAllRegionServersOffline(true);
|
this.timeoutMonitor.setAllRegionServersOffline(true);
|
||||||
|
} else {
|
||||||
|
regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
|
||||||
}
|
}
|
||||||
LOG.warn("Unable to find a viable location to assign region " +
|
LOG.warn("Unable to find a viable location to assign region " +
|
||||||
region.getRegionNameAsString());
|
region.getRegionNameAsString());
|
||||||
|
@ -1788,6 +1805,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Run out of attempts
|
||||||
|
if (!tomActivated) {
|
||||||
|
regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
|
private void processAlreadyOpenedRegion(HRegionInfo region, ServerName sn) {
|
||||||
|
@ -1914,6 +1935,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newPlan) {
|
if (newPlan) {
|
||||||
|
if (randomPlan.getDestination() == null) {
|
||||||
|
LOG.warn("Can't find a destination for region" + encodedName);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
LOG.debug("No previous transition plan was found (or we are ignoring " +
|
LOG.debug("No previous transition plan was found (or we are ignoring " +
|
||||||
"an existing plan) for " + region.getRegionNameAsString() +
|
"an existing plan) for " + region.getRegionNameAsString() +
|
||||||
" so generated a random one; " + randomPlan + "; " +
|
" so generated a random one; " + randomPlan + "; " +
|
||||||
|
@ -2052,10 +2077,18 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
|
state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
|
||||||
} else if (force && (state.isPendingClose() || state.isClosing())) {
|
} else if (state.isFailedOpen()) {
|
||||||
|
// The region is not open yet
|
||||||
|
regionOffline(region);
|
||||||
|
return;
|
||||||
|
} else if (force && (state.isPendingClose()
|
||||||
|
|| state.isClosing() || state.isFailedClose())) {
|
||||||
LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() +
|
LOG.debug("Attempting to unassign region " + region.getRegionNameAsString() +
|
||||||
" which is already " + state.getState() +
|
" which is already " + state.getState() +
|
||||||
" but forcing to send a CLOSE RPC again ");
|
" but forcing to send a CLOSE RPC again ");
|
||||||
|
if (state.isFailedClose()) {
|
||||||
|
state = regionStates.updateRegionState(region, RegionState.State.PENDING_CLOSE);
|
||||||
|
}
|
||||||
state.updateTimestampToNow();
|
state.updateTimestampToNow();
|
||||||
} else {
|
} else {
|
||||||
LOG.debug("Attempting to unassign region " +
|
LOG.debug("Attempting to unassign region " +
|
||||||
|
@ -2134,15 +2167,20 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
* @param regionInfo region to wait on assignment for
|
* @param regionInfo region to wait on assignment for
|
||||||
* @throws InterruptedException
|
* @throws InterruptedException
|
||||||
*/
|
*/
|
||||||
public void waitForAssignment(HRegionInfo regionInfo)
|
public boolean waitForAssignment(HRegionInfo regionInfo)
|
||||||
throws InterruptedException {
|
throws InterruptedException {
|
||||||
while(!this.server.isStopped() &&
|
while (!regionStates.isRegionAssigned(regionInfo)) {
|
||||||
!regionStates.isRegionAssigned(regionInfo)) {
|
if (regionStates.isRegionFailedToOpen(regionInfo)
|
||||||
|
|| this.server.isStopped()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// We should receive a notification, but it's
|
// We should receive a notification, but it's
|
||||||
// better to have a timeout to recheck the condition here:
|
// better to have a timeout to recheck the condition here:
|
||||||
// it lowers the impact of a race condition if any
|
// it lowers the impact of a race condition if any
|
||||||
regionStates.waitForUpdate(100);
|
regionStates.waitForUpdate(100);
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2690,6 +2728,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
|
|
||||||
case SPLIT:
|
case SPLIT:
|
||||||
case SPLITTING:
|
case SPLITTING:
|
||||||
|
case FAILED_OPEN:
|
||||||
|
case FAILED_CLOSE:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -92,7 +92,6 @@ import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
|
||||||
import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
|
import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
|
||||||
import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
|
import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
|
||||||
import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
|
import org.apache.hadoop.hbase.master.handler.ModifyTableHandler;
|
||||||
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
|
|
||||||
import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
|
import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler;
|
||||||
import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
|
import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
|
||||||
import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
|
import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
|
||||||
|
|
|
@ -132,6 +132,24 @@ public class RegionStates {
|
||||||
return regionAssignments.containsKey(hri);
|
return regionAssignments.containsKey(hri);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return True if specified region failed to open.
|
||||||
|
*/
|
||||||
|
public synchronized boolean isRegionFailedToOpen(final HRegionInfo hri) {
|
||||||
|
RegionState regionState = getRegionTransitionState(hri);
|
||||||
|
State state = regionState != null ? regionState.getState() : null;
|
||||||
|
return state == State.FAILED_OPEN;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return True if specified region failed to close.
|
||||||
|
*/
|
||||||
|
public synchronized boolean isRegionFailedToClose(final HRegionInfo hri) {
|
||||||
|
RegionState regionState = getRegionTransitionState(hri);
|
||||||
|
State state = regionState != null ? regionState.getState() : null;
|
||||||
|
return state == State.FAILED_CLOSE;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wait for the state map to be updated by assignment manager.
|
* Wait for the state map to be updated by assignment manager.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.master.HMaster;
|
||||||
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
|
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
|
||||||
import org.apache.hadoop.hbase.master.MasterFileSystem;
|
import org.apache.hadoop.hbase.master.MasterFileSystem;
|
||||||
import org.apache.hadoop.hbase.master.MasterServices;
|
import org.apache.hadoop.hbase.master.MasterServices;
|
||||||
|
import org.apache.hadoop.hbase.master.RegionStates;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.hbase.util.Threads;
|
import org.apache.hadoop.hbase.util.Threads;
|
||||||
import org.apache.zookeeper.KeeperException;
|
import org.apache.zookeeper.KeeperException;
|
||||||
|
@ -66,17 +67,21 @@ public class DeleteTableHandler extends TableEventHandler {
|
||||||
|
|
||||||
// 1. Wait because of region in transition
|
// 1. Wait because of region in transition
|
||||||
AssignmentManager am = this.masterServices.getAssignmentManager();
|
AssignmentManager am = this.masterServices.getAssignmentManager();
|
||||||
|
RegionStates states = am.getRegionStates();
|
||||||
long waitTime = server.getConfiguration().
|
long waitTime = server.getConfiguration().
|
||||||
getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
|
getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
|
||||||
for (HRegionInfo region : regions) {
|
for (HRegionInfo region : regions) {
|
||||||
long done = System.currentTimeMillis() + waitTime;
|
long done = System.currentTimeMillis() + waitTime;
|
||||||
while (System.currentTimeMillis() < done) {
|
while (System.currentTimeMillis() < done) {
|
||||||
if (!am.getRegionStates().isRegionInTransition(region)) break;
|
if (states.isRegionFailedToOpen(region)) {
|
||||||
|
am.regionOffline(region);
|
||||||
|
}
|
||||||
|
if (!states.isRegionInTransition(region)) break;
|
||||||
Threads.sleep(waitingTimeForEvents);
|
Threads.sleep(waitingTimeForEvents);
|
||||||
LOG.debug("Waiting on region to clear regions in transition; "
|
LOG.debug("Waiting on region to clear regions in transition; "
|
||||||
+ am.getRegionStates().getRegionTransitionState(region));
|
+ am.getRegionStates().getRegionTransitionState(region));
|
||||||
}
|
}
|
||||||
if (am.getRegionStates().isRegionInTransition(region)) {
|
if (states.isRegionInTransition(region)) {
|
||||||
throw new IOException("Waited hbase.master.wait.on.region (" +
|
throw new IOException("Waited hbase.master.wait.on.region (" +
|
||||||
waitTime + "ms) for region to leave region " +
|
waitTime + "ms) for region to leave region " +
|
||||||
region.getRegionNameAsString() + " in transitions");
|
region.getRegionNameAsString() + " in transitions");
|
||||||
|
|
|
@ -202,11 +202,12 @@ public class DisableTableHandler extends EventHandler {
|
||||||
protected void populatePool(ExecutorService pool) {
|
protected void populatePool(ExecutorService pool) {
|
||||||
RegionStates regionStates = assignmentManager.getRegionStates();
|
RegionStates regionStates = assignmentManager.getRegionStates();
|
||||||
for (HRegionInfo region: regions) {
|
for (HRegionInfo region: regions) {
|
||||||
if (regionStates.isRegionInTransition(region)) continue;
|
if (regionStates.isRegionInTransition(region)
|
||||||
|
&& !regionStates.isRegionFailedToClose(region)) continue;
|
||||||
final HRegionInfo hri = region;
|
final HRegionInfo hri = region;
|
||||||
pool.execute(Trace.wrap(new Runnable() {
|
pool.execute(Trace.wrap(new Runnable() {
|
||||||
public void run() {
|
public void run() {
|
||||||
assignmentManager.unassign(hri);
|
assignmentManager.unassign(hri, true);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
import org.apache.hadoop.hbase.RegionTransition;
|
import org.apache.hadoop.hbase.RegionTransition;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.executor.EventType;
|
import org.apache.hadoop.hbase.executor.EventType;
|
||||||
|
import org.apache.hadoop.hbase.master.RegionState.State;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
|
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||||
|
@ -34,6 +35,22 @@ import org.apache.zookeeper.KeeperException;
|
||||||
*/
|
*/
|
||||||
public class Mocking {
|
public class Mocking {
|
||||||
|
|
||||||
|
static void waitForRegionFailedToCloseAndSetToPendingClose(
|
||||||
|
AssignmentManager am, HRegionInfo hri) throws InterruptedException {
|
||||||
|
// Since the region server is fake, sendRegionClose will fail, and closing
|
||||||
|
// the region will fail. For testing purposes, move it back to pending close.
|
||||||
|
boolean wait = true;
|
||||||
|
while (wait) {
|
||||||
|
RegionState state = am.getRegionStates().getRegionState(hri);
|
||||||
|
if (state != null && state.isFailedClose()){
|
||||||
|
am.getRegionStates().updateRegionState(hri, State.PENDING_CLOSE);
|
||||||
|
wait = false;
|
||||||
|
} else {
|
||||||
|
Thread.sleep(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void waitForRegionPendingOpenInRIT(AssignmentManager am, String encodedName)
|
static void waitForRegionPendingOpenInRIT(AssignmentManager am, String encodedName)
|
||||||
throws InterruptedException {
|
throws InterruptedException {
|
||||||
// We used to do a check like this:
|
// We used to do a check like this:
|
||||||
|
@ -53,7 +70,6 @@ public class Mocking {
|
||||||
Thread.sleep(1);
|
Thread.sleep(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -201,6 +201,8 @@ public class TestAssignmentManager {
|
||||||
// let's assume it is going to open on server b:
|
// let's assume it is going to open on server b:
|
||||||
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
|
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
|
||||||
|
|
||||||
|
Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
|
||||||
|
|
||||||
// Now fake the region closing successfully over on the regionserver; the
|
// Now fake the region closing successfully over on the regionserver; the
|
||||||
// regionserver will have set the region in CLOSED state. This will
|
// regionserver will have set the region in CLOSED state. This will
|
||||||
// trigger callback into AM. The below zk close call is from the RS close
|
// trigger callback into AM. The below zk close call is from the RS close
|
||||||
|
@ -249,6 +251,8 @@ public class TestAssignmentManager {
|
||||||
// let's assume it is going to open on server b:
|
// let's assume it is going to open on server b:
|
||||||
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
|
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
|
||||||
|
|
||||||
|
Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
|
||||||
|
|
||||||
// Now fake the region closing successfully over on the regionserver; the
|
// Now fake the region closing successfully over on the regionserver; the
|
||||||
// regionserver will have set the region in CLOSED state. This will
|
// regionserver will have set the region in CLOSED state. This will
|
||||||
// trigger callback into AM. The below zk close call is from the RS close
|
// trigger callback into AM. The below zk close call is from the RS close
|
||||||
|
@ -298,6 +302,8 @@ public class TestAssignmentManager {
|
||||||
// let's assume it is going to open on server b:
|
// let's assume it is going to open on server b:
|
||||||
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
|
am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B));
|
||||||
|
|
||||||
|
Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO);
|
||||||
|
|
||||||
// Now fake the region closing successfully over on the regionserver; the
|
// Now fake the region closing successfully over on the regionserver; the
|
||||||
// regionserver will have set the region in CLOSED state. This will
|
// regionserver will have set the region in CLOSED state. This will
|
||||||
// trigger callback into AM. The below zk close call is from the RS close
|
// trigger callback into AM. The below zk close call is from the RS close
|
||||||
|
@ -341,7 +347,6 @@ public class TestAssignmentManager {
|
||||||
am.balance(new RegionPlan(hri, from, to));
|
am.balance(new RegionPlan(hri, from, to));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests AssignmentManager balance function. Runs a balance moving a region
|
* Tests AssignmentManager balance function. Runs a balance moving a region
|
||||||
* from one server to another mocking regionserver responding over zk.
|
* from one server to another mocking regionserver responding over zk.
|
||||||
|
@ -375,6 +380,11 @@ public class TestAssignmentManager {
|
||||||
RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
|
RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B);
|
||||||
am.balance(plan);
|
am.balance(plan);
|
||||||
|
|
||||||
|
// The region must be in the FAILED_CLOSE state since the server is fake
|
||||||
|
assertTrue(am.getRegionStates().isRegionFailedToClose(REGIONINFO));
|
||||||
|
// Move it back to pending_close
|
||||||
|
am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE);
|
||||||
|
|
||||||
// Now fake the region closing successfully over on the regionserver; the
|
// Now fake the region closing successfully over on the regionserver; the
|
||||||
// regionserver will have set the region in CLOSED state. This will
|
// regionserver will have set the region in CLOSED state. This will
|
||||||
// trigger callback into AM. The below zk close call is from the RS close
|
// trigger callback into AM. The below zk close call is from the RS close
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.master;
|
package org.apache.hadoop.hbase.master;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
|
@ -34,6 +36,12 @@ import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
||||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||||
import org.apache.hadoop.hbase.client.HTable;
|
import org.apache.hadoop.hbase.client.HTable;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
|
||||||
|
import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
|
||||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
|
@ -53,6 +61,12 @@ public class TestAssignmentManagerOnCluster {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void setUpBeforeClass() throws Exception {
|
public static void setUpBeforeClass() throws Exception {
|
||||||
|
// Using the test load balancer to control region plans
|
||||||
|
conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
|
||||||
|
TestLoadBalancer.class, LoadBalancer.class);
|
||||||
|
conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
|
||||||
|
TestRegionObserver.class, RegionObserver.class);
|
||||||
|
|
||||||
TEST_UTIL.startMiniCluster(3);
|
TEST_UTIL.startMiniCluster(3);
|
||||||
admin = TEST_UTIL.getHBaseAdmin();
|
admin = TEST_UTIL.getHBaseAdmin();
|
||||||
}
|
}
|
||||||
|
@ -191,4 +205,110 @@ public class TestAssignmentManagerOnCluster {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This tests region close failed
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testCloseFailed() throws Exception {
|
||||||
|
String table = "testCloseFailed";
|
||||||
|
try {
|
||||||
|
HTableDescriptor desc = new HTableDescriptor(table);
|
||||||
|
desc.addFamily(new HColumnDescriptor(FAMILY));
|
||||||
|
admin.createTable(desc);
|
||||||
|
|
||||||
|
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||||
|
HRegionInfo hri = new HRegionInfo(
|
||||||
|
desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
|
||||||
|
MetaEditor.addRegionToMeta(meta, hri);
|
||||||
|
|
||||||
|
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||||
|
master.assignRegion(hri);
|
||||||
|
AssignmentManager am = master.getAssignmentManager();
|
||||||
|
assertTrue(am.waitForAssignment(hri));
|
||||||
|
|
||||||
|
TestRegionObserver.enabled = true;
|
||||||
|
am.unassign(hri);
|
||||||
|
RegionState state = am.getRegionStates().getRegionState(hri);
|
||||||
|
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
|
||||||
|
|
||||||
|
TestRegionObserver.enabled = false;
|
||||||
|
am.unassign(hri, true);
|
||||||
|
state = am.getRegionStates().getRegionState(hri);
|
||||||
|
assertTrue(RegionState.State.FAILED_CLOSE != state.getState());
|
||||||
|
|
||||||
|
am.assign(hri, true, true);
|
||||||
|
assertTrue(am.waitForAssignment(hri));
|
||||||
|
|
||||||
|
ServerName serverName = master.getAssignmentManager().
|
||||||
|
getRegionStates().getRegionServerOfRegion(hri);
|
||||||
|
TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
|
||||||
|
} finally {
|
||||||
|
TestRegionObserver.enabled = false;
|
||||||
|
TEST_UTIL.deleteTable(Bytes.toBytes(table));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This tests region open failed
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testOpenFailed() throws Exception {
|
||||||
|
String table = "testOpenFailed";
|
||||||
|
try {
|
||||||
|
HTableDescriptor desc = new HTableDescriptor(table);
|
||||||
|
desc.addFamily(new HColumnDescriptor(FAMILY));
|
||||||
|
admin.createTable(desc);
|
||||||
|
|
||||||
|
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||||
|
HRegionInfo hri = new HRegionInfo(
|
||||||
|
desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
|
||||||
|
MetaEditor.addRegionToMeta(meta, hri);
|
||||||
|
|
||||||
|
TestLoadBalancer.controledRegion = hri.getEncodedName();
|
||||||
|
|
||||||
|
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||||
|
master.assignRegion(hri);
|
||||||
|
AssignmentManager am = master.getAssignmentManager();
|
||||||
|
assertFalse(am.waitForAssignment(hri));
|
||||||
|
|
||||||
|
RegionState state = am.getRegionStates().getRegionState(hri);
|
||||||
|
assertEquals(RegionState.State.FAILED_OPEN, state.getState());
|
||||||
|
|
||||||
|
TestLoadBalancer.controledRegion = null;
|
||||||
|
master.assignRegion(hri);
|
||||||
|
assertTrue(am.waitForAssignment(hri));
|
||||||
|
|
||||||
|
ServerName serverName = master.getAssignmentManager().
|
||||||
|
getRegionStates().getRegionServerOfRegion(hri);
|
||||||
|
TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
|
||||||
|
} finally {
|
||||||
|
TestLoadBalancer.controledRegion = null;
|
||||||
|
TEST_UTIL.deleteTable(Bytes.toBytes(table));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static class TestLoadBalancer extends StochasticLoadBalancer {
|
||||||
|
// For this region, if specified, always assign to nowhere
|
||||||
|
static volatile String controledRegion = null;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ServerName randomAssignment(HRegionInfo regionInfo,
|
||||||
|
List<ServerName> servers) {
|
||||||
|
if (regionInfo.getEncodedName().equals(controledRegion)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return super.randomAssignment(regionInfo, servers);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class TestRegionObserver extends BaseRegionObserver {
|
||||||
|
// If enabled, fail all preClose calls
|
||||||
|
static volatile boolean enabled = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
|
||||||
|
boolean abortRequested) throws IOException {
|
||||||
|
if (enabled) throw new IOException("fail preClose from coprocessor");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue