HBASE-11732 Should not preemptively offline a region

This commit is contained in:
Jimmy Xiang 2014-08-13 09:15:39 -07:00
parent 9db1f2cc37
commit 783d87b3c0
9 changed files with 51 additions and 74 deletions

View File

@ -837,25 +837,18 @@ public class AssignmentManager {
* on an unexpected server scenario, for an example)
*/
private void unassign(final HRegionInfo region,
final RegionState state, final ServerName dest,
final ServerName src) {
ServerName server = src;
if (state != null) {
server = state.getServerName();
}
final RegionState state, final ServerName dest) {
ServerName server = state.getServerName();
long maxWaitTime = -1;
for (int i = 1; i <= this.maximumAttempts; i++) {
if (this.server.isStopped() || this.server.isAborted()) {
LOG.debug("Server stopped/aborted; skipping unassign of " + region);
return;
}
// ClosedRegionhandler can remove the server from this.regions
if (!serverManager.isServerOnline(server)) {
LOG.debug("Offline " + region.getRegionNameAsString()
+ ", no need to unassign since it's on a dead server: " + server);
if (state != null) {
regionOffline(region);
}
regionStates.updateRegionState(region, State.OFFLINE);
return;
}
try {
@ -879,12 +872,10 @@ public class AssignmentManager {
|| t instanceof ServerNotRunningYetException) {
LOG.debug("Offline " + region.getRegionNameAsString()
+ ", it's not any more on " + server, t);
if (state != null) {
regionOffline(region);
}
regionStates.updateRegionState(region, State.OFFLINE);
return;
} else if ((t instanceof FailedServerException) || (state != null &&
t instanceof RegionAlreadyInTransitionException)) {
} else if (t instanceof FailedServerException
|| t instanceof RegionAlreadyInTransitionException) {
long sleepTime = 0;
Configuration conf = this.server.getConfiguration();
if(t instanceof FailedServerException) {
@ -963,7 +954,7 @@ public class AssignmentManager {
}
case FAILED_CLOSE:
case FAILED_OPEN:
unassign(region, state, null, null);
unassign(region, state, null);
state = regionStates.getRegionState(region);
if (state.isFailedClose()) {
// If we can't close the region, we can't re-assign
@ -1296,7 +1287,7 @@ public class AssignmentManager {
* @param region server to be unassigned
*/
public void unassign(HRegionInfo region) {
unassign(region, false);
unassign(region, null);
}
@ -1312,9 +1303,9 @@ public class AssignmentManager {
* If a RegionPlan is already set, it will remain.
*
* @param region server to be unassigned
* @param force if region should be closed even if already closing
* @param dest the destination server of the region
*/
public void unassign(HRegionInfo region, boolean force, ServerName dest) {
public void unassign(HRegionInfo region, ServerName dest) {
// TODO: Method needs refactoring. Ugly buried returns throughout. Beware!
LOG.debug("Starting unassign of " + region.getRegionNameAsString()
+ " (offlining), current state: " + regionStates.getRegionState(region));
@ -1325,59 +1316,51 @@ public class AssignmentManager {
// creation
ReentrantLock lock = locker.acquireLock(encodedName);
RegionState state = regionStates.getRegionTransitionState(encodedName);
boolean reassign = true;
try {
if (state == null) {
// Region is not in transition.
// We can unassign it only if it's not SPLIT/MERGED.
state = regionStates.getRegionState(encodedName);
if (state != null && state.isUnassignable()) {
LOG.info("Attempting to unassign " + state + ", ignored");
// Offline region will be reassigned below
return;
if (state == null || state.isFailedClose()) {
if (state == null) {
// Region is not in transition.
// We can unassign it only if it's not SPLIT/MERGED.
state = regionStates.getRegionState(encodedName);
if (state != null && state.isUnassignable()) {
LOG.info("Attempting to unassign " + state + ", ignored");
// Offline region will be reassigned below
return;
}
if (state == null || state.getServerName() == null) {
// We don't know where the region is, offline it.
// No need to send CLOSE RPC
LOG.warn("Attempting to unassign a region not in RegionStates"
+ region.getRegionNameAsString() + ", offlined");
regionOffline(region);
return;
}
}
if (state == null || state.getServerName() == null) {
// We don't know where the region is, offline it.
// No need to send CLOSE RPC
LOG.warn("Attempting to unassign a region not in RegionStates"
+ region.getRegionNameAsString() + ", offlined");
regionOffline(region);
return;
}
state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
state = regionStates.updateRegionState(
region, State.PENDING_CLOSE);
} else if (state.isFailedOpen()) {
// The region is not open yet
regionOffline(region);
return;
} else if (force && state.isPendingCloseOrClosing()) {
LOG.debug("Attempting to unassign " + region.getRegionNameAsString() +
" which is already " + state.getState() +
" but forcing to send a CLOSE RPC again ");
if (state.isFailedClose()) {
state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
}
} else {
LOG.debug("Attempting to unassign " +
region.getRegionNameAsString() + " but it is " +
"already in transition (" + state.getState() + ", force=" + force + ")");
"already in transition (" + state.getState());
return;
}
unassign(region, state, dest, null);
unassign(region, state, dest);
} finally {
lock.unlock();
// Region is expected to be reassigned afterwards
if (!replicasToClose.contains(region) && reassign && regionStates.isRegionOffline(region)) {
if (!replicasToClose.contains(region)
&& regionStates.isRegionInState(region, State.OFFLINE)) {
assign(region);
}
}
}
public void unassign(HRegionInfo region, boolean force){
unassign(region, force, null);
}
/**
* Used by unit tests. Return the number of regions opened so far in the life
* of the master. Increases by one every time the master opens a region
@ -2078,7 +2061,7 @@ public class AssignmentManager {
synchronized (this.regionPlans) {
this.regionPlans.put(plan.getRegionName(), plan);
}
unassign(hri, false, plan.getDestination());
unassign(hri, plan.getDestination());
} finally {
lock.unlock();
}

View File

@ -123,7 +123,7 @@ public class BulkReOpen extends BulkAssigner {
if (regionStates.isRegionInTransition(region)) {
continue;
}
assignmentManager.unassign(region, false);
assignmentManager.unassign(region);
while (regionStates.isRegionInTransition(region)
&& !server.isStopped()) {
regionStates.waitForUpdate(100);

View File

@ -1229,12 +1229,7 @@ public class MasterRpcServices extends RSRpcServices
}
LOG.debug(master.getClientIdAuditPrefix() + " unassign " + hri.getRegionNameAsString()
+ " in current location if it is online and reassign.force=" + force);
master.assignmentManager.unassign(hri, force);
if (master.assignmentManager.getRegionStates().isRegionOffline(hri)) {
LOG.debug("Region " + hri.getRegionNameAsString()
+ " is not online on any region server, reassigning it.");
master.assignRegion(hri);
}
master.assignmentManager.unassign(hri);
if (master.cpHost != null) {
master.cpHost.postUnassign(hri, force);
}

View File

@ -41,7 +41,7 @@ public class UnAssignCallable implements Callable<Object> {
@Override
public Object call() throws Exception {
assignmentManager.unassign(hri, true);
assignmentManager.unassign(hri);
return null;
}
}

View File

@ -213,7 +213,7 @@ public class DisableTableHandler extends EventHandler {
final HRegionInfo hri = region;
pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler",new Runnable() {
public void run() {
assignmentManager.unassign(hri, true);
assignmentManager.unassign(hri);
}
}));
}

View File

@ -238,7 +238,7 @@ public class ServerShutdownHandler extends EventHandler {
}
toAssignRegions.add(hri);
} else if (rit != null) {
if (rit.isPendingCloseOrClosing()
if ((rit.isPendingCloseOrClosing() || rit.isOffline())
&& am.getTableStateManager().isTableState(hri.getTable(),
ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) ||
am.getReplicasToClose().contains(hri)) {

View File

@ -428,7 +428,7 @@ public class TestAssignmentManagerOnCluster {
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
MyRegionObserver.preCloseEnabled.set(false);
am.unassign(hri, true);
am.unassign(hri);
// region is closing now, will be re-assigned automatically.
// now, let's forcefully assign it again. it should be
@ -475,7 +475,7 @@ public class TestAssignmentManagerOnCluster {
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
MyRegionObserver.preCloseEnabled.set(false);
am.unassign(hri, true);
am.unassign(hri);
// region may still be assigned now since it's closing,
// let's check if it's assigned after it's out of transition
@ -647,14 +647,13 @@ public class TestAssignmentManagerOnCluster {
MyRegionObserver.postCloseEnabled.set(true);
am.unassign(hri);
// Now region should pending_close or closing
// Unassign it again forcefully so that we can trigger already
// Unassign it again so that we can trigger already
// in transition exception. This test is to make sure this scenario
// is handled properly.
am.server.getConfiguration().setLong(
AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
am.unassign(hri, true);
RegionState state = am.getRegionStates().getRegionState(hri);
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
am.getRegionStates().updateRegionState(hri, RegionState.State.FAILED_CLOSE);
am.unassign(hri);
// Let region closing move ahead. The region should be closed
// properly and re-assigned automatically
@ -798,7 +797,7 @@ public class TestAssignmentManagerOnCluster {
assertTrue(state.isFailedClose());
// You can't unassign a dead region before SSH either
am.unassign(hri, true);
am.unassign(hri);
assertTrue(state.isFailedClose());
// Enable SSH so that log can be split
@ -855,7 +854,7 @@ public class TestAssignmentManagerOnCluster {
assertTrue(regionStates.isRegionOffline(hri));
// You can't unassign a disabled region either
am.unassign(hri, true);
am.unassign(hri);
assertTrue(regionStates.isRegionOffline(hri));
} finally {
TEST_UTIL.deleteTable(table);
@ -911,7 +910,7 @@ public class TestAssignmentManagerOnCluster {
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
// Try to unassign the dead region before SSH
am.unassign(hri, false);
am.unassign(hri);
// The region should be moved to offline since the server is dead
RegionState state = regionStates.getRegionState(hri);
assertTrue(state.isOffline());
@ -990,7 +989,7 @@ public class TestAssignmentManagerOnCluster {
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
// Try to unassign the dead region before SSH
am.unassign(hri, false);
am.unassign(hri);
// The region should be moved to offline since the server is dead
RegionState state = regionStates.getRegionState(hri);
assertTrue(state.isOffline());

View File

@ -147,7 +147,7 @@ public class TestRegionMergeTransactionOnCluster {
assertTrue(regionStates.isRegionInState(hri, State.MERGED));
// We should not be able to unassign it either
am.unassign(hri, true, null);
am.unassign(hri, null);
assertFalse("Merged region can't be unassigned",
regionStates.isRegionInTransition(hri));
assertTrue(regionStates.isRegionInState(hri, State.MERGED));

View File

@ -813,7 +813,7 @@ public class TestSplitTransactionOnCluster {
assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
// We should not be able to unassign it either
am.unassign(hri, true, null);
am.unassign(hri, null);
assertFalse("Split region can't be unassigned",
regionStates.isRegionInTransition(hri));
assertTrue(regionStates.isRegionInState(hri, State.SPLIT));