HBASE-11732 Should not preemptively offline a region
This commit is contained in:
parent
9db1f2cc37
commit
783d87b3c0
|
@ -837,25 +837,18 @@ public class AssignmentManager {
|
|||
* on an unexpected server scenario, for an example)
|
||||
*/
|
||||
private void unassign(final HRegionInfo region,
|
||||
final RegionState state, final ServerName dest,
|
||||
final ServerName src) {
|
||||
ServerName server = src;
|
||||
if (state != null) {
|
||||
server = state.getServerName();
|
||||
}
|
||||
final RegionState state, final ServerName dest) {
|
||||
ServerName server = state.getServerName();
|
||||
long maxWaitTime = -1;
|
||||
for (int i = 1; i <= this.maximumAttempts; i++) {
|
||||
if (this.server.isStopped() || this.server.isAborted()) {
|
||||
LOG.debug("Server stopped/aborted; skipping unassign of " + region);
|
||||
return;
|
||||
}
|
||||
// ClosedRegionhandler can remove the server from this.regions
|
||||
if (!serverManager.isServerOnline(server)) {
|
||||
LOG.debug("Offline " + region.getRegionNameAsString()
|
||||
+ ", no need to unassign since it's on a dead server: " + server);
|
||||
if (state != null) {
|
||||
regionOffline(region);
|
||||
}
|
||||
regionStates.updateRegionState(region, State.OFFLINE);
|
||||
return;
|
||||
}
|
||||
try {
|
||||
|
@ -879,12 +872,10 @@ public class AssignmentManager {
|
|||
|| t instanceof ServerNotRunningYetException) {
|
||||
LOG.debug("Offline " + region.getRegionNameAsString()
|
||||
+ ", it's not any more on " + server, t);
|
||||
if (state != null) {
|
||||
regionOffline(region);
|
||||
}
|
||||
regionStates.updateRegionState(region, State.OFFLINE);
|
||||
return;
|
||||
} else if ((t instanceof FailedServerException) || (state != null &&
|
||||
t instanceof RegionAlreadyInTransitionException)) {
|
||||
} else if (t instanceof FailedServerException
|
||||
|| t instanceof RegionAlreadyInTransitionException) {
|
||||
long sleepTime = 0;
|
||||
Configuration conf = this.server.getConfiguration();
|
||||
if(t instanceof FailedServerException) {
|
||||
|
@ -963,7 +954,7 @@ public class AssignmentManager {
|
|||
}
|
||||
case FAILED_CLOSE:
|
||||
case FAILED_OPEN:
|
||||
unassign(region, state, null, null);
|
||||
unassign(region, state, null);
|
||||
state = regionStates.getRegionState(region);
|
||||
if (state.isFailedClose()) {
|
||||
// If we can't close the region, we can't re-assign
|
||||
|
@ -1296,7 +1287,7 @@ public class AssignmentManager {
|
|||
* @param region server to be unassigned
|
||||
*/
|
||||
public void unassign(HRegionInfo region) {
|
||||
unassign(region, false);
|
||||
unassign(region, null);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1312,9 +1303,9 @@ public class AssignmentManager {
|
|||
* If a RegionPlan is already set, it will remain.
|
||||
*
|
||||
* @param region server to be unassigned
|
||||
* @param force if region should be closed even if already closing
|
||||
* @param dest the destination server of the region
|
||||
*/
|
||||
public void unassign(HRegionInfo region, boolean force, ServerName dest) {
|
||||
public void unassign(HRegionInfo region, ServerName dest) {
|
||||
// TODO: Method needs refactoring. Ugly buried returns throughout. Beware!
|
||||
LOG.debug("Starting unassign of " + region.getRegionNameAsString()
|
||||
+ " (offlining), current state: " + regionStates.getRegionState(region));
|
||||
|
@ -1325,8 +1316,8 @@ public class AssignmentManager {
|
|||
// creation
|
||||
ReentrantLock lock = locker.acquireLock(encodedName);
|
||||
RegionState state = regionStates.getRegionTransitionState(encodedName);
|
||||
boolean reassign = true;
|
||||
try {
|
||||
if (state == null || state.isFailedClose()) {
|
||||
if (state == null) {
|
||||
// Region is not in transition.
|
||||
// We can unassign it only if it's not SPLIT/MERGED.
|
||||
|
@ -1344,40 +1335,32 @@ public class AssignmentManager {
|
|||
regionOffline(region);
|
||||
return;
|
||||
}
|
||||
state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
|
||||
}
|
||||
state = regionStates.updateRegionState(
|
||||
region, State.PENDING_CLOSE);
|
||||
} else if (state.isFailedOpen()) {
|
||||
// The region is not open yet
|
||||
regionOffline(region);
|
||||
return;
|
||||
} else if (force && state.isPendingCloseOrClosing()) {
|
||||
LOG.debug("Attempting to unassign " + region.getRegionNameAsString() +
|
||||
" which is already " + state.getState() +
|
||||
" but forcing to send a CLOSE RPC again ");
|
||||
if (state.isFailedClose()) {
|
||||
state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
|
||||
}
|
||||
} else {
|
||||
LOG.debug("Attempting to unassign " +
|
||||
region.getRegionNameAsString() + " but it is " +
|
||||
"already in transition (" + state.getState() + ", force=" + force + ")");
|
||||
"already in transition (" + state.getState());
|
||||
return;
|
||||
}
|
||||
|
||||
unassign(region, state, dest, null);
|
||||
unassign(region, state, dest);
|
||||
} finally {
|
||||
lock.unlock();
|
||||
|
||||
// Region is expected to be reassigned afterwards
|
||||
if (!replicasToClose.contains(region) && reassign && regionStates.isRegionOffline(region)) {
|
||||
if (!replicasToClose.contains(region)
|
||||
&& regionStates.isRegionInState(region, State.OFFLINE)) {
|
||||
assign(region);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void unassign(HRegionInfo region, boolean force){
|
||||
unassign(region, force, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Used by unit tests. Return the number of regions opened so far in the life
|
||||
* of the master. Increases by one every time the master opens a region
|
||||
|
@ -2078,7 +2061,7 @@ public class AssignmentManager {
|
|||
synchronized (this.regionPlans) {
|
||||
this.regionPlans.put(plan.getRegionName(), plan);
|
||||
}
|
||||
unassign(hri, false, plan.getDestination());
|
||||
unassign(hri, plan.getDestination());
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
|
|
|
@ -123,7 +123,7 @@ public class BulkReOpen extends BulkAssigner {
|
|||
if (regionStates.isRegionInTransition(region)) {
|
||||
continue;
|
||||
}
|
||||
assignmentManager.unassign(region, false);
|
||||
assignmentManager.unassign(region);
|
||||
while (regionStates.isRegionInTransition(region)
|
||||
&& !server.isStopped()) {
|
||||
regionStates.waitForUpdate(100);
|
||||
|
|
|
@ -1229,12 +1229,7 @@ public class MasterRpcServices extends RSRpcServices
|
|||
}
|
||||
LOG.debug(master.getClientIdAuditPrefix() + " unassign " + hri.getRegionNameAsString()
|
||||
+ " in current location if it is online and reassign.force=" + force);
|
||||
master.assignmentManager.unassign(hri, force);
|
||||
if (master.assignmentManager.getRegionStates().isRegionOffline(hri)) {
|
||||
LOG.debug("Region " + hri.getRegionNameAsString()
|
||||
+ " is not online on any region server, reassigning it.");
|
||||
master.assignRegion(hri);
|
||||
}
|
||||
master.assignmentManager.unassign(hri);
|
||||
if (master.cpHost != null) {
|
||||
master.cpHost.postUnassign(hri, force);
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ public class UnAssignCallable implements Callable<Object> {
|
|||
|
||||
@Override
|
||||
public Object call() throws Exception {
|
||||
assignmentManager.unassign(hri, true);
|
||||
assignmentManager.unassign(hri);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -213,7 +213,7 @@ public class DisableTableHandler extends EventHandler {
|
|||
final HRegionInfo hri = region;
|
||||
pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler",new Runnable() {
|
||||
public void run() {
|
||||
assignmentManager.unassign(hri, true);
|
||||
assignmentManager.unassign(hri);
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
|
|
@ -238,7 +238,7 @@ public class ServerShutdownHandler extends EventHandler {
|
|||
}
|
||||
toAssignRegions.add(hri);
|
||||
} else if (rit != null) {
|
||||
if (rit.isPendingCloseOrClosing()
|
||||
if ((rit.isPendingCloseOrClosing() || rit.isOffline())
|
||||
&& am.getTableStateManager().isTableState(hri.getTable(),
|
||||
ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) ||
|
||||
am.getReplicasToClose().contains(hri)) {
|
||||
|
|
|
@ -428,7 +428,7 @@ public class TestAssignmentManagerOnCluster {
|
|||
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
|
||||
|
||||
MyRegionObserver.preCloseEnabled.set(false);
|
||||
am.unassign(hri, true);
|
||||
am.unassign(hri);
|
||||
|
||||
// region is closing now, will be re-assigned automatically.
|
||||
// now, let's forcefully assign it again. it should be
|
||||
|
@ -475,7 +475,7 @@ public class TestAssignmentManagerOnCluster {
|
|||
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
|
||||
|
||||
MyRegionObserver.preCloseEnabled.set(false);
|
||||
am.unassign(hri, true);
|
||||
am.unassign(hri);
|
||||
|
||||
// region may still be assigned now since it's closing,
|
||||
// let's check if it's assigned after it's out of transition
|
||||
|
@ -647,14 +647,13 @@ public class TestAssignmentManagerOnCluster {
|
|||
MyRegionObserver.postCloseEnabled.set(true);
|
||||
am.unassign(hri);
|
||||
// Now region should pending_close or closing
|
||||
// Unassign it again forcefully so that we can trigger already
|
||||
// Unassign it again so that we can trigger already
|
||||
// in transition exception. This test is to make sure this scenario
|
||||
// is handled properly.
|
||||
am.server.getConfiguration().setLong(
|
||||
AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
|
||||
am.unassign(hri, true);
|
||||
RegionState state = am.getRegionStates().getRegionState(hri);
|
||||
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
|
||||
am.getRegionStates().updateRegionState(hri, RegionState.State.FAILED_CLOSE);
|
||||
am.unassign(hri);
|
||||
|
||||
// Let region closing move ahead. The region should be closed
|
||||
// properly and re-assigned automatically
|
||||
|
@ -798,7 +797,7 @@ public class TestAssignmentManagerOnCluster {
|
|||
assertTrue(state.isFailedClose());
|
||||
|
||||
// You can't unassign a dead region before SSH either
|
||||
am.unassign(hri, true);
|
||||
am.unassign(hri);
|
||||
assertTrue(state.isFailedClose());
|
||||
|
||||
// Enable SSH so that log can be split
|
||||
|
@ -855,7 +854,7 @@ public class TestAssignmentManagerOnCluster {
|
|||
assertTrue(regionStates.isRegionOffline(hri));
|
||||
|
||||
// You can't unassign a disabled region either
|
||||
am.unassign(hri, true);
|
||||
am.unassign(hri);
|
||||
assertTrue(regionStates.isRegionOffline(hri));
|
||||
} finally {
|
||||
TEST_UTIL.deleteTable(table);
|
||||
|
@ -911,7 +910,7 @@ public class TestAssignmentManagerOnCluster {
|
|||
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
|
||||
|
||||
// Try to unassign the dead region before SSH
|
||||
am.unassign(hri, false);
|
||||
am.unassign(hri);
|
||||
// The region should be moved to offline since the server is dead
|
||||
RegionState state = regionStates.getRegionState(hri);
|
||||
assertTrue(state.isOffline());
|
||||
|
@ -990,7 +989,7 @@ public class TestAssignmentManagerOnCluster {
|
|||
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
|
||||
|
||||
// Try to unassign the dead region before SSH
|
||||
am.unassign(hri, false);
|
||||
am.unassign(hri);
|
||||
// The region should be moved to offline since the server is dead
|
||||
RegionState state = regionStates.getRegionState(hri);
|
||||
assertTrue(state.isOffline());
|
||||
|
|
|
@ -147,7 +147,7 @@ public class TestRegionMergeTransactionOnCluster {
|
|||
assertTrue(regionStates.isRegionInState(hri, State.MERGED));
|
||||
|
||||
// We should not be able to unassign it either
|
||||
am.unassign(hri, true, null);
|
||||
am.unassign(hri, null);
|
||||
assertFalse("Merged region can't be unassigned",
|
||||
regionStates.isRegionInTransition(hri));
|
||||
assertTrue(regionStates.isRegionInState(hri, State.MERGED));
|
||||
|
|
|
@ -813,7 +813,7 @@ public class TestSplitTransactionOnCluster {
|
|||
assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
|
||||
|
||||
// We should not be able to unassign it either
|
||||
am.unassign(hri, true, null);
|
||||
am.unassign(hri, null);
|
||||
assertFalse("Split region can't be unassigned",
|
||||
regionStates.isRegionInTransition(hri));
|
||||
assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
|
||||
|
|
Loading…
Reference in New Issue