HBASE-11732 Should not preemptively offline a region

This commit is contained in:
Jimmy Xiang 2014-08-13 09:15:39 -07:00
parent 9db1f2cc37
commit 783d87b3c0
9 changed files with 51 additions and 74 deletions

View File

@ -837,25 +837,18 @@ public class AssignmentManager {
* on an unexpected server scenario, for an example) * on an unexpected server scenario, for an example)
*/ */
private void unassign(final HRegionInfo region, private void unassign(final HRegionInfo region,
final RegionState state, final ServerName dest, final RegionState state, final ServerName dest) {
final ServerName src) { ServerName server = state.getServerName();
ServerName server = src;
if (state != null) {
server = state.getServerName();
}
long maxWaitTime = -1; long maxWaitTime = -1;
for (int i = 1; i <= this.maximumAttempts; i++) { for (int i = 1; i <= this.maximumAttempts; i++) {
if (this.server.isStopped() || this.server.isAborted()) { if (this.server.isStopped() || this.server.isAborted()) {
LOG.debug("Server stopped/aborted; skipping unassign of " + region); LOG.debug("Server stopped/aborted; skipping unassign of " + region);
return; return;
} }
// ClosedRegionhandler can remove the server from this.regions
if (!serverManager.isServerOnline(server)) { if (!serverManager.isServerOnline(server)) {
LOG.debug("Offline " + region.getRegionNameAsString() LOG.debug("Offline " + region.getRegionNameAsString()
+ ", no need to unassign since it's on a dead server: " + server); + ", no need to unassign since it's on a dead server: " + server);
if (state != null) { regionStates.updateRegionState(region, State.OFFLINE);
regionOffline(region);
}
return; return;
} }
try { try {
@ -879,12 +872,10 @@ public class AssignmentManager {
|| t instanceof ServerNotRunningYetException) { || t instanceof ServerNotRunningYetException) {
LOG.debug("Offline " + region.getRegionNameAsString() LOG.debug("Offline " + region.getRegionNameAsString()
+ ", it's not any more on " + server, t); + ", it's not any more on " + server, t);
if (state != null) { regionStates.updateRegionState(region, State.OFFLINE);
regionOffline(region);
}
return; return;
} else if ((t instanceof FailedServerException) || (state != null && } else if (t instanceof FailedServerException
t instanceof RegionAlreadyInTransitionException)) { || t instanceof RegionAlreadyInTransitionException) {
long sleepTime = 0; long sleepTime = 0;
Configuration conf = this.server.getConfiguration(); Configuration conf = this.server.getConfiguration();
if(t instanceof FailedServerException) { if(t instanceof FailedServerException) {
@ -963,7 +954,7 @@ public class AssignmentManager {
} }
case FAILED_CLOSE: case FAILED_CLOSE:
case FAILED_OPEN: case FAILED_OPEN:
unassign(region, state, null, null); unassign(region, state, null);
state = regionStates.getRegionState(region); state = regionStates.getRegionState(region);
if (state.isFailedClose()) { if (state.isFailedClose()) {
// If we can't close the region, we can't re-assign // If we can't close the region, we can't re-assign
@ -1296,7 +1287,7 @@ public class AssignmentManager {
* @param region region to be unassigned * @param region region to be unassigned
*/ */
public void unassign(HRegionInfo region) { public void unassign(HRegionInfo region) {
unassign(region, false); unassign(region, null);
} }
@ -1312,9 +1303,9 @@ public class AssignmentManager {
* If a RegionPlan is already set, it will remain. * If a RegionPlan is already set, it will remain.
* *
* @param region region to be unassigned * @param region region to be unassigned
* @param force if region should be closed even if already closing * @param dest the destination server of the region
*/ */
public void unassign(HRegionInfo region, boolean force, ServerName dest) { public void unassign(HRegionInfo region, ServerName dest) {
// TODO: Method needs refactoring. Ugly buried returns throughout. Beware! // TODO: Method needs refactoring. Ugly buried returns throughout. Beware!
LOG.debug("Starting unassign of " + region.getRegionNameAsString() LOG.debug("Starting unassign of " + region.getRegionNameAsString()
+ " (offlining), current state: " + regionStates.getRegionState(region)); + " (offlining), current state: " + regionStates.getRegionState(region));
@ -1325,59 +1316,51 @@ public class AssignmentManager {
// creation // creation
ReentrantLock lock = locker.acquireLock(encodedName); ReentrantLock lock = locker.acquireLock(encodedName);
RegionState state = regionStates.getRegionTransitionState(encodedName); RegionState state = regionStates.getRegionTransitionState(encodedName);
boolean reassign = true;
try { try {
if (state == null) { if (state == null || state.isFailedClose()) {
// Region is not in transition. if (state == null) {
// We can unassign it only if it's not SPLIT/MERGED. // Region is not in transition.
state = regionStates.getRegionState(encodedName); // We can unassign it only if it's not SPLIT/MERGED.
if (state != null && state.isUnassignable()) { state = regionStates.getRegionState(encodedName);
LOG.info("Attempting to unassign " + state + ", ignored"); if (state != null && state.isUnassignable()) {
// Offline region will be reassigned below LOG.info("Attempting to unassign " + state + ", ignored");
return; // Offline region will be reassigned below
return;
}
if (state == null || state.getServerName() == null) {
// We don't know where the region is, offline it.
// No need to send CLOSE RPC
LOG.warn("Attempting to unassign a region not in RegionStates"
+ region.getRegionNameAsString() + ", offlined");
regionOffline(region);
return;
}
} }
if (state == null || state.getServerName() == null) { state = regionStates.updateRegionState(
// We don't know where the region is, offline it. region, State.PENDING_CLOSE);
// No need to send CLOSE RPC
LOG.warn("Attempting to unassign a region not in RegionStates"
+ region.getRegionNameAsString() + ", offlined");
regionOffline(region);
return;
}
state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
} else if (state.isFailedOpen()) { } else if (state.isFailedOpen()) {
// The region is not open yet // The region is not open yet
regionOffline(region); regionOffline(region);
return; return;
} else if (force && state.isPendingCloseOrClosing()) {
LOG.debug("Attempting to unassign " + region.getRegionNameAsString() +
" which is already " + state.getState() +
" but forcing to send a CLOSE RPC again ");
if (state.isFailedClose()) {
state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
}
} else { } else {
LOG.debug("Attempting to unassign " + LOG.debug("Attempting to unassign " +
region.getRegionNameAsString() + " but it is " + region.getRegionNameAsString() + " but it is " +
"already in transition (" + state.getState() + ", force=" + force + ")"); "already in transition (" + state.getState());
return; return;
} }
unassign(region, state, dest, null); unassign(region, state, dest);
} finally { } finally {
lock.unlock(); lock.unlock();
// Region is expected to be reassigned afterwards // Region is expected to be reassigned afterwards
if (!replicasToClose.contains(region) && reassign && regionStates.isRegionOffline(region)) { if (!replicasToClose.contains(region)
&& regionStates.isRegionInState(region, State.OFFLINE)) {
assign(region); assign(region);
} }
} }
} }
public void unassign(HRegionInfo region, boolean force){
unassign(region, force, null);
}
/** /**
* Used by unit tests. Return the number of regions opened so far in the life * Used by unit tests. Return the number of regions opened so far in the life
* of the master. Increases by one every time the master opens a region * of the master. Increases by one every time the master opens a region
@ -2078,7 +2061,7 @@ public class AssignmentManager {
synchronized (this.regionPlans) { synchronized (this.regionPlans) {
this.regionPlans.put(plan.getRegionName(), plan); this.regionPlans.put(plan.getRegionName(), plan);
} }
unassign(hri, false, plan.getDestination()); unassign(hri, plan.getDestination());
} finally { } finally {
lock.unlock(); lock.unlock();
} }

View File

@ -123,7 +123,7 @@ public class BulkReOpen extends BulkAssigner {
if (regionStates.isRegionInTransition(region)) { if (regionStates.isRegionInTransition(region)) {
continue; continue;
} }
assignmentManager.unassign(region, false); assignmentManager.unassign(region);
while (regionStates.isRegionInTransition(region) while (regionStates.isRegionInTransition(region)
&& !server.isStopped()) { && !server.isStopped()) {
regionStates.waitForUpdate(100); regionStates.waitForUpdate(100);

View File

@ -1229,12 +1229,7 @@ public class MasterRpcServices extends RSRpcServices
} }
LOG.debug(master.getClientIdAuditPrefix() + " unassign " + hri.getRegionNameAsString() LOG.debug(master.getClientIdAuditPrefix() + " unassign " + hri.getRegionNameAsString()
+ " in current location if it is online and reassign.force=" + force); + " in current location if it is online and reassign.force=" + force);
master.assignmentManager.unassign(hri, force); master.assignmentManager.unassign(hri);
if (master.assignmentManager.getRegionStates().isRegionOffline(hri)) {
LOG.debug("Region " + hri.getRegionNameAsString()
+ " is not online on any region server, reassigning it.");
master.assignRegion(hri);
}
if (master.cpHost != null) { if (master.cpHost != null) {
master.cpHost.postUnassign(hri, force); master.cpHost.postUnassign(hri, force);
} }

View File

@ -41,7 +41,7 @@ public class UnAssignCallable implements Callable<Object> {
@Override @Override
public Object call() throws Exception { public Object call() throws Exception {
assignmentManager.unassign(hri, true); assignmentManager.unassign(hri);
return null; return null;
} }
} }

View File

@ -213,7 +213,7 @@ public class DisableTableHandler extends EventHandler {
final HRegionInfo hri = region; final HRegionInfo hri = region;
pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler",new Runnable() { pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler",new Runnable() {
public void run() { public void run() {
assignmentManager.unassign(hri, true); assignmentManager.unassign(hri);
} }
})); }));
} }

View File

@ -238,7 +238,7 @@ public class ServerShutdownHandler extends EventHandler {
} }
toAssignRegions.add(hri); toAssignRegions.add(hri);
} else if (rit != null) { } else if (rit != null) {
if (rit.isPendingCloseOrClosing() if ((rit.isPendingCloseOrClosing() || rit.isOffline())
&& am.getTableStateManager().isTableState(hri.getTable(), && am.getTableStateManager().isTableState(hri.getTable(),
ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) || ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) ||
am.getReplicasToClose().contains(hri)) { am.getReplicasToClose().contains(hri)) {

View File

@ -428,7 +428,7 @@ public class TestAssignmentManagerOnCluster {
assertEquals(RegionState.State.FAILED_CLOSE, state.getState()); assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
MyRegionObserver.preCloseEnabled.set(false); MyRegionObserver.preCloseEnabled.set(false);
am.unassign(hri, true); am.unassign(hri);
// region is closing now, will be re-assigned automatically. // region is closing now, will be re-assigned automatically.
// now, let's forcefully assign it again. it should be // now, let's forcefully assign it again. it should be
@ -475,7 +475,7 @@ public class TestAssignmentManagerOnCluster {
assertEquals(RegionState.State.FAILED_CLOSE, state.getState()); assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
MyRegionObserver.preCloseEnabled.set(false); MyRegionObserver.preCloseEnabled.set(false);
am.unassign(hri, true); am.unassign(hri);
// region may still be assigned now since it's closing, // region may still be assigned now since it's closing,
// let's check if it's assigned after it's out of transition // let's check if it's assigned after it's out of transition
@ -647,14 +647,13 @@ public class TestAssignmentManagerOnCluster {
MyRegionObserver.postCloseEnabled.set(true); MyRegionObserver.postCloseEnabled.set(true);
am.unassign(hri); am.unassign(hri);
// Now the region should be pending_close or closing // Now the region should be pending_close or closing
// Unassign it again forcefully so that we can trigger already // Unassign it again so that we can trigger already
// in transition exception. This test is to make sure this scenario // in transition exception. This test is to make sure this scenario
// is handled properly. // is handled properly.
am.server.getConfiguration().setLong( am.server.getConfiguration().setLong(
AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000); AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
am.unassign(hri, true); am.getRegionStates().updateRegionState(hri, RegionState.State.FAILED_CLOSE);
RegionState state = am.getRegionStates().getRegionState(hri); am.unassign(hri);
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
// Let region closing move ahead. The region should be closed // Let region closing move ahead. The region should be closed
// properly and re-assigned automatically // properly and re-assigned automatically
@ -798,7 +797,7 @@ public class TestAssignmentManagerOnCluster {
assertTrue(state.isFailedClose()); assertTrue(state.isFailedClose());
// You can't unassign a dead region before SSH either // You can't unassign a dead region before SSH either
am.unassign(hri, true); am.unassign(hri);
assertTrue(state.isFailedClose()); assertTrue(state.isFailedClose());
// Enable SSH so that log can be split // Enable SSH so that log can be split
@ -855,7 +854,7 @@ public class TestAssignmentManagerOnCluster {
assertTrue(regionStates.isRegionOffline(hri)); assertTrue(regionStates.isRegionOffline(hri));
// You can't unassign a disabled region either // You can't unassign a disabled region either
am.unassign(hri, true); am.unassign(hri);
assertTrue(regionStates.isRegionOffline(hri)); assertTrue(regionStates.isRegionOffline(hri));
} finally { } finally {
TEST_UTIL.deleteTable(table); TEST_UTIL.deleteTable(table);
@ -911,7 +910,7 @@ public class TestAssignmentManagerOnCluster {
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
// Try to unassign the dead region before SSH // Try to unassign the dead region before SSH
am.unassign(hri, false); am.unassign(hri);
// The region should be moved to offline since the server is dead // The region should be moved to offline since the server is dead
RegionState state = regionStates.getRegionState(hri); RegionState state = regionStates.getRegionState(hri);
assertTrue(state.isOffline()); assertTrue(state.isOffline());
@ -990,7 +989,7 @@ public class TestAssignmentManagerOnCluster {
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
// Try to unassign the dead region before SSH // Try to unassign the dead region before SSH
am.unassign(hri, false); am.unassign(hri);
// The region should be moved to offline since the server is dead // The region should be moved to offline since the server is dead
RegionState state = regionStates.getRegionState(hri); RegionState state = regionStates.getRegionState(hri);
assertTrue(state.isOffline()); assertTrue(state.isOffline());

View File

@ -147,7 +147,7 @@ public class TestRegionMergeTransactionOnCluster {
assertTrue(regionStates.isRegionInState(hri, State.MERGED)); assertTrue(regionStates.isRegionInState(hri, State.MERGED));
// We should not be able to unassign it either // We should not be able to unassign it either
am.unassign(hri, true, null); am.unassign(hri, null);
assertFalse("Merged region can't be unassigned", assertFalse("Merged region can't be unassigned",
regionStates.isRegionInTransition(hri)); regionStates.isRegionInTransition(hri));
assertTrue(regionStates.isRegionInState(hri, State.MERGED)); assertTrue(regionStates.isRegionInState(hri, State.MERGED));

View File

@ -813,7 +813,7 @@ public class TestSplitTransactionOnCluster {
assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
// We should not be able to unassign it either // We should not be able to unassign it either
am.unassign(hri, true, null); am.unassign(hri, null);
assertFalse("Split region can't be unassigned", assertFalse("Split region can't be unassigned",
regionStates.isRegionInTransition(hri)); regionStates.isRegionInTransition(hri));
assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); assertTrue(regionStates.isRegionInState(hri, State.SPLIT));