HBASE-11732 Should not preemptively offline a region
This commit is contained in:
parent
9db1f2cc37
commit
783d87b3c0
@ -837,25 +837,18 @@ public class AssignmentManager {
|
|||||||
* on an unexpected server scenario, for example)
|
* on an unexpected server scenario, for example)
|
||||||
*/
|
*/
|
||||||
private void unassign(final HRegionInfo region,
|
private void unassign(final HRegionInfo region,
|
||||||
final RegionState state, final ServerName dest,
|
final RegionState state, final ServerName dest) {
|
||||||
final ServerName src) {
|
ServerName server = state.getServerName();
|
||||||
ServerName server = src;
|
|
||||||
if (state != null) {
|
|
||||||
server = state.getServerName();
|
|
||||||
}
|
|
||||||
long maxWaitTime = -1;
|
long maxWaitTime = -1;
|
||||||
for (int i = 1; i <= this.maximumAttempts; i++) {
|
for (int i = 1; i <= this.maximumAttempts; i++) {
|
||||||
if (this.server.isStopped() || this.server.isAborted()) {
|
if (this.server.isStopped() || this.server.isAborted()) {
|
||||||
LOG.debug("Server stopped/aborted; skipping unassign of " + region);
|
LOG.debug("Server stopped/aborted; skipping unassign of " + region);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// ClosedRegionhandler can remove the server from this.regions
|
|
||||||
if (!serverManager.isServerOnline(server)) {
|
if (!serverManager.isServerOnline(server)) {
|
||||||
LOG.debug("Offline " + region.getRegionNameAsString()
|
LOG.debug("Offline " + region.getRegionNameAsString()
|
||||||
+ ", no need to unassign since it's on a dead server: " + server);
|
+ ", no need to unassign since it's on a dead server: " + server);
|
||||||
if (state != null) {
|
regionStates.updateRegionState(region, State.OFFLINE);
|
||||||
regionOffline(region);
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
@ -879,12 +872,10 @@ public class AssignmentManager {
|
|||||||
|| t instanceof ServerNotRunningYetException) {
|
|| t instanceof ServerNotRunningYetException) {
|
||||||
LOG.debug("Offline " + region.getRegionNameAsString()
|
LOG.debug("Offline " + region.getRegionNameAsString()
|
||||||
+ ", it's not any more on " + server, t);
|
+ ", it's not any more on " + server, t);
|
||||||
if (state != null) {
|
regionStates.updateRegionState(region, State.OFFLINE);
|
||||||
regionOffline(region);
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
} else if ((t instanceof FailedServerException) || (state != null &&
|
} else if (t instanceof FailedServerException
|
||||||
t instanceof RegionAlreadyInTransitionException)) {
|
|| t instanceof RegionAlreadyInTransitionException) {
|
||||||
long sleepTime = 0;
|
long sleepTime = 0;
|
||||||
Configuration conf = this.server.getConfiguration();
|
Configuration conf = this.server.getConfiguration();
|
||||||
if(t instanceof FailedServerException) {
|
if(t instanceof FailedServerException) {
|
||||||
@ -963,7 +954,7 @@ public class AssignmentManager {
|
|||||||
}
|
}
|
||||||
case FAILED_CLOSE:
|
case FAILED_CLOSE:
|
||||||
case FAILED_OPEN:
|
case FAILED_OPEN:
|
||||||
unassign(region, state, null, null);
|
unassign(region, state, null);
|
||||||
state = regionStates.getRegionState(region);
|
state = regionStates.getRegionState(region);
|
||||||
if (state.isFailedClose()) {
|
if (state.isFailedClose()) {
|
||||||
// If we can't close the region, we can't re-assign
|
// If we can't close the region, we can't re-assign
|
||||||
@ -1296,7 +1287,7 @@ public class AssignmentManager {
|
|||||||
* @param region the region to be unassigned
|
* @param region the region to be unassigned
|
||||||
*/
|
*/
|
||||||
public void unassign(HRegionInfo region) {
|
public void unassign(HRegionInfo region) {
|
||||||
unassign(region, false);
|
unassign(region, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1312,9 +1303,9 @@ public class AssignmentManager {
|
|||||||
* If a RegionPlan is already set, it will remain.
|
* If a RegionPlan is already set, it will remain.
|
||||||
*
|
*
|
||||||
* @param region the region to be unassigned
|
* @param region the region to be unassigned
|
||||||
* @param force if region should be closed even if already closing
|
* @param dest the destination server of the region
|
||||||
*/
|
*/
|
||||||
public void unassign(HRegionInfo region, boolean force, ServerName dest) {
|
public void unassign(HRegionInfo region, ServerName dest) {
|
||||||
// TODO: Method needs refactoring. Ugly buried returns throughout. Beware!
|
// TODO: Method needs refactoring. Ugly buried returns throughout. Beware!
|
||||||
LOG.debug("Starting unassign of " + region.getRegionNameAsString()
|
LOG.debug("Starting unassign of " + region.getRegionNameAsString()
|
||||||
+ " (offlining), current state: " + regionStates.getRegionState(region));
|
+ " (offlining), current state: " + regionStates.getRegionState(region));
|
||||||
@ -1325,59 +1316,51 @@ public class AssignmentManager {
|
|||||||
// creation
|
// creation
|
||||||
ReentrantLock lock = locker.acquireLock(encodedName);
|
ReentrantLock lock = locker.acquireLock(encodedName);
|
||||||
RegionState state = regionStates.getRegionTransitionState(encodedName);
|
RegionState state = regionStates.getRegionTransitionState(encodedName);
|
||||||
boolean reassign = true;
|
|
||||||
try {
|
try {
|
||||||
if (state == null) {
|
if (state == null || state.isFailedClose()) {
|
||||||
// Region is not in transition.
|
if (state == null) {
|
||||||
// We can unassign it only if it's not SPLIT/MERGED.
|
// Region is not in transition.
|
||||||
state = regionStates.getRegionState(encodedName);
|
// We can unassign it only if it's not SPLIT/MERGED.
|
||||||
if (state != null && state.isUnassignable()) {
|
state = regionStates.getRegionState(encodedName);
|
||||||
LOG.info("Attempting to unassign " + state + ", ignored");
|
if (state != null && state.isUnassignable()) {
|
||||||
// Offline region will be reassigned below
|
LOG.info("Attempting to unassign " + state + ", ignored");
|
||||||
return;
|
// Offline region will be reassigned below
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (state == null || state.getServerName() == null) {
|
||||||
|
// We don't know where the region is, offline it.
|
||||||
|
// No need to send CLOSE RPC
|
||||||
|
LOG.warn("Attempting to unassign a region not in RegionStates"
|
||||||
|
+ region.getRegionNameAsString() + ", offlined");
|
||||||
|
regionOffline(region);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (state == null || state.getServerName() == null) {
|
state = regionStates.updateRegionState(
|
||||||
// We don't know where the region is, offline it.
|
region, State.PENDING_CLOSE);
|
||||||
// No need to send CLOSE RPC
|
|
||||||
LOG.warn("Attempting to unassign a region not in RegionStates"
|
|
||||||
+ region.getRegionNameAsString() + ", offlined");
|
|
||||||
regionOffline(region);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
|
|
||||||
} else if (state.isFailedOpen()) {
|
} else if (state.isFailedOpen()) {
|
||||||
// The region is not open yet
|
// The region is not open yet
|
||||||
regionOffline(region);
|
regionOffline(region);
|
||||||
return;
|
return;
|
||||||
} else if (force && state.isPendingCloseOrClosing()) {
|
|
||||||
LOG.debug("Attempting to unassign " + region.getRegionNameAsString() +
|
|
||||||
" which is already " + state.getState() +
|
|
||||||
" but forcing to send a CLOSE RPC again ");
|
|
||||||
if (state.isFailedClose()) {
|
|
||||||
state = regionStates.updateRegionState(region, State.PENDING_CLOSE);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
LOG.debug("Attempting to unassign " +
|
LOG.debug("Attempting to unassign " +
|
||||||
region.getRegionNameAsString() + " but it is " +
|
region.getRegionNameAsString() + " but it is " +
|
||||||
"already in transition (" + state.getState() + ", force=" + force + ")");
|
"already in transition (" + state.getState());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
unassign(region, state, dest, null);
|
unassign(region, state, dest);
|
||||||
} finally {
|
} finally {
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
// Region is expected to be reassigned afterwards
|
// Region is expected to be reassigned afterwards
|
||||||
if (!replicasToClose.contains(region) && reassign && regionStates.isRegionOffline(region)) {
|
if (!replicasToClose.contains(region)
|
||||||
|
&& regionStates.isRegionInState(region, State.OFFLINE)) {
|
||||||
assign(region);
|
assign(region);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void unassign(HRegionInfo region, boolean force){
|
|
||||||
unassign(region, force, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used by unit tests. Return the number of regions opened so far in the life
|
* Used by unit tests. Return the number of regions opened so far in the life
|
||||||
* of the master. Increases by one every time the master opens a region
|
* of the master. Increases by one every time the master opens a region
|
||||||
@ -2078,7 +2061,7 @@ public class AssignmentManager {
|
|||||||
synchronized (this.regionPlans) {
|
synchronized (this.regionPlans) {
|
||||||
this.regionPlans.put(plan.getRegionName(), plan);
|
this.regionPlans.put(plan.getRegionName(), plan);
|
||||||
}
|
}
|
||||||
unassign(hri, false, plan.getDestination());
|
unassign(hri, plan.getDestination());
|
||||||
} finally {
|
} finally {
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
}
|
}
|
||||||
|
@ -123,7 +123,7 @@ public class BulkReOpen extends BulkAssigner {
|
|||||||
if (regionStates.isRegionInTransition(region)) {
|
if (regionStates.isRegionInTransition(region)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
assignmentManager.unassign(region, false);
|
assignmentManager.unassign(region);
|
||||||
while (regionStates.isRegionInTransition(region)
|
while (regionStates.isRegionInTransition(region)
|
||||||
&& !server.isStopped()) {
|
&& !server.isStopped()) {
|
||||||
regionStates.waitForUpdate(100);
|
regionStates.waitForUpdate(100);
|
||||||
|
@ -1229,12 +1229,7 @@ public class MasterRpcServices extends RSRpcServices
|
|||||||
}
|
}
|
||||||
LOG.debug(master.getClientIdAuditPrefix() + " unassign " + hri.getRegionNameAsString()
|
LOG.debug(master.getClientIdAuditPrefix() + " unassign " + hri.getRegionNameAsString()
|
||||||
+ " in current location if it is online and reassign.force=" + force);
|
+ " in current location if it is online and reassign.force=" + force);
|
||||||
master.assignmentManager.unassign(hri, force);
|
master.assignmentManager.unassign(hri);
|
||||||
if (master.assignmentManager.getRegionStates().isRegionOffline(hri)) {
|
|
||||||
LOG.debug("Region " + hri.getRegionNameAsString()
|
|
||||||
+ " is not online on any region server, reassigning it.");
|
|
||||||
master.assignRegion(hri);
|
|
||||||
}
|
|
||||||
if (master.cpHost != null) {
|
if (master.cpHost != null) {
|
||||||
master.cpHost.postUnassign(hri, force);
|
master.cpHost.postUnassign(hri, force);
|
||||||
}
|
}
|
||||||
|
@ -41,7 +41,7 @@ public class UnAssignCallable implements Callable<Object> {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object call() throws Exception {
|
public Object call() throws Exception {
|
||||||
assignmentManager.unassign(hri, true);
|
assignmentManager.unassign(hri);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -213,7 +213,7 @@ public class DisableTableHandler extends EventHandler {
|
|||||||
final HRegionInfo hri = region;
|
final HRegionInfo hri = region;
|
||||||
pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler",new Runnable() {
|
pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler",new Runnable() {
|
||||||
public void run() {
|
public void run() {
|
||||||
assignmentManager.unassign(hri, true);
|
assignmentManager.unassign(hri);
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
@ -238,7 +238,7 @@ public class ServerShutdownHandler extends EventHandler {
|
|||||||
}
|
}
|
||||||
toAssignRegions.add(hri);
|
toAssignRegions.add(hri);
|
||||||
} else if (rit != null) {
|
} else if (rit != null) {
|
||||||
if (rit.isPendingCloseOrClosing()
|
if ((rit.isPendingCloseOrClosing() || rit.isOffline())
|
||||||
&& am.getTableStateManager().isTableState(hri.getTable(),
|
&& am.getTableStateManager().isTableState(hri.getTable(),
|
||||||
ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) ||
|
ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) ||
|
||||||
am.getReplicasToClose().contains(hri)) {
|
am.getReplicasToClose().contains(hri)) {
|
||||||
|
@ -428,7 +428,7 @@ public class TestAssignmentManagerOnCluster {
|
|||||||
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
|
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
|
||||||
|
|
||||||
MyRegionObserver.preCloseEnabled.set(false);
|
MyRegionObserver.preCloseEnabled.set(false);
|
||||||
am.unassign(hri, true);
|
am.unassign(hri);
|
||||||
|
|
||||||
// region is closing now, will be re-assigned automatically.
|
// region is closing now, will be re-assigned automatically.
|
||||||
// now, let's forcefully assign it again. it should be
|
// now, let's forcefully assign it again. it should be
|
||||||
@ -475,7 +475,7 @@ public class TestAssignmentManagerOnCluster {
|
|||||||
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
|
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
|
||||||
|
|
||||||
MyRegionObserver.preCloseEnabled.set(false);
|
MyRegionObserver.preCloseEnabled.set(false);
|
||||||
am.unassign(hri, true);
|
am.unassign(hri);
|
||||||
|
|
||||||
// region may still be assigned now since it's closing,
|
// region may still be assigned now since it's closing,
|
||||||
// let's check if it's assigned after it's out of transition
|
// let's check if it's assigned after it's out of transition
|
||||||
@ -647,14 +647,13 @@ public class TestAssignmentManagerOnCluster {
|
|||||||
MyRegionObserver.postCloseEnabled.set(true);
|
MyRegionObserver.postCloseEnabled.set(true);
|
||||||
am.unassign(hri);
|
am.unassign(hri);
|
||||||
// Now region should be pending_close or closing
|
// Now region should be pending_close or closing
|
||||||
// Unassign it again forcefully so that we can trigger already
|
// Unassign it again so that we can trigger already
|
||||||
// in transition exception. This test is to make sure this scenario
|
// in transition exception. This test is to make sure this scenario
|
||||||
// is handled properly.
|
// is handled properly.
|
||||||
am.server.getConfiguration().setLong(
|
am.server.getConfiguration().setLong(
|
||||||
AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
|
AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
|
||||||
am.unassign(hri, true);
|
am.getRegionStates().updateRegionState(hri, RegionState.State.FAILED_CLOSE);
|
||||||
RegionState state = am.getRegionStates().getRegionState(hri);
|
am.unassign(hri);
|
||||||
assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
|
|
||||||
|
|
||||||
// Let region closing move ahead. The region should be closed
|
// Let region closing move ahead. The region should be closed
|
||||||
// properly and re-assigned automatically
|
// properly and re-assigned automatically
|
||||||
@ -798,7 +797,7 @@ public class TestAssignmentManagerOnCluster {
|
|||||||
assertTrue(state.isFailedClose());
|
assertTrue(state.isFailedClose());
|
||||||
|
|
||||||
// You can't unassign a dead region before SSH either
|
// You can't unassign a dead region before SSH either
|
||||||
am.unassign(hri, true);
|
am.unassign(hri);
|
||||||
assertTrue(state.isFailedClose());
|
assertTrue(state.isFailedClose());
|
||||||
|
|
||||||
// Enable SSH so that log can be split
|
// Enable SSH so that log can be split
|
||||||
@ -855,7 +854,7 @@ public class TestAssignmentManagerOnCluster {
|
|||||||
assertTrue(regionStates.isRegionOffline(hri));
|
assertTrue(regionStates.isRegionOffline(hri));
|
||||||
|
|
||||||
// You can't unassign a disabled region either
|
// You can't unassign a disabled region either
|
||||||
am.unassign(hri, true);
|
am.unassign(hri);
|
||||||
assertTrue(regionStates.isRegionOffline(hri));
|
assertTrue(regionStates.isRegionOffline(hri));
|
||||||
} finally {
|
} finally {
|
||||||
TEST_UTIL.deleteTable(table);
|
TEST_UTIL.deleteTable(table);
|
||||||
@ -911,7 +910,7 @@ public class TestAssignmentManagerOnCluster {
|
|||||||
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
|
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
|
||||||
|
|
||||||
// Try to unassign the dead region before SSH
|
// Try to unassign the dead region before SSH
|
||||||
am.unassign(hri, false);
|
am.unassign(hri);
|
||||||
// The region should be moved to offline since the server is dead
|
// The region should be moved to offline since the server is dead
|
||||||
RegionState state = regionStates.getRegionState(hri);
|
RegionState state = regionStates.getRegionState(hri);
|
||||||
assertTrue(state.isOffline());
|
assertTrue(state.isOffline());
|
||||||
@ -990,7 +989,7 @@ public class TestAssignmentManagerOnCluster {
|
|||||||
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
|
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
|
||||||
|
|
||||||
// Try to unassign the dead region before SSH
|
// Try to unassign the dead region before SSH
|
||||||
am.unassign(hri, false);
|
am.unassign(hri);
|
||||||
// The region should be moved to offline since the server is dead
|
// The region should be moved to offline since the server is dead
|
||||||
RegionState state = regionStates.getRegionState(hri);
|
RegionState state = regionStates.getRegionState(hri);
|
||||||
assertTrue(state.isOffline());
|
assertTrue(state.isOffline());
|
||||||
|
@ -147,7 +147,7 @@ public class TestRegionMergeTransactionOnCluster {
|
|||||||
assertTrue(regionStates.isRegionInState(hri, State.MERGED));
|
assertTrue(regionStates.isRegionInState(hri, State.MERGED));
|
||||||
|
|
||||||
// We should not be able to unassign it either
|
// We should not be able to unassign it either
|
||||||
am.unassign(hri, true, null);
|
am.unassign(hri, null);
|
||||||
assertFalse("Merged region can't be unassigned",
|
assertFalse("Merged region can't be unassigned",
|
||||||
regionStates.isRegionInTransition(hri));
|
regionStates.isRegionInTransition(hri));
|
||||||
assertTrue(regionStates.isRegionInState(hri, State.MERGED));
|
assertTrue(regionStates.isRegionInState(hri, State.MERGED));
|
||||||
|
@ -813,7 +813,7 @@ public class TestSplitTransactionOnCluster {
|
|||||||
assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
|
assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
|
||||||
|
|
||||||
// We should not be able to unassign it either
|
// We should not be able to unassign it either
|
||||||
am.unassign(hri, true, null);
|
am.unassign(hri, null);
|
||||||
assertFalse("Split region can't be unassigned",
|
assertFalse("Split region can't be unassigned",
|
||||||
regionStates.isRegionInTransition(hri));
|
regionStates.isRegionInTransition(hri));
|
||||||
assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
|
assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user