HBASE-4397 -ROOT-, .META. tables stay offline for too long in recovery phase after all RSs are shutdown at the same time (Ming Ma)


git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1226110 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhihong Yu 2011-12-31 15:43:19 +00:00
parent ceafb4aab8
commit 453de3a66c
2 changed files with 21 additions and 1 deletions

View File

@ -466,6 +466,8 @@ Release 0.92.0 - Unreleased
HBASE-5099 ZK event thread waiting for root region assignment may block server HBASE-5099 ZK event thread waiting for root region assignment may block server
shutdown handler for the region server the root region was on (Jimmy) shutdown handler for the region server the root region was on (Jimmy)
HBASE-5100 Rollback of split could cause closed region to be opened again (Chunhui) HBASE-5100 Rollback of split could cause closed region to be opened again (Chunhui)
HBASE-4397 -ROOT-, .META. tables stay offline for too long in recovery phase after all RSs
are shutdown at the same time (Ming Ma)
TESTS TESTS
HBASE-4450 test for number of blocks read: to serve as baseline for expected HBASE-4450 test for number of blocks read: to serve as baseline for expected

View File

@ -191,7 +191,7 @@ public class AssignmentManager extends ZooKeeperListener {
Configuration conf = master.getConfiguration(); Configuration conf = master.getConfiguration();
this.timeoutMonitor = new TimeoutMonitor( this.timeoutMonitor = new TimeoutMonitor(
conf.getInt("hbase.master.assignment.timeoutmonitor.period", 10000), conf.getInt("hbase.master.assignment.timeoutmonitor.period", 10000),
master, master, serverManager,
conf.getInt("hbase.master.assignment.timeoutmonitor.timeout", 1800000)); conf.getInt("hbase.master.assignment.timeoutmonitor.timeout", 1800000));
Threads.setDaemonThreadRunning(timeoutMonitor.getThread(), Threads.setDaemonThreadRunning(timeoutMonitor.getThread(),
master.getServerName() + ".timeoutMonitor"); master.getServerName() + ".timeoutMonitor");
@ -1498,6 +1498,7 @@ public class AssignmentManager extends ZooKeeperListener {
state.update(RegionState.State.OFFLINE); state.update(RegionState.State.OFFLINE);
// Force a new plan and reassign. Will return null if no servers. // Force a new plan and reassign. Will return null if no servers.
if (getRegionPlan(state, plan.getDestination(), true) == null) { if (getRegionPlan(state, plan.getDestination(), true) == null) {
this.timeoutMonitor.setAllRegionServersOffline(true);
LOG.warn("Unable to find a viable location to assign region " + LOG.warn("Unable to find a viable location to assign region " +
state.getRegion().getRegionNameAsString()); state.getRegion().getRegionNameAsString());
return; return;
@ -2512,6 +2513,8 @@ public class AssignmentManager extends ZooKeeperListener {
public class TimeoutMonitor extends Chore { public class TimeoutMonitor extends Chore {
private final int timeout; private final int timeout;
private boolean bulkAssign = false; private boolean bulkAssign = false;
private boolean allRegionServersOffline = false;
private ServerManager serverManager;
/** /**
* Creates a periodic monitor to check for time outs on region transition * Creates a periodic monitor to check for time outs on region transition
@ -2523,9 +2526,11 @@ public class AssignmentManager extends ZooKeeperListener {
* @param timeout * @param timeout
*/ */
public TimeoutMonitor(final int period, final Stoppable stopper, public TimeoutMonitor(final int period, final Stoppable stopper,
ServerManager serverManager,
final int timeout) { final int timeout) {
super("AssignmentTimeoutMonitor", period, stopper); super("AssignmentTimeoutMonitor", period, stopper);
this.timeout = timeout; this.timeout = timeout;
this.serverManager = serverManager;
} }
/** /**
@ -2539,10 +2544,18 @@ public class AssignmentManager extends ZooKeeperListener {
return result; return result;
} }
/**
 * Records whether all region servers are currently considered offline.
 * The flag is set to true by the assignment path when no region plan can
 * be found, and is refreshed at the end of every chore() run from the
 * online-server list. chore() compares the stored value against the fresh
 * one so that it can act on regions in transition as soon as a server is
 * online again, without waiting for the regular timeout.
 * NOTE(review): chore() reads the field without holding this monitor's
 * lock -- confirm that visibility of the write is acceptable.
 * @param allRegionServersOffline the new value of the flag
 */
private synchronized void setAllRegionServersOffline(
boolean allRegionServersOffline) {
this.allRegionServersOffline = allRegionServersOffline;
}
@Override @Override
protected void chore() { protected void chore() {
// If bulkAssign in progress, suspend checks // If bulkAssign in progress, suspend checks
if (this.bulkAssign) return; if (this.bulkAssign) return;
boolean allRSsOffline = this.serverManager.getOnlineServersList().
isEmpty();
synchronized (regionsInTransition) { synchronized (regionsInTransition) {
// Iterate all regions in transition checking for time outs // Iterate all regions in transition checking for time outs
long now = System.currentTimeMillis(); long now = System.currentTimeMillis();
@ -2550,9 +2563,14 @@ public class AssignmentManager extends ZooKeeperListener {
if (regionState.getStamp() + timeout <= now) { if (regionState.getStamp() + timeout <= now) {
//decide on action upon timeout //decide on action upon timeout
actOnTimeOut(regionState); actOnTimeOut(regionState);
} else if (this.allRegionServersOffline && !allRSsOffline) {
// if some RSs just came back online, we can start the
// assignment right away
actOnTimeOut(regionState);
} }
} }
} }
setAllRegionServersOffline(allRSsOffline);
} }
private void actOnTimeOut(RegionState regionState) { private void actOnTimeOut(RegionState regionState) {