HBASE-4397 -ROOT-, .META. tables stay offline for too long in recovery phase after all RSs
are shutdown at the same time (Ming Ma) git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1226110 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ceafb4aab8
commit
453de3a66c
|
@ -466,6 +466,8 @@ Release 0.92.0 - Unreleased
|
||||||
HBASE-5099 ZK event thread waiting for root region assignment may block server
|
HBASE-5099 ZK event thread waiting for root region assignment may block server
|
||||||
shutdown handler for the region server the root region was on (Jimmy)
|
shutdown handler for the region server the root region was on (Jimmy)
|
||||||
HBASE-5100 Rollback of split could cause closed region to be opened again (Chunhui)
|
HBASE-5100 Rollback of split could cause closed region to be opened again (Chunhui)
|
||||||
|
HBASE-4397 -ROOT-, .META. tables stay offline for too long in recovery phase after all RSs
|
||||||
|
are shutdown at the same time (Ming Ma)
|
||||||
|
|
||||||
TESTS
|
TESTS
|
||||||
HBASE-4450 test for number of blocks read: to serve as baseline for expected
|
HBASE-4450 test for number of blocks read: to serve as baseline for expected
|
||||||
|
|
|
@ -191,7 +191,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
Configuration conf = master.getConfiguration();
|
Configuration conf = master.getConfiguration();
|
||||||
this.timeoutMonitor = new TimeoutMonitor(
|
this.timeoutMonitor = new TimeoutMonitor(
|
||||||
conf.getInt("hbase.master.assignment.timeoutmonitor.period", 10000),
|
conf.getInt("hbase.master.assignment.timeoutmonitor.period", 10000),
|
||||||
master,
|
master, serverManager,
|
||||||
conf.getInt("hbase.master.assignment.timeoutmonitor.timeout", 1800000));
|
conf.getInt("hbase.master.assignment.timeoutmonitor.timeout", 1800000));
|
||||||
Threads.setDaemonThreadRunning(timeoutMonitor.getThread(),
|
Threads.setDaemonThreadRunning(timeoutMonitor.getThread(),
|
||||||
master.getServerName() + ".timeoutMonitor");
|
master.getServerName() + ".timeoutMonitor");
|
||||||
|
@ -1498,6 +1498,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
state.update(RegionState.State.OFFLINE);
|
state.update(RegionState.State.OFFLINE);
|
||||||
// Force a new plan and reassign. Will return null if no servers.
|
// Force a new plan and reassign. Will return null if no servers.
|
||||||
if (getRegionPlan(state, plan.getDestination(), true) == null) {
|
if (getRegionPlan(state, plan.getDestination(), true) == null) {
|
||||||
|
this.timeoutMonitor.setAllRegionServersOffline(true);
|
||||||
LOG.warn("Unable to find a viable location to assign region " +
|
LOG.warn("Unable to find a viable location to assign region " +
|
||||||
state.getRegion().getRegionNameAsString());
|
state.getRegion().getRegionNameAsString());
|
||||||
return;
|
return;
|
||||||
|
@ -2512,6 +2513,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
public class TimeoutMonitor extends Chore {
|
public class TimeoutMonitor extends Chore {
|
||||||
private final int timeout;
|
private final int timeout;
|
||||||
private boolean bulkAssign = false;
|
private boolean bulkAssign = false;
|
||||||
|
private boolean allRegionServersOffline = false;
|
||||||
|
private ServerManager serverManager;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a periodic monitor to check for time outs on region transition
|
* Creates a periodic monitor to check for time outs on region transition
|
||||||
|
@ -2523,9 +2526,11 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
* @param timeout
|
* @param timeout
|
||||||
*/
|
*/
|
||||||
public TimeoutMonitor(final int period, final Stoppable stopper,
|
public TimeoutMonitor(final int period, final Stoppable stopper,
|
||||||
|
ServerManager serverManager,
|
||||||
final int timeout) {
|
final int timeout) {
|
||||||
super("AssignmentTimeoutMonitor", period, stopper);
|
super("AssignmentTimeoutMonitor", period, stopper);
|
||||||
this.timeout = timeout;
|
this.timeout = timeout;
|
||||||
|
this.serverManager = serverManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2539,10 +2544,18 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private synchronized void setAllRegionServersOffline(
|
||||||
|
boolean allRegionServersOffline) {
|
||||||
|
this.allRegionServersOffline = allRegionServersOffline;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void chore() {
|
protected void chore() {
|
||||||
// If bulkAssign in progress, suspend checks
|
// If bulkAssign in progress, suspend checks
|
||||||
if (this.bulkAssign) return;
|
if (this.bulkAssign) return;
|
||||||
|
boolean allRSsOffline = this.serverManager.getOnlineServersList().
|
||||||
|
isEmpty();
|
||||||
|
|
||||||
synchronized (regionsInTransition) {
|
synchronized (regionsInTransition) {
|
||||||
// Iterate all regions in transition checking for time outs
|
// Iterate all regions in transition checking for time outs
|
||||||
long now = System.currentTimeMillis();
|
long now = System.currentTimeMillis();
|
||||||
|
@ -2550,9 +2563,14 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
if (regionState.getStamp() + timeout <= now) {
|
if (regionState.getStamp() + timeout <= now) {
|
||||||
//decide on action upon timeout
|
//decide on action upon timeout
|
||||||
actOnTimeOut(regionState);
|
actOnTimeOut(regionState);
|
||||||
|
} else if (this.allRegionServersOffline && !allRSsOffline) {
|
||||||
|
// if some RSs just came back online, we can start the
|
||||||
|
// assignment right away
|
||||||
|
actOnTimeOut(regionState);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
setAllRegionServersOffline(allRSsOffline);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void actOnTimeOut(RegionState regionState) {
|
private void actOnTimeOut(RegionState regionState) {
|
||||||
|
|
Loading…
Reference in New Issue