HBASE-6389 Modify the conditions to ensure that Master waits for sufficient number of Region Servers before starting region assignments
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1405111 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
624021fb4d
commit
6f6f425a31
|
@ -88,6 +88,18 @@ import com.google.protobuf.ServiceException;
|
|||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class ServerManager {
|
||||
public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART =
|
||||
"hbase.master.wait.on.regionservers.maxtostart";
|
||||
|
||||
public static final String WAIT_ON_REGIONSERVERS_MINTOSTART =
|
||||
"hbase.master.wait.on.regionservers.mintostart";
|
||||
|
||||
public static final String WAIT_ON_REGIONSERVERS_TIMEOUT =
|
||||
"hbase.master.wait.on.regionservers.timeout";
|
||||
|
||||
public static final String WAIT_ON_REGIONSERVERS_INTERVAL =
|
||||
"hbase.master.wait.on.regionservers.interval";
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(ServerManager.class);
|
||||
|
||||
// Set if we are to shutdown the cluster.
|
||||
|
@ -681,25 +693,38 @@ public class ServerManager {
|
|||
* Wait for the region servers to report in.
|
||||
* We will wait until one of these conditions is met:
|
||||
* - the master is stopped
|
||||
* - the 'hbase.master.wait.on.regionservers.timeout' is reached
|
||||
* - the 'hbase.master.wait.on.regionservers.maxtostart' number of
|
||||
* region servers is reached
|
||||
* - the 'hbase.master.wait.on.regionservers.mintostart' is reached AND
|
||||
* there has been no new region server checking in for
|
||||
* 'hbase.master.wait.on.regionservers.interval' time
|
||||
* 'hbase.master.wait.on.regionservers.interval' time AND
|
||||
* the 'hbase.master.wait.on.regionservers.timeout' is reached
|
||||
*
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
public void waitForRegionServers(MonitoredTask status)
|
||||
throws InterruptedException {
|
||||
final long interval = this.master.getConfiguration().
|
||||
getLong("hbase.master.wait.on.regionservers.interval", 1500);
|
||||
getLong(WAIT_ON_REGIONSERVERS_INTERVAL, 1500);
|
||||
final long timeout = this.master.getConfiguration().
|
||||
getLong("hbase.master.wait.on.regionservers.timeout", 4500);
|
||||
final int minToStart = this.master.getConfiguration().
|
||||
getInt("hbase.master.wait.on.regionservers.mintostart", 1);
|
||||
final int maxToStart = this.master.getConfiguration().
|
||||
getInt("hbase.master.wait.on.regionservers.maxtostart", Integer.MAX_VALUE);
|
||||
getLong(WAIT_ON_REGIONSERVERS_TIMEOUT, 4500);
|
||||
int minToStart = this.master.getConfiguration().
|
||||
getInt(WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
|
||||
if (minToStart < 1) {
|
||||
LOG.warn(String.format(
|
||||
"The value of '%s' (%d) can not be less than 1, ignoring.",
|
||||
WAIT_ON_REGIONSERVERS_MINTOSTART, minToStart));
|
||||
minToStart = 1;
|
||||
}
|
||||
int maxToStart = this.master.getConfiguration().
|
||||
getInt(WAIT_ON_REGIONSERVERS_MAXTOSTART, Integer.MAX_VALUE);
|
||||
if (maxToStart < minToStart) {
|
||||
LOG.warn(String.format(
|
||||
"The value of '%s' (%d) is set less than '%s' (%d), ignoring.",
|
||||
WAIT_ON_REGIONSERVERS_MAXTOSTART, maxToStart,
|
||||
WAIT_ON_REGIONSERVERS_MINTOSTART, minToStart));
|
||||
maxToStart = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
long now = System.currentTimeMillis();
|
||||
final long startTime = now;
|
||||
|
@ -710,9 +735,8 @@ public class ServerManager {
|
|||
int oldCount = 0;
|
||||
while (
|
||||
!this.master.isStopped() &&
|
||||
slept < timeout &&
|
||||
count < maxToStart &&
|
||||
(lastCountChange+interval > now || count < minToStart)
|
||||
(lastCountChange+interval > now || timeout > slept || count < minToStart)
|
||||
){
|
||||
|
||||
// Log some info at every interval time or if there is a change
|
||||
|
|
|
@ -69,6 +69,7 @@ import org.apache.hadoop.hbase.io.hfile.Compression;
|
|||
import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
|
||||
import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.master.ServerManager;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.apache.hadoop.hbase.regionserver.HStore;
|
||||
|
@ -79,6 +80,7 @@ import org.apache.hadoop.hbase.security.User;
|
|||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.JVMClusterUtil;
|
||||
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
|
||||
import org.apache.hadoop.hbase.util.RegionSplitter;
|
||||
import org.apache.hadoop.hbase.util.Threads;
|
||||
import org.apache.hadoop.hbase.zookeeper.EmptyWatcher;
|
||||
|
@ -731,8 +733,12 @@ public class HBaseTestingUtility {
|
|||
|
||||
// These settings will make the server wait until this exact number of
|
||||
// region servers are connected.
|
||||
conf.setInt("hbase.master.wait.on.regionservers.mintostart", numSlaves);
|
||||
conf.setInt("hbase.master.wait.on.regionservers.maxtostart", numSlaves);
|
||||
if (conf.getInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, -1) == -1) {
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, numSlaves);
|
||||
}
|
||||
if (conf.getInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, -1) == -1) {
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, numSlaves);
|
||||
}
|
||||
|
||||
Configuration c = new Configuration(this.conf);
|
||||
this.hbaseCluster =
|
||||
|
@ -816,6 +822,9 @@ public class HBaseTestingUtility {
|
|||
zooKeeperWatcher = null;
|
||||
}
|
||||
|
||||
// unset the configuration for MIN and MAX RS to start
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, -1);
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, -1);
|
||||
if (this.hbaseCluster != null) {
|
||||
this.hbaseCluster.shutdown();
|
||||
// Wait till hbase is down before going on to shutdown zk.
|
||||
|
@ -1542,9 +1551,28 @@ public class HBaseTestingUtility {
|
|||
public void expireRegionServerSession(int index) throws Exception {
|
||||
HRegionServer rs = getMiniHBaseCluster().getRegionServer(index);
|
||||
expireSession(rs.getZooKeeper(), false);
|
||||
decrementMinRegionServerCount();
|
||||
}
|
||||
|
||||
private void decrementMinRegionServerCount() {
|
||||
// decrement the count for this.conf, for newly spawned master
|
||||
// this.hbaseCluster shares this configuration too
|
||||
decrementMinRegionServerCount(getConfiguration());
|
||||
|
||||
// each master thread keeps a copy of configuration
|
||||
for (MasterThread master : getHBaseCluster().getMasterThreads()) {
|
||||
decrementMinRegionServerCount(master.getMaster().getConfiguration());
|
||||
}
|
||||
}
|
||||
|
||||
private void decrementMinRegionServerCount(Configuration conf) {
|
||||
int currentCount = conf.getInt(
|
||||
ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, -1);
|
||||
if (currentCount != -1) {
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART,
|
||||
Math.max(currentCount - 1, 1));
|
||||
}
|
||||
}
|
||||
|
||||
public void expireSession(ZooKeeperWatcher nodeZK) throws Exception {
|
||||
expireSession(nodeZK, false);
|
||||
|
|
|
@ -444,7 +444,7 @@ public class TestZooKeeper {
|
|||
* Tests whether the logs are split when master recovers from an expired zookeeper session and an
|
||||
* RS goes down.
|
||||
*/
|
||||
@Test(timeout = 180000)
|
||||
@Test(timeout = 240000)
|
||||
public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws IOException,
|
||||
KeeperException, InterruptedException {
|
||||
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
|
||||
|
|
|
@ -157,8 +157,8 @@ public class TestMasterFailover {
|
|||
// Need to drop the timeout much lower
|
||||
conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
|
||||
conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
|
||||
conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
|
||||
conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 3);
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 3);
|
||||
|
||||
// Start the cluster
|
||||
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
|
||||
|
@ -460,8 +460,8 @@ public class TestMasterFailover {
|
|||
// Need to drop the timeout much lower
|
||||
conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
|
||||
conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
|
||||
conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1);
|
||||
conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 2);
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
|
||||
TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
|
||||
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
|
||||
log("Cluster started");
|
||||
|
|
|
@ -247,8 +247,8 @@ public class TestMasterNoCluster {
|
|||
public void testCatalogDeploys()
|
||||
throws IOException, KeeperException, InterruptedException, DeserializationException, ServiceException {
|
||||
final Configuration conf = TESTUTIL.getConfiguration();
|
||||
conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1);
|
||||
conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 1);
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 1);
|
||||
|
||||
final long now = System.currentTimeMillis();
|
||||
// Name for our single mocked up regionserver.
|
||||
|
|
|
@ -69,8 +69,11 @@ public class TestRSKilledWhenMasterInitializing {
|
|||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
// Set it so that this test runs with my custom master
|
||||
TESTUTIL.getConfiguration().setClass(HConstants.MASTER_IMPL,
|
||||
TestingMaster.class, HMaster.class);
|
||||
Configuration conf = TESTUTIL.getConfiguration();
|
||||
conf.setClass(HConstants.MASTER_IMPL, TestingMaster.class, HMaster.class);
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 3);
|
||||
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 4);
|
||||
|
||||
// Start up the cluster.
|
||||
TESTUTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue