HBASE-4610 Port HBASE-3380 (Master failover can split logs of live servers) to 92/trunk

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1211695 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhihong Yu 2011-12-07 22:59:06 +00:00
parent 7af49fc56d
commit 7a36cab817
3 changed files with 30 additions and 2 deletions

View File

@ -452,6 +452,7 @@ Release 0.92.0 - Unreleased
HBASE-4878 Master crash when splitting hlog may cause data loss (Chunhui Shen) HBASE-4878 Master crash when splitting hlog may cause data loss (Chunhui Shen)
HBASE-4945 NPE in HRegion.bulkLoadHFiles (Andrew P and Lars H) HBASE-4945 NPE in HRegion.bulkLoadHFiles (Andrew P and Lars H)
HBASE-4942 HMaster is unable to start if HFile V1 is used (Honghua Zhu) HBASE-4942 HMaster is unable to start if HFile V1 is used (Honghua Zhu)
HBASE-4610 Port HBASE-3380 (Master failover can split logs of live servers) to 92/trunk
TESTS TESTS
HBASE-4450 test for number of blocks read: to serve as baseline for expected HBASE-4450 test for number of blocks read: to serve as baseline for expected

View File

@ -522,16 +522,34 @@ public class ServerManager {
public void waitForRegionServers(MonitoredTask status) public void waitForRegionServers(MonitoredTask status)
throws InterruptedException { throws InterruptedException {
long interval = this.master.getConfiguration(). long interval = this.master.getConfiguration().
getLong("hbase.master.wait.on.regionservers.interval", 3000); getLong("hbase.master.wait.on.regionservers.interval", 1500);
long timeout = this.master.getConfiguration().
getLong("hbase.master.wait.on.regionservers.timeout", 4500);
int minToStart = this.master.getConfiguration().
getInt("hbase.master.wait.on.regionservers.mintostart", 1);
int maxToStart = this.master.getConfiguration().
getInt("hbase.master.wait.on.regionservers.maxtostart", Integer.MAX_VALUE);
// So, number of regionservers > 0 and it's been n since last check-in, break, // So, number of regionservers > 0 and it's been n since last check-in, break,
// else just stall here // else just stall here
int count = 0; int count = 0;
long slept = 0;
for (int oldcount = countOfRegionServers(); !this.master.isStopped();) { for (int oldcount = countOfRegionServers(); !this.master.isStopped();) {
Thread.sleep(interval); Thread.sleep(interval);
slept += interval;
count = countOfRegionServers(); count = countOfRegionServers();
if (count == oldcount && count > 0) break; if (count == oldcount && count > 0) break;
String msg; String msg;
if (count == oldcount && count >= minToStart && slept >= timeout) {
LOG.info("Finished waiting for regionserver count to settle; " +
"count=" + count + ", sleptFor=" + slept);
break;
}
if (count >= maxToStart) {
LOG.info("At least the max configured number of regionserver(s) have " +
"checked in: " + count);
break;
}
if (count == 0) { if (count == 0) {
msg = "Waiting on regionserver(s) to checkin"; msg = "Waiting on regionserver(s) to checkin";
} else { } else {

View File

@ -69,8 +69,13 @@ public class TestMasterFailover {
final int NUM_MASTERS = 3; final int NUM_MASTERS = 3;
final int NUM_RS = 3; final int NUM_RS = 3;
// Create config to use for this cluster
Configuration conf = HBaseConfiguration.create();
conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
// Start the cluster // Start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
@ -298,6 +303,8 @@ public class TestMasterFailover {
// Need to drop the timeout much lower // Need to drop the timeout much lower
conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000); conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000); conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3);
conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3);
// Start the cluster // Start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
@ -596,6 +603,8 @@ public class TestMasterFailover {
// Need to drop the timeout much lower // Need to drop the timeout much lower
conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000); conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000); conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1);
conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 2);
// Create and start the cluster // Create and start the cluster
HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);