HBASE-5063 RegionServers fail to report to backup HMaster after primary goes down

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1221121 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2011-12-20 06:46:17 +00:00
parent 77e16e8027
commit 8fe40ce0e9
2 changed files with 26 additions and 14 deletions

View File

@ -1738,21 +1738,25 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
private ServerName getMaster() {
ServerName masterServerName = null;
long previousLogTime = 0;
while ((masterServerName = this.masterAddressManager.getMasterAddress()) == null) {
if (!keepLooping()) return null;
if (System.currentTimeMillis() > (previousLogTime+1000)){
LOG.debug("No master found; retry");
previousLogTime = System.currentTimeMillis();
}
try {
Thread.sleep(100);
} catch (InterruptedException ignored) {
}
}
InetSocketAddress isa =
new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort());
HMasterRegionInterface master = null;
while (keepLooping() && master == null) {
masterServerName = this.masterAddressManager.getMasterAddress();
if (masterServerName == null) {
if (!keepLooping()) {
// give up with no connection.
LOG.debug("No master found and cluster is stopped; bailing out");
return null;
}
LOG.debug("No master found; retry");
previousLogTime = System.currentTimeMillis();
sleeper.sleep();
continue;
}
InetSocketAddress isa =
new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort());
LOG.info("Attempting connect to Master server at " +
this.masterAddressManager.getMasterAddress());
try {

View File

@ -115,6 +115,9 @@ public class TestMasterFailover {
}
assertEquals(1, numActive);
assertEquals(2, masterThreads.size());
int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
LOG.info("Active master managing " + rsCount + " regions servers");
assertEquals(3, rsCount);
// kill the active master
LOG.debug("\n\nStopping the active master\n");
@ -127,8 +130,13 @@ public class TestMasterFailover {
LOG.debug("\n\nVerifying backup master is now active\n");
// should only have one master now
assertEquals(1, masterThreads.size());
// and he should be active
assertTrue(masterThreads.get(0).getMaster().isActiveMaster());
HMaster active = masterThreads.get(0).getMaster();
int rss = active.getClusterStatus().getServersSize();
LOG.info("Active master managing " + rss + " regions servers");
assertTrue(active.isActiveMaster());
assertEquals(3, rss);
// Stop the cluster
TEST_UTIL.shutdownMiniCluster();