HADOOP-9220. Unnecessary transition to standby in ActiveStandbyElector. Contributed by Tom White and Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1482401 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2013-05-14 15:37:12 +00:00
parent e29170e771
commit d5a6e764dc
4 changed files with 23 additions and 11 deletions

View File

@ -720,6 +720,9 @@ Release 2.0.5-beta - UNRELEASED
HADOOP-9307. BufferedFSInputStream.read returns wrong results
after certain seeks. (todd)
HADOOP-9220. Unnecessary transition to standby in ActiveStandbyElector.
(tom and todd via todd)
Release 2.0.4-alpha - 2013-04-25
INCOMPATIBLE CHANGES

View File

@ -159,6 +159,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
private int createRetryCount = 0;
private int statRetryCount = 0;
private ZooKeeper zkClient;
private WatcherWithClientRef watcher;
private ConnectionState zkConnectionState = ConnectionState.TERMINATED;
private final ActiveStandbyElectorCallback appClient;
@ -246,6 +247,11 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
if (data == null) {
throw new HadoopIllegalArgumentException("data cannot be null");
}
if (wantToBeInElection) {
LOG.info("Already in election. Not re-connecting.");
return;
}
appData = new byte[data.length];
System.arraycopy(data, 0, appData, 0, data.length);
@ -615,7 +621,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
// watcher after constructing ZooKeeper, we may miss that event. Instead,
// we construct the watcher first, and have it block any events it receives
// before we can set its ZooKeeper reference.
WatcherWithClientRef watcher = new WatcherWithClientRef();
watcher = new WatcherWithClientRef();
ZooKeeper zk = new ZooKeeper(zkHostPort, zkSessionTimeout, watcher);
watcher.setZooKeeperRef(zk);
@ -753,6 +759,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
e);
}
zkClient = null;
watcher = null;
}
zkClient = getNewZooKeeper();
LOG.debug("Created new connection for " + this);
@ -765,12 +772,14 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
LOG.debug("Terminating ZK connection for " + this);
ZooKeeper tempZk = zkClient;
zkClient = null;
watcher = null;
try {
tempZk.close();
} catch(InterruptedException e) {
LOG.warn(e);
}
zkConnectionState = ConnectionState.TERMINATED;
wantToBeInElection = false;
}
private void reset() {
@ -914,7 +923,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
private void monitorLockNodeAsync() {
zkClient.exists(zkLockFilePath,
new WatcherWithClientRef(zkClient), this,
watcher, this,
zkClient);
}
@ -1015,13 +1024,6 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
* Latch used to wait until the reference to ZooKeeper is set.
*/
private CountDownLatch hasSetZooKeeper = new CountDownLatch(1);
private WatcherWithClientRef() {
}
private WatcherWithClientRef(ZooKeeper zk) {
setZooKeeperRef(zk);
}
/**
* Waits for the next event from ZooKeeper to arrive.

View File

@ -49,6 +49,7 @@ class DummyHAService extends HAServiceTarget {
DummySharedResource sharedResource;
public int fenceCount = 0;
public int activeTransitionCount = 0;
static ArrayList<DummyHAService> instances = Lists.newArrayList();
int index;
@ -139,6 +140,7 @@ class DummyHAService extends HAServiceTarget {
@Override
public void transitionToActive(StateChangeRequestInfo req) throws ServiceFailedException,
AccessControlException, IOException {
activeTransitionCount++;
checkUnreachable();
if (failToBecomeActive) {
throw new ServiceFailedException("injected failure");

View File

@ -422,7 +422,7 @@ public class TestZKFailoverController extends ClientBaseWithFixes {
}
}
@Test(timeout=15000)
@Test(timeout=25000)
public void testGracefulFailover() throws Exception {
try {
cluster.start();
@ -430,11 +430,16 @@ public class TestZKFailoverController extends ClientBaseWithFixes {
cluster.waitForActiveLockHolder(0);
cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover();
cluster.waitForActiveLockHolder(1);
cluster.getService(0).getZKFCProxy(conf, 5000).gracefulFailover();
cluster.waitForActiveLockHolder(0);
Thread.sleep(10000); // allow to quiesce
assertEquals(0, cluster.getService(0).fenceCount);
assertEquals(0, cluster.getService(1).fenceCount);
assertEquals(2, cluster.getService(0).activeTransitionCount);
assertEquals(1, cluster.getService(1).activeTransitionCount);
} finally {
cluster.stop();
}