diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 8d99d0bc63e..805258f566d 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -720,6 +720,9 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9307. BufferedFSInputStream.read returns wrong results after certain seeks. (todd) + HADOOP-9220. Unnecessary transition to standby in ActiveStandbyElector. + (tom and todd via todd) + Release 2.0.4-alpha - 2013-04-25 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index b6d736e084e..68ddc64fa31 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -159,6 +159,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { private int createRetryCount = 0; private int statRetryCount = 0; private ZooKeeper zkClient; + private WatcherWithClientRef watcher; private ConnectionState zkConnectionState = ConnectionState.TERMINATED; private final ActiveStandbyElectorCallback appClient; @@ -246,6 +247,11 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { if (data == null) { throw new HadoopIllegalArgumentException("data cannot be null"); } + + if (wantToBeInElection) { + LOG.info("Already in election. Not re-connecting."); + return; + } appData = new byte[data.length]; System.arraycopy(data, 0, appData, 0, data.length); @@ -615,7 +621,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { // watcher after constructing ZooKeeper, we may miss that event. Instead, // we construct the watcher first, and have it block any events it receives // before we can set its ZooKeeper reference. - WatcherWithClientRef watcher = new WatcherWithClientRef(); + watcher = new WatcherWithClientRef(); ZooKeeper zk = new ZooKeeper(zkHostPort, zkSessionTimeout, watcher); watcher.setZooKeeperRef(zk); @@ -753,6 +759,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { e); } zkClient = null; + watcher = null; } zkClient = getNewZooKeeper(); LOG.debug("Created new connection for " + this); @@ -765,12 +772,14 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { LOG.debug("Terminating ZK connection for " + this); ZooKeeper tempZk = zkClient; zkClient = null; + watcher = null; try { tempZk.close(); } catch(InterruptedException e) { LOG.warn(e); } zkConnectionState = ConnectionState.TERMINATED; + wantToBeInElection = false; } private void reset() { @@ -914,7 +923,7 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { private void monitorLockNodeAsync() { zkClient.exists(zkLockFilePath, - new WatcherWithClientRef(zkClient), this, + watcher, this, zkClient); } @@ -1015,13 +1024,6 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { * Latch used to wait until the reference to ZooKeeper is set. */ private CountDownLatch hasSetZooKeeper = new CountDownLatch(1); - - private WatcherWithClientRef() { - } - - private WatcherWithClientRef(ZooKeeper zk) { - setZooKeeperRef(zk); - } /** * Waits for the next event from ZooKeeper to arrive. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java index 0985af18c68..e9189e27243 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/DummyHAService.java @@ -49,6 +49,7 @@ class DummyHAService extends HAServiceTarget { DummySharedResource sharedResource; public int fenceCount = 0; + public int activeTransitionCount = 0; static ArrayList instances = Lists.newArrayList(); int index; @@ -139,6 +140,7 @@ class DummyHAService extends HAServiceTarget { @Override public void transitionToActive(StateChangeRequestInfo req) throws ServiceFailedException, AccessControlException, IOException { + activeTransitionCount++; checkUnreachable(); if (failToBecomeActive) { throw new ServiceFailedException("injected failure"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java index 59f693a9c1d..5b722e1cc50 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java @@ -422,7 +422,7 @@ public class TestZKFailoverController extends ClientBaseWithFixes { } } - @Test(timeout=15000) + @Test(timeout=25000) public void testGracefulFailover() throws Exception { try { cluster.start(); @@ -430,11 +430,16 @@ public class TestZKFailoverController extends ClientBaseWithFixes { cluster.waitForActiveLockHolder(0); cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover(); cluster.waitForActiveLockHolder(1); + cluster.getService(0).getZKFCProxy(conf, 5000).gracefulFailover(); cluster.waitForActiveLockHolder(0); - + + Thread.sleep(10000); // allow to quiesce + assertEquals(0, cluster.getService(0).fenceCount); assertEquals(0, cluster.getService(1).fenceCount); + assertEquals(2, cluster.getService(0).activeTransitionCount); + assertEquals(1, cluster.getService(1).activeTransitionCount); } finally { cluster.stop(); }