diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 474f7a1f310..c2d3485e76c 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -155,6 +155,10 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9485. No default value in the code for hadoop.rpc.socket.factory.class.default. (Colin Patrick McCabe via atm) + HADOOP-9459. ActiveStandbyElector can join election even before + Service HEALTHY, and results in null data at ActiveBreadCrumb. + (Vinay and todd via todd) + Release 2.0.4-alpha - 2013-04-25 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index 4d8499f2d73..42bbed3084f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -643,6 +643,8 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { } private void joinElectionInternal() { + Preconditions.checkState(appData != null, + "trying to join election without any app data"); if (zkClient == null) { if (!reEstablishSession()) { fatalError("Failed to reEstablish connection with ZooKeeper"); @@ -669,8 +671,14 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { try { terminateConnection(); sleepFor(sleepTime); - - joinElectionInternal(); + // Should not join election even before the SERVICE is reported + // as HEALTHY from ZKFC monitoring. 
+ if (appData != null) { + joinElectionInternal(); + } else { + LOG.info("Not joining election since service has not yet been " + + "reported as healthy."); + } } finally { sessionReestablishLockForTests.unlock(); } @@ -798,6 +806,8 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { */ private void writeBreadCrumbNode(Stat oldBreadcrumbStat) throws KeeperException, InterruptedException { + Preconditions.checkState(appData != null, "no appdata"); + LOG.info("Writing znode " + zkBreadCrumbPath + " to indicate that the local node is the most recent active..."); if (oldBreadcrumbStat == null) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java index c2dc23abccd..309c7ad6ed7 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java @@ -721,4 +721,22 @@ public class TestActiveStandbyElector { GenericTestUtils.assertExceptionContains( "ConnectionLoss", ke); } } + + /** + * joinElection(..) should happen only after SERVICE_HEALTHY. + */ + @Test + public void testBecomeActiveBeforeServiceHealthy() throws Exception { + mockNoPriorActive(); + WatchedEvent mockEvent = Mockito.mock(WatchedEvent.class); + Mockito.when(mockEvent.getType()).thenReturn(Event.EventType.None); + // session expired should enter safe mode + // But the first time, before SERVICE_HEALTHY (i.e. before appData is set), + // it should not enter the election. + Mockito.when(mockEvent.getState()).thenReturn(Event.KeeperState.Expired); + elector.processWatchEvent(mockZK, mockEvent); + // joinElection should not be called. + Mockito.verify(mockZK, Mockito.times(0)).create(ZK_LOCK_NAME, null, + Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, mockZK); + } }