diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index ef054562492..e23260ecf97 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -470,13 +470,18 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { } errorMessage = errorMessage + ". Not retrying further znode monitoring connection errors."; + } else if (isSessionExpired(code)) { + // This isn't fatal - the client Watcher will re-join the election + LOG.warn("Lock monitoring failed because session was lost"); + return; } fatalError(errorMessage); } /** - * interface implementation of Zookeeper watch events (connection and node) + * interface implementation of Zookeeper watch events (connection and node), + * proxied by {@link WatcherWithClientRef}. */ synchronized void processWatchEvent(ZooKeeper zk, WatchedEvent event) { Event.EventType eventType = event.getType(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java index 3a0fa5f981f..1256e028a0e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java @@ -26,6 +26,7 @@ import java.util.UUID; import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.ha.ActiveStandbyElector.ActiveStandbyElectorCallback; +import org.apache.hadoop.ha.ActiveStandbyElector.State; import org.apache.log4j.Level; import org.apache.zookeeper.ZooDefs.Ids; import org.apache.zookeeper.server.ZooKeeperServer; @@ -196,4 +197,48 @@ public class TestActiveStandbyElectorRealZK extends ClientBase { checkFatalsAndReset(); } + + @Test(timeout=15000) + public void testHandleSessionExpirationOfStandby() throws Exception { + // Let elector 0 be active + electors[0].ensureParentZNode(); + electors[0].joinElection(appDatas[0]); + ZooKeeperServer zks = getServer(serverFactory); + ActiveStandbyElectorTestUtil.waitForActiveLockData(null, + zks, PARENT_DIR, appDatas[0]); + Mockito.verify(cbs[0], Mockito.timeout(1000)).becomeActive(); + checkFatalsAndReset(); + + // Let elector 1 be standby + electors[1].joinElection(appDatas[1]); + ActiveStandbyElectorTestUtil.waitForElectorState(null, electors[1], + State.STANDBY); + + LOG.info("========================== Expiring standby's session"); + zks.closeSession(electors[1].getZKSessionIdForTests()); + + // Should enter neutral mode when disconnected + Mockito.verify(cbs[1], Mockito.timeout(1000)).enterNeutralMode(); + + // Should re-join the election and go back to STANDBY + ActiveStandbyElectorTestUtil.waitForElectorState(null, electors[1], + State.STANDBY); + checkFatalsAndReset(); + + LOG.info("========================== Quitting election"); + electors[1].quitElection(false); + + // Double check that we don't accidentally re-join the election + // by quitting elector 0 and ensuring elector 1 doesn't become active + electors[0].quitElection(false); + + // due to receiving the "expired" event. + Thread.sleep(1000); + Mockito.verify(cbs[1], Mockito.never()).becomeActive(); + ActiveStandbyElectorTestUtil.waitForActiveLockData(null, + zks, PARENT_DIR, null); + + checkFatalsAndReset(); + } + }