HADOOP-9459. ActiveStandbyElector can join election even before Service HEALTHY, and results in null data at ActiveBreadCrumb. Contributed by Vinay and Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1482226 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2013-05-14 06:43:38 +00:00
parent 44a9273286
commit b30d21bd13
3 changed files with 34 additions and 2 deletions

View File

@ -155,6 +155,10 @@ Release 2.0.5-beta - UNRELEASED
HADOOP-9485. No default value in the code for HADOOP-9485. No default value in the code for
hadoop.rpc.socket.factory.class.default. (Colin Patrick McCabe via atm) hadoop.rpc.socket.factory.class.default. (Colin Patrick McCabe via atm)
HADOOP-9459. ActiveStandbyElector can join election even before
Service HEALTHY, and results in null data at ActiveBreadCrumb.
(Vinay and todd via todd)
Release 2.0.4-alpha - 2013-04-25 Release 2.0.4-alpha - 2013-04-25
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -643,6 +643,8 @@ private void monitorActiveStatus() {
} }
private void joinElectionInternal() { private void joinElectionInternal() {
Preconditions.checkState(appData != null,
"trying to join election without any app data");
if (zkClient == null) { if (zkClient == null) {
if (!reEstablishSession()) { if (!reEstablishSession()) {
fatalError("Failed to reEstablish connection with ZooKeeper"); fatalError("Failed to reEstablish connection with ZooKeeper");
@ -669,8 +671,14 @@ private void reJoinElection(int sleepTime) {
try { try {
terminateConnection(); terminateConnection();
sleepFor(sleepTime); sleepFor(sleepTime);
// Should not join election even before the SERVICE is reported
joinElectionInternal(); // as HEALTHY from ZKFC monitoring.
if (appData != null) {
joinElectionInternal();
} else {
LOG.info("Not joining election since service has not yet been " +
"reported as healthy.");
}
} finally { } finally {
sessionReestablishLockForTests.unlock(); sessionReestablishLockForTests.unlock();
} }
@ -798,6 +806,8 @@ private boolean becomeActive() {
*/ */
private void writeBreadCrumbNode(Stat oldBreadcrumbStat) private void writeBreadCrumbNode(Stat oldBreadcrumbStat)
throws KeeperException, InterruptedException { throws KeeperException, InterruptedException {
Preconditions.checkState(appData != null, "no appdata");
LOG.info("Writing znode " + zkBreadCrumbPath + LOG.info("Writing znode " + zkBreadCrumbPath +
" to indicate that the local node is the most recent active..."); " to indicate that the local node is the most recent active...");
if (oldBreadcrumbStat == null) { if (oldBreadcrumbStat == null) {

View File

@ -721,4 +721,22 @@ public void testWithoutZKServer() throws Exception {
GenericTestUtils.assertExceptionContains( "ConnectionLoss", ke); GenericTestUtils.assertExceptionContains( "ConnectionLoss", ke);
} }
} }
/**
* joinElection(..) should happen only after SERVICE_HEALTHY.
*
* Delivers a mocked session-expired watch event to the elector before any
* app data has been supplied, then verifies that no create call for the
* lock znode was issued on the mocked ZooKeeper client.
*
* @throws Exception declared so that checked exceptions from the mocked
*         ZooKeeper calls do not need to be handled in the test body
*/
@Test
public void testBecomeActiveBeforeServiceHealthy() throws Exception {
mockNoPriorActive();
// Build a mocked watch event of type None; its state is stubbed below.
WatchedEvent mockEvent = Mockito.mock(WatchedEvent.class);
Mockito.when(mockEvent.getType()).thenReturn(Event.EventType.None);
// Session expiry should make the elector enter safe mode.
// But the first time, before SERVICE_HEALTHY (i.e. before appData is set),
// it should not enter the election.
Mockito.when(mockEvent.getState()).thenReturn(Event.KeeperState.Expired);
elector.processWatchEvent(mockZK, mockEvent);
// joinElection should not be called: verify that a create of the lock
// znode with null data was never issued against the mocked client.
Mockito.verify(mockZK, Mockito.times(0)).create(ZK_LOCK_NAME, null,
Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, mockZK);
}
} }