YARN-2588. Standby RM fails to transitionToActive if previous transitionToActive failed with ZK exception. Contributed by Rohith Sharmaks

(cherry picked from commit a6aa6e42ca)
This commit is contained in:
Jian He 2014-10-17 10:54:24 -07:00
parent 778fbcd5d3
commit b7504f0394
3 changed files with 63 additions and 2 deletions

View File

@ -607,6 +607,9 @@ Release 2.6.0 - UNRELEASED
YARN-2566. DefaultContainerExecutor should pick a working directory randomly.
(Zhihai Xu via kasha)
YARN-2588. Standby RM fails to transitionToActive if previous
transitionToActive failed with ZK exception. (Rohith Sharmaks via jianhe)
Release 2.5.1 - 2014-09-05
INCOMPATIBLE CHANGES

View File

@ -1023,8 +1023,14 @@ public class ResourceManager extends CompositeService implements Recoverable {
this.rmLoginUGI.doAs(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {
startActiveServices();
return null;
try {
startActiveServices();
return null;
} catch (Exception e) {
resetDispatcher();
createAndInitActiveServices();
throw e;
}
}
});

View File

@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
@ -398,6 +399,57 @@ public class TestRMHA {
innerTestHAWithRMHostName(true);
}
/**
 * Checks that a failed transitionToActive does not prevent a later one from
 * succeeding: the state store's startInternal() throws on its first call
 * only, so the first transition request is expected to fail and the second
 * one to succeed.
 */
@Test(timeout = 30000)
public void testFailoverWhenTransitionToActiveThrowException()
    throws Exception {
  configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
  Configuration conf = new YarnConfiguration(configuration);

  MemoryRMStateStore memStore = new MemoryRMStateStore() {
    int count = 0;

    @Override
    public synchronized void startInternal() throws Exception {
      // first time throw exception; every later call is a no-op
      if (count++ == 0) {
        throw new Exception("Session Expired");
      }
    }
  };
  // start RM
  memStore.init(conf);

  rm = new MockRM(conf, memStore);
  rm.init(conf);
  StateChangeRequestInfo requestInfo =
      new StateChangeRequestInfo(
          HAServiceProtocol.RequestSource.REQUEST_BY_USER);

  assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService
      .getServiceStatus().getState());
  assertFalse("RM is ready to become active before being started",
      rm.adminService.getServiceStatus().isReadyToBecomeActive());
  checkMonitorHealth();

  rm.start();
  checkMonitorHealth();
  checkStandbyRMFunctionality();

  // 2. Try Transition to active, throw exception
  try {
    rm.adminService.transitionToActive(requestInfo);
    Assert.fail("Transitioned to Active should throw exception.");
  } catch (Exception e) {
    // The thrown message must contain the expected text. The original check
    // had receiver and argument swapped, so it also accepted an empty
    // message and hit a NullPointerException on a null one.
    // NOTE(review): assumes the admin service reports the failure with this
    // exact phrase - confirm against AdminService.transitionToActive.
    String message = e.getMessage();
    assertTrue("Unexpected exception message: " + message,
        message != null
            && message.contains("Error when transitioning to Active mode"));
  }

  // 3. Transition to active, success
  rm.adminService.transitionToActive(requestInfo);
  checkMonitorHealth();
  checkActiveRMFunctionality();
}
public void innerTestHAWithRMHostName(boolean includeBindHost) {
//this is run two times, with and without a bind host configured
if (includeBindHost) {