YARN-2588. Standby RM fails to transitionToActive if previous transitionToActive failed with ZK exception. Contributed by Rohith Sharmaks.
This commit is contained in:
parent
abae63caf9
commit
a6aa6e42ca
|
@ -677,6 +677,9 @@ Release 2.6.0 - UNRELEASED
|
||||||
YARN-2566. DefaultContainerExecutor should pick a working directory randomly.
|
YARN-2566. DefaultContainerExecutor should pick a working directory randomly.
|
||||||
(Zhihai Xu via kasha)
|
(Zhihai Xu via kasha)
|
||||||
|
|
||||||
|
YARN-2588. Standby RM fails to transitionToActive if previous
|
||||||
|
transitionToActive failed with ZK exception. (Rohith Sharmaks via jianhe)
|
||||||
|
|
||||||
Release 2.5.1 - 2014-09-05
|
Release 2.5.1 - 2014-09-05
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -1023,8 +1023,14 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
this.rmLoginUGI.doAs(new PrivilegedExceptionAction<Void>() {
|
this.rmLoginUGI.doAs(new PrivilegedExceptionAction<Void>() {
|
||||||
@Override
|
@Override
|
||||||
public Void run() throws Exception {
|
public Void run() throws Exception {
|
||||||
startActiveServices();
|
try {
|
||||||
return null;
|
startActiveServices();
|
||||||
|
return null;
|
||||||
|
} catch (Exception e) {
|
||||||
|
resetDispatcher();
|
||||||
|
createAndInitActiveServices();
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||||
|
@ -398,6 +399,57 @@ public class TestRMHA {
|
||||||
innerTestHAWithRMHostName(true);
|
innerTestHAWithRMHostName(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testFailoverWhenTransitionToActiveThrowException()
|
||||||
|
throws Exception {
|
||||||
|
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
|
||||||
|
Configuration conf = new YarnConfiguration(configuration);
|
||||||
|
|
||||||
|
MemoryRMStateStore memStore = new MemoryRMStateStore() {
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void startInternal() throws Exception {
|
||||||
|
// first time throw exception
|
||||||
|
if (count++ == 0) {
|
||||||
|
throw new Exception("Session Expired");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// start RM
|
||||||
|
memStore.init(conf);
|
||||||
|
|
||||||
|
rm = new MockRM(conf, memStore);
|
||||||
|
rm.init(conf);
|
||||||
|
StateChangeRequestInfo requestInfo =
|
||||||
|
new StateChangeRequestInfo(
|
||||||
|
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
|
||||||
|
|
||||||
|
assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService
|
||||||
|
.getServiceStatus().getState());
|
||||||
|
assertFalse("RM is ready to become active before being started",
|
||||||
|
rm.adminService.getServiceStatus().isReadyToBecomeActive());
|
||||||
|
checkMonitorHealth();
|
||||||
|
|
||||||
|
rm.start();
|
||||||
|
checkMonitorHealth();
|
||||||
|
checkStandbyRMFunctionality();
|
||||||
|
|
||||||
|
// 2. Try Transition to active, throw exception
|
||||||
|
try {
|
||||||
|
rm.adminService.transitionToActive(requestInfo);
|
||||||
|
Assert.fail("Transitioned to Active should throw exception.");
|
||||||
|
} catch (Exception e) {
|
||||||
|
assertTrue("Error when transitioning to Active mode".contains(e
|
||||||
|
.getMessage()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Transition to active, success
|
||||||
|
rm.adminService.transitionToActive(requestInfo);
|
||||||
|
checkMonitorHealth();
|
||||||
|
checkActiveRMFunctionality();
|
||||||
|
}
|
||||||
|
|
||||||
public void innerTestHAWithRMHostName(boolean includeBindHost) {
|
public void innerTestHAWithRMHostName(boolean includeBindHost) {
|
||||||
//this is run two times, with and without a bind host configured
|
//this is run two times, with and without a bind host configured
|
||||||
if (includeBindHost) {
|
if (includeBindHost) {
|
||||||
|
|
Loading…
Reference in New Issue