YARN-1405. Fixed ResourceManager to not hang when init/start fails with an exception w.r.t state-store. Contributed by Jian He.

svn merge --ignore-ancestry -c 1548992 ../../trunk/


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1548993 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-12-08 04:30:58 +00:00
parent 82f1335e36
commit eeeca16747
3 changed files with 29 additions and 9 deletions

View File

@ -201,6 +201,9 @@ Release 2.4.0 - UNRELEASED
YARN-1450. Fixed test failure in TestUnmanagedAMLauncher by removing its
dependency on distributed-shell. (Binglin Chang via vinodkv)
YARN-1405. Fixed ResourceManager to not hang when init/start fails with an
exception w.r.t state-store. (Jian He via vinodkv)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -362,7 +362,7 @@ protected void serviceInit(Configuration configuration) throws Exception {
// the Exception from stateStore.init() needs to be handled for
// HA and we need to give up master status if we got fenced
LOG.error("Failed to init state store", e);
ExitUtil.terminate(1, e);
throw e;
}
rmContext.setStateStore(rmStore);
@ -470,7 +470,7 @@ protected void serviceStart() throws Exception {
// the Exception from loadState() needs to be handled for
// HA and we need to give up master status if we got fenced
LOG.error("Failed to load/recover state", e);
ExitUtil.terminate(1, e);
throw e;
}
}

View File

@ -34,7 +34,6 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@ -46,7 +45,7 @@
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.service.Service.STATE;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
@ -68,9 +67,6 @@
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
@ -94,7 +90,6 @@
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mortbay.log.Log;
public class TestRMRestart {
@ -107,7 +102,6 @@ public class TestRMRestart {
public void setup() throws UnknownHostException {
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG);
ExitUtil.disableSystemExit();
conf = new YarnConfiguration();
UserGroupInformation.setConfiguration(conf);
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
@ -1477,6 +1471,29 @@ public void testFinishedAppRemovalAfterRMRestart() throws Exception {
rm2.stop();
}
// This is to test RM does not get hang on shutdown.
@Test (timeout = 10000)
public void testRMShutdown() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore() {
@Override
public synchronized void checkVersion()
throws Exception {
throw new Exception("Invalid version.");
}
};
// start RM
memStore.init(conf);
MockRM rm1 = null;
try {
rm1 = new MockRM(conf, memStore);
rm1.start();
Assert.fail();
} catch (Exception e) {
Assert.assertTrue(e.getMessage().contains("Invalid version."));
}
Assert.assertTrue(rm1.getServiceState() == STATE.STOPPED);
}
public static class TestSecurityMockRM extends MockRM {
public TestSecurityMockRM(Configuration conf, RMStateStore store) {