YARN-1405. Fixed ResourceManager to not hang when init/start fails with an exception w.r.t state-store. Contributed by Jian He.
svn merge --ignore-ancestry -c 1548992 ../../trunk/ git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1548993 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
82f1335e36
commit
eeeca16747
|
@ -201,6 +201,9 @@ Release 2.4.0 - UNRELEASED
|
||||||
YARN-1450. Fixed test failure in TestUnmanagedAMLauncher by removing its
|
YARN-1450. Fixed test failure in TestUnmanagedAMLauncher by removing its
|
||||||
dependency on distributed-shell. (Binglin Chang via vinodkv)
|
dependency on distributed-shell. (Binglin Chang via vinodkv)
|
||||||
|
|
||||||
|
YARN-1405. Fixed ResourceManager to not hang when init/start fails with an
|
||||||
|
exception w.r.t state-store. (Jian He via vinodkv)
|
||||||
|
|
||||||
Release 2.3.0 - UNRELEASED
|
Release 2.3.0 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -362,7 +362,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
// the Exception from stateStore.init() needs to be handled for
|
// the Exception from stateStore.init() needs to be handled for
|
||||||
// HA and we need to give up master status if we got fenced
|
// HA and we need to give up master status if we got fenced
|
||||||
LOG.error("Failed to init state store", e);
|
LOG.error("Failed to init state store", e);
|
||||||
ExitUtil.terminate(1, e);
|
throw e;
|
||||||
}
|
}
|
||||||
rmContext.setStateStore(rmStore);
|
rmContext.setStateStore(rmStore);
|
||||||
|
|
||||||
|
@ -470,7 +470,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
// the Exception from loadState() needs to be handled for
|
// the Exception from loadState() needs to be handled for
|
||||||
// HA and we need to give up master status if we got fenced
|
// HA and we need to give up master status if we got fenced
|
||||||
LOG.error("Failed to load/recover state", e);
|
LOG.error("Failed to load/recover state", e);
|
||||||
ExitUtil.terminate(1, e);
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,6 @@ import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||||
|
@ -46,7 +45,7 @@ import org.apache.hadoop.security.SecurityUtil;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.security.token.Token;
|
import org.apache.hadoop.security.token.Token;
|
||||||
import org.apache.hadoop.security.token.delegation.DelegationKey;
|
import org.apache.hadoop.security.token.delegation.DelegationKey;
|
||||||
import org.apache.hadoop.util.ExitUtil;
|
import org.apache.hadoop.service.Service.STATE;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
|
||||||
|
@ -68,9 +67,6 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
|
||||||
import org.apache.hadoop.yarn.event.Event;
|
|
||||||
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
||||||
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
|
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||||
|
@ -94,7 +90,6 @@ import org.apache.log4j.Logger;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.mortbay.log.Log;
|
|
||||||
|
|
||||||
public class TestRMRestart {
|
public class TestRMRestart {
|
||||||
|
|
||||||
|
@ -107,7 +102,6 @@ public class TestRMRestart {
|
||||||
public void setup() throws UnknownHostException {
|
public void setup() throws UnknownHostException {
|
||||||
Logger rootLogger = LogManager.getRootLogger();
|
Logger rootLogger = LogManager.getRootLogger();
|
||||||
rootLogger.setLevel(Level.DEBUG);
|
rootLogger.setLevel(Level.DEBUG);
|
||||||
ExitUtil.disableSystemExit();
|
|
||||||
conf = new YarnConfiguration();
|
conf = new YarnConfiguration();
|
||||||
UserGroupInformation.setConfiguration(conf);
|
UserGroupInformation.setConfiguration(conf);
|
||||||
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
|
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
|
||||||
|
@ -1477,6 +1471,29 @@ public class TestRMRestart {
|
||||||
rm2.stop();
|
rm2.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is to test RM does not get hang on shutdown.
|
||||||
|
@Test (timeout = 10000)
|
||||||
|
public void testRMShutdown() throws Exception {
|
||||||
|
MemoryRMStateStore memStore = new MemoryRMStateStore() {
|
||||||
|
@Override
|
||||||
|
public synchronized void checkVersion()
|
||||||
|
throws Exception {
|
||||||
|
throw new Exception("Invalid version.");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// start RM
|
||||||
|
memStore.init(conf);
|
||||||
|
MockRM rm1 = null;
|
||||||
|
try {
|
||||||
|
rm1 = new MockRM(conf, memStore);
|
||||||
|
rm1.start();
|
||||||
|
Assert.fail();
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.assertTrue(e.getMessage().contains("Invalid version."));
|
||||||
|
}
|
||||||
|
Assert.assertTrue(rm1.getServiceState() == STATE.STOPPED);
|
||||||
|
}
|
||||||
|
|
||||||
public static class TestSecurityMockRM extends MockRM {
|
public static class TestSecurityMockRM extends MockRM {
|
||||||
|
|
||||||
public TestSecurityMockRM(Configuration conf, RMStateStore store) {
|
public TestSecurityMockRM(Configuration conf, RMStateStore store) {
|
||||||
|
|
Loading…
Reference in New Issue