From eeeca1674745c5719b2707564e5c3b91aaf8a554 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Sun, 8 Dec 2013 04:30:58 +0000 Subject: [PATCH] YARN-1405. Fixed ResourceManager to not hang when init/start fails with an exception w.r.t state-store. Contributed by Jian He. svn merge --ignore-ancestry -c 1548992 ../../trunk/ git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1548993 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../resourcemanager/ResourceManager.java | 4 +-- .../server/resourcemanager/TestRMRestart.java | 31 ++++++++++++++----- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index bd353851b50..590f9dd9d1b 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -201,6 +201,9 @@ Release 2.4.0 - UNRELEASED YARN-1450. Fixed test failure in TestUnmanagedAMLauncher by removing its dependency on distributed-shell. (Binglin Chang via vinodkv) + YARN-1405. Fixed ResourceManager to not hang when init/start fails with an + exception w.r.t state-store. 
(Jian He via vinodkv) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 2fced0f3f19..597d18c112e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -362,7 +362,7 @@ public class ResourceManager extends CompositeService implements Recoverable { // the Exception from stateStore.init() needs to be handled for // HA and we need to give up master status if we got fenced LOG.error("Failed to init state store", e); - ExitUtil.terminate(1, e); + throw e; } rmContext.setStateStore(rmStore); @@ -470,7 +470,7 @@ public class ResourceManager extends CompositeService implements Recoverable { // the Exception from loadState() needs to be handled for // HA and we need to give up master status if we got fenced LOG.error("Failed to load/recover state", e); - ExitUtil.terminate(1, e); + throw e; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index acedd6e9eaa..d396262fc80 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -34,7 +34,6 @@ import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.LinkedBlockingQueue; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -46,7 +45,7 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.DelegationKey; -import org.apache.hadoop.util.ExitUtil; +import org.apache.hadoop.service.Service.STATE; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -68,9 +67,6 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.event.AsyncDispatcher; -import org.apache.hadoop.yarn.event.Dispatcher; -import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; @@ -94,7 +90,6 @@ import org.apache.log4j.Logger; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.mortbay.log.Log; public class TestRMRestart { @@ -107,7 +102,6 @@ public class 
TestRMRestart { public void setup() throws UnknownHostException { Logger rootLogger = LogManager.getRootLogger(); rootLogger.setLevel(Level.DEBUG); - ExitUtil.disableSystemExit(); conf = new YarnConfiguration(); UserGroupInformation.setConfiguration(conf); conf.set(YarnConfiguration.RECOVERY_ENABLED, "true"); @@ -1477,6 +1471,29 @@ public class TestRMRestart { rm2.stop(); } + // Tests that the RM does not hang on shutdown: with a state store whose checkVersion() throws, creating/starting the RM must fail with that exception and the RM must end up in STATE.STOPPED. + @Test (timeout = 10000) + public void testRMShutdown() throws Exception { + MemoryRMStateStore memStore = new MemoryRMStateStore() { + @Override + public synchronized void checkVersion() + throws Exception { + throw new Exception("Invalid version."); + } + }; + // start RM + memStore.init(conf); + MockRM rm1 = null; + try { + rm1 = new MockRM(conf, memStore); + rm1.start(); + Assert.fail(); + } catch (Exception e) { + Assert.assertTrue(e.getMessage().contains("Invalid version.")); + } + Assert.assertTrue(rm1.getServiceState() == STATE.STOPPED); + } + public static class TestSecurityMockRM extends MockRM { public TestSecurityMockRM(Configuration conf, RMStateStore store) {