diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 67d1841e7d0..418c79316ae 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -502,12 +502,19 @@ public class YarnConfiguration extends Configuration {
public static final int DEFAULT_RM_ADMIN_CLIENT_THREAD_COUNT = 1;
/**
- * The maximum number of application attempts.
- * It's a global setting for all application masters.
+ * The default maximum number of application attempts for an
+ * application, used when the application does not specify its own.
*/
public static final String RM_AM_MAX_ATTEMPTS =
RM_PREFIX + "am.max-attempts";
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 2;
+
+ /**
+ * The global upper bound on application attempts for all application
+ * masters. Falls back to {@link #RM_AM_MAX_ATTEMPTS} when unset.
+ */
+ public static final String GLOBAL_RM_AM_MAX_ATTEMPTS =
+ RM_PREFIX + "am.global.max-attempts";
/** The keytab for the resource manager.*/
public static final String RM_KEYTAB =
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 49b3bb293a3..bf4a916ec90 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -327,11 +327,10 @@
- The maximum number of application attempts. It's a global
- setting for all application masters. Each application master can specify
- its individual maximum number of application attempts via the API, but the
- individual number cannot be more than the global upper bound. If it is,
- the resourcemanager will override it. The default number is set to 2, to
+ The default maximum number of application attempts, if unset by
+ the user. Each application master can specify its individual maximum number of application
+ attempts via the API, but the individual number cannot be more than the global upper bound in
+ yarn.resourcemanager.am.global.max-attempts. The default number is set to 2, to
allow at least one retry for AM.
yarn.resourcemanager.am.max-attempts
2
@@ -4534,4 +4533,18 @@
yarn.webapp.enable-rest-app-submissions
true
+
+
+
+ The maximum number of application attempts. It's a global
+ setting for all application masters. Each application master can specify
+ its individual maximum number of application attempts via the API, but the
+ individual number cannot be more than the global upper bound. If it is,
+ the resourcemanager will override it. If unset, this defaults to the value of
+ yarn.resourcemanager.am.max-attempts.
+
+ yarn.resourcemanager.am.global.max-attempts
+
+
+
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
index e71d3c78d4f..48cbd8f6fc5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
@@ -613,12 +613,20 @@ public class ResourceManager extends CompositeService
// sanity check for configurations
protected static void validateConfigs(Configuration conf) {
// validate max-attempts
- int globalMaxAppAttempts =
- conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+ int rmMaxAppAttempts = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
+ if (rmMaxAppAttempts <= 0) {
+ throw new YarnRuntimeException("Invalid rm am max attempts configuration"
+ + ", " + YarnConfiguration.RM_AM_MAX_ATTEMPTS
+ + "=" + rmMaxAppAttempts + ", it should be a positive integer.");
+ }
+ int globalMaxAppAttempts = conf.getInt(
+ YarnConfiguration.GLOBAL_RM_AM_MAX_ATTEMPTS,
+ conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+ YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));
if (globalMaxAppAttempts <= 0) {
throw new YarnRuntimeException("Invalid global max attempts configuration"
- + ", " + YarnConfiguration.RM_AM_MAX_ATTEMPTS
+ + ", " + YarnConfiguration.GLOBAL_RM_AM_MAX_ATTEMPTS
+ "=" + globalMaxAppAttempts + ", it should be a positive integer.");
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 8223fe2778e..83188671f82 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -453,11 +453,20 @@ public class RMAppImpl implements RMApp, Recoverable {
this.applicationPriority = Priority.newInstance(0);
}
- int globalMaxAppAttempts = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+ int globalMaxAppAttempts = conf.getInt(
+ YarnConfiguration.GLOBAL_RM_AM_MAX_ATTEMPTS,
+ conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+ YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));
+ int rmMaxAppAttempts = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
int individualMaxAppAttempts = submissionContext.getMaxAppAttempts();
- if (individualMaxAppAttempts <= 0 ||
- individualMaxAppAttempts > globalMaxAppAttempts) {
+ if (individualMaxAppAttempts <= 0) {
+ this.maxAppAttempts = rmMaxAppAttempts;
+ LOG.warn("The specific max attempts: " + individualMaxAppAttempts
+ + " for application: " + applicationId.getId()
+ + " is invalid, because it is less than or equal to zero."
+ + " Use the rm max attempts instead.");
+ } else if (individualMaxAppAttempts > globalMaxAppAttempts) {
this.maxAppAttempts = globalMaxAppAttempts;
LOG.warn("The specific max attempts: " + individualMaxAppAttempts
+ " for application: " + applicationId.getId()
@@ -1211,8 +1220,9 @@ public class RMAppImpl implements RMApp, Recoverable {
+ " failed due to " + failedEvent.getDiagnosticMsg()
+ ". Failing the application.";
} else if (this.isNumAttemptsBeyondThreshold) {
- int globalLimit = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
- YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
+ int globalLimit = conf.getInt(YarnConfiguration.GLOBAL_RM_AM_MAX_ATTEMPTS,
+ conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+ YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS));
msg = String.format(
"Application %s failed %d times%s%s due to %s. Failing the application.",
getApplicationId(),
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
index 3e749003c69..77eb7cb8cb4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
@@ -980,17 +980,20 @@ public class TestAppManager extends AppManagerTestBase{
@Test (timeout = 30000)
public void testRMAppSubmitMaxAppAttempts() throws Exception {
int[] globalMaxAppAttempts = new int[] { 10, 1 };
+ int[] rmAmMaxAttempts = new int[] { 8, 1 };
int[][] individualMaxAppAttempts = new int[][]{
new int[]{ 9, 10, 11, 0 },
new int[]{ 1, 10, 0, -1 }};
int[][] expectedNums = new int[][]{
- new int[]{ 9, 10, 10, 10 },
+ new int[]{ 9, 10, 10, 8 },
new int[]{ 1, 1, 1, 1 }};
for (int i = 0; i < globalMaxAppAttempts.length; ++i) {
for (int j = 0; j < individualMaxAppAttempts.length; ++j) {
ResourceScheduler scheduler = mockResourceScheduler();
Configuration conf = new Configuration();
- conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, globalMaxAppAttempts[i]);
+ conf.setInt(YarnConfiguration.GLOBAL_RM_AM_MAX_ATTEMPTS,
+ globalMaxAppAttempts[i]);
+ conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, rmAmMaxAttempts[i]);
ApplicationMasterService masterService =
new ApplicationMasterService(rmContext, scheduler);
TestRMAppManager appMonitor = new TestRMAppManager(rmContext,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java
index b9ff5882530..411b8482170 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java
@@ -237,7 +237,7 @@ public class TestResourceManager {
@Test (timeout = 30000)
public void testResourceManagerInitConfigValidation() throws Exception {
Configuration conf = new YarnConfiguration();
- conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, -1);
+ conf.setInt(YarnConfiguration.GLOBAL_RM_AM_MAX_ATTEMPTS, -1);
try {
resourceManager = new MockRM(conf);
fail("Exception is expected because the global max attempts" +
@@ -247,6 +247,17 @@ public class TestResourceManager {
if (!e.getMessage().startsWith(
"Invalid global max attempts configuration")) throw e;
}
+ Configuration yarnConf = new YarnConfiguration();
+ yarnConf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, -1);
+ try {
+ resourceManager = new MockRM(yarnConf);
+ fail("Exception is expected because AM max attempts" +
+ " is negative.");
+ } catch (YarnRuntimeException e) {
+ // Exception is expected.
+ if (!e.getMessage().startsWith(
+ "Invalid rm am max attempts configuration")) throw e;
+ }
}
@Test