YARN-542. Changed the default global AM max-attempts value to be not one. Contributed by Zhijie Shen.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1470094 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-04-20 00:35:29 +00:00
parent c570309b07
commit a91067fc5e
6 changed files with 22 additions and 8 deletions

View File

@ -164,6 +164,9 @@ Release 2.0.5-beta - UNRELEASED
YARN-586. Fixed a typo in ApplicationSubmissionContext#setApplicationId.
(Zhijie Shen via vinodkv)
YARN-542. Changed the default global AM max-attempts value to be not one.
(Zhijie Shen via vinodkv)
OPTIMIZATIONS
BUG FIXES

View File

@ -186,7 +186,7 @@ public class YarnConfiguration extends Configuration {
*/
public static final String RM_AM_MAX_ATTEMPTS =
RM_PREFIX + "am.max-attempts";
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 1;
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 2;
/** The keytab for the resource manager.*/
public static final String RM_KEYTAB =

View File

@ -145,9 +145,10 @@
setting for all application masters. Each application master can specify
its individual maximum number of application attempts via the API, but the
individual number cannot be more than the global upper bound. If it is,
the resourcemanager will override it.</description>
the resourcemanager will override it. The default number is set to 2, to
allow at least one retry for AM.</description>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>1</value>
<value>2</value>
</property>
<property>

View File

@ -64,7 +64,9 @@ public class TestRMRestart {
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
conf.set(YarnConfiguration.RM_SCHEDULER,
"org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler");
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 5);
Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
@ -321,7 +323,9 @@ public class TestRMRestart {
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
conf.set(YarnConfiguration.RM_STORE,
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);

View File

@ -62,7 +62,8 @@ public class TestRMAppTransitions {
static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class);
private RMContext rmContext;
private static int maxAppAttempts = 4;
private static int maxAppAttempts =
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS;
private static int appId = 1;
private DrainDispatcher rmDispatcher;
@ -499,6 +500,7 @@ public class TestRMAppTransitions {
RMApp application = testCreateAppAccepted(null);
// ACCEPTED => ACCEPTED event RMAppEventType.RMAppEventType.ATTEMPT_FAILED
Assert.assertTrue(maxAppAttempts > 1);
for (int i=1; i < maxAppAttempts; i++) {
RMAppEvent event =
new RMAppFailedAttemptEvent(application.getApplicationId(),
@ -562,6 +564,7 @@ public class TestRMAppTransitions {
Assert.assertEquals(expectedAttemptId,
appAttempt.getAppAttemptId().getAttemptId());
// RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED
Assert.assertTrue(maxAppAttempts > 1);
for (int i=1; i<maxAppAttempts; i++) {
RMAppEvent event =
new RMAppFailedAttemptEvent(application.getApplicationId(),

View File

@ -83,7 +83,8 @@ public class TestRMWebServicesApps extends JerseyTest {
bind(RMWebServices.class);
bind(GenericExceptionHandler.class);
Configuration conf = new Configuration();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
ResourceScheduler.class);
rm = new MockRM(conf);
@ -871,8 +872,10 @@ public class TestRMWebServicesApps extends JerseyTest {
MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
RMApp app1 = rm.submitApp(1024, "testwordcount", "user1");
amNodeManager.nodeHeartbeat(true);
int maxAppAttempts = rm.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
int maxAppAttempts = rm.getConfig().getInt(
YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
assertTrue(maxAppAttempts > 1);
int retriesLeft = maxAppAttempts;
while (--retriesLeft > 0) {
RMAppEvent event =