YARN-542. Changed the default global AM max-attempts value from 1 to 2, so that an AM gets at least one retry. Contributed by Zhijie Shen.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1470094 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-04-20 00:35:29 +00:00
parent c570309b07
commit a91067fc5e
6 changed files with 22 additions and 8 deletions

View File

@ -164,6 +164,9 @@ Release 2.0.5-beta - UNRELEASED
YARN-586. Fixed a typo in ApplicationSubmissionContext#setApplicationId. YARN-586. Fixed a typo in ApplicationSubmissionContext#setApplicationId.
(Zhijie Shen via vinodkv) (Zhijie Shen via vinodkv)
YARN-542. Changed the default global AM max-attempts value from 1 to 2.
(Zhijie Shen via vinodkv)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -186,7 +186,7 @@ public class YarnConfiguration extends Configuration {
*/ */
public static final String RM_AM_MAX_ATTEMPTS = public static final String RM_AM_MAX_ATTEMPTS =
RM_PREFIX + "am.max-attempts"; RM_PREFIX + "am.max-attempts";
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 1; public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 2;
/** The keytab for the resource manager.*/ /** The keytab for the resource manager.*/
public static final String RM_KEYTAB = public static final String RM_KEYTAB =

View File

@ -145,9 +145,10 @@
setting for all application masters. Each application master can specify setting for all application masters. Each application master can specify
its individual maximum number of application attempts via the API, but the its individual maximum number of application attempts via the API, but the
individual number cannot be more than the global upper bound. If it is, individual number cannot be more than the global upper bound. If it is,
the resourcemanager will override it.</description> the resourcemanager will override it. The default number is set to 2, to
allow at least one retry for AM.</description>
<name>yarn.resourcemanager.am.max-attempts</name> <name>yarn.resourcemanager.am.max-attempts</name>
<value>1</value> <value>2</value>
</property> </property>
<property> <property>

View File

@ -64,7 +64,9 @@ public class TestRMRestart {
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore"); "org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
conf.set(YarnConfiguration.RM_SCHEDULER, conf.set(YarnConfiguration.RM_SCHEDULER,
"org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler"); "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler");
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 5); Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore(); MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf); memStore.init(conf);
@ -321,7 +323,9 @@ public class TestRMRestart {
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true"); conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
conf.set(YarnConfiguration.RM_STORE, conf.set(YarnConfiguration.RM_STORE,
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore"); "org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2); Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore(); MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf); memStore.init(conf);

View File

@ -62,7 +62,8 @@ public class TestRMAppTransitions {
static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class); static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class);
private RMContext rmContext; private RMContext rmContext;
private static int maxAppAttempts = 4; private static int maxAppAttempts =
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS;
private static int appId = 1; private static int appId = 1;
private DrainDispatcher rmDispatcher; private DrainDispatcher rmDispatcher;
@ -499,6 +500,7 @@ public class TestRMAppTransitions {
RMApp application = testCreateAppAccepted(null); RMApp application = testCreateAppAccepted(null);
// ACCEPTED => ACCEPTED event RMAppEventType.RMAppEventType.ATTEMPT_FAILED // ACCEPTED => ACCEPTED event RMAppEventType.RMAppEventType.ATTEMPT_FAILED
Assert.assertTrue(maxAppAttempts > 1);
for (int i=1; i < maxAppAttempts; i++) { for (int i=1; i < maxAppAttempts; i++) {
RMAppEvent event = RMAppEvent event =
new RMAppFailedAttemptEvent(application.getApplicationId(), new RMAppFailedAttemptEvent(application.getApplicationId(),
@ -562,6 +564,7 @@ public class TestRMAppTransitions {
Assert.assertEquals(expectedAttemptId, Assert.assertEquals(expectedAttemptId,
appAttempt.getAppAttemptId().getAttemptId()); appAttempt.getAppAttemptId().getAttemptId());
// RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED // RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED
Assert.assertTrue(maxAppAttempts > 1);
for (int i=1; i<maxAppAttempts; i++) { for (int i=1; i<maxAppAttempts; i++) {
RMAppEvent event = RMAppEvent event =
new RMAppFailedAttemptEvent(application.getApplicationId(), new RMAppFailedAttemptEvent(application.getApplicationId(),

View File

@ -83,7 +83,8 @@ public class TestRMWebServicesApps extends JerseyTest {
bind(RMWebServices.class); bind(RMWebServices.class);
bind(GenericExceptionHandler.class); bind(GenericExceptionHandler.class);
Configuration conf = new Configuration(); Configuration conf = new Configuration();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2); conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
ResourceScheduler.class); ResourceScheduler.class);
rm = new MockRM(conf); rm = new MockRM(conf);
@ -871,8 +872,10 @@ public class TestRMWebServicesApps extends JerseyTest {
MockNM amNodeManager = rm.registerNode("amNM:1234", 2048); MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
RMApp app1 = rm.submitApp(1024, "testwordcount", "user1"); RMApp app1 = rm.submitApp(1024, "testwordcount", "user1");
amNodeManager.nodeHeartbeat(true); amNodeManager.nodeHeartbeat(true);
int maxAppAttempts = rm.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, int maxAppAttempts = rm.getConfig().getInt(
YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
assertTrue(maxAppAttempts > 1);
int retriesLeft = maxAppAttempts; int retriesLeft = maxAppAttempts;
while (--retriesLeft > 0) { while (--retriesLeft > 0) {
RMAppEvent event = RMAppEvent event =