YARN-542. Changed the default global AM max-attempts value to be not one. Contributed by Zhijie Shen.

svn merge --ignore-ancestry -c 1470094 ../../trunk/


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1470095 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-04-20 00:36:07 +00:00
parent a02788f864
commit e2b5e3c2b4
6 changed files with 22 additions and 8 deletions

View File

@ -93,6 +93,9 @@ Release 2.0.5-beta - UNRELEASED
YARN-586. Fixed a typo in ApplicationSubmissionContext#setApplicationId.
(Zhijie Shen via vinodkv)
YARN-542. Changed the default global AM max-attempts value to be not one.
(Zhijie Shen via vinodkv)
OPTIMIZATIONS
BUG FIXES

View File

@ -186,7 +186,7 @@ public class YarnConfiguration extends Configuration {
*/
public static final String RM_AM_MAX_ATTEMPTS =
RM_PREFIX + "am.max-attempts";
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 1;
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 2;
/** The keytab for the resource manager.*/
public static final String RM_KEYTAB =

View File

@ -145,9 +145,10 @@
setting for all application masters. Each application master can specify
its individual maximum number of application attempts via the API, but the
individual number cannot be more than the global upper bound. If it is,
the resourcemanager will override it.</description>
the resourcemanager will override it. The default number is set to 2, to
allow at least one retry for AM.</description>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>1</value>
<value>2</value>
</property>
<property>

View File

@ -64,7 +64,9 @@ public void testRMRestart() throws Exception {
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
conf.set(YarnConfiguration.RM_SCHEDULER,
"org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler");
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 5);
Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
@ -321,7 +323,9 @@ public void testRMRestartOnMaxAppAttempts() throws Exception {
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
conf.set(YarnConfiguration.RM_STORE,
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);

View File

@ -62,7 +62,8 @@ public class TestRMAppTransitions {
static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class);
private RMContext rmContext;
private static int maxAppAttempts = 4;
private static int maxAppAttempts =
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS;
private static int appId = 1;
private DrainDispatcher rmDispatcher;
@ -499,6 +500,7 @@ public void testAppAcceptedFailed() throws IOException {
RMApp application = testCreateAppAccepted(null);
// ACCEPTED => ACCEPTED event RMAppEventType.RMAppEventType.ATTEMPT_FAILED
Assert.assertTrue(maxAppAttempts > 1);
for (int i=1; i < maxAppAttempts; i++) {
RMAppEvent event =
new RMAppFailedAttemptEvent(application.getApplicationId(),
@ -562,6 +564,7 @@ public void testAppRunningFailed() throws IOException {
Assert.assertEquals(expectedAttemptId,
appAttempt.getAppAttemptId().getAttemptId());
// RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED
Assert.assertTrue(maxAppAttempts > 1);
for (int i=1; i<maxAppAttempts; i++) {
RMAppEvent event =
new RMAppFailedAttemptEvent(application.getApplicationId(),

View File

@ -83,7 +83,8 @@ protected void configureServlets() {
bind(RMWebServices.class);
bind(GenericExceptionHandler.class);
Configuration conf = new Configuration();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
ResourceScheduler.class);
rm = new MockRM(conf);
@ -871,8 +872,10 @@ public void testMultipleAppAttempts() throws JSONException, Exception {
MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
RMApp app1 = rm.submitApp(1024, "testwordcount", "user1");
amNodeManager.nodeHeartbeat(true);
int maxAppAttempts = rm.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
int maxAppAttempts = rm.getConfig().getInt(
YarnConfiguration.RM_AM_MAX_ATTEMPTS,
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
assertTrue(maxAppAttempts > 1);
int retriesLeft = maxAppAttempts;
while (--retriesLeft > 0) {
RMAppEvent event =