YARN-542. Changed the default global AM max-attempts value to be not one. Contributed by Zhijie Shen.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1470094 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c570309b07
commit
a91067fc5e
|
@ -164,6 +164,9 @@ Release 2.0.5-beta - UNRELEASED
|
|||
YARN-586. Fixed a typo in ApplicationSubmissionContext#setApplicationId.
|
||||
(Zhijie Shen via vinodkv)
|
||||
|
||||
YARN-542. Changed the default global AM max-attempts value to be not one.
|
||||
(Zhijie Shen via vinodkv)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
|
|
@ -186,7 +186,7 @@ public class YarnConfiguration extends Configuration {
|
|||
*/
|
||||
public static final String RM_AM_MAX_ATTEMPTS =
|
||||
RM_PREFIX + "am.max-attempts";
|
||||
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 1;
|
||||
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 2;
|
||||
|
||||
/** The keytab for the resource manager.*/
|
||||
public static final String RM_KEYTAB =
|
||||
|
|
|
@ -145,9 +145,10 @@
|
|||
setting for all application masters. Each application master can specify
|
||||
its individual maximum number of application attempts via the API, but the
|
||||
individual number cannot be more than the global upper bound. If it is,
|
||||
the resourcemanager will override it.</description>
|
||||
the resourcemanager will override it. The default number is set to 2, to
|
||||
allow at least one retry for AM.</description>
|
||||
<name>yarn.resourcemanager.am.max-attempts</name>
|
||||
<value>1</value>
|
||||
<value>2</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
|
|
|
@ -64,7 +64,9 @@ public class TestRMRestart {
|
|||
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
|
||||
conf.set(YarnConfiguration.RM_SCHEDULER,
|
||||
"org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler");
|
||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 5);
|
||||
Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
|
||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||
|
||||
MemoryRMStateStore memStore = new MemoryRMStateStore();
|
||||
memStore.init(conf);
|
||||
|
@ -321,7 +323,9 @@ public class TestRMRestart {
|
|||
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
|
||||
conf.set(YarnConfiguration.RM_STORE,
|
||||
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
|
||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
|
||||
Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
|
||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||
|
||||
MemoryRMStateStore memStore = new MemoryRMStateStore();
|
||||
memStore.init(conf);
|
||||
|
|
|
@ -62,7 +62,8 @@ public class TestRMAppTransitions {
|
|||
static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class);
|
||||
|
||||
private RMContext rmContext;
|
||||
private static int maxAppAttempts = 4;
|
||||
private static int maxAppAttempts =
|
||||
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS;
|
||||
private static int appId = 1;
|
||||
private DrainDispatcher rmDispatcher;
|
||||
|
||||
|
@ -499,6 +500,7 @@ public class TestRMAppTransitions {
|
|||
|
||||
RMApp application = testCreateAppAccepted(null);
|
||||
// ACCEPTED => ACCEPTED event RMAppEventType.ATTEMPT_FAILED
|
||||
Assert.assertTrue(maxAppAttempts > 1);
|
||||
for (int i=1; i < maxAppAttempts; i++) {
|
||||
RMAppEvent event =
|
||||
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
||||
|
@ -562,6 +564,7 @@ public class TestRMAppTransitions {
|
|||
Assert.assertEquals(expectedAttemptId,
|
||||
appAttempt.getAppAttemptId().getAttemptId());
|
||||
// RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED
|
||||
Assert.assertTrue(maxAppAttempts > 1);
|
||||
for (int i=1; i<maxAppAttempts; i++) {
|
||||
RMAppEvent event =
|
||||
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
||||
|
|
|
@ -83,7 +83,8 @@ public class TestRMWebServicesApps extends JerseyTest {
|
|||
bind(RMWebServices.class);
|
||||
bind(GenericExceptionHandler.class);
|
||||
Configuration conf = new Configuration();
|
||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
|
||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
|
||||
ResourceScheduler.class);
|
||||
rm = new MockRM(conf);
|
||||
|
@ -871,8 +872,10 @@ public class TestRMWebServicesApps extends JerseyTest {
|
|||
MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
|
||||
RMApp app1 = rm.submitApp(1024, "testwordcount", "user1");
|
||||
amNodeManager.nodeHeartbeat(true);
|
||||
int maxAppAttempts = rm.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||
int maxAppAttempts = rm.getConfig().getInt(
|
||||
YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||
assertTrue(maxAppAttempts > 1);
|
||||
int retriesLeft = maxAppAttempts;
|
||||
while (--retriesLeft > 0) {
|
||||
RMAppEvent event =
|
||||
|
|
Loading…
Reference in New Issue