YARN-542. Changed the default global AM max-attempts value from 1 to 2, to allow at least one retry for the AM. Contributed by Zhijie Shen.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1470094 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c570309b07
commit
a91067fc5e
|
@ -164,6 +164,9 @@ Release 2.0.5-beta - UNRELEASED
|
||||||
YARN-586. Fixed a typo in ApplicationSubmissionContext#setApplicationId.
|
YARN-586. Fixed a typo in ApplicationSubmissionContext#setApplicationId.
|
||||||
(Zhijie Shen via vinodkv)
|
(Zhijie Shen via vinodkv)
|
||||||
|
|
||||||
|
YARN-542. Changed the default global AM max-attempts value to be not one.
|
||||||
|
(Zhijie Shen via vinodkv)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
|
@ -186,7 +186,7 @@ public class YarnConfiguration extends Configuration {
|
||||||
*/
|
*/
|
||||||
public static final String RM_AM_MAX_ATTEMPTS =
|
public static final String RM_AM_MAX_ATTEMPTS =
|
||||||
RM_PREFIX + "am.max-attempts";
|
RM_PREFIX + "am.max-attempts";
|
||||||
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 1;
|
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 2;
|
||||||
|
|
||||||
/** The keytab for the resource manager.*/
|
/** The keytab for the resource manager.*/
|
||||||
public static final String RM_KEYTAB =
|
public static final String RM_KEYTAB =
|
||||||
|
|
|
@ -145,9 +145,10 @@
|
||||||
setting for all application masters. Each application master can specify
|
setting for all application masters. Each application master can specify
|
||||||
its individual maximum number of application attempts via the API, but the
|
its individual maximum number of application attempts via the API, but the
|
||||||
individual number cannot be more than the global upper bound. If it is,
|
individual number cannot be more than the global upper bound. If it is,
|
||||||
the resourcemanager will override it.</description>
|
the resourcemanager will override it. The default number is set to 2, to
|
||||||
|
allow at least one retry for AM.</description>
|
||||||
<name>yarn.resourcemanager.am.max-attempts</name>
|
<name>yarn.resourcemanager.am.max-attempts</name>
|
||||||
<value>1</value>
|
<value>2</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
|
|
|
@ -64,7 +64,9 @@ public class TestRMRestart {
|
||||||
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
|
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
|
||||||
conf.set(YarnConfiguration.RM_SCHEDULER,
|
conf.set(YarnConfiguration.RM_SCHEDULER,
|
||||||
"org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler");
|
"org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler");
|
||||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 5);
|
Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
|
||||||
|
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||||
|
|
||||||
MemoryRMStateStore memStore = new MemoryRMStateStore();
|
MemoryRMStateStore memStore = new MemoryRMStateStore();
|
||||||
memStore.init(conf);
|
memStore.init(conf);
|
||||||
|
@ -321,7 +323,9 @@ public class TestRMRestart {
|
||||||
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
|
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
|
||||||
conf.set(YarnConfiguration.RM_STORE,
|
conf.set(YarnConfiguration.RM_STORE,
|
||||||
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
|
"org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
|
||||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
|
Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
|
||||||
|
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||||
|
|
||||||
MemoryRMStateStore memStore = new MemoryRMStateStore();
|
MemoryRMStateStore memStore = new MemoryRMStateStore();
|
||||||
memStore.init(conf);
|
memStore.init(conf);
|
||||||
|
|
|
@ -62,7 +62,8 @@ public class TestRMAppTransitions {
|
||||||
static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class);
|
static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class);
|
||||||
|
|
||||||
private RMContext rmContext;
|
private RMContext rmContext;
|
||||||
private static int maxAppAttempts = 4;
|
private static int maxAppAttempts =
|
||||||
|
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS;
|
||||||
private static int appId = 1;
|
private static int appId = 1;
|
||||||
private DrainDispatcher rmDispatcher;
|
private DrainDispatcher rmDispatcher;
|
||||||
|
|
||||||
|
@ -499,6 +500,7 @@ public class TestRMAppTransitions {
|
||||||
|
|
||||||
RMApp application = testCreateAppAccepted(null);
|
RMApp application = testCreateAppAccepted(null);
|
||||||
// ACCEPTED => ACCEPTED event RMAppEventType.ATTEMPT_FAILED
|
// ACCEPTED => ACCEPTED event RMAppEventType.ATTEMPT_FAILED
|
||||||
|
Assert.assertTrue(maxAppAttempts > 1);
|
||||||
for (int i=1; i < maxAppAttempts; i++) {
|
for (int i=1; i < maxAppAttempts; i++) {
|
||||||
RMAppEvent event =
|
RMAppEvent event =
|
||||||
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
||||||
|
@ -562,6 +564,7 @@ public class TestRMAppTransitions {
|
||||||
Assert.assertEquals(expectedAttemptId,
|
Assert.assertEquals(expectedAttemptId,
|
||||||
appAttempt.getAppAttemptId().getAttemptId());
|
appAttempt.getAppAttemptId().getAttemptId());
|
||||||
// RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED
|
// RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED
|
||||||
|
Assert.assertTrue(maxAppAttempts > 1);
|
||||||
for (int i=1; i<maxAppAttempts; i++) {
|
for (int i=1; i<maxAppAttempts; i++) {
|
||||||
RMAppEvent event =
|
RMAppEvent event =
|
||||||
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
||||||
|
|
|
@ -83,7 +83,8 @@ public class TestRMWebServicesApps extends JerseyTest {
|
||||||
bind(RMWebServices.class);
|
bind(RMWebServices.class);
|
||||||
bind(GenericExceptionHandler.class);
|
bind(GenericExceptionHandler.class);
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
|
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||||
|
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||||
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
|
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
|
||||||
ResourceScheduler.class);
|
ResourceScheduler.class);
|
||||||
rm = new MockRM(conf);
|
rm = new MockRM(conf);
|
||||||
|
@ -871,8 +872,10 @@ public class TestRMWebServicesApps extends JerseyTest {
|
||||||
MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
|
MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
|
||||||
RMApp app1 = rm.submitApp(1024, "testwordcount", "user1");
|
RMApp app1 = rm.submitApp(1024, "testwordcount", "user1");
|
||||||
amNodeManager.nodeHeartbeat(true);
|
amNodeManager.nodeHeartbeat(true);
|
||||||
int maxAppAttempts = rm.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
int maxAppAttempts = rm.getConfig().getInt(
|
||||||
|
YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||||
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||||
|
assertTrue(maxAppAttempts > 1);
|
||||||
int retriesLeft = maxAppAttempts;
|
int retriesLeft = maxAppAttempts;
|
||||||
while (--retriesLeft > 0) {
|
while (--retriesLeft > 0) {
|
||||||
RMAppEvent event =
|
RMAppEvent event =
|
||||||
|
|
Loading…
Reference in New Issue