MAPREDUCE-6776. yarn.app.mapreduce.client.job.max-retries should have a more useful default (miklos.szegedi@cloudera.com via rkanter)

(cherry picked from commit f3f37e6fb8)
This commit is contained in:
Robert Kanter 2016-10-07 14:47:06 -07:00
parent b9761f2fc9
commit 48ca6be76e
3 changed files with 35 additions and 13 deletions

View File

@ -502,7 +502,7 @@ public interface MRJobConfig {
*/
public static final String MR_CLIENT_JOB_MAX_RETRIES =
MR_PREFIX + "client.job.max-retries";
public static final int DEFAULT_MR_CLIENT_JOB_MAX_RETRIES = 0;
public static final int DEFAULT_MR_CLIENT_JOB_MAX_RETRIES = 3;
/**
* How long to wait between jobclient retries on failure

View File

@ -1502,12 +1502,12 @@
<property>
<name>yarn.app.mapreduce.client.job.max-retries</name>
<value>0</value>
<value>3</value>
<description>The number of retries the client will make for getJob and
dependent calls. The default is 0 as this is generally only needed for
non-HDFS DFS where additional, high level retries are required to avoid
spurious failures during the getJob call. 30 is a good value for
WASB</description>
dependent calls.
This is needed for non-HDFS DFS where additional, high-level
retries are required to avoid spurious failures during the getJob call.
30 is a good value for WASB.</description>
</property>
<property>

View File

@ -225,10 +225,10 @@ public class JobClientUnitTest {
//To prevent the test from running for a very long time, lower the retry
JobConf conf = new JobConf();
conf.set(MRJobConfig.MR_CLIENT_JOB_MAX_RETRIES, "3");
conf.setInt(MRJobConfig.MR_CLIENT_JOB_MAX_RETRIES, 2);
TestJobClientGetJob client = new TestJobClientGetJob(conf);
JobID id = new JobID("ajob",1);
JobID id = new JobID("ajob", 1);
RunningJob rj = mock(RunningJob.class);
client.setRunningJob(rj);
@ -236,13 +236,35 @@ public class JobClientUnitTest {
assertNotNull(client.getJob(id));
assertEquals(client.getLastGetJobRetriesCounter(), 0);
//3 retry
client.setGetJobRetries(3);
//2 retries
client.setGetJobRetries(2);
assertNotNull(client.getJob(id));
assertEquals(client.getLastGetJobRetriesCounter(), 3);
assertEquals(client.getLastGetJobRetriesCounter(), 2);
//beyond MAPREDUCE_JOBCLIENT_GETJOB_MAX_RETRY_KEY, will get null
client.setGetJobRetries(5);
//beyond yarn.app.mapreduce.client.job.max-retries, will get null
client.setGetJobRetries(3);
assertNull(client.getJob(id));
}
/**
 * Checks getJob retry handling when
 * yarn.app.mapreduce.client.job.max-retries is not set explicitly, using
 * {@link MRJobConfig#DEFAULT_MR_CLIENT_JOB_MAX_RETRIES} as the expected limit.
 */
@Test
public void testGetJobRetryDefault() throws Exception {
  // Intentionally leave the retry setting untouched so the default applies.
  JobConf conf = new JobConf();
  TestJobClientGetJob client = new TestJobClientGetJob(conf);
  JobID id = new JobID("ajob", 1);
  RunningJob rj = mock(RunningJob.class);
  client.setRunningJob(rj);

  // Exactly the default number of retries: the job must still be returned.
  client.setGetJobRetries(MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES);
  assertNotNull(client.getJob(id));
  // JUnit convention is (expected, actual), so a failure message reads
  // correctly.
  assertEquals(MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES,
      client.getLastGetJobRetriesCounter());

  // One more than yarn.app.mapreduce.client.job.max-retries: will get null.
  client.setGetJobRetries(MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES + 1);
  assertNull(client.getJob(id));
}