MAPREDUCE-6776. yarn.app.mapreduce.client.job.max-retries should have a more useful default (miklos.szegedi@cloudera.com via rkanter)
(cherry picked from commit f3f37e6fb8)
This commit is contained in:
parent
b9761f2fc9
commit
48ca6be76e
|
@ -502,7 +502,7 @@ public interface MRJobConfig {
|
|||
*/
|
||||
public static final String MR_CLIENT_JOB_MAX_RETRIES =
|
||||
MR_PREFIX + "client.job.max-retries";
|
||||
public static final int DEFAULT_MR_CLIENT_JOB_MAX_RETRIES = 0;
|
||||
public static final int DEFAULT_MR_CLIENT_JOB_MAX_RETRIES = 3;
|
||||
|
||||
/**
|
||||
* How long to wait between jobclient retries on failure
|
||||
|
|
|
@ -1502,12 +1502,12 @@
|
|||
|
||||
<property>
|
||||
<name>yarn.app.mapreduce.client.job.max-retries</name>
|
||||
<value>0</value>
|
||||
<value>3</value>
|
||||
<description>The number of retries the client will make for getJob and
|
||||
dependent calls. The default is 0 as this is generally only needed for
|
||||
non-HDFS DFS where additional, high level retries are required to avoid
|
||||
spurious failures during the getJob call. 30 is a good value for
|
||||
WASB</description>
|
||||
dependent calls.
|
||||
This is needed for non-HDFS DFS where additional, high level
|
||||
retries are required to avoid spurious failures during the getJob call.
|
||||
30 is a good value for WASB</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
|
|
|
@ -225,7 +225,7 @@ public class JobClientUnitTest {
|
|||
|
||||
//To prevent the test from running for a very long time, lower the retry
|
||||
JobConf conf = new JobConf();
|
||||
conf.set(MRJobConfig.MR_CLIENT_JOB_MAX_RETRIES, "3");
|
||||
conf.setInt(MRJobConfig.MR_CLIENT_JOB_MAX_RETRIES, 2);
|
||||
|
||||
TestJobClientGetJob client = new TestJobClientGetJob(conf);
|
||||
JobID id = new JobID("ajob", 1);
|
||||
|
@ -236,13 +236,35 @@ public class JobClientUnitTest {
|
|||
assertNotNull(client.getJob(id));
|
||||
assertEquals(client.getLastGetJobRetriesCounter(), 0);
|
||||
|
||||
//3 retry
|
||||
client.setGetJobRetries(3);
|
||||
//2 retries
|
||||
client.setGetJobRetries(2);
|
||||
assertNotNull(client.getJob(id));
|
||||
assertEquals(client.getLastGetJobRetriesCounter(), 3);
|
||||
assertEquals(client.getLastGetJobRetriesCounter(), 2);
|
||||
|
||||
//beyond MAPREDUCE_JOBCLIENT_GETJOB_MAX_RETRY_KEY, will get null
|
||||
client.setGetJobRetries(5);
|
||||
//beyond yarn.app.mapreduce.client.job.max-retries, will get null
|
||||
client.setGetJobRetries(3);
|
||||
assertNull(client.getJob(id));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetJobRetryDefault() throws Exception {
|
||||
|
||||
//To prevent the test from running for a very long time, lower the retry
|
||||
JobConf conf = new JobConf();
|
||||
|
||||
TestJobClientGetJob client = new TestJobClientGetJob(conf);
|
||||
JobID id = new JobID("ajob", 1);
|
||||
RunningJob rj = mock(RunningJob.class);
|
||||
client.setRunningJob(rj);
|
||||
|
||||
//3 retries (default)
|
||||
client.setGetJobRetries(MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES);
|
||||
assertNotNull(client.getJob(id));
|
||||
assertEquals(client.getLastGetJobRetriesCounter(),
|
||||
MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES);
|
||||
|
||||
//beyond yarn.app.mapreduce.client.job.max-retries, will get null
|
||||
client.setGetJobRetries(MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES + 1);
|
||||
assertNull(client.getJob(id));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue