MAPREDUCE-6776. yarn.app.mapreduce.client.job.max-retries should have a more useful default (miklos.szegedi@cloudera.com via rkanter)
(cherry picked from commit f3f37e6fb8)
This commit is contained in:
parent
b9761f2fc9
commit
48ca6be76e
|
@ -502,7 +502,7 @@ public interface MRJobConfig {
|
||||||
*/
|
*/
|
||||||
public static final String MR_CLIENT_JOB_MAX_RETRIES =
|
public static final String MR_CLIENT_JOB_MAX_RETRIES =
|
||||||
MR_PREFIX + "client.job.max-retries";
|
MR_PREFIX + "client.job.max-retries";
|
||||||
public static final int DEFAULT_MR_CLIENT_JOB_MAX_RETRIES = 0;
|
public static final int DEFAULT_MR_CLIENT_JOB_MAX_RETRIES = 3;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* How long to wait between jobclient retries on failure
|
* How long to wait between jobclient retries on failure
|
||||||
|
|
|
@ -1502,12 +1502,12 @@
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>yarn.app.mapreduce.client.job.max-retries</name>
|
<name>yarn.app.mapreduce.client.job.max-retries</name>
|
||||||
<value>0</value>
|
<value>3</value>
|
||||||
<description>The number of retries the client will make for getJob and
|
<description>The number of retries the client will make for getJob and
|
||||||
dependent calls. The default is 0 as this is generally only needed for
|
dependent calls.
|
||||||
non-HDFS DFS where additional, high level retries are required to avoid
|
This is needed for non-HDFS DFS where additional, high level
|
||||||
spurious failures during the getJob call. 30 is a good value for
|
retries are required to avoid spurious failures during the getJob call.
|
||||||
WASB</description>
|
30 is a good value for WASB</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
|
|
|
@ -225,7 +225,7 @@ public class JobClientUnitTest {
|
||||||
|
|
||||||
//To prevent the test from running for a very long time, lower the retry
|
//To prevent the test from running for a very long time, lower the retry
|
||||||
JobConf conf = new JobConf();
|
JobConf conf = new JobConf();
|
||||||
conf.set(MRJobConfig.MR_CLIENT_JOB_MAX_RETRIES, "3");
|
conf.setInt(MRJobConfig.MR_CLIENT_JOB_MAX_RETRIES, 2);
|
||||||
|
|
||||||
TestJobClientGetJob client = new TestJobClientGetJob(conf);
|
TestJobClientGetJob client = new TestJobClientGetJob(conf);
|
||||||
JobID id = new JobID("ajob", 1);
|
JobID id = new JobID("ajob", 1);
|
||||||
|
@ -236,13 +236,35 @@ public class JobClientUnitTest {
|
||||||
assertNotNull(client.getJob(id));
|
assertNotNull(client.getJob(id));
|
||||||
assertEquals(client.getLastGetJobRetriesCounter(), 0);
|
assertEquals(client.getLastGetJobRetriesCounter(), 0);
|
||||||
|
|
||||||
//3 retry
|
//2 retries
|
||||||
client.setGetJobRetries(3);
|
client.setGetJobRetries(2);
|
||||||
assertNotNull(client.getJob(id));
|
assertNotNull(client.getJob(id));
|
||||||
assertEquals(client.getLastGetJobRetriesCounter(), 3);
|
assertEquals(client.getLastGetJobRetriesCounter(), 2);
|
||||||
|
|
||||||
//beyond MAPREDUCE_JOBCLIENT_GETJOB_MAX_RETRY_KEY, will get null
|
//beyond yarn.app.mapreduce.client.job.max-retries, will get null
|
||||||
client.setGetJobRetries(5);
|
client.setGetJobRetries(3);
|
||||||
|
assertNull(client.getJob(id));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetJobRetryDefault() throws Exception {
|
||||||
|
|
||||||
|
//To prevent the test from running for a very long time, lower the retry
|
||||||
|
JobConf conf = new JobConf();
|
||||||
|
|
||||||
|
TestJobClientGetJob client = new TestJobClientGetJob(conf);
|
||||||
|
JobID id = new JobID("ajob", 1);
|
||||||
|
RunningJob rj = mock(RunningJob.class);
|
||||||
|
client.setRunningJob(rj);
|
||||||
|
|
||||||
|
//3 retries (default)
|
||||||
|
client.setGetJobRetries(MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES);
|
||||||
|
assertNotNull(client.getJob(id));
|
||||||
|
assertEquals(client.getLastGetJobRetriesCounter(),
|
||||||
|
MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES);
|
||||||
|
|
||||||
|
//beyond yarn.app.mapreduce.client.job.max-retries, will get null
|
||||||
|
client.setGetJobRetries(MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES + 1);
|
||||||
assertNull(client.getJob(id));
|
assertNull(client.getJob(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue