From 48ca6be76e711554631d9babb2ebf74e5adc0f50 Mon Sep 17 00:00:00 2001 From: Robert Kanter Date: Fri, 7 Oct 2016 14:47:06 -0700 Subject: [PATCH] MAPREDUCE-6776. yarn.app.mapreduce.client.job.max-retries should have a more useful default (miklos.szegedi@cloudera.com via rkanter) (cherry picked from commit f3f37e6fb8172f6434e06eb9a137c0c155b3952e) --- .../apache/hadoop/mapreduce/MRJobConfig.java | 2 +- .../src/main/resources/mapred-default.xml | 10 +++--- .../hadoop/mapred/JobClientUnitTest.java | 36 +++++++++++++++---- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 60d357f7612..e880670d191 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -502,7 +502,7 @@ public interface MRJobConfig { */ public static final String MR_CLIENT_JOB_MAX_RETRIES = MR_PREFIX + "client.job.max-retries"; - public static final int DEFAULT_MR_CLIENT_JOB_MAX_RETRIES = 0; + public static final int DEFAULT_MR_CLIENT_JOB_MAX_RETRIES = 3; /** * How long to wait between jobclient retries on failure diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 4669a2e1ff7..84f054e03d1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1502,12 +1502,12 @@ yarn.app.mapreduce.client.job.max-retries - 0 + 3 The number of retries the client will make for getJob and - dependent calls. The default is 0 as this is generally only needed for - non-HDFS DFS where additional, high level retries are required to avoid - spurious failures during the getJob call. 30 is a good value for - WASB + dependent calls. + This is needed for non-HDFS DFS where additional, high level + retries are required to avoid spurious failures during the getJob call. + 30 is a good value for WASB diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/JobClientUnitTest.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/JobClientUnitTest.java index 84b76bfbcf6..b5edf2d0b9c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/JobClientUnitTest.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/JobClientUnitTest.java @@ -225,10 +225,10 @@ public void testGetJobRetry() throws Exception { //To prevent the test from running for a very long time, lower the retry JobConf conf = new JobConf(); - conf.set(MRJobConfig.MR_CLIENT_JOB_MAX_RETRIES, "3"); + conf.setInt(MRJobConfig.MR_CLIENT_JOB_MAX_RETRIES, 2); TestJobClientGetJob client = new TestJobClientGetJob(conf); - JobID id = new JobID("ajob",1); + JobID id = new JobID("ajob", 1); RunningJob rj = mock(RunningJob.class); client.setRunningJob(rj); @@ -236,13 +236,35 @@ public void testGetJobRetry() throws Exception { assertNotNull(client.getJob(id)); assertEquals(client.getLastGetJobRetriesCounter(), 0); - //3 retry - client.setGetJobRetries(3); + //2 retries + client.setGetJobRetries(2); assertNotNull(client.getJob(id)); - assertEquals(client.getLastGetJobRetriesCounter(), 3); + assertEquals(client.getLastGetJobRetriesCounter(), 2); - //beyond MAPREDUCE_JOBCLIENT_GETJOB_MAX_RETRY_KEY, will get null - client.setGetJobRetries(5); + //beyond yarn.app.mapreduce.client.job.max-retries, will get null + client.setGetJobRetries(3); + assertNull(client.getJob(id)); + } + + @Test + public void testGetJobRetryDefault() throws Exception { + + //To prevent the test from running for a very long time, lower the retry + JobConf conf = new JobConf(); + + TestJobClientGetJob client = new TestJobClientGetJob(conf); + JobID id = new JobID("ajob", 1); + RunningJob rj = mock(RunningJob.class); + client.setRunningJob(rj); + + //3 retries (default) + client.setGetJobRetries(MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES); + assertNotNull(client.getJob(id)); + assertEquals(client.getLastGetJobRetriesCounter(), + MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES); + + //beyond yarn.app.mapreduce.client.job.max-retries, will get null + client.setGetJobRetries(MRJobConfig.DEFAULT_MR_CLIENT_JOB_MAX_RETRIES + 1); assertNull(client.getJob(id)); }