diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 0bff2f228e5..b7aeb591e76 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -217,6 +217,9 @@ Release 2.3.0 - UNRELEASED MAPREDUCE-5610. TestSleepJob fails in jdk7 (Jonathan Eagles via jlowe) + MAPREDUCE-5616. MR Client-AppMaster RPC max retries on socket timeout is too + high. (cnauroth) + Release 2.2.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 76097de896f..2622ec5da66 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -357,13 +357,21 @@ public interface MRJobConfig { public static final String MR_AM_PREFIX = MR_PREFIX + "am."; - /** The number of client retires to the AM - before reconnecting to the RM + /** The number of client retries to the AM - before reconnecting to the RM * to fetch Application State. */ public static final String MR_CLIENT_TO_AM_IPC_MAX_RETRIES = MR_PREFIX + "client-am.ipc.max-retries"; public static final int DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES = 3; + /** The number of client retries on socket timeouts to the AM - before + * reconnecting to the RM to fetch Application Status. 
+ */ + public static final String MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS = + MR_PREFIX + "client-am.ipc.max-retries-on-timeouts"; + public static final int + DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS = 3; + /** * The number of client retries to the RM/HS before throwing exception. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 4a228820b23..fcaa275cc1a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1010,6 +1010,13 @@ to the RM to fetch Application Status. + + yarn.app.mapreduce.client-am.ipc.max-retries-on-timeouts + 3 + The number of client retries on socket timeouts to the AM - before + reconnecting to the RM to fetch Application Status. 
+ + yarn.app.mapreduce.client.max-retries 3 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java index d9dd777efa5..c3eee2cfdcf 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java @@ -107,6 +107,10 @@ public class ClientServiceDelegate { CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES, MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES)); + this.conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + this.conf.getInt(MRJobConfig.MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS, + MRJobConfig.DEFAULT_MR_CLIENT_TO_AM_IPC_MAX_RETRIES_ON_TIMEOUTS)); this.rm = rm; this.jobId = jobId; this.historyServerProxy = historyServerProxy;