diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt
index 0547c3b3b70..1663eee16c5 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt
@@ -21,3 +21,6 @@ HADOOP-7928. HA: Client failover policy is incorrectly trying to fail over all
HADOOP-7925. Add interface and update CLI to query current state to
HAServiceProtocol (eli via todd)
+
+HADOOP-7932. Make client connection retries on socket time outs configurable.
+ (Uma Maheswara Rao G via todd)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
index 3ea4ed70c2f..534046a9abb 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
@@ -172,6 +172,11 @@ public class CommonConfigurationKeysPublic {
/** Default value for IPC_CLIENT_CONNECT_MAX_RETRIES_KEY */
public static final int IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT = 10;
/** See core-default.xml */
+ public static final String IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY =
+ "ipc.client.connect.max.retries.on.timeouts";
+ /** Default value for IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY */
+ public static final int IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT = 45;
+ /** See core-default.xml */
public static final String IPC_CLIENT_TCPNODELAY_KEY =
"ipc.client.tcpnodelay";
/** Defalt value for IPC_CLIENT_TCPNODELAY_KEY */
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
index a6c2b472825..c6364939111 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
@@ -48,6 +48,7 @@ import org.apache.commons.logging.*;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.ipc.RpcPayloadHeader.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
@@ -224,6 +225,8 @@ public class Client {
private int maxIdleTime; //connections will be culled if it was idle for
//maxIdleTime msecs
private int maxRetries; //the max. no. of retries for socket connections
+ // the max. no. of retries for socket connections on time out exceptions
+ private int maxRetriesOnSocketTimeouts;
private boolean tcpNoDelay; // if T then disable Nagle's Algorithm
private boolean doPing; //do we need to send ping message
private int pingInterval; // how often sends ping to the server in msecs
@@ -247,6 +250,7 @@ public class Client {
this.rpcTimeout = remoteId.getRpcTimeout();
this.maxIdleTime = remoteId.getMaxIdleTime();
this.maxRetries = remoteId.getMaxRetries();
+ this.maxRetriesOnSocketTimeouts = remoteId.getMaxRetriesOnSocketTimeouts();
this.tcpNoDelay = remoteId.getTcpNoDelay();
this.doPing = remoteId.getDoPing();
this.pingInterval = remoteId.getPingInterval();
@@ -475,11 +479,8 @@ public class Client {
if (updateAddress()) {
timeoutFailures = ioFailures = 0;
}
- /*
- * The max number of retries is 45, which amounts to 20s*45 = 15
- * minutes retries.
- */
- handleConnectionFailure(timeoutFailures++, 45, toe);
+ handleConnectionFailure(timeoutFailures++,
+ maxRetriesOnSocketTimeouts, toe);
} catch (IOException ie) {
if (updateAddress()) {
timeoutFailures = ioFailures = 0;
@@ -1263,6 +1264,8 @@ public class Client {
private int maxIdleTime; //connections will be culled if it was idle for
//maxIdleTime msecs
private int maxRetries; //the max. no. of retries for socket connections
+ // the max. no. of retries for socket connections on time out exceptions
+ private int maxRetriesOnSocketTimeouts;
private boolean tcpNoDelay; // if T then disable Nagle's Algorithm
private boolean doPing; //do we need to send ping message
private int pingInterval; // how often sends ping to the server in msecs
@@ -1270,8 +1273,8 @@ public class Client {
ConnectionId(InetSocketAddress address, Class<?> protocol,
UserGroupInformation ticket, int rpcTimeout,
String serverPrincipal, int maxIdleTime,
- int maxRetries, boolean tcpNoDelay,
- boolean doPing, int pingInterval) {
+ int maxRetries, int maxRetriesOnSocketTimeouts,
+ boolean tcpNoDelay, boolean doPing, int pingInterval) {
this.protocol = protocol;
this.address = address;
this.ticket = ticket;
@@ -1279,6 +1282,7 @@ public class Client {
this.serverPrincipal = serverPrincipal;
this.maxIdleTime = maxIdleTime;
this.maxRetries = maxRetries;
+ this.maxRetriesOnSocketTimeouts = maxRetriesOnSocketTimeouts;
this.tcpNoDelay = tcpNoDelay;
this.doPing = doPing;
this.pingInterval = pingInterval;
@@ -1312,6 +1316,11 @@ public class Client {
return maxRetries;
}
+ /** max connection retries on socket time outs */
+ public int getMaxRetriesOnSocketTimeouts() {
+ return maxRetriesOnSocketTimeouts;
+ }
+
boolean getTcpNoDelay() {
return tcpNoDelay;
}
@@ -1343,6 +1352,9 @@ public class Client {
rpcTimeout, remotePrincipal,
conf.getInt("ipc.client.connection.maxidletime", 10000), // 10s
conf.getInt("ipc.client.connect.max.retries", 10),
+ conf.getInt(
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT),
conf.getBoolean("ipc.client.tcpnodelay", false),
doPing,
(doPing ? Client.getPingInterval(conf) : 0));
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
index 1f3e67a4f9a..efb2dc1126d 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
@@ -20,7 +20,9 @@ package org.apache.hadoop.ipc;
import org.apache.commons.logging.*;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind;
@@ -590,6 +592,38 @@ public class TestIPC {
Server.RECEIVED_HTTP_REQ_RESPONSE.getBytes());
}
+ @Test
+ public void testConnectionRetriesOnSocketTimeoutExceptions() throws Exception {
+ Configuration conf = new Configuration();
+ // set max retries to 0
+ conf.setInt(
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+ 0);
+ assertRetriesOnSocketTimeouts(conf, 1);
+
+ // set max retries to 3
+ conf.setInt(
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+ 3);
+ assertRetriesOnSocketTimeouts(conf, 4);
+ }
+
+ private void assertRetriesOnSocketTimeouts(Configuration conf,
+ int maxTimeoutRetries) throws IOException, InterruptedException {
+ SocketFactory mockFactory = Mockito.mock(SocketFactory.class);
+ doThrow(new SocketTimeoutException()).when(mockFactory).createSocket();
+ Client client = new Client(IntWritable.class, conf, mockFactory);
+ InetSocketAddress address = new InetSocketAddress("127.0.0.1", 9090);
+ try {
+ client.call(new IntWritable(RANDOM.nextInt()), address, null, null, 0,
+ conf);
+ fail("Not throwing the SocketTimeoutException");
+ } catch (SocketTimeoutException e) {
+ Mockito.verify(mockFactory, Mockito.times(maxTimeoutRetries))
+ .createSocket();
+ }
+ }
+
private void doIpcVersionTest(
byte[] requestData,
byte[] expectedResponse) throws Exception {
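
Usage sketch (outside the diff above): a minimal example of how a client could override the new retry limit programmatically. The key constant, its default of 45, and the Configuration / CommonConfigurationKeysPublic APIs come from the changes above; the class name RetryConfigExample and the value 20 are illustrative assumptions. The same property, ipc.client.connect.max.retries.on.timeouts, could equally be set in core-site.xml.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.CommonConfigurationKeysPublic;

    public class RetryConfigExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // The default of 45 retries preserves the previously hard-coded
        // behaviour of roughly 15 minutes at the 20s connect timeout
        // (see the comment removed from Client.java above).
        // Here we lower it to 20 retries as an example.
        conf.setInt(
            CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
            20);
        // Any IPC Client whose ConnectionId is built from this Configuration
        // will now give up after 20 SocketTimeoutExceptions while connecting.
      }
    }

A smaller value shortens the quarter hour a client would otherwise spend retrying a single unreachable server, which is presumably why the limit is being made configurable on this HA branch.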