diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt
index 0547c3b3b70..1663eee16c5 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt
@@ -21,3 +21,6 @@ HADOOP-7928. HA: Client failover policy is incorrectly trying to fail over all
 
 HADOOP-7925. Add interface and update CLI to query current state to
 HAServiceProtocol (eli via todd)
+
+HADOOP-7932. Make client connection retries on socket time outs configurable.
+ (Uma Maheswara Rao G via todd)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
index 3ea4ed70c2f..534046a9abb 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
@@ -172,6 +172,11 @@ public class CommonConfigurationKeysPublic {
   /** Default value for IPC_CLIENT_CONNECT_MAX_RETRIES_KEY */
   public static final int IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT = 10;
   /** See core-default.xml */
+  public static final String IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY =
+    "ipc.client.connect.max.retries.on.timeouts";
+  /** Default value for IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY */
+  public static final int IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT = 45;
+  /** See core-default.xml */
   public static final String IPC_CLIENT_TCPNODELAY_KEY =
     "ipc.client.tcpnodelay";
   /** Defalt value for IPC_CLIENT_TCPNODELAY_KEY */
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
index a6c2b472825..c6364939111 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
@@ -48,6 +48,7 @@ import org.apache.commons.logging.*;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.ipc.RpcPayloadHeader.*;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.Text;
@@ -224,6 +225,8 @@ public class Client {
     private int maxIdleTime; //connections will be culled if it was idle for
                              //maxIdleTime msecs
     private int maxRetries; //the max. no. of retries for socket connections
+    // the max. no. of retries for socket connections on time out exceptions
+    private int maxRetriesOnSocketTimeouts;
     private boolean tcpNoDelay; // if T then disable Nagle's Algorithm
     private boolean doPing; //do we need to send ping message
     private int pingInterval; // how often sends ping to the server in msecs
@@ -247,6 +250,7 @@ public class Client {
       this.rpcTimeout = remoteId.getRpcTimeout();
       this.maxIdleTime = remoteId.getMaxIdleTime();
       this.maxRetries = remoteId.getMaxRetries();
+      this.maxRetriesOnSocketTimeouts = remoteId.getMaxRetriesOnSocketTimeouts();
       this.tcpNoDelay = remoteId.getTcpNoDelay();
       this.doPing = remoteId.getDoPing();
       this.pingInterval = remoteId.getPingInterval();
@@ -475,11 +479,8 @@ public class Client {
         if (updateAddress()) {
           timeoutFailures = ioFailures = 0;
         }
-        /*
-         * The max number of retries is 45, which amounts to 20s*45 = 15
-         * minutes retries.
-         */
-        handleConnectionFailure(timeoutFailures++, 45, toe);
+        handleConnectionFailure(timeoutFailures++,
+            maxRetriesOnSocketTimeouts, toe);
       } catch (IOException ie) {
         if (updateAddress()) {
           timeoutFailures = ioFailures = 0;
         }
@@ -1263,6 +1264,8 @@ public class Client {
     private int maxIdleTime; //connections will be culled if it was idle for
                              //maxIdleTime msecs
     private int maxRetries; //the max. no. of retries for socket connections
+    // the max. no. of retries for socket connections on time out exceptions
+    private int maxRetriesOnSocketTimeouts;
     private boolean tcpNoDelay; // if T then disable Nagle's Algorithm
     private boolean doPing; //do we need to send ping message
     private int pingInterval; // how often sends ping to the server in msecs
@@ -1270,8 +1273,8 @@ public class Client {
     ConnectionId(InetSocketAddress address, Class<?> protocol,
                  UserGroupInformation ticket, int rpcTimeout,
                  String serverPrincipal, int maxIdleTime,
-                 int maxRetries, boolean tcpNoDelay,
-                 boolean doPing, int pingInterval) {
+                 int maxRetries, int maxRetriesOnSocketTimeouts,
+                 boolean tcpNoDelay, boolean doPing, int pingInterval) {
       this.protocol = protocol;
       this.address = address;
       this.ticket = ticket;
@@ -1279,6 +1282,7 @@ public class Client {
       this.serverPrincipal = serverPrincipal;
       this.maxIdleTime = maxIdleTime;
       this.maxRetries = maxRetries;
+      this.maxRetriesOnSocketTimeouts = maxRetriesOnSocketTimeouts;
       this.tcpNoDelay = tcpNoDelay;
       this.doPing = doPing;
       this.pingInterval = pingInterval;
@@ -1312,6 +1316,11 @@ public class Client {
       return maxRetries;
     }
 
+    /** max connection retries on socket time outs */
+    public int getMaxRetriesOnSocketTimeouts() {
+      return maxRetriesOnSocketTimeouts;
+    }
+
     boolean getTcpNoDelay() {
       return tcpNoDelay;
     }
@@ -1343,6 +1352,9 @@ public class Client {
           rpcTimeout, remotePrincipal,
           conf.getInt("ipc.client.connection.maxidletime", 10000), // 10s
           conf.getInt("ipc.client.connect.max.retries", 10),
+          conf.getInt(
+              CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+              CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT),
           conf.getBoolean("ipc.client.tcpnodelay", false),
           doPing,
           (doPing ? Client.getPingInterval(conf) : 0));
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
index 1f3e67a4f9a..efb2dc1126d 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
@@ -20,7 +20,9 @@ package org.apache.hadoop.ipc;
 
 import org.apache.commons.logging.*;
 
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind;
@@ -590,6 +592,38 @@ public class TestIPC {
         Server.RECEIVED_HTTP_REQ_RESPONSE.getBytes());
   }
 
+  @Test
+  public void testConnectionRetriesOnSocketTimeoutExceptions() throws Exception {
+    Configuration conf = new Configuration();
+    // set max retries to 0
+    conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+        0);
+    assertRetriesOnSocketTimeouts(conf, 1);
+
+    // set max retries to 3
+    conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+        3);
+    assertRetriesOnSocketTimeouts(conf, 4);
+  }
+
+  private void assertRetriesOnSocketTimeouts(Configuration conf,
+      int maxTimeoutRetries) throws IOException, InterruptedException {
+    SocketFactory mockFactory = Mockito.mock(SocketFactory.class);
+    doThrow(new SocketTimeoutException()).when(mockFactory).createSocket();
+    Client client = new Client(IntWritable.class, conf, mockFactory);
+    InetSocketAddress address = new InetSocketAddress("127.0.0.1", 9090);
+    try {
+      client.call(new IntWritable(RANDOM.nextInt()), address, null, null, 0,
+          conf);
+      fail("Not throwing the SocketTimeoutException");
+    } catch (SocketTimeoutException e) {
+      Mockito.verify(mockFactory, Mockito.times(maxTimeoutRetries))
+          .createSocket();
+    }
+  }
+
   private void doIpcVersionTest(
       byte[] requestData,
       byte[] expectedResponse) throws Exception {
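
Usage note (not part of the patch): with this change the retry count for SocketTimeoutException during connection setup is read from the client's Configuration when the ConnectionId is built, instead of being hard-coded to 45 (which, at the 20s connect timeout cited in the removed comment, amounted to roughly 15 minutes of retrying). Below is a minimal sketch of how a caller might lower the value; the class name RetryTuningExample is hypothetical, and only the key and default introduced by the patch are real.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;

public class RetryTuningExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Give up after 3 connect timeouts instead of the default 45.
    conf.setInt(
        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
        3);
    // Equivalent core-site.xml entry:
    //   <property>
    //     <name>ipc.client.connect.max.retries.on.timeouts</name>
    //     <value>3</value>
    //   </property>
    // Any IPC client created from this conf (e.g. via RPC.getProxy) picks up
    // the value when its ConnectionId is constructed.
  }
}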