HDFS-3504. Support configurable retry policy in DFSClient for RPC connections and RPC calls, and add MultipleLinearRandomRetry, a new retry policy.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1349124 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2012-06-12 05:27:15 +00:00
parent 8879653ab4
commit 45fafc2b8f
16 changed files with 676 additions and 121 deletions

View File

@ -33,7 +33,7 @@ import org.apache.hadoop.ipc.RpcInvocationHandler;
class RetryInvocationHandler implements RpcInvocationHandler { class RetryInvocationHandler implements RpcInvocationHandler {
public static final Log LOG = LogFactory.getLog(RetryInvocationHandler.class); public static final Log LOG = LogFactory.getLog(RetryInvocationHandler.class);
private FailoverProxyProvider proxyProvider; private final FailoverProxyProvider proxyProvider;
/** /**
* The number of times the associated proxyProvider has ever been failed over. * The number of times the associated proxyProvider has ever been failed over.
@ -41,26 +41,25 @@ class RetryInvocationHandler implements RpcInvocationHandler {
private long proxyProviderFailoverCount = 0; private long proxyProviderFailoverCount = 0;
private volatile boolean hasMadeASuccessfulCall = false; private volatile boolean hasMadeASuccessfulCall = false;
private RetryPolicy defaultPolicy; private final RetryPolicy defaultPolicy;
private Map<String,RetryPolicy> methodNameToPolicyMap; private final Map<String,RetryPolicy> methodNameToPolicyMap;
private Object currentProxy; private Object currentProxy;
public RetryInvocationHandler(FailoverProxyProvider proxyProvider, public RetryInvocationHandler(FailoverProxyProvider proxyProvider,
RetryPolicy retryPolicy) { RetryPolicy retryPolicy) {
this.proxyProvider = proxyProvider; this(proxyProvider, retryPolicy, Collections.<String, RetryPolicy>emptyMap());
this.defaultPolicy = retryPolicy;
this.methodNameToPolicyMap = Collections.emptyMap();
this.currentProxy = proxyProvider.getProxy();
} }
public RetryInvocationHandler(FailoverProxyProvider proxyProvider, public RetryInvocationHandler(FailoverProxyProvider proxyProvider,
RetryPolicy defaultPolicy,
Map<String, RetryPolicy> methodNameToPolicyMap) { Map<String, RetryPolicy> methodNameToPolicyMap) {
this.proxyProvider = proxyProvider; this.proxyProvider = proxyProvider;
this.defaultPolicy = RetryPolicies.TRY_ONCE_THEN_FAIL; this.defaultPolicy = defaultPolicy;
this.methodNameToPolicyMap = methodNameToPolicyMap; this.methodNameToPolicyMap = methodNameToPolicyMap;
this.currentProxy = proxyProvider.getProxy(); this.currentProxy = proxyProvider.getProxy();
} }
@Override
public Object invoke(Object proxy, Method method, Object[] args) public Object invoke(Object proxy, Method method, Object[] args)
throws Throwable { throws Throwable {
RetryPolicy policy = methodNameToPolicyMap.get(method.getName()); RetryPolicy policy = methodNameToPolicyMap.get(method.getName());

View File

@ -22,10 +22,13 @@ import java.net.ConnectException;
import java.net.NoRouteToHostException; import java.net.NoRouteToHostException;
import java.net.SocketException; import java.net.SocketException;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Random;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Random;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
@ -33,8 +36,6 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.ipc.StandbyException;
import com.google.common.annotations.VisibleForTesting;
/** /**
* <p> * <p>
* A collection of useful implementations of {@link RetryPolicy}. * A collection of useful implementations of {@link RetryPolicy}.
@ -44,7 +45,12 @@ public class RetryPolicies {
public static final Log LOG = LogFactory.getLog(RetryPolicies.class); public static final Log LOG = LogFactory.getLog(RetryPolicies.class);
private static final Random RAND = new Random(); private static ThreadLocal<Random> RANDOM = new ThreadLocal<Random>() {
@Override
protected Random initialValue() {
return new Random();
}
};
/** /**
* <p> * <p>
@ -157,17 +163,35 @@ public class RetryPolicies {
} }
} }
/**
* Retry up to maxRetries.
* The actual sleep time of the n-th retry is f(n, sleepTime),
* where f is a function provided by the subclass implementation.
*
* The object of the subclasses should be immutable;
* otherwise, the subclass must override hashCode(), equals(..) and toString().
*/
static abstract class RetryLimited implements RetryPolicy { static abstract class RetryLimited implements RetryPolicy {
int maxRetries; final int maxRetries;
long sleepTime; final long sleepTime;
TimeUnit timeUnit; final TimeUnit timeUnit;
private String myString;
RetryLimited(int maxRetries, long sleepTime, TimeUnit timeUnit) {
if (maxRetries < 0) {
throw new IllegalArgumentException("maxRetries = " + maxRetries+" < 0");
}
if (sleepTime < 0) {
throw new IllegalArgumentException("sleepTime = " + sleepTime + " < 0");
}
public RetryLimited(int maxRetries, long sleepTime, TimeUnit timeUnit) {
this.maxRetries = maxRetries; this.maxRetries = maxRetries;
this.sleepTime = sleepTime; this.sleepTime = sleepTime;
this.timeUnit = timeUnit; this.timeUnit = timeUnit;
} }
@Override
public RetryAction shouldRetry(Exception e, int retries, int failovers, public RetryAction shouldRetry(Exception e, int retries, int failovers,
boolean isMethodIdempotent) throws Exception { boolean isMethodIdempotent) throws Exception {
if (retries >= maxRetries) { if (retries >= maxRetries) {
@ -178,6 +202,30 @@ public class RetryPolicies {
} }
protected abstract long calculateSleepTime(int retries); protected abstract long calculateSleepTime(int retries);
@Override
public int hashCode() {
return toString().hashCode();
}
@Override
public boolean equals(final Object that) {
if (this == that) {
return true;
} else if (that == null || this.getClass() != that.getClass()) {
return false;
}
return this.toString().equals(that.toString());
}
@Override
public String toString() {
if (myString == null) {
myString = getClass().getSimpleName() + "(maxRetries=" + maxRetries
+ ", sleepTime=" + sleepTime + " " + timeUnit + ")";
}
return myString;
}
} }
static class RetryUpToMaximumCountWithFixedSleep extends RetryLimited { static class RetryUpToMaximumCountWithFixedSleep extends RetryLimited {
@ -208,6 +256,169 @@ public class RetryPolicies {
} }
} }
/**
* Given pairs of number of retries and sleep time (n0, t0), (n1, t1), ...,
* the first n0 retries sleep t0 milliseconds on average,
* the following n1 retries sleep t1 milliseconds on average, and so on.
*
* For all the sleep, the actual sleep time is randomly uniform distributed
* in the close interval [0.5t, 1.5t], where t is the sleep time specified.
*
* The objects of this class are immutable.
*/
public static class MultipleLinearRandomRetry implements RetryPolicy {
/** Pairs of numRetries and sleepSeconds */
public static class Pair {
final int numRetries;
final int sleepMillis;
public Pair(final int numRetries, final int sleepMillis) {
if (numRetries < 0) {
throw new IllegalArgumentException("numRetries = " + numRetries+" < 0");
}
if (sleepMillis < 0) {
throw new IllegalArgumentException("sleepMillis = " + sleepMillis + " < 0");
}
this.numRetries = numRetries;
this.sleepMillis = sleepMillis;
}
@Override
public String toString() {
return numRetries + "x" + sleepMillis + "ms";
}
}
private final List<Pair> pairs;
private String myString;
public MultipleLinearRandomRetry(List<Pair> pairs) {
if (pairs == null || pairs.isEmpty()) {
throw new IllegalArgumentException("pairs must be neither null nor empty.");
}
this.pairs = Collections.unmodifiableList(pairs);
}
@Override
public RetryAction shouldRetry(Exception e, int curRetry, int failovers,
boolean isMethodIdempotent) throws Exception {
final Pair p = searchPair(curRetry);
if (p == null) {
//no more retries.
return RetryAction.FAIL;
}
//calculate sleep time and return.
final double ratio = RANDOM.get().nextDouble() + 0.5;//0.5 <= ratio <=1.5
final long sleepTime = Math.round(p.sleepMillis * ratio);
return new RetryAction(RetryAction.RetryDecision.RETRY, sleepTime);
}
/**
* Given the current number of retry, search the corresponding pair.
* @return the corresponding pair,
* or null if the current number of retry > maximum number of retry.
*/
private Pair searchPair(int curRetry) {
int i = 0;
for(; i < pairs.size() && curRetry > pairs.get(i).numRetries; i++) {
curRetry -= pairs.get(i).numRetries;
}
return i == pairs.size()? null: pairs.get(i);
}
@Override
public int hashCode() {
return toString().hashCode();
}
@Override
public boolean equals(final Object that) {
if (this == that) {
return true;
} else if (that == null || this.getClass() != that.getClass()) {
return false;
}
return this.toString().equals(that.toString());
}
@Override
public String toString() {
if (myString == null) {
myString = getClass().getSimpleName() + pairs;
}
return myString;
}
/**
* Parse the given string as a MultipleLinearRandomRetry object.
* The format of the string is "t_1, n_1, t_2, n_2, ...",
* where t_i and n_i are the i-th pair of sleep time and number of retires.
* Note that the white spaces in the string are ignored.
*
* @return the parsed object, or null if the parsing fails.
*/
public static MultipleLinearRandomRetry parseCommaSeparatedString(String s) {
final String[] elements = s.split(",");
if (elements.length == 0) {
LOG.warn("Illegal value: there is no element in \"" + s + "\".");
return null;
}
if (elements.length % 2 != 0) {
LOG.warn("Illegal value: the number of elements in \"" + s + "\" is "
+ elements.length + " but an even number of elements is expected.");
return null;
}
final List<RetryPolicies.MultipleLinearRandomRetry.Pair> pairs
= new ArrayList<RetryPolicies.MultipleLinearRandomRetry.Pair>();
for(int i = 0; i < elements.length; ) {
//parse the i-th sleep-time
final int sleep = parsePositiveInt(elements, i++, s);
if (sleep == -1) {
return null; //parse fails
}
//parse the i-th number-of-retries
final int retries = parsePositiveInt(elements, i++, s);
if (retries == -1) {
return null; //parse fails
}
pairs.add(new RetryPolicies.MultipleLinearRandomRetry.Pair(retries, sleep));
}
return new RetryPolicies.MultipleLinearRandomRetry(pairs);
}
/**
* Parse the i-th element as an integer.
* @return -1 if the parsing fails or the parsed value <= 0;
* otherwise, return the parsed value.
*/
private static int parsePositiveInt(final String[] elements,
final int i, final String originalString) {
final String s = elements[i].trim();
final int n;
try {
n = Integer.parseInt(s);
} catch(NumberFormatException nfe) {
LOG.warn("Failed to parse \"" + s + "\", which is the index " + i
+ " element in \"" + originalString + "\"", nfe);
return -1;
}
if (n <= 0) {
LOG.warn("The value " + n + " <= 0: it is parsed from the string \""
+ s + "\" which is the index " + i + " element in \""
+ originalString + "\"");
return -1;
}
return n;
}
}
static class ExceptionDependentRetry implements RetryPolicy { static class ExceptionDependentRetry implements RetryPolicy {
RetryPolicy defaultPolicy; RetryPolicy defaultPolicy;
@ -265,6 +476,14 @@ public class RetryPolicies {
public ExponentialBackoffRetry( public ExponentialBackoffRetry(
int maxRetries, long sleepTime, TimeUnit timeUnit) { int maxRetries, long sleepTime, TimeUnit timeUnit) {
super(maxRetries, sleepTime, timeUnit); super(maxRetries, sleepTime, timeUnit);
if (maxRetries < 0) {
throw new IllegalArgumentException("maxRetries = " + maxRetries + " < 0");
} else if (maxRetries >= Long.SIZE - 1) {
//calculateSleepTime may overflow.
throw new IllegalArgumentException("maxRetries = " + maxRetries
+ " >= " + (Long.SIZE - 1));
}
} }
@Override @Override
@ -353,11 +572,10 @@ public class RetryPolicies {
* @param cap value at which to cap the base sleep time * @param cap value at which to cap the base sleep time
* @return an amount of time to sleep * @return an amount of time to sleep
*/ */
@VisibleForTesting private static long calculateExponentialTime(long time, int retries,
public static long calculateExponentialTime(long time, int retries,
long cap) { long cap) {
long baseTime = Math.min(time * ((long)1 << retries), cap); long baseTime = Math.min(time * (1L << retries), cap);
return (long) (baseTime * (RAND.nextFloat() + 0.5)); return (long) (baseTime * (RANDOM.get().nextDouble() + 0.5));
} }
private static long calculateExponentialTime(long time, int retries) { private static long calculateExponentialTime(long time, int retries) {

View File

@ -60,6 +60,12 @@ public interface RetryPolicy {
this.reason = reason; this.reason = reason;
} }
@Override
public String toString() {
return getClass().getSimpleName() + "(action=" + action
+ ", delayMillis=" + delayMillis + ", reason=" + reason + ")";
}
public enum RetryDecision { public enum RetryDecision {
FAIL, FAIL,
RETRY, RETRY,

View File

@ -75,9 +75,10 @@ public class RetryProxy {
*/ */
public static Object create(Class<?> iface, Object implementation, public static Object create(Class<?> iface, Object implementation,
Map<String,RetryPolicy> methodNameToPolicyMap) { Map<String,RetryPolicy> methodNameToPolicyMap) {
return RetryProxy.create(iface, return create(iface,
new DefaultFailoverProxyProvider(iface, implementation), new DefaultFailoverProxyProvider(iface, implementation),
methodNameToPolicyMap); methodNameToPolicyMap,
RetryPolicies.TRY_ONCE_THEN_FAIL);
} }
/** /**
@ -92,11 +93,13 @@ public class RetryProxy {
* @return the retry proxy * @return the retry proxy
*/ */
public static Object create(Class<?> iface, FailoverProxyProvider proxyProvider, public static Object create(Class<?> iface, FailoverProxyProvider proxyProvider,
Map<String,RetryPolicy> methodNameToPolicyMap) { Map<String,RetryPolicy> methodNameToPolicyMap,
RetryPolicy defaultPolicy) {
return Proxy.newProxyInstance( return Proxy.newProxyInstance(
proxyProvider.getInterface().getClassLoader(), proxyProvider.getInterface().getClassLoader(),
new Class<?>[] { iface }, new Class<?>[] { iface },
new RetryInvocationHandler(proxyProvider, methodNameToPolicyMap) new RetryInvocationHandler(proxyProvider, defaultPolicy,
methodNameToPolicyMap)
); );
} }
} }

View File

@ -18,47 +18,51 @@
package org.apache.hadoop.ipc; package org.apache.hadoop.ipc;
import java.net.InetAddress;
import java.net.Socket;
import java.net.InetSocketAddress;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;
import java.io.IOException;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.BufferedInputStream; import java.io.BufferedInputStream;
import java.io.BufferedOutputStream; import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FilterInputStream; import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InterruptedIOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;
import java.security.PrivilegedExceptionAction; import java.security.PrivilegedExceptionAction;
import java.util.Hashtable; import java.util.Hashtable;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map.Entry;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.Map.Entry; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import javax.net.SocketFactory; import javax.net.SocketFactory;
import org.apache.commons.logging.*; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryPolicy.RetryAction;
import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.IpcConnectionContextProto; import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.IpcConnectionContextProto;
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcPayloadHeaderProto; import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcPayloadHeaderProto;
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcPayloadOperationProto; import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcPayloadOperationProto;
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcResponseHeaderProto; import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcResponseHeaderProto;
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcStatusProto; import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcStatusProto;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.KerberosInfo; import org.apache.hadoop.security.KerberosInfo;
import org.apache.hadoop.security.SaslRpcClient; import org.apache.hadoop.security.SaslRpcClient;
@ -67,8 +71,8 @@ import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.security.token.TokenSelector;
import org.apache.hadoop.security.token.TokenInfo; import org.apache.hadoop.security.token.TokenInfo;
import org.apache.hadoop.security.token.TokenSelector;
import org.apache.hadoop.util.ProtoUtil; import org.apache.hadoop.util.ProtoUtil;
import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ReflectionUtils;
@ -80,8 +84,8 @@ import org.apache.hadoop.util.ReflectionUtils;
*/ */
public class Client { public class Client {
public static final Log LOG = public static final Log LOG = LogFactory.getLog(Client.class);
LogFactory.getLog(Client.class);
private Hashtable<ConnectionId, Connection> connections = private Hashtable<ConnectionId, Connection> connections =
new Hashtable<ConnectionId, Connection>(); new Hashtable<ConnectionId, Connection>();
@ -228,8 +232,7 @@ public class Client {
private int rpcTimeout; private int rpcTimeout;
private int maxIdleTime; //connections will be culled if it was idle for private int maxIdleTime; //connections will be culled if it was idle for
//maxIdleTime msecs //maxIdleTime msecs
private int maxRetries; //the max. no. of retries for socket connections private final RetryPolicy connectionRetryPolicy;
// the max. no. of retries for socket connections on time out exceptions
private int maxRetriesOnSocketTimeouts; private int maxRetriesOnSocketTimeouts;
private boolean tcpNoDelay; // if T then disable Nagle's Algorithm private boolean tcpNoDelay; // if T then disable Nagle's Algorithm
private boolean doPing; //do we need to send ping message private boolean doPing; //do we need to send ping message
@ -253,7 +256,7 @@ public class Client {
} }
this.rpcTimeout = remoteId.getRpcTimeout(); this.rpcTimeout = remoteId.getRpcTimeout();
this.maxIdleTime = remoteId.getMaxIdleTime(); this.maxIdleTime = remoteId.getMaxIdleTime();
this.maxRetries = remoteId.getMaxRetries(); this.connectionRetryPolicy = remoteId.connectionRetryPolicy;
this.maxRetriesOnSocketTimeouts = remoteId.getMaxRetriesOnSocketTimeouts(); this.maxRetriesOnSocketTimeouts = remoteId.getMaxRetriesOnSocketTimeouts();
this.tcpNoDelay = remoteId.getTcpNoDelay(); this.tcpNoDelay = remoteId.getTcpNoDelay();
this.doPing = remoteId.getDoPing(); this.doPing = remoteId.getDoPing();
@ -488,7 +491,7 @@ public class Client {
if (updateAddress()) { if (updateAddress()) {
timeoutFailures = ioFailures = 0; timeoutFailures = ioFailures = 0;
} }
handleConnectionFailure(ioFailures++, maxRetries, ie); handleConnectionFailure(ioFailures++, ie);
} }
} }
} }
@ -680,8 +683,36 @@ public class Client {
Thread.sleep(1000); Thread.sleep(1000);
} catch (InterruptedException ignored) {} } catch (InterruptedException ignored) {}
LOG.info("Retrying connect to server: " + server + LOG.info("Retrying connect to server: " + server + ". Already tried "
". Already tried " + curRetries + " time(s)."); + curRetries + " time(s); maxRetries=" + maxRetries);
}
private void handleConnectionFailure(int curRetries, IOException ioe
) throws IOException {
closeConnection();
final RetryAction action;
try {
action = connectionRetryPolicy.shouldRetry(ioe, curRetries, 0, true);
} catch(Exception e) {
throw e instanceof IOException? (IOException)e: new IOException(e);
}
if (action.action == RetryAction.RetryDecision.FAIL) {
if (action.reason != null) {
LOG.warn("Failed to connect to server: " + server + ": "
+ action.reason, ioe);
}
throw ioe;
}
try {
Thread.sleep(action.delayMillis);
} catch (InterruptedException e) {
throw (IOException)new InterruptedIOException("Interrupted: action="
+ action + ", retry policy=" + connectionRetryPolicy).initCause(e);
}
LOG.info("Retrying connect to server: " + server + ". Already tried "
+ curRetries + " time(s); retry policy is " + connectionRetryPolicy);
} }
/** /**
@ -849,6 +880,10 @@ public class Client {
try { try {
RpcResponseHeaderProto response = RpcResponseHeaderProto response =
RpcResponseHeaderProto.parseDelimitedFrom(in); RpcResponseHeaderProto.parseDelimitedFrom(in);
if (response == null) {
throw new IOException("Response is null.");
}
int callId = response.getCallId(); int callId = response.getCallId();
if (LOG.isDebugEnabled()) if (LOG.isDebugEnabled())
LOG.debug(getName() + " got value #" + callId); LOG.debug(getName() + " got value #" + callId);
@ -1287,7 +1322,7 @@ public class Client {
private final String serverPrincipal; private final String serverPrincipal;
private final int maxIdleTime; //connections will be culled if it was idle for private final int maxIdleTime; //connections will be culled if it was idle for
//maxIdleTime msecs //maxIdleTime msecs
private final int maxRetries; //the max. no. of retries for socket connections private final RetryPolicy connectionRetryPolicy;
// the max. no. of retries for socket connections on time out exceptions // the max. no. of retries for socket connections on time out exceptions
private final int maxRetriesOnSocketTimeouts; private final int maxRetriesOnSocketTimeouts;
private final boolean tcpNoDelay; // if T then disable Nagle's Algorithm private final boolean tcpNoDelay; // if T then disable Nagle's Algorithm
@ -1297,7 +1332,7 @@ public class Client {
ConnectionId(InetSocketAddress address, Class<?> protocol, ConnectionId(InetSocketAddress address, Class<?> protocol,
UserGroupInformation ticket, int rpcTimeout, UserGroupInformation ticket, int rpcTimeout,
String serverPrincipal, int maxIdleTime, String serverPrincipal, int maxIdleTime,
int maxRetries, int maxRetriesOnSocketTimeouts, RetryPolicy connectionRetryPolicy, int maxRetriesOnSocketTimeouts,
boolean tcpNoDelay, boolean doPing, int pingInterval) { boolean tcpNoDelay, boolean doPing, int pingInterval) {
this.protocol = protocol; this.protocol = protocol;
this.address = address; this.address = address;
@ -1305,7 +1340,7 @@ public class Client {
this.rpcTimeout = rpcTimeout; this.rpcTimeout = rpcTimeout;
this.serverPrincipal = serverPrincipal; this.serverPrincipal = serverPrincipal;
this.maxIdleTime = maxIdleTime; this.maxIdleTime = maxIdleTime;
this.maxRetries = maxRetries; this.connectionRetryPolicy = connectionRetryPolicy;
this.maxRetriesOnSocketTimeouts = maxRetriesOnSocketTimeouts; this.maxRetriesOnSocketTimeouts = maxRetriesOnSocketTimeouts;
this.tcpNoDelay = tcpNoDelay; this.tcpNoDelay = tcpNoDelay;
this.doPing = doPing; this.doPing = doPing;
@ -1336,10 +1371,6 @@ public class Client {
return maxIdleTime; return maxIdleTime;
} }
int getMaxRetries() {
return maxRetries;
}
/** max connection retries on socket time outs */ /** max connection retries on socket time outs */
public int getMaxRetriesOnSocketTimeouts() { public int getMaxRetriesOnSocketTimeouts() {
return maxRetriesOnSocketTimeouts; return maxRetriesOnSocketTimeouts;
@ -1357,6 +1388,12 @@ public class Client {
return pingInterval; return pingInterval;
} }
static ConnectionId getConnectionId(InetSocketAddress addr,
Class<?> protocol, UserGroupInformation ticket, int rpcTimeout,
Configuration conf) throws IOException {
return getConnectionId(addr, protocol, ticket, rpcTimeout, null, conf);
}
/** /**
* Returns a ConnectionId object. * Returns a ConnectionId object.
* @param addr Remote address for the connection. * @param addr Remote address for the connection.
@ -1367,9 +1404,18 @@ public class Client {
* @return A ConnectionId instance * @return A ConnectionId instance
* @throws IOException * @throws IOException
*/ */
public static ConnectionId getConnectionId(InetSocketAddress addr, static ConnectionId getConnectionId(InetSocketAddress addr,
Class<?> protocol, UserGroupInformation ticket, int rpcTimeout, Class<?> protocol, UserGroupInformation ticket, int rpcTimeout,
Configuration conf) throws IOException { RetryPolicy connectionRetryPolicy, Configuration conf) throws IOException {
if (connectionRetryPolicy == null) {
final int max = conf.getInt(
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT);
connectionRetryPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
max, 1, TimeUnit.SECONDS);
}
String remotePrincipal = getRemotePrincipal(conf, addr, protocol); String remotePrincipal = getRemotePrincipal(conf, addr, protocol);
boolean doPing = boolean doPing =
conf.getBoolean(CommonConfigurationKeys.IPC_CLIENT_PING_KEY, true); conf.getBoolean(CommonConfigurationKeys.IPC_CLIENT_PING_KEY, true);
@ -1377,8 +1423,7 @@ public class Client {
rpcTimeout, remotePrincipal, rpcTimeout, remotePrincipal,
conf.getInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, conf.getInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_DEFAULT), CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_DEFAULT),
conf.getInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, connectionRetryPolicy,
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT),
conf.getInt( conf.getInt(
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT), CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT),
@ -1421,7 +1466,7 @@ public class Client {
return isEqual(this.address, that.address) return isEqual(this.address, that.address)
&& this.doPing == that.doPing && this.doPing == that.doPing
&& this.maxIdleTime == that.maxIdleTime && this.maxIdleTime == that.maxIdleTime
&& this.maxRetries == that.maxRetries && isEqual(this.connectionRetryPolicy, that.connectionRetryPolicy)
&& this.pingInterval == that.pingInterval && this.pingInterval == that.pingInterval
&& isEqual(this.protocol, that.protocol) && isEqual(this.protocol, that.protocol)
&& this.rpcTimeout == that.rpcTimeout && this.rpcTimeout == that.rpcTimeout
@ -1434,11 +1479,10 @@ public class Client {
@Override @Override
public int hashCode() { public int hashCode() {
int result = 1; int result = connectionRetryPolicy.hashCode();
result = PRIME * result + ((address == null) ? 0 : address.hashCode()); result = PRIME * result + ((address == null) ? 0 : address.hashCode());
result = PRIME * result + (doPing ? 1231 : 1237); result = PRIME * result + (doPing ? 1231 : 1237);
result = PRIME * result + maxIdleTime; result = PRIME * result + maxIdleTime;
result = PRIME * result + maxRetries;
result = PRIME * result + pingInterval; result = PRIME * result + pingInterval;
result = PRIME * result + ((protocol == null) ? 0 : protocol.hashCode()); result = PRIME * result + ((protocol == null) ? 0 : protocol.hashCode());
result = PRIME * result + rpcTimeout; result = PRIME * result + rpcTimeout;

View File

@ -36,9 +36,9 @@ import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputOutputStream; import org.apache.hadoop.io.DataOutputOutputStream;
import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.ipc.Client.ConnectionId; import org.apache.hadoop.ipc.Client.ConnectionId;
import org.apache.hadoop.ipc.RPC.RpcInvoker; import org.apache.hadoop.ipc.RPC.RpcInvoker;
import org.apache.hadoop.ipc.protobuf.HadoopRpcProtos.HadoopRpcRequestProto; import org.apache.hadoop.ipc.protobuf.HadoopRpcProtos.HadoopRpcRequestProto;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.SecretManager;
@ -66,15 +66,24 @@ public class ProtobufRpcEngine implements RpcEngine {
private static final ClientCache CLIENTS = new ClientCache(); private static final ClientCache CLIENTS = new ClientCache();
public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
InetSocketAddress addr, UserGroupInformation ticket, Configuration conf,
SocketFactory factory, int rpcTimeout) throws IOException {
return getProxy(protocol, clientVersion, addr, ticket, conf, factory,
rpcTimeout, null);
}
@Override @Override
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion, public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf,
SocketFactory factory, int rpcTimeout) throws IOException { SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy
) throws IOException {
return new ProtocolProxy<T>(protocol, (T) Proxy.newProxyInstance(protocol final Invoker invoker = new Invoker(protocol, addr, ticket, conf, factory,
.getClassLoader(), new Class[] { protocol }, new Invoker(protocol, rpcTimeout, connectionRetryPolicy);
addr, ticket, conf, factory, rpcTimeout)), false); return new ProtocolProxy<T>(protocol, (T) Proxy.newProxyInstance(
protocol.getClassLoader(), new Class[]{protocol}, invoker), false);
} }
@Override @Override
@ -97,11 +106,12 @@ public class ProtobufRpcEngine implements RpcEngine {
private final long clientProtocolVersion; private final long clientProtocolVersion;
private final String protocolName; private final String protocolName;
public Invoker(Class<?> protocol, InetSocketAddress addr, private Invoker(Class<?> protocol, InetSocketAddress addr,
UserGroupInformation ticket, Configuration conf, SocketFactory factory, UserGroupInformation ticket, Configuration conf, SocketFactory factory,
int rpcTimeout) throws IOException { int rpcTimeout, RetryPolicy connectionRetryPolicy) throws IOException {
this(protocol, Client.ConnectionId.getConnectionId(addr, protocol, this(protocol, Client.ConnectionId.getConnectionId(
ticket, rpcTimeout, conf), conf, factory); addr, protocol, ticket, rpcTimeout, connectionRetryPolicy, conf),
conf, factory);
} }
/** /**

View File

@ -41,6 +41,7 @@ import org.apache.commons.logging.*;
import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.io.*; import org.apache.hadoop.io.*;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.ipc.Client.ConnectionId; import org.apache.hadoop.ipc.Client.ConnectionId;
import org.apache.hadoop.ipc.protobuf.ProtocolInfoProtos.ProtocolInfoService; import org.apache.hadoop.ipc.protobuf.ProtocolInfoProtos.ProtocolInfoService;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
@ -326,7 +327,7 @@ public class RPC {
long clientVersion, long clientVersion,
InetSocketAddress addr, Configuration conf, InetSocketAddress addr, Configuration conf,
long connTimeout) throws IOException { long connTimeout) throws IOException {
return waitForProtocolProxy(protocol, clientVersion, addr, conf, 0, connTimeout); return waitForProtocolProxy(protocol, clientVersion, addr, conf, 0, null, connTimeout);
} }
/** /**
@ -347,7 +348,7 @@ public class RPC {
int rpcTimeout, int rpcTimeout,
long timeout) throws IOException { long timeout) throws IOException {
return waitForProtocolProxy(protocol, clientVersion, addr, return waitForProtocolProxy(protocol, clientVersion, addr,
conf, rpcTimeout, timeout).getProxy(); conf, rpcTimeout, null, timeout).getProxy();
} }
/** /**
@ -367,6 +368,7 @@ public class RPC {
long clientVersion, long clientVersion,
InetSocketAddress addr, Configuration conf, InetSocketAddress addr, Configuration conf,
int rpcTimeout, int rpcTimeout,
RetryPolicy connectionRetryPolicy,
long timeout) throws IOException { long timeout) throws IOException {
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
IOException ioe; IOException ioe;
@ -374,7 +376,7 @@ public class RPC {
try { try {
return getProtocolProxy(protocol, clientVersion, addr, return getProtocolProxy(protocol, clientVersion, addr,
UserGroupInformation.getCurrentUser(), conf, NetUtils UserGroupInformation.getCurrentUser(), conf, NetUtils
.getDefaultSocketFactory(conf), rpcTimeout); .getDefaultSocketFactory(conf), rpcTimeout, connectionRetryPolicy);
} catch(ConnectException se) { // namenode has not been started } catch(ConnectException se) { // namenode has not been started
LOG.info("Server at " + addr + " not available yet, Zzzzz..."); LOG.info("Server at " + addr + " not available yet, Zzzzz...");
ioe = se; ioe = se;
@ -463,7 +465,7 @@ public class RPC {
Configuration conf, Configuration conf,
SocketFactory factory) throws IOException { SocketFactory factory) throws IOException {
return getProtocolProxy( return getProtocolProxy(
protocol, clientVersion, addr, ticket, conf, factory, 0); protocol, clientVersion, addr, ticket, conf, factory, 0, null);
} }
/** /**
@ -489,7 +491,7 @@ public class RPC {
SocketFactory factory, SocketFactory factory,
int rpcTimeout) throws IOException { int rpcTimeout) throws IOException {
return getProtocolProxy(protocol, clientVersion, addr, ticket, return getProtocolProxy(protocol, clientVersion, addr, ticket,
conf, factory, rpcTimeout).getProxy(); conf, factory, rpcTimeout, null).getProxy();
} }
/** /**
@ -512,12 +514,13 @@ public class RPC {
UserGroupInformation ticket, UserGroupInformation ticket,
Configuration conf, Configuration conf,
SocketFactory factory, SocketFactory factory,
int rpcTimeout) throws IOException { int rpcTimeout,
RetryPolicy connectionRetryPolicy) throws IOException {
if (UserGroupInformation.isSecurityEnabled()) { if (UserGroupInformation.isSecurityEnabled()) {
SaslRpcServer.init(conf); SaslRpcServer.init(conf);
} }
return getProtocolEngine(protocol,conf).getProxy(protocol, return getProtocolEngine(protocol,conf).getProxy(protocol, clientVersion,
clientVersion, addr, ticket, conf, factory, rpcTimeout); addr, ticket, conf, factory, rpcTimeout, connectionRetryPolicy);
} }
/** /**

View File

@ -98,7 +98,8 @@ public class RemoteException extends IOException {
attrs.getValue("message")); attrs.getValue("message"));
} }
@Override
public String toString() { public String toString() {
return className + ": " + getMessage(); return getClass().getName() + "(" + className + "): " + getMessage();
} }
} }

View File

@ -26,6 +26,7 @@ import javax.net.SocketFactory;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.ipc.Client.ConnectionId; import org.apache.hadoop.ipc.Client.ConnectionId;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.SecretManager;
@ -40,7 +41,8 @@ public interface RpcEngine {
<T> ProtocolProxy<T> getProxy(Class<T> protocol, <T> ProtocolProxy<T> getProxy(Class<T> protocol,
long clientVersion, InetSocketAddress addr, long clientVersion, InetSocketAddress addr,
UserGroupInformation ticket, Configuration conf, UserGroupInformation ticket, Configuration conf,
SocketFactory factory, int rpcTimeout) throws IOException; SocketFactory factory, int rpcTimeout,
RetryPolicy connectionRetryPolicy) throws IOException;
/** Expert: Make multiple, parallel calls to a set of servers. */ /** Expert: Make multiple, parallel calls to a set of servers. */
Object[] call(Method method, Object[][] params, InetSocketAddress[] addrs, Object[] call(Method method, Object[][] params, InetSocketAddress[] addrs,

View File

@ -31,6 +31,7 @@ import javax.net.SocketFactory;
import org.apache.commons.logging.*; import org.apache.commons.logging.*;
import org.apache.hadoop.io.*; import org.apache.hadoop.io.*;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.ipc.Client.ConnectionId; import org.apache.hadoop.ipc.Client.ConnectionId;
import org.apache.hadoop.ipc.RPC.RpcInvoker; import org.apache.hadoop.ipc.RPC.RpcInvoker;
import org.apache.hadoop.ipc.VersionedProtocol; import org.apache.hadoop.ipc.VersionedProtocol;
@ -259,9 +260,14 @@ public class WritableRpcEngine implements RpcEngine {
public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion, public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
InetSocketAddress addr, UserGroupInformation ticket, InetSocketAddress addr, UserGroupInformation ticket,
Configuration conf, SocketFactory factory, Configuration conf, SocketFactory factory,
int rpcTimeout) int rpcTimeout, RetryPolicy connectionRetryPolicy)
throws IOException { throws IOException {
if (connectionRetryPolicy != null) {
throw new UnsupportedOperationException(
"Not supported: connectionRetryPolicy=" + connectionRetryPolicy);
}
T proxy = (T) Proxy.newProxyInstance(protocol.getClassLoader(), T proxy = (T) Proxy.newProxyInstance(protocol.getClassLoader(),
new Class[] { protocol }, new Invoker(protocol, addr, ticket, conf, new Class[] { protocol }, new Invoker(protocol, addr, ticket, conf,
factory, rpcTimeout)); factory, rpcTimeout));

View File

@ -18,50 +18,55 @@
package org.apache.hadoop.ipc; package org.apache.hadoop.ipc;
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.net.ConnectException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.lang.management.ManagementFactory; import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo; import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean; import java.lang.management.ThreadMXBean;
import java.lang.reflect.InvocationHandler; import java.lang.reflect.InvocationHandler;
import java.lang.reflect.Method; import java.lang.reflect.Method;
import java.lang.reflect.Proxy; import java.lang.reflect.Proxy;
import java.net.ConnectException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.util.Arrays; import java.util.Arrays;
import javax.net.SocketFactory; import javax.net.SocketFactory;
import org.apache.commons.logging.*; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.io.UTF8; import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.io.retry.RetryProxy;
import org.apache.hadoop.ipc.Client.ConnectionId; import org.apache.hadoop.ipc.Client.ConnectionId;
import org.apache.hadoop.ipc.TestSaslRPC.TestSaslImpl;
import org.apache.hadoop.ipc.TestSaslRPC.TestSaslProtocol;
import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.AuthorizationException; import org.apache.hadoop.security.authorize.AuthorizationException;
import org.apache.hadoop.security.authorize.PolicyProvider; import org.apache.hadoop.security.authorize.PolicyProvider;
import org.apache.hadoop.security.authorize.Service; import org.apache.hadoop.security.authorize.Service;
import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.test.MockitoUtil; import org.apache.hadoop.test.MockitoUtil;
import org.junit.Test; import org.junit.Test;
import static org.junit.Assert.*;
import com.google.protobuf.DescriptorProtos; import com.google.protobuf.DescriptorProtos;
import com.google.protobuf.DescriptorProtos.EnumDescriptorProto; import com.google.protobuf.DescriptorProtos.EnumDescriptorProto;
import static org.apache.hadoop.test.MetricsAsserts.*;
/** Unit tests for RPC. */ /** Unit tests for RPC. */
@SuppressWarnings("deprecation") @SuppressWarnings("deprecation")
public class TestRPC { public class TestRPC {
@ -250,7 +255,8 @@ public class TestRPC {
@Override @Override
public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion, public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf,
SocketFactory factory, int rpcTimeout) throws IOException { SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy
) throws IOException {
T proxy = (T) Proxy.newProxyInstance(protocol.getClassLoader(), T proxy = (T) Proxy.newProxyInstance(protocol.getClassLoader(),
new Class[] { protocol }, new StoppedInvocationHandler()); new Class[] { protocol }, new StoppedInvocationHandler());
return new ProtocolProxy<T>(protocol, proxy, false); return new ProtocolProxy<T>(protocol, proxy, false);

View File

@ -222,6 +222,10 @@ Branch-2 ( Unreleased changes )
HDFS-3520. Add transfer rate logging to TransferFsImage. (eli) HDFS-3520. Add transfer rate logging to TransferFsImage. (eli)
HDFS-3504. Support configurable retry policy in DFSClient for RPC
connections and RPC calls, and add MultipleLinearRandomRetry, a new retry
policy. (szetszwo)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-2982. Startup performance suffers when there are many edit log HDFS-2982. Startup performance suffers when there are many edit log

View File

@ -38,6 +38,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_STREAM_BUFFER_SIZE_DEFAULT = 4096; public static final int DFS_STREAM_BUFFER_SIZE_DEFAULT = 4096;
public static final String DFS_BYTES_PER_CHECKSUM_KEY = "dfs.bytes-per-checksum"; public static final String DFS_BYTES_PER_CHECKSUM_KEY = "dfs.bytes-per-checksum";
public static final int DFS_BYTES_PER_CHECKSUM_DEFAULT = 512; public static final int DFS_BYTES_PER_CHECKSUM_DEFAULT = 512;
public static final String DFS_CLIENT_RETRY_POLICY_ENABLED_KEY = "dfs.client.retry.policy.enabled";
public static final boolean DFS_CLIENT_RETRY_POLICY_ENABLED_DEFAULT = false;
public static final String DFS_CLIENT_RETRY_POLICY_SPEC_KEY = "dfs.client.retry.policy.spec";
public static final String DFS_CLIENT_RETRY_POLICY_SPEC_DEFAULT = "10000,6,60000,10"; //t1,n1,t2,n2,...
public static final String DFS_CHECKSUM_TYPE_KEY = "dfs.checksum.type"; public static final String DFS_CHECKSUM_TYPE_KEY = "dfs.checksum.type";
public static final String DFS_CHECKSUM_TYPE_DEFAULT = "CRC32C"; public static final String DFS_CHECKSUM_TYPE_DEFAULT = "CRC32C";
public static final String DFS_CLIENT_WRITE_PACKET_SIZE_KEY = "dfs.client-write-packet-size"; public static final String DFS_CLIENT_WRITE_PACKET_SIZE_KEY = "dfs.client-write-packet-size";

View File

@ -47,10 +47,12 @@ import org.apache.hadoop.hdfs.protocolPB.RefreshAuthorizationPolicyProtocolPB;
import org.apache.hadoop.hdfs.protocolPB.RefreshUserMappingsProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.RefreshUserMappingsProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.protocolPB.RefreshUserMappingsProtocolPB; import org.apache.hadoop.hdfs.protocolPB.RefreshUserMappingsProtocolPB;
import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.retry.DefaultFailoverProxyProvider;
import org.apache.hadoop.io.retry.FailoverProxyProvider; import org.apache.hadoop.io.retry.FailoverProxyProvider;
import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryPolicy;
@ -66,6 +68,7 @@ import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
import org.apache.hadoop.tools.GetUserMappingsProtocol; import org.apache.hadoop.tools.GetUserMappingsProtocol;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.protobuf.ServiceException;
/** /**
* Create proxy objects to communicate with a remote NN. All remote access to an * Create proxy objects to communicate with a remote NN. All remote access to an
@ -240,12 +243,106 @@ public class NameNodeProxies {
return new NamenodeProtocolTranslatorPB(proxy); return new NamenodeProtocolTranslatorPB(proxy);
} }
/**
* Return the default retry policy used in RPC.
*
* If dfs.client.retry.policy.enabled == false, use TRY_ONCE_THEN_FAIL.
*
* Otherwise, first unwrap ServiceException if possible, and then
* (1) use multipleLinearRandomRetry for
* - SafeModeException, or
* - IOException other than RemoteException, or
* - ServiceException; and
* (2) use TRY_ONCE_THEN_FAIL for
* - non-SafeMode RemoteException, or
* - non-IOException.
*
* Note that dfs.client.retry.max < 0 is not allowed.
*/
private static RetryPolicy getDefaultRpcRetryPolicy(Configuration conf) {
final RetryPolicy multipleLinearRandomRetry = getMultipleLinearRandomRetry(conf);
if (LOG.isDebugEnabled()) {
LOG.debug("multipleLinearRandomRetry = " + multipleLinearRandomRetry);
}
if (multipleLinearRandomRetry == null) {
//no retry
return RetryPolicies.TRY_ONCE_THEN_FAIL;
} else {
return new RetryPolicy() {
@Override
public RetryAction shouldRetry(Exception e, int retries, int failovers,
boolean isMethodIdempotent) throws Exception {
if (e instanceof ServiceException) {
//unwrap ServiceException
final Throwable cause = e.getCause();
if (cause != null && cause instanceof Exception) {
e = (Exception)cause;
}
}
//see (1) and (2) in the javadoc of this method.
final RetryPolicy p;
if (e instanceof RemoteException) {
final RemoteException re = (RemoteException)e;
p = SafeModeException.class.getName().equals(re.getClassName())?
multipleLinearRandomRetry: RetryPolicies.TRY_ONCE_THEN_FAIL;
} else if (e instanceof IOException || e instanceof ServiceException) {
p = multipleLinearRandomRetry;
} else { //non-IOException
p = RetryPolicies.TRY_ONCE_THEN_FAIL;
}
if (LOG.isDebugEnabled()) {
LOG.debug("RETRY " + retries + ") policy="
+ p.getClass().getSimpleName() + ", exception=" + e);
}
LOG.info("RETRY " + retries + ") policy="
+ p.getClass().getSimpleName() + ", exception=" + e);
return p.shouldRetry(e, retries, failovers, isMethodIdempotent);
}
};
}
}
/**
* Return the MultipleLinearRandomRetry policy specified in the conf,
* or null if the feature is disabled.
* If the policy is specified in the conf but the policy cannot be parsed,
* the default policy is returned.
*
* Conf property: N pairs of sleep-time and number-of-retries
* dfs.client.retry.policy = "s1,n1,s2,n2,..."
*/
private static RetryPolicy getMultipleLinearRandomRetry(Configuration conf) {
final boolean enabled = conf.getBoolean(
DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_ENABLED_KEY,
DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_ENABLED_DEFAULT);
if (!enabled) {
return null;
}
final String policy = conf.get(
DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_SPEC_KEY,
DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_SPEC_DEFAULT);
final RetryPolicy r = RetryPolicies.MultipleLinearRandomRetry.parseCommaSeparatedString(policy);
return r != null? r: RetryPolicies.MultipleLinearRandomRetry.parseCommaSeparatedString(
DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_SPEC_DEFAULT);
}
private static ClientProtocol createNNProxyWithClientProtocol( private static ClientProtocol createNNProxyWithClientProtocol(
InetSocketAddress address, Configuration conf, UserGroupInformation ugi, InetSocketAddress address, Configuration conf, UserGroupInformation ugi,
boolean withRetries) throws IOException { boolean withRetries) throws IOException {
ClientNamenodeProtocolPB proxy = (ClientNamenodeProtocolPB) NameNodeProxies RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class, ProtobufRpcEngine.class);
.createNameNodeProxy(address, conf, ugi, ClientNamenodeProtocolPB.class, 0);
final RetryPolicy defaultPolicy = getDefaultRpcRetryPolicy(conf);
final long version = RPC.getProtocolVersion(ClientNamenodeProtocolPB.class);
ClientNamenodeProtocolPB proxy = RPC.getProtocolProxy(
ClientNamenodeProtocolPB.class, version, address, ugi, conf,
NetUtils.getDefaultSocketFactory(conf), 0, defaultPolicy).getProxy();
if (withRetries) { // create the proxy with retries if (withRetries) { // create the proxy with retries
RetryPolicy createPolicy = RetryPolicies RetryPolicy createPolicy = RetryPolicies
.retryUpToMaximumCountWithFixedSleep(5, .retryUpToMaximumCountWithFixedSleep(5,
HdfsConstants.LEASE_SOFTLIMIT_PERIOD, TimeUnit.MILLISECONDS); HdfsConstants.LEASE_SOFTLIMIT_PERIOD, TimeUnit.MILLISECONDS);
@ -258,17 +355,21 @@ public class NameNodeProxies {
Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap
= new HashMap<Class<? extends Exception>, RetryPolicy>(); = new HashMap<Class<? extends Exception>, RetryPolicy>();
exceptionToPolicyMap.put(RemoteException.class, RetryPolicies exceptionToPolicyMap.put(RemoteException.class, RetryPolicies
.retryByRemoteException(RetryPolicies.TRY_ONCE_THEN_FAIL, .retryByRemoteException(defaultPolicy,
remoteExceptionToPolicyMap)); remoteExceptionToPolicyMap));
RetryPolicy methodPolicy = RetryPolicies.retryByException( RetryPolicy methodPolicy = RetryPolicies.retryByException(
RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); defaultPolicy, exceptionToPolicyMap);
Map<String, RetryPolicy> methodNameToPolicyMap Map<String, RetryPolicy> methodNameToPolicyMap
= new HashMap<String, RetryPolicy>(); = new HashMap<String, RetryPolicy>();
methodNameToPolicyMap.put("create", methodPolicy); methodNameToPolicyMap.put("create", methodPolicy);
proxy = (ClientNamenodeProtocolPB) RetryProxy proxy = (ClientNamenodeProtocolPB) RetryProxy.create(
.create(ClientNamenodeProtocolPB.class, proxy, methodNameToPolicyMap); ClientNamenodeProtocolPB.class,
new DefaultFailoverProxyProvider<ClientNamenodeProtocolPB>(
ClientNamenodeProtocolPB.class, proxy),
methodNameToPolicyMap,
defaultPolicy);
} }
return new ClientNamenodeProtocolTranslatorPB(proxy); return new ClientNamenodeProtocolTranslatorPB(proxy);
} }

View File

@ -25,8 +25,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_IPC_ADDRESS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_IPC_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICE_ID;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY;
@ -39,6 +37,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICE_ID;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI; import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
@ -66,12 +66,9 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.HAServiceProtocolHelper;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.ha.HAServiceProtocol.RequestSource; import org.apache.hadoop.ha.HAServiceProtocol.RequestSource;
import org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB; import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf; import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf;
import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
@ -1401,7 +1398,6 @@ public class MiniDFSCluster {
waitClusterUp(); waitClusterUp();
LOG.info("Restarted the namenode"); LOG.info("Restarted the namenode");
waitActive(); waitActive();
LOG.info("Cluster is active");
} }
} }
@ -1777,6 +1773,7 @@ public class MiniDFSCluster {
} }
} }
} }
LOG.info("Cluster is active");
} }
private synchronized boolean shouldWait(DatanodeInfo[] dnInfo, private synchronized boolean shouldWait(DatanodeInfo[] dnInfo,

View File

@ -25,46 +25,53 @@ import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy; import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
import java.net.SocketTimeoutException; import java.io.FileNotFoundException;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.LongWritable;
import java.io.IOException; import java.io.IOException;
import java.net.InetSocketAddress;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.net.SocketTimeoutException;
import java.security.MessageDigest; import java.security.MessageDigest;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit;
import junit.framework.TestCase; import junit.framework.TestCase;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileChecksum; import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException; import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.retry.RetryPolicies.MultipleLinearRandomRetry;
import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.Server; import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils;
import org.apache.log4j.Level;
import org.mockito.Mockito; import org.mockito.Mockito;
import org.mockito.internal.stubbing.answers.ThrowsException; import org.mockito.internal.stubbing.answers.ThrowsException;
import org.mockito.invocation.InvocationOnMock; import org.mockito.invocation.InvocationOnMock;
@ -341,7 +348,7 @@ public class TestDFSClientRetries extends TestCase {
// We shouldn't have gained an extra block by the RPC. // We shouldn't have gained an extra block by the RPC.
assertEquals(blockCount, blockCount2); assertEquals(blockCount, blockCount2);
return (LocatedBlock) ret2; return ret2;
} }
}).when(spyNN).addBlock(Mockito.anyString(), Mockito.anyString(), }).when(spyNN).addBlock(Mockito.anyString(), Mockito.anyString(),
Mockito.<ExtendedBlock>any(), Mockito.<DatanodeInfo[]>any()); Mockito.<ExtendedBlock>any(), Mockito.<DatanodeInfo[]>any());
@ -798,5 +805,149 @@ public class TestDFSClientRetries extends TestCase {
cluster.shutdown(); cluster.shutdown();
} }
} }
/** Test client retry with namenode restarting. */
public void testNamenodeRestart() throws Exception {
((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
final List<Exception> exceptions = new ArrayList<Exception>();
final Path dir = new Path("/testNamenodeRestart");
final Configuration conf = new Configuration();
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_ENABLED_KEY, true);
final short numDatanodes = 3;
final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(numDatanodes)
.build();
try {
cluster.waitActive();
//create a file
final DistributedFileSystem dfs = cluster.getFileSystem();
final long length = 1L << 20;
final Path file1 = new Path(dir, "foo");
DFSTestUtil.createFile(dfs, file1, length, numDatanodes, 20120406L);
//get file status
final FileStatus s1 = dfs.getFileStatus(file1);
assertEquals(length, s1.getLen());
//shutdown namenode
cluster.shutdownNameNode(0);
//namenode is down, create another file in a thread
final Path file3 = new Path(dir, "file");
final Thread thread = new Thread(new Runnable() {
@Override
public void run() {
try {
//it should retry till namenode is up.
final FileSystem fs = AppendTestUtil.createHdfsWithDifferentUsername(conf);
DFSTestUtil.createFile(fs, file3, length, numDatanodes, 20120406L);
} catch (Exception e) {
exceptions.add(e);
}
}
});
thread.start();
//restart namenode in a new thread
new Thread(new Runnable() {
@Override
public void run() {
try {
//sleep, restart, and then wait active
TimeUnit.SECONDS.sleep(30);
cluster.restartNameNode(0, false);
cluster.waitActive();
} catch (Exception e) {
exceptions.add(e);
}
}
}).start();
//namenode is down, it should retry until namenode is up again.
final FileStatus s2 = dfs.getFileStatus(file1);
assertEquals(s1, s2);
//check file1 and file3
thread.join();
assertEquals(dfs.getFileChecksum(file1), dfs.getFileChecksum(file3));
//enter safe mode
dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
//leave safe mode in a new thread
new Thread(new Runnable() {
@Override
public void run() {
try {
//sleep and then leave safe mode
TimeUnit.SECONDS.sleep(30);
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
} catch (Exception e) {
exceptions.add(e);
}
}
}).start();
//namenode is in safe mode, create should retry until it leaves safe mode.
final Path file2 = new Path(dir, "bar");
DFSTestUtil.createFile(dfs, file2, length, numDatanodes, 20120406L);
assertEquals(dfs.getFileChecksum(file1), dfs.getFileChecksum(file2));
//make sure it won't retry on exceptions like FileNotFoundException
final Path nonExisting = new Path(dir, "nonExisting");
LOG.info("setPermission: " + nonExisting);
try {
dfs.setPermission(nonExisting, new FsPermission((short)0));
fail();
} catch(FileNotFoundException fnfe) {
LOG.info("GOOD!", fnfe);
} }
if (!exceptions.isEmpty()) {
LOG.error("There are " + exceptions.size() + " exception(s):");
for(int i = 0; i < exceptions.size(); i++) {
LOG.error("Exception " + i, exceptions.get(i));
}
fail();
}
} finally {
cluster.shutdown();
}
}
public void testMultipleLinearRandomRetry() {
parseMultipleLinearRandomRetry(null, "");
parseMultipleLinearRandomRetry(null, "11");
parseMultipleLinearRandomRetry(null, "11,22,33");
parseMultipleLinearRandomRetry(null, "11,22,33,44,55");
parseMultipleLinearRandomRetry(null, "AA");
parseMultipleLinearRandomRetry(null, "11,AA");
parseMultipleLinearRandomRetry(null, "11,22,33,FF");
parseMultipleLinearRandomRetry(null, "11,-22");
parseMultipleLinearRandomRetry(null, "-11,22");
parseMultipleLinearRandomRetry("[22x11ms]",
"11,22");
parseMultipleLinearRandomRetry("[22x11ms, 44x33ms]",
"11,22,33,44");
parseMultipleLinearRandomRetry("[22x11ms, 44x33ms, 66x55ms]",
"11,22,33,44,55,66");
parseMultipleLinearRandomRetry("[22x11ms, 44x33ms, 66x55ms]",
" 11, 22, 33, 44, 55, 66 ");
}
static void parseMultipleLinearRandomRetry(String expected, String s) {
final MultipleLinearRandomRetry r = MultipleLinearRandomRetry.parseCommaSeparatedString(s);
LOG.info("input=" + s + ", parsed=" + r + ", expected=" + expected);
if (r == null) {
assertEquals(expected, null);
} else {
assertEquals("MultipleLinearRandomRetry" + expected, r.toString());
}
}
}