HBASE-16239 Better logging for RPC related exceptions

Signed-off-by: Chia-Ping Tsai <chia7712@gmail.com>
This commit is contained in:
Enis Soztutar 2017-12-02 08:08:20 +08:00 committed by Chia-Ping Tsai
parent 6c6910b0b4
commit 4f642f659f
2 changed files with 44 additions and 21 deletions

View File

@ -28,17 +28,18 @@ import java.net.SocketTimeoutException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.CallQueueTooBigException;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.PreemptiveFastFailException;
import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ExceptionUtil;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.util.StringUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException;
/**
* Runs an rpc'ing {@link RetryingCallable}. Sets into rpc client
@ -119,10 +120,20 @@ public class RpcRetryingCallerImpl<T> implements RpcRetryingCaller<T> {
t = translateException(t);
if (tries > startLogErrorsCnt) {
LOG.info("Call exception, tries=" + tries + ", maxAttempts=" + maxAttempts + ", started="
+ (EnvironmentEdgeManager.currentTime() - tracker.getStartTime()) + " ms ago, "
+ "cancelled=" + cancelled.get() + ", msg="
+ t.getMessage() + " " + callable.getExceptionMessageAdditionalDetail());
if (LOG.isInfoEnabled()) {
StringBuilder builder = new StringBuilder("Call exception, tries=").append(tries)
.append(", retries=").append(tries).append(", started=")
.append((EnvironmentEdgeManager.currentTime() - tracker.getStartTime()))
.append(" ms ago, ").append("cancelled=").append(cancelled.get())
.append(", msg=").append(t.getMessage())
.append(", details=").append(callable.getExceptionMessageAdditionalDetail());
if (LOG.isDebugEnabled()) {
builder.append(", exception=").append(StringUtils.stringifyException(t));
LOG.debug(builder.toString());
} else {
LOG.info(builder.toString());
}
}
}
callable.throwable(t, maxAttempts != 1);

View File

@ -43,9 +43,7 @@ import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ThreadLocalRandom;
import javax.security.sasl.SaslException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@ -57,6 +55,17 @@ import org.apache.hadoop.hbase.ipc.HBaseRpcController.CancellationCallback;
import org.apache.hadoop.hbase.security.HBaseSaslRpcClient;
import org.apache.hadoop.hbase.security.SaslUtil;
import org.apache.hadoop.hbase.security.SaslUtil.QualityOfProtection;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ExceptionUtil;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.htrace.core.TraceScope;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.shaded.com.google.protobuf.Message;
import org.apache.hadoop.hbase.shaded.com.google.protobuf.Message.Builder;
import org.apache.hadoop.hbase.shaded.com.google.protobuf.RpcCallback;
@ -67,15 +76,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.ConnectionHea
import org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.ExceptionResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.RequestHeader;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.ResponseHeader;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ExceptionUtil;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.htrace.core.TraceScope;
import org.apache.yetus.audience.InterfaceAudience;
/**
* Thread that reads responses and notifies callers. Each connection owns a socket connected to a
@ -263,8 +263,16 @@ class BlockingRpcConnection extends RpcConnection implements Runnable {
/*
* The max number of retries is 45, which amounts to 20s*45 = 15 minutes retries.
*/
if (LOG.isDebugEnabled()) {
LOG.debug("Received exception in connection setup.\n" +
StringUtils.stringifyException(toe));
}
handleConnectionFailure(timeoutFailures++, this.rpcClient.maxRetries, toe);
} catch (IOException ie) {
if (LOG.isDebugEnabled()) {
LOG.debug("Received exception in connection setup.\n" +
StringUtils.stringifyException(ie));
}
handleConnectionFailure(ioFailures++, this.rpcClient.maxRetries, ie);
}
}
@ -296,8 +304,11 @@ class BlockingRpcConnection extends RpcConnection implements Runnable {
ExceptionUtil.rethrowIfInterrupt(ie);
}
LOG.info("Retrying connect to server: " + remoteId.getAddress() + " after sleeping "
+ this.rpcClient.failureSleep + "ms. Already tried " + curRetries + " time(s).");
if (LOG.isInfoEnabled()) {
LOG.info("Retrying connect to server: " + remoteId.getAddress() +
" after sleeping " + this.rpcClient.failureSleep + "ms. Already tried " + curRetries +
" time(s).");
}
}
/*
@ -381,7 +392,8 @@ class BlockingRpcConnection extends RpcConnection implements Runnable {
if (shouldAuthenticateOverKrb()) {
if (currRetries < maxRetries) {
if (LOG.isDebugEnabled()) {
LOG.debug("Exception encountered while connecting to " + "the server : " + ex);
LOG.debug("Exception encountered while connecting to " +
"the server : " + StringUtils.stringifyException(ex));
}
// try re-login
relogin();