HBASE-13477 Create metrics on failed requests
Summary: Add metrics on how many requests are exceptions and what type. Test Plan: behold unit tests. Differential Revision: https://reviews.facebook.net/D37167
This commit is contained in:
parent
3ccae37866
commit
2c5dc53a32
|
@ -58,6 +58,16 @@ public interface MetricsHBaseServerSource extends BaseSource {
|
|||
String NUM_ACTIVE_HANDLER_NAME = "numActiveHandler";
|
||||
String NUM_ACTIVE_HANDLER_DESC = "Number of active rpc handlers.";
|
||||
|
||||
String EXCEPTIONS_NAME="exceptions";
|
||||
String EXCEPTIONS_DESC="Exceptions caused by requests";
|
||||
String EXCEPTIONS_TYPE_DESC="Number of requests that resulted in the specified type of Exception";
|
||||
String EXCEPTIONS_OOO_NAME="exceptions.OutOfOrderScannerNextException";
|
||||
String EXCEPTIONS_BUSY_NAME="exceptions.RegionTooBusyException";
|
||||
String EXCEPTIONS_UNKNOWN_NAME="exceptions.UnknownScannerException";
|
||||
String EXCEPTIONS_SANITY_NAME="exceptions.FailedSanityCheckException";
|
||||
String EXCEPTIONS_MOVED_NAME="exceptions.RegionMovedException";
|
||||
String EXCEPTIONS_NSRE_NAME="exceptions.NotServingRegionException";
|
||||
|
||||
void authorizationSuccess();
|
||||
|
||||
void authorizationFailure();
|
||||
|
@ -66,6 +76,18 @@ public interface MetricsHBaseServerSource extends BaseSource {
|
|||
|
||||
void authenticationFailure();
|
||||
|
||||
void exception();
|
||||
|
||||
/**
|
||||
* Different types of exceptions
|
||||
*/
|
||||
void outOfOrderException();
|
||||
void failedSanityException();
|
||||
void movedRegionException();
|
||||
void notServingRegionException();
|
||||
void unknownScannerException();
|
||||
void tooBusyException();
|
||||
|
||||
void sentBytes(long count);
|
||||
|
||||
void receivedBytes(int count);
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
|||
|
||||
@InterfaceAudience.Private
|
||||
public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourceFactory {
|
||||
private static enum SourceStorage {
|
||||
private enum SourceStorage {
|
||||
INSTANCE;
|
||||
HashMap<String, MetricsHBaseServerSource>
|
||||
sources =
|
||||
|
@ -39,7 +39,7 @@ public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourc
|
|||
}
|
||||
|
||||
private static synchronized MetricsHBaseServerSource getSource(String serverName,
|
||||
MetricsHBaseServerWrapper wrapper) {
|
||||
MetricsHBaseServerWrapper wrap) {
|
||||
String context = createContextName(serverName);
|
||||
MetricsHBaseServerSource source = SourceStorage.INSTANCE.sources.get(context);
|
||||
|
||||
|
@ -49,7 +49,7 @@ public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourc
|
|||
context,
|
||||
METRICS_DESCRIPTION,
|
||||
context.toLowerCase(),
|
||||
context + METRICS_JMX_CONTEXT_SUFFIX, wrapper);
|
||||
context + METRICS_JMX_CONTEXT_SUFFIX, wrap);
|
||||
|
||||
//Store back in storage
|
||||
SourceStorage.INSTANCE.sources.put(context, source);
|
||||
|
|
|
@ -38,6 +38,16 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl
|
|||
private final MutableCounterLong authenticationFailures;
|
||||
private final MutableCounterLong sentBytes;
|
||||
private final MutableCounterLong receivedBytes;
|
||||
|
||||
private final MutableCounterLong exceptions;
|
||||
private final MutableCounterLong exceptionsOOO;
|
||||
private final MutableCounterLong exceptionsBusy;
|
||||
private final MutableCounterLong exceptionsUnknown;
|
||||
private final MutableCounterLong exceptionsSanity;
|
||||
private final MutableCounterLong exceptionsNSRE;
|
||||
private final MutableCounterLong exceptionsMoved;
|
||||
|
||||
|
||||
private MutableHistogram queueCallTime;
|
||||
private MutableHistogram processCallTime;
|
||||
private MutableHistogram totalCallTime;
|
||||
|
@ -51,18 +61,32 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl
|
|||
this.wrapper = wrapper;
|
||||
|
||||
this.authorizationSuccesses = this.getMetricsRegistry().newCounter(AUTHORIZATION_SUCCESSES_NAME,
|
||||
AUTHORIZATION_SUCCESSES_DESC, 0l);
|
||||
AUTHORIZATION_SUCCESSES_DESC, 0L);
|
||||
this.authorizationFailures = this.getMetricsRegistry().newCounter(AUTHORIZATION_FAILURES_NAME,
|
||||
AUTHORIZATION_FAILURES_DESC, 0l);
|
||||
AUTHORIZATION_FAILURES_DESC, 0L);
|
||||
|
||||
this.exceptions = this.getMetricsRegistry().newCounter(EXCEPTIONS_NAME, EXCEPTIONS_DESC, 0L);
|
||||
this.exceptionsOOO = this.getMetricsRegistry()
|
||||
.newCounter(EXCEPTIONS_OOO_NAME, EXCEPTIONS_TYPE_DESC, 0L);
|
||||
this.exceptionsBusy = this.getMetricsRegistry()
|
||||
.newCounter(EXCEPTIONS_BUSY_NAME, EXCEPTIONS_TYPE_DESC, 0L);
|
||||
this.exceptionsUnknown = this.getMetricsRegistry()
|
||||
.newCounter(EXCEPTIONS_UNKNOWN_NAME, EXCEPTIONS_TYPE_DESC, 0L);
|
||||
this.exceptionsSanity = this.getMetricsRegistry()
|
||||
.newCounter(EXCEPTIONS_SANITY_NAME, EXCEPTIONS_TYPE_DESC, 0L);
|
||||
this.exceptionsMoved = this.getMetricsRegistry()
|
||||
.newCounter(EXCEPTIONS_MOVED_NAME, EXCEPTIONS_TYPE_DESC, 0L);
|
||||
this.exceptionsNSRE = this.getMetricsRegistry()
|
||||
.newCounter(EXCEPTIONS_NSRE_NAME, EXCEPTIONS_TYPE_DESC, 0L);
|
||||
|
||||
this.authenticationSuccesses = this.getMetricsRegistry().newCounter(
|
||||
AUTHENTICATION_SUCCESSES_NAME, AUTHENTICATION_SUCCESSES_DESC, 0l);
|
||||
AUTHENTICATION_SUCCESSES_NAME, AUTHENTICATION_SUCCESSES_DESC, 0L);
|
||||
this.authenticationFailures = this.getMetricsRegistry().newCounter(AUTHENTICATION_FAILURES_NAME,
|
||||
AUTHENTICATION_FAILURES_DESC, 0l);
|
||||
AUTHENTICATION_FAILURES_DESC, 0L);
|
||||
this.sentBytes = this.getMetricsRegistry().newCounter(SENT_BYTES_NAME,
|
||||
SENT_BYTES_DESC, 0l);
|
||||
SENT_BYTES_DESC, 0L);
|
||||
this.receivedBytes = this.getMetricsRegistry().newCounter(RECEIVED_BYTES_NAME,
|
||||
RECEIVED_BYTES_DESC, 0l);
|
||||
RECEIVED_BYTES_DESC, 0L);
|
||||
this.queueCallTime = this.getMetricsRegistry().newHistogram(QUEUE_CALL_TIME_NAME,
|
||||
QUEUE_CALL_TIME_DESC);
|
||||
this.processCallTime = this.getMetricsRegistry().newHistogram(PROCESS_CALL_TIME_NAME,
|
||||
|
@ -86,6 +110,41 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl
|
|||
authenticationFailures.incr();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void exception() {
|
||||
exceptions.incr();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void outOfOrderException() {
|
||||
exceptionsOOO.incr();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void failedSanityException() {
|
||||
exceptionsSanity.incr();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void movedRegionException() {
|
||||
exceptionsMoved.incr();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void notServingRegionException() {
|
||||
exceptionsNSRE.incr();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void unknownScannerException() {
|
||||
exceptionsUnknown.incr();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tooBusyException() {
|
||||
exceptionsBusy.incr();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void authenticationSuccess() {
|
||||
authenticationSuccesses.incr();
|
||||
|
|
|
@ -19,8 +19,14 @@
|
|||
|
||||
package org.apache.hadoop.hbase.ipc;
|
||||
|
||||
import org.apache.hadoop.hbase.NotServingRegionException;
|
||||
import org.apache.hadoop.hbase.RegionTooBusyException;
|
||||
import org.apache.hadoop.hbase.UnknownScannerException;
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
|
||||
import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
|
||||
import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
|
||||
import org.apache.hadoop.hbase.exceptions.RegionMovedException;
|
||||
|
||||
@InterfaceAudience.Private
|
||||
public class MetricsHBaseServer {
|
||||
|
@ -67,6 +73,34 @@ public class MetricsHBaseServer {
|
|||
source.queuedAndProcessedCall(totalTime);
|
||||
}
|
||||
|
||||
public void exception(Throwable throwable) {
|
||||
source.exception();
|
||||
|
||||
/**
|
||||
* Keep some metrics for commonly seen exceptions
|
||||
*
|
||||
* Try and put the most common types first.
|
||||
* Place child types before the parent type that they extend.
|
||||
*
|
||||
* If this gets much larger we might have to go to a hashmap
|
||||
*/
|
||||
if (throwable != null) {
|
||||
if (throwable instanceof OutOfOrderScannerNextException) {
|
||||
source.outOfOrderException();
|
||||
} else if (throwable instanceof RegionTooBusyException) {
|
||||
source.tooBusyException();
|
||||
} else if (throwable instanceof UnknownScannerException) {
|
||||
source.unknownScannerException();
|
||||
} else if (throwable instanceof RegionMovedException) {
|
||||
source.movedRegionException();
|
||||
} else if (throwable instanceof NotServingRegionException) {
|
||||
source.notServingRegionException();
|
||||
} else if (throwable instanceof FailedSanityCheckException) {
|
||||
source.failedSanityException();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public MetricsHBaseServerSource getMetricsSource() {
|
||||
return source;
|
||||
}
|
||||
|
|
|
@ -158,6 +158,8 @@ import com.google.protobuf.TextFormat;
|
|||
@InterfaceStability.Evolving
|
||||
public class RpcServer implements RpcServerInterface {
|
||||
public static final Log LOG = LogFactory.getLog(RpcServer.class);
|
||||
private static final CallQueueTooBigException CALL_QUEUE_TOO_BIG_EXCEPTION
|
||||
= new CallQueueTooBigException();
|
||||
|
||||
private final boolean authorize;
|
||||
private boolean isSecurityEnabled;
|
||||
|
@ -1465,6 +1467,7 @@ public class RpcServer implements RpcServerInterface {
|
|||
saslServer.dispose();
|
||||
saslServer = null;
|
||||
} catch (SaslException ignored) {
|
||||
// Ignored. This is being disposed of anyway.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1787,7 +1790,8 @@ public class RpcServer implements RpcServerInterface {
|
|||
new Call(id, this.service, null, null, null, null, this,
|
||||
responder, totalRequestSize, null, null);
|
||||
ByteArrayOutputStream responseBuffer = new ByteArrayOutputStream();
|
||||
setupResponse(responseBuffer, callTooBig, new CallQueueTooBigException(),
|
||||
metrics.exception(CALL_QUEUE_TOO_BIG_EXCEPTION);
|
||||
setupResponse(responseBuffer, callTooBig, CALL_QUEUE_TOO_BIG_EXCEPTION,
|
||||
"Call queue is full on " + getListenerAddress() +
|
||||
", is hbase.ipc.server.max.callqueue.size too small?");
|
||||
responder.doRespond(callTooBig);
|
||||
|
@ -1819,6 +1823,8 @@ public class RpcServer implements RpcServerInterface {
|
|||
getHostAddress();
|
||||
LOG.warn(msg, t);
|
||||
|
||||
metrics.exception(t);
|
||||
|
||||
// probably the hbase hadoop version does not match the running hadoop version
|
||||
if (t instanceof LinkageError) {
|
||||
t = new DoNotRetryIOException(t);
|
||||
|
@ -2140,6 +2146,10 @@ public class RpcServer implements RpcServerInterface {
|
|||
// putting it on the wire. Its needed to adhere to the pb Service Interface but we don't
|
||||
// need to pass it over the wire.
|
||||
if (e instanceof ServiceException) e = e.getCause();
|
||||
|
||||
// increment the number of requests that were exceptions.
|
||||
metrics.exception(e);
|
||||
|
||||
if (e instanceof LinkageError) throw new DoNotRetryIOException(e);
|
||||
if (e instanceof IOException) throw (IOException)e;
|
||||
LOG.error("Unexpected throwable object ", e);
|
||||
|
|
|
@ -553,6 +553,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
|
|||
.setName(result.getClass().getName())
|
||||
.setValue(result.toByteString())));
|
||||
} catch (IOException ioe) {
|
||||
rpcServer.getMetrics().exception(ioe);
|
||||
resultOrExceptionBuilder.setException(ResponseConverter.buildException(ioe));
|
||||
}
|
||||
} else if (action.hasMutation()) {
|
||||
|
@ -602,6 +603,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
|
|||
// case the corresponding ResultOrException instance for the Put or Delete will be added
|
||||
// down in the doBatchOp method call rather than up here.
|
||||
} catch (IOException ie) {
|
||||
rpcServer.getMetrics().exception(ie);
|
||||
resultOrExceptionBuilder = ResultOrException.newBuilder().
|
||||
setException(ResponseConverter.buildException(ie));
|
||||
}
|
||||
|
@ -1902,6 +1904,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
|
|||
region = getRegion(regionAction.getRegion());
|
||||
quota = getQuotaManager().checkQuota(region, regionAction.getActionList());
|
||||
} catch (IOException e) {
|
||||
rpcServer.getMetrics().exception(e);
|
||||
regionActionResultBuilder.setException(ResponseConverter.buildException(e));
|
||||
responseBuilder.addRegionActionResult(regionActionResultBuilder.build());
|
||||
continue; // For this region it's a failure.
|
||||
|
@ -1932,6 +1935,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
|
|||
processed = Boolean.TRUE;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
rpcServer.getMetrics().exception(e);
|
||||
// As it's atomic, we may expect it's a global failure.
|
||||
regionActionResultBuilder.setException(ResponseConverter.buildException(e));
|
||||
}
|
||||
|
|
|
@ -20,6 +20,11 @@
|
|||
package org.apache.hadoop.hbase.ipc;
|
||||
|
||||
import org.apache.hadoop.hbase.CompatibilityFactory;
|
||||
import org.apache.hadoop.hbase.NotServingRegionException;
|
||||
import org.apache.hadoop.hbase.RegionTooBusyException;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
|
||||
import org.apache.hadoop.hbase.exceptions.RegionMovedException;
|
||||
import org.apache.hadoop.hbase.testclassification.RPCTests;
|
||||
import org.apache.hadoop.hbase.testclassification.SmallTests;
|
||||
import org.apache.hadoop.hbase.test.MetricsAssertHelper;
|
||||
|
@ -113,6 +118,19 @@ public class TestRpcMetrics {
|
|||
|
||||
HELPER.assertCounter("sentBytes", 309, serverSource);
|
||||
HELPER.assertCounter("receivedBytes", 208, serverSource);
|
||||
|
||||
mrpc.exception(null);
|
||||
HELPER.assertCounter("exceptions", 1, serverSource);
|
||||
|
||||
mrpc.exception(new RegionMovedException(ServerName.parseServerName("localhost:60020"), 100));
|
||||
mrpc.exception(new RegionTooBusyException());
|
||||
mrpc.exception(new OutOfOrderScannerNextException());
|
||||
mrpc.exception(new NotServingRegionException());
|
||||
HELPER.assertCounter("exceptions.RegionMovedException", 1, serverSource);
|
||||
HELPER.assertCounter("exceptions.RegionTooBusyException", 1, serverSource);
|
||||
HELPER.assertCounter("exceptions.OutOfOrderScannerNextException", 1, serverSource);
|
||||
HELPER.assertCounter("exceptions.NotServingRegionException", 1, serverSource);
|
||||
HELPER.assertCounter("exceptions", 5, serverSource);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue