HBASE-13477 Create metrics on failed requests

Summary: Add metrics counting how many requests result in an exception, broken down by exception type.

Test Plan: behold unit tests.

Differential Revision: https://reviews.facebook.net/D37167
This commit is contained in:
Elliott Clark 2015-04-15 13:49:55 -07:00
parent 3ccae37866
commit 2c5dc53a32
7 changed files with 160 additions and 13 deletions

View File

@ -58,6 +58,16 @@ public interface MetricsHBaseServerSource extends BaseSource {
String NUM_ACTIVE_HANDLER_NAME = "numActiveHandler"; String NUM_ACTIVE_HANDLER_NAME = "numActiveHandler";
String NUM_ACTIVE_HANDLER_DESC = "Number of active rpc handlers."; String NUM_ACTIVE_HANDLER_DESC = "Number of active rpc handlers.";
// Name and description of the overall counter of requests that ended in an exception.
String EXCEPTIONS_NAME="exceptions";
String EXCEPTIONS_DESC="Exceptions caused by requests";
// Description shared by all of the per-type exception counters named below.
String EXCEPTIONS_TYPE_DESC="Number of requests that resulted in the specified type of Exception";
// Per-type counter names: the "exceptions." prefix followed by the exception's simple class name.
String EXCEPTIONS_OOO_NAME="exceptions.OutOfOrderScannerNextException";
String EXCEPTIONS_BUSY_NAME="exceptions.RegionTooBusyException";
String EXCEPTIONS_UNKNOWN_NAME="exceptions.UnknownScannerException";
String EXCEPTIONS_SANITY_NAME="exceptions.FailedSanityCheckException";
String EXCEPTIONS_MOVED_NAME="exceptions.RegionMovedException";
String EXCEPTIONS_NSRE_NAME="exceptions.NotServingRegionException";
void authorizationSuccess(); void authorizationSuccess();
void authorizationFailure(); void authorizationFailure();
@ -66,6 +76,18 @@ public interface MetricsHBaseServerSource extends BaseSource {
void authenticationFailure(); void authenticationFailure();
/**
 * Records that a request resulted in an exception, whatever its type.
 */
void exception();
/**
 * Different types of exceptions.
 * Each method below records one request that failed with the named type of
 * exception; these per-type counts are kept in addition to {@link #exception()}.
 */
// OutOfOrderScannerNextException
void outOfOrderException();
// FailedSanityCheckException
void failedSanityException();
// RegionMovedException
void movedRegionException();
// NotServingRegionException
void notServingRegionException();
// UnknownScannerException
void unknownScannerException();
// RegionTooBusyException
void tooBusyException();
void sentBytes(long count); void sentBytes(long count);
void receivedBytes(int count); void receivedBytes(int count);

View File

@ -25,7 +25,7 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
@InterfaceAudience.Private @InterfaceAudience.Private
public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourceFactory { public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourceFactory {
private static enum SourceStorage { private enum SourceStorage {
INSTANCE; INSTANCE;
HashMap<String, MetricsHBaseServerSource> HashMap<String, MetricsHBaseServerSource>
sources = sources =
@ -39,7 +39,7 @@ public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourc
} }
private static synchronized MetricsHBaseServerSource getSource(String serverName, private static synchronized MetricsHBaseServerSource getSource(String serverName,
MetricsHBaseServerWrapper wrapper) { MetricsHBaseServerWrapper wrap) {
String context = createContextName(serverName); String context = createContextName(serverName);
MetricsHBaseServerSource source = SourceStorage.INSTANCE.sources.get(context); MetricsHBaseServerSource source = SourceStorage.INSTANCE.sources.get(context);
@ -49,7 +49,7 @@ public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourc
context, context,
METRICS_DESCRIPTION, METRICS_DESCRIPTION,
context.toLowerCase(), context.toLowerCase(),
context + METRICS_JMX_CONTEXT_SUFFIX, wrapper); context + METRICS_JMX_CONTEXT_SUFFIX, wrap);
//Store back in storage //Store back in storage
SourceStorage.INSTANCE.sources.put(context, source); SourceStorage.INSTANCE.sources.put(context, source);

View File

@ -38,6 +38,16 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl
private final MutableCounterLong authenticationFailures; private final MutableCounterLong authenticationFailures;
private final MutableCounterLong sentBytes; private final MutableCounterLong sentBytes;
private final MutableCounterLong receivedBytes; private final MutableCounterLong receivedBytes;
// Total number of requests that resulted in an exception (registered as EXCEPTIONS_NAME).
private final MutableCounterLong exceptions;
// Per-type exception counters; each is registered under the matching EXCEPTIONS_*_NAME.
// OutOfOrderScannerNextException
private final MutableCounterLong exceptionsOOO;
// RegionTooBusyException
private final MutableCounterLong exceptionsBusy;
// UnknownScannerException
private final MutableCounterLong exceptionsUnknown;
// FailedSanityCheckException
private final MutableCounterLong exceptionsSanity;
// NotServingRegionException
private final MutableCounterLong exceptionsNSRE;
// RegionMovedException
private final MutableCounterLong exceptionsMoved;
private MutableHistogram queueCallTime; private MutableHistogram queueCallTime;
private MutableHistogram processCallTime; private MutableHistogram processCallTime;
private MutableHistogram totalCallTime; private MutableHistogram totalCallTime;
@ -51,18 +61,32 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl
this.wrapper = wrapper; this.wrapper = wrapper;
this.authorizationSuccesses = this.getMetricsRegistry().newCounter(AUTHORIZATION_SUCCESSES_NAME, this.authorizationSuccesses = this.getMetricsRegistry().newCounter(AUTHORIZATION_SUCCESSES_NAME,
AUTHORIZATION_SUCCESSES_DESC, 0l); AUTHORIZATION_SUCCESSES_DESC, 0L);
this.authorizationFailures = this.getMetricsRegistry().newCounter(AUTHORIZATION_FAILURES_NAME, this.authorizationFailures = this.getMetricsRegistry().newCounter(AUTHORIZATION_FAILURES_NAME,
AUTHORIZATION_FAILURES_DESC, 0l); AUTHORIZATION_FAILURES_DESC, 0L);
this.exceptions = this.getMetricsRegistry().newCounter(EXCEPTIONS_NAME, EXCEPTIONS_DESC, 0L);
this.exceptionsOOO = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_OOO_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.exceptionsBusy = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_BUSY_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.exceptionsUnknown = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_UNKNOWN_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.exceptionsSanity = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_SANITY_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.exceptionsMoved = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_MOVED_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.exceptionsNSRE = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_NSRE_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.authenticationSuccesses = this.getMetricsRegistry().newCounter( this.authenticationSuccesses = this.getMetricsRegistry().newCounter(
AUTHENTICATION_SUCCESSES_NAME, AUTHENTICATION_SUCCESSES_DESC, 0l); AUTHENTICATION_SUCCESSES_NAME, AUTHENTICATION_SUCCESSES_DESC, 0L);
this.authenticationFailures = this.getMetricsRegistry().newCounter(AUTHENTICATION_FAILURES_NAME, this.authenticationFailures = this.getMetricsRegistry().newCounter(AUTHENTICATION_FAILURES_NAME,
AUTHENTICATION_FAILURES_DESC, 0l); AUTHENTICATION_FAILURES_DESC, 0L);
this.sentBytes = this.getMetricsRegistry().newCounter(SENT_BYTES_NAME, this.sentBytes = this.getMetricsRegistry().newCounter(SENT_BYTES_NAME,
SENT_BYTES_DESC, 0l); SENT_BYTES_DESC, 0L);
this.receivedBytes = this.getMetricsRegistry().newCounter(RECEIVED_BYTES_NAME, this.receivedBytes = this.getMetricsRegistry().newCounter(RECEIVED_BYTES_NAME,
RECEIVED_BYTES_DESC, 0l); RECEIVED_BYTES_DESC, 0L);
this.queueCallTime = this.getMetricsRegistry().newHistogram(QUEUE_CALL_TIME_NAME, this.queueCallTime = this.getMetricsRegistry().newHistogram(QUEUE_CALL_TIME_NAME,
QUEUE_CALL_TIME_DESC); QUEUE_CALL_TIME_DESC);
this.processCallTime = this.getMetricsRegistry().newHistogram(PROCESS_CALL_TIME_NAME, this.processCallTime = this.getMetricsRegistry().newHistogram(PROCESS_CALL_TIME_NAME,
@ -86,6 +110,41 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl
authenticationFailures.incr(); authenticationFailures.incr();
} }
/**
 * Adds one to the overall count of requests that resulted in an exception.
 */
@Override
public void exception() {
  this.exceptions.incr();
}
/**
 * Adds one to the count of requests that failed with an
 * OutOfOrderScannerNextException.
 */
@Override
public void outOfOrderException() {
  this.exceptionsOOO.incr();
}
/**
 * Adds one to the count of requests that failed with a
 * FailedSanityCheckException.
 */
@Override
public void failedSanityException() {
  this.exceptionsSanity.incr();
}
/**
 * Adds one to the count of requests that failed with a RegionMovedException.
 */
@Override
public void movedRegionException() {
  this.exceptionsMoved.incr();
}
/**
 * Adds one to the count of requests that failed with a
 * NotServingRegionException.
 */
@Override
public void notServingRegionException() {
  this.exceptionsNSRE.incr();
}
/**
 * Adds one to the count of requests that failed with an
 * UnknownScannerException.
 */
@Override
public void unknownScannerException() {
  this.exceptionsUnknown.incr();
}
/**
 * Adds one to the count of requests that failed with a
 * RegionTooBusyException.
 */
@Override
public void tooBusyException() {
  this.exceptionsBusy.incr();
}
@Override @Override
public void authenticationSuccess() { public void authenticationSuccess() {
authenticationSuccesses.incr(); authenticationSuccesses.incr();

View File

@ -19,8 +19,14 @@
package org.apache.hadoop.hbase.ipc; package org.apache.hadoop.hbase.ipc;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.RegionTooBusyException;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory; import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
import org.apache.hadoop.hbase.exceptions.RegionMovedException;
@InterfaceAudience.Private @InterfaceAudience.Private
public class MetricsHBaseServer { public class MetricsHBaseServer {
@ -67,6 +73,34 @@ public class MetricsHBaseServer {
source.queuedAndProcessedCall(totalTime); source.queuedAndProcessedCall(totalTime);
} }
/**
 * Records a request that ended in an exception and, for a handful of commonly
 * seen exception types, also bumps the matching per-type counter.
 *
 * @param throwable the failure to record; may be null, in which case only the
 *                  overall exception count is incremented
 */
public void exception(Throwable throwable) {
  // Every call counts towards the overall total, whatever the type (or null).
  source.exception();

  // Keep some metrics for commonly seen exceptions.
  // Try to test the most common types first, and always test a child type before
  // the parent type it extends. instanceof is false for null, so a null throwable
  // falls through every check without touching a per-type counter.
  // If this list gets much larger we might have to go to a hashmap.
  if (throwable instanceof OutOfOrderScannerNextException) {
    source.outOfOrderException();
    return;
  }
  if (throwable instanceof RegionTooBusyException) {
    source.tooBusyException();
    return;
  }
  if (throwable instanceof UnknownScannerException) {
    source.unknownScannerException();
    return;
  }
  if (throwable instanceof RegionMovedException) {
    source.movedRegionException();
    return;
  }
  if (throwable instanceof NotServingRegionException) {
    source.notServingRegionException();
    return;
  }
  if (throwable instanceof FailedSanityCheckException) {
    source.failedSanityException();
  }
}
public MetricsHBaseServerSource getMetricsSource() { public MetricsHBaseServerSource getMetricsSource() {
return source; return source;
} }

View File

@ -158,6 +158,8 @@ import com.google.protobuf.TextFormat;
@InterfaceStability.Evolving @InterfaceStability.Evolving
public class RpcServer implements RpcServerInterface { public class RpcServer implements RpcServerInterface {
public static final Log LOG = LogFactory.getLog(RpcServer.class); public static final Log LOG = LogFactory.getLog(RpcServer.class);
// Single shared instance, passed to both metrics.exception() and setupResponse() when a
// call is rejected because the call queue is full, instead of allocating one per rejection.
// NOTE(review): a pre-built exception normally carries the stack trace captured when it
// was constructed, not that of the rejecting call - confirm nothing relies on its trace.
private static final CallQueueTooBigException CALL_QUEUE_TOO_BIG_EXCEPTION
= new CallQueueTooBigException();
private final boolean authorize; private final boolean authorize;
private boolean isSecurityEnabled; private boolean isSecurityEnabled;
@ -1465,6 +1467,7 @@ public class RpcServer implements RpcServerInterface {
saslServer.dispose(); saslServer.dispose();
saslServer = null; saslServer = null;
} catch (SaslException ignored) { } catch (SaslException ignored) {
// Ignored. This is being disposed of anyway.
} }
} }
} }
@ -1542,7 +1545,7 @@ public class RpcServer implements RpcServerInterface {
// Else it will be length of the data to read (or -1 if a ping). We catch the integer // Else it will be length of the data to read (or -1 if a ping). We catch the integer
// length into the 4-byte this.dataLengthBuffer. // length into the 4-byte this.dataLengthBuffer.
int count = read4Bytes(); int count = read4Bytes();
if (count < 0 || dataLengthBuffer.remaining() > 0 ) { if (count < 0 || dataLengthBuffer.remaining() > 0) {
return count; return count;
} }
@ -1787,7 +1790,8 @@ public class RpcServer implements RpcServerInterface {
new Call(id, this.service, null, null, null, null, this, new Call(id, this.service, null, null, null, null, this,
responder, totalRequestSize, null, null); responder, totalRequestSize, null, null);
ByteArrayOutputStream responseBuffer = new ByteArrayOutputStream(); ByteArrayOutputStream responseBuffer = new ByteArrayOutputStream();
setupResponse(responseBuffer, callTooBig, new CallQueueTooBigException(), metrics.exception(CALL_QUEUE_TOO_BIG_EXCEPTION);
setupResponse(responseBuffer, callTooBig, CALL_QUEUE_TOO_BIG_EXCEPTION,
"Call queue is full on " + getListenerAddress() + "Call queue is full on " + getListenerAddress() +
", is hbase.ipc.server.max.callqueue.size too small?"); ", is hbase.ipc.server.max.callqueue.size too small?");
responder.doRespond(callTooBig); responder.doRespond(callTooBig);
@ -1819,6 +1823,8 @@ public class RpcServer implements RpcServerInterface {
getHostAddress(); getHostAddress();
LOG.warn(msg, t); LOG.warn(msg, t);
metrics.exception(t);
// probably the hbase hadoop version does not match the running hadoop version // probably the hbase hadoop version does not match the running hadoop version
if (t instanceof LinkageError) { if (t instanceof LinkageError) {
t = new DoNotRetryIOException(t); t = new DoNotRetryIOException(t);
@ -2140,6 +2146,10 @@ public class RpcServer implements RpcServerInterface {
// putting it on the wire. Its needed to adhere to the pb Service Interface but we don't // putting it on the wire. Its needed to adhere to the pb Service Interface but we don't
// need to pass it over the wire. // need to pass it over the wire.
if (e instanceof ServiceException) e = e.getCause(); if (e instanceof ServiceException) e = e.getCause();
// increment the number of requests that were exceptions.
metrics.exception(e);
if (e instanceof LinkageError) throw new DoNotRetryIOException(e); if (e instanceof LinkageError) throw new DoNotRetryIOException(e);
if (e instanceof IOException) throw (IOException)e; if (e instanceof IOException) throw (IOException)e;
LOG.error("Unexpected throwable object ", e); LOG.error("Unexpected throwable object ", e);

View File

@ -553,6 +553,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
.setName(result.getClass().getName()) .setName(result.getClass().getName())
.setValue(result.toByteString()))); .setValue(result.toByteString())));
} catch (IOException ioe) { } catch (IOException ioe) {
rpcServer.getMetrics().exception(ioe);
resultOrExceptionBuilder.setException(ResponseConverter.buildException(ioe)); resultOrExceptionBuilder.setException(ResponseConverter.buildException(ioe));
} }
} else if (action.hasMutation()) { } else if (action.hasMutation()) {
@ -602,6 +603,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
// case the corresponding ResultOrException instance for the Put or Delete will be added // case the corresponding ResultOrException instance for the Put or Delete will be added
// down in the doBatchOp method call rather than up here. // down in the doBatchOp method call rather than up here.
} catch (IOException ie) { } catch (IOException ie) {
rpcServer.getMetrics().exception(ie);
resultOrExceptionBuilder = ResultOrException.newBuilder(). resultOrExceptionBuilder = ResultOrException.newBuilder().
setException(ResponseConverter.buildException(ie)); setException(ResponseConverter.buildException(ie));
} }
@ -1902,6 +1904,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
region = getRegion(regionAction.getRegion()); region = getRegion(regionAction.getRegion());
quota = getQuotaManager().checkQuota(region, regionAction.getActionList()); quota = getQuotaManager().checkQuota(region, regionAction.getActionList());
} catch (IOException e) { } catch (IOException e) {
rpcServer.getMetrics().exception(e);
regionActionResultBuilder.setException(ResponseConverter.buildException(e)); regionActionResultBuilder.setException(ResponseConverter.buildException(e));
responseBuilder.addRegionActionResult(regionActionResultBuilder.build()); responseBuilder.addRegionActionResult(regionActionResultBuilder.build());
continue; // For this region it's a failure. continue; // For this region it's a failure.
@ -1932,6 +1935,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
processed = Boolean.TRUE; processed = Boolean.TRUE;
} }
} catch (IOException e) { } catch (IOException e) {
rpcServer.getMetrics().exception(e);
// As it's atomic, we may expect it's a global failure. // As it's atomic, we may expect it's a global failure.
regionActionResultBuilder.setException(ResponseConverter.buildException(e)); regionActionResultBuilder.setException(ResponseConverter.buildException(e));
} }

View File

@ -20,6 +20,11 @@
package org.apache.hadoop.hbase.ipc; package org.apache.hadoop.hbase.ipc;
import org.apache.hadoop.hbase.CompatibilityFactory; import org.apache.hadoop.hbase.CompatibilityFactory;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.RegionTooBusyException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
import org.apache.hadoop.hbase.exceptions.RegionMovedException;
import org.apache.hadoop.hbase.testclassification.RPCTests; import org.apache.hadoop.hbase.testclassification.RPCTests;
import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.test.MetricsAssertHelper; import org.apache.hadoop.hbase.test.MetricsAssertHelper;
@ -113,6 +118,19 @@ public class TestRpcMetrics {
HELPER.assertCounter("sentBytes", 309, serverSource); HELPER.assertCounter("sentBytes", 309, serverSource);
HELPER.assertCounter("receivedBytes", 208, serverSource); HELPER.assertCounter("receivedBytes", 208, serverSource);
mrpc.exception(null);
HELPER.assertCounter("exceptions", 1, serverSource);
mrpc.exception(new RegionMovedException(ServerName.parseServerName("localhost:60020"), 100));
mrpc.exception(new RegionTooBusyException());
mrpc.exception(new OutOfOrderScannerNextException());
mrpc.exception(new NotServingRegionException());
HELPER.assertCounter("exceptions.RegionMovedException", 1, serverSource);
HELPER.assertCounter("exceptions.RegionTooBusyException", 1, serverSource);
HELPER.assertCounter("exceptions.OutOfOrderScannerNextException", 1, serverSource);
HELPER.assertCounter("exceptions.NotServingRegionException", 1, serverSource);
HELPER.assertCounter("exceptions", 5, serverSource);
} }
} }