HBASE-13477 Create metrics on failed requests

Summary: Add metrics that count how many requests result in an exception, broken down by exception type.

Test Plan: behold unit tests.

Differential Revision: https://reviews.facebook.net/D37167
This commit is contained in:
Elliott Clark 2015-04-15 13:49:55 -07:00
parent 3ccae37866
commit 2c5dc53a32
7 changed files with 160 additions and 13 deletions

View File

@ -58,6 +58,16 @@ public interface MetricsHBaseServerSource extends BaseSource {
String NUM_ACTIVE_HANDLER_NAME = "numActiveHandler";
String NUM_ACTIVE_HANDLER_DESC = "Number of active rpc handlers.";
// Name and description of the aggregate counter for requests that ended in an exception.
String EXCEPTIONS_NAME="exceptions";
String EXCEPTIONS_DESC="Exceptions caused by requests";
// Description shared by every per-exception-type counter named below.
String EXCEPTIONS_TYPE_DESC="Number of requests that resulted in the specified type of Exception";
// Per-type counter names: the "exceptions." prefix followed by the exception's simple class name.
String EXCEPTIONS_OOO_NAME="exceptions.OutOfOrderScannerNextException";
String EXCEPTIONS_BUSY_NAME="exceptions.RegionTooBusyException";
String EXCEPTIONS_UNKNOWN_NAME="exceptions.UnknownScannerException";
String EXCEPTIONS_SANITY_NAME="exceptions.FailedSanityCheckException";
String EXCEPTIONS_MOVED_NAME="exceptions.RegionMovedException";
String EXCEPTIONS_NSRE_NAME="exceptions.NotServingRegionException";
void authorizationSuccess();
void authorizationFailure();
@ -66,6 +76,18 @@ public interface MetricsHBaseServerSource extends BaseSource {
void authenticationFailure();
void exception();
/**
 * Different types of exceptions.
 * Records one request that failed with an OutOfOrderScannerNextException.
 */
void outOfOrderException();
/** Records one request that failed with a FailedSanityCheckException. */
void failedSanityException();
/** Records one request that failed with a RegionMovedException. */
void movedRegionException();
/** Records one request that failed with a NotServingRegionException. */
void notServingRegionException();
/** Records one request that failed with an UnknownScannerException. */
void unknownScannerException();
/** Records one request that failed with a RegionTooBusyException. */
void tooBusyException();
void sentBytes(long count);
void receivedBytes(int count);

View File

@ -25,7 +25,7 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
@InterfaceAudience.Private
public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourceFactory {
private static enum SourceStorage {
private enum SourceStorage {
INSTANCE;
HashMap<String, MetricsHBaseServerSource>
sources =
@ -39,7 +39,7 @@ public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourc
}
private static synchronized MetricsHBaseServerSource getSource(String serverName,
MetricsHBaseServerWrapper wrapper) {
MetricsHBaseServerWrapper wrap) {
String context = createContextName(serverName);
MetricsHBaseServerSource source = SourceStorage.INSTANCE.sources.get(context);
@ -49,7 +49,7 @@ public class MetricsHBaseServerSourceFactoryImpl extends MetricsHBaseServerSourc
context,
METRICS_DESCRIPTION,
context.toLowerCase(),
context + METRICS_JMX_CONTEXT_SUFFIX, wrapper);
context + METRICS_JMX_CONTEXT_SUFFIX, wrap);
//Store back in storage
SourceStorage.INSTANCE.sources.put(context, source);

View File

@ -38,6 +38,16 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl
private final MutableCounterLong authenticationFailures;
private final MutableCounterLong sentBytes;
private final MutableCounterLong receivedBytes;
// Aggregate counter, registered in the metrics registry under EXCEPTIONS_NAME.
private final MutableCounterLong exceptions;
// Per-exception-type counters, each registered under its matching EXCEPTIONS_*_NAME
// with the shared EXCEPTIONS_TYPE_DESC description.
private final MutableCounterLong exceptionsOOO;
private final MutableCounterLong exceptionsBusy;
private final MutableCounterLong exceptionsUnknown;
private final MutableCounterLong exceptionsSanity;
private final MutableCounterLong exceptionsNSRE;
private final MutableCounterLong exceptionsMoved;
private MutableHistogram queueCallTime;
private MutableHistogram processCallTime;
private MutableHistogram totalCallTime;
@ -51,18 +61,32 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl
this.wrapper = wrapper;
this.authorizationSuccesses = this.getMetricsRegistry().newCounter(AUTHORIZATION_SUCCESSES_NAME,
AUTHORIZATION_SUCCESSES_DESC, 0l);
AUTHORIZATION_SUCCESSES_DESC, 0L);
this.authorizationFailures = this.getMetricsRegistry().newCounter(AUTHORIZATION_FAILURES_NAME,
AUTHORIZATION_FAILURES_DESC, 0l);
AUTHORIZATION_FAILURES_DESC, 0L);
this.exceptions = this.getMetricsRegistry().newCounter(EXCEPTIONS_NAME, EXCEPTIONS_DESC, 0L);
this.exceptionsOOO = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_OOO_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.exceptionsBusy = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_BUSY_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.exceptionsUnknown = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_UNKNOWN_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.exceptionsSanity = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_SANITY_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.exceptionsMoved = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_MOVED_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.exceptionsNSRE = this.getMetricsRegistry()
.newCounter(EXCEPTIONS_NSRE_NAME, EXCEPTIONS_TYPE_DESC, 0L);
this.authenticationSuccesses = this.getMetricsRegistry().newCounter(
AUTHENTICATION_SUCCESSES_NAME, AUTHENTICATION_SUCCESSES_DESC, 0l);
AUTHENTICATION_SUCCESSES_NAME, AUTHENTICATION_SUCCESSES_DESC, 0L);
this.authenticationFailures = this.getMetricsRegistry().newCounter(AUTHENTICATION_FAILURES_NAME,
AUTHENTICATION_FAILURES_DESC, 0l);
AUTHENTICATION_FAILURES_DESC, 0L);
this.sentBytes = this.getMetricsRegistry().newCounter(SENT_BYTES_NAME,
SENT_BYTES_DESC, 0l);
SENT_BYTES_DESC, 0L);
this.receivedBytes = this.getMetricsRegistry().newCounter(RECEIVED_BYTES_NAME,
RECEIVED_BYTES_DESC, 0l);
RECEIVED_BYTES_DESC, 0L);
this.queueCallTime = this.getMetricsRegistry().newHistogram(QUEUE_CALL_TIME_NAME,
QUEUE_CALL_TIME_DESC);
this.processCallTime = this.getMetricsRegistry().newHistogram(PROCESS_CALL_TIME_NAME,
@ -86,6 +110,41 @@ public class MetricsHBaseServerSourceImpl extends BaseSourceImpl
authenticationFailures.incr();
}
/** Bumps the aggregate "exceptions" counter by one. */
@Override
public void exception() {
  this.exceptions.incr();
}
/** Bumps the "exceptions.OutOfOrderScannerNextException" counter by one. */
@Override
public void outOfOrderException() {
  this.exceptionsOOO.incr();
}
/** Bumps the "exceptions.FailedSanityCheckException" counter by one. */
@Override
public void failedSanityException() {
  this.exceptionsSanity.incr();
}
/** Bumps the "exceptions.RegionMovedException" counter by one. */
@Override
public void movedRegionException() {
  this.exceptionsMoved.incr();
}
/** Bumps the "exceptions.NotServingRegionException" counter by one. */
@Override
public void notServingRegionException() {
  this.exceptionsNSRE.incr();
}
/** Bumps the "exceptions.UnknownScannerException" counter by one. */
@Override
public void unknownScannerException() {
  this.exceptionsUnknown.incr();
}
/** Bumps the "exceptions.RegionTooBusyException" counter by one. */
@Override
public void tooBusyException() {
  this.exceptionsBusy.incr();
}
@Override
public void authenticationSuccess() {
authenticationSuccesses.incr();

View File

@ -19,8 +19,14 @@
package org.apache.hadoop.hbase.ipc;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.RegionTooBusyException;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
import org.apache.hadoop.hbase.exceptions.RegionMovedException;
@InterfaceAudience.Private
public class MetricsHBaseServer {
@ -67,6 +73,34 @@ public class MetricsHBaseServer {
source.queuedAndProcessedCall(totalTime);
}
/**
 * Records a request that ended in an exception.
 *
 * The aggregate exception counter is always incremented, even when {@code throwable}
 * is null. In addition, at most one per-type counter is incremented when the throwable
 * is an instance of one of the commonly seen exception types checked below.
 *
 * @param throwable the failure to record; may be null, in which case only the aggregate
 *                  counter is touched
 */
public void exception(Throwable throwable) {
  source.exception();

  // Nothing to classify without an actual throwable.
  if (throwable == null) {
    return;
  }

  // Keep some metrics for commonly seen exceptions.
  //
  // Try and put the most common types first.
  // Place child types before the parent type that they extend.
  //
  // If this gets much larger we might have to go to a hashmap.
  if (throwable instanceof OutOfOrderScannerNextException) {
    source.outOfOrderException();
    return;
  }
  if (throwable instanceof RegionTooBusyException) {
    source.tooBusyException();
    return;
  }
  if (throwable instanceof UnknownScannerException) {
    source.unknownScannerException();
    return;
  }
  // NOTE(review): RegionMovedException is tested ahead of NotServingRegionException,
  // presumably because it is a subclass of it - keep this relative order.
  if (throwable instanceof RegionMovedException) {
    source.movedRegionException();
    return;
  }
  if (throwable instanceof NotServingRegionException) {
    source.notServingRegionException();
    return;
  }
  if (throwable instanceof FailedSanityCheckException) {
    source.failedSanityException();
  }
}
/**
 * @return the metrics source that this class forwards its updates to
 */
public MetricsHBaseServerSource getMetricsSource() {
  return this.source;
}

View File

@ -158,6 +158,8 @@ import com.google.protobuf.TextFormat;
@InterfaceStability.Evolving
public class RpcServer implements RpcServerInterface {
public static final Log LOG = LogFactory.getLog(RpcServer.class);
// Single shared instance that is passed to metrics.exception(...) and setupResponse(...)
// whenever a call is rejected because the call queue is full, instead of allocating a new
// exception object for every rejected call.
// NOTE(review): the instance is created once when this class is initialized, so any stack
// trace it carries will not point at the rejecting call site - confirm nothing relies on it.
private static final CallQueueTooBigException CALL_QUEUE_TOO_BIG_EXCEPTION
= new CallQueueTooBigException();
private final boolean authorize;
private boolean isSecurityEnabled;
@ -1465,6 +1467,7 @@ public class RpcServer implements RpcServerInterface {
saslServer.dispose();
saslServer = null;
} catch (SaslException ignored) {
// Ignored. This is being disposed of anyway.
}
}
}
@ -1787,7 +1790,8 @@ public class RpcServer implements RpcServerInterface {
new Call(id, this.service, null, null, null, null, this,
responder, totalRequestSize, null, null);
ByteArrayOutputStream responseBuffer = new ByteArrayOutputStream();
setupResponse(responseBuffer, callTooBig, new CallQueueTooBigException(),
metrics.exception(CALL_QUEUE_TOO_BIG_EXCEPTION);
setupResponse(responseBuffer, callTooBig, CALL_QUEUE_TOO_BIG_EXCEPTION,
"Call queue is full on " + getListenerAddress() +
", is hbase.ipc.server.max.callqueue.size too small?");
responder.doRespond(callTooBig);
@ -1819,6 +1823,8 @@ public class RpcServer implements RpcServerInterface {
getHostAddress();
LOG.warn(msg, t);
metrics.exception(t);
// probably the hbase hadoop version does not match the running hadoop version
if (t instanceof LinkageError) {
t = new DoNotRetryIOException(t);
@ -2140,6 +2146,10 @@ public class RpcServer implements RpcServerInterface {
// putting it on the wire. Its needed to adhere to the pb Service Interface but we don't
// need to pass it over the wire.
if (e instanceof ServiceException) e = e.getCause();
// increment the number of requests that were exceptions.
metrics.exception(e);
if (e instanceof LinkageError) throw new DoNotRetryIOException(e);
if (e instanceof IOException) throw (IOException)e;
LOG.error("Unexpected throwable object ", e);

View File

@ -553,6 +553,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
.setName(result.getClass().getName())
.setValue(result.toByteString())));
} catch (IOException ioe) {
rpcServer.getMetrics().exception(ioe);
resultOrExceptionBuilder.setException(ResponseConverter.buildException(ioe));
}
} else if (action.hasMutation()) {
@ -602,6 +603,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
// case the corresponding ResultOrException instance for the Put or Delete will be added
// down in the doBatchOp method call rather than up here.
} catch (IOException ie) {
rpcServer.getMetrics().exception(ie);
resultOrExceptionBuilder = ResultOrException.newBuilder().
setException(ResponseConverter.buildException(ie));
}
@ -1902,6 +1904,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
region = getRegion(regionAction.getRegion());
quota = getQuotaManager().checkQuota(region, regionAction.getActionList());
} catch (IOException e) {
rpcServer.getMetrics().exception(e);
regionActionResultBuilder.setException(ResponseConverter.buildException(e));
responseBuilder.addRegionActionResult(regionActionResultBuilder.build());
continue; // For this region it's a failure.
@ -1932,6 +1935,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
processed = Boolean.TRUE;
}
} catch (IOException e) {
rpcServer.getMetrics().exception(e);
// As it's atomic, we may expect it's a global failure.
regionActionResultBuilder.setException(ResponseConverter.buildException(e));
}

View File

@ -20,6 +20,11 @@
package org.apache.hadoop.hbase.ipc;
import org.apache.hadoop.hbase.CompatibilityFactory;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.RegionTooBusyException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException;
import org.apache.hadoop.hbase.exceptions.RegionMovedException;
import org.apache.hadoop.hbase.testclassification.RPCTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.test.MetricsAssertHelper;
@ -113,6 +118,19 @@ public class TestRpcMetrics {
HELPER.assertCounter("sentBytes", 309, serverSource);
HELPER.assertCounter("receivedBytes", 208, serverSource);
mrpc.exception(null);
HELPER.assertCounter("exceptions", 1, serverSource);
mrpc.exception(new RegionMovedException(ServerName.parseServerName("localhost:60020"), 100));
mrpc.exception(new RegionTooBusyException());
mrpc.exception(new OutOfOrderScannerNextException());
mrpc.exception(new NotServingRegionException());
HELPER.assertCounter("exceptions.RegionMovedException", 1, serverSource);
HELPER.assertCounter("exceptions.RegionTooBusyException", 1, serverSource);
HELPER.assertCounter("exceptions.OutOfOrderScannerNextException", 1, serverSource);
HELPER.assertCounter("exceptions.NotServingRegionException", 1, serverSource);
HELPER.assertCounter("exceptions", 5, serverSource);
}
}