HBASE-15242: add client side metrics for timeout and remote exceptions. (#5023)
Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
parent
3f25f33bc3
commit
0747f0c934
|
@ -37,6 +37,7 @@ import java.util.function.Supplier;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.hadoop.ipc.RemoteException;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
|
||||||
import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors.MethodDescriptor;
|
import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors.MethodDescriptor;
|
||||||
|
@ -118,6 +119,9 @@ public final class MetricsConnection implements StatisticTrackable {
|
||||||
|
|
||||||
private static final String CNT_BASE = "rpcCount_";
|
private static final String CNT_BASE = "rpcCount_";
|
||||||
private static final String FAILURE_CNT_BASE = "rpcFailureCount_";
|
private static final String FAILURE_CNT_BASE = "rpcFailureCount_";
|
||||||
|
private static final String TOTAL_EXCEPTION_CNT = "rpcTotalExceptions";
|
||||||
|
private static final String LOCAL_EXCEPTION_CNT_BASE = "rpcLocalExceptions_";
|
||||||
|
private static final String REMOTE_EXCEPTION_CNT_BASE = "rpcRemoteExceptions_";
|
||||||
private static final String DRTN_BASE = "rpcCallDurationMs_";
|
private static final String DRTN_BASE = "rpcCallDurationMs_";
|
||||||
private static final String REQ_BASE = "rpcCallRequestSizeBytes_";
|
private static final String REQ_BASE = "rpcCallRequestSizeBytes_";
|
||||||
private static final String RESP_BASE = "rpcCallResponseSizeBytes_";
|
private static final String RESP_BASE = "rpcCallResponseSizeBytes_";
|
||||||
|
@ -638,7 +642,7 @@ public final class MetricsConnection implements StatisticTrackable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Report RPC context to metrics system. */
|
/** Report RPC context to metrics system. */
|
||||||
public void updateRpc(MethodDescriptor method, Message param, CallStats stats, boolean failed) {
|
public void updateRpc(MethodDescriptor method, Message param, CallStats stats, Throwable e) {
|
||||||
int callsPerServer = stats.getConcurrentCallsPerServer();
|
int callsPerServer = stats.getConcurrentCallsPerServer();
|
||||||
if (callsPerServer > 0) {
|
if (callsPerServer > 0) {
|
||||||
concurrentCallsPerServerHist.update(callsPerServer);
|
concurrentCallsPerServerHist.update(callsPerServer);
|
||||||
|
@ -646,8 +650,19 @@ public final class MetricsConnection implements StatisticTrackable {
|
||||||
// Update the counter that tracks RPCs by type.
|
// Update the counter that tracks RPCs by type.
|
||||||
final String methodName = method.getService().getName() + "_" + method.getName();
|
final String methodName = method.getService().getName() + "_" + method.getName();
|
||||||
getMetric(CNT_BASE + methodName, rpcCounters, counterFactory).inc();
|
getMetric(CNT_BASE + methodName, rpcCounters, counterFactory).inc();
|
||||||
if (failed) {
|
if (e != null) {
|
||||||
getMetric(FAILURE_CNT_BASE + methodName, rpcCounters, counterFactory).inc();
|
getMetric(FAILURE_CNT_BASE + methodName, rpcCounters, counterFactory).inc();
|
||||||
|
getMetric(TOTAL_EXCEPTION_CNT, rpcCounters, counterFactory).inc();
|
||||||
|
if (e instanceof RemoteException) {
|
||||||
|
String fullClassName = ((RemoteException) e).getClassName();
|
||||||
|
String simpleClassName = (fullClassName != null)
|
||||||
|
? fullClassName.substring(fullClassName.lastIndexOf(".") + 1)
|
||||||
|
: "unknown";
|
||||||
|
getMetric(REMOTE_EXCEPTION_CNT_BASE + simpleClassName, rpcCounters, counterFactory).inc();
|
||||||
|
} else {
|
||||||
|
getMetric(LOCAL_EXCEPTION_CNT_BASE + e.getClass().getSimpleName(), rpcCounters,
|
||||||
|
counterFactory).inc();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// this implementation is tied directly to protobuf implementation details. would be better
|
// this implementation is tied directly to protobuf implementation details. would be better
|
||||||
// if we could dispatch based on something static, ie, request Message type.
|
// if we could dispatch based on something static, ie, request Message type.
|
||||||
|
|
|
@ -375,16 +375,15 @@ public abstract class AbstractRpcClient<T extends RpcConnection> implements RpcC
|
||||||
private void onCallFinished(Call call, HBaseRpcController hrc, Address addr,
|
private void onCallFinished(Call call, HBaseRpcController hrc, Address addr,
|
||||||
RpcCallback<Message> callback) {
|
RpcCallback<Message> callback) {
|
||||||
call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.getStartTime());
|
call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.getStartTime());
|
||||||
final boolean failed = (call.error != null) ? true : false;
|
|
||||||
if (metrics != null) {
|
if (metrics != null) {
|
||||||
metrics.updateRpc(call.md, call.param, call.callStats, failed);
|
metrics.updateRpc(call.md, call.param, call.callStats, call.error);
|
||||||
}
|
}
|
||||||
if (LOG.isTraceEnabled()) {
|
if (LOG.isTraceEnabled()) {
|
||||||
LOG.trace("CallId: {}, call: {}, startTime: {}ms, callTime: {}ms, status: {}", call.id,
|
LOG.trace("CallId: {}, call: {}, startTime: {}ms, callTime: {}ms, status: {}", call.id,
|
||||||
call.md.getName(), call.getStartTime(), call.callStats.getCallTimeMs(),
|
call.md.getName(), call.getStartTime(), call.callStats.getCallTimeMs(),
|
||||||
failed ? "failed" : "successful");
|
call.error != null ? "failed" : "successful");
|
||||||
}
|
}
|
||||||
if (failed) {
|
if (call.error != null) {
|
||||||
if (call.error instanceof RemoteException) {
|
if (call.error instanceof RemoteException) {
|
||||||
call.error.fillInStackTrace();
|
call.error.fillInStackTrace();
|
||||||
hrc.setFailed(call.error);
|
hrc.setFailed(call.error);
|
||||||
|
|
|
@ -33,6 +33,8 @@ import java.util.concurrent.Executors;
|
||||||
import java.util.concurrent.ThreadPoolExecutor;
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
|
import org.apache.hadoop.hbase.ipc.CallTimeoutException;
|
||||||
|
import org.apache.hadoop.hbase.ipc.RemoteWithExtrasException;
|
||||||
import org.apache.hadoop.hbase.security.User;
|
import org.apache.hadoop.hbase.security.User;
|
||||||
import org.apache.hadoop.hbase.testclassification.ClientTests;
|
import org.apache.hadoop.hbase.testclassification.ClientTests;
|
||||||
import org.apache.hadoop.hbase.testclassification.MetricsTests;
|
import org.apache.hadoop.hbase.testclassification.MetricsTests;
|
||||||
|
@ -177,51 +179,77 @@ public class TestMetricsConnection {
|
||||||
|
|
||||||
for (int i = 0; i < loop; i++) {
|
for (int i = 0; i < loop; i++) {
|
||||||
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Get"),
|
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Get"),
|
||||||
GetRequest.getDefaultInstance(), MetricsConnection.newCallStats(), false);
|
GetRequest.getDefaultInstance(), MetricsConnection.newCallStats(), null);
|
||||||
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Scan"),
|
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Scan"),
|
||||||
ScanRequest.getDefaultInstance(), MetricsConnection.newCallStats(), false);
|
ScanRequest.getDefaultInstance(), MetricsConnection.newCallStats(),
|
||||||
|
new RemoteWithExtrasException("java.io.IOException", null, false, false));
|
||||||
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Multi"),
|
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Multi"),
|
||||||
MultiRequest.getDefaultInstance(), MetricsConnection.newCallStats(), true);
|
MultiRequest.getDefaultInstance(), MetricsConnection.newCallStats(),
|
||||||
|
new CallTimeoutException("test with CallTimeoutException"));
|
||||||
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
|
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
|
||||||
MutateRequest.newBuilder()
|
MutateRequest.newBuilder()
|
||||||
.setMutation(ProtobufUtil.toMutation(MutationType.APPEND, new Append(foo)))
|
.setMutation(ProtobufUtil.toMutation(MutationType.APPEND, new Append(foo)))
|
||||||
.setRegion(region).build(),
|
.setRegion(region).build(),
|
||||||
MetricsConnection.newCallStats(), false);
|
MetricsConnection.newCallStats(), null);
|
||||||
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
|
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
|
||||||
MutateRequest.newBuilder()
|
MutateRequest.newBuilder()
|
||||||
.setMutation(ProtobufUtil.toMutation(MutationType.DELETE, new Delete(foo)))
|
.setMutation(ProtobufUtil.toMutation(MutationType.DELETE, new Delete(foo)))
|
||||||
.setRegion(region).build(),
|
.setRegion(region).build(),
|
||||||
MetricsConnection.newCallStats(), false);
|
MetricsConnection.newCallStats(), null);
|
||||||
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
|
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
|
||||||
MutateRequest.newBuilder()
|
MutateRequest.newBuilder()
|
||||||
.setMutation(ProtobufUtil.toMutation(MutationType.INCREMENT, new Increment(foo)))
|
.setMutation(ProtobufUtil.toMutation(MutationType.INCREMENT, new Increment(foo)))
|
||||||
.setRegion(region).build(),
|
.setRegion(region).build(),
|
||||||
MetricsConnection.newCallStats(), false);
|
MetricsConnection.newCallStats(), null);
|
||||||
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
|
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
|
||||||
MutateRequest.newBuilder()
|
MutateRequest.newBuilder()
|
||||||
.setMutation(ProtobufUtil.toMutation(MutationType.PUT, new Put(foo))).setRegion(region)
|
.setMutation(ProtobufUtil.toMutation(MutationType.PUT, new Put(foo))).setRegion(region)
|
||||||
.build(),
|
.build(),
|
||||||
MetricsConnection.newCallStats(), false);
|
MetricsConnection.newCallStats(), null);
|
||||||
}
|
}
|
||||||
|
|
||||||
final String rpcCountPrefix = "rpcCount_" + ClientService.getDescriptor().getName() + "_";
|
final String rpcCountPrefix = "rpcCount_" + ClientService.getDescriptor().getName() + "_";
|
||||||
final String rpcFailureCountPrefix =
|
final String rpcFailureCountPrefix =
|
||||||
"rpcFailureCount_" + ClientService.getDescriptor().getName() + "_";
|
"rpcFailureCount_" + ClientService.getDescriptor().getName() + "_";
|
||||||
String metricKey;
|
String metricKey;
|
||||||
long metricVal;
|
long metricVal;
|
||||||
Counter counter;
|
Counter counter;
|
||||||
for (String method : new String[] { "Get", "Scan", "Mutate" }) {
|
|
||||||
|
for (String method : new String[] { "Get", "Scan", "Multi", "Mutate" }) {
|
||||||
metricKey = rpcCountPrefix + method;
|
metricKey = rpcCountPrefix + method;
|
||||||
metricVal = METRICS.getRpcCounters().get(metricKey).getCount();
|
metricVal = METRICS.getRpcCounters().get(metricKey).getCount();
|
||||||
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal >= loop);
|
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal >= loop);
|
||||||
|
|
||||||
metricKey = rpcFailureCountPrefix + method;
|
metricKey = rpcFailureCountPrefix + method;
|
||||||
counter = METRICS.getRpcCounters().get(metricKey);
|
counter = METRICS.getRpcCounters().get(metricKey);
|
||||||
metricVal = (counter != null) ? counter.getCount() : 0;
|
metricVal = (counter != null) ? counter.getCount() : 0;
|
||||||
|
if (method.equals("Get") || method.equals("Mutate")) {
|
||||||
|
// no failure
|
||||||
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == 0);
|
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == 0);
|
||||||
|
} else {
|
||||||
|
// has failure
|
||||||
|
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);
|
||||||
}
|
}
|
||||||
metricKey = rpcFailureCountPrefix + "Multi";
|
}
|
||||||
|
|
||||||
|
// remote exception
|
||||||
|
metricKey = "rpcRemoteExceptions_IOException";
|
||||||
counter = METRICS.getRpcCounters().get(metricKey);
|
counter = METRICS.getRpcCounters().get(metricKey);
|
||||||
metricVal = (counter != null) ? counter.getCount() : 0;
|
metricVal = (counter != null) ? counter.getCount() : 0;
|
||||||
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);
|
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);
|
||||||
|
|
||||||
|
// local exception
|
||||||
|
metricKey = "rpcLocalExceptions_CallTimeoutException";
|
||||||
|
counter = METRICS.getRpcCounters().get(metricKey);
|
||||||
|
metricVal = (counter != null) ? counter.getCount() : 0;
|
||||||
|
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);
|
||||||
|
|
||||||
|
// total exception
|
||||||
|
metricKey = "rpcTotalExceptions";
|
||||||
|
counter = METRICS.getRpcCounters().get(metricKey);
|
||||||
|
metricVal = (counter != null) ? counter.getCount() : 0;
|
||||||
|
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop * 2);
|
||||||
|
|
||||||
for (MetricsConnection.CallTracker t : new MetricsConnection.CallTracker[] {
|
for (MetricsConnection.CallTracker t : new MetricsConnection.CallTracker[] {
|
||||||
METRICS.getGetTracker(), METRICS.getScanTracker(), METRICS.getMultiTracker(),
|
METRICS.getGetTracker(), METRICS.getScanTracker(), METRICS.getMultiTracker(),
|
||||||
METRICS.getAppendTracker(), METRICS.getDeleteTracker(), METRICS.getIncrementTracker(),
|
METRICS.getAppendTracker(), METRICS.getDeleteTracker(), METRICS.getIncrementTracker(),
|
||||||
|
|
Loading…
Reference in New Issue