HBASE-27540 add client side counter metrics for failed rpc calls (#4929)

Signed-off-by: Bryan Beaudreault <bbeaudreault@apache.org>
This commit is contained in:
Victor 2022-12-21 05:49:43 -08:00 committed by GitHub
parent 222ec684d6
commit db2646be91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 35 additions and 14 deletions

View File

@ -117,6 +117,7 @@ public final class MetricsConnection implements StatisticTrackable {
} }
private static final String CNT_BASE = "rpcCount_"; private static final String CNT_BASE = "rpcCount_";
private static final String FAILURE_CNT_BASE = "rpcFailureCount_";
private static final String DRTN_BASE = "rpcCallDurationMs_"; private static final String DRTN_BASE = "rpcCallDurationMs_";
private static final String REQ_BASE = "rpcCallRequestSizeBytes_"; private static final String REQ_BASE = "rpcCallRequestSizeBytes_";
private static final String RESP_BASE = "rpcCallResponseSizeBytes_"; private static final String RESP_BASE = "rpcCallResponseSizeBytes_";
@ -637,7 +638,7 @@ public final class MetricsConnection implements StatisticTrackable {
} }
/** Report RPC context to metrics system. */ /** Report RPC context to metrics system. */
public void updateRpc(MethodDescriptor method, Message param, CallStats stats) { public void updateRpc(MethodDescriptor method, Message param, CallStats stats, boolean failed) {
int callsPerServer = stats.getConcurrentCallsPerServer(); int callsPerServer = stats.getConcurrentCallsPerServer();
if (callsPerServer > 0) { if (callsPerServer > 0) {
concurrentCallsPerServerHist.update(callsPerServer); concurrentCallsPerServerHist.update(callsPerServer);
@ -645,6 +646,9 @@ public final class MetricsConnection implements StatisticTrackable {
// Update the counter that tracks RPCs by type. // Update the counter that tracks RPCs by type.
final String methodName = method.getService().getName() + "_" + method.getName(); final String methodName = method.getService().getName() + "_" + method.getName();
getMetric(CNT_BASE + methodName, rpcCounters, counterFactory).inc(); getMetric(CNT_BASE + methodName, rpcCounters, counterFactory).inc();
if (failed) {
getMetric(FAILURE_CNT_BASE + methodName, rpcCounters, counterFactory).inc();
}
// this implementation is tied directly to protobuf implementation details. would be better // this implementation is tied directly to protobuf implementation details. would be better
// if we could dispatch based on something static, ie, request Message type. // if we could dispatch based on something static, ie, request Message type.
if (method.getService() == ClientService.getDescriptor()) { if (method.getService() == ClientService.getDescriptor()) {

View File

@ -375,14 +375,16 @@ public abstract class AbstractRpcClient<T extends RpcConnection> implements RpcC
private void onCallFinished(Call call, HBaseRpcController hrc, Address addr, private void onCallFinished(Call call, HBaseRpcController hrc, Address addr,
RpcCallback<Message> callback) { RpcCallback<Message> callback) {
call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.getStartTime()); call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.getStartTime());
final boolean failed = (call.error != null) ? true : false;
if (metrics != null) { if (metrics != null) {
metrics.updateRpc(call.md, call.param, call.callStats); metrics.updateRpc(call.md, call.param, call.callStats, failed);
} }
if (LOG.isTraceEnabled()) { if (LOG.isTraceEnabled()) {
LOG.trace("CallId: {}, call: {}, startTime: {}ms, callTime: {}ms", call.id, call.md.getName(), LOG.trace("CallId: {}, call: {}, startTime: {}ms, callTime: {}ms, status: {}", call.id,
call.getStartTime(), call.callStats.getCallTimeMs()); call.md.getName(), call.getStartTime(), call.callStats.getCallTimeMs(),
failed ? "failed" : "successful");
} }
if (call.error != null) { if (failed) {
if (call.error instanceof RemoteException) { if (call.error instanceof RemoteException) {
call.error.fillInStackTrace(); call.error.fillInStackTrace();
hrc.setFailed(call.error); hrc.setFailed(call.error);

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.client;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import com.codahale.metrics.Counter;
import com.codahale.metrics.RatioGauge; import com.codahale.metrics.RatioGauge;
import com.codahale.metrics.RatioGauge.Ratio; import com.codahale.metrics.RatioGauge.Ratio;
import java.io.IOException; import java.io.IOException;
@ -149,37 +150,51 @@ public class TestMetricsConnection {
for (int i = 0; i < loop; i++) { for (int i = 0; i < loop; i++) {
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Get"), METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Get"),
GetRequest.getDefaultInstance(), MetricsConnection.newCallStats()); GetRequest.getDefaultInstance(), MetricsConnection.newCallStats(), false);
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Scan"), METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Scan"),
ScanRequest.getDefaultInstance(), MetricsConnection.newCallStats()); ScanRequest.getDefaultInstance(), MetricsConnection.newCallStats(), false);
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Multi"), METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Multi"),
MultiRequest.getDefaultInstance(), MetricsConnection.newCallStats()); MultiRequest.getDefaultInstance(), MetricsConnection.newCallStats(), true);
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"), METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
MutateRequest.newBuilder() MutateRequest.newBuilder()
.setMutation(ProtobufUtil.toMutation(MutationType.APPEND, new Append(foo))) .setMutation(ProtobufUtil.toMutation(MutationType.APPEND, new Append(foo)))
.setRegion(region).build(), .setRegion(region).build(),
MetricsConnection.newCallStats()); MetricsConnection.newCallStats(), false);
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"), METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
MutateRequest.newBuilder() MutateRequest.newBuilder()
.setMutation(ProtobufUtil.toMutation(MutationType.DELETE, new Delete(foo))) .setMutation(ProtobufUtil.toMutation(MutationType.DELETE, new Delete(foo)))
.setRegion(region).build(), .setRegion(region).build(),
MetricsConnection.newCallStats()); MetricsConnection.newCallStats(), false);
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"), METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
MutateRequest.newBuilder() MutateRequest.newBuilder()
.setMutation(ProtobufUtil.toMutation(MutationType.INCREMENT, new Increment(foo))) .setMutation(ProtobufUtil.toMutation(MutationType.INCREMENT, new Increment(foo)))
.setRegion(region).build(), .setRegion(region).build(),
MetricsConnection.newCallStats()); MetricsConnection.newCallStats(), false);
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"), METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
MutateRequest.newBuilder() MutateRequest.newBuilder()
.setMutation(ProtobufUtil.toMutation(MutationType.PUT, new Put(foo))).setRegion(region) .setMutation(ProtobufUtil.toMutation(MutationType.PUT, new Put(foo))).setRegion(region)
.build(), .build(),
MetricsConnection.newCallStats()); MetricsConnection.newCallStats(), false);
} }
final String rpcCountPrefix = "rpcCount_" + ClientService.getDescriptor().getName() + "_";
final String rpcFailureCountPrefix =
"rpcFailureCount_" + ClientService.getDescriptor().getName() + "_";
String metricKey;
long metricVal;
Counter counter;
for (String method : new String[] { "Get", "Scan", "Mutate" }) { for (String method : new String[] { "Get", "Scan", "Mutate" }) {
final String metricKey = "rpcCount_" + ClientService.getDescriptor().getName() + "_" + method; metricKey = rpcCountPrefix + method;
final long metricVal = METRICS.getRpcCounters().get(metricKey).getCount(); metricVal = METRICS.getRpcCounters().get(metricKey).getCount();
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal >= loop); assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal >= loop);
metricKey = rpcFailureCountPrefix + method;
counter = METRICS.getRpcCounters().get(metricKey);
metricVal = (counter != null) ? counter.getCount() : 0;
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == 0);
} }
metricKey = rpcFailureCountPrefix + "Multi";
counter = METRICS.getRpcCounters().get(metricKey);
metricVal = (counter != null) ? counter.getCount() : 0;
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);
for (MetricsConnection.CallTracker t : new MetricsConnection.CallTracker[] { for (MetricsConnection.CallTracker t : new MetricsConnection.CallTracker[] {
METRICS.getGetTracker(), METRICS.getScanTracker(), METRICS.getMultiTracker(), METRICS.getGetTracker(), METRICS.getScanTracker(), METRICS.getMultiTracker(),
METRICS.getAppendTracker(), METRICS.getDeleteTracker(), METRICS.getIncrementTracker(), METRICS.getAppendTracker(), METRICS.getDeleteTracker(), METRICS.getIncrementTracker(),