HBASE-25677 Server+table counters on each scan #nextRaw invocation becomes a bottleneck under heavy load (#3061)

Don't have every handler update regionserver metrics on each
scan#nextRaw; instead, do a batch update just before Scan
returns. Otherwise, all running handlers end up contending
on metrics update.

M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
 Update of regionserver metrics counters moved out to the caller, where
 it can be done as a batch update instead of per-next.

M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java
 Class doc to encourage batch updating metrics.
 Remove the single-update method since it is no longer used.

M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
 Count calls to nextRaw. Update regionserver count in finally block when
 scan is done rather than per nextRaw call. Move all metrics updates to
 finally.

Signed-off-by: Reid Chan <reidchan@apache.org>
Signed-off-by: Baiqiang Zhao <ZhaoBQ>
This commit is contained in:
Michael Stack 2021-03-18 11:33:45 -07:00 committed by stack
parent c485a6bb79
commit 3331e8307a
3 changed files with 19 additions and 21 deletions

View File

@ -7282,9 +7282,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
metricsRegion.updateReadRequestCount(); metricsRegion.updateReadRequestCount();
} }
} }
if (rsServices != null && rsServices.getMetrics() != null) {
rsServices.getMetrics().updateReadQueryMeter(getRegionInfo().getTable());
}
// If the size limit was reached it means a partial Result is being returned. Returning a // If the size limit was reached it means a partial Result is being returned. Returning a
// partial Result means that we should not reset the filters; filters should only be reset in // partial Result means that we should not reset the filters; filters should only be reset in

View File

@ -28,12 +28,11 @@ import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability; import org.apache.yetus.audience.InterfaceStability;
/** /**
* <p> * Maintains regionserver statistics and publishes them through the metrics interfaces.
* This class is for maintaining the various regionserver statistics
* and publishing them through the metrics interfaces.
* </p>
* This class has a number of metrics variables that are publicly accessible; * This class has a number of metrics variables that are publicly accessible;
* these variables (objects) have methods to update their values. * these variables (objects) have methods to update their values. Batch your updates rather than
* call on each instance else all threads will do nothing but contend trying to maintain metric
* counters!
*/ */
@InterfaceStability.Evolving @InterfaceStability.Evolving
@InterfaceAudience.Private @InterfaceAudience.Private
@ -52,7 +51,9 @@ public class MetricsRegionServer {
private MetricRegistry metricRegistry; private MetricRegistry metricRegistry;
private Timer bulkLoadTimer; private Timer bulkLoadTimer;
// Incremented once for each call to Scan#nextRaw
private Meter serverReadQueryMeter; private Meter serverReadQueryMeter;
// Incremented per write.
private Meter serverWriteQueryMeter; private Meter serverWriteQueryMeter;
protected long slowMetricTime; protected long slowMetricTime;
protected static final int DEFAULT_SLOW_METRIC_TIME = 1000; // milliseconds protected static final int DEFAULT_SLOW_METRIC_TIME = 1000; // milliseconds
@ -272,13 +273,6 @@ public class MetricsRegionServer {
this.serverReadQueryMeter.mark(count); this.serverReadQueryMeter.mark(count);
} }
public void updateReadQueryMeter(TableName tn) {
if (tableMetrics != null && tn != null) {
tableMetrics.updateTableReadQueryMeter(tn);
}
this.serverReadQueryMeter.mark();
}
public void updateWriteQueryMeter(TableName tn, long count) { public void updateWriteQueryMeter(TableName tn, long count) {
if (tableMetrics != null && tn != null) { if (tableMetrics != null && tn != null) {
tableMetrics.updateTableWriteQueryMeter(tn, count); tableMetrics.updateTableWriteQueryMeter(tn, count);

View File

@ -3282,10 +3282,13 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
// arbitrary 32. TODO: keep record of general size of results being returned. // arbitrary 32. TODO: keep record of general size of results being returned.
List<Cell> values = new ArrayList<>(32); List<Cell> values = new ArrayList<>(32);
region.startRegionOperation(Operation.SCAN); region.startRegionOperation(Operation.SCAN);
long before = EnvironmentEdgeManager.currentTime();
// Used to check if we've matched the row limit set on the Scan
int numOfCompleteRows = 0;
// Count of times we call nextRaw; can be > numOfCompleteRows.
int numOfNextRawCalls = 0;
try { try {
int numOfResults = 0; int numOfResults = 0;
int numOfCompleteRows = 0;
long before = EnvironmentEdgeManager.currentTime();
synchronized (scanner) { synchronized (scanner) {
boolean stale = (region.getRegionInfo().getReplicaId() != 0); boolean stale = (region.getRegionInfo().getReplicaId() != 0);
boolean clientHandlesPartials = boolean clientHandlesPartials =
@ -3341,6 +3344,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
// Collect values to be returned here // Collect values to be returned here
moreRows = scanner.nextRaw(values, scannerContext); moreRows = scanner.nextRaw(values, scannerContext);
numOfNextRawCalls++;
if (!values.isEmpty()) { if (!values.isEmpty()) {
if (limitOfRows > 0) { if (limitOfRows > 0) {
@ -3432,18 +3436,21 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
builder.setScanMetrics(metricBuilder.build()); builder.setScanMetrics(metricBuilder.build());
} }
} }
} finally {
region.closeRegionOperation();
// Update serverside metrics, even on error.
long end = EnvironmentEdgeManager.currentTime(); long end = EnvironmentEdgeManager.currentTime();
long responseCellSize = context != null ? context.getResponseCellSize() : 0; long responseCellSize = context != null ? context.getResponseCellSize() : 0;
region.getMetrics().updateScanTime(end - before); region.getMetrics().updateScanTime(end - before);
final MetricsRegionServer metricsRegionServer = regionServer.getMetrics(); final MetricsRegionServer metricsRegionServer = regionServer.getMetrics();
if (metricsRegionServer != null) { if (metricsRegionServer != null) {
metricsRegionServer.updateScanSize( metricsRegionServer.updateScanSize(
region.getTableDescriptor().getTableName(), responseCellSize); region.getTableDescriptor().getTableName(), responseCellSize);
metricsRegionServer.updateScanTime( metricsRegionServer.updateScanTime(
region.getTableDescriptor().getTableName(), end - before); region.getTableDescriptor().getTableName(), end - before);
metricsRegionServer.updateReadQueryMeter(region.getRegionInfo().getTable(),
numOfNextRawCalls);
} }
} finally {
region.closeRegionOperation();
} }
// coprocessor postNext hook // coprocessor postNext hook
if (region.getCoprocessorHost() != null) { if (region.getCoprocessorHost() != null) {