From 7ac1c8bbf8da31586e786819ab515236ce6dbbdc Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Thu, 18 Mar 2021 11:33:45 -0700 Subject: [PATCH] HBASE-25677 Server+table counters on each scan #nextRaw invocation becomes a bottleneck when heavy load (#3061) Don't have every handler update regionserver metrics on each scan#nextRaw; instead, do a batch update just before Scan returns. Otherwise, all running handlers end up contending on metrics update. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Update of regionserver metrics counters moved out to caller where can be done as a batch update instead of per-next. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java Class doc to encourage batch updating metrics. Remove the single update as unused anymore. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java Count calls to nextRaw. Update regionserver count in finally block when scan is done rather than per nextRaw call. Move all metrics updates to finally. Signed-off-by: Reid Chan Signed-off-by: Baiqiang Zhao --- .../regionserver/MetricsRegionServer.java | 18 ++++++------------ .../hbase/regionserver/RSRpcServices.java | 19 +++++++++++++------ .../hbase/regionserver/RegionScannerImpl.java | 5 +---- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java index 86b97a2afb9..1d9fb2d03e2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java @@ -28,12 +28,11 @@ import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceStability; /** - *

- * This class is for maintaining the various regionserver statistics - * and publishing them through the metrics interfaces. - *

+ * Maintains regionserver statistics and publishes them through the metrics interfaces. * This class has a number of metrics variables that are publicly accessible; - * these variables (objects) have methods to update their values. + * these variables (objects) have methods to update their values. Batch your updates rather than + * call on each instance else all threads will do nothing but contend trying to maintain metric + * counters! */ @InterfaceStability.Evolving @InterfaceAudience.Private @@ -52,7 +51,9 @@ public class MetricsRegionServer { private MetricRegistry metricRegistry; private Timer bulkLoadTimer; + // Incremented once for each call to Scan#nextRaw private Meter serverReadQueryMeter; + // Incremented per write. private Meter serverWriteQueryMeter; protected long slowMetricTime; protected static final int DEFAULT_SLOW_METRIC_TIME = 1000; // milliseconds @@ -286,13 +287,6 @@ public class MetricsRegionServer { this.serverReadQueryMeter.mark(count); } - public void updateReadQueryMeter(TableName tn) { - if (tableMetrics != null && tn != null) { - tableMetrics.updateTableReadQueryMeter(tn); - } - this.serverReadQueryMeter.mark(); - } - public void updateWriteQueryMeter(TableName tn, long count) { if (tableMetrics != null && tn != null) { tableMetrics.updateTableWriteQueryMeter(tn, count); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index d06778a2102..5b0b77d0139 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -3361,10 +3361,13 @@ public class RSRpcServices implements HBaseRPCErrorHandler, // arbitrary 32. TODO: keep record of general size of results being returned. List values = new ArrayList<>(32); region.startRegionOperation(Operation.SCAN); + long before = EnvironmentEdgeManager.currentTime(); + // Used to check if we've matched the row limit set on the Scan + int numOfCompleteRows = 0; + // Count of times we call nextRaw; can be > numOfCompleteRows. + int numOfNextRawCalls = 0; try { int numOfResults = 0; - int numOfCompleteRows = 0; - long before = EnvironmentEdgeManager.currentTime(); synchronized (scanner) { boolean stale = (region.getRegionInfo().getReplicaId() != 0); boolean clientHandlesPartials = @@ -3420,6 +3423,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, // Collect values to be returned here moreRows = scanner.nextRaw(values, scannerContext); + numOfNextRawCalls++; if (!values.isEmpty()) { if (limitOfRows > 0) { @@ -3511,18 +3515,21 @@ public class RSRpcServices implements HBaseRPCErrorHandler, builder.setScanMetrics(metricBuilder.build()); } } + } finally { + region.closeRegionOperation(); + // Update serverside metrics, even on error. long end = EnvironmentEdgeManager.currentTime(); long responseCellSize = context != null ? context.getResponseCellSize() : 0; region.getMetrics().updateScanTime(end - before); final MetricsRegionServer metricsRegionServer = regionServer.getMetrics(); if (metricsRegionServer != null) { metricsRegionServer.updateScanSize( - region.getTableDescriptor().getTableName(), responseCellSize); + region.getTableDescriptor().getTableName(), responseCellSize); metricsRegionServer.updateScanTime( - region.getTableDescriptor().getTableName(), end - before); + region.getTableDescriptor().getTableName(), end - before); + metricsRegionServer.updateReadQueryMeter(region.getRegionInfo().getTable(), + numOfNextRawCalls); } - } finally { - region.closeRegionOperation(); } // coprocessor postNext hook if (region.getCoprocessorHost() != null) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java index 5d81687cbf4..a59f643322f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java @@ -275,9 +275,6 @@ class RegionScannerImpl implements RegionScanner, Shipper, RpcCallback { region.getMetrics().updateReadRequestCount(); } } - if (rsServices != null && rsServices.getMetrics() != null) { - rsServices.getMetrics().updateReadQueryMeter(getRegionInfo().getTable()); - } // If the size limit was reached it means a partial Result is being returned. Returning a // partial Result means that we should not reset the filters; filters should only be reset in @@ -779,4 +776,4 @@ class RegionScannerImpl implements RegionScanner, Shipper, RpcCallback { // callback this.close(); } -} \ No newline at end of file +}