HBASE-25677 Server+table counters on each scan #nextRaw invocation becomes a bottleneck when heavy load (#3061)
Don't have every handler update regionserver metrics on each scan#nextRaw; instead, do a batch update just before Scan returns. Otherwise, all running handlers end up contending on metrics update. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Update of regionserver metrics counters moved out to caller where can be done as a batch update instead of per-next. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServer.java Class doc to encourage batch updating metrics. Remove the single update as unused anymore. M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java Count calls to nextRaw. Update regionserver count in finally block when scan is done rather than per nextRaw call. Move all metrics updates to finally. Signed-off-by: Reid Chan <reidchan@apache.org> Signed-off-by: Baiqiang Zhao <ZhaoBQ>
This commit is contained in:
parent
c485a6bb79
commit
3331e8307a
|
@ -7282,9 +7282,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
|
||||||
metricsRegion.updateReadRequestCount();
|
metricsRegion.updateReadRequestCount();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (rsServices != null && rsServices.getMetrics() != null) {
|
|
||||||
rsServices.getMetrics().updateReadQueryMeter(getRegionInfo().getTable());
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the size limit was reached it means a partial Result is being returned. Returning a
|
// If the size limit was reached it means a partial Result is being returned. Returning a
|
||||||
// partial Result means that we should not reset the filters; filters should only be reset in
|
// partial Result means that we should not reset the filters; filters should only be reset in
|
||||||
|
|
|
@ -28,12 +28,11 @@ import org.apache.yetus.audience.InterfaceAudience;
|
||||||
import org.apache.yetus.audience.InterfaceStability;
|
import org.apache.yetus.audience.InterfaceStability;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* Maintains regionserver statistics and publishes them through the metrics interfaces.
|
||||||
* This class is for maintaining the various regionserver statistics
|
|
||||||
* and publishing them through the metrics interfaces.
|
|
||||||
* </p>
|
|
||||||
* This class has a number of metrics variables that are publicly accessible;
|
* This class has a number of metrics variables that are publicly accessible;
|
||||||
* these variables (objects) have methods to update their values.
|
* these variables (objects) have methods to update their values. Batch your updates rather than
|
||||||
|
* call on each instance else all threads will do nothing but contend trying to maintain metric
|
||||||
|
* counters!
|
||||||
*/
|
*/
|
||||||
@InterfaceStability.Evolving
|
@InterfaceStability.Evolving
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
|
@ -52,7 +51,9 @@ public class MetricsRegionServer {
|
||||||
|
|
||||||
private MetricRegistry metricRegistry;
|
private MetricRegistry metricRegistry;
|
||||||
private Timer bulkLoadTimer;
|
private Timer bulkLoadTimer;
|
||||||
|
// Incremented once for each call to Scan#nextRaw
|
||||||
private Meter serverReadQueryMeter;
|
private Meter serverReadQueryMeter;
|
||||||
|
// Incremented per write.
|
||||||
private Meter serverWriteQueryMeter;
|
private Meter serverWriteQueryMeter;
|
||||||
protected long slowMetricTime;
|
protected long slowMetricTime;
|
||||||
protected static final int DEFAULT_SLOW_METRIC_TIME = 1000; // milliseconds
|
protected static final int DEFAULT_SLOW_METRIC_TIME = 1000; // milliseconds
|
||||||
|
@ -272,13 +273,6 @@ public class MetricsRegionServer {
|
||||||
this.serverReadQueryMeter.mark(count);
|
this.serverReadQueryMeter.mark(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void updateReadQueryMeter(TableName tn) {
|
|
||||||
if (tableMetrics != null && tn != null) {
|
|
||||||
tableMetrics.updateTableReadQueryMeter(tn);
|
|
||||||
}
|
|
||||||
this.serverReadQueryMeter.mark();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void updateWriteQueryMeter(TableName tn, long count) {
|
public void updateWriteQueryMeter(TableName tn, long count) {
|
||||||
if (tableMetrics != null && tn != null) {
|
if (tableMetrics != null && tn != null) {
|
||||||
tableMetrics.updateTableWriteQueryMeter(tn, count);
|
tableMetrics.updateTableWriteQueryMeter(tn, count);
|
||||||
|
|
|
@ -3282,10 +3282,13 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
|
||||||
// arbitrary 32. TODO: keep record of general size of results being returned.
|
// arbitrary 32. TODO: keep record of general size of results being returned.
|
||||||
List<Cell> values = new ArrayList<>(32);
|
List<Cell> values = new ArrayList<>(32);
|
||||||
region.startRegionOperation(Operation.SCAN);
|
region.startRegionOperation(Operation.SCAN);
|
||||||
|
long before = EnvironmentEdgeManager.currentTime();
|
||||||
|
// Used to check if we've matched the row limit set on the Scan
|
||||||
|
int numOfCompleteRows = 0;
|
||||||
|
// Count of times we call nextRaw; can be > numOfCompleteRows.
|
||||||
|
int numOfNextRawCalls = 0;
|
||||||
try {
|
try {
|
||||||
int numOfResults = 0;
|
int numOfResults = 0;
|
||||||
int numOfCompleteRows = 0;
|
|
||||||
long before = EnvironmentEdgeManager.currentTime();
|
|
||||||
synchronized (scanner) {
|
synchronized (scanner) {
|
||||||
boolean stale = (region.getRegionInfo().getReplicaId() != 0);
|
boolean stale = (region.getRegionInfo().getReplicaId() != 0);
|
||||||
boolean clientHandlesPartials =
|
boolean clientHandlesPartials =
|
||||||
|
@ -3341,6 +3344,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
|
||||||
|
|
||||||
// Collect values to be returned here
|
// Collect values to be returned here
|
||||||
moreRows = scanner.nextRaw(values, scannerContext);
|
moreRows = scanner.nextRaw(values, scannerContext);
|
||||||
|
numOfNextRawCalls++;
|
||||||
|
|
||||||
if (!values.isEmpty()) {
|
if (!values.isEmpty()) {
|
||||||
if (limitOfRows > 0) {
|
if (limitOfRows > 0) {
|
||||||
|
@ -3432,18 +3436,21 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
|
||||||
builder.setScanMetrics(metricBuilder.build());
|
builder.setScanMetrics(metricBuilder.build());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} finally {
|
||||||
|
region.closeRegionOperation();
|
||||||
|
// Update serverside metrics, even on error.
|
||||||
long end = EnvironmentEdgeManager.currentTime();
|
long end = EnvironmentEdgeManager.currentTime();
|
||||||
long responseCellSize = context != null ? context.getResponseCellSize() : 0;
|
long responseCellSize = context != null ? context.getResponseCellSize() : 0;
|
||||||
region.getMetrics().updateScanTime(end - before);
|
region.getMetrics().updateScanTime(end - before);
|
||||||
final MetricsRegionServer metricsRegionServer = regionServer.getMetrics();
|
final MetricsRegionServer metricsRegionServer = regionServer.getMetrics();
|
||||||
if (metricsRegionServer != null) {
|
if (metricsRegionServer != null) {
|
||||||
metricsRegionServer.updateScanSize(
|
metricsRegionServer.updateScanSize(
|
||||||
region.getTableDescriptor().getTableName(), responseCellSize);
|
region.getTableDescriptor().getTableName(), responseCellSize);
|
||||||
metricsRegionServer.updateScanTime(
|
metricsRegionServer.updateScanTime(
|
||||||
region.getTableDescriptor().getTableName(), end - before);
|
region.getTableDescriptor().getTableName(), end - before);
|
||||||
|
metricsRegionServer.updateReadQueryMeter(region.getRegionInfo().getTable(),
|
||||||
|
numOfNextRawCalls);
|
||||||
}
|
}
|
||||||
} finally {
|
|
||||||
region.closeRegionOperation();
|
|
||||||
}
|
}
|
||||||
// coprocessor postNext hook
|
// coprocessor postNext hook
|
||||||
if (region.getCoprocessorHost() != null) {
|
if (region.getCoprocessorHost() != null) {
|
||||||
|
|
Loading…
Reference in New Issue