HBASE-14693 Add client-side metrics for received pushback signals
Signed-off-by: Andrew Purtell <apurtell@apache.org>
parent 44367f55e8
commit 086bacd12d
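The new counters and histograms are registered on the per-connection Yammer MetricsRegistry and exported over JMX by the JmxReporter that MetricsConnection already starts. Below is a minimal, hypothetical sketch of enabling the feature and reading the new runner counters. It is patterned after the TestClientPushback changes in this commit; the class name PushbackMetricsSketch and the table/row names are made up, and it assumes placement in the org.apache.hadoop.hbase.client package because runnerStats is only @VisibleForTesting protected.

package org.apache.hadoop.hbase.client;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.Bytes;

// Illustrative sketch only, not part of this commit. Assumes the same package as
// MetricsConnection because runnerStats is @VisibleForTesting protected,
// just as TestClientPushback does.
public class PushbackMetricsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Client-side metrics are opt-in; the key is defined on MetricsConnection.
    conf.setBoolean(MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY, true);

    ClusterConnection conn = (ClusterConnection) ConnectionFactory.createConnection(conf);
    try (Table table = conn.getTable(TableName.valueOf("example_table"))) {
      // Any write that goes through AsyncProcess exercises the runner path.
      table.put(new Put(Bytes.toBytes("row1"))
          .addColumn(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("v")));
    }

    // The new counters distinguish work submitted immediately from work delayed by pushback.
    MetricsConnection.RunnerStats runnerStats = conn.getConnectionMetrics().runnerStats;
    System.out.println("normal runners:  " + runnerStats.normalRunners.count());
    System.out.println("delayed runners: " + runnerStats.delayRunners.count());
    System.out.println("mean delay (ms): " + runnerStats.delayIntevalHist.mean());
    conn.close();
  }
}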
AsyncProcess.java
@@ -1009,6 +1009,9 @@ class AsyncProcess {
       int numAttempt) {
     // no stats to manage, just do the standard action
     if (AsyncProcess.this.connection.getStatisticsTracker() == null) {
+      if (connection.getConnectionMetrics() != null) {
+        connection.getConnectionMetrics().incrNormalRunners();
+      }
       return Collections.singletonList(Trace.wrap("AsyncProcess.sendMultiAction",
           new SingleServerRequestRunnable(multiAction, numAttempt, server, callsInProgress)));
     }
@@ -1039,6 +1042,14 @@ class AsyncProcess {
         runner.setRunner(runnable);
         traceText = "AsyncProcess.clientBackoff.sendMultiAction";
         runnable = runner;
+        if (connection.getConnectionMetrics() != null) {
+          connection.getConnectionMetrics().incrDelayRunners();
+          connection.getConnectionMetrics().updateDelayInterval(runner.getSleepTime());
+        }
+      } else {
+        if (connection.getConnectionMetrics() != null) {
+          connection.getConnectionMetrics().incrNormalRunners();
+        }
       }
       runnable = Trace.wrap(traceText, runnable);
       toReturn.add(runnable);
@@ -1267,6 +1278,12 @@ class AsyncProcess {
             ++failed;
           }
         } else {
+
+          if (AsyncProcess.this.connection.getConnectionMetrics() != null) {
+            AsyncProcess.this.connection.getConnectionMetrics().
+                updateServerStats(server, regionName, result);
+          }
+
           // update the stats about the region, if its a user table. We don't want to slow down
           // updates to meta tables, especially from internal updates (master, etc).
           if (AsyncProcess.this.connection.getStatisticsTracker() != null) {
MetricsConnection.java
@@ -26,12 +26,16 @@ import com.yammer.metrics.core.MetricsRegistry;
 import com.yammer.metrics.core.Timer;
 import com.yammer.metrics.reporting.JmxReporter;
 import com.yammer.metrics.util.RatioGauge;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ClientService;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutateRequest;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType;
+import org.apache.hadoop.hbase.util.Bytes;

 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentSkipListMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
@@ -54,6 +58,8 @@ public class MetricsConnection {
   private static final String DRTN_BASE = "rpcCallDurationMs_";
   private static final String REQ_BASE = "rpcCallRequestSizeBytes_";
   private static final String RESP_BASE = "rpcCallResponseSizeBytes_";
+  private static final String MEMLOAD_BASE = "memstoreLoad_";
+  private static final String HEAP_BASE = "heapOccupancy_";
   private static final String CLIENT_SVC = ClientService.getDescriptor().getName();

   /** A container class for collecting details about the RPC call as it percolates. */
@@ -130,6 +136,88 @@ public class MetricsConnection {
     }
   }

+  protected static class RegionStats {
+    final String name;
+    final Histogram memstoreLoadHist;
+    final Histogram heapOccupancyHist;
+
+    public RegionStats(MetricsRegistry registry, String name) {
+      this.name = name;
+      this.memstoreLoadHist = registry.newHistogram(MetricsConnection.class,
+          MEMLOAD_BASE + this.name);
+      this.heapOccupancyHist = registry.newHistogram(MetricsConnection.class,
+          HEAP_BASE + this.name);
+    }
+
+    public void update(ClientProtos.RegionLoadStats regionStatistics) {
+      this.memstoreLoadHist.update(regionStatistics.getMemstoreLoad());
+      this.heapOccupancyHist.update(regionStatistics.getHeapOccupancy());
+    }
+  }
+
+  @VisibleForTesting
+  protected static class RunnerStats {
+    final Counter normalRunners;
+    final Counter delayRunners;
+    final Histogram delayIntevalHist;
+
+    public RunnerStats(MetricsRegistry registry) {
+      this.normalRunners = registry.newCounter(MetricsConnection.class, "normalRunnersCount");
+      this.delayRunners = registry.newCounter(MetricsConnection.class, "delayRunnersCount");
+      this.delayIntevalHist = registry.newHistogram(MetricsConnection.class, "delayIntervalHist");
+    }
+
+    public void incrNormalRunners() {
+      this.normalRunners.inc();
+    }
+
+    public void incrDelayRunners() {
+      this.delayRunners.inc();
+    }
+
+    public void updateDelayInterval(long interval) {
+      this.delayIntevalHist.update(interval);
+    }
+  }
+
+  @VisibleForTesting
+  protected ConcurrentHashMap<ServerName, ConcurrentMap<byte[], RegionStats>> serverStats
+      = new ConcurrentHashMap<ServerName, ConcurrentMap<byte[], RegionStats>>();
+
+  public void updateServerStats(ServerName serverName, byte[] regionName,
+      Object r) {
+    if (!(r instanceof Result)) {
+      return;
+    }
+    Result result = (Result) r;
+    ClientProtos.RegionLoadStats stats = result.getStats();
+    if (stats == null) {
+      return;
+    }
+    String name = serverName.getServerName() + "," + Bytes.toStringBinary(regionName);
+    ConcurrentMap<byte[], RegionStats> rsStats = null;
+    if (serverStats.containsKey(serverName)) {
+      rsStats = serverStats.get(serverName);
+    } else {
+      rsStats = serverStats.putIfAbsent(serverName,
+          new ConcurrentSkipListMap<byte[], RegionStats>(Bytes.BYTES_COMPARATOR));
+      if (rsStats == null) {
+        rsStats = serverStats.get(serverName);
+      }
+    }
+    RegionStats regionStats = null;
+    if (rsStats.containsKey(regionName)) {
+      regionStats = rsStats.get(regionName);
+    } else {
+      regionStats = rsStats.putIfAbsent(regionName, new RegionStats(this.registry, name));
+      if (regionStats == null) {
+        regionStats = rsStats.get(regionName);
+      }
+    }
+    regionStats.update(stats);
+  }
+
+
   /** A lambda for dispatching to the appropriate metric factory method */
   private static interface NewMetric<T> {
     T newMetric(Class<?> clazz, String name, String scope);
@@ -172,6 +260,7 @@ public class MetricsConnection {
   @VisibleForTesting protected final CallTracker incrementTracker;
   @VisibleForTesting protected final CallTracker putTracker;
   @VisibleForTesting protected final CallTracker multiTracker;
+  @VisibleForTesting protected final RunnerStats runnerStats;

   // dynamic metrics

@@ -217,6 +306,8 @@ public class MetricsConnection {
     this.incrementTracker = new CallTracker(this.registry, "Mutate", "Increment", scope);
     this.putTracker = new CallTracker(this.registry, "Mutate", "Put", scope);
     this.multiTracker = new CallTracker(this.registry, "Multi", scope);
+    this.runnerStats = new RunnerStats(this.registry);
+
     this.reporter = new JmxReporter(this.registry);
     this.reporter.start();
   }
@@ -242,6 +333,21 @@ public class MetricsConnection {
     metaCacheMisses.inc();
   }

+  /** Increment the number of normal runner counts. */
+  public void incrNormalRunners() {
+    this.runnerStats.incrNormalRunners();
+  }
+
+  /** Increment the number of delay runner counts. */
+  public void incrDelayRunners() {
+    this.runnerStats.incrDelayRunners();
+  }
+
+  /** Update delay interval of delay runner. */
+  public void updateDelayInterval(long interval) {
+    this.runnerStats.updateDelayInterval(interval);
+  }
+
   /**
    * Get a metric for {@code key} from {@code map}, or create it with {@code factory}.
    */
TestClientPushback.java
@@ -42,6 +42,7 @@ import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;

+import static org.apache.hadoop.hbase.client.MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNotNull;
@@ -74,7 +75,7 @@ public class TestClientPushback {
     conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSizeBytes);
     // ensure we block the flushes when we are double that flushsize
     conf.setLong(HConstants.HREGION_MEMSTORE_BLOCK_MULTIPLIER, HConstants.DEFAULT_HREGION_MEMSTORE_BLOCK_MULTIPLIER);
+    conf.setBoolean(CLIENT_SIDE_METRICS_ENABLED_KEY, true);
     UTIL.startMiniCluster(1);
     UTIL.createTable(tableName, family);
   }
@@ -87,6 +88,7 @@ public class TestClientPushback {
   @Test(timeout=60000)
   public void testClientTracksServerPushback() throws Exception{
     Configuration conf = UTIL.getConfiguration();
+
     ClusterConnection conn = (ClusterConnection) ConnectionFactory.createConnection(conf);
     HTable table = (HTable) conn.getTable(tableName);

@@ -119,7 +121,6 @@ public class TestClientPushback {
     ServerStatistics.RegionStatistics regionStats = serverStats.getStatsForRegion(regionName);
     assertEquals("We did not find some load on the memstore", load,
       regionStats.getMemstoreLoadPercent());
-
     // check that the load reported produces a nonzero delay
     long backoffTime = backoffPolicy.getBackoffTime(server, regionName, serverStats);
     assertNotEquals("Reported load does not produce a backoff", backoffTime, 0);
@@ -145,6 +146,21 @@ public class TestClientPushback {
     // produces a backoffTime of 151 milliseconds. This is long enough so the
     // wait and related checks below are reasonable. Revisit if the backoff
     // time reported by above debug logging has significantly deviated.
+    String name = server.getServerName() + "," + Bytes.toStringBinary(regionName);
+    MetricsConnection.RegionStats rsStats = conn.getConnectionMetrics().
+        serverStats.get(server).get(regionName);
+    assertEquals(name, rsStats.name);
+    assertEquals(rsStats.heapOccupancyHist.mean(),
+        (double)regionStats.getHeapOccupancyPercent(), 0.1);
+    assertEquals(rsStats.memstoreLoadHist.mean(),
+        (double)regionStats.getMemstoreLoadPercent(), 0.1);
+
+    MetricsConnection.RunnerStats runnerStats = conn.getConnectionMetrics().runnerStats;
+
+    assertEquals(runnerStats.delayRunners.count(), 1);
+    assertEquals(runnerStats.normalRunners.count(), 1);
+    assertEquals("", runnerStats.delayIntevalHist.mean(), (double)backoffTime, 0.1);
+
     latch.await(backoffTime * 2, TimeUnit.MILLISECONDS);
     assertNotEquals("AsyncProcess did not submit the work time", endTime.get(), 0);
     assertTrue("AsyncProcess did not delay long enough", endTime.get() - startTime >= backoffTime);
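For the per-region histograms, the test reads MetricsConnection.serverStats directly; the same numbers are also published through the connection's JmxReporter. A rough sketch of walking that map from same-package code follows; the RegionStatsDump class is illustrative only and not part of this commit.

package org.apache.hadoop.hbase.client;

import java.util.Map;
import java.util.concurrent.ConcurrentMap;

import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.util.Bytes;

// Sketch only: serverStats and the RegionStats fields are @VisibleForTesting,
// so this assumes same-package access, as in TestClientPushback above.
final class RegionStatsDump {
  static void dump(MetricsConnection metrics) {
    for (Map.Entry<ServerName, ConcurrentMap<byte[], MetricsConnection.RegionStats>> byServer
        : metrics.serverStats.entrySet()) {
      for (Map.Entry<byte[], MetricsConnection.RegionStats> byRegion
          : byServer.getValue().entrySet()) {
        MetricsConnection.RegionStats rs = byRegion.getValue();
        // One histogram pair per region: the memstore load and heap occupancy
        // percentages reported back by the server with each result.
        System.out.println(byServer.getKey() + "," + Bytes.toStringBinary(byRegion.getKey())
            + " memstoreLoad(mean)=" + rs.memstoreLoadHist.mean()
            + " heapOccupancy(mean)=" + rs.heapOccupancyHist.mean());
      }
    }
  }
}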