HBASE-27684: add client metrics related to user region lock. (#5081)
Signed-off-by: Andrew Purtell <apurtell@apache.org> Signed-off-by: David Manning <david.manning@salesforce.com> Signed-off-by: Rushabh Shah <shahrs87@apache.org> Signed-off-by: Tanuj Khurana <tkhurana@apache.org>
This commit is contained in:
parent
e7a958a45f
commit
ff9754049b
|
@ -1003,6 +1003,7 @@ public class ConnectionImplementation implements ClusterConnection, Closeable {
|
|||
// Query the meta region
|
||||
long pauseBase = connectionConfig.getPauseMillis();
|
||||
takeUserRegionLock();
|
||||
final long lockStartTime = EnvironmentEdgeManager.currentTime();
|
||||
try {
|
||||
// We don't need to check if useCache is enabled or not. Even if useCache is false
|
||||
// we already cleared the cache for this row before acquiring userRegion lock so if this
|
||||
|
@ -1113,6 +1114,10 @@ public class ConnectionImplementation implements ClusterConnection, Closeable {
|
|||
}
|
||||
} finally {
|
||||
userRegionLock.unlock();
|
||||
// update duration of the lock being held
|
||||
if (metrics != null) {
|
||||
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
|
||||
}
|
||||
}
|
||||
try {
|
||||
Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries));
|
||||
|
@ -1126,9 +1131,19 @@ public class ConnectionImplementation implements ClusterConnection, Closeable {
|
|||
void takeUserRegionLock() throws IOException {
|
||||
try {
|
||||
long waitTime = connectionConfig.getMetaOperationTimeout();
|
||||
if (metrics != null) {
|
||||
metrics.updateUserRegionLockQueue(userRegionLock.getQueueLength());
|
||||
}
|
||||
final long waitStartTime = EnvironmentEdgeManager.currentTime();
|
||||
if (!userRegionLock.tryLock(waitTime, TimeUnit.MILLISECONDS)) {
|
||||
if (metrics != null) {
|
||||
metrics.incrUserRegionLockTimeout();
|
||||
}
|
||||
throw new LockTimeoutException("Failed to get user region lock in" + waitTime + " ms. "
|
||||
+ " for accessing meta region server.");
|
||||
} else if (metrics != null) {
|
||||
// successfully grabbed the lock, record how long we waited to acquire it
|
||||
metrics.updateUserRegionLockWaiting(EnvironmentEdgeManager.currentTime() - waitStartTime);
|
||||
}
|
||||
} catch (InterruptedException ie) {
|
||||
LOG.error("Interrupted while waiting for a lock", ie);
|
||||
|
|
|
@ -359,6 +359,10 @@ public final class MetricsConnection implements StatisticTrackable {
|
|||
private final Counter nsLookups;
|
||||
private final Counter nsLookupsFailed;
|
||||
private final Timer overloadedBackoffTimer;
|
||||
private final Counter userRegionLockTimeoutCount;
|
||||
private final Timer userRegionLockWaitingTimer;
|
||||
private final Timer userRegionLockHeldTimer;
|
||||
private final Histogram userRegionLockQueueHist;
|
||||
|
||||
// dynamic metrics
|
||||
|
||||
|
@ -443,6 +447,15 @@ public final class MetricsConnection implements StatisticTrackable {
|
|||
this.nsLookups = registry.counter(name(this.getClass(), NS_LOOKUPS, scope));
|
||||
this.nsLookupsFailed = registry.counter(name(this.getClass(), NS_LOOKUPS_FAILED, scope));
|
||||
|
||||
this.userRegionLockTimeoutCount =
|
||||
registry.counter(name(this.getClass(), "userRegionLockTimeoutCount", scope));
|
||||
this.userRegionLockWaitingTimer =
|
||||
registry.timer(name(this.getClass(), "userRegionLockWaitingDuration", scope));
|
||||
this.userRegionLockHeldTimer =
|
||||
registry.timer(name(this.getClass(), "userRegionLockHeldDuration", scope));
|
||||
this.userRegionLockQueueHist =
|
||||
registry.histogram(name(MetricsConnection.class, "userRegionLockQueueLength", scope));
|
||||
|
||||
this.overloadedBackoffTimer =
|
||||
registry.timer(name(this.getClass(), "overloadedBackoffDurationMs", scope));
|
||||
|
||||
|
@ -602,6 +615,41 @@ public final class MetricsConnection implements StatisticTrackable {
|
|||
overloadedBackoffTimer.update(time, timeUnit);
|
||||
}
|
||||
|
||||
/** Increment the count of timeouts hit while waiting to acquire the user region lock. */
|
||||
public void incrUserRegionLockTimeout() {
|
||||
userRegionLockTimeoutCount.inc();
|
||||
}
|
||||
|
||||
/** Return the counter of user region lock acquisition timeouts. */
|
||||
public Counter getUserRegionLockTimeout() {
|
||||
return userRegionLockTimeoutCount;
|
||||
}
|
||||
|
||||
public Timer getUserRegionLockWaitingTimer() {
|
||||
return userRegionLockWaitingTimer;
|
||||
}
|
||||
|
||||
public Timer getUserRegionLockHeldTimer() {
|
||||
return userRegionLockHeldTimer;
|
||||
}
|
||||
|
||||
public Histogram getUserRegionLockQueue() {
|
||||
return userRegionLockQueueHist;
|
||||
}
|
||||
|
||||
/** Record the time, in milliseconds, spent waiting to acquire the user region lock. */
|
||||
public void updateUserRegionLockWaiting(long duration) {
|
||||
userRegionLockWaitingTimer.update(duration, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
public void updateUserRegionLockHeld(long duration) {
|
||||
userRegionLockHeldTimer.update(duration, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
public void updateUserRegionLockQueue(int count) {
|
||||
userRegionLockQueueHist.update(count);
|
||||
}
|
||||
|
||||
/** Return the connection count of the metrics within a scope */
|
||||
public long getConnectionCount() {
|
||||
return connectionCount.getCount();
|
||||
|
|
|
@ -563,6 +563,7 @@ public class TestMetaCache {
|
|||
conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 0);
|
||||
conf.setLong(HConstants.HBASE_CLIENT_META_OPERATION_TIMEOUT, 2000);
|
||||
conf.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 2000);
|
||||
conf.setBoolean(MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY, true);
|
||||
|
||||
try (ConnectionImplementation conn =
|
||||
(ConnectionImplementation) ConnectionFactory.createConnection(conf)) {
|
||||
|
@ -587,6 +588,28 @@ public class TestMetaCache {
|
|||
|
||||
assertTrue(client1.getException() instanceof LockTimeoutException
|
||||
^ client2.getException() instanceof LockTimeoutException);
|
||||
|
||||
// obtain the client metrics
|
||||
MetricsConnection metrics = conn.getConnectionMetrics();
|
||||
long queueCount = metrics.getUserRegionLockQueue().getCount();
|
||||
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount,
|
||||
queueCount, 2);
|
||||
|
||||
long timeoutCount = metrics.getUserRegionLockTimeout().getCount();
|
||||
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount,
|
||||
timeoutCount, 1);
|
||||
|
||||
long waitingTimerCount = metrics.getUserRegionLockWaitingTimer().getCount();
|
||||
assertEquals("userRegionLock should be grabbed successfully once. waitingTimerCount: "
|
||||
+ waitingTimerCount, waitingTimerCount, 1);
|
||||
|
||||
long heldTimerCount = metrics.getUserRegionLockHeldTimer().getCount();
|
||||
assertEquals(
|
||||
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount,
|
||||
heldTimerCount, 1);
|
||||
double heldTime = metrics.getUserRegionLockHeldTimer().getSnapshot().getMax();
|
||||
assertTrue("Max held time should be greater than 2 seconds. heldTime: " + heldTime,
|
||||
heldTime >= 2E9);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue