HBASE-27897 ConnectionImplementation#locateRegionInMeta should pause and retry when taking the user region lock fails (#5258)

Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
This commit is contained in:
Xiaolin Ha 2023-06-07 16:31:39 +08:00 committed by haxiaolin
parent 556e11d9ad
commit 50a6249731
2 changed files with 18 additions and 13 deletions

View File

@ -993,9 +993,12 @@ public class ConnectionImplementation implements ClusterConnection, Closeable {
}
// Query the meta region
long pauseBase = connectionConfig.getPauseMillis();
takeUserRegionLock();
final long lockStartTime = EnvironmentEdgeManager.currentTime();
long lockStartTime = 0;
boolean lockedUserRegion = false;
try {
takeUserRegionLock();
lockStartTime = EnvironmentEdgeManager.currentTime();
lockedUserRegion = true;
// We don't need to check if useCache is enabled or not. Even if useCache is false
// we already cleared the cache for this row before acquiring userRegion lock so if this
// row is present in cache that means some other thread has populated it while we were
@ -1104,10 +1107,12 @@ public class ConnectionImplementation implements ClusterConnection, Closeable {
ConnectionUtils.getPauseTime(pauseBase, tries), TimeUnit.MILLISECONDS);
}
} finally {
userRegionLock.unlock();
// update duration of the lock being held
if (metrics != null) {
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
if (lockedUserRegion) {
userRegionLock.unlock();
// update duration of the lock being held
if (metrics != null) {
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
}
}
}
try {

View File

@ -592,21 +592,21 @@ public class TestMetaCache {
// obtain the client metrics
MetricsConnection metrics = conn.getConnectionMetrics();
long queueCount = metrics.getUserRegionLockQueue().getCount();
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount,
queueCount, 2);
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount, 2,
queueCount);
long timeoutCount = metrics.getUserRegionLockTimeout().getCount();
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount,
timeoutCount, 1);
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount, 1,
timeoutCount);
long waitingTimerCount = metrics.getUserRegionLockWaitingTimer().getCount();
assertEquals("userRegionLock should be grabbed successfully once. waitingTimerCount: "
+ waitingTimerCount, waitingTimerCount, 1);
+ waitingTimerCount, 1, waitingTimerCount);
long heldTimerCount = metrics.getUserRegionLockHeldTimer().getCount();
assertEquals(
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount,
heldTimerCount, 1);
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount, 1,
heldTimerCount);
double heldTime = metrics.getUserRegionLockHeldTimer().getSnapshot().getMax();
assertTrue("Max held time should be greater than 2 seconds. heldTime: " + heldTime,
heldTime >= 2E9);