From 308cd729d23329e6d8d4b9c17a645180374b5962 Mon Sep 17 00:00:00 2001 From: Bri Augenreich Date: Wed, 31 Aug 2022 21:23:40 -0400 Subject: [PATCH] HBASE-26809: Report client backoff time for server overloaded (#4729) Co-authored-by: Briana Augenreich Signed-off-by: Duo Zhang --- .../hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java | 5 +++++ .../hadoop/hbase/client/AsyncRpcRetryingCaller.java | 4 ++++ .../client/AsyncScanSingleRegionRpcRetryingCaller.java | 8 ++++++++ .../org/apache/hadoop/hbase/client/MetricsConnection.java | 8 ++++++++ 4 files changed, 25 insertions(+) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java index 0798915c08d..49cf7589207 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java @@ -487,6 +487,11 @@ class AsyncBatchRpcRetryingCaller { } else { delayNs = getPauseTime(pauseNsToUse, tries - 1); } + + if (isServerOverloaded) { + Optional metrics = conn.getConnectionMetrics(); + metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); + } retryTimer.newTimeout(t -> groupAndSend(actions, tries + 1), delayNs, TimeUnit.NANOSECONDS); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java index a19d3b039f1..04e22710838 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java @@ -139,6 +139,10 @@ public abstract class AsyncRpcRetryingCaller { delayNs = getPauseTime(pauseNsToUse, tries - 1); } tries++; + if (HBaseServerException.isServerOverloaded(error)) { + Optional metrics = conn.getConnectionMetrics(); + metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); + } retryTimer.newTimeout(t -> doCall(), delayNs, TimeUnit.NANOSECONDS); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java index dbaae5c26e2..3ef7b9b6ccc 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java @@ -113,6 +113,8 @@ class AsyncScanSingleRegionRpcRetryingCaller { private final Runnable completeWhenNoMoreResultsInRegion; + private final AsyncConnectionImpl conn; + private final CompletableFuture future; private final HBaseRpcController controller; @@ -318,6 +320,7 @@ class AsyncScanSingleRegionRpcRetryingCaller { long pauseNsForServerOverloaded, int maxAttempts, long scanTimeoutNs, long rpcTimeoutNs, int startLogErrorsCnt) { this.retryTimer = retryTimer; + this.conn = conn; this.scan = scan; this.scanMetrics = scanMetrics; this.scannerId = scannerId; @@ -441,6 +444,11 @@ class AsyncScanSingleRegionRpcRetryingCaller { return; } tries++; + + if (HBaseServerException.isServerOverloaded(error)) { + Optional metrics = conn.getConnectionMetrics(); + metrics.ifPresent(m -> m.incrementServerOverloadedBackoffTime(delayNs, TimeUnit.NANOSECONDS)); + } retryTimer.newTimeout(t -> call(), delayNs, TimeUnit.NANOSECONDS); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java index dc452bcd9d9..f844c47e406 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java @@ -315,6 +315,7 @@ public class MetricsConnection implements StatisticTrackable { protected final Histogram numActionsPerServerHist; protected final Counter nsLookups; protected final Counter nsLookupsFailed; + protected final Timer overloadedBackoffTimer; // dynamic metrics @@ -376,6 +377,8 @@ public class MetricsConnection implements StatisticTrackable { registry.histogram(name(MetricsConnection.class, "numActionsPerServer", scope)); this.nsLookups = registry.counter(name(this.getClass(), NS_LOOKUPS, scope)); this.nsLookupsFailed = registry.counter(name(this.getClass(), NS_LOOKUPS_FAILED, scope)); + this.overloadedBackoffTimer = + registry.timer(name(this.getClass(), "overloadedBackoffDurationMs", scope)); this.reporter = JmxReporter.forRegistry(this.registry).build(); this.reporter.start(); @@ -449,6 +452,11 @@ public class MetricsConnection implements StatisticTrackable { this.runnerStats.updateDelayInterval(interval); } + /** Update the overloaded backoff time **/ + public void incrementServerOverloadedBackoffTime(long time, TimeUnit timeUnit) { + overloadedBackoffTimer.update(time, timeUnit); + } + /** * Get a metric for {@code key} from {@code map}, or create it with {@code factory}. */