From d566e4e41d4c4d96c2c9a69569ecbcd6a319fad2 Mon Sep 17 00:00:00 2001 From: litao Date: Fri, 2 Apr 2021 09:33:40 +0800 Subject: [PATCH] HDFS-15892. Add metric for editPendingQ in FSEditLogAsync (#2770) Signed-off-by: Takanobu Asanuma (cherry picked from commit 4bd04126d64595ab0831626c80d8d1b788a35cd4) --- .../hadoop-common/src/site/markdown/Metrics.md | 1 + .../hadoop/hdfs/server/namenode/FSEditLogAsync.java | 12 ++++++++++++ .../server/namenode/metrics/NameNodeMetrics.java | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index e611cdb1d14..0e48ca31703 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -216,6 +216,7 @@ Each metrics record contains tags such as ProcessName, SessionId, and Hostname a | `EditLogTailIntervalNumOps` | Total number of intervals between edit log tailings by standby NameNode | | `EditLogTailIntervalAvgTime` | Average time of intervals between edit log tailings by standby NameNode in milliseconds | | `EditLogTailInterval`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time between edit log tailings by standby NameNode in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `PendingEditsCount` | Current number of pending edits | FSNamesystem ------------ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java index 2b47398f40c..f6ae5dcbd83 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java @@ -28,6 +28,8 @@ import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; +import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -53,6 +55,8 @@ class FSEditLogAsync extends FSEditLog implements Runnable { // of the edit log buffer - ie. a sync will eventually be forced. private final Deque syncWaitQ = new ArrayDeque(); + private long lastFull = 0; + FSEditLogAsync(Configuration conf, NNStorage storage, List editsDirs) { super(conf, storage, editsDirs); // op instances cannot be shared due to queuing for background thread. @@ -188,6 +192,11 @@ class FSEditLogAsync extends FSEditLog implements Runnable { if (!editPendingQ.offer(edit)) { Preconditions.checkState( isSyncThreadAlive(), "sync thread is not alive"); + long now = Time.monotonicNow(); + if (now - lastFull > 4000) { + lastFull = now; + LOG.info("Edit pending queue is full"); + } if (Thread.holdsLock(this)) { // if queue is full, synchronized caller must immediately relinquish // the monitor before re-offering to avoid deadlock with sync thread @@ -225,15 +234,18 @@ class FSEditLogAsync extends FSEditLog implements Runnable { public void run() { try { while (true) { + NameNodeMetrics metrics = NameNode.getNameNodeMetrics(); boolean doSync; Edit edit = dequeueEdit(); if (edit != null) { // sync if requested by edit log. doSync = edit.logEdit(); syncWaitQ.add(edit); + metrics.setPendingEditsCount(editPendingQ.size() + 1); } else { // sync when editq runs dry, but have edits pending a sync. doSync = !syncWaitQ.isEmpty(); + metrics.setPendingEditsCount(0); } if (doSync) { // normally edit log exceptions cause the NN to terminate, but tests diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java index 5d43ebf95fe..49960df2089 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java @@ -85,6 +85,8 @@ public class NameNodeMetrics { MutableGaugeInt blockOpsQueued; @Metric("Number of blockReports and blockReceivedAndDeleted batch processed") MutableCounterLong blockOpsBatched; + @Metric("Number of pending edits") + MutableGaugeInt pendingEditsCount; @Metric("Number of file system operations") public long totalFileOps(){ @@ -334,6 +336,10 @@ public class NameNodeMetrics { blockOpsBatched.incr(count); } + public void setPendingEditsCount(int size) { + pendingEditsCount.set(size); + } + public void addTransaction(long latency) { transactions.add(latency); }