diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index 8210eee0384..4b2abfb4235 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -228,6 +228,7 @@ Each metrics record contains tags such as ProcessName, SessionId, and Hostname a | `EditLogTailIntervalNumOps` | Total number of intervals between edit log tailings by standby NameNode | | `EditLogTailIntervalAvgTime` | Average time of intervals between edit log tailings by standby NameNode in milliseconds | | `EditLogTailInterval`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time between edit log tailings by standby NameNode in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `PendingEditsCount` | Current number of pending edits | FSNamesystem ------------ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java index e73dfa7797d..cfa086f8af6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java @@ -28,6 +28,8 @@ import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; +import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -53,6 +55,8 @@ class FSEditLogAsync extends FSEditLog implements Runnable { // of the edit log buffer - ie. a sync will eventually be forced. private final Deque syncWaitQ = new ArrayDeque(); + private long lastFull = 0; + FSEditLogAsync(Configuration conf, NNStorage storage, List editsDirs) { super(conf, storage, editsDirs); // op instances cannot be shared due to queuing for background thread. @@ -188,6 +192,11 @@ class FSEditLogAsync extends FSEditLog implements Runnable { if (!editPendingQ.offer(edit)) { Preconditions.checkState( isSyncThreadAlive(), "sync thread is not alive"); + long now = Time.monotonicNow(); + if (now - lastFull > 4000) { + lastFull = now; + LOG.info("Edit pending queue is full"); + } if (Thread.holdsLock(this)) { // if queue is full, synchronized caller must immediately relinquish // the monitor before re-offering to avoid deadlock with sync thread @@ -225,15 +234,18 @@ class FSEditLogAsync extends FSEditLog implements Runnable { public void run() { try { while (true) { + NameNodeMetrics metrics = NameNode.getNameNodeMetrics(); boolean doSync; Edit edit = dequeueEdit(); if (edit != null) { // sync if requested by edit log. doSync = edit.logEdit(); syncWaitQ.add(edit); + metrics.setPendingEditsCount(editPendingQ.size() + 1); } else { // sync when editq runs dry, but have edits pending a sync. doSync = !syncWaitQ.isEmpty(); + metrics.setPendingEditsCount(0); } if (doSync) { // normally edit log exceptions cause the NN to terminate, but tests diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java index c15cdbdd48e..4df9b5d6575 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java @@ -85,6 +85,8 @@ public class NameNodeMetrics { MutableGaugeInt blockOpsQueued; @Metric("Number of blockReports and blockReceivedAndDeleted batch processed") MutableCounterLong blockOpsBatched; + @Metric("Number of pending edits") + MutableGaugeInt pendingEditsCount; @Metric("Number of file system operations") public long totalFileOps(){ @@ -336,6 +338,10 @@ public class NameNodeMetrics { blockOpsBatched.incr(count); } + public void setPendingEditsCount(int size) { + pendingEditsCount.set(size); + } + public void addTransaction(long latency) { transactions.add(latency); }