From 925f3e62dbcff32d39f28b445fa55b44f882efae Mon Sep 17 00:00:00 2001 From: Yiqun Lin Date: Thu, 14 Dec 2017 16:00:53 +0800 Subject: [PATCH] HDFS-12883. RBF: Document Router and State Store metrics. Contributed by Yiqun Lin. (cherry picked from commit 91c96bdf8eb9a06193b719186b527563091d7666) --- .../src/site/markdown/Metrics.md | 36 +++++++++++++++++++ .../metrics/FederationRPCMetrics.java | 2 +- .../federation/metrics/StateStoreMetrics.java | 2 +- .../src/site/markdown/HDFSRouterFederation.md | 8 ++++- 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index 367d9e0b25e..2e5eab9cc1d 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -401,6 +401,42 @@ contains tags such as Hostname as additional information along with metrics. | `FileIoErrorRateNumOps` | The number of file io error operations within an interval time of metric | | `FileIoErrorRateAvgTime` | It measures the mean time in milliseconds from the start of an operation to hitting a failure | +RouterRPCMetrics +---------------- +RouterRPCMetrics shows the statistics of the Router component in Router-based federation. 
+ +| Name | Description | +|:---- |:---- | +| `ProcessingOp` | Number of operations the Router processed internally | +| `ProxyOp` | Number of operations the Router proxied to a Namenode | +| `ProxyOpFailureStandby` | Number of operations to hit a standby NN | +| `ProxyOpFailureCommunicate` | Number of operations to fail to reach NN | +| `ProxyOpNotImplemented` | Number of operations not implemented | +| `RouterFailureStateStore` | Number of failed requests due to State Store unavailable | +| `RouterFailureReadOnly` | Number of failed requests due to read only mount point | +| `RouterFailureLocked` | Number of failed requests due to locked path | +| `RouterFailureSafemode` | Number of failed requests due to safe mode | +| `ProcessingNumOps` | Number of operations the Router processed internally within an interval time of metric | +| `ProcessingAvgTime` | Average time for the Router to process operations in nanoseconds | +| `ProxyNumOps` | Number of times the Router proxied operations to the Namenodes within an interval time of metric | +| `ProxyAvgTime` | Average time for the Router to proxy operations to the Namenodes in nanoseconds | + +StateStoreMetrics +----------------- +StateStoreMetrics shows the statistics of the State Store component in Router-based federation. 
+ +| Name | Description | +|:---- |:---- | +| `ReadsNumOps` | Number of GET transactions for State Store within an interval time of metric | +| `ReadsAvgTime` | Average time of GET transactions for State Store in milliseconds | +| `WritesNumOps` | Number of PUT transactions for State Store within an interval time of metric | +| `WritesAvgTime` | Average time of PUT transactions for State Store in milliseconds | +| `RemovesNumOps` | Number of REMOVE transactions for State Store within an interval time of metric | +| `RemovesAvgTime` | Average time of REMOVE transactions for State Store in milliseconds | +| `FailuresNumOps` | Number of failed transactions for State Store within an interval time of metric | +| `FailuresAvgTime` | Average time of failed transactions for State Store in milliseconds | +| `Cache`*BaseRecord*`Size` | Number of store records to cache in State Store | + yarn context ============ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCMetrics.java index 427bca28961..8995689b02f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationRPCMetrics.java @@ -34,7 +34,7 @@ import org.apache.hadoop.metrics2.lib.MutableRate; * Implementation of the RPC metrics collector. 
*/ @Metrics(name = "RouterRPCActivity", about = "Router RPC Activity", - context = "router") + context = "dfs") public class FederationRPCMetrics implements FederationRPCMBean { private final MetricsRegistry registry = new MetricsRegistry("router"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java index c17eabcae2a..40dcd40a829 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/StateStoreMetrics.java @@ -38,7 +38,7 @@ import com.google.common.annotations.VisibleForTesting; * Implementations of the JMX interface for the State Store metrics. */ @Metrics(name = "StateStoreActivity", about = "Router metrics", - context = "router") + context = "dfs") public final class StateStoreMetrics implements StateStoreMBean { private final MetricsRegistry registry = new MetricsRegistry("router"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSRouterFederation.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSRouterFederation.md index dad231da6ad..5075a220f02 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSRouterFederation.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSRouterFederation.md @@ -312,4 +312,10 @@ Monitor the namenodes in the subclusters for forwarding the client requests. | dfs.federation.router.heartbeat.enable | `true` | If `true`, the Router heartbeats into the State Store. | | dfs.federation.router.heartbeat.interval | 5000 | How often the Router should heartbeat into the State Store in milliseconds. | | dfs.federation.router.monitor.namenode | | The identifier of the namenodes to monitor and heartbeat. 
| -| dfs.federation.router.monitor.localnamenode.enable | `true` | If `true`, the Router should monitor the namenode in the local machine. | \ No newline at end of file +| dfs.federation.router.monitor.localnamenode.enable | `true` | If `true`, the Router should monitor the namenode in the local machine. | + +Metrics +------- + +The Router and State Store statistics are exposed in metrics/JMX. This information is very useful for monitoring. +For more metrics information, see [Router RPC Metrics](../../hadoop-project-dist/hadoop-common/Metrics.html#RouterRPCMetrics) and [State Store Metrics](../../hadoop-project-dist/hadoop-common/Metrics.html#StateStoreMetrics). \ No newline at end of file