HDFS-12883. RBF: Document Router and State Store metrics. Contributed by Yiqun Lin.
This commit is contained in:
parent
f86c81d923
commit
91c96bdf8e
|
@ -419,6 +419,42 @@ contains tags such as Hostname as additional information along with metrics.
|
|||
| `FileIoErrorRateNumOps` | The number of file io error operations within an interval time of metric |
|
||||
| `FileIoErrorRateAvgTime` | It measures the mean time in milliseconds from the start of an operation to hitting a failure |
|
||||
|
||||
RouterRPCMetrics
|
||||
----------------
|
||||
RouterRPCMetrics shows the statistics of the Router component in Router-based federation.
|
||||
|
||||
| Name | Description |
|
||||
|:---- |:---- |
|
||||
| `ProcessingOp` | Number of operations the Router processed internally |
|
||||
| `ProxyOp` | Number of operations the Router proxied to a Namenode |
|
||||
| `ProxyOpFailureStandby` | Number of operations to hit a standby NN |
|
||||
| `ProxyOpFailureCommunicate` | Number of operations to fail to reach NN |
|
||||
| `ProxyOpNotImplemented` | Number of operations not implemented |
|
||||
| `RouterFailureStateStore` | Number of failed requests due to State Store unavailable |
|
||||
| `RouterFailureReadOnly` | Number of failed requests due to read only mount point |
|
||||
| `RouterFailureLocked` | Number of failed requests due to locked path |
|
||||
| `RouterFailureSafemode` | Number of failed requests due to safe mode |
|
||||
| `ProcessingNumOps` | Number of operations the Router processed internally within an interval time of metric |
|
||||
| `ProcessingAvgTime` | Average time for the Router to process operations in nanoseconds |
|
||||
| `ProxyNumOps` | Number of operations the Router proxied to the Namenodes within an interval time of metric |
|
||||
| `ProxyAvgTime` | Average time for the Router to proxy operations to the Namenodes in nanoseconds |
|
||||
|
||||
StateStoreMetrics
|
||||
-----------------
|
||||
StateStoreMetrics shows the statistics of the State Store component in Router-based federation.
|
||||
|
||||
| Name | Description |
|
||||
|:---- |:---- |
|
||||
| `ReadsNumOps` | Number of GET transactions for State Store within an interval time of metric |
|
||||
| `ReadsAvgTime` | Average time of GET transactions for State Store in milliseconds |
|
||||
| `WritesNumOps` | Number of PUT transactions for State Store within an interval time of metric |
|
||||
| `WritesAvgTime` | Average time of PUT transactions for State Store in milliseconds |
|
||||
| `RemovesNumOps` | Number of REMOVE transactions for State Store within an interval time of metric |
|
||||
| `RemovesAvgTime` | Average time of REMOVE transactions for State Store in milliseconds |
|
||||
| `FailuresNumOps` | Number of failed transactions for State Store within an interval time of metric |
|
||||
| `FailuresAvgTime` | Average time of failed transactions for State Store in milliseconds |
|
||||
| `Cache`*BaseRecord*`Size` | Number of store records to cache in State Store |
|
||||
|
||||
yarn context
|
||||
============
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ import org.apache.hadoop.metrics2.lib.MutableRate;
|
|||
* Implementation of the RPC metrics collector.
|
||||
*/
|
||||
@Metrics(name = "RouterRPCActivity", about = "Router RPC Activity",
|
||||
context = "router")
|
||||
context = "dfs")
|
||||
public class FederationRPCMetrics implements FederationRPCMBean {
|
||||
|
||||
private final MetricsRegistry registry = new MetricsRegistry("router");
|
||||
|
|
|
@ -38,7 +38,7 @@ import com.google.common.annotations.VisibleForTesting;
|
|||
* Implementations of the JMX interface for the State Store metrics.
|
||||
*/
|
||||
@Metrics(name = "StateStoreActivity", about = "Router metrics",
|
||||
context = "router")
|
||||
context = "dfs")
|
||||
public final class StateStoreMetrics implements StateStoreMBean {
|
||||
|
||||
private final MetricsRegistry registry = new MetricsRegistry("router");
|
||||
|
|
|
@ -313,3 +313,9 @@ Monitor the namenodes in the subclusters for forwarding the client requests.
|
|||
| dfs.federation.router.heartbeat.interval | 5000 | How often the Router should heartbeat into the State Store in milliseconds. |
|
||||
| dfs.federation.router.monitor.namenode | | The identifier of the namenodes to monitor and heartbeat. |
|
||||
| dfs.federation.router.monitor.localnamenode.enable | `true` | If `true`, the Router should monitor the namenode in the local machine. |
|
||||
|
||||
Metrics
|
||||
-------
|
||||
|
||||
The Router and State Store statistics are exposed in metrics/JMX. This information is very useful for monitoring.
|
||||
For more information on these metrics, see [Router RPC Metrics](../../hadoop-project-dist/hadoop-common/Metrics.html#RouterRPCMetrics) and [State Store Metrics](../../hadoop-project-dist/hadoop-common/Metrics.html#StateStoreMetrics).
|
Loading…
Reference in New Issue