HDFS-10534. NameNode WebUI should display DataNode usage rate with a certain percentile. Contributed by Kai Sasaki.

This commit is contained in:
Zhe Zhang 2016-06-24 08:58:29 -07:00
parent 0f23cd497b
commit 0424056a77
5 changed files with 48 additions and 3 deletions

View File

@ -451,6 +451,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
"dfs.namenode.metrics.logger.period.seconds";
public static final int DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT =
600;
public static final String DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE =
"dfs.namenode.metrics.node-usage.percentile";
public static final double DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE_DEFAULT
= 0.95;
public static final String DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_KEY =
"dfs.datanode.metrics.logger.period.seconds";
public static final int DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT =

View File

@ -66,6 +66,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CAC
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_INODE_ATTRIBUTES_PROVIDER_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LAZY_PERSIST_FILE_SCRUB_INTERVAL_SEC_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
@ -528,6 +530,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
private INodeAttributeProvider inodeAttributeProvider;
private final double percentileFactor;
/**
* If the NN is in safemode, and not due to manual / low resources, we
* assume it must be because of startup. If the NN had low resources during
@ -825,6 +829,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
this.percentileFactor = conf.getDouble(
DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE,
DFS_NAMENODE_METRICS_NODE_USAGE_PERCENTILE_DEFAULT);
Preconditions.checkArgument(0.0 < this.percentileFactor
&& this.percentileFactor <= 1.0, "Node usage percentile " +
"factor must be between 0 and 1.");
this.dtSecretManager = createDelegationTokenSecretManager(conf);
this.dir = new FSDirectory(this, conf);
this.snapshotManager = new SnapshotManager(dir);
@ -5614,6 +5626,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
float max = 0;
float min = 0;
float dev = 0;
float percentile = 0;
final Map<String, Map<String,Object>> info =
new HashMap<String, Map<String,Object>>();
@ -5639,6 +5652,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
median = usages[usages.length / 2];
max = usages[usages.length - 1];
min = usages[0];
percentile = usages[(int)((usages.length - 1) * percentileFactor)];
for (i = 0; i < usages.length; i++) {
dev += (usages[i] - totalDfsUsed) * (usages[i] - totalDfsUsed);
@ -5651,6 +5665,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
innerInfo.put("median", StringUtils.format("%.2f%%", median));
innerInfo.put("max", StringUtils.format("%.2f%%", max));
innerInfo.put("stdDev", StringUtils.format("%.2f%%", dev));
final Map<String, Object> percentileInfo = new HashMap<String, Object>();
percentileInfo.put("name", StringUtils.format("%dth percentile",
(int)(percentileFactor * 100)));
percentileInfo.put("value", StringUtils.format("%.2f%%", percentile));
innerInfo.put("percentile", percentileInfo);
info.put("nodeUsage", innerInfo);
return JSON.toString(info);

View File

@ -1896,6 +1896,17 @@
</description>
</property>
<property>
<name>dfs.namenode.metrics.node-usage.percentile</name>
<value>0.95</value>
<description>
This setting specifies the percentile at which DataNode usage is reported
in the NameNode metrics and web UI. The value must be greater than 0 and
no greater than 1. For example, 0.95 means reporting the node usage at the
95th percentile of all DataNodes: if this setting is 0.95 and the reported
node usage is 70%, then about 95% of DataNodes have a usage at or below 70%.
</description>
</property>
<property>
<name>dfs.datanode.metrics.logger.period.seconds</name>
<value>600</value>

View File

@ -166,8 +166,8 @@
<tr><th> Non DFS Used:</th><td>{NonDfsUsedSpace|fmt_bytes}</td></tr>
<tr><th> DFS Remaining:</th><td>{Free|fmt_bytes} ({PercentRemaining|fmt_percentage})</td></tr>
<tr><th> Block Pool Used:</th><td>{BlockPoolUsedSpace|fmt_bytes} ({PercentBlockPoolUsed|fmt_percentage})</td></tr>
<tr><th> DataNodes usages% (Min/Median/Max/stdDev): </th>
<td>{#NodeUsage.nodeUsage}{min} / {median} / {max} / {stdDev}{/NodeUsage.nodeUsage}</td></tr>
<tr><th> {#NodeUsage.nodeUsage}DataNodes usages% (Min/Median/Max/stdDev/{percentile.name}):{/NodeUsage.nodeUsage} </th>
<td>{#NodeUsage.nodeUsage}{min} / {median} / {max} / {stdDev} / {percentile.value}{/NodeUsage.nodeUsage}</td></tr>
{/nn}
{#fs}

View File

@ -173,6 +173,16 @@ public class TestNameNodeMXBean {
String nodeUsage = (String) (mbs.getAttribute(mxbeanName,
"NodeUsage"));
assertEquals("Bad value for NodeUsage", fsn.getNodeUsage(), nodeUsage);
Map<String, Map<String, Object>> usage
= (Map<String, Map<String, Object>>)JSON.parse(nodeUsage);
assertTrue(usage.get("nodeUsage").containsKey("min"));
assertTrue(usage.get("nodeUsage").containsKey("median"));
assertTrue(usage.get("nodeUsage").containsKey("max"));
assertTrue(usage.get("nodeUsage").containsKey("percentile"));
Map<String, Object> percentileInfo
= (Map<String, Object>)usage.get("nodeUsage").get("percentile");
assertTrue(percentileInfo.containsKey("name"));
assertTrue(percentileInfo.containsKey("value"));
// get attribute NameJournalStatus
String nameJournalStatus = (String) (mbs.getAttribute(mxbeanName,
"NameJournalStatus"));