From dbd673d516be13c1d163bf0968b79fb899b988d0 Mon Sep 17 00:00:00 2001
From: Hanisha Koneru
Date: Fri, 17 Mar 2017 15:42:25 -0700
Subject: [PATCH] HDFS-11517. Expose slow disks via DataNode JMX. Contributed
by Hanisha Koneru
---
.../hadoop/hdfs/server/datanode/DataNode.java | 14 ++++++++
.../hdfs/server/datanode/DataNodeMXBean.java | 7 ++++
.../datanode/metrics/DataNodeDiskMetrics.java | 13 +++++++-
.../server/datanode/TestDataNodeMXBean.java | 33 ++++++++++++++++++-
4 files changed, 65 insertions(+), 2 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index 31f25af7717..d7f2abc6452 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -1782,6 +1782,10 @@ public class DataNode extends ReconfigurableBase
return metrics;
}
+ public DataNodeDiskMetrics getDiskMetrics() {
+ return diskMetrics;
+ }
+
public DataNodePeerMetrics getPeerMetrics() {
return peerMetrics;
}
@@ -3367,4 +3371,14 @@ public class DataNode extends ReconfigurableBase
return peerMetrics != null ?
peerMetrics.dumpSendPacketDownstreamAvgInfoAsJson() : null;
}
+
+ @Override // DataNodeMXBean
+ public String getSlowDisks() {
+ if (diskMetrics == null) {
+ //Disk Stats not enabled
+ return null;
+ }
+ Set<String> slowDisks = diskMetrics.getDiskOutliersStats().keySet();
+ return JSON.toString(slowDisks);
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java
index 5249ff50298..d867fa72fe8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeMXBean.java
@@ -117,4 +117,11 @@ public interface DataNodeMXBean {
*
*/
String getSendPacketDownstreamAvgInfo();
+
+ /**
+ * Gets the slow disks in the Datanode.
+ *
+ * @return list of slow disks
+ */
+ String getSlowDisks();
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeDiskMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeDiskMetrics.java
index 32791b50188..7e6c909100e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeDiskMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeDiskMetrics.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hdfs.server.datanode.metrics;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -52,7 +54,8 @@ public class DataNodeDiskMetrics {
private volatile boolean shouldRun;
private OutlierDetector slowDiskDetector;
private Daemon slowDiskDetectionDaemon;
- private volatile Map<String, Map<DiskOp, Double>> diskOutliersStats;
+ private volatile Map<String, Map<DiskOp, Double>>
+ diskOutliersStats = Maps.newHashMap();
public DataNodeDiskMetrics(DataNode dn, long diskOutlierDetectionIntervalMs) {
this.dn = dn;
@@ -178,4 +181,12 @@ public class DataNodeDiskMetrics {
LOG.error("Disk Outlier Detection daemon did not shutdown", e);
}
}
+
+ /**
+ * Use only for testing.
+ */
+ @VisibleForTesting
+ public void addSlowDiskForTesting(String slowDiskPath) {
+ diskOutliersStats.put(slowDiskPath, ImmutableMap.of());
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMXBean.java
index bbea08dd0a7..89a96be5e04 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMXBean.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.test.GenericTestUtils;
@@ -98,8 +99,12 @@ public class TestDataNodeMXBean {
String bpActorInfo = (String)mbs.getAttribute(mxbeanName,
"BPServiceActorInfo");
Assert.assertEquals(datanode.getBPServiceActorInfo(), bpActorInfo);
+ String slowDisks = (String)mbs.getAttribute(mxbeanName, "SlowDisks");
+ Assert.assertEquals(datanode.getSlowDisks(), slowDisks);
} finally {
- if (cluster != null) {cluster.shutdown();}
+ if (cluster != null) {
+ cluster.shutdown();
+ }
}
}
@@ -203,4 +208,30 @@ public class TestDataNodeMXBean {
}
return totalBlocks;
}
+
+ @Test
+ public void testDataNodeMXBeanSlowDisksEnabled() throws Exception {
+ Configuration conf = new Configuration();
+ conf.setDouble(DFSConfigKeys
+ .DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY, 1.0);
+ MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
+
+ try {
+ List<DataNode> datanodes = cluster.getDataNodes();
+ Assert.assertEquals(datanodes.size(), 1);
+ DataNode datanode = datanodes.get(0);
+ String slowDiskPath = "test/data1/slowVolume";
+ datanode.getDiskMetrics().addSlowDiskForTesting(slowDiskPath);
+
+ MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
+ ObjectName mxbeanName = new ObjectName(
+ "Hadoop:service=DataNode,name=DataNodeInfo");
+
+ String slowDisks = (String)mbs.getAttribute(mxbeanName, "SlowDisks");
+ Assert.assertEquals(datanode.getSlowDisks(), slowDisks);
+ Assert.assertTrue(slowDisks.contains(slowDiskPath));
+ } finally {
+ if (cluster != null) {cluster.shutdown();}
+ }
+ }
}