HDFS-11560. Expose slow disks via NameNode JMX. Contributed by Hanisha Koneru.
This commit is contained in:
parent
5485d93bda
commit
73835c73e2
|
@ -1907,5 +1907,14 @@ public class DatanodeManager {
|
||||||
public SlowDiskTracker getSlowDiskTracker() {
|
public SlowDiskTracker getSlowDiskTracker() {
|
||||||
return slowDiskTracker;
|
return slowDiskTracker;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Retrieve information about slow disks as a JSON string.
|
||||||
|
* Returns null if we are not tracking slow disks.
|
||||||
|
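* @return JSON string describing the slow disks, or null if slow disks are not tracked or the report is empty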
|
||||||
|
*/
|
||||||
|
public String getSlowDisksReport() {
|
||||||
|
return slowDiskTracker != null ?
|
||||||
|
slowDiskTracker.getSlowDiskReportAsJsonString() : null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -256,6 +256,9 @@ public class SlowDiskTracker {
|
||||||
public String getSlowDiskReportAsJsonString() {
|
public String getSlowDiskReportAsJsonString() {
|
||||||
ObjectMapper objectMapper = new ObjectMapper();
|
ObjectMapper objectMapper = new ObjectMapper();
|
||||||
try {
|
try {
|
||||||
|
if (slowDisksReport.isEmpty()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
return objectMapper.writeValueAsString(slowDisksReport);
|
return objectMapper.writeValueAsString(slowDisksReport);
|
||||||
} catch (JsonProcessingException e) {
|
} catch (JsonProcessingException e) {
|
||||||
// Failed to serialize. Don't log the exception call stack.
|
// Failed to serialize. Don't log the exception call stack.
|
||||||
|
|
|
@ -1826,6 +1826,12 @@ public class NameNode extends ReconfigurableBase implements
|
||||||
.getSlowPeersReport();
|
.getSlowPeersReport();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override //NameNodeStatusMXBean
|
||||||
|
public String getSlowDisksReport() {
|
||||||
|
return namesystem.getBlockManager().getDatanodeManager()
|
||||||
|
.getSlowDisksReport();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shutdown the NN immediately in an ungraceful way. Used when it would be
|
* Shutdown the NN immediately in an ungraceful way. Used when it would be
|
||||||
* unsafe for the NN to continue operating, e.g. during a failed HA state
|
* unsafe for the NN to continue operating, e.g. during a failed HA state
|
||||||
|
|
|
@ -75,4 +75,12 @@ public interface NameNodeStatusMXBean {
|
||||||
* enabled. The report is in a JSON format.
|
* enabled. The report is in a JSON format.
|
||||||
*/
|
*/
|
||||||
String getSlowPeersReport();
|
String getSlowPeersReport();
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the topN slow disks in the cluster, if the feature is enabled.
|
||||||
|
*
|
||||||
|
* @return JSON string of the list of disk IDs and their latencies, or null if no slow disk report is available
|
||||||
|
*/
|
||||||
|
String getSlowDisksReport();
|
||||||
}
|
}
|
||||||
|
|
|
@ -393,18 +393,9 @@ public class TestSlowDiskTracker {
|
||||||
timer.advance(reportValidityMs);
|
timer.advance(reportValidityMs);
|
||||||
|
|
||||||
tracker.updateSlowDiskReportAsync(timer.monotonicNow());
|
tracker.updateSlowDiskReportAsync(timer.monotonicNow());
|
||||||
|
Thread.sleep(OUTLIERS_REPORT_INTERVAL*2);
|
||||||
|
|
||||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
assertTrue(tracker.getSlowDiskReportAsJsonString() == null);
|
||||||
@Override
|
|
||||||
public Boolean get() {
|
|
||||||
return tracker.getSlowDiskReportAsJsonString() != null;
|
|
||||||
}
|
|
||||||
}, 500, 5000);
|
|
||||||
|
|
||||||
ArrayList<DiskLatency> jsonReport = getAndDeserializeJson(
|
|
||||||
tracker.getSlowDiskReportAsJsonString());
|
|
||||||
|
|
||||||
assertTrue(jsonReport.isEmpty());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isDiskInReports(ArrayList<DiskLatency> reports,
|
private boolean isDiskInReports(ArrayList<DiskLatency> reports,
|
||||||
|
|
|
@ -17,17 +17,23 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import com.google.common.base.Supplier;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
|
||||||
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.TestDataNodeMXBean;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import javax.management.MBeanServer;
|
import javax.management.MBeanServer;
|
||||||
import javax.management.ObjectName;
|
import javax.management.ObjectName;
|
||||||
import java.lang.management.ManagementFactory;
|
import java.lang.management.ManagementFactory;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class for testing {@link NameNodeStatusMXBean} implementation.
|
* Class for testing {@link NameNodeStatusMXBean} implementation.
|
||||||
|
@ -38,7 +44,7 @@ public class TestNameNodeStatusMXBean {
|
||||||
TestNameNodeStatusMXBean.class);
|
TestNameNodeStatusMXBean.class);
|
||||||
|
|
||||||
@Test(timeout = 120000L)
|
@Test(timeout = 120000L)
|
||||||
public void testDataNodeMXBean() throws Exception {
|
public void testNameNodeStatusMXBean() throws Exception {
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
MiniDFSCluster cluster = null;
|
MiniDFSCluster cluster = null;
|
||||||
|
|
||||||
|
@ -84,6 +90,55 @@ public class TestNameNodeStatusMXBean {
|
||||||
String slowPeersReport = (String)mbs.getAttribute(mxbeanName,
|
String slowPeersReport = (String)mbs.getAttribute(mxbeanName,
|
||||||
"SlowPeersReport");
|
"SlowPeersReport");
|
||||||
Assert.assertEquals(nn.getSlowPeersReport(), slowPeersReport);
|
Assert.assertEquals(nn.getSlowPeersReport(), slowPeersReport);
|
||||||
|
|
||||||
|
// Get attribute "SlowDisksReport"
|
||||||
|
String slowDisksReport = (String)mbs.getAttribute(mxbeanName,
|
||||||
|
"SlowDisksReport");
|
||||||
|
Assert.assertEquals(nn.getSlowDisksReport(), slowDisksReport);
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNameNodeMXBeanSlowDisksEnabled() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setDouble(
|
||||||
|
DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING_SAMPLING_FRACTION_KEY, 1.0);
|
||||||
|
conf.setTimeDuration(
|
||||||
|
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
|
||||||
|
1000, TimeUnit.MILLISECONDS);
|
||||||
|
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<DataNode> datanodes = cluster.getDataNodes();
|
||||||
|
Assert.assertEquals(datanodes.size(), 1);
|
||||||
|
DataNode datanode = datanodes.get(0);
|
||||||
|
String slowDiskPath = "test/data1/slowVolume";
|
||||||
|
datanode.getDiskMetrics().addSlowDiskForTesting(slowDiskPath, null);
|
||||||
|
|
||||||
|
NameNode nn = cluster.getNameNode();
|
||||||
|
DatanodeManager datanodeManager = nn.getNamesystem().getBlockManager()
|
||||||
|
.getDatanodeManager();
|
||||||
|
|
||||||
|
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
|
||||||
|
ObjectName mxbeanName = new ObjectName(
|
||||||
|
"Hadoop:service=NameNode,name=NameNodeStatus");
|
||||||
|
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override
|
||||||
|
public Boolean get() {
|
||||||
|
return (datanodeManager.getSlowDisksReport() != null);
|
||||||
|
}
|
||||||
|
}, 1000, 100000);
|
||||||
|
|
||||||
|
String slowDisksReport = (String)mbs.getAttribute(
|
||||||
|
mxbeanName, "SlowDisksReport");
|
||||||
|
Assert.assertEquals(datanodeManager.getSlowDisksReport(),
|
||||||
|
slowDisksReport);
|
||||||
|
Assert.assertTrue(slowDisksReport.contains(slowDiskPath));
|
||||||
} finally {
|
} finally {
|
||||||
if (cluster != null) {
|
if (cluster != null) {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
|
|
Loading…
Reference in New Issue