diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java index 81ce439e14b..3a77742adb2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java @@ -141,7 +141,7 @@ public static RollingUpgradeAction fromString(String s) { // type of the datanode report public enum DatanodeReportType { - ALL, LIVE, DEAD, DECOMMISSIONING, ENTERING_MAINTENANCE + ALL, LIVE, DEAD, DECOMMISSIONING, ENTERING_MAINTENANCE, IN_MAINTENANCE } /* Hidden constructor */ diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java index 29335e27880..9672bb32e2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java @@ -1563,6 +1563,10 @@ public static DatanodeReportTypeProto convert(DatanodeReportType t) { case LIVE: return DatanodeReportTypeProto.LIVE; case DEAD: return DatanodeReportTypeProto.DEAD; case DECOMMISSIONING: return DatanodeReportTypeProto.DECOMMISSIONING; + case ENTERING_MAINTENANCE: + return DatanodeReportTypeProto.ENTERING_MAINTENANCE; + case IN_MAINTENANCE: + return DatanodeReportTypeProto.IN_MAINTENANCE; default: throw new IllegalArgumentException("Unexpected data type report:" + t); } @@ -1932,6 +1936,10 @@ public static DatanodeReportType convert(DatanodeReportTypeProto t) { case LIVE: return DatanodeReportType.LIVE; case DEAD: return DatanodeReportType.DEAD; case DECOMMISSIONING: return 
DatanodeReportType.DECOMMISSIONING; + case ENTERING_MAINTENANCE: + return DatanodeReportType.ENTERING_MAINTENANCE; + case IN_MAINTENANCE: + return DatanodeReportType.IN_MAINTENANCE; default: throw new IllegalArgumentException("Unexpected data type report:" + t); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto index 83fb296d075..18e430507a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto @@ -327,6 +327,8 @@ enum DatanodeReportTypeProto { // type of the datanode report LIVE = 2; DEAD = 3; DECOMMISSIONING = 4; + ENTERING_MAINTENANCE = 5; + IN_MAINTENANCE = 6; } message GetDatanodeReportRequestProto { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 5eb6760cdbb..bd9f4204499 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -1354,6 +1354,9 @@ public List getDatanodeListForReport( final boolean listEnteringMaintenanceNodes = type == DatanodeReportType.ALL || type == DatanodeReportType.ENTERING_MAINTENANCE; + final boolean listInMaintenanceNodes = + type == DatanodeReportType.ALL || + type == DatanodeReportType.IN_MAINTENANCE; ArrayList nodes; final HostSet foundNodes = new HostSet(); @@ -1366,11 +1369,13 @@ public List getDatanodeListForReport( final boolean isDead = isDatanodeDead(dn); final boolean isDecommissioning = dn.isDecommissionInProgress(); final boolean isEnteringMaintenance = dn.isEnteringMaintenance(); 
+ final boolean isInMaintenance = dn.isInMaintenance(); if (((listLiveNodes && !isDead) || (listDeadNodes && isDead) || (listDecommissioningNodes && isDecommissioning) || - (listEnteringMaintenanceNodes && isEnteringMaintenance)) && + (listEnteringMaintenanceNodes && isEnteringMaintenance) || + (listInMaintenanceNodes && isInMaintenance)) && hostConfigManager.isIncluded(dn)) { nodes.add(dn); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java index 72a76f1c088..6a84ad51a1d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java @@ -421,7 +421,8 @@ static int run(DistributedFileSystem dfs, String[] argv, int idx) throws IOExcep * "hdfs dfsadmin" */ private static final String commonUsageSummary = - "\t[-report [-live] [-dead] [-decommissioning]]\n" + + "\t[-report [-live] [-dead] [-decommissioning] " + + "[-enteringmaintenance] [-inmaintenance]]\n" + "\t[-safemode ]\n" + "\t[-saveNamespace]\n" + "\t[-rollEdits]\n" + @@ -544,48 +545,51 @@ public void report(String[] argv, int i) throws IOException { final boolean listDead = StringUtils.popOption("-dead", args); final boolean listDecommissioning = StringUtils.popOption("-decommissioning", args); + final boolean listEnteringMaintenance = + StringUtils.popOption("-enteringmaintenance", args); + final boolean listInMaintenance = + StringUtils.popOption("-inmaintenance", args); + // If no filter flags are found, then list all DN types - boolean listAll = (!listLive && !listDead && !listDecommissioning); + boolean listAll = (!listLive && !listDead && !listDecommissioning + && !listEnteringMaintenance && !listInMaintenance); if (listAll || listLive) { - DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE); - if (live.length > 0 || 
listLive) { - System.out.println("Live datanodes (" + live.length + "):\n"); - } - if (live.length > 0) { - for (DatanodeInfo dn : live) { - System.out.println(dn.getDatanodeReport()); - System.out.println(); - } - } + printDataNodeReports(dfs, DatanodeReportType.LIVE, listLive, "Live"); } if (listAll || listDead) { - DatanodeInfo[] dead = dfs.getDataNodeStats(DatanodeReportType.DEAD); - if (dead.length > 0 || listDead) { - System.out.println("Dead datanodes (" + dead.length + "):\n"); - } - if (dead.length > 0) { - for (DatanodeInfo dn : dead) { - System.out.println(dn.getDatanodeReport()); - System.out.println(); - } - } + printDataNodeReports(dfs, DatanodeReportType.DEAD, listDead, "Dead"); } if (listAll || listDecommissioning) { - DatanodeInfo[] decom = - dfs.getDataNodeStats(DatanodeReportType.DECOMMISSIONING); - if (decom.length > 0 || listDecommissioning) { - System.out.println("Decommissioning datanodes (" + decom.length - + "):\n"); - } - if (decom.length > 0) { - for (DatanodeInfo dn : decom) { - System.out.println(dn.getDatanodeReport()); - System.out.println(); - } + printDataNodeReports(dfs, DatanodeReportType.DECOMMISSIONING, + listDecommissioning, "Decommissioning"); + } + + if (listAll || listEnteringMaintenance) { + printDataNodeReports(dfs, DatanodeReportType.ENTERING_MAINTENANCE, + listEnteringMaintenance, "Entering maintenance"); + } + + if (listAll || listInMaintenance) { + printDataNodeReports(dfs, DatanodeReportType.IN_MAINTENANCE, + listInMaintenance, "In maintenance"); + } + } + + private static void printDataNodeReports(DistributedFileSystem dfs, + DatanodeReportType type, boolean listNodes, String nodeState) + throws IOException { + DatanodeInfo[] nodes = dfs.getDataNodeStats(type); + if (nodes.length > 0 || listNodes) { + System.out.println(nodeState + " datanodes (" + nodes.length + "):\n"); + } + if (nodes.length > 0) { + for (DatanodeInfo dn : nodes) { + System.out.println(dn.getDatanodeReport()); + System.out.println(); } } } @@ 
-961,12 +965,13 @@ private void printHelp(String cmd) { "hdfs dfsadmin\n" + commonUsageSummary; - String report ="-report [-live] [-dead] [-decommissioning]:\n" + - "\tReports basic filesystem information and statistics. \n" + - "\tThe dfs usage can be different from \"du\" usage, because it\n" + - "\tmeasures raw space used by replication, checksums, snapshots\n" + - "\tand etc. on all the DNs.\n" + - "\tOptional flags may be used to filter the list of displayed DNs.\n"; + String report ="-report [-live] [-dead] [-decommissioning] " + + "[-enteringmaintenance] [-inmaintenance]:\n" + + "\tReports basic filesystem information and statistics. \n" + + "\tThe dfs usage can be different from \"du\" usage, because it\n" + + "\tmeasures raw space used by replication, checksums, snapshots\n" + + "\tand etc. on all the DNs.\n" + + "\tOptional flags may be used to filter the list of displayed DNs.\n"; String safemode = "-safemode : Safe mode " + "maintenance command.\n" + @@ -1749,7 +1754,8 @@ public int genericRefresh(String[] argv, int i) throws IOException { private static void printUsage(String cmd) { if ("-report".equals(cmd)) { System.err.println("Usage: hdfs dfsadmin" - + " [-report] [-live] [-dead] [-decommissioning]"); + + " [-report] [-live] [-dead] [-decommissioning]" + + " [-enteringmaintenance] [-inmaintenance]"); } else if ("-safemode".equals(cmd)) { System.err.println("Usage: hdfs dfsadmin" + " [-safemode enter | leave | get | wait | forceExit]"); @@ -1888,7 +1894,7 @@ public int run(String[] argv) throws Exception { return exitCode; } } else if ("-report".equals(cmd)) { - if (argv.length < 1) { + if (argv.length > 6) { printUsage(cmd); return exitCode; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md index fd51924714f..7cffdb6516a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSCommands.md @@ -324,7 +324,7 @@ Runs a HDFS datanode. Usage: - hdfs dfsadmin [-report [-live] [-dead] [-decommissioning]] + hdfs dfsadmin [-report [-live] [-dead] [-decommissioning] [-enteringmaintenance] [-inmaintenance]] hdfs dfsadmin [-safemode enter | leave | get | wait | forceExit] hdfs dfsadmin [-saveNamespace] hdfs dfsadmin [-rollEdits] @@ -360,7 +360,7 @@ Usage: | COMMAND\_OPTION | Description | |:---- |:---- | -| `-report` `[-live]` `[-dead]` `[-decommissioning]` | Reports basic filesystem information and statistics, The dfs usage can be different from "du" usage, because it measures raw space used by replication, checksums, snapshots and etc. on all the DNs. Optional flags may be used to filter the list of displayed DataNodes. | +| `-report` `[-live]` `[-dead]` `[-decommissioning]` `[-enteringmaintenance]` `[-inmaintenance]` | Reports basic filesystem information and statistics, The dfs usage can be different from "du" usage, because it measures raw space used by replication, checksums, snapshots and etc. on all the DNs. Optional flags may be used to filter the list of displayed DataNodes. | | `-safemode` enter\|leave\|get\|wait\|forceExit | Safe mode maintenance command. Safe mode is a Namenode state in which it
1. does not accept changes to the name space (read-only)
2. does not replicate or delete blocks.
Safe mode is entered automatically at Namenode startup, and leaves safe mode automatically when the configured minimum percentage of blocks satisfies the minimum replication condition. If Namenode detects any anomaly then it will linger in safe mode till that issue is resolved. If that anomaly is the consequence of a deliberate action, then administrator can use -safemode forceExit to exit safe mode. The cases where forceExit may be required are
1. Namenode metadata is not consistent. If Namenode detects that metadata has been modified out of band and can cause data loss, then Namenode will enter forceExit state. At that point user can either restart Namenode with correct metadata files or forceExit (if data loss is acceptable).
2. Rollback causes metadata to be replaced and rarely it can trigger safe mode forceExit state in Namenode. In that case you may proceed by issuing -safemode forceExit.
Safe mode can also be entered manually, but then it can only be turned off manually as well. | | `-saveNamespace` | Save current namespace into storage directories and reset edits log. Requires safe mode. | | `-rollEdits` | Rolls the edit log on the active NameNode. | diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java index b49fba0c572..e0dfb4a5151 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java @@ -17,11 +17,18 @@ */ package org.apache.hadoop.hdfs; +import static org.hamcrest.CoreMatchers.allOf; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.not; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.PrintStream; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -29,6 +36,7 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -43,8 +51,10 @@ import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.hdfs.tools.DFSAdmin; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; +import org.apache.hadoop.util.ToolRunner; import org.junit.Assert; import org.junit.Test; import org.slf4j.Logger; @@ 
-1124,4 +1134,102 @@ static private DatanodeInfo[] getFirstBlockReplicasDatanodeInfos(
       return null;
     }
   }
+
+  /**
+   * Verifies that "dfsadmin -report" honours the -enteringmaintenance and
+   * -inmaintenance filters while a datanode moves through the maintenance
+   * states.
+   */
+  @Test(timeout = 120000)
+  public void testReportMaintenanceNodes() throws Exception {
+    // DFSAdmin prints to the console, so redirect stdout/stderr into buffers
+    // and restore the original streams in the finally block below.
+    final PrintStream originalOut = System.out;
+    final PrintStream originalErr = System.err;
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    ByteArrayOutputStream err = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(out));
+    System.setErr(new PrintStream(err));
+    try {
+      LOG.info("Starting testReportMaintenanceNodes");
+      int expirationInMs = 30 * 1000;
+      int numNodes = 2;
+      setMinMaintenanceR(numNodes);
+
+      startCluster(1, numNodes);
+      getCluster().waitActive();
+
+      FileSystem fileSys = getCluster().getFileSystem(0);
+      getConf().set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY,
+          fileSys.getUri().toString());
+      DFSAdmin dfsAdmin = new DFSAdmin(getConf());
+
+      FSNamesystem fsn = getCluster().getNameNode().getNamesystem();
+      assertEquals(numNodes, fsn.getNumLiveDataNodes());
+
+      // No node is under maintenance yet: both sections are empty.
+      int ret = ToolRunner.run(dfsAdmin,
+          new String[] {"-report", "-enteringmaintenance", "-inmaintenance"});
+      assertEquals(0, ret);
+      assertThat(out.toString(),
+          is(allOf(containsString("Entering maintenance datanodes (0):"),
+              containsString("In maintenance datanodes (0):"),
+              not(containsString(
+                  getCluster().getDataNodes().get(0).getDisplayName())),
+              not(containsString(
+                  getCluster().getDataNodes().get(1).getDisplayName())))));
+
+      final Path file = new Path("/testReportMaintenanceNodes.dat");
+      writeFile(fileSys, file, numNodes, 1);
+
+      DatanodeInfo[] nodes = getFirstBlockReplicasDatanodeInfos(fileSys, file);
+      // Request maintenance for the first replica's DataNode. It will not
+      // transition to AdminStates.IN_MAINTENANCE immediately since there
+      // are not enough candidate nodes to satisfy the min maintenance
+      // replication.
+      DatanodeInfo maintenanceDN = takeNodeOutofService(0,
+          nodes[0].getDatanodeUuid(), Time.now() + expirationInMs, null, null,
+          AdminStates.ENTERING_MAINTENANCE);
+      assertEquals(1, fsn.getNumEnteringMaintenanceDataNodes());
+
+      // reset stream
+      out.reset();
+      err.reset();
+
+      ret = ToolRunner.run(dfsAdmin,
+          new String[] {"-report", "-enteringmaintenance"});
+      assertEquals(0, ret);
+      assertThat(out.toString(),
+          is(allOf(containsString("Entering maintenance datanodes (1):"),
+              containsString(nodes[0].getXferAddr()),
+              not(containsString(nodes[1].getXferAddr())))));
+
+      // reset stream
+      out.reset();
+      err.reset();
+
+      // start a new datanode to make state transition to
+      // AdminStates.IN_MAINTENANCE
+      getCluster().startDataNodes(getConf(), 1, true, null, null);
+      getCluster().waitActive();
+
+      waitNodeState(maintenanceDN, AdminStates.IN_MAINTENANCE);
+      assertEquals(1, fsn.getNumInMaintenanceLiveDataNodes());
+
+      ret = ToolRunner.run(dfsAdmin,
+          new String[] {"-report", "-inmaintenance"});
+      assertEquals(0, ret);
+      assertThat(out.toString(),
+          is(allOf(containsString("In maintenance datanodes (1):"),
+              containsString(nodes[0].getXferAddr()),
+              not(containsString(nodes[1].getXferAddr())),
+              not(containsString(
+                  getCluster().getDataNodes().get(2).getDisplayName())))));
+
+      cleanupFile(getCluster().getFileSystem(), file);
+    } finally {
+      System.setOut(originalOut);
+      System.setErr(originalErr);
+    }
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
index d1a66df8d6d..1f1b91ebe77 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
@@ -15665,7 +15665,7 @@
 RegexpComparator
- ^-report \[-live\] \[-dead\] \[-decommissioning\]:(.)*
+ ^-report \[-live\] \[-dead\] \[-decommissioning\] \[-enteringmaintenance\] \[-inmaintenance\]:(.)*
 RegexpComparator