HDFS-11359. DFSAdmin report command supports displaying maintenance state datanodes. Contributed by Yiqun Lin.

Yiqun Lin 2017-06-02 12:48:30 +08:00 committed by Xiaoyu Yao
parent d48f2f6839
commit 60a7f57b61
8 changed files with 161 additions and 46 deletions

HdfsConstants.java

@@ -141,7 +141,7 @@ public final class HdfsConstants {
   // type of the datanode report
   public enum DatanodeReportType {
-    ALL, LIVE, DEAD, DECOMMISSIONING, ENTERING_MAINTENANCE
+    ALL, LIVE, DEAD, DECOMMISSIONING, ENTERING_MAINTENANCE, IN_MAINTENANCE
   }

   /* Hidden constructor */
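For orientation, the new enum values surface to clients through the existing DistributedFileSystem#getDataNodeStats(DatanodeReportType) call, the same API DFSAdmin uses below. A minimal client-side sketch, not part of this patch, assuming fs.defaultFS points at a cluster running this change:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
    import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;

    public class MaintenanceReportExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        try (FileSystem fs = FileSystem.get(conf)) {
          DistributedFileSystem dfs = (DistributedFileSystem) fs;
          // IN_MAINTENANCE lists nodes that completed the transition;
          // ENTERING_MAINTENANCE lists nodes still replicating their blocks.
          DatanodeInfo[] inMaintenance =
              dfs.getDataNodeStats(DatanodeReportType.IN_MAINTENANCE);
          System.out.println("In maintenance: " + inMaintenance.length);
        }
      }
    }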

PBHelperClient.java

@@ -1725,6 +1725,10 @@ public class PBHelperClient {
     case LIVE: return DatanodeReportTypeProto.LIVE;
     case DEAD: return DatanodeReportTypeProto.DEAD;
     case DECOMMISSIONING: return DatanodeReportTypeProto.DECOMMISSIONING;
+    case ENTERING_MAINTENANCE:
+      return DatanodeReportTypeProto.ENTERING_MAINTENANCE;
+    case IN_MAINTENANCE:
+      return DatanodeReportTypeProto.IN_MAINTENANCE;
     default:
       throw new IllegalArgumentException("Unexpected data type report:" + t);
     }
@@ -2128,6 +2132,10 @@ public class PBHelperClient {
     case LIVE: return DatanodeReportType.LIVE;
     case DEAD: return DatanodeReportType.DEAD;
     case DECOMMISSIONING: return DatanodeReportType.DECOMMISSIONING;
+    case ENTERING_MAINTENANCE:
+      return DatanodeReportType.ENTERING_MAINTENANCE;
+    case IN_MAINTENANCE:
+      return DatanodeReportType.IN_MAINTENANCE;
     default:
       throw new IllegalArgumentException("Unexpected data type report:" + t);
     }
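The two switch statements above are mirror images and must stay in sync. A round-trip check sketch (hypothetical test class, not part of this patch; assumes the public static PBHelperClient#convert overloads shown above):

    import static org.junit.Assert.assertEquals;

    import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
    import org.apache.hadoop.hdfs.protocolPB.PBHelperClient;
    import org.junit.Test;

    public class TestReportTypeRoundTrip {
      @Test
      public void testMaintenanceReportTypesRoundTrip() {
        for (DatanodeReportType t : new DatanodeReportType[] {
            DatanodeReportType.ENTERING_MAINTENANCE,
            DatanodeReportType.IN_MAINTENANCE}) {
          // convert(convert(t)) exercises both switches added in this patch.
          assertEquals(t, PBHelperClient.convert(PBHelperClient.convert(t)));
        }
      }
    }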

ClientNamenodeProtocol.proto

@@ -332,6 +332,8 @@ enum DatanodeReportTypeProto {  // type of the datanode report
   LIVE = 2;
   DEAD = 3;
   DECOMMISSIONING = 4;
+  ENTERING_MAINTENANCE = 5;
+  IN_MAINTENANCE = 6;
 }

 message GetDatanodeReportRequestProto {

DatanodeManager.java

@@ -1441,6 +1441,9 @@ public class DatanodeManager {
     final boolean listEnteringMaintenanceNodes =
         type == DatanodeReportType.ALL ||
         type == DatanodeReportType.ENTERING_MAINTENANCE;
+    final boolean listInMaintenanceNodes =
+        type == DatanodeReportType.ALL ||
+        type == DatanodeReportType.IN_MAINTENANCE;

     ArrayList<DatanodeDescriptor> nodes;
     final HostSet foundNodes = new HostSet();
@@ -1453,11 +1456,13 @@ public class DatanodeManager {
       final boolean isDead = isDatanodeDead(dn);
       final boolean isDecommissioning = dn.isDecommissionInProgress();
       final boolean isEnteringMaintenance = dn.isEnteringMaintenance();
+      final boolean isInMaintenance = dn.isInMaintenance();

       if (((listLiveNodes && !isDead) ||
           (listDeadNodes && isDead) ||
           (listDecommissioningNodes && isDecommissioning) ||
-          (listEnteringMaintenanceNodes && isEnteringMaintenance)) &&
+          (listEnteringMaintenanceNodes && isEnteringMaintenance) ||
+          (listInMaintenanceNodes && isInMaintenance)) &&
           hostConfigManager.isIncluded(dn)) {
         nodes.add(dn);
       }
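Note that liveness and maintenance are independent in this filter: an ENTERING_MAINTENANCE datanode that is still heartbeating also satisfies the listLiveNodes branch, so it is reported under -live as well. A standalone restatement of the predicate in getDatanodeListForReport (hypothetical helper, for illustration only):

    /** Hypothetical restatement of the node filter above. */
    static boolean matchesReport(boolean listLive, boolean listDead,
        boolean listDecommissioning, boolean listEnteringMaintenance,
        boolean listInMaintenance, boolean isDead, boolean isDecommissioning,
        boolean isEnteringMaintenance, boolean isInMaintenance,
        boolean isIncluded) {
      // One clause per requested report type; matching any one suffices.
      boolean stateMatches = (listLive && !isDead)
          || (listDead && isDead)
          || (listDecommissioning && isDecommissioning)
          || (listEnteringMaintenance && isEnteringMaintenance)
          || (listInMaintenance && isInMaintenance);
      // The host include-list check applies regardless of report type.
      return stateMatches && isIncluded;
    }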

DFSAdmin.java

@@ -421,7 +421,8 @@ public class DFSAdmin extends FsShell {
    * "hdfs dfsadmin"
    */
   private static final String commonUsageSummary =
-    "\t[-report [-live] [-dead] [-decommissioning]]\n" +
+    "\t[-report [-live] [-dead] [-decommissioning] " +
+    "[-enteringmaintenance] [-inmaintenance]]\n" +
     "\t[-safemode <enter | leave | get | wait>]\n" +
     "\t[-saveNamespace [-beforeShutdown]]\n" +
     "\t[-rollEdits]\n" +
@@ -544,48 +545,51 @@ public class DFSAdmin extends FsShell {
     final boolean listDead = StringUtils.popOption("-dead", args);
     final boolean listDecommissioning =
         StringUtils.popOption("-decommissioning", args);
+    final boolean listEnteringMaintenance =
+        StringUtils.popOption("-enteringmaintenance", args);
+    final boolean listInMaintenance =
+        StringUtils.popOption("-inmaintenance", args);

     // If no filter flags are found, then list all DN types
-    boolean listAll = (!listLive && !listDead && !listDecommissioning);
+    boolean listAll = (!listLive && !listDead && !listDecommissioning
+        && !listEnteringMaintenance && !listInMaintenance);

     if (listAll || listLive) {
-      DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE);
-      if (live.length > 0 || listLive) {
-        System.out.println("Live datanodes (" + live.length + "):\n");
-      }
-      if (live.length > 0) {
-        for (DatanodeInfo dn : live) {
-          System.out.println(dn.getDatanodeReport());
-          System.out.println();
-        }
-      }
+      printDataNodeReports(dfs, DatanodeReportType.LIVE, listLive, "Live");
     }

     if (listAll || listDead) {
-      DatanodeInfo[] dead = dfs.getDataNodeStats(DatanodeReportType.DEAD);
-      if (dead.length > 0 || listDead) {
-        System.out.println("Dead datanodes (" + dead.length + "):\n");
-      }
-      if (dead.length > 0) {
-        for (DatanodeInfo dn : dead) {
-          System.out.println(dn.getDatanodeReport());
-          System.out.println();
-        }
-      }
+      printDataNodeReports(dfs, DatanodeReportType.DEAD, listDead, "Dead");
     }

     if (listAll || listDecommissioning) {
-      DatanodeInfo[] decom =
-          dfs.getDataNodeStats(DatanodeReportType.DECOMMISSIONING);
-      if (decom.length > 0 || listDecommissioning) {
-        System.out.println("Decommissioning datanodes (" + decom.length
-            + "):\n");
-      }
-      if (decom.length > 0) {
-        for (DatanodeInfo dn : decom) {
-          System.out.println(dn.getDatanodeReport());
-          System.out.println();
-        }
-      }
+      printDataNodeReports(dfs, DatanodeReportType.DECOMMISSIONING,
+          listDecommissioning, "Decommissioning");
+    }
+
+    if (listAll || listEnteringMaintenance) {
+      printDataNodeReports(dfs, DatanodeReportType.ENTERING_MAINTENANCE,
+          listEnteringMaintenance, "Entering maintenance");
+    }
+
+    if (listAll || listInMaintenance) {
+      printDataNodeReports(dfs, DatanodeReportType.IN_MAINTENANCE,
+          listInMaintenance, "In maintenance");
+    }
+  }
+
+  private static void printDataNodeReports(DistributedFileSystem dfs,
+      DatanodeReportType type, boolean listNodes, String nodeState)
+      throws IOException {
+    DatanodeInfo[] nodes = dfs.getDataNodeStats(type);
+    if (nodes.length > 0 || listNodes) {
+      System.out.println(nodeState + " datanodes (" + nodes.length + "):\n");
+    }
+    if (nodes.length > 0) {
+      for (DatanodeInfo dn : nodes) {
+        System.out.println(dn.getDatanodeReport());
+        System.out.println();
+      }
     }
   }
@@ -986,12 +990,13 @@ public class DFSAdmin extends FsShell {
       "hdfs dfsadmin\n" +
       commonUsageSummary;

-    String report ="-report [-live] [-dead] [-decommissioning]:\n" +
+    String report ="-report [-live] [-dead] [-decommissioning] "
+        + "[-enteringmaintenance] [-inmaintenance]:\n" +
       "\tReports basic filesystem information and statistics. \n" +
       "\tThe dfs usage can be different from \"du\" usage, because it\n" +
       "\tmeasures raw space used by replication, checksums, snapshots\n" +
       "\tand etc. on all the DNs.\n" +
       "\tOptional flags may be used to filter the list of displayed DNs.\n";

     String safemode = "-safemode <enter|leave|get|wait|forceExit>: Safe mode " +
       "maintenance command.\n" +
@@ -1779,7 +1784,8 @@ public class DFSAdmin extends FsShell {
   private static void printUsage(String cmd) {
     if ("-report".equals(cmd)) {
       System.err.println("Usage: hdfs dfsadmin"
-          + " [-report] [-live] [-dead] [-decommissioning]");
+          + " [-report] [-live] [-dead] [-decommissioning]"
+          + " [-enteringmaintenance] [-inmaintenance]");
     } else if ("-safemode".equals(cmd)) {
       System.err.println("Usage: hdfs dfsadmin"
           + " [-safemode enter | leave | get | wait | forceExit]");
@@ -1917,7 +1923,7 @@ public class DFSAdmin extends FsShell {
         return exitCode;
       }
     } else if ("-report".equals(cmd)) {
-      if (argv.length > 4) {
+      if (argv.length > 6) {
         printUsage(cmd);
         return exitCode;
       }
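With the refactored printDataNodeReports above, the new flags compose like the existing ones. An illustrative invocation (hostnames hypothetical, per-node report lines elided; the section headers match the strings printed by printDataNodeReports):

    $ hdfs dfsadmin -report -enteringmaintenance -inmaintenance
    Entering maintenance datanodes (1):

    Name: 192.168.0.101:9866 (dn1.example.com)
    ...

    In maintenance datanodes (0):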

HDFSCommands.md

@@ -338,7 +338,7 @@ Runs a HDFS datanode.
 Usage:

-    hdfs dfsadmin [-report [-live] [-dead] [-decommissioning]]
+    hdfs dfsadmin [-report [-live] [-dead] [-decommissioning] [-enteringmaintenance] [-inmaintenance]]
     hdfs dfsadmin [-safemode enter | leave | get | wait | forceExit]
     hdfs dfsadmin [-saveNamespace [-beforeShutdown]]
     hdfs dfsadmin [-rollEdits]
@@ -374,7 +374,7 @@ Usage:
 | COMMAND\_OPTION | Description |
 |:---- |:---- |
-| `-report` `[-live]` `[-dead]` `[-decommissioning]` | Reports basic filesystem information and statistics, The dfs usage can be different from "du" usage, because it measures raw space used by replication, checksums, snapshots and etc. on all the DNs. Optional flags may be used to filter the list of displayed DataNodes. |
+| `-report` `[-live]` `[-dead]` `[-decommissioning]` `[-enteringmaintenance]` `[-inmaintenance]` | Reports basic filesystem information and statistics, The dfs usage can be different from "du" usage, because it measures raw space used by replication, checksums, snapshots and etc. on all the DNs. Optional flags may be used to filter the list of displayed DataNodes. |
 | `-safemode` enter\|leave\|get\|wait\|forceExit | Safe mode maintenance command. Safe mode is a Namenode state in which it <br/>1. does not accept changes to the name space (read-only) <br/>2. does not replicate or delete blocks. <br/>Safe mode is entered automatically at Namenode startup, and leaves safe mode automatically when the configured minimum percentage of blocks satisfies the minimum replication condition. If Namenode detects any anomaly then it will linger in safe mode till that issue is resolved. If that anomaly is the consequence of a deliberate action, then administrator can use -safemode forceExit to exit safe mode. The cases where forceExit may be required are<br/> 1. Namenode metadata is not consistent. If Namenode detects that metadata has been modified out of band and can cause data loss, then Namenode will enter forceExit state. At that point user can either restart Namenode with correct metadata files or forceExit (if data loss is acceptable).<br/>2. Rollback causes metadata to be replaced and rarely it can trigger safe mode forceExit state in Namenode. In that case you may proceed by issuing -safemode forceExit.<br/> Safe mode can also be entered manually, but then it can only be turned off manually as well. |
 | `-saveNamespace` `[-beforeShutdown]` | Save current namespace into storage directories and reset edits log. Requires safe mode. If the "beforeShutdown" option is given, the NameNode does a checkpoint if and only if no checkpoint has been done during a time window (a configurable number of checkpoint periods). This is usually used before shutting down the NameNode to prevent potential fsimage/editlog corruption. |
 | `-rollEdits` | Rolls the edit log on the active NameNode. |

TestMaintenanceState.java

@@ -17,11 +17,18 @@
  */
 package org.apache.hadoop.hdfs;

+import static org.hamcrest.CoreMatchers.allOf;
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.not;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;

+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -29,6 +36,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;

+import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -43,8 +51,10 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.hdfs.tools.DFSAdmin;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.Time;
+import org.apache.hadoop.util.ToolRunner;
 import org.junit.Assert;
 import org.junit.Test;
 import org.slf4j.Logger;
@@ -1124,4 +1134,88 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
       return null;
     }
   }
+
+  @Test(timeout = 120000)
+  public void testReportMaintenanceNodes() throws Exception {
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    ByteArrayOutputStream err = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(out));
+    System.setErr(new PrintStream(err));
+
+    LOG.info("Starting testReportMaintenanceNodes");
+    int expirationInMs = 30 * 1000;
+    int numNodes = 2;
+    setMinMaintenanceR(numNodes);
+
+    startCluster(1, numNodes);
+    getCluster().waitActive();
+
+    FileSystem fileSys = getCluster().getFileSystem(0);
+    getConf().set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY,
+        fileSys.getUri().toString());
+    DFSAdmin dfsAdmin = new DFSAdmin(getConf());
+
+    FSNamesystem fsn = getCluster().getNameNode().getNamesystem();
+    assertEquals(numNodes, fsn.getNumLiveDataNodes());
+
+    int ret = ToolRunner.run(dfsAdmin,
+        new String[] {"-report", "-enteringmaintenance", "-inmaintenance"});
+    assertEquals(0, ret);
+    assertThat(out.toString(),
+        is(allOf(containsString("Entering maintenance datanodes (0):"),
+            containsString("In maintenance datanodes (0):"),
+            not(containsString(
+                getCluster().getDataNodes().get(0).getDisplayName())),
+            not(containsString(
+                getCluster().getDataNodes().get(1).getDisplayName())))));
+
+    final Path file = new Path("/testReportMaintenanceNodes.dat");
+    writeFile(fileSys, file, numNodes, 1);
+
+    DatanodeInfo[] nodes = getFirstBlockReplicasDatanodeInfos(fileSys, file);
+    // Request maintenance for DataNodes1. The DataNode1 will not transition
+    // to the next state AdminStates.IN_MAINTENANCE immediately since there
+    // are not enough candidate nodes to satisfy the min maintenance
+    // replication.
+    DatanodeInfo maintenanceDN = takeNodeOutofService(0,
+        nodes[0].getDatanodeUuid(), Time.now() + expirationInMs, null, null,
+        AdminStates.ENTERING_MAINTENANCE);
+    assertEquals(1, fsn.getNumEnteringMaintenanceDataNodes());
+
+    // reset stream
+    out.reset();
+    err.reset();
+
+    ret = ToolRunner.run(dfsAdmin,
+        new String[] {"-report", "-enteringmaintenance"});
+    assertEquals(0, ret);
+    assertThat(out.toString(),
+        is(allOf(containsString("Entering maintenance datanodes (1):"),
+            containsString(nodes[0].getXferAddr()),
+            not(containsString(nodes[1].getXferAddr())))));
+
+    // reset stream
+    out.reset();
+    err.reset();
+
+    // start a new datanode to make state transition to
+    // AdminStates.IN_MAINTENANCE
+    getCluster().startDataNodes(getConf(), 1, true, null, null);
+    getCluster().waitActive();
+
+    waitNodeState(maintenanceDN, AdminStates.IN_MAINTENANCE);
+    assertEquals(1, fsn.getNumInMaintenanceLiveDataNodes());
+
+    ret = ToolRunner.run(dfsAdmin,
+        new String[] {"-report", "-inmaintenance"});
+    assertEquals(0, ret);
+    assertThat(out.toString(),
+        is(allOf(containsString("In maintenance datanodes (1):"),
+            containsString(nodes[0].getXferAddr()),
+            not(containsString(nodes[1].getXferAddr())),
+            not(containsString(
+                getCluster().getDataNodes().get(2).getDisplayName())))));
+
+    cleanupFile(getCluster().getFileSystem(), file);
+  }
 }
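One caveat in the test above: it replaces System.out and System.err without restoring them afterwards, which can affect later tests in the same JVM. A common capture pattern (sketch only, not part of this patch, reusing the ByteArrayOutputStream/PrintStream imports added above) saves the original stream and restores it in a finally block:

    // Sketch: capture stdout for assertions, then restore the real stream.
    PrintStream originalOut = System.out;
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    System.setOut(new PrintStream(out));
    try {
      // ... run the DFSAdmin command and assert on out.toString() ...
    } finally {
      System.setOut(originalOut);
    }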

testHDFSConf.xml

@@ -15665,7 +15665,7 @@
         <comparators>
           <comparator>
             <type>RegexpComparator</type>
-            <expected-output>^-report \[-live\] \[-dead\] \[-decommissioning\]:(.)*</expected-output>
+            <expected-output>^-report \[-live\] \[-dead\] \[-decommissioning\] \[-enteringmaintenance\] \[-inmaintenance\]:(.)*</expected-output>
           </comparator>
           <comparator>
             <type>RegexpComparator</type>