HDFS-11359. DFSAdmin report command supports displaying maintenance state datanodes. Contributed by Yiqun Lin.
This commit is contained in:
parent
d0a4e7e95d
commit
870b5e39d2
@ -141,7 +141,7 @@ public static RollingUpgradeAction fromString(String s) {
|
||||
|
||||
// type of the datanode report
|
||||
public enum DatanodeReportType {
|
||||
ALL, LIVE, DEAD, DECOMMISSIONING, ENTERING_MAINTENANCE
|
||||
ALL, LIVE, DEAD, DECOMMISSIONING, ENTERING_MAINTENANCE, IN_MAINTENANCE
|
||||
}
|
||||
|
||||
/* Hidden constructor */
|
||||
|
@ -1563,6 +1563,10 @@ public static DatanodeReportTypeProto convert(DatanodeReportType t) {
|
||||
case LIVE: return DatanodeReportTypeProto.LIVE;
|
||||
case DEAD: return DatanodeReportTypeProto.DEAD;
|
||||
case DECOMMISSIONING: return DatanodeReportTypeProto.DECOMMISSIONING;
|
||||
case ENTERING_MAINTENANCE:
|
||||
return DatanodeReportTypeProto.ENTERING_MAINTENANCE;
|
||||
case IN_MAINTENANCE:
|
||||
return DatanodeReportTypeProto.IN_MAINTENANCE;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unexpected data type report:" + t);
|
||||
}
|
||||
@ -1932,6 +1936,10 @@ public static DatanodeReportType convert(DatanodeReportTypeProto t) {
|
||||
case LIVE: return DatanodeReportType.LIVE;
|
||||
case DEAD: return DatanodeReportType.DEAD;
|
||||
case DECOMMISSIONING: return DatanodeReportType.DECOMMISSIONING;
|
||||
case ENTERING_MAINTENANCE:
|
||||
return DatanodeReportType.ENTERING_MAINTENANCE;
|
||||
case IN_MAINTENANCE:
|
||||
return DatanodeReportType.IN_MAINTENANCE;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unexpected data type report:" + t);
|
||||
}
|
||||
|
@ -327,6 +327,8 @@ enum DatanodeReportTypeProto { // type of the datanode report
|
||||
LIVE = 2;
|
||||
DEAD = 3;
|
||||
DECOMMISSIONING = 4;
|
||||
ENTERING_MAINTENANCE = 5;
|
||||
IN_MAINTENANCE = 6;
|
||||
}
|
||||
|
||||
message GetDatanodeReportRequestProto {
|
||||
|
@ -1354,6 +1354,9 @@ public List<DatanodeDescriptor> getDatanodeListForReport(
|
||||
final boolean listEnteringMaintenanceNodes =
|
||||
type == DatanodeReportType.ALL ||
|
||||
type == DatanodeReportType.ENTERING_MAINTENANCE;
|
||||
final boolean listInMaintenanceNodes =
|
||||
type == DatanodeReportType.ALL ||
|
||||
type == DatanodeReportType.IN_MAINTENANCE;
|
||||
|
||||
ArrayList<DatanodeDescriptor> nodes;
|
||||
final HostSet foundNodes = new HostSet();
|
||||
@ -1366,11 +1369,13 @@ public List<DatanodeDescriptor> getDatanodeListForReport(
|
||||
final boolean isDead = isDatanodeDead(dn);
|
||||
final boolean isDecommissioning = dn.isDecommissionInProgress();
|
||||
final boolean isEnteringMaintenance = dn.isEnteringMaintenance();
|
||||
final boolean isInMaintenance = dn.isInMaintenance();
|
||||
|
||||
if (((listLiveNodes && !isDead) ||
|
||||
(listDeadNodes && isDead) ||
|
||||
(listDecommissioningNodes && isDecommissioning) ||
|
||||
(listEnteringMaintenanceNodes && isEnteringMaintenance)) &&
|
||||
(listEnteringMaintenanceNodes && isEnteringMaintenance) ||
|
||||
(listInMaintenanceNodes && isInMaintenance)) &&
|
||||
hostConfigManager.isIncluded(dn)) {
|
||||
nodes.add(dn);
|
||||
}
|
||||
|
@ -421,7 +421,8 @@ static int run(DistributedFileSystem dfs, String[] argv, int idx) throws IOExcep
|
||||
* "hdfs dfsadmin"
|
||||
*/
|
||||
private static final String commonUsageSummary =
|
||||
"\t[-report [-live] [-dead] [-decommissioning]]\n" +
|
||||
"\t[-report [-live] [-dead] [-decommissioning] " +
|
||||
"[-enteringmaintenance] [-inmaintenance]]\n" +
|
||||
"\t[-safemode <enter | leave | get | wait>]\n" +
|
||||
"\t[-saveNamespace]\n" +
|
||||
"\t[-rollEdits]\n" +
|
||||
@ -544,48 +545,51 @@ public void report(String[] argv, int i) throws IOException {
|
||||
final boolean listDead = StringUtils.popOption("-dead", args);
|
||||
final boolean listDecommissioning =
|
||||
StringUtils.popOption("-decommissioning", args);
|
||||
final boolean listEnteringMaintenance =
|
||||
StringUtils.popOption("-enteringmaintenance", args);
|
||||
final boolean listInMaintenance =
|
||||
StringUtils.popOption("-inmaintenance", args);
|
||||
|
||||
|
||||
// If no filter flags are found, then list all DN types
|
||||
boolean listAll = (!listLive && !listDead && !listDecommissioning);
|
||||
boolean listAll = (!listLive && !listDead && !listDecommissioning
|
||||
&& !listEnteringMaintenance && !listInMaintenance);
|
||||
|
||||
if (listAll || listLive) {
|
||||
DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE);
|
||||
if (live.length > 0 || listLive) {
|
||||
System.out.println("Live datanodes (" + live.length + "):\n");
|
||||
}
|
||||
if (live.length > 0) {
|
||||
for (DatanodeInfo dn : live) {
|
||||
System.out.println(dn.getDatanodeReport());
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
printDataNodeReports(dfs, DatanodeReportType.LIVE, listLive, "Live");
|
||||
}
|
||||
|
||||
if (listAll || listDead) {
|
||||
DatanodeInfo[] dead = dfs.getDataNodeStats(DatanodeReportType.DEAD);
|
||||
if (dead.length > 0 || listDead) {
|
||||
System.out.println("Dead datanodes (" + dead.length + "):\n");
|
||||
}
|
||||
if (dead.length > 0) {
|
||||
for (DatanodeInfo dn : dead) {
|
||||
System.out.println(dn.getDatanodeReport());
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
printDataNodeReports(dfs, DatanodeReportType.DEAD, listDead, "Dead");
|
||||
}
|
||||
|
||||
if (listAll || listDecommissioning) {
|
||||
DatanodeInfo[] decom =
|
||||
dfs.getDataNodeStats(DatanodeReportType.DECOMMISSIONING);
|
||||
if (decom.length > 0 || listDecommissioning) {
|
||||
System.out.println("Decommissioning datanodes (" + decom.length
|
||||
+ "):\n");
|
||||
}
|
||||
if (decom.length > 0) {
|
||||
for (DatanodeInfo dn : decom) {
|
||||
System.out.println(dn.getDatanodeReport());
|
||||
System.out.println();
|
||||
}
|
||||
printDataNodeReports(dfs, DatanodeReportType.DECOMMISSIONING,
|
||||
listDecommissioning, "Decommissioning");
|
||||
}
|
||||
|
||||
if (listAll || listEnteringMaintenance) {
|
||||
printDataNodeReports(dfs, DatanodeReportType.ENTERING_MAINTENANCE,
|
||||
listEnteringMaintenance, "Entering maintenance");
|
||||
}
|
||||
|
||||
if (listAll || listInMaintenance) {
|
||||
printDataNodeReports(dfs, DatanodeReportType.IN_MAINTENANCE,
|
||||
listInMaintenance, "In maintenance");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Prints the report for one category of datanodes to standard output.
 *
 * <p>The datanodes are fetched with {@code dfs.getDataNodeStats(type)}. A
 * header of the form {@code "<nodeState> datanodes (<count>):"} is printed
 * first, followed by each datanode's {@code getDatanodeReport()} text and a
 * blank line.
 *
 * @param dfs file system used to query the datanodes
 * @param type category of datanodes to fetch
 * @param listNodes when {@code true}, the header is printed even if no
 *     datanode of this category was returned; the {@code report} call sites
 *     pass the flag that records whether the matching filter option was given
 * @param nodeState label placed at the start of the header line
 * @throws IOException declared by this method; presumably propagated from
 *     {@code getDataNodeStats} -- confirm against DistributedFileSystem
 */
private static void printDataNodeReports(DistributedFileSystem dfs,
|
||||
DatanodeReportType type, boolean listNodes, String nodeState)
|
||||
throws IOException {
|
||||
DatanodeInfo[] nodes = dfs.getDataNodeStats(type);
|
||||
// The header is suppressed only when the category is empty and was not
// explicitly requested.
if (nodes.length > 0 || listNodes) {
|
||||
System.out.println(nodeState + " datanodes (" + nodes.length + "):\n");
|
||||
}
|
||||
if (nodes.length > 0) {
|
||||
for (DatanodeInfo dn : nodes) {
|
||||
System.out.println(dn.getDatanodeReport());
|
||||
// Blank line separating consecutive datanode reports.
System.out.println();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -961,12 +965,13 @@ private void printHelp(String cmd) {
|
||||
"hdfs dfsadmin\n" +
|
||||
commonUsageSummary;
|
||||
|
||||
String report ="-report [-live] [-dead] [-decommissioning]:\n" +
|
||||
"\tReports basic filesystem information and statistics. \n" +
|
||||
"\tThe dfs usage can be different from \"du\" usage, because it\n" +
|
||||
"\tmeasures raw space used by replication, checksums, snapshots\n" +
|
||||
"\tand etc. on all the DNs.\n" +
|
||||
"\tOptional flags may be used to filter the list of displayed DNs.\n";
|
||||
String report ="-report [-live] [-dead] [-decommissioning] "
|
||||
+ "[-enteringmaintenance] [-inmaintenance]:\n" +
|
||||
"\tReports basic filesystem information and statistics. \n" +
|
||||
"\tThe dfs usage can be different from \"du\" usage, because it\n" +
|
||||
"\tmeasures raw space used by replication, checksums, snapshots\n" +
|
||||
"\tand etc. on all the DNs.\n" +
|
||||
"\tOptional flags may be used to filter the list of displayed DNs.\n";
|
||||
|
||||
String safemode = "-safemode <enter|leave|get|wait|forceExit>: Safe mode " +
|
||||
"maintenance command.\n" +
|
||||
@ -1749,7 +1754,8 @@ public int genericRefresh(String[] argv, int i) throws IOException {
|
||||
private static void printUsage(String cmd) {
|
||||
if ("-report".equals(cmd)) {
|
||||
System.err.println("Usage: hdfs dfsadmin"
|
||||
+ " [-report] [-live] [-dead] [-decommissioning]");
|
||||
+ " [-report] [-live] [-dead] [-decommissioning]"
|
||||
+ " [-enteringmaintenance] [-inmaintenance]");
|
||||
} else if ("-safemode".equals(cmd)) {
|
||||
System.err.println("Usage: hdfs dfsadmin"
|
||||
+ " [-safemode enter | leave | get | wait | forceExit]");
|
||||
@ -1888,7 +1894,7 @@ public int run(String[] argv) throws Exception {
|
||||
return exitCode;
|
||||
}
|
||||
} else if ("-report".equals(cmd)) {
|
||||
if (argv.length < 1) {
|
||||
if (argv.length > 6) {
|
||||
printUsage(cmd);
|
||||
return exitCode;
|
||||
}
|
||||
|
@ -324,7 +324,7 @@ Runs a HDFS datanode.
|
||||
|
||||
Usage:
|
||||
|
||||
hdfs dfsadmin [-report [-live] [-dead] [-decommissioning]]
|
||||
hdfs dfsadmin [-report [-live] [-dead] [-decommissioning] [-enteringmaintenance] [-inmaintenance]]
|
||||
hdfs dfsadmin [-safemode enter | leave | get | wait | forceExit]
|
||||
hdfs dfsadmin [-saveNamespace]
|
||||
hdfs dfsadmin [-rollEdits]
|
||||
@ -360,7 +360,7 @@ Usage:
|
||||
|
||||
| COMMAND\_OPTION | Description |
|
||||
|:---- |:---- |
|
||||
| `-report` `[-live]` `[-dead]` `[-decommissioning]` | Reports basic filesystem information and statistics, The dfs usage can be different from "du" usage, because it measures raw space used by replication, checksums, snapshots and etc. on all the DNs. Optional flags may be used to filter the list of displayed DataNodes. |
|
||||
| `-report` `[-live]` `[-dead]` `[-decommissioning]` `[-enteringmaintenance]` `[-inmaintenance]` | Reports basic filesystem information and statistics. The dfs usage can be different from "du" usage, because it measures raw space used by replication, checksums, snapshots, etc. on all the DNs. Optional flags may be used to filter the list of displayed DataNodes. |
|
||||
| `-safemode` enter\|leave\|get\|wait\|forceExit | Safe mode maintenance command. Safe mode is a Namenode state in which it <br/>1. does not accept changes to the name space (read-only) <br/>2. does not replicate or delete blocks. <br/>Safe mode is entered automatically at Namenode startup, and leaves safe mode automatically when the configured minimum percentage of blocks satisfies the minimum replication condition. If Namenode detects any anomaly then it will linger in safe mode till that issue is resolved. If that anomaly is the consequence of a deliberate action, then administrator can use -safemode forceExit to exit safe mode. The cases where forceExit may be required are<br/> 1. Namenode metadata is not consistent. If Namenode detects that metadata has been modified out of band and can cause data loss, then Namenode will enter forceExit state. At that point user can either restart Namenode with correct metadata files or forceExit (if data loss is acceptable).<br/>2. Rollback causes metadata to be replaced and rarely it can trigger safe mode forceExit state in Namenode. In that case you may proceed by issuing -safemode forceExit.<br/> Safe mode can also be entered manually, but then it can only be turned off manually as well. |
|
||||
| `-saveNamespace` | Save current namespace into storage directories and reset edits log. Requires safe mode. |
|
||||
| `-rollEdits` | Rolls the edit log on the active NameNode. |
|
||||
|
@ -17,11 +17,18 @@
|
||||
*/
|
||||
package org.apache.hadoop.hdfs;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.allOf;
|
||||
import static org.hamcrest.CoreMatchers.containsString;
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.hamcrest.CoreMatchers.not;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
@ -29,6 +36,7 @@
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
@ -43,8 +51,10 @@
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||
import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.slf4j.Logger;
|
||||
@ -1124,4 +1134,88 @@ static private DatanodeInfo[] getFirstBlockReplicasDatanodeInfos(
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks the output of {@code dfsadmin -report} when it is filtered with
 * {@code -enteringmaintenance} and {@code -inmaintenance}.
 *
 * <p>The test runs the command three times and inspects the captured
 * standard output:
 * <ol>
 *   <li>before any node is taken out of service: both headers report a
 *       count of 0 and neither datanode's display name is printed;</li>
 *   <li>after {@code takeNodeOutofService} is called for the first replica
 *       node with {@code AdminStates.ENTERING_MAINTENANCE}: the "Entering
 *       maintenance" header reports 1 and only that node's transfer address
 *       is printed;</li>
 *   <li>after one more datanode is started and the node reaches
 *       {@code AdminStates.IN_MAINTENANCE}: the "In maintenance" header
 *       reports 1 and only that node's transfer address is printed.</li>
 * </ol>
 */
@Test(timeout = 120000)
|
||||
public void testReportMaintenanceNodes() throws Exception {
|
||||
// Redirect stdout/stderr into buffers so the command output can be asserted.
// NOTE(review): the original streams are not restored inside this method --
// confirm that the test fixture's teardown resets them.
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
ByteArrayOutputStream err = new ByteArrayOutputStream();
|
||||
System.setOut(new PrintStream(out));
|
||||
System.setErr(new PrintStream(err));
|
||||

||||
LOG.info("Starting testReportMaintenanceNodes");
|
||||
int expirationInMs = 30 * 1000;
|
||||
int numNodes = 2;
|
||||
setMinMaintenanceR(numNodes);
|
||||

||||
startCluster(1, numNodes);
|
||||
getCluster().waitActive();
|
||||

||||
FileSystem fileSys = getCluster().getFileSystem(0);
|
||||
getConf().set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY,
|
||||
fileSys.getUri().toString());
|
||||
DFSAdmin dfsAdmin = new DFSAdmin(getConf());
|
||||

||||
FSNamesystem fsn = getCluster().getNameNode().getNamesystem();
|
||||
assertEquals(numNodes, fsn.getNumLiveDataNodes());
|
||||

||||
// Phase 1: nothing is in maintenance yet, so both categories must be empty.
int ret = ToolRunner.run(dfsAdmin,
|
||||
new String[] {"-report", "-enteringmaintenance", "-inmaintenance"});
|
||||
assertEquals(0, ret);
|
||||
assertThat(out.toString(),
|
||||
is(allOf(containsString("Entering maintenance datanodes (0):"),
|
||||
containsString("In maintenance datanodes (0):"),
|
||||
not(containsString(
|
||||
getCluster().getDataNodes().get(0).getDisplayName())),
|
||||
not(containsString(
|
||||
getCluster().getDataNodes().get(1).getDisplayName())))));
|
||||

||||
final Path file = new Path("/testReportMaintenanceNodes.dat");
|
||||
writeFile(fileSys, file, numNodes, 1);
|
||||

||||
DatanodeInfo[] nodes = getFirstBlockReplicasDatanodeInfos(fileSys, file);
|
||||
// Request maintenance for DataNode1. DataNode1 will not transition
|
||||
// to the next state AdminStates.IN_MAINTENANCE immediately since there
|
||||
// are not enough candidate nodes to satisfy the min maintenance
|
||||
// replication.
|
||||
DatanodeInfo maintenanceDN = takeNodeOutofService(0,
|
||||
nodes[0].getDatanodeUuid(), Time.now() + expirationInMs, null, null,
|
||||
AdminStates.ENTERING_MAINTENANCE);
|
||||
assertEquals(1, fsn.getNumEnteringMaintenanceDataNodes());
|
||||

||||
// reset stream
|
||||
out.reset();
|
||||
err.reset();
|
||||

||||
// Phase 2: exactly one node is reported as entering maintenance.
ret = ToolRunner.run(dfsAdmin,
|
||||
new String[] {"-report", "-enteringmaintenance"});
|
||||
assertEquals(0, ret);
|
||||
assertThat(out.toString(),
|
||||
is(allOf(containsString("Entering maintenance datanodes (1):"),
|
||||
containsString(nodes[0].getXferAddr()),
|
||||
not(containsString(nodes[1].getXferAddr())))));
|
||||

||||
// reset stream
|
||||
out.reset();
|
||||
err.reset();
|
||||

||||
// start a new datanode to make state transition to
|
||||
// AdminStates.IN_MAINTENANCE
|
||||
getCluster().startDataNodes(getConf(), 1, true, null, null);
|
||||
getCluster().waitActive();
|
||||

||||
waitNodeState(maintenanceDN, AdminStates.IN_MAINTENANCE);
|
||||
assertEquals(1, fsn.getNumInMaintenanceLiveDataNodes());
|
||||

||||
// Phase 3: the same node is now reported as in maintenance; the other
// original node and the newly started one must not be listed.
ret = ToolRunner.run(dfsAdmin,
|
||||
new String[] {"-report", "-inmaintenance"});
|
||||
assertEquals(0, ret);
|
||||
assertThat(out.toString(),
|
||||
is(allOf(containsString("In maintenance datanodes (1):"),
|
||||
containsString(nodes[0].getXferAddr()),
|
||||
not(containsString(nodes[1].getXferAddr())),
|
||||
not(containsString(
|
||||
getCluster().getDataNodes().get(2).getDisplayName())))));
|
||||

||||
cleanupFile(getCluster().getFileSystem(), file);
|
||||
}
|
||||
}
|
||||
|
@ -15665,7 +15665,7 @@
|
||||
<comparators>
|
||||
<comparator>
|
||||
<type>RegexpComparator</type>
|
||||
<expected-output>^-report \[-live\] \[-dead\] \[-decommissioning\]:(.)*</expected-output>
|
||||
<expected-output>^-report \[-live\] \[-dead\] \[-decommissioning\] \[-enteringmaintenance\] \[-inmaintenance\]:(.)*</expected-output>
|
||||
</comparator>
|
||||
<comparator>
|
||||
<type>RegexpComparator</type>
|
||||
|
Loading…
x
Reference in New Issue
Block a user