HDFS-11359. DFSAdmin report command supports displaying maintenance state datanodes. Contributed by Yiqun Lin.
This commit is contained in:
parent
d48f2f6839
commit
60a7f57b61
|
@ -141,7 +141,7 @@ public final class HdfsConstants {
|
|||
|
||||
// type of the datanode report
|
||||
public enum DatanodeReportType {
|
||||
ALL, LIVE, DEAD, DECOMMISSIONING, ENTERING_MAINTENANCE
|
||||
ALL, LIVE, DEAD, DECOMMISSIONING, ENTERING_MAINTENANCE, IN_MAINTENANCE
|
||||
}
|
||||
|
||||
/* Hidden constructor */
|
||||
|
|
|
@ -1725,6 +1725,10 @@ public class PBHelperClient {
|
|||
case LIVE: return DatanodeReportTypeProto.LIVE;
|
||||
case DEAD: return DatanodeReportTypeProto.DEAD;
|
||||
case DECOMMISSIONING: return DatanodeReportTypeProto.DECOMMISSIONING;
|
||||
case ENTERING_MAINTENANCE:
|
||||
return DatanodeReportTypeProto.ENTERING_MAINTENANCE;
|
||||
case IN_MAINTENANCE:
|
||||
return DatanodeReportTypeProto.IN_MAINTENANCE;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unexpected data type report:" + t);
|
||||
}
|
||||
|
@ -2128,6 +2132,10 @@ public class PBHelperClient {
|
|||
case LIVE: return DatanodeReportType.LIVE;
|
||||
case DEAD: return DatanodeReportType.DEAD;
|
||||
case DECOMMISSIONING: return DatanodeReportType.DECOMMISSIONING;
|
||||
case ENTERING_MAINTENANCE:
|
||||
return DatanodeReportType.ENTERING_MAINTENANCE;
|
||||
case IN_MAINTENANCE:
|
||||
return DatanodeReportType.IN_MAINTENANCE;
|
||||
default:
|
||||
throw new IllegalArgumentException("Unexpected data type report:" + t);
|
||||
}
|
||||
|
|
|
@ -332,6 +332,8 @@ enum DatanodeReportTypeProto { // type of the datanode report
|
|||
LIVE = 2;
|
||||
DEAD = 3;
|
||||
DECOMMISSIONING = 4;
|
||||
ENTERING_MAINTENANCE = 5;
|
||||
IN_MAINTENANCE = 6;
|
||||
}
|
||||
|
||||
message GetDatanodeReportRequestProto {
|
||||
|
|
|
@ -1441,6 +1441,9 @@ public class DatanodeManager {
|
|||
final boolean listEnteringMaintenanceNodes =
|
||||
type == DatanodeReportType.ALL ||
|
||||
type == DatanodeReportType.ENTERING_MAINTENANCE;
|
||||
final boolean listInMaintenanceNodes =
|
||||
type == DatanodeReportType.ALL ||
|
||||
type == DatanodeReportType.IN_MAINTENANCE;
|
||||
|
||||
ArrayList<DatanodeDescriptor> nodes;
|
||||
final HostSet foundNodes = new HostSet();
|
||||
|
@ -1453,11 +1456,13 @@ public class DatanodeManager {
|
|||
final boolean isDead = isDatanodeDead(dn);
|
||||
final boolean isDecommissioning = dn.isDecommissionInProgress();
|
||||
final boolean isEnteringMaintenance = dn.isEnteringMaintenance();
|
||||
final boolean isInMaintenance = dn.isInMaintenance();
|
||||
|
||||
if (((listLiveNodes && !isDead) ||
|
||||
(listDeadNodes && isDead) ||
|
||||
(listDecommissioningNodes && isDecommissioning) ||
|
||||
(listEnteringMaintenanceNodes && isEnteringMaintenance)) &&
|
||||
(listEnteringMaintenanceNodes && isEnteringMaintenance) ||
|
||||
(listInMaintenanceNodes && isInMaintenance)) &&
|
||||
hostConfigManager.isIncluded(dn)) {
|
||||
nodes.add(dn);
|
||||
}
|
||||
|
|
|
@ -421,7 +421,8 @@ public class DFSAdmin extends FsShell {
|
|||
* "hdfs dfsadmin"
|
||||
*/
|
||||
private static final String commonUsageSummary =
|
||||
"\t[-report [-live] [-dead] [-decommissioning]]\n" +
|
||||
"\t[-report [-live] [-dead] [-decommissioning] " +
|
||||
"[-enteringmaintenance] [-inmaintenance]]\n" +
|
||||
"\t[-safemode <enter | leave | get | wait>]\n" +
|
||||
"\t[-saveNamespace [-beforeShutdown]]\n" +
|
||||
"\t[-rollEdits]\n" +
|
||||
|
@ -544,48 +545,51 @@ public class DFSAdmin extends FsShell {
|
|||
final boolean listDead = StringUtils.popOption("-dead", args);
|
||||
final boolean listDecommissioning =
|
||||
StringUtils.popOption("-decommissioning", args);
|
||||
final boolean listEnteringMaintenance =
|
||||
StringUtils.popOption("-enteringmaintenance", args);
|
||||
final boolean listInMaintenance =
|
||||
StringUtils.popOption("-inmaintenance", args);
|
||||
|
||||
|
||||
// If no filter flags are found, then list all DN types
|
||||
boolean listAll = (!listLive && !listDead && !listDecommissioning);
|
||||
boolean listAll = (!listLive && !listDead && !listDecommissioning
|
||||
&& !listEnteringMaintenance && !listInMaintenance);
|
||||
|
||||
if (listAll || listLive) {
|
||||
DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE);
|
||||
if (live.length > 0 || listLive) {
|
||||
System.out.println("Live datanodes (" + live.length + "):\n");
|
||||
}
|
||||
if (live.length > 0) {
|
||||
for (DatanodeInfo dn : live) {
|
||||
System.out.println(dn.getDatanodeReport());
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
printDataNodeReports(dfs, DatanodeReportType.LIVE, listLive, "Live");
|
||||
}
|
||||
|
||||
if (listAll || listDead) {
|
||||
DatanodeInfo[] dead = dfs.getDataNodeStats(DatanodeReportType.DEAD);
|
||||
if (dead.length > 0 || listDead) {
|
||||
System.out.println("Dead datanodes (" + dead.length + "):\n");
|
||||
}
|
||||
if (dead.length > 0) {
|
||||
for (DatanodeInfo dn : dead) {
|
||||
System.out.println(dn.getDatanodeReport());
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
printDataNodeReports(dfs, DatanodeReportType.DEAD, listDead, "Dead");
|
||||
}
|
||||
|
||||
if (listAll || listDecommissioning) {
|
||||
DatanodeInfo[] decom =
|
||||
dfs.getDataNodeStats(DatanodeReportType.DECOMMISSIONING);
|
||||
if (decom.length > 0 || listDecommissioning) {
|
||||
System.out.println("Decommissioning datanodes (" + decom.length
|
||||
+ "):\n");
|
||||
}
|
||||
if (decom.length > 0) {
|
||||
for (DatanodeInfo dn : decom) {
|
||||
System.out.println(dn.getDatanodeReport());
|
||||
System.out.println();
|
||||
}
|
||||
printDataNodeReports(dfs, DatanodeReportType.DECOMMISSIONING,
|
||||
listDecommissioning, "Decommissioning");
|
||||
}
|
||||
|
||||
if (listAll || listEnteringMaintenance) {
|
||||
printDataNodeReports(dfs, DatanodeReportType.ENTERING_MAINTENANCE,
|
||||
listEnteringMaintenance, "Entering maintenance");
|
||||
}
|
||||
|
||||
if (listAll || listInMaintenance) {
|
||||
printDataNodeReports(dfs, DatanodeReportType.IN_MAINTENANCE,
|
||||
listInMaintenance, "In maintenance");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetches the datanodes matching one report type and prints them to
 * standard output.
 *
 * A header line "<nodeState> datanodes (<count>):" is printed when at least
 * one node was returned or when listNodes is true. Each returned node's
 * report is then printed, followed by an empty line.
 *
 * @param dfs file system used to query the datanode statistics
 * @param type report type passed to dfs.getDataNodeStats
 * @param listNodes when true, the header is printed even if no node matched
 * @param nodeState label placed at the start of the header line
 * @throws IOException declared by this method; presumably raised by the
 *         getDataNodeStats call (confirm against DistributedFileSystem)
 */
private static void printDataNodeReports(DistributedFileSystem dfs,
|
||||
DatanodeReportType type, boolean listNodes, String nodeState)
|
||||
throws IOException {
|
||||
DatanodeInfo[] nodes = dfs.getDataNodeStats(type);
|
||||
// Print the header if there is something to list, or if the caller asked
// for this category and so expects to see an explicit "(0)" count.
if (nodes.length > 0 || listNodes) {
|
||||
System.out.println(nodeState + " datanodes (" + nodes.length + "):\n");
|
||||
}
|
||||
if (nodes.length > 0) {
|
||||
for (DatanodeInfo dn : nodes) {
|
||||
System.out.println(dn.getDatanodeReport());
|
||||
// Empty line separating consecutive node reports.
System.out.println();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -986,12 +990,13 @@ public class DFSAdmin extends FsShell {
|
|||
"hdfs dfsadmin\n" +
|
||||
commonUsageSummary;
|
||||
|
||||
String report ="-report [-live] [-dead] [-decommissioning]:\n" +
|
||||
"\tReports basic filesystem information and statistics. \n" +
|
||||
"\tThe dfs usage can be different from \"du\" usage, because it\n" +
|
||||
"\tmeasures raw space used by replication, checksums, snapshots\n" +
|
||||
"\tand etc. on all the DNs.\n" +
|
||||
"\tOptional flags may be used to filter the list of displayed DNs.\n";
|
||||
String report ="-report [-live] [-dead] [-decommissioning] "
|
||||
+ "[-enteringmaintenance] [-inmaintenance]:\n" +
|
||||
"\tReports basic filesystem information and statistics. \n" +
|
||||
"\tThe dfs usage can be different from \"du\" usage, because it\n" +
|
||||
"\tmeasures raw space used by replication, checksums, snapshots\n" +
|
||||
"\tand etc. on all the DNs.\n" +
|
||||
"\tOptional flags may be used to filter the list of displayed DNs.\n";
|
||||
|
||||
String safemode = "-safemode <enter|leave|get|wait|forceExit>: Safe mode " +
|
||||
"maintenance command.\n" +
|
||||
|
@ -1779,7 +1784,8 @@ public class DFSAdmin extends FsShell {
|
|||
private static void printUsage(String cmd) {
|
||||
if ("-report".equals(cmd)) {
|
||||
System.err.println("Usage: hdfs dfsadmin"
|
||||
+ " [-report] [-live] [-dead] [-decommissioning]");
|
||||
+ " [-report] [-live] [-dead] [-decommissioning]"
|
||||
+ " [-enteringmaintenance] [-inmaintenance]");
|
||||
} else if ("-safemode".equals(cmd)) {
|
||||
System.err.println("Usage: hdfs dfsadmin"
|
||||
+ " [-safemode enter | leave | get | wait | forceExit]");
|
||||
|
@ -1917,7 +1923,7 @@ public class DFSAdmin extends FsShell {
|
|||
return exitCode;
|
||||
}
|
||||
} else if ("-report".equals(cmd)) {
|
||||
if (argv.length > 4) {
|
||||
if (argv.length > 6) {
|
||||
printUsage(cmd);
|
||||
return exitCode;
|
||||
}
|
||||
|
|
|
@ -338,7 +338,7 @@ Runs a HDFS datanode.
|
|||
|
||||
Usage:
|
||||
|
||||
hdfs dfsadmin [-report [-live] [-dead] [-decommissioning]]
|
||||
hdfs dfsadmin [-report [-live] [-dead] [-decommissioning] [-enteringmaintenance] [-inmaintenance]]
|
||||
hdfs dfsadmin [-safemode enter | leave | get | wait | forceExit]
|
||||
hdfs dfsadmin [-saveNamespace [-beforeShutdown]]
|
||||
hdfs dfsadmin [-rollEdits]
|
||||
|
@ -374,7 +374,7 @@ Usage:
|
|||
|
||||
| COMMAND\_OPTION | Description |
|
||||
|:---- |:---- |
|
||||
| `-report` `[-live]` `[-dead]` `[-decommissioning]` | Reports basic filesystem information and statistics, The dfs usage can be different from "du" usage, because it measures raw space used by replication, checksums, snapshots and etc. on all the DNs. Optional flags may be used to filter the list of displayed DataNodes. |
|
||||
| `-report` `[-live]` `[-dead]` `[-decommissioning]` `[-enteringmaintenance]` `[-inmaintenance]` | Reports basic filesystem information and statistics. The dfs usage can be different from "du" usage, because it measures raw space used by replication, checksums, snapshots, etc. on all the DNs. Optional flags may be used to filter the list of displayed DataNodes. |
|
||||
| `-safemode` enter\|leave\|get\|wait\|forceExit | Safe mode maintenance command. Safe mode is a Namenode state in which it <br/>1. does not accept changes to the name space (read-only) <br/>2. does not replicate or delete blocks. <br/>Safe mode is entered automatically at Namenode startup, and leaves safe mode automatically when the configured minimum percentage of blocks satisfies the minimum replication condition. If Namenode detects any anomaly then it will linger in safe mode till that issue is resolved. If that anomaly is the consequence of a deliberate action, then administrator can use -safemode forceExit to exit safe mode. The cases where forceExit may be required are<br/> 1. Namenode metadata is not consistent. If Namenode detects that metadata has been modified out of band and can cause data loss, then Namenode will enter forceExit state. At that point user can either restart Namenode with correct metadata files or forceExit (if data loss is acceptable).<br/>2. Rollback causes metadata to be replaced and rarely it can trigger safe mode forceExit state in Namenode. In that case you may proceed by issuing -safemode forceExit.<br/> Safe mode can also be entered manually, but then it can only be turned off manually as well. |
|
||||
| `-saveNamespace` `[-beforeShutdown]` | Save current namespace into storage directories and reset edits log. Requires safe mode. If the "beforeShutdown" option is given, the NameNode does a checkpoint if and only if no checkpoint has been done during a time window (a configurable number of checkpoint periods). This is usually used before shutting down the NameNode to prevent potential fsimage/editlog corruption. |
|
||||
| `-rollEdits` | Rolls the edit log on the active NameNode. |
|
||||
|
|
|
@ -17,11 +17,18 @@
|
|||
*/
|
||||
package org.apache.hadoop.hdfs;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.allOf;
|
||||
import static org.hamcrest.CoreMatchers.containsString;
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.hamcrest.CoreMatchers.not;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
|
@ -29,6 +36,7 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
@ -43,8 +51,10 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
|
|||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||
import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -1124,4 +1134,88 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks that "dfsadmin -report" honours the -enteringmaintenance and
 * -inmaintenance filters.
 *
 * The test runs the report three times against a two-datanode cluster:
 * first with no node in maintenance (both counts are expected to be 0),
 * then after one node has been put into ENTERING_MAINTENANCE, and finally
 * after a third datanode has been started and the node has been observed
 * in IN_MAINTENANCE.
 */
@Test(timeout = 120000)
|
||||
public void testReportMaintenanceNodes() throws Exception {
|
||||
// Capture what DFSAdmin prints so the report text can be asserted on.
// NOTE(review): the original System.out / System.err are not restored
// anywhere in this method -- confirm that a teardown elsewhere does it.
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
ByteArrayOutputStream err = new ByteArrayOutputStream();
|
||||
System.setOut(new PrintStream(out));
|
||||
System.setErr(new PrintStream(err));
|
||||
|
||||
LOG.info("Starting testReportMaintenanceNodes");
|
||||
int expirationInMs = 30 * 1000;
|
||||
int numNodes = 2;
|
||||
setMinMaintenanceR(numNodes);
|
||||
|
||||
startCluster(1, numNodes);
|
||||
getCluster().waitActive();
|
||||
|
||||
FileSystem fileSys = getCluster().getFileSystem(0);
|
||||
getConf().set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY,
|
||||
fileSys.getUri().toString());
|
||||
DFSAdmin dfsAdmin = new DFSAdmin(getConf());
|
||||
|
||||
FSNamesystem fsn = getCluster().getNameNode().getNamesystem();
|
||||
assertEquals(numNodes, fsn.getNumLiveDataNodes());
|
||||
|
||||
// First run: no node is in a maintenance state, so both sections must
// report a count of 0 and neither datanode may be listed.
int ret = ToolRunner.run(dfsAdmin,
|
||||
new String[] {"-report", "-enteringmaintenance", "-inmaintenance"});
|
||||
assertEquals(0, ret);
|
||||
assertThat(out.toString(),
|
||||
is(allOf(containsString("Entering maintenance datanodes (0):"),
|
||||
containsString("In maintenance datanodes (0):"),
|
||||
not(containsString(
|
||||
getCluster().getDataNodes().get(0).getDisplayName())),
|
||||
not(containsString(
|
||||
getCluster().getDataNodes().get(1).getDisplayName())))));
|
||||
|
||||
final Path file = new Path("/testReportMaintenanceNodes.dat");
|
||||
writeFile(fileSys, file, numNodes, 1);
|
||||
|
||||
DatanodeInfo[] nodes = getFirstBlockReplicasDatanodeInfos(fileSys, file);
|
||||
// Request maintenance for DataNode1. DataNode1 will not transition
|
||||
// to the next state AdminStates.IN_MAINTENANCE immediately since there
|
||||
// are not enough candidate nodes to satisfy the min maintenance
|
||||
// replication.
|
||||
DatanodeInfo maintenanceDN = takeNodeOutofService(0,
|
||||
nodes[0].getDatanodeUuid(), Time.now() + expirationInMs, null, null,
|
||||
AdminStates.ENTERING_MAINTENANCE);
|
||||
assertEquals(1, fsn.getNumEnteringMaintenanceDataNodes());
|
||||
|
||||
// reset stream
|
||||
out.reset();
|
||||
err.reset();
|
||||
|
||||
// Second run: only the node entering maintenance may be listed.
ret = ToolRunner.run(dfsAdmin,
|
||||
new String[] {"-report", "-enteringmaintenance"});
|
||||
assertEquals(0, ret);
|
||||
assertThat(out.toString(),
|
||||
is(allOf(containsString("Entering maintenance datanodes (1):"),
|
||||
containsString(nodes[0].getXferAddr()),
|
||||
not(containsString(nodes[1].getXferAddr())))));
|
||||
|
||||
// reset stream
|
||||
out.reset();
|
||||
err.reset();
|
||||
|
||||
// start a new datanode to make state transition to
|
||||
// AdminStates.IN_MAINTENANCE
|
||||
getCluster().startDataNodes(getConf(), 1, true, null, null);
|
||||
getCluster().waitActive();
|
||||
|
||||
waitNodeState(maintenanceDN, AdminStates.IN_MAINTENANCE);
|
||||
assertEquals(1, fsn.getNumInMaintenanceLiveDataNodes());
|
||||
|
||||
// Third run: only the in-maintenance node may be listed; the other
// original node and the newly started one must be absent.
ret = ToolRunner.run(dfsAdmin,
|
||||
new String[] {"-report", "-inmaintenance"});
|
||||
assertEquals(0, ret);
|
||||
assertThat(out.toString(),
|
||||
is(allOf(containsString("In maintenance datanodes (1):"),
|
||||
containsString(nodes[0].getXferAddr()),
|
||||
not(containsString(nodes[1].getXferAddr())),
|
||||
not(containsString(
|
||||
getCluster().getDataNodes().get(2).getDisplayName())))));
|
||||
|
||||
cleanupFile(getCluster().getFileSystem(), file);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15665,7 +15665,7 @@
|
|||
<comparators>
|
||||
<comparator>
|
||||
<type>RegexpComparator</type>
|
||||
<expected-output>^-report \[-live\] \[-dead\] \[-decommissioning\]:(.)*</expected-output>
|
||||
<expected-output>^-report \[-live\] \[-dead\] \[-decommissioning\] \[-enteringmaintenance\] \[-inmaintenance\]:(.)*</expected-output>
|
||||
</comparator>
|
||||
<comparator>
|
||||
<type>RegexpComparator</type>
|
||||
|
|
Loading…
Reference in New Issue