HDFS-6295. Add decommissioning state and node state filtering to dfsadmin. Contributed by Andrew Wang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1592439 13f79535-47bb-0310-9956-ffa450edef68
Andrew Wang 2014-05-05 03:40:00 +00:00
parent ffb4fc03c2
commit 4abbc157e1
8 changed files with 177 additions and 71 deletions

View File

@ -86,6 +86,9 @@ Release 2.5.0 - UNRELEASED
HDFS-6304. Consolidate the logic of path resolution in FSDirectory.
(wheat9)
+    HDFS-6295. Add "decommissioning" state and node state filtering to
+    dfsadmin. (wang)

OPTIMIZATIONS
HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)

View File

@ -106,7 +106,7 @@ public class HdfsConstants {
// type of the datanode report
public static enum DatanodeReportType {
-    ALL, LIVE, DEAD
+    ALL, LIVE, DEAD, DECOMMISSIONING
}
// An invalid transaction ID that will never be seen in a real namesystem.
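For context, the new enum value is what clients hand to DistributedFileSystem#getDataNodeStats to request only mid-decommission nodes. A minimal sketch (not part of this patch), assuming a Configuration whose fs.defaultFS points at a running HDFS cluster:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;

public class DecommissioningReportSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Cast is valid only when fs.defaultFS is an hdfs:// URI.
    DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);
    // Ask the NameNode for just the datanodes that are mid-decommission.
    for (DatanodeInfo dn : dfs.getDataNodeStats(DatanodeReportType.DECOMMISSIONING)) {
      System.out.println(dn.getDatanodeReport());
    }
  }
}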

View File

@ -1413,6 +1413,7 @@ public class PBHelper {
case ALL: return DatanodeReportTypeProto.ALL;
case LIVE: return DatanodeReportTypeProto.LIVE;
case DEAD: return DatanodeReportTypeProto.DEAD;
+    case DECOMMISSIONING: return DatanodeReportTypeProto.DECOMMISSIONING;
default:
throw new IllegalArgumentException("Unexpected data type report:" + t);
}
@ -1424,6 +1425,7 @@ public class PBHelper {
case ALL: return DatanodeReportType.ALL;
case LIVE: return DatanodeReportType.LIVE;
case DEAD: return DatanodeReportType.DEAD;
+    case DECOMMISSIONING: return DatanodeReportType.DECOMMISSIONING;
default:
throw new IllegalArgumentException("Unexpected data type report:" + t);
}
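The two convert() overloads must stay mirror images of each other, or a report type silently changes meaning across the RPC boundary. A quick illustrative round-trip check (not in the patch; assumes both overloads above are visible):

// Every value, including the new DECOMMISSIONING, should survive
// enum -> proto -> enum unchanged.
for (DatanodeReportType t : DatanodeReportType.values()) {
  assert t == PBHelper.convert(PBHelper.convert(t));
}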

View File

@ -1238,10 +1238,15 @@ public class DatanodeManager {
/** For generating datanode reports */
public List<DatanodeDescriptor> getDatanodeListForReport(
final DatanodeReportType type) {
-    boolean listLiveNodes = type == DatanodeReportType.ALL ||
-                            type == DatanodeReportType.LIVE;
-    boolean listDeadNodes = type == DatanodeReportType.ALL ||
-                            type == DatanodeReportType.DEAD;
+    final boolean listLiveNodes =
+        type == DatanodeReportType.ALL ||
+        type == DatanodeReportType.LIVE;
+    final boolean listDeadNodes =
+        type == DatanodeReportType.ALL ||
+        type == DatanodeReportType.DEAD;
+    final boolean listDecommissioningNodes =
+        type == DatanodeReportType.ALL ||
+        type == DatanodeReportType.DECOMMISSIONING;
ArrayList<DatanodeDescriptor> nodes;
final HostFileManager.HostSet foundNodes = new HostFileManager.HostSet();
@ -1252,7 +1257,10 @@ public class DatanodeManager {
nodes = new ArrayList<DatanodeDescriptor>(datanodeMap.size());
for (DatanodeDescriptor dn : datanodeMap.values()) {
final boolean isDead = isDatanodeDead(dn);
-      if ((listLiveNodes && !isDead) || (listDeadNodes && isDead)) {
+      final boolean isDecommissioning = dn.isDecommissionInProgress();
+      if ((listLiveNodes && !isDead) ||
+          (listDeadNodes && isDead) ||
+          (listDecommissioningNodes && isDecommissioning)) {
nodes.add(dn);
}
foundNodes.add(HostFileManager.resolvedAddressFromDatanodeID(dn));
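One subtlety of the filter: a decommissioning datanode is still alive, so it also appears in LIVE output; DECOMMISSIONING is an overlapping third view rather than a partition. The selection logic above boils down to this predicate (an illustrative restatement, not code from the patch):

// True when a node belongs in the report of the given type.
static boolean includeInReport(DatanodeReportType type,
    boolean isDead, boolean isDecommissioning) {
  switch (type) {
    case ALL:             return true;      // live plus dead covers everything
    case LIVE:            return !isDead;   // includes decommissioning nodes
    case DEAD:            return isDead;
    case DECOMMISSIONING: return isDecommissioning;
    default:              return false;
  }
}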

View File

@ -25,6 +25,7 @@ import java.net.URI;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@ -371,65 +372,96 @@ public class DFSAdmin extends FsShell {
* Gives a report on how the FileSystem is doing.
* @exception IOException if the filesystem does not exist.
*/
-  public void report() throws IOException {
+  public void report(String[] argv, int i) throws IOException {
    DistributedFileSystem dfs = getDFS();
    FsStatus ds = dfs.getStatus();
    long capacity = ds.getCapacity();
    long used = ds.getUsed();
    long remaining = ds.getRemaining();
    long presentCapacity = used + remaining;
    boolean mode = dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET);
    if (mode) {
      System.out.println("Safe mode is ON");
    }
    System.out.println("Configured Capacity: " + capacity
        + " (" + StringUtils.byteDesc(capacity) + ")");
    System.out.println("Present Capacity: " + presentCapacity
        + " (" + StringUtils.byteDesc(presentCapacity) + ")");
    System.out.println("DFS Remaining: " + remaining
        + " (" + StringUtils.byteDesc(remaining) + ")");
    System.out.println("DFS Used: " + used
        + " (" + StringUtils.byteDesc(used) + ")");
    System.out.println("DFS Used%: "
        + StringUtils.formatPercent(used/(double)presentCapacity, 2));

    /* These counts are not always upto date. They are updated after
     * iteration of an internal list. Should be updated in a few seconds to
     * minutes. Use "-metaSave" to list of all such blocks and accurate
     * counts.
     */
    System.out.println("Under replicated blocks: " +
        dfs.getUnderReplicatedBlocksCount());
    System.out.println("Blocks with corrupt replicas: " +
        dfs.getCorruptBlocksCount());
    System.out.println("Missing blocks: " +
        dfs.getMissingBlocksCount());

    System.out.println();
    System.out.println("-------------------------------------------------");

-    DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE);
-    DatanodeInfo[] dead = dfs.getDataNodeStats(DatanodeReportType.DEAD);
-    System.out.println("Datanodes available: " + live.length +
-                       " (" + (live.length + dead.length) + " total, " +
-                       dead.length + " dead)\n");
-    if(live.length > 0) {
-      System.out.println("Live datanodes:");
-      for (DatanodeInfo dn : live) {
-        System.out.println(dn.getDatanodeReport());
-        System.out.println();
-      }
-    }
-    if(dead.length > 0) {
-      System.out.println("Dead datanodes:");
-      for (DatanodeInfo dn : dead) {
-        System.out.println(dn.getDatanodeReport());
-        System.out.println();
-      }
-    }
+    // Parse arguments for filtering the node list
+    List<String> args = Arrays.asList(argv);
+    // Truncate already handled arguments before parsing report()-specific ones
+    args = new ArrayList<String>(args.subList(i, args.size()));
+    final boolean listLive = StringUtils.popOption("-live", args);
+    final boolean listDead = StringUtils.popOption("-dead", args);
+    final boolean listDecommissioning =
+        StringUtils.popOption("-decommissioning", args);
+
+    // If no filter flags are found, then list all DN types
+    boolean listAll = (!listLive && !listDead && !listDecommissioning);
+
+    if (listAll || listLive) {
+      DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE);
+      if (live.length > 0 || listLive) {
+        System.out.println("Live datanodes (" + live.length + "):\n");
+      }
+      if (live.length > 0) {
+        for (DatanodeInfo dn : live) {
+          System.out.println(dn.getDatanodeReport());
+          System.out.println();
+        }
+      }
+    }
+
+    if (listAll || listDead) {
+      DatanodeInfo[] dead = dfs.getDataNodeStats(DatanodeReportType.DEAD);
+      if (dead.length > 0 || listDead) {
+        System.out.println("Dead datanodes (" + dead.length + "):\n");
+      }
+      if (dead.length > 0) {
+        for (DatanodeInfo dn : dead) {
+          System.out.println(dn.getDatanodeReport());
+          System.out.println();
+        }
+      }
+    }
+
+    if (listAll || listDecommissioning) {
+      DatanodeInfo[] decom =
+          dfs.getDataNodeStats(DatanodeReportType.DECOMMISSIONING);
+      if (decom.length > 0 || listDecommissioning) {
+        System.out.println("Decommissioning datanodes (" + decom.length
+            + "):\n");
+      }
+      if (decom.length > 0) {
+        for (DatanodeInfo dn : decom) {
+          System.out.println(dn.getDatanodeReport());
+          System.out.println();
+        }
+      }
+    }
  }
/**
@ -639,7 +671,9 @@ public class DFSAdmin extends FsShell {
private void printHelp(String cmd) {
String summary = "hadoop dfsadmin performs DFS administrative commands.\n" +
"The full syntax is: \n\n" +
"hadoop dfsadmin [-report] [-safemode <enter | leave | get | wait>]\n" +
"hadoop dfsadmin\n" +
"\t[-report [-live] [-dead] [-decommissioning]]\n" +
"\t[-safemode <enter | leave | get | wait>]\n" +
"\t[-saveNamespace]\n" +
"\t[-rollEdits]\n" +
"\t[-restoreFailedStorage true|false|check]\n" +
@ -665,7 +699,10 @@ public class DFSAdmin extends FsShell {
"\t[-getDatanodeInfo <datanode_host:ipc_port>\n" +
"\t[-help [cmd]]\n";
-    String report ="-report: \tReports basic filesystem information and statistics.\n";
+    String report ="-report [-live] [-dead] [-decommissioning]:\n" +
+      "\tReports basic filesystem information and statistics.\n" +
+      "\tOptional flags may be used to filter the list of displayed DNs.\n";
String safemode = "-safemode <enter|leave|get|wait>: Safe mode maintenance command.\n" +
"\t\tSafe mode is a Namenode state in which it\n" +
@ -1069,7 +1106,7 @@ public class DFSAdmin extends FsShell {
private static void printUsage(String cmd) {
if ("-report".equals(cmd)) {
System.err.println("Usage: java DFSAdmin"
+ " [-report]");
+ " [-report] [-live] [-dead] [-decommissioning]");
} else if ("-safemode".equals(cmd)) {
System.err.println("Usage: java DFSAdmin"
+ " [-safemode enter | leave | get | wait]");
@ -1210,7 +1247,7 @@ public class DFSAdmin extends FsShell {
return exitCode;
}
} else if ("-report".equals(cmd)) {
-      if (argv.length != 1) {
+      if (argv.length < 1) {
printUsage(cmd);
return exitCode;
}
@ -1312,7 +1349,7 @@ public class DFSAdmin extends FsShell {
exitCode = 0;
try {
if ("-report".equals(cmd)) {
-        report();
+        report(argv, i);
} else if ("-safemode".equals(cmd)) {
setSafeMode(argv, i);
} else if ("-allowSnapshot".equalsIgnoreCase(cmd)) {
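report() now receives the raw argv plus the index of the first argument it has not yet consumed, and strips its own flags with StringUtils.popOption(), so the three filters compose freely: "-report -live -decommissioning" prints both of those sections, while a bare "-report" still lists every category. A hedged sketch of driving the new flags programmatically, equivalent to running "hadoop dfsadmin -report -decommissioning" from a shell (the class name is invented for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.util.ToolRunner;

public class FilteredReportSketch {
  public static void main(String[] args) throws Exception {
    // DFSAdmin extends FsShell, which implements Tool, so ToolRunner
    // strips generic options before report() parses the filter flags.
    int rc = ToolRunner.run(new DFSAdmin(new Configuration()),
        new String[] { "-report", "-decommissioning" });
    System.exit(rc);
  }
}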

View File

@ -267,6 +267,7 @@ enum DatanodeReportTypeProto { // type of the datanode report
ALL = 1;
LIVE = 2;
DEAD = 3;
+  DECOMMISSIONING = 4;
}
message GetDatanodeReportRequestProto {
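On the wire the change is just one more enum number inside the existing request message; a sketch of what a client stub builds, assuming the Java classes protoc generates from ClientNamenodeProtocol.proto:

// Request only decommissioning nodes at the protobuf layer.
GetDatanodeReportRequestProto req = GetDatanodeReportRequestProto.newBuilder()
    .setType(DatanodeReportTypeProto.DECOMMISSIONING)
    .build();

Since this is a proto2 enum, both sides of the RPC need to know the new value before the filter can be used end to end.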

View File

@ -21,12 +21,15 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
+import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
+import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FSDataOutputStream;
@ -34,12 +37,15 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
+import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@ -180,6 +186,50 @@ public class TestDecommissioningStatus {
.getUnderReplicatedInOpenFiles(), expectedUnderRepInOpenFiles);
}
+  private void checkDFSAdminDecommissionStatus(
+      List<DatanodeDescriptor> expectedDecomm, DistributedFileSystem dfs,
+      DFSAdmin admin) throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream ps = new PrintStream(baos);
+    PrintStream oldOut = System.out;
+    System.setOut(ps);
+    try {
+      // Parse DFSAdmin just to check the count
+      admin.report(new String[] {"-decommissioning"}, 0);
+      String[] lines = baos.toString().split("\n");
+      Integer num = null;
+      int count = 0;
+      for (String line: lines) {
+        if (line.startsWith("Decommissioning datanodes")) {
+          // Pull out the "(num)" and parse it into an int
+          String temp = line.split(" ")[2];
+          num =
+              Integer.parseInt((String) temp.subSequence(1, temp.length() - 2));
+        }
+        if (line.contains("Decommission in progress")) {
+          count++;
+        }
+      }
+      assertTrue("No decommissioning output", num != null);
+      assertEquals("Unexpected number of decomming DNs", expectedDecomm.size(),
+          num.intValue());
+      assertEquals("Unexpected number of decomming DNs", expectedDecomm.size(),
+          count);
+
+      // Check Java API for correct contents
+      List<DatanodeInfo> decomming =
+          new ArrayList<DatanodeInfo>(Arrays.asList(dfs
+              .getDataNodeStats(DatanodeReportType.DECOMMISSIONING)));
+      assertEquals("Unexpected number of decomming DNs", expectedDecomm.size(),
+          decomming.size());
+      for (DatanodeID id : expectedDecomm) {
+        assertTrue("Did not find expected decomming DN " + id,
+            decomming.contains(id));
+      }
+    } finally {
+      System.setOut(oldOut);
+    }
+  }
+
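The helper recovers N from the "Decommissioning datanodes (N):" header by splitting on spaces and trimming the parentheses by hand. A regex is a less brittle way to express the same parse (illustrative alternative, not part of the patch):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Capture N from a header line of the form "Decommissioning datanodes (N):".
private static final Pattern DECOMM_HEADER =
    Pattern.compile("^Decommissioning datanodes \\((\\d+)\\):");

static Integer parseDecommCount(String line) {
  Matcher m = DECOMM_HEADER.matcher(line);
  return m.find() ? Integer.valueOf(m.group(1)) : null;
}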
/**
* Tests Decommissioning Status in DFS.
*/
@ -191,7 +241,8 @@ public class TestDecommissioningStatus {
DFSClient client = new DFSClient(addr, conf);
DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
assertEquals("Number of Datanodes ", 2, info.length);
-    FileSystem fileSys = cluster.getFileSystem();
+    DistributedFileSystem fileSys = cluster.getFileSystem();
+    DFSAdmin admin = new DFSAdmin(cluster.getConfiguration(0));
short replicas = 2;
//
@ -216,12 +267,16 @@ public class TestDecommissioningStatus {
assertEquals(decommissioningNodes.size(), 1);
DatanodeDescriptor decommNode = decommissioningNodes.get(0);
checkDecommissionStatus(decommNode, 4, 0, 2);
+      checkDFSAdminDecommissionStatus(decommissioningNodes.subList(0, 1),
+          fileSys, admin);
} else {
assertEquals(decommissioningNodes.size(), 2);
DatanodeDescriptor decommNode1 = decommissioningNodes.get(0);
DatanodeDescriptor decommNode2 = decommissioningNodes.get(1);
checkDecommissionStatus(decommNode1, 4, 4, 2);
checkDecommissionStatus(decommNode2, 4, 4, 2);
+      checkDFSAdminDecommissionStatus(decommissioningNodes.subList(0, 2),
+          fileSys, admin);
}
}
// Call refreshNodes on FSNamesystem with empty exclude file.

View File

@ -15220,7 +15220,11 @@
<comparators>
<comparator>
<type>RegexpComparator</type>
-          <expected-output>^-report:( |\t)*Reports basic filesystem information and statistics.( )*</expected-output>
+          <expected-output>^-report \[-live\] \[-dead\] \[-decommissioning\]:(.)*</expected-output>
+        </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^[ \t]*Reports basic filesystem information and statistics.( )*</expected-output>
</comparator>
</comparators>
</test>
@ -15900,9 +15904,9 @@
<type>RegexpComparator</type>
<expected-output>DFS Used\%: [0-9\.]+%</expected-output>
</comparator>
        <comparator>
          <type>RegexpComparator</type>
-          <expected-output>Datanodes available: [0-9]+ \([0-9]+ total, [0-9]+ dead\)</expected-output>
+          <expected-output>Live datanodes \([0-9]+\):</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
@ -15930,7 +15934,7 @@
</comparator>
<comparator>
<type>TokenComparator</type>
-          <expected-output>Live datanodes:</expected-output>
+          <expected-output>Live datanodes</expected-output>
</comparator>
</comparators>
</test>
@ -16018,10 +16022,6 @@
<type>RegexpComparator</type>
<expected-output>DFS Used\%: [0-9\.]+%</expected-output>
</comparator>
-        <comparator>
-          <type>RegexpComparator</type>
-          <expected-output>Datanodes available: [0-9]+ \([0-9]+ total, [0-9]+ dead\)</expected-output>
-        </comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>Name: [0-9\.:]+ \([-.a-zA-z0-9\.]+\)</expected-output>
@ -16048,7 +16048,7 @@
</comparator>
<comparator>
<type>TokenComparator</type>
-          <expected-output>Live datanodes:</expected-output>
+          <expected-output>Live datanodes</expected-output>
</comparator>
</comparators>
</test>