HDFS-11259. Update fsck to display maintenance state info. (Manoj Govindassamy via lei)

This commit is contained in:
Lei Xu 2017-01-19 16:24:58 +08:00
parent 541efe18c7
commit 1cc5f460ed
4 changed files with 294 additions and 49 deletions

View File

@ -116,6 +116,9 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
public static final String HEALTHY_STATUS = "is HEALTHY"; public static final String HEALTHY_STATUS = "is HEALTHY";
public static final String DECOMMISSIONING_STATUS = "is DECOMMISSIONING"; public static final String DECOMMISSIONING_STATUS = "is DECOMMISSIONING";
public static final String DECOMMISSIONED_STATUS = "is DECOMMISSIONED"; public static final String DECOMMISSIONED_STATUS = "is DECOMMISSIONED";
public static final String ENTERING_MAINTENANCE_STATUS =
"is ENTERING MAINTENANCE";
public static final String IN_MAINTENANCE_STATUS = "is IN MAINTENANCE";
public static final String NONEXISTENT_STATUS = "does not exist"; public static final String NONEXISTENT_STATUS = "does not exist";
public static final String FAILURE_STATUS = "FAILED"; public static final String FAILURE_STATUS = "FAILED";
public static final String UNDEFINED = "undefined"; public static final String UNDEFINED = "undefined";
@ -138,6 +141,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
private boolean showReplicaDetails = false; private boolean showReplicaDetails = false;
private boolean showUpgradeDomains = false; private boolean showUpgradeDomains = false;
private boolean showMaintenanceState = false;
private long staleInterval; private long staleInterval;
private Tracer tracer; private Tracer tracer;
@ -220,6 +224,8 @@ else if (key.equals("replicadetails")) {
this.showReplicaDetails = true; this.showReplicaDetails = true;
} else if (key.equals("upgradedomains")) { } else if (key.equals("upgradedomains")) {
this.showUpgradeDomains = true; this.showUpgradeDomains = true;
} else if (key.equals("maintenance")) {
this.showMaintenanceState = true;
} else if (key.equals("storagepolicies")) { } else if (key.equals("storagepolicies")) {
this.showStoragePolcies = true; this.showStoragePolcies = true;
} else if (key.equals("openforwrite")) { } else if (key.equals("openforwrite")) {
@ -271,6 +277,12 @@ public void blockIdCK(String blockId) {
+ numberReplicas.decommissioned()); + numberReplicas.decommissioned());
out.println("No. of decommissioning Replica: " out.println("No. of decommissioning Replica: "
+ numberReplicas.decommissioning()); + numberReplicas.decommissioning());
if (this.showMaintenanceState) {
out.println("No. of entering maintenance Replica: "
+ numberReplicas.liveEnteringMaintenanceReplicas());
out.println("No. of in maintenance Replica: "
+ numberReplicas.maintenanceNotForReadReplicas());
}
out.println("No. of corrupted Replica: " + out.println("No. of corrupted Replica: " +
numberReplicas.corruptReplicas()); numberReplicas.corruptReplicas());
//record datanodes that have corrupted block replica //record datanodes that have corrupted block replica
@ -291,6 +303,10 @@ public void blockIdCK(String blockId) {
out.print(DECOMMISSIONED_STATUS); out.print(DECOMMISSIONED_STATUS);
} else if (dn.isDecommissionInProgress()) { } else if (dn.isDecommissionInProgress()) {
out.print(DECOMMISSIONING_STATUS); out.print(DECOMMISSIONING_STATUS);
} else if (this.showMaintenanceState && dn.isEnteringMaintenance()) {
out.print(ENTERING_MAINTENANCE_STATUS);
} else if (this.showMaintenanceState && dn.isInMaintenance()) {
out.print(IN_MAINTENANCE_STATUS);
} else { } else {
out.print(HEALTHY_STATUS); out.print(HEALTHY_STATUS);
} }
@ -567,13 +583,21 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file,
NumberReplicas numberReplicas = bm.countNodes(storedBlock); NumberReplicas numberReplicas = bm.countNodes(storedBlock);
int decommissionedReplicas = numberReplicas.decommissioned();; int decommissionedReplicas = numberReplicas.decommissioned();;
int decommissioningReplicas = numberReplicas.decommissioning(); int decommissioningReplicas = numberReplicas.decommissioning();
int enteringMaintenanceReplicas =
numberReplicas.liveEnteringMaintenanceReplicas();
int inMaintenanceReplicas =
numberReplicas.maintenanceNotForReadReplicas();
res.decommissionedReplicas += decommissionedReplicas; res.decommissionedReplicas += decommissionedReplicas;
res.decommissioningReplicas += decommissioningReplicas; res.decommissioningReplicas += decommissioningReplicas;
res.enteringMaintenanceReplicas += enteringMaintenanceReplicas;
res.inMaintenanceReplicas += inMaintenanceReplicas;
// count total replicas // count total replicas
int liveReplicas = numberReplicas.liveReplicas(); int liveReplicas = numberReplicas.liveReplicas();
int totalReplicasPerBlock = liveReplicas + decommissionedReplicas + int totalReplicasPerBlock = liveReplicas + decommissionedReplicas
decommissioningReplicas; + decommissioningReplicas
+ enteringMaintenanceReplicas
+ inMaintenanceReplicas;
res.totalReplicas += totalReplicasPerBlock; res.totalReplicas += totalReplicasPerBlock;
// count expected replicas // count expected replicas
@ -612,12 +636,14 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file,
if (!showFiles) { if (!showFiles) {
out.print("\n" + path + ": "); out.print("\n" + path + ": ");
} }
out.println(" Under replicated " + block + out.println(" Under replicated " + block + ". Target Replicas is "
". Target Replicas is " + + targetFileReplication + " but found "
targetFileReplication + " but found " + + liveReplicas+ " live replica(s), "
liveReplicas + " live replica(s), " + + decommissionedReplicas + " decommissioned replica(s), "
decommissionedReplicas + " decommissioned replica(s) and " + + decommissioningReplicas + " decommissioning replica(s)"
decommissioningReplicas + " decommissioning replica(s)."); + (this.showMaintenanceState ? (enteringMaintenanceReplicas
+ ", entering maintenance replica(s) and " + inMaintenanceReplicas
+ " in maintenance replica(s).") : "."));
} }
// count mis replicated blocks // count mis replicated blocks
@ -678,6 +704,12 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file,
sb.append("DECOMMISSIONED)"); sb.append("DECOMMISSIONED)");
} else if (dnDesc.isDecommissionInProgress()) { } else if (dnDesc.isDecommissionInProgress()) {
sb.append("DECOMMISSIONING)"); sb.append("DECOMMISSIONING)");
} else if (this.showMaintenanceState &&
dnDesc.isEnteringMaintenance()) {
sb.append("ENTERING MAINTENANCE)");
} else if (this.showMaintenanceState &&
dnDesc.isInMaintenance()) {
sb.append("IN MAINTENANCE)");
} else if (corruptReplicas != null && corruptReplicas.contains(dnDesc)) { } else if (corruptReplicas != null && corruptReplicas.contains(dnDesc)) {
sb.append("CORRUPT)"); sb.append("CORRUPT)");
} else if (blocksExcess != null && blocksExcess.contains(block.getLocalBlock())) { } else if (blocksExcess != null && blocksExcess.contains(block.getLocalBlock())) {
@ -991,6 +1023,8 @@ static class Result {
long missingReplicas = 0L; long missingReplicas = 0L;
long decommissionedReplicas = 0L; long decommissionedReplicas = 0L;
long decommissioningReplicas = 0L; long decommissioningReplicas = 0L;
long enteringMaintenanceReplicas = 0L;
long inMaintenanceReplicas = 0L;
long numUnderMinReplicatedBlocks=0L; long numUnderMinReplicatedBlocks=0L;
long numOverReplicatedBlocks = 0L; long numOverReplicatedBlocks = 0L;
long numUnderReplicatedBlocks = 0L; long numUnderReplicatedBlocks = 0L;
@ -1133,6 +1167,14 @@ public String toString() {
res.append("\n DecommissioningReplicas:\t").append( res.append("\n DecommissioningReplicas:\t").append(
decommissioningReplicas); decommissioningReplicas);
} }
if (enteringMaintenanceReplicas > 0) {
res.append("\n EnteringMaintenanceReplicas:\t").append(
enteringMaintenanceReplicas);
}
if (inMaintenanceReplicas > 0) {
res.append("\n InMaintenanceReplicas:\t").append(
inMaintenanceReplicas);
}
return res.toString(); return res.toString();
} }
} }

View File

@ -80,7 +80,7 @@ public class DFSck extends Configured implements Tool {
+ "[-files [-blocks [-locations | -racks | -replicaDetails | " + + "[-files [-blocks [-locations | -racks | -replicaDetails | " +
"-upgradedomains]]]] " "-upgradedomains]]]] "
+ "[-includeSnapshots] " + "[-includeSnapshots] "
+ "[-storagepolicies] [-blockId <blk_Id>]\n" + "[-storagepolicies] [-maintenance] [-blockId <blk_Id>]\n"
+ "\t<path>\tstart checking from this path\n" + "\t<path>\tstart checking from this path\n"
+ "\t-move\tmove corrupted files to /lost+found\n" + "\t-move\tmove corrupted files to /lost+found\n"
+ "\t-delete\tdelete corrupted files\n" + "\t-delete\tdelete corrupted files\n"
@ -99,6 +99,7 @@ public class DFSck extends Configured implements Tool {
+ "\t-files -blocks -upgradedomains\tprint out upgrade domains for " + + "\t-files -blocks -upgradedomains\tprint out upgrade domains for " +
"every block\n" "every block\n"
+ "\t-storagepolicies\tprint out storage policy summary for the blocks\n" + "\t-storagepolicies\tprint out storage policy summary for the blocks\n"
+ "\t-maintenance\tprint out maintenance state node details\n"
+ "\t-blockId\tprint out which file this blockId belongs to, locations" + "\t-blockId\tprint out which file this blockId belongs to, locations"
+ " (nodes, racks) of this block, and other diagnostics info" + " (nodes, racks) of this block, and other diagnostics info"
+ " (under replicated, corrupted or not, etc)\n\n" + " (under replicated, corrupted or not, etc)\n\n"
@ -283,6 +284,8 @@ else if (args[idx].equals("-replicaDetails")) {
doListCorruptFileBlocks = true; doListCorruptFileBlocks = true;
} else if (args[idx].equals("-includeSnapshots")) { } else if (args[idx].equals("-includeSnapshots")) {
url.append("&includeSnapshots=1"); url.append("&includeSnapshots=1");
} else if (args[idx].equals("-maintenance")) {
url.append("&maintenance=1");
} else if (args[idx].equals("-blockId")) { } else if (args[idx].equals("-blockId")) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
idx++; idx++;
@ -369,6 +372,10 @@ else if (args[idx].equals("-replicaDetails")) {
errCode = 2; errCode = 2;
} else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONING_STATUS)) { } else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONING_STATUS)) {
errCode = 3; errCode = 3;
} else if (lastLine.endsWith(NamenodeFsck.IN_MAINTENANCE_STATUS)) {
errCode = 4;
} else if (lastLine.endsWith(NamenodeFsck.ENTERING_MAINTENANCE_STATUS)) {
errCode = 5;
} }
return errCode; return errCode;
} }

View File

@ -112,7 +112,7 @@ Usage:
[-move | -delete | -openforwrite] [-move | -delete | -openforwrite]
[-files [-blocks [-locations | -racks | -replicaDetails | -upgradedomains]]] [-files [-blocks [-locations | -racks | -replicaDetails | -upgradedomains]]]
[-includeSnapshots] [-includeSnapshots]
[-storagepolicies] [-blockId <blk_Id>] [-storagepolicies] [-maintenance] [-blockId <blk_Id>]
| COMMAND\_OPTION | Description | | COMMAND\_OPTION | Description |
|:---- |:---- | |:---- |:---- |
@ -129,6 +129,7 @@ Usage:
| `-move` | Move corrupted files to /lost+found. | | `-move` | Move corrupted files to /lost+found. |
| `-openforwrite` | Print out files opened for write. | | `-openforwrite` | Print out files opened for write. |
| `-storagepolicies` | Print out storage policy summary for the blocks. | | `-storagepolicies` | Print out storage policy summary for the blocks. |
| `-maintenance` | Print out details of nodes that are entering maintenance or in maintenance. |
| `-blockId` | Print out information about the block. | | `-blockId` | Print out information about the block. |
Runs the HDFS filesystem checking utility. See [fsck](./HdfsUserGuide.html#fsck) for more info. Runs the HDFS filesystem checking utility. See [fsck](./HdfsUserGuide.html#fsck) for more info.

View File

@ -52,6 +52,7 @@
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -155,11 +156,11 @@ static String runFsck(Configuration conf, int expectedErrCode,
PrintStream out = new PrintStream(bStream, true); PrintStream out = new PrintStream(bStream, true);
GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.ALL); GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.ALL);
int errCode = ToolRunner.run(new DFSck(conf, out), path); int errCode = ToolRunner.run(new DFSck(conf, out), path);
LOG.info("OUTPUT = " + bStream.toString());
if (checkErrorCode) { if (checkErrorCode) {
assertEquals(expectedErrCode, errCode); assertEquals(expectedErrCode, errCode);
} }
GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.INFO); GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.INFO);
LOG.info("OUTPUT = " + bStream.toString());
return bStream.toString(); return bStream.toString();
} }
@ -787,26 +788,24 @@ public void testUnderMinReplicatedBlock() throws Exception {
assertTrue(outStr.contains("dfs.namenode.replication.min:\t2")); assertTrue(outStr.contains("dfs.namenode.replication.min:\t2"));
} }
@Test(timeout = 60000) @Test(timeout = 90000)
public void testFsckReplicaDetails() throws Exception { public void testFsckReplicaDetails() throws Exception {
final short replFactor = 1; final short replFactor = 1;
short numDn = 1; short numDn = 1;
final long blockSize = 512; final long blockSize = 512;
final long fileSize = 1024; final long fileSize = 1024;
boolean checkDecommissionInProgress = false;
String[] racks = {"/rack1"}; String[] racks = {"/rack1"};
String[] hosts = {"host1"}; String[] hosts = {"host1"};
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1); conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
DistributedFileSystem dfs;
cluster = cluster =
new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts) new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts)
.racks(racks).build(); .racks(racks).build();
cluster.waitClusterUp(); cluster.waitClusterUp();
dfs = cluster.getFileSystem(); final DistributedFileSystem dfs = cluster.getFileSystem();
// create files // create files
final String testFile = new String("/testfile"); final String testFile = new String("/testfile");
@ -815,53 +814,132 @@ public void testFsckReplicaDetails() throws Exception {
DFSTestUtil.waitReplication(dfs, path, replFactor); DFSTestUtil.waitReplication(dfs, path, replFactor);
// make sure datanode that has replica is fine before decommission // make sure datanode that has replica is fine before decommission
String fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks", String fsckOut = runFsck(conf, 0, true, testFile, "-files",
"-replicaDetails"); "-maintenance", "-blocks", "-replicaDetails");
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS)); assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
assertTrue(fsckOut.contains("(LIVE)")); assertTrue(fsckOut.contains("(LIVE)"));
assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
// decommission datanode // decommission datanode
ExtendedBlock eb = DFSTestUtil.getFirstBlock(dfs, path);
FSNamesystem fsn = cluster.getNameNode().getNamesystem(); FSNamesystem fsn = cluster.getNameNode().getNamesystem();
BlockManager bm = fsn.getBlockManager(); BlockManager bm = fsn.getBlockManager();
BlockCollection bc = null; final DatanodeManager dnm = bm.getDatanodeManager();
try { DatanodeDescriptor dnDesc0 = dnm.getDatanode(
fsn.writeLock(); cluster.getDataNodes().get(0).getDatanodeId());
BlockInfo bi = bm.getStoredBlock(eb.getLocalBlock());
bc = fsn.getBlockCollection(bi); bm.getDatanodeManager().getDecomManager().startDecommission(dnDesc0);
} finally { final String dn0Name = dnDesc0.getXferAddr();
fsn.writeUnlock();
}
DatanodeDescriptor dn = bc.getBlocks()[0]
.getDatanode(0);
bm.getDatanodeManager().getDecomManager().startDecommission(dn);
String dnName = dn.getXferAddr();
// check the replica status while decommissioning // check the replica status while decommissioning
fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks", fsckOut = runFsck(conf, 0, true, testFile, "-files",
"-replicaDetails"); "-maintenance", "-blocks", "-replicaDetails");
assertTrue(fsckOut.contains("(DECOMMISSIONING)")); assertTrue(fsckOut.contains("(DECOMMISSIONING)"));
assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
// Start 2nd Datanode and wait for decommission to start // Start 2nd DataNode
cluster.startDataNodes(conf, 1, true, null, null, null); cluster.startDataNodes(conf, 1, true, null,
DatanodeInfo datanodeInfo = null; new String[] {"/rack2"}, new String[] {"host2"}, null, false);
do {
Thread.sleep(2000); // Wait for decommission to start
for (DatanodeInfo info : dfs.getDataNodeStats()) { final AtomicBoolean checkDecommissionInProgress =
if (dnName.equals(info.getXferAddr())) { new AtomicBoolean(false);
datanodeInfo = info; GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
DatanodeInfo datanodeInfo = null;
try {
for (DatanodeInfo info : dfs.getDataNodeStats()) {
if (dn0Name.equals(info.getXferAddr())) {
datanodeInfo = info;
}
}
if (!checkDecommissionInProgress.get() && datanodeInfo != null
&& datanodeInfo.isDecommissionInProgress()) {
checkDecommissionInProgress.set(true);
}
if (datanodeInfo != null && datanodeInfo.isDecommissioned()) {
return true;
}
} catch (Exception e) {
LOG.warn("Unexpected exception: " + e);
return false;
} }
return false;
} }
if (!checkDecommissionInProgress && datanodeInfo != null }, 500, 30000);
&& datanodeInfo.isDecommissionInProgress()) {
checkDecommissionInProgress = true;
}
} while (datanodeInfo != null && !datanodeInfo.isDecommissioned());
// check the replica status after decommission is done // check the replica status after decommission is done
fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks", fsckOut = runFsck(conf, 0, true, testFile, "-files",
"-replicaDetails"); "-maintenance", "-blocks", "-replicaDetails");
assertTrue(fsckOut.contains("(DECOMMISSIONED)")); assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
DatanodeDescriptor dnDesc1 = dnm.getDatanode(
cluster.getDataNodes().get(1).getDatanodeId());
final String dn1Name = dnDesc1.getXferAddr();
bm.getDatanodeManager().getDecomManager().startMaintenance(dnDesc1,
Long.MAX_VALUE);
// check the replica status while entering maintenance
fsckOut = runFsck(conf, 0, true, testFile, "-files",
"-maintenance", "-blocks", "-replicaDetails");
assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
assertTrue(fsckOut.contains("(ENTERING MAINTENANCE)"));
assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
// check entering maintenance replicas are printed only when requested
fsckOut = runFsck(conf, 0, true, testFile, "-files",
"-blocks", "-replicaDetails");
assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
// Start 3rd DataNode
cluster.startDataNodes(conf, 1, true, null,
new String[] {"/rack3"}, new String[] {"host3"}, null, false);
// Wait for the 2nd node to reach in maintenance state
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
DatanodeInfo dnInfo = null;
try {
for (DatanodeInfo info : dfs.getDataNodeStats()) {
if (dn1Name.equals(info.getXferAddr())) {
dnInfo = info;
}
}
if (dnInfo != null && dnInfo.isInMaintenance()) {
return true;
}
} catch (Exception e) {
LOG.warn("Unexpected exception: " + e);
return false;
}
return false;
}
}, 500, 30000);
// check the replica status once the 2nd DataNode is in maintenance
fsckOut = runFsck(conf, 0, true, testFile, "-files",
"-maintenance", "-blocks", "-replicaDetails");
assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
assertTrue(fsckOut.contains("(IN MAINTENANCE)"));
// check in maintenance replicas are not printed when not requested
fsckOut = runFsck(conf, 0, true, testFile, "-files",
"-blocks", "-replicaDetails");
assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
assertFalse(fsckOut.contains("(ENTERING MAINTENANCE)"));
assertFalse(fsckOut.contains("(IN MAINTENANCE)"));
} }
/** Test if fsck can return -1 in case of failure. /** Test if fsck can return -1 in case of failure.
@ -1366,6 +1444,124 @@ public void testBlockIdCKDecommission() throws Exception {
assertTrue(fsckOut.contains(NamenodeFsck.DECOMMISSIONED_STATUS)); assertTrue(fsckOut.contains(NamenodeFsck.DECOMMISSIONED_STATUS));
} }
/**
 * Test for blockIdCK with datanode maintenance.
 *
 * <p>Starts a two-datanode cluster in which both the minimum replication and
 * the minimum maintenance replication equal the file's replication factor,
 * then puts the first datanode into maintenance. {@code fsck -maintenance
 * -blockId} is run while that node is entering maintenance, and again once a
 * third datanode has been started and the node is in maintenance. Finally the
 * test verifies that the in-maintenance status is not reported when the
 * {@code -maintenance} option is omitted.
 */
@Test (timeout = 90000)
public void testBlockIdCKMaintenance() throws Exception {
  final short replFactor = 2;
  final short numDn = 2;
  final long blockSize = 512;
  final String[] hosts = {"host1", "host2"};
  final String[] racks = {"/rack1", "/rack2"};

  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, replFactor);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY,
      replFactor);

  cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(numDn)
      .hosts(hosts)
      .racks(racks)
      .build();
  assertNotNull("Failed Cluster Creation", cluster);
  cluster.waitClusterUp();

  final DistributedFileSystem dfs = cluster.getFileSystem();
  assertNotNull("Failed to get FileSystem", dfs);

  DFSTestUtil util = new DFSTestUtil.Builder().
      setName(getClass().getSimpleName()).setNumFiles(1).build();

  // Create the test file and wait until it is fully replicated.
  final String pathString = "/testfile";
  final Path path = new Path(pathString);
  util.createFile(dfs, path, 1024, replFactor, 1000L);
  util.waitReplication(dfs, path, replFactor);

  // Collect the block names of the file, separated by single spaces.
  StringBuilder sb = new StringBuilder();
  for (LocatedBlock lb : util.getAllBlocks(dfs, path)) {
    sb.append(lb.getBlock().getLocalBlock().getBlockName()).append(" ");
  }
  final String[] bIds = sb.toString().split(" ");

  // Make sure the datanode that has the replica is fine before maintenance.
  String outStr = runFsck(conf, 0, true, "/",
      "-maintenance", "-blockId", bIds[0]);
  System.out.println(outStr);
  assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));

  FSNamesystem fsn = cluster.getNameNode().getNamesystem();
  BlockManager bm = fsn.getBlockManager();
  DatanodeManager dnm = bm.getDatanodeManager();
  DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0)
      .getDatanodeId());
  // NOTE(review): Long.MAX_VALUE is presumably the maintenance expiration
  // time, i.e. "never expires" -- confirm against startMaintenance.
  bm.getDatanodeManager().getDecomManager().startMaintenance(dn,
      Long.MAX_VALUE);
  final String dnName = dn.getXferAddr();

  // Wait for the node to enter maintenance state, and check the fsck
  // output while it is still in that state.
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      DatanodeInfo datanodeInfo = null;
      try {
        for (DatanodeInfo info : dfs.getDataNodeStats()) {
          if (dnName.equals(info.getXferAddr())) {
            datanodeInfo = info;
          }
        }
        if (datanodeInfo != null && datanodeInfo.isEnteringMaintenance()) {
          // checkErrorCode is false here, so the exit code passed to
          // runFsck is not asserted; only the printed status is checked.
          String fsckOut = runFsck(conf, 5, false, "/",
              "-maintenance", "-blockId", bIds[0]);
          assertTrue(fsckOut.contains(
              NamenodeFsck.ENTERING_MAINTENANCE_STATUS));
          return true;
        }
      } catch (Exception e) {
        // Keep polling, but log the full stack trace for diagnosis.
        LOG.warn("Unexpected exception: ", e);
        return false;
      }
      return false;
    }
  }, 500, 30000);

  // Start 3rd DataNode.
  // NOTE(review): presumably needed so the block can meet the maintenance
  // minimum replication without the first node -- confirm.
  cluster.startDataNodes(conf, 1, true, null,
      new String[] {"/rack3"}, new String[] {"host3"}, null, false);

  // Wait for the 1st node to reach the in-maintenance state.
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      try {
        DatanodeInfo datanodeInfo = null;
        for (DatanodeInfo info : dfs.getDataNodeStats()) {
          if (dnName.equals(info.getXferAddr())) {
            datanodeInfo = info;
          }
        }
        if (datanodeInfo != null && datanodeInfo.isInMaintenance()) {
          return true;
        }
      } catch (Exception e) {
        // Keep polling, but log the full stack trace for diagnosis.
        LOG.warn("Unexpected exception: ", e);
        return false;
      }
      return false;
    }
  }, 500, 30000);

  // Check that the in-maintenance node is reported when requested.
  String fsckOut = runFsck(conf, 4, false, "/",
      "-maintenance", "-blockId", bIds[0]);
  assertTrue(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS));

  // Check that the in-maintenance node is not reported when not requested.
  fsckOut = runFsck(conf, 4, false, "/", "-blockId", bIds[0]);
  assertFalse(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS));
}
/** /**
* Test for blockIdCK with block corruption. * Test for blockIdCK with block corruption.
*/ */
@ -1385,14 +1581,13 @@ public void testBlockIdCKCorruption() throws Exception {
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1); conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
DistributedFileSystem dfs = null;
cluster = cluster =
new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts) new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts)
.racks(racks).build(); .racks(racks).build();
assertNotNull("Failed Cluster Creation", cluster); assertNotNull("Failed Cluster Creation", cluster);
cluster.waitClusterUp(); cluster.waitClusterUp();
dfs = cluster.getFileSystem(); final DistributedFileSystem dfs = cluster.getFileSystem();
assertNotNull("Failed to get FileSystem", dfs); assertNotNull("Failed to get FileSystem", dfs);
DFSTestUtil util = new DFSTestUtil.Builder(). DFSTestUtil util = new DFSTestUtil.Builder().