diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index ec52122de71..ff54f489130 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -109,6 +109,7 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.LightWeightGSet;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Time;
 import org.apache.hadoop.util.VersionInfo;
 
@@ -612,6 +613,48 @@ public class BlockManager implements BlockStatsMXBean {
     // Dump blocks that are waiting to be deleted
     invalidateBlocks.dump(out);
 
+    // Dump corrupt blocks and their storageIDs
+    Set<Block> corruptBlocks = corruptReplicas.getCorruptBlocks();
+    out.println("Corrupt Blocks:");
+    for (Block block : corruptBlocks) {
+      Collection<DatanodeDescriptor> corruptNodes =
+          corruptReplicas.getNodes(block);
+      if (corruptNodes == null) {
+        LOG.warn(block.getBlockId() +
+            " is corrupt but has no associated node.");
+        continue;
+      }
+      int numNodesToFind = corruptNodes.size();
+      for (DatanodeStorageInfo storage : blocksMap.getStorages(block)) {
+        DatanodeDescriptor node = storage.getDatanodeDescriptor();
+        if (corruptNodes.contains(node)) {
+          String storageId = storage.getStorageID();
+          DatanodeStorageInfo storageInfo = node.getStorageInfo(storageId);
+          State state = (storageInfo == null) ? null : storageInfo.getState();
+          out.println("Block=" + block.getBlockId() + "\tNode=" + node.getName()
+              + "\tStorageID=" + storageId + "\tStorageState=" + state
+              + "\tTotalReplicas=" +
+              blocksMap.numNodes(block)
+              + "\tReason=" + corruptReplicas.getCorruptReason(block, node));
+          numNodesToFind--;
+          if (numNodesToFind == 0) {
+            break;
+          }
+        }
+      }
+      if (numNodesToFind > 0) {
+        String[] corruptNodesList = new String[corruptNodes.size()];
+        int i = 0;
+        for (DatanodeDescriptor d : corruptNodes) {
+          corruptNodesList[i] = d.getHostName();
+          i++;
+        }
+        out.println(block.getBlockId() + " corrupt on " +
+            StringUtils.join(",", corruptNodesList) + " but not all nodes are" +
+            " found in its block locations");
+      }
+    }
+
     // Dump all datanodes
     getDatanodeManager().datanodeDump(out);
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java
index bd57ea27049..35468da4dc7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java
@@ -20,8 +20,10 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -231,6 +233,16 @@ public class CorruptReplicasMap{
     return ret;
   }
 
+  /**
+   * Get the set of corrupt blocks in corruptReplicasMap.
+   * @return Set of Block objects
+   */
+  Set<Block> getCorruptBlocks() {
+    Set<Block> corruptBlocks = new HashSet<>();
+    corruptBlocks.addAll(corruptReplicasMap.keySet());
+    return corruptBlocks;
+  }
+
   /**
    * return the reason about corrupted replica for a given block
    * on a given dn
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
index bdcc25147a6..3ee08102137 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
@@ -29,7 +29,13 @@ import static org.mockito.Mockito.reset;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.verify;
 
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -527,6 +533,22 @@ public class TestBlockManager {
     return blockInfo;
   }
 
+  private BlockInfo addCorruptBlockOnNodes(long blockId,
+      List<DatanodeDescriptor> nodes) throws IOException {
+    long inodeId = ++mockINodeId;
+    final INodeFile bc = TestINodeFile.createINodeFile(inodeId);
+
+    BlockInfo blockInfo = blockOnNodes(blockId, nodes);
+    blockInfo.setReplication((short) 3);
+    blockInfo.setBlockCollectionId(inodeId);
+    Mockito.doReturn(bc).when(fsn).getBlockCollection(inodeId);
+    bm.blocksMap.addBlockCollection(blockInfo, bc);
+    bm.markBlockReplicasAsCorrupt(blockInfo, blockInfo,
+        blockInfo.getGenerationStamp() + 1, blockInfo.getNumBytes(),
+        new DatanodeStorageInfo[]{nodes.get(0).getStorageInfos()[0]});
+    return blockInfo;
+  }
+
   private DatanodeStorageInfo[] scheduleSingleReplication(BlockInfo block) {
     // list for priority 1
     List<DatanodeStorageInfo> list_p1 = new ArrayList<>();
@@ -1129,4 +1151,40 @@ public class TestBlockManager {
       cluster.shutdown();
     }
   }
+
+  @Test
+  public void testMetaSaveCorruptBlocks() throws Exception {
+    List<DatanodeStorageInfo> origStorages = getStorages(0, 1);
+    List<DatanodeDescriptor> origNodes = getNodes(origStorages);
+    addCorruptBlockOnNodes(0, origNodes);
+    File file = new File("test.log");
+    PrintWriter out = new PrintWriter(file);
+    bm.metaSave(out);
+    out.flush();
+    FileInputStream fstream = new FileInputStream(file);
+    DataInputStream in = new DataInputStream(fstream);
+    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
+    try {
+      for (int i = 0; i < 6; i++) {
+        reader.readLine();
+      }
+      String corruptBlocksLine = reader.readLine();
+      assertEquals("Unexpected text in metasave, " +
+          "was expecting corrupt blocks section!", 0,
+          corruptBlocksLine.compareTo("Corrupt Blocks:"));
+      corruptBlocksLine = reader.readLine();
+      String regex = "Block=[0-9]+\\tNode=.*\\tStorageID=.*StorageState.*" +
+          "TotalReplicas=.*Reason=GENSTAMP_MISMATCH";
+      assertTrue("Unexpected corrupt block section in metasave!",
+          corruptBlocksLine.matches(regex));
+      corruptBlocksLine = reader.readLine();
+      regex = "Metasave: Number of datanodes.*";
+      assertTrue("Unexpected corrupt block section in metasave!",
+          corruptBlocksLine.matches(regex));
+    } finally {
+      if (reader != null)
+        reader.close();
+      file.delete();
+    }
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java
index c51ca5e4ab6..cd1d966117e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java
@@ -159,9 +159,11 @@ public class TestMetaSave {
     assertTrue(line.equals("Metasave: Blocks being reconstructed: 0"));
     line = reader.readLine();
     assertTrue(line.equals("Metasave: Blocks 2 waiting deletion from 1 datanodes."));
-    //skip 2 lines to reach HDFS-9033 scenario. 
+    //skip 2 lines to reach HDFS-9033 scenario.
     line = reader.readLine();
     line = reader.readLine();
+    // skip 1 line for Corrupt Blocks section.
+    line = reader.readLine();
     line = reader.readLine();
     assertTrue(line.equals("Metasave: Number of datanodes: 2"));
     line = reader.readLine();