HDFS-10330. Add Corrupt Blocks Information in Metasave output. Contributed by Kuhu Shukla.
(cherry picked from commit e181092b86
)
This commit is contained in:
parent
b39e9efeeb
commit
dd7c9f5b8f
|
@ -97,6 +97,7 @@ import org.apache.hadoop.security.UserGroupInformation;
|
|||
import org.apache.hadoop.util.Daemon;
|
||||
import org.apache.hadoop.util.ExitUtil;
|
||||
import org.apache.hadoop.util.LightWeightGSet;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.Time;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
@ -557,6 +558,48 @@ public class BlockManager implements BlockStatsMXBean {
|
|||
// Dump blocks that are waiting to be deleted
|
||||
invalidateBlocks.dump(out);
|
||||
|
||||
//Dump corrupt blocks and their storageIDs
|
||||
Set<Block> corruptBlocks = corruptReplicas.getCorruptBlocks();
|
||||
out.println("Corrupt Blocks:");
|
||||
for(Block block : corruptBlocks) {
|
||||
Collection<DatanodeDescriptor> corruptNodes =
|
||||
corruptReplicas.getNodes(block);
|
||||
if (corruptNodes == null) {
|
||||
LOG.warn(block.getBlockId() +
|
||||
" is corrupt but has no associated node.");
|
||||
continue;
|
||||
}
|
||||
int numNodesToFind = corruptNodes.size();
|
||||
for (DatanodeStorageInfo storage : blocksMap.getStorages(block)) {
|
||||
DatanodeDescriptor node = storage.getDatanodeDescriptor();
|
||||
if (corruptNodes.contains(node)) {
|
||||
String storageId = storage.getStorageID();
|
||||
DatanodeStorageInfo storageInfo = node.getStorageInfo(storageId);
|
||||
State state = (storageInfo == null) ? null : storageInfo.getState();
|
||||
out.println("Block=" + block.getBlockId() + "\tNode=" + node.getName()
|
||||
+ "\tStorageID=" + storageId + "\tStorageState=" + state
|
||||
+ "\tTotalReplicas=" +
|
||||
blocksMap.numNodes(block)
|
||||
+ "\tReason=" + corruptReplicas.getCorruptReason(block, node));
|
||||
numNodesToFind--;
|
||||
if (numNodesToFind == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (numNodesToFind > 0) {
|
||||
String[] corruptNodesList = new String[corruptNodes.size()];
|
||||
int i = 0;
|
||||
for (DatanodeDescriptor d : corruptNodes) {
|
||||
corruptNodesList[i] = d.getHostName();
|
||||
i++;
|
||||
}
|
||||
out.println(block.getBlockId() + " corrupt on " +
|
||||
StringUtils.join(",", corruptNodesList) + " but not all nodes are" +
|
||||
"found in its block locations");
|
||||
}
|
||||
}
|
||||
|
||||
// Dump all datanodes
|
||||
getDatanodeManager().datanodeDump(out);
|
||||
}
|
||||
|
|
|
@ -20,8 +20,10 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
@ -231,6 +233,16 @@ public class CorruptReplicasMap{
|
|||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* method to get the set of corrupt blocks in corruptReplicasMap.
|
||||
* @return Set of Block objects
|
||||
*/
|
||||
Set<Block> getCorruptBlocks() {
|
||||
Set<Block> corruptBlocks = new HashSet<Block>();
|
||||
corruptBlocks.addAll(corruptReplicasMap.keySet());
|
||||
return corruptBlocks;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the reason about corrupted replica for a given block
|
||||
* on a given dn
|
||||
|
|
|
@ -29,7 +29,13 @@ import static org.mockito.Mockito.reset;
|
|||
import static org.mockito.Mockito.spy;
|
||||
import static org.mockito.Mockito.verify;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
|
@ -526,6 +532,22 @@ public class TestBlockManager {
|
|||
return blockInfo;
|
||||
}
|
||||
|
||||
private BlockInfo addCorruptBlockOnNodes(long blockId,
|
||||
List<DatanodeDescriptor> nodes) throws IOException {
|
||||
long inodeId = ++mockINodeId;
|
||||
final INodeFile bc = TestINodeFile.createINodeFile(inodeId);
|
||||
|
||||
BlockInfo blockInfo = blockOnNodes(blockId, nodes);
|
||||
blockInfo.setReplication((short) 3);
|
||||
blockInfo.setBlockCollectionId(inodeId);
|
||||
Mockito.doReturn(bc).when(fsn).getBlockCollection(inodeId);
|
||||
bm.blocksMap.addBlockCollection(blockInfo, bc);
|
||||
bm.markBlockReplicasAsCorrupt(blockInfo,
|
||||
blockInfo.getGenerationStamp() + 1, blockInfo.getNumBytes(),
|
||||
new DatanodeStorageInfo[]{nodes.get(0).getStorageInfos()[0]});
|
||||
return blockInfo;
|
||||
}
|
||||
|
||||
private DatanodeStorageInfo[] scheduleSingleReplication(BlockInfo block) {
|
||||
// list for priority 1
|
||||
List<BlockInfo> list_p1 = new ArrayList<>();
|
||||
|
@ -1061,4 +1083,40 @@ public class TestBlockManager {
|
|||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMetaSaveCorruptBlocks() throws Exception {
|
||||
List<DatanodeStorageInfo> origStorages = getStorages(0, 1);
|
||||
List<DatanodeDescriptor> origNodes = getNodes(origStorages);
|
||||
addCorruptBlockOnNodes(0, origNodes);
|
||||
File file = new File("test.log");
|
||||
PrintWriter out = new PrintWriter(file);
|
||||
bm.metaSave(out);
|
||||
out.flush();
|
||||
FileInputStream fstream = new FileInputStream(file);
|
||||
DataInputStream in = new DataInputStream(fstream);
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
|
||||
try {
|
||||
for(int i =0;i<6;i++) {
|
||||
reader.readLine();
|
||||
}
|
||||
String corruptBlocksLine = reader.readLine();
|
||||
assertEquals("Unexpected text in metasave," +
|
||||
"was expecting corrupt blocks section!", 0,
|
||||
corruptBlocksLine.compareTo("Corrupt Blocks:"));
|
||||
corruptBlocksLine = reader.readLine();
|
||||
String regex = "Block=[0-9]+\\tNode=.*\\tStorageID=.*StorageState.*" +
|
||||
"TotalReplicas=.*Reason=GENSTAMP_MISMATCH";
|
||||
assertTrue("Unexpected corrupt block section in metasave!",
|
||||
corruptBlocksLine.matches(regex));
|
||||
corruptBlocksLine = reader.readLine();
|
||||
regex = "Metasave: Number of datanodes.*";
|
||||
assertTrue("Unexpected corrupt block section in metasave!",
|
||||
corruptBlocksLine.matches(regex));
|
||||
} finally {
|
||||
if (reader != null)
|
||||
reader.close();
|
||||
file.delete();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -169,9 +169,11 @@ public class TestMetaSave {
|
|||
assertTrue(line.equals("Metasave: Blocks being replicated: 0"));
|
||||
line = reader.readLine();
|
||||
assertTrue(line.equals("Metasave: Blocks 2 waiting deletion from 1 datanodes."));
|
||||
//skip 2 lines to reach HDFS-9033 scenario.
|
||||
//skip 2 lines to reach HDFS-9033 scenario.
|
||||
line = reader.readLine();
|
||||
line = reader.readLine();
|
||||
// skip 1 line for Corrupt Blocks section.
|
||||
line = reader.readLine();
|
||||
line = reader.readLine();
|
||||
assertTrue(line.equals("Metasave: Number of datanodes: 2"));
|
||||
line = reader.readLine();
|
||||
|
|
Loading…
Reference in New Issue