HDFS-10330. Add Corrupt Blocks Information in Metasave output. Contributed by Kuhu Shukla.
(cherry picked from commit 919a1d824a)
Conflicts:
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
This commit is contained in:
parent
cf3e93ee73
commit
e181092b86
|
@ -96,6 +96,7 @@ import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.util.Daemon;
|
import org.apache.hadoop.util.Daemon;
|
||||||
import org.apache.hadoop.util.ExitUtil;
|
import org.apache.hadoop.util.ExitUtil;
|
||||||
import org.apache.hadoop.util.LightWeightGSet;
|
import org.apache.hadoop.util.LightWeightGSet;
|
||||||
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
@ -566,6 +567,48 @@ public class BlockManager implements BlockStatsMXBean {
|
||||||
// Dump blocks that are waiting to be deleted
|
// Dump blocks that are waiting to be deleted
|
||||||
invalidateBlocks.dump(out);
|
invalidateBlocks.dump(out);
|
||||||
|
|
||||||
|
//Dump corrupt blocks and their storageIDs
|
||||||
|
Set<Block> corruptBlocks = corruptReplicas.getCorruptBlocks();
|
||||||
|
out.println("Corrupt Blocks:");
|
||||||
|
for(Block block : corruptBlocks) {
|
||||||
|
Collection<DatanodeDescriptor> corruptNodes =
|
||||||
|
corruptReplicas.getNodes(block);
|
||||||
|
if (corruptNodes == null) {
|
||||||
|
LOG.warn(block.getBlockId() +
|
||||||
|
" is corrupt but has no associated node.");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int numNodesToFind = corruptNodes.size();
|
||||||
|
for (DatanodeStorageInfo storage : blocksMap.getStorages(block)) {
|
||||||
|
DatanodeDescriptor node = storage.getDatanodeDescriptor();
|
||||||
|
if (corruptNodes.contains(node)) {
|
||||||
|
String storageId = storage.getStorageID();
|
||||||
|
DatanodeStorageInfo storageInfo = node.getStorageInfo(storageId);
|
||||||
|
State state = (storageInfo == null) ? null : storageInfo.getState();
|
||||||
|
out.println("Block=" + block.getBlockId() + "\tNode=" + node.getName()
|
||||||
|
+ "\tStorageID=" + storageId + "\tStorageState=" + state
|
||||||
|
+ "\tTotalReplicas=" +
|
||||||
|
blocksMap.numNodes(block)
|
||||||
|
+ "\tReason=" + corruptReplicas.getCorruptReason(block, node));
|
||||||
|
numNodesToFind--;
|
||||||
|
if (numNodesToFind == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (numNodesToFind > 0) {
|
||||||
|
String[] corruptNodesList = new String[corruptNodes.size()];
|
||||||
|
int i = 0;
|
||||||
|
for (DatanodeDescriptor d : corruptNodes) {
|
||||||
|
corruptNodesList[i] = d.getHostName();
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
out.println(block.getBlockId() + " corrupt on " +
|
||||||
|
StringUtils.join(",", corruptNodesList) + " but not all nodes are" +
|
||||||
|
"found in its block locations");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Dump all datanodes
|
// Dump all datanodes
|
||||||
getDatanodeManager().datanodeDump(out);
|
getDatanodeManager().datanodeDump(out);
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,8 +20,10 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
@ -231,6 +233,16 @@ public class CorruptReplicasMap{
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* method to get the set of corrupt blocks in corruptReplicasMap.
|
||||||
|
* @return Set of Block objects
|
||||||
|
*/
|
||||||
|
Set<Block> getCorruptBlocks() {
|
||||||
|
Set<Block> corruptBlocks = new HashSet<Block>();
|
||||||
|
corruptBlocks.addAll(corruptReplicasMap.keySet());
|
||||||
|
return corruptBlocks;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* return the reason about corrupted replica for a given block
|
* return the reason about corrupted replica for a given block
|
||||||
* on a given dn
|
* on a given dn
|
||||||
|
|
|
@ -29,7 +29,13 @@ import static org.mockito.Mockito.reset;
|
||||||
import static org.mockito.Mockito.spy;
|
import static org.mockito.Mockito.spy;
|
||||||
import static org.mockito.Mockito.verify;
|
import static org.mockito.Mockito.verify;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.DataInputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.PrintWriter;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
|
@ -526,6 +532,22 @@ public class TestBlockManager {
|
||||||
return blockInfo;
|
return blockInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private BlockInfo addCorruptBlockOnNodes(long blockId,
|
||||||
|
List<DatanodeDescriptor> nodes) throws IOException {
|
||||||
|
long inodeId = ++mockINodeId;
|
||||||
|
final INodeFile bc = TestINodeFile.createINodeFile(inodeId);
|
||||||
|
|
||||||
|
BlockInfo blockInfo = blockOnNodes(blockId, nodes);
|
||||||
|
blockInfo.setReplication((short) 3);
|
||||||
|
blockInfo.setBlockCollectionId(inodeId);
|
||||||
|
Mockito.doReturn(bc).when(fsn).getBlockCollection(inodeId);
|
||||||
|
bm.blocksMap.addBlockCollection(blockInfo, bc);
|
||||||
|
bm.markBlockReplicasAsCorrupt(blockInfo,
|
||||||
|
blockInfo.getGenerationStamp() + 1, blockInfo.getNumBytes(),
|
||||||
|
new DatanodeStorageInfo[]{nodes.get(0).getStorageInfos()[0]});
|
||||||
|
return blockInfo;
|
||||||
|
}
|
||||||
|
|
||||||
private DatanodeStorageInfo[] scheduleSingleReplication(BlockInfo block) {
|
private DatanodeStorageInfo[] scheduleSingleReplication(BlockInfo block) {
|
||||||
// list for priority 1
|
// list for priority 1
|
||||||
List<BlockInfo> list_p1 = new ArrayList<>();
|
List<BlockInfo> list_p1 = new ArrayList<>();
|
||||||
|
@ -1061,4 +1083,40 @@ public class TestBlockManager {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMetaSaveCorruptBlocks() throws Exception {
|
||||||
|
List<DatanodeStorageInfo> origStorages = getStorages(0, 1);
|
||||||
|
List<DatanodeDescriptor> origNodes = getNodes(origStorages);
|
||||||
|
addCorruptBlockOnNodes(0, origNodes);
|
||||||
|
File file = new File("test.log");
|
||||||
|
PrintWriter out = new PrintWriter(file);
|
||||||
|
bm.metaSave(out);
|
||||||
|
out.flush();
|
||||||
|
FileInputStream fstream = new FileInputStream(file);
|
||||||
|
DataInputStream in = new DataInputStream(fstream);
|
||||||
|
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
|
||||||
|
try {
|
||||||
|
for(int i =0;i<6;i++) {
|
||||||
|
reader.readLine();
|
||||||
|
}
|
||||||
|
String corruptBlocksLine = reader.readLine();
|
||||||
|
assertEquals("Unexpected text in metasave," +
|
||||||
|
"was expecting corrupt blocks section!", 0,
|
||||||
|
corruptBlocksLine.compareTo("Corrupt Blocks:"));
|
||||||
|
corruptBlocksLine = reader.readLine();
|
||||||
|
String regex = "Block=[0-9]+\\tNode=.*\\tStorageID=.*StorageState.*" +
|
||||||
|
"TotalReplicas=.*Reason=GENSTAMP_MISMATCH";
|
||||||
|
assertTrue("Unexpected corrupt block section in metasave!",
|
||||||
|
corruptBlocksLine.matches(regex));
|
||||||
|
corruptBlocksLine = reader.readLine();
|
||||||
|
regex = "Metasave: Number of datanodes.*";
|
||||||
|
assertTrue("Unexpected corrupt block section in metasave!",
|
||||||
|
corruptBlocksLine.matches(regex));
|
||||||
|
} finally {
|
||||||
|
if (reader != null)
|
||||||
|
reader.close();
|
||||||
|
file.delete();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -169,9 +169,11 @@ public class TestMetaSave {
|
||||||
assertTrue(line.equals("Metasave: Blocks being replicated: 0"));
|
assertTrue(line.equals("Metasave: Blocks being replicated: 0"));
|
||||||
line = reader.readLine();
|
line = reader.readLine();
|
||||||
assertTrue(line.equals("Metasave: Blocks 2 waiting deletion from 1 datanodes."));
|
assertTrue(line.equals("Metasave: Blocks 2 waiting deletion from 1 datanodes."));
|
||||||
//skip 2 lines to reach HDFS-9033 scenario.
|
//skip 2 lines to reach HDFS-9033 scenario.
|
||||||
line = reader.readLine();
|
line = reader.readLine();
|
||||||
line = reader.readLine();
|
line = reader.readLine();
|
||||||
|
// skip 1 line for Corrupt Blocks section.
|
||||||
|
line = reader.readLine();
|
||||||
line = reader.readLine();
|
line = reader.readLine();
|
||||||
assertTrue(line.equals("Metasave: Number of datanodes: 2"));
|
assertTrue(line.equals("Metasave: Number of datanodes: 2"));
|
||||||
line = reader.readLine();
|
line = reader.readLine();
|
||||||
|
|
Loading…
Reference in New Issue