HDFS-8920. Erasure Coding: when recovering lost blocks, logs can be too verbose and hurt performance. Contributed by Rui Li

This commit is contained in:
Kai Zheng 2015-09-23 14:13:26 +08:00 committed by Zhe Zhang
parent 1080c37300
commit 7bff8ca1c8
3 changed files with 49 additions and 3 deletions

View File

@ -444,3 +444,6 @@
HDFS-9091. Erasure Coding: Provide DistributedFilesystem API to
getAllErasureCodingPolicies. (Rakesh R via zhz)
HDFS-8920. Erasure Coding: when recovering lost blocks, logs can be too
verbose and hurt performance. (Rui Li via Kai Zheng)

View File

@ -1057,9 +1057,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
} }
} }
if (chosenNode == null) { if (chosenNode == null) {
DFSClient.LOG.warn("No live nodes contain block " + block.getBlock() + reportLostBlock(block, ignoredNodes);
" after checking nodes = " + Arrays.toString(nodes) +
", ignoredNodes = " + ignoredNodes);
return null; return null;
} }
final String dnAddr = final String dnAddr =
@ -1071,6 +1069,17 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
return new DNAddrPair(chosenNode, targetAddr, storageType); return new DNAddrPair(chosenNode, targetAddr, storageType);
} }
/**
 * Logs a warning that no live node was found for a block.
 *
 * The message lists the block, every location recorded for it, and the
 * nodes that were ignored. Declared protected so that subclasses can
 * replace the reporting.
 *
 * @param lostBlock the block that could not be served by any live node
 * @param ignoredNodes the nodes that were ignored; printed as-is
 */
protected void reportLostBlock(LocatedBlock lostBlock,
    Collection<DatanodeInfo> ignoredNodes) {
  final DatanodeInfo[] checkedNodes = lostBlock.getLocations();
  final StringBuilder message =
      new StringBuilder("No live nodes contain block ");
  message.append(lostBlock.getBlock());
  message.append(" after checking nodes = ");
  message.append(Arrays.toString(checkedNodes));
  message.append(", ignoredNodes = ");
  message.append(ignoredNodes);
  DFSClient.LOG.warn(message.toString());
}
private static String getBestNodeDNAddrPairErrorString( private static String getBestNodeDNAddrPairErrorString(
DatanodeInfo nodes[], AbstractMap<DatanodeInfo, DatanodeInfo nodes[], AbstractMap<DatanodeInfo,
DatanodeInfo> deadNodes, Collection<DatanodeInfo> ignoredNodes) { DatanodeInfo> deadNodes, Collection<DatanodeInfo> ignoredNodes) {

View File

@ -45,8 +45,11 @@ import java.io.EOFException;
import java.io.IOException; import java.io.IOException;
import java.io.InterruptedIOException; import java.io.InterruptedIOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.Collection; import java.util.Collection;
import java.util.Map; import java.util.Map;
@ -154,6 +157,17 @@ public class DFSStripedInputStream extends DFSInputStream {
private StripeRange curStripeRange; private StripeRange curStripeRange;
private final CompletionService<Void> readingService; private final CompletionService<Void> readingService;
/**
 * The datanodeUuids of the dead nodes we have already warned the user about.
 *
 * When warning the user of a lost block in striping mode, we remember the
 * dead nodes we've logged. All other striping blocks on these nodes can be
 * considered lost too, and we don't want to log a warning for each of them.
 * This is to prevent the log from being too verbose. Refer to HDFS-8920.
 *
 * To minimize the overhead, we only store the datanodeUuid in this set,
 * not the whole datanode descriptor. The set is a view over a
 * ConcurrentHashMap.
 */
private final Set<String> warnedNodes = Collections.newSetFromMap(
new ConcurrentHashMap<String, Boolean>());
DFSStripedInputStream(DFSClient dfsClient, String src, DFSStripedInputStream(DFSClient dfsClient, String src,
boolean verifyChecksum, ErasureCodingPolicy ecPolicy, boolean verifyChecksum, ErasureCodingPolicy ecPolicy,
LocatedBlocks locatedBlocks) throws IOException { LocatedBlocks locatedBlocks) throws IOException {
@ -527,6 +541,26 @@ public class DFSStripedInputStream extends DFSInputStream {
} }
} }
/**
 * Warns the user of a lost block in striping mode, without repeating the
 * warning for datanodes that have already been reported.
 *
 * If the block has no locations, the generic warning of the superclass is
 * used. Otherwise a warning naming the block's datanodes is logged only
 * when at least one of them has not been recorded in {@code warnedNodes}
 * yet; every datanodeUuid seen here is recorded for later calls.
 *
 * @param lostBlock the block that could not be served by any live node
 * @param ignoredNodes the nodes that were ignored; printed as-is
 */
@Override
protected void reportLostBlock(LocatedBlock lostBlock,
    Collection<DatanodeInfo> ignoredNodes) {
  DatanodeInfo[] nodes = lostBlock.getLocations();
  if (nodes == null || nodes.length == 0) {
    // Nothing to attribute the loss to: fall back to the generic message.
    super.reportLostBlock(lostBlock, ignoredNodes);
    return;
  }

  List<String> dnUUIDs = new ArrayList<>(nodes.length);
  boolean hasUntrackedNode = false;
  for (DatanodeInfo node : nodes) {
    String uuid = node.getDatanodeUuid();
    if (uuid == null) {
      // warnedNodes is backed by a ConcurrentHashMap, which rejects null
      // elements, so such a node cannot be remembered. Always warn for it
      // rather than fail inside a logging-only path.
      // NOTE(review): assumes getDatanodeUuid() may return null - confirm.
      hasUntrackedNode = true;
    } else {
      dnUUIDs.add(uuid);
    }
  }

  // addAll reports whether any UUID was not in the set before, which
  // replaces the separate containsAll check followed by addAll.
  boolean hasNewNode = warnedNodes.addAll(dnUUIDs);
  if (hasUntrackedNode || hasNewNode) {
    DFSClient.LOG.warn(Arrays.toString(nodes) + " are unavailable and " +
        "all striping blocks on them are lost. " +
        "IgnoredNodes = " + ignoredNodes);
  }
}
/** /**
* The reader for reading a complete {@link AlignedStripe}. Note that an * The reader for reading a complete {@link AlignedStripe}. Note that an
* {@link AlignedStripe} may cross multiple stripes with cellSize width. * {@link AlignedStripe} may cross multiple stripes with cellSize width.