HDFS-8920. Erasure Coding: when recovering lost blocks, logs can be too verbose and hurt performance. Contributed by Rui Li
This commit is contained in:
parent
1080c37300
commit
7bff8ca1c8
|
@ -444,3 +444,6 @@
|
||||||
|
|
||||||
HDFS-9091. Erasure Coding: Provide DistributedFilesystem API to
|
HDFS-9091. Erasure Coding: Provide DistributedFilesystem API to
|
||||||
getAllErasureCodingPolicies. (Rakesh R via zhz)
|
getAllErasureCodingPolicies. (Rakesh R via zhz)
|
||||||
|
|
||||||
|
HDFS-8920. Erasure Coding: when recovering lost blocks, logs can be too
|
||||||
|
verbose and hurt performance. (Rui Li via Kai Zheng)
|
|
@ -1057,9 +1057,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (chosenNode == null) {
|
if (chosenNode == null) {
|
||||||
DFSClient.LOG.warn("No live nodes contain block " + block.getBlock() +
|
reportLostBlock(block, ignoredNodes);
|
||||||
" after checking nodes = " + Arrays.toString(nodes) +
|
|
||||||
", ignoredNodes = " + ignoredNodes);
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
final String dnAddr =
|
final String dnAddr =
|
||||||
|
@ -1071,6 +1069,17 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
|
||||||
return new DNAddrPair(chosenNode, targetAddr, storageType);
|
return new DNAddrPair(chosenNode, targetAddr, storageType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Warn the user of a lost block
|
||||||
|
*/
|
||||||
|
protected void reportLostBlock(LocatedBlock lostBlock,
|
||||||
|
Collection<DatanodeInfo> ignoredNodes) {
|
||||||
|
DatanodeInfo[] nodes = lostBlock.getLocations();
|
||||||
|
DFSClient.LOG.warn("No live nodes contain block " + lostBlock.getBlock() +
|
||||||
|
" after checking nodes = " + Arrays.toString(nodes) +
|
||||||
|
", ignoredNodes = " + ignoredNodes);
|
||||||
|
}
|
||||||
|
|
||||||
private static String getBestNodeDNAddrPairErrorString(
|
private static String getBestNodeDNAddrPairErrorString(
|
||||||
DatanodeInfo nodes[], AbstractMap<DatanodeInfo,
|
DatanodeInfo nodes[], AbstractMap<DatanodeInfo,
|
||||||
DatanodeInfo> deadNodes, Collection<DatanodeInfo> ignoredNodes) {
|
DatanodeInfo> deadNodes, Collection<DatanodeInfo> ignoredNodes) {
|
||||||
|
|
|
@ -45,8 +45,11 @@ import java.io.EOFException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InterruptedIOException;
|
import java.io.InterruptedIOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -154,6 +157,17 @@ public class DFSStripedInputStream extends DFSInputStream {
|
||||||
private StripeRange curStripeRange;
|
private StripeRange curStripeRange;
|
||||||
private final CompletionService<Void> readingService;
|
private final CompletionService<Void> readingService;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When warning the user of a lost block in striping mode, we remember the
|
||||||
|
* dead nodes we've logged. All other striping blocks on these nodes can be
|
||||||
|
* considered lost too, and we don't want to log a warning for each of them.
|
||||||
|
* This is to prevent the log from being too verbose. Refer to HDFS-8920.
|
||||||
|
*
|
||||||
|
* To minimize the overhead, we only store the datanodeUuid in this set.
|
||||||
|
*/
|
||||||
|
private final Set<String> warnedNodes = Collections.newSetFromMap(
|
||||||
|
new ConcurrentHashMap<String, Boolean>());
|
||||||
|
|
||||||
DFSStripedInputStream(DFSClient dfsClient, String src,
|
DFSStripedInputStream(DFSClient dfsClient, String src,
|
||||||
boolean verifyChecksum, ErasureCodingPolicy ecPolicy,
|
boolean verifyChecksum, ErasureCodingPolicy ecPolicy,
|
||||||
LocatedBlocks locatedBlocks) throws IOException {
|
LocatedBlocks locatedBlocks) throws IOException {
|
||||||
|
@ -527,6 +541,26 @@ public class DFSStripedInputStream extends DFSInputStream {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void reportLostBlock(LocatedBlock lostBlock,
|
||||||
|
Collection<DatanodeInfo> ignoredNodes) {
|
||||||
|
DatanodeInfo[] nodes = lostBlock.getLocations();
|
||||||
|
if (nodes != null && nodes.length > 0) {
|
||||||
|
List<String> dnUUIDs = new ArrayList<>();
|
||||||
|
for (DatanodeInfo node : nodes) {
|
||||||
|
dnUUIDs.add(node.getDatanodeUuid());
|
||||||
|
}
|
||||||
|
if (!warnedNodes.containsAll(dnUUIDs)) {
|
||||||
|
DFSClient.LOG.warn(Arrays.toString(nodes) + " are unavailable and " +
|
||||||
|
"all striping blocks on them are lost. " +
|
||||||
|
"IgnoredNodes = " + ignoredNodes);
|
||||||
|
warnedNodes.addAll(dnUUIDs);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
super.reportLostBlock(lostBlock, ignoredNodes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The reader for reading a complete {@link AlignedStripe}. Note that an
|
* The reader for reading a complete {@link AlignedStripe}. Note that an
|
||||||
* {@link AlignedStripe} may cross multiple stripes with cellSize width.
|
* {@link AlignedStripe} may cross multiple stripes with cellSize width.
|
||||||
|
|
Loading…
Reference in New Issue