HDFS-6231. DFSClient hangs infinitely if using hedged reads and all eligible datanodes die. Contributed by Chris Nauroth.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1586551 13f79535-47bb-0310-9956-ffa450edef68
Parent: dc84800184
Commit: f8904ad299
CHANGES.txt

@@ -372,6 +372,9 @@ Release 2.4.1 - UNRELEASED
 
     HDFS-6208. DataNode caching can leak file descriptors. (cnauroth)
 
+    HDFS-6231. DFSClient hangs infinitely if using hedged reads and all eligible
+    datanodes die. (cnauroth)
+
 Release 2.4.0 - 2014-04-07
 
   INCOMPATIBLE CHANGES
DFSInputStream.java

@@ -983,12 +983,15 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
     return new Callable<ByteBuffer>() {
       @Override
       public ByteBuffer call() throws Exception {
-        byte[] buf = bb.array();
-        int offset = bb.position();
-        actualGetFromOneDataNode(datanode, block, start, end, buf, offset,
-            corruptedBlockMap);
-        latch.countDown();
-        return bb;
+        try {
+          byte[] buf = bb.array();
+          int offset = bb.position();
+          actualGetFromOneDataNode(datanode, block, start, end, buf, offset,
+              corruptedBlockMap);
+          return bb;
+        } finally {
+          latch.countDown();
+        }
       }
     };
   }
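This hunk is the core of the fix: actualGetFromOneDataNode() can throw, and in the old code an exception skipped latch.countDown(), so the thread waiting on the latch hung forever once every eligible datanode had died. Moving countDown() into a finally block releases the waiter on success and failure alike. A minimal, self-contained sketch of the pattern (class and task names are hypothetical, not from the patch):

import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class LatchFinallyDemo {
  public static void main(String[] args) throws InterruptedException {
    ExecutorService pool = Executors.newFixedThreadPool(1);
    CountDownLatch latch = new CountDownLatch(1);

    Callable<byte[]> task = () -> {
      try {
        // Stands in for actualGetFromOneDataNode(...) failing: without
        // the finally below, this throw would skip countDown() and the
        // main thread would wait on the latch forever.
        throw new RuntimeException("datanode died");
      } finally {
        latch.countDown();  // always signal the waiter
      }
    };

    pool.submit(task);
    latch.await();          // returns even though the task threw
    System.out.println("waiter released");
    pool.shutdown();
  }
}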
@@ -1101,7 +1104,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
       long end, byte[] buf, int offset,
       Map<ExtendedBlock, Set<DatanodeInfo>> corruptedBlockMap)
       throws IOException {
-    ArrayList<Future<ByteBuffer>> futures = null;
+    ArrayList<Future<ByteBuffer>> futures = new ArrayList<Future<ByteBuffer>>();
     ArrayList<DatanodeInfo> ignored = new ArrayList<DatanodeInfo>();
     ByteBuffer bb = null;
     int len = (int) (end - start + 1);
@@ -1112,7 +1115,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
       DNAddrPair chosenNode = null;
       Future<ByteBuffer> future = null;
       // futures is null if there is no request already executing.
-      if (futures == null) {
+      if (futures.isEmpty()) {
        // chooseDataNode is a commitment. If no node, we go to
        // the NN to reget block locations. Only go here on first read.
        chosenNode = chooseDataNode(block, ignored);
@@ -1130,7 +1133,6 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
         // Ignore this node on next go around.
         ignored.add(chosenNode.info);
         dfsClient.getHedgedReadMetrics().incHedgedReadOps();
-        futures = new ArrayList<Future<ByteBuffer>>();
         futures.add(future);
         continue; // no need to refresh block locations
       } catch (InterruptedException e) {
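The three hunks above remove the null state of futures: the list is allocated once up front, "no hedged request in flight" becomes futures.isEmpty(), and the re-allocation inside the loop is dropped. Together with the finally block this closes the paths that left the reader waiting on work that was never tracked. A rough sketch of the resulting shape (simplified and hypothetical, not the actual DFSInputStream loop):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class EmptyListStateDemo {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(2);
    // Allocated up front: "no request executing" is futures.isEmpty(),
    // not futures == null, so there is no null state to mishandle.
    List<Future<String>> futures = new ArrayList<Future<String>>();

    for (int attempt = 0; attempt < 2; attempt++) {
      if (futures.isEmpty()) {
        // First pass: issue the initial read.
        futures.add(pool.submit(() -> "data from first replica"));
      } else {
        // Later passes: hedge with an extra request on another replica.
        futures.add(pool.submit(() -> "data from hedged replica"));
      }
    }
    for (Future<String> f : futures) {
      System.out.println(f.get());
    }
    pool.shutdown();
  }
}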
TestPread.java

@@ -237,7 +237,7 @@ public class TestPread {
   public void testHedgedPreadDFSBasic() throws IOException {
     Configuration conf = new Configuration();
     conf.setInt(DFSConfigKeys.DFS_DFSCLIENT_HEDGED_READ_THREADPOOL_SIZE, 5);
-    conf.setLong(DFSConfigKeys.DFS_DFSCLIENT_HEDGED_READ_THRESHOLD_MILLIS, 100);
+    conf.setLong(DFSConfigKeys.DFS_DFSCLIENT_HEDGED_READ_THRESHOLD_MILLIS, 1);
     dfsPreadTest(conf, false, true); // normal pread
     dfsPreadTest(conf, true, true); // trigger read code path without
                                     // transferTo.
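Dropping the hedge threshold from 100 ms to 1 ms makes the test hedge on essentially every pread, so the fixed code path is exercised deterministically. For reference, the same two settings turn hedged reads on for any client; a minimal sketch using the underlying key strings (the values here are illustrative):

import org.apache.hadoop.conf.Configuration;

public class HedgedReadConfig {
  public static Configuration create() {
    Configuration conf = new Configuration();
    // A thread pool size greater than zero enables hedged reads for
    // DFSClient instances built from this configuration.
    conf.setInt("dfs.client.hedged.read.threadpool.size", 5);
    // How long the first read may stay outstanding before a second,
    // hedged read of the same range is started on another datanode.
    conf.setLong("dfs.client.hedged.read.threshold.millis", 500);
    return conf;
  }
}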
@@ -273,6 +273,10 @@ public class TestPread {
     DistributedFileSystem fileSys = cluster.getFileSystem();
     DFSClient dfsClient = fileSys.getClient();
     DFSHedgedReadMetrics metrics = dfsClient.getHedgedReadMetrics();
+    // Metrics instance is static, so we need to reset counts from prior tests.
+    metrics.hedgedReadOps.set(0);
+    metrics.hedgedReadOpsWin.set(0);
+    metrics.hedgedReadOpsInCurThread.set(0);
     try {
       Path file1 = new Path("hedgedReadMaxOut.dat");
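The added reset lines exist because the metrics object is static: counts accumulate across all tests in the JVM, and leftover counts would make this test's assertions unreliable. A tiny sketch of the failure mode and the reset, with a plain AtomicLong standing in for DFSHedgedReadMetrics (names hypothetical):

import java.util.concurrent.atomic.AtomicLong;

public class SharedMetricsDemo {
  // Mirrors the shape of DFSHedgedReadMetrics: a counter in a shared
  // static field survives from one test to the next in the same JVM.
  static final AtomicLong hedgedReadOps = new AtomicLong();

  public static void main(String[] args) {
    hedgedReadOps.incrementAndGet();  // leftover count from a "prior test"

    hedgedReadOps.set(0);             // reset first, as the patch does
    hedgedReadOps.incrementAndGet();  // activity of the current test

    // The count now reflects only this test's activity.
    System.out.println("hedgedReadOps = " + hedgedReadOps.get());
  }
}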