HDFS-11738. Hedged pread takes more time when block moved from initial locations. Contributed by Vinayakumar B.

This commit is contained in:
John Zhuge 2017-08-21 13:44:32 -07:00
parent 736ceab2f5
commit b6bfb2fcb2
3 changed files with 112 additions and 61 deletions

View File

@ -61,4 +61,6 @@ public class DFSClientFaultInjector {
public boolean skipRollingRestartWait() { public boolean skipRollingRestartWait() {
return false; return false;
} }
/**
 * Fault-injection hook invoked at the start of each hedged-read task, before
 * the read from the datanode is attempted. The default implementation does
 * nothing; tests may override it, for example to introduce a delay.
 */
public void sleepBeforeHedgedGet() {}
} }

View File

@ -830,11 +830,36 @@ public class DFSInputStream extends FSInputStream
private DNAddrPair chooseDataNode(LocatedBlock block, private DNAddrPair chooseDataNode(LocatedBlock block,
Collection<DatanodeInfo> ignoredNodes) throws IOException { Collection<DatanodeInfo> ignoredNodes) throws IOException {
return chooseDataNode(block, ignoredNodes, true);
}
/**
* Choose datanode to read from.
*
* @param block Block to choose datanode addr from
* @param ignoredNodes Ignored nodes inside.
* @param refetchIfRequired Whether to refetch block locations if there are no
* nodes to choose from.
* @return the chosen DNAddrPair; may be null only if refetchIfRequired is
* false.
*/
private DNAddrPair chooseDataNode(LocatedBlock block,
Collection<DatanodeInfo> ignoredNodes, boolean refetchIfRequired)
throws IOException {
while (true) { while (true) {
DNAddrPair result = getBestNodeDNAddrPair(block, ignoredNodes); DNAddrPair result = getBestNodeDNAddrPair(block, ignoredNodes);
if (result != null) { if (result != null) {
return result; return result;
} else if (refetchIfRequired) {
block = refetchLocations(block, ignoredNodes);
} else { } else {
return null;
}
}
}
private LocatedBlock refetchLocations(LocatedBlock block,
Collection<DatanodeInfo> ignoredNodes) throws IOException {
String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(), String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(),
deadNodes, ignoredNodes); deadNodes, ignoredNodes);
String blockInfo = block.getBlock() + " file=" + src; String blockInfo = block.getBlock() + " file=" + src;
@ -864,7 +889,8 @@ public class DFSInputStream extends FSInputStream
// will wait 6000ms grace period before retry and the waiting window is // will wait 6000ms grace period before retry and the waiting window is
// expanded to 9000ms. // expanded to 9000ms.
final int timeWindow = dfsClient.getConf().getTimeWindow(); final int timeWindow = dfsClient.getConf().getTimeWindow();
double waitTime = timeWindow * failures + // grace period for the last round of attempt // grace period for the last round of attempt
double waitTime = timeWindow * failures +
// expanding time window for each failure // expanding time window for each failure
timeWindow * (failures + 1) * timeWindow * (failures + 1) *
ThreadLocalRandom.current().nextDouble(); ThreadLocalRandom.current().nextDouble();
@ -880,8 +906,7 @@ public class DFSInputStream extends FSInputStream
openInfo(true); openInfo(true);
block = refreshLocatedBlock(block); block = refreshLocatedBlock(block);
failures++; failures++;
} return block;
}
} }
/** /**
@ -985,6 +1010,7 @@ public class DFSInputStream extends FSInputStream
return new Callable<ByteBuffer>() { return new Callable<ByteBuffer>() {
@Override @Override
public ByteBuffer call() throws Exception { public ByteBuffer call() throws Exception {
DFSClientFaultInjector.get().sleepBeforeHedgedGet();
try (TraceScope ignored = dfsClient.getTracer(). try (TraceScope ignored = dfsClient.getTracer().
newScope("hedgedRead" + hedgedReadId, parentSpanId)) { newScope("hedgedRead" + hedgedReadId, parentSpanId)) {
actualGetFromOneDataNode(datanode, start, end, bb, corruptedBlocks); actualGetFromOneDataNode(datanode, start, end, bb, corruptedBlocks);
@ -1159,20 +1185,22 @@ public class DFSInputStream extends FSInputStream
// We are starting up a 'hedged' read. We have a read already // We are starting up a 'hedged' read. We have a read already
// ongoing. Call getBestNodeDNAddrPair instead of chooseDataNode. // ongoing. Call getBestNodeDNAddrPair instead of chooseDataNode.
// If no nodes to do hedged reads against, pass. // If no nodes to do hedged reads against, pass.
boolean refetch = false;
try { try {
chosenNode = getBestNodeDNAddrPair(block, ignored); chosenNode = chooseDataNode(block, ignored, false);
if (chosenNode == null) { if (chosenNode != null) {
chosenNode = chooseDataNode(block, ignored);
}
// Latest block, if refreshed internally // Latest block, if refreshed internally
block = chosenNode.block; block = chosenNode.block;
bb = ByteBuffer.allocate(len); bb = ByteBuffer.allocate(len);
Callable<ByteBuffer> getFromDataNodeCallable = getFromOneDataNode( Callable<ByteBuffer> getFromDataNodeCallable =
chosenNode, block, start, end, bb, getFromOneDataNode(chosenNode, block, start, end, bb,
corruptedBlocks, hedgedReadId++); corruptedBlocks, hedgedReadId++);
Future<ByteBuffer> oneMoreRequest = hedgedService Future<ByteBuffer> oneMoreRequest =
.submit(getFromDataNodeCallable); hedgedService.submit(getFromDataNodeCallable);
futures.add(oneMoreRequest); futures.add(oneMoreRequest);
} else {
refetch = true;
}
} catch (IOException ioe) { } catch (IOException ioe) {
DFSClient.LOG.debug("Failed getting node for hedged read: {}", DFSClient.LOG.debug("Failed getting node for hedged read: {}",
ioe.getMessage()); ioe.getMessage());
@ -1190,6 +1218,9 @@ public class DFSInputStream extends FSInputStream
} catch (InterruptedException ie) { } catch (InterruptedException ie) {
// Ignore and retry // Ignore and retry
} }
if (refetch) {
refetchLocations(block, ignored);
}
// We got here if exception. Ignore this node on next go around IFF // We got here if exception. Ignore this node on next go around IFF
// we found a chosenNode to hedge read against. // we found a chosenNode to hedge read against.
if (chosenNode != null && chosenNode.info != null) { if (chosenNode != null && chosenNode.info != null) {

View File

@ -626,7 +626,7 @@ public class TestPread {
*/ */
@Test @Test
public void testPreadFailureWithChangedBlockLocations() throws Exception { public void testPreadFailureWithChangedBlockLocations() throws Exception {
doPreadTestWithChangedLocations(); doPreadTestWithChangedLocations(1);
} }
/** /**
@ -639,21 +639,36 @@ public class TestPread {
* 7. Consider next calls to getBlockLocations() always returns DN3 as last * 7. Consider next calls to getBlockLocations() always returns DN3 as last
* location.<br> * location.<br>
*/ */
@Test @Test(timeout = 60000)
public void testPreadHedgedFailureWithChangedBlockLocations() public void testPreadHedgedFailureWithChangedBlockLocations()
throws Exception { throws Exception {
isHedgedRead = true; isHedgedRead = true;
doPreadTestWithChangedLocations(); DFSClientFaultInjector old = DFSClientFaultInjector.get();
try {
DFSClientFaultInjector.set(new DFSClientFaultInjector() {
/**
 * Delays every hedged-read task by 500 ms before it contacts the datanode.
 * If the sleep is interrupted, the thread's interrupt status is restored
 * rather than silently discarded, so callers further up can still see it.
 */
@Override
public void sleepBeforeHedgedGet() {
  try {
    Thread.sleep(500);
  } catch (InterruptedException e) {
    // Thread.sleep clears the interrupt flag when it throws; set it again.
    Thread.currentThread().interrupt();
  }
}
});
doPreadTestWithChangedLocations(2);
} finally {
DFSClientFaultInjector.set(old);
}
} }
private void doPreadTestWithChangedLocations() private void doPreadTestWithChangedLocations(int maxFailures)
throws IOException, TimeoutException, InterruptedException { throws IOException, TimeoutException, InterruptedException {
GenericTestUtils.setLogLevel(DFSClient.LOG, Level.DEBUG); GenericTestUtils.setLogLevel(DFSClient.LOG, Level.DEBUG);
Configuration conf = new HdfsConfiguration(); Configuration conf = new HdfsConfiguration();
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2); conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2);
conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
if (isHedgedRead) { if (isHedgedRead) {
conf.setInt(HdfsClientConfigKeys.HedgedRead.THRESHOLD_MILLIS_KEY, 100);
conf.setInt(HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, 2); conf.setInt(HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, 2);
conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 1000);
} }
try (MiniDFSCluster cluster = try (MiniDFSCluster cluster =
new MiniDFSCluster.Builder(conf).numDataNodes(3).build()) { new MiniDFSCluster.Builder(conf).numDataNodes(3).build()) {
@ -747,6 +762,9 @@ public class TestPread {
int n = din.read(0, buf, 0, data.length()); int n = din.read(0, buf, 0, data.length());
assertEquals(data.length(), n); assertEquals(data.length(), n);
assertEquals("Data should be read", data, new String(buf, 0, n)); assertEquals("Data should be read", data, new String(buf, 0, n));
assertTrue("Read should complete with maximum " + maxFailures
+ " failures, but completed with " + din.failures,
din.failures <= maxFailures);
DFSClient.LOG.info("Read completed"); DFSClient.LOG.info("Read completed");
} }
} }