HDFS-15210. EC: File write hung when DN is shut down by admin command. Contributed by Surendra Singh Lilhore.
(cherry picked from commit db6252b6c3959220c6f985f940e2e731f99d8e30) (cherry picked from commit 7856af2cc65377352db5ebc93a778f373f71d215)
This commit is contained in:
parent
6863a5bb8a
commit
929411acce
@ -282,6 +282,7 @@ private void flipDataBuffers() {
|
|||||||
private ExecutorService flushAllExecutor;
|
private ExecutorService flushAllExecutor;
|
||||||
private CompletionService<Void> flushAllExecutorCompletionService;
|
private CompletionService<Void> flushAllExecutorCompletionService;
|
||||||
private int blockGroupIndex;
|
private int blockGroupIndex;
|
||||||
|
private long datanodeRestartTimeout;
|
||||||
|
|
||||||
/** Construct a new output stream for creating a file. */
|
/** Construct a new output stream for creating a file. */
|
||||||
DFSStripedOutputStream(DFSClient dfsClient, String src, HdfsFileStatus stat,
|
DFSStripedOutputStream(DFSClient dfsClient, String src, HdfsFileStatus stat,
|
||||||
@ -321,6 +322,7 @@ private void flipDataBuffers() {
|
|||||||
streamers.add(streamer);
|
streamers.add(streamer);
|
||||||
}
|
}
|
||||||
currentPackets = new DFSPacket[streamers.size()];
|
currentPackets = new DFSPacket[streamers.size()];
|
||||||
|
datanodeRestartTimeout = dfsClient.getConf().getDatanodeRestartTimeout();
|
||||||
setCurrentStreamer(0);
|
setCurrentStreamer(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -621,6 +623,11 @@ private Set<StripedDataStreamer> markExternalErrorOnStreamers() {
|
|||||||
"streamer: " + streamer);
|
"streamer: " + streamer);
|
||||||
streamer.setExternalError();
|
streamer.setExternalError();
|
||||||
healthySet.add(streamer);
|
healthySet.add(streamer);
|
||||||
|
} else if (!streamer.streamerClosed()
|
||||||
|
&& streamer.getErrorState().hasDatanodeError()
|
||||||
|
&& streamer.getErrorState().doWaitForRestart()) {
|
||||||
|
healthySet.add(streamer);
|
||||||
|
failedStreamers.remove(streamer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return healthySet;
|
return healthySet;
|
||||||
@ -685,6 +692,14 @@ private void checkStreamerFailures(boolean isNeedFlushAllPackets)
|
|||||||
for (int i = 0; i < numAllBlocks; i++) {
|
for (int i = 0; i < numAllBlocks; i++) {
|
||||||
coordinator.offerStreamerUpdateResult(i, newFailed.size() == 0);
|
coordinator.offerStreamerUpdateResult(i, newFailed.size() == 0);
|
||||||
}
|
}
|
||||||
|
// Wait to give the newly failed streamers time to be notified.
|
||||||
|
if (newFailed.size() != 0) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(datanodeRestartTimeout);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
// Do nothing
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,7 +143,8 @@ protected void setupPipelineInternal(DatanodeInfo[] nodes,
|
|||||||
|
|
||||||
// set up the pipeline again with the remaining nodes. when a striped
|
// set up the pipeline again with the remaining nodes. when a striped
|
||||||
// data streamer comes here, it must be in external error state.
|
// data streamer comes here, it must be in external error state.
|
||||||
assert getErrorState().hasExternalError();
|
assert getErrorState().hasExternalError()
|
||||||
|
|| getErrorState().doWaitForRestart();
|
||||||
success = createBlockOutputStream(nodes, nodeStorageTypes,
|
success = createBlockOutputStream(nodes, nodeStorageTypes,
|
||||||
nodeStorageIDs, newGS, true);
|
nodeStorageIDs, newGS, true);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user