HDFS-15798. EC: When a reconstruction task fails, XmitsInProgress of the DN can become negative. Contributed by huhaiyang

(cherry picked from commit 7ef2875c89)
This commit is contained in:
fermi 2021-02-05 11:01:49 +08:00
parent 35c93ef5f3
commit ae80fc2477
1 changed file with 2 additions and 4 deletions

View File

@ -121,7 +121,6 @@ public final class ErasureCodingWorker {
public void processErasureCodingTasks( public void processErasureCodingTasks(
Collection<BlockECReconstructionInfo> ecTasks) { Collection<BlockECReconstructionInfo> ecTasks) {
for (BlockECReconstructionInfo reconInfo : ecTasks) { for (BlockECReconstructionInfo reconInfo : ecTasks) {
int xmitsSubmitted = 0;
try { try {
StripedReconstructionInfo stripedReconInfo = StripedReconstructionInfo stripedReconInfo =
new StripedReconstructionInfo( new StripedReconstructionInfo(
@ -134,20 +133,19 @@ public final class ErasureCodingWorker {
final StripedBlockReconstructor task = final StripedBlockReconstructor task =
new StripedBlockReconstructor(this, stripedReconInfo); new StripedBlockReconstructor(this, stripedReconInfo);
if (task.hasValidTargets()) { if (task.hasValidTargets()) {
stripedReconstructionPool.submit(task);
// See HDFS-12044. We increase xmitsInProgress even the task is only // See HDFS-12044. We increase xmitsInProgress even the task is only
// enqueued, so that // enqueued, so that
// 1) NN will not send more tasks than what DN can execute and // 1) NN will not send more tasks than what DN can execute and
// 2) DN will not throw away reconstruction tasks, and instead keeps // 2) DN will not throw away reconstruction tasks, and instead keeps
// an unbounded number of tasks in the executor's task queue. // an unbounded number of tasks in the executor's task queue.
xmitsSubmitted = Math.max((int)(task.getXmits() * xmitWeight), 1); int xmitsSubmitted = Math.max((int)(task.getXmits() * xmitWeight), 1);
getDatanode().incrementXmitsInProcess(xmitsSubmitted); getDatanode().incrementXmitsInProcess(xmitsSubmitted);
stripedReconstructionPool.submit(task);
} else { } else {
LOG.warn("No missing internal block. Skip reconstruction for task:{}", LOG.warn("No missing internal block. Skip reconstruction for task:{}",
reconInfo); reconInfo);
} }
} catch (Throwable e) { } catch (Throwable e) {
getDatanode().decrementXmitsInProgress(xmitsSubmitted);
LOG.warn("Failed to reconstruct striped block {}", LOG.warn("Failed to reconstruct striped block {}",
reconInfo.getExtendedBlock().getLocalBlock(), e); reconInfo.getExtendedBlock().getLocalBlock(), e);
} }