From 95a70c0fdfb2a2534a54d01012c49d4ff23117f6 Mon Sep 17 00:00:00 2001
From: Kihwal Lee
Date: Fri, 16 Jul 2021 14:23:36 -0500
Subject: [PATCH] HDFS-16127. Improper pipeline close recovery causes a permanent write failure or data loss. Contributed by Kihwal Lee.

(cherry picked from commit 47002719f2aa4ff58378d528d38b0f0962a45c25)
---
 .../java/org/apache/hadoop/hdfs/DataStreamer.java | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java
index 110261b0d6b..e1d104e6529 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java
@@ -778,7 +778,19 @@ public void run() {
         // Is this block full?
         if (one.isLastPacketInBlock()) {
           // wait for the close packet has been acked
-          waitForAllAcks();
+          try {
+            waitForAllAcks();
+          } catch (IOException ioe) {
+            // No need to do a close recovery if the last packet was acked.
+            // i.e. ackQueue is empty. waitForAllAcks() can get an exception
+            // (e.g. connection reset) while sending a heartbeat packet,
+            // if the DN sends the final ack and closes the connection.
+            synchronized (dataQueue) {
+              if (!ackQueue.isEmpty()) {
+                throw ioe;
+              }
+            }
+          }
           if (shouldStop()) {
             continue;
           }