HDFS-16127. Improper pipeline close recovery causes a permanent write failure or data loss. Contributed by Kihwal Lee.

(cherry picked from commit 47002719f2)
This commit is contained in:
Kihwal Lee 2021-07-16 14:23:36 -05:00
parent 14a3e74c5c
commit 95a70c0fdf
1 changed file with 13 additions and 1 deletion

View File

@@ -778,7 +778,19 @@ public void run() {
// Is this block full? // Is this block full?
if (one.isLastPacketInBlock()) { if (one.isLastPacketInBlock()) {
// wait for the close packet has been acked // wait for the close packet has been acked
waitForAllAcks(); try {
waitForAllAcks();
} catch (IOException ioe) {
// No need to do a close recovery if the last packet was acked.
// i.e. ackQueue is empty. waitForAllAcks() can get an exception
// (e.g. connection reset) while sending a heartbeat packet,
// if the DN sends the final ack and closes the connection.
synchronized (dataQueue) {
if (!ackQueue.isEmpty()) {
throw ioe;
}
}
}
if (shouldStop()) { if (shouldStop()) {
continue; continue;
} }