diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 6b7f83148e6..258c509d295 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -308,6 +308,8 @@ Release 2.7.0 - UNRELEASED HDFS-7252. small refinement to the use of isInAnEZ in FSNamesystem. (Yi Liu via vinayakumarb) + HDFS-7226. Fix TestDNFencing.testQueueingWithAppend. (Yongjun Zhang via jing9) + Release 2.6.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java index 66f301b224d..75d5b70d5e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java @@ -415,6 +415,7 @@ public void testQueueingWithAppend() throws Exception { int numQueued = 0; int numDN = cluster.getDataNodes().size(); + // case 1: create file and call hflush after write FSDataOutputStream out = fs.create(TEST_FILE_PATH); try { AppendTestUtil.write(out, 0, 10); @@ -422,29 +423,65 @@ public void testQueueingWithAppend() throws Exception { // Opening the file will report RBW replicas, but will be // queued on the StandbyNode. + // However, the delivery of RBW messages is delayed by HDFS-7217 fix. + // Apply cluster.triggerBlockReports() to trigger the reporting sooner. + // + cluster.triggerBlockReports(); numQueued += numDN; // RBW messages + + // The cluster.triggerBlockReports() call above does a full + // block report that incurs 3 extra RBW messages + numQueued += numDN; // RBW messages } finally { IOUtils.closeStream(out); numQueued += numDN; // blockReceived messages } - + cluster.triggerBlockReports(); numQueued += numDN; - + assertEquals(numQueued, cluster.getNameNode(1).getNamesystem(). + getPendingDataNodeMessageCount()); + + // case 2: append to file and call hflush after write try { out = fs.append(TEST_FILE_PATH); AppendTestUtil.write(out, 10, 10); - // RBW replicas once it's opened for append - numQueued += numDN; - + out.hflush(); + cluster.triggerBlockReports(); + numQueued += numDN * 2; // RBW messages, see comments in case 1 } finally { IOUtils.closeStream(out); numQueued += numDN; // blockReceived } - + assertEquals(numQueued, cluster.getNameNode(1).getNamesystem(). + getPendingDataNodeMessageCount()); + + // case 3: similar to case 2, except no hflush is called. + try { + out = fs.append(TEST_FILE_PATH); + AppendTestUtil.write(out, 20, 10); + } finally { + // The write operation in the try block is buffered, thus no RBW message + // is reported yet until the closeStream call here. When closeStream is + // called, before HDFS-7217 fix, there would be three RBW messages + // (blockReceiving), plus three FINALIZED messages (blockReceived) + // delivered to NN. However, because of HDFS-7217 fix, the reporting of + // RBW messages is postponed. In this case, they are even overwritten + // by the blockReceived messages of the same block when they are waiting + // to be delivered. All this happens within the closeStream() call. + // What's delivered to NN is the three blockReceived messages. See + // BPServiceActor#addPendingReplicationBlockInfo + // + IOUtils.closeStream(out); + numQueued += numDN; // blockReceived + } + cluster.triggerBlockReports(); numQueued += numDN; + LOG.info("Expect " + numQueued + " and got: " + cluster.getNameNode(1).getNamesystem(). + getPendingDataNodeMessageCount()); + assertEquals(numQueued, cluster.getNameNode(1).getNamesystem(). getPendingDataNodeMessageCount()); @@ -458,7 +495,7 @@ public void testQueueingWithAppend() throws Exception { assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks()); assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks()); - AppendTestUtil.check(fs, TEST_FILE_PATH, 20); + AppendTestUtil.check(fs, TEST_FILE_PATH, 30); } /**