diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index ea209bb1b50..a837c29abd7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -180,6 +180,9 @@ Release 2.4.0 - UNRELEASED
 
     HDFS-5942. Fix javadoc in OfflineImageViewer. (Akira Ajisaka via cnauroth)
 
+    HDFS-5780. TestRBWBlockInvalidation times out intermittently. (Mit Desai
+    via kihwal)
+
   BREAKDOWN OF HDFS-5698 SUBTASKS AND RELATED JIRAS
 
     HDFS-5717. Save FSImage header in protobuf. (Haohui Mai via jing9)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java
index 2e5d70b0965..e909dc9d800 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRBWBlockInvalidation.java
@@ -66,7 +66,7 @@ private static NumberReplicas countReplicas(final FSNamesystem namesystem,
    * datanode, namenode should ask to invalidate that corrupted block and
    * schedule replication for one more replica for that under replicated block.
    */
-  @Test(timeout=60000)
+  @Test(timeout=600000)
   public void testBlockInvalidationWhenRBWReplicaMissedInDN()
       throws IOException, InterruptedException {
     // This test cannot pass on Windows due to file locking enforcement.  It will
@@ -75,7 +75,7 @@ public void testBlockInvalidationWhenRBWReplicaMissedInDN()
 
     Configuration conf = new HdfsConfiguration();
     conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2);
-    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 100);
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 300);
     conf.setLong(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
     conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
@@ -104,23 +104,24 @@ public void testBlockInvalidationWhenRBWReplicaMissedInDN()
           metaFile.delete());
 
       out.close();
-      
-      // Check datanode has reported the corrupt block.
-      int corruptReplicas = 0;
+
+      int liveReplicas = 0;
       while (true) {
-        if ((corruptReplicas = countReplicas(namesystem, blk).corruptReplicas()) > 0) {
+        if ((liveReplicas = countReplicas(namesystem, blk).liveReplicas()) < 2) {
+          // This confirms we have a corrupt replica
+          LOG.info("Live Replicas after corruption: " + liveReplicas);
           break;
         }
         Thread.sleep(100);
       }
-      assertEquals("There should be 1 replica in the corruptReplicasMap", 1,
-          corruptReplicas);
-
-      // Check the block has got replicated to another datanode.
-      blk = DFSTestUtil.getFirstBlock(fs, testPath);
-      int liveReplicas = 0;
+      assertEquals("There should be less than 2 replicas in the "
+          + "liveReplicasMap", 1, liveReplicas);
+
       while (true) {
-        if ((liveReplicas = countReplicas(namesystem, blk).liveReplicas()) > 1) {
+        if ((liveReplicas =
+            countReplicas(namesystem, blk).liveReplicas()) > 1) {
+          // Wait till the live replica count becomes equal to Replication Factor
+          LOG.info("Live Replicas after Rereplication: " + liveReplicas);
           break;
         }
         Thread.sleep(100);
@@ -128,9 +129,9 @@ public void testBlockInvalidationWhenRBWReplicaMissedInDN()
 
       assertEquals("There should be two live replicas", 2, liveReplicas);
 
-      // sleep for 1 second, so that by this time datanode reports the corrupt
+      // sleep for 2 seconds, so that by this time datanode reports the corrupt
       // block after a live replica of block got replicated.
-      Thread.sleep(1000);
+      Thread.sleep(2000);
 
       // Check that there is no corrupt block in the corruptReplicasMap.
       assertEquals("There should not be any replica in the corruptReplicasMap",
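
Not part of the patch, but a minimal, self-contained sketch of the retry idiom the new test code relies on: poll a replica count, sleep briefly between samples, and let the (now larger) JUnit timeout bound the overall wait instead of asserting after a fixed delay. The WaitForReplicas class, the waitUntilAtLeast helper, and the IntSupplier stand-in for countReplicas(namesystem, blk).liveReplicas() are hypothetical and are not Hadoop APIs.

    import java.util.concurrent.TimeUnit;
    import java.util.function.IntSupplier;

    public class WaitForReplicas {

      // Poll the supplied replica count until it reaches the expected value,
      // sleeping 100 ms between samples, mirroring the while/Thread.sleep(100)
      // loops added in the patch. The IntSupplier is a hypothetical stand-in
      // for countReplicas(namesystem, blk).liveReplicas().
      static int waitUntilAtLeast(IntSupplier liveReplicas, int expected)
          throws InterruptedException {
        int current;
        while ((current = liveReplicas.getAsInt()) < expected) {
          TimeUnit.MILLISECONDS.sleep(100);
        }
        return current;
      }

      public static void main(String[] args) throws InterruptedException {
        // Toy source that reports re-replication after a few polls.
        final int[] polls = {0};
        int live = waitUntilAtLeast(() -> ++polls[0] >= 5 ? 2 : 1, 2);
        System.out.println("Live replicas after re-replication: " + live);
      }
    }

Compared with a single fixed Thread.sleep(), polling keeps the wait as short as the cluster allows while still tolerating slow block reports.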