diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index c62044b39ed..523875b49bb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -226,6 +226,9 @@ Release 2.0.3-alpha - Unreleased HDFS-4164. fuse_dfs: add -lrt to the compiler command line on Linux. (Colin Patrick McCabe via eli) + HDFS-3921. NN will prematurely consider blocks missing when entering active + state while still in safe mode. (atm) + Release 2.0.2-alpha - 2012-09-07 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 3d8c0dea2ef..d11a1619a6b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -643,13 +643,17 @@ void startActiveServices() throws IOException { LOG.info("Catching up to latest edits from old active before " + "taking over writer role in edits logs"); editLogTailer.catchupDuringFailover(); - blockManager.setPostponeBlocksFromFuture(false); - LOG.info("Reprocessing replication and invalidation queues"); + blockManager.setPostponeBlocksFromFuture(false); blockManager.getDatanodeManager().markAllDatanodesStale(); blockManager.clearQueues(); blockManager.processAllPendingDNMessages(); - blockManager.processMisReplicatedBlocks(); + + if (!isInSafeMode() || + (isInSafeMode() && safeMode.isPopulatingReplQueues())) { + LOG.info("Reprocessing replication and invalidation queues"); + blockManager.processMisReplicatedBlocks(); + } if (LOG.isDebugEnabled()) { LOG.debug("NameNode metadata after re-processing " + diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index ee704462851..772b52598c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -630,6 +630,32 @@ public Boolean get() { assertEquals(0L, nn1.getNamesystem().getPendingReplicationBlocks()); } + /** + * Make sure that when we transition to active in safe mode that we don't + * prematurely consider blocks missing just because not all DNs have reported + * yet. + * + * This is a regression test for HDFS-3921. + */ + @Test + public void testNoPopulatingReplQueuesWhenStartingActiveInSafeMode() + throws IOException { + DFSTestUtil.createFile(fs, new Path("/test"), 15*BLOCK_SIZE, (short)3, 1L); + + // Stop the DN so that when the NN restarts not all blocks will be reported + // and the NN won't leave safe mode. + cluster.stopDataNode(1); + // Restart the namenode but don't wait for it to hear from all DNs (since + // one DN is deliberately shut down.) + cluster.restartNameNode(0, false); + cluster.transitionToActive(0); + + assertTrue(cluster.getNameNode(0).isInSafeMode()); + // We shouldn't yet consider any blocks "missing" since we're in startup + // safemode, i.e. not all DNs may have reported. + assertEquals(0, cluster.getNamesystem(0).getMissingBlocksCount()); + } + /** * Print a big banner in the test log to make debug easier. */