From b3e42a1ed56f83e2cc35e58f2ffd02c9ff3821e0 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 13 Nov 2012 01:26:20 +0000 Subject: [PATCH] HDFS-3921. NN will prematurely consider blocks missing when entering active state while still in safe mode. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1408531 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hdfs/server/namenode/FSNamesystem.java | 10 ++++--- .../server/namenode/ha/TestHASafeMode.java | 26 +++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 55324ad8812..a5d03a724e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -580,6 +580,9 @@ Release 2.0.3-alpha - Unreleased HDFS-4164. fuse_dfs: add -lrt to the compiler command line on Linux. (Colin Patrick McCabe via eli) + HDFS-3921. NN will prematurely consider blocks missing when entering active + state while still in safe mode. 
(atm) + Release 2.0.2-alpha - 2012-09-07 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index e24603018b7..f3a0d280464 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -660,13 +660,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LOG.info("Catching up to latest edits from old active before " + "taking over writer role in edits logs"); editLogTailer.catchupDuringFailover(); - blockManager.setPostponeBlocksFromFuture(false); - LOG.info("Reprocessing replication and invalidation queues"); + blockManager.setPostponeBlocksFromFuture(false); blockManager.getDatanodeManager().markAllDatanodesStale(); blockManager.clearQueues(); blockManager.processAllPendingDNMessages(); - blockManager.processMisReplicatedBlocks(); + + if (!isInSafeMode() || + (isInSafeMode() && safeMode.isPopulatingReplQueues())) { + LOG.info("Reprocessing replication and invalidation queues"); + blockManager.processMisReplicatedBlocks(); + } if (LOG.isDebugEnabled()) { LOG.debug("NameNode metadata after re-processing " + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index f385ea43a2a..75e410d5a6b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -630,6 +630,32 @@ public class TestHASafeMode { assertEquals(0L, 
nn1.getNamesystem().getPendingReplicationBlocks()); } + /** + * Make sure that when we transition to active in safe mode that we don't + * prematurely consider blocks missing just because not all DNs have reported + * yet. + * + * This is a regression test for HDFS-3921. + */ + @Test + public void testNoPopulatingReplQueuesWhenStartingActiveInSafeMode() + throws IOException { + DFSTestUtil.createFile(fs, new Path("/test"), 15*BLOCK_SIZE, (short)3, 1L); + + // Stop the DN so that when the NN restarts not all blocks will be reported + // and the NN won't leave safe mode. + cluster.stopDataNode(1); + // Restart the namenode but don't wait for it to hear from all DNs (since + // one DN is deliberately shut down.) + cluster.restartNameNode(0, false); + cluster.transitionToActive(0); + + assertTrue(cluster.getNameNode(0).isInSafeMode()); + // We shouldn't yet consider any blocks "missing" since we're in startup + // safemode, i.e. not all DNs may have reported. + assertEquals(0, cluster.getNamesystem(0).getMissingBlocksCount()); + } + /** * Print a big banner in the test log to make debug easier. */