HDFS-3921. NN will prematurely consider blocks missing when entering active state while still in safe mode. Contributed by Aaron T. Myers.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1408531 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Aaron Myers 2012-11-13 01:26:20 +00:00
parent 416c7ded54
commit b3e42a1ed5
3 changed files with 36 additions and 3 deletions

View File

@ -580,6 +580,9 @@ Release 2.0.3-alpha - Unreleased
HDFS-4164. fuse_dfs: add -lrt to the compiler command line on Linux.
(Colin Patrick McCabe via eli)
HDFS-3921. NN will prematurely consider blocks missing when entering active
state while still in safe mode. (atm)
Release 2.0.2-alpha - 2012-09-07
INCOMPATIBLE CHANGES

View File

@ -660,13 +660,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
LOG.info("Catching up to latest edits from old active before " +
"taking over writer role in edits logs");
editLogTailer.catchupDuringFailover();
blockManager.setPostponeBlocksFromFuture(false);
LOG.info("Reprocessing replication and invalidation queues");
blockManager.setPostponeBlocksFromFuture(false);
blockManager.getDatanodeManager().markAllDatanodesStale();
blockManager.clearQueues();
blockManager.processAllPendingDNMessages();
if (!isInSafeMode() ||
(isInSafeMode() && safeMode.isPopulatingReplQueues())) {
LOG.info("Reprocessing replication and invalidation queues");
blockManager.processMisReplicatedBlocks();
}
if (LOG.isDebugEnabled()) {
LOG.debug("NameNode metadata after re-processing " +

View File

@ -630,6 +630,32 @@ public class TestHASafeMode {
assertEquals(0L, nn1.getNamesystem().getPendingReplicationBlocks());
}
/**
* Make sure that when we transition to active in safe mode that we don't
* prematurely consider blocks missing just because not all DNs have reported
* yet.
*
* This is a regression test for HDFS-3921.
*/
@Test
public void testNoPopulatingReplQueuesWhenStartingActiveInSafeMode()
throws IOException {
DFSTestUtil.createFile(fs, new Path("/test"), 15*BLOCK_SIZE, (short)3, 1L);
// Stop the DN so that when the NN restarts not all blocks wil be reported
// and the NN won't leave safe mode.
cluster.stopDataNode(1);
// Restart the namenode but don't wait for it to hear from all DNs (since
// one DN is deliberately shut down.)
cluster.restartNameNode(0, false);
cluster.transitionToActive(0);
assertTrue(cluster.getNameNode(0).isInSafeMode());
// We shouldn't yet consider any blocks "missing" since we're in startup
// safemode, i.e. not all DNs may have reported.
assertEquals(0, cluster.getNamesystem(0).getMissingBlocksCount());
}
/**
* Print a big banner in the test log to make debug easier.
*/