From 0c1450ca5d922b5bf713bb8bb17459dc11a97330 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 17 Jan 2012 03:21:08 +0000 Subject: [PATCH] HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232285 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../server/blockmanagement/BlockManager.java | 3 + .../blockmanagement/BlockManagerTestUtil.java | 33 ++++++++ .../server/blockmanagement/TestNodeCount.java | 23 +----- .../server/namenode/ha/TestStandbyIsHot.java | 78 +++++++++++++++++++ 5 files changed, 120 insertions(+), 19 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index e5f5ede8d90..605e7e37d2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -111,3 +111,5 @@ HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G v HDFS-2772. On transition to active, standby should not swallow ELIE. (atm) HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. (Uma Maheswara Rao G via todd) + +HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index ce01502972b..551bcd13012 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2502,6 +2502,9 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block final int curReplicasDelta, int expectedReplicasDelta) { namesystem.writeLock(); try { + if (!namesystem.isPopulatingReplQueues()) { + return; + } NumberReplicas repl = countNodes(block); int curExpectedReplicas = getReplication(block); if (isNeededReplication(block, curExpectedReplicas, repl.liveReplicas())) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java index 38de3deba81..66c10ceb253 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java @@ -24,8 +24,11 @@ import java.util.Iterator; import java.util.Set; import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.util.Daemon; +import org.junit.Assert; public class BlockManagerTestUtil { public static void setNodeReplicationLimit(final BlockManager blockManager, @@ -144,4 +147,34 @@ public class BlockManagerTestUtil { work += 
bm.computeReplicationWork(Integer.MAX_VALUE); return work; } + + /** + * Ensure that the given NameNode marks the specified DataNode as + * entirely dead/expired: while holding the namesystem write lock and the HeartbeatManager monitor, the DN's last-update time is set to 0 and heartbeatCheck() is run. + * @param nn the NameNode to manipulate + * @param dnName the name of the DataNode, matched against DatanodeDescriptor.getName(); a JUnit assertion fails if no descriptor returned by the HeartbeatManager has this name + */ + public static void noticeDeadDatanode(NameNode nn, String dnName) { + FSNamesystem namesystem = nn.getNamesystem(); + namesystem.writeLock(); + try { + DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager(); + HeartbeatManager hbm = dnm.getHeartbeatManager(); + DatanodeDescriptor[] dnds = hbm.getDatanodes(); + DatanodeDescriptor theDND = null; + for (DatanodeDescriptor dnd : dnds) { + if (dnd.getName().equals(dnName)) { + theDND = dnd; + } + } + Assert.assertNotNull("Could not find DN with name: " + dnName, theDND); + + synchronized (hbm) { + theDND.setLastUpdate(0); + hbm.heartbeatCheck(); + } + } finally { + namesystem.writeUnlock(); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java index 986ca13ed1f..d47f1103446 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java @@ -81,15 +81,8 @@ public class TestNodeCount extends TestCase { DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName()); // make sure that NN detects that the datanode is down - try { - namesystem.writeLock(); - synchronized (hm) { - datanode.setLastUpdate(0); // mark it dead - hm.heartbeatCheck(); - } - } finally { - namesystem.writeUnlock(); - } + BlockManagerTestUtil.noticeDeadDatanode( + cluster.getNameNode(), datanode.getName()); // the block will be replicated DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR); @@ -121,16 
+114,8 @@ public class TestNodeCount extends TestCase { // bring down non excessive datanode dnprop = cluster.stopDataNode(nonExcessDN.getName()); // make sure that NN detects that the datanode is down - - try { - namesystem.writeLock(); - synchronized(hm) { - nonExcessDN.setLastUpdate(0); // mark it dead - hm.heartbeatCheck(); - } - } finally { - namesystem.writeUnlock(); - } + BlockManagerTestUtil.noticeDeadDatanode( + cluster.getNameNode(), nonExcessDN.getName()); // The block should be replicated initializeTimeout(TIMEOUT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index ff87ebcc6fa..7bb8d814d22 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -33,13 +33,16 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.AppendTestUtil; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; @@ -131,6 +134,81 @@ public class TestStandbyIsHot { 
cluster.shutdown(); } } + + /** + * Regression test for HDFS-2795: + * - Start an HA cluster with a DN. + * - Write several blocks to the FS with replication 1. + * - Shut down the DN. + * - Wait for the NNs to declare the DN dead. All blocks will be under-replicated. + * - Restart the DN. + * In the bug, the standby node would only very slowly notice the blocks returning + * to the cluster. The test asserts that, once the DN is back, both NNs report 0 under-replicated blocks and the standby again returns one location for the first block. + */ + @Test + public void testDatanodeRestarts() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024); + // We read from the standby to watch block locations + HAUtil.setAllowStandbyReads(conf, true); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .build(); + try { + NameNode nn0 = cluster.getNameNode(0); + NameNode nn1 = cluster.getNameNode(1); + nn1.getNamesystem().getEditLogTailer().setSleepTime(250); + nn1.getNamesystem().getEditLogTailer().interrupt(); + + cluster.transitionToActive(0); + + // Create 5 blocks (5*1024 bytes at the 1024-byte block size configured above). + DFSTestUtil.createFile(cluster.getFileSystem(0), + TEST_FILE_PATH, 5*1024, (short)1, 1L); + + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + + // Stop the DN. + DataNode dn = cluster.getDataNodes().get(0); + String dnName = dn.getDatanodeId().getName(); + DataNodeProperties dnProps = cluster.stopDataNode(0); + + // Make sure both NNs register it as dead. + BlockManagerTestUtil.noticeDeadDatanode(nn0, dnName); + BlockManagerTestUtil.noticeDeadDatanode(nn1, dnName); + + BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager()); + BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); + assertEquals(5, nn0.getNamesystem().getUnderReplicatedBlocks()); + + // The SBN will not have any blocks in its neededReplication queue + // since the SBN doesn't process replication. 
+ assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks()); + + LocatedBlocks locs = nn1.getRpcServer().getBlockLocations( + TEST_FILE, 0, 1); + assertEquals("Standby should have registered that the block has no replicas", + 0, locs.get(0).getLocations().length); + + cluster.restartDataNode(dnProps); + // Wait for both NNs to re-register the DN. + cluster.waitActive(0); + cluster.waitActive(1); + + BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager()); + BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); + assertEquals(0, nn0.getNamesystem().getUnderReplicatedBlocks()); + assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks()); + + locs = nn1.getRpcServer().getBlockLocations( + TEST_FILE, 0, 1); + assertEquals("Standby should have registered that the block has replicas again", + 1, locs.get(0).getLocations().length); + } finally { + cluster.shutdown(); + } + } static void waitForBlockLocations(final MiniDFSCluster cluster, final NameNode nn,