HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232285 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2012-01-17 03:21:08 +00:00
parent 212678f036
commit 0c1450ca5d
5 changed files with 120 additions and 19 deletions

View File

@ -111,3 +111,5 @@ HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G v
HDFS-2772. On transition to active, standby should not swallow ELIE. (atm)
HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. (Uma Maheswara Rao G via todd)
HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. (todd)

View File

@ -2502,6 +2502,9 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
final int curReplicasDelta, int expectedReplicasDelta) {
namesystem.writeLock();
try {
if (!namesystem.isPopulatingReplQueues()) {
return;
}
NumberReplicas repl = countNodes(block);
int curExpectedReplicas = getReplication(block);
if (isNeededReplication(block, curExpectedReplicas, repl.liveReplicas())) {

View File

@ -24,8 +24,11 @@ import java.util.Iterator;
import java.util.Set;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.util.Daemon;
import org.junit.Assert;
public class BlockManagerTestUtil {
public static void setNodeReplicationLimit(final BlockManager blockManager,
@ -144,4 +147,34 @@ public class BlockManagerTestUtil {
work += bm.computeReplicationWork(Integer.MAX_VALUE);
return work;
}
/**
 * Forces the given NameNode to treat the named DataNode as dead/expired.
 * <p>
 * While holding the namesystem write lock, the DataNode's descriptor is
 * looked up by name among the descriptors known to the heartbeat manager,
 * its last-update time is reset to 0, and a heartbeat check is run
 * immediately. Fails with a JUnit assertion error if no descriptor with
 * that name exists.
 *
 * @param nn the NameNode to manipulate
 * @param dnName the name of the DataNode
 */
public static void noticeDeadDatanode(NameNode nn, String dnName) {
  final FSNamesystem fsn = nn.getNamesystem();
  fsn.writeLock();
  try {
    final HeartbeatManager heartbeats =
        fsn.getBlockManager().getDatanodeManager().getHeartbeatManager();

    // Scan every descriptor; if several share the name, the last one wins.
    DatanodeDescriptor target = null;
    for (DatanodeDescriptor candidate : heartbeats.getDatanodes()) {
      target = candidate.getName().equals(dnName) ? candidate : target;
    }
    Assert.assertNotNull("Could not find DN with name: " + dnName, target);

    // Reset the timestamp and run the check under the heartbeat manager's
    // monitor so the two steps happen together.
    synchronized (heartbeats) {
      target.setLastUpdate(0);
      heartbeats.heartbeatCheck();
    }
  } finally {
    fsn.writeUnlock();
  }
}
}

View File

@ -81,15 +81,8 @@ public class TestNodeCount extends TestCase {
DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
// make sure that NN detects that the datanode is down
try {
namesystem.writeLock();
synchronized (hm) {
datanode.setLastUpdate(0); // mark it dead
hm.heartbeatCheck();
}
} finally {
namesystem.writeUnlock();
}
BlockManagerTestUtil.noticeDeadDatanode(
cluster.getNameNode(), datanode.getName());
// the block will be replicated
DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
@ -121,16 +114,8 @@ public class TestNodeCount extends TestCase {
// bring down non excessive datanode
dnprop = cluster.stopDataNode(nonExcessDN.getName());
// make sure that NN detects that the datanode is down
try {
namesystem.writeLock();
synchronized(hm) {
nonExcessDN.setLastUpdate(0); // mark it dead
hm.heartbeatCheck();
}
} finally {
namesystem.writeUnlock();
}
BlockManagerTestUtil.noticeDeadDatanode(
cluster.getNameNode(), nonExcessDN.getName());
// The block should be replicated
initializeTimeout(TIMEOUT);

View File

@ -33,13 +33,16 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.AppendTestUtil;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
@ -132,6 +135,81 @@ public class TestStandbyIsHot {
}
}
/**
 * Regression test for HDFS-2795.
 * <ol>
 *   <li>Start an HA cluster with a single DN.</li>
 *   <li>Write several blocks to the FS with replication 1.</li>
 *   <li>Shut down the DN.</li>
 *   <li>Make both NNs declare the DN dead, so that every block is
 *       under-replicated.</li>
 *   <li>Restart the DN.</li>
 * </ol>
 * With the bug present, the standby node would only very slowly notice the
 * blocks returning to the cluster.
 */
@Test
public void testDatanodeRestarts() throws Exception {
  final int blockSize = 1024;
  final int numBlocks = 5;

  Configuration config = new Configuration();
  config.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  // Block locations are read from the standby, so standby reads must be on.
  HAUtil.setAllowStandbyReads(config, true);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(config)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(1)
      .build();
  try {
    NameNode activeNN = cluster.getNameNode(0);
    NameNode standbyNN = cluster.getNameNode(1);

    // Shorten the standby's edit-log tailer sleep time and interrupt it.
    standbyNN.getNamesystem().getEditLogTailer().setSleepTime(250);
    standbyNN.getNamesystem().getEditLogTailer().interrupt();

    cluster.transitionToActive(0);

    // Write a file spanning numBlocks blocks, each with a single replica.
    DFSTestUtil.createFile(cluster.getFileSystem(0),
        TEST_FILE_PATH, numBlocks * blockSize, (short) 1, 1L);
    HATestUtil.waitForStandbyToCatchUp(activeNN, standbyNN);

    // Take the only DN down, remembering how to bring it back.
    String dnName = cluster.getDataNodes().get(0).getDatanodeId().getName();
    DataNodeProperties stoppedDn = cluster.stopDataNode(0);

    // Force both NNs to register the DN as dead.
    BlockManagerTestUtil.noticeDeadDatanode(activeNN, dnName);
    BlockManagerTestUtil.noticeDeadDatanode(standbyNN, dnName);

    BlockManagerTestUtil.updateState(
        activeNN.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(
        standbyNN.getNamesystem().getBlockManager());
    assertEquals(numBlocks,
        activeNN.getNamesystem().getUnderReplicatedBlocks());

    // The SBN doesn't process replication, so its neededReplication queue
    // is expected to stay empty.
    assertEquals(0, standbyNN.getNamesystem().getUnderReplicatedBlocks());

    LocatedBlocks located = standbyNN.getRpcServer().getBlockLocations(
        TEST_FILE, 0, 1);
    assertEquals(
        "Standby should have registered that the block has no replicas",
        0, located.get(0).getLocations().length);

    cluster.restartDataNode(stoppedDn);
    // Wait for both NNs to re-register the DN.
    cluster.waitActive(0);
    cluster.waitActive(1);

    BlockManagerTestUtil.updateState(
        activeNN.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(
        standbyNN.getNamesystem().getBlockManager());
    assertEquals(0, activeNN.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, standbyNN.getNamesystem().getUnderReplicatedBlocks());

    located = standbyNN.getRpcServer().getBlockLocations(
        TEST_FILE, 0, 1);
    assertEquals(
        "Standby should have registered that the block has replicas again",
        1, located.get(0).getLocations().length);
  } finally {
    cluster.shutdown();
  }
}
static void waitForBlockLocations(final MiniDFSCluster cluster,
final NameNode nn,
final String path, final int expectedReplicas)