HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232285 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
212678f036
commit
0c1450ca5d
|
@ -111,3 +111,5 @@ HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G v
|
|||
HDFS-2772. On transition to active, standby should not swallow ELIE. (atm)
|
||||
|
||||
HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. (Uma Maheswara Rao G via todd)
|
||||
|
||||
HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. (todd)
|
||||
|
|
|
@ -2502,6 +2502,9 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|||
final int curReplicasDelta, int expectedReplicasDelta) {
|
||||
namesystem.writeLock();
|
||||
try {
|
||||
if (!namesystem.isPopulatingReplQueues()) {
|
||||
return;
|
||||
}
|
||||
NumberReplicas repl = countNodes(block);
|
||||
int curExpectedReplicas = getReplication(block);
|
||||
if (isNeededReplication(block, curExpectedReplicas, repl.liveReplicas())) {
|
||||
|
|
|
@ -24,8 +24,11 @@ import java.util.Iterator;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.util.Daemon;
|
||||
import org.junit.Assert;
|
||||
|
||||
public class BlockManagerTestUtil {
|
||||
public static void setNodeReplicationLimit(final BlockManager blockManager,
|
||||
|
@ -144,4 +147,34 @@ public class BlockManagerTestUtil {
|
|||
work += bm.computeReplicationWork(Integer.MAX_VALUE);
|
||||
return work;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure that the given NameNode marks the specified DataNode as
|
||||
* entirely dead/expired.
* <p>
* Runs under the namesystem write lock: looks for the descriptor whose name
* equals dnName among the datanodes returned by the HeartbeatManager, fails
* a JUnit assertion if none matches, then sets that descriptor's last-update
* time to 0 and runs a heartbeat check while holding the HeartbeatManager's
* monitor.
|
||||
* @param nn the NameNode to manipulate
|
||||
* @param dnName the name of the DataNode
|
||||
*/
|
||||
public static void noticeDeadDatanode(NameNode nn, String dnName) {
|
||||
FSNamesystem namesystem = nn.getNamesystem();
|
||||
// Everything below, including the heartbeat check, runs with the
// namesystem write lock held; it is released in the finally block.
namesystem.writeLock();
|
||||
try {
|
||||
DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
|
||||
HeartbeatManager hbm = dnm.getHeartbeatManager();
|
||||
DatanodeDescriptor[] dnds = hbm.getDatanodes();
|
||||
DatanodeDescriptor theDND = null;
|
||||
// The scan does not stop at the first match, so if several descriptors
// carried the same name the last one would be used.
for (DatanodeDescriptor dnd : dnds) {
|
||||
if (dnd.getName().equals(dnName)) {
|
||||
theDND = dnd;
|
||||
}
|
||||
}
|
||||
// Fail with a descriptive message rather than a NullPointerException below.
Assert.assertNotNull("Could not find DN with name: " + dnName, theDND);
|
||||
|
||||
// Zero the last-update time and run the heartbeat check in the same
// critical section. Presumably a last-update time of 0 makes the check
// treat the node as expired — confirm against HeartbeatManager.
synchronized (hbm) {
|
||||
theDND.setLastUpdate(0);
|
||||
hbm.heartbeatCheck();
|
||||
}
|
||||
} finally {
|
||||
namesystem.writeUnlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -81,15 +81,8 @@ public class TestNodeCount extends TestCase {
|
|||
DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
|
||||
|
||||
// make sure that NN detects that the datanode is down
|
||||
try {
|
||||
namesystem.writeLock();
|
||||
synchronized (hm) {
|
||||
datanode.setLastUpdate(0); // mark it dead
|
||||
hm.heartbeatCheck();
|
||||
}
|
||||
} finally {
|
||||
namesystem.writeUnlock();
|
||||
}
|
||||
BlockManagerTestUtil.noticeDeadDatanode(
|
||||
cluster.getNameNode(), datanode.getName());
|
||||
|
||||
// the block will be replicated
|
||||
DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
|
||||
|
@ -121,16 +114,8 @@ public class TestNodeCount extends TestCase {
|
|||
// bring down non excessive datanode
|
||||
dnprop = cluster.stopDataNode(nonExcessDN.getName());
|
||||
// make sure that NN detects that the datanode is down
|
||||
|
||||
try {
|
||||
namesystem.writeLock();
|
||||
synchronized(hm) {
|
||||
nonExcessDN.setLastUpdate(0); // mark it dead
|
||||
hm.heartbeatCheck();
|
||||
}
|
||||
} finally {
|
||||
namesystem.writeUnlock();
|
||||
}
|
||||
BlockManagerTestUtil.noticeDeadDatanode(
|
||||
cluster.getNameNode(), nonExcessDN.getName());
|
||||
|
||||
// The block should be replicated
|
||||
initializeTimeout(TIMEOUT);
|
||||
|
|
|
@ -33,13 +33,16 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.AppendTestUtil;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||
import org.apache.hadoop.hdfs.HAUtil;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
|
||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
|
@ -132,6 +135,81 @@ public class TestStandbyIsHot {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Regression test for HDFS-2795:
|
||||
* - Start an HA cluster with a DN.
|
||||
* - Write several blocks to the FS with replication 1.
|
||||
* - Shut down the DN.
|
||||
* - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
|
||||
* - Restart the DN.
|
||||
* In the bug, the standby node would only very slowly notice the blocks returning
|
||||
* to the cluster.
* <p>
* The test checks both the under-replicated block counts reported by each
* NameNode and the block locations returned by the standby, before and after
* the DataNode restart.
|
||||
*/
|
||||
@Test
|
||||
public void testDatanodeRestarts() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
// 1024-byte blocks, so the 5*1024-byte file written below spans the
// 5 blocks that the assertions count.
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
|
||||
// We read from the standby to watch block locations
|
||||
HAUtil.setAllowStandbyReads(conf, true);
|
||||
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
|
||||
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
||||
.numDataNodes(1)
|
||||
.build();
|
||||
try {
|
||||
NameNode nn0 = cluster.getNameNode(0);
|
||||
NameNode nn1 = cluster.getNameNode(1);
|
||||
// Set the edit log tailer's sleep time on nn1 to 250 (presumably
// milliseconds — confirm) and interrupt it.
// NOTE(review): the interrupt looks intended to make the new sleep time
// take effect immediately — confirm against EditLogTailer.
nn1.getNamesystem().getEditLogTailer().setSleepTime(250);
|
||||
nn1.getNamesystem().getEditLogTailer().interrupt();
|
||||
|
||||
// nn0 becomes the active NameNode; nn1 is used as the standby below.
cluster.transitionToActive(0);
|
||||
|
||||
// Create 5 blocks.
|
||||
DFSTestUtil.createFile(cluster.getFileSystem(0),
|
||||
TEST_FILE_PATH, 5*1024, (short)1, 1L);
|
||||
|
||||
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
|
||||
|
||||
// Stop the DN.
|
||||
DataNode dn = cluster.getDataNodes().get(0);
|
||||
// Capture the DN's name before stopping it; it is passed to
// noticeDeadDatanode below.
String dnName = dn.getDatanodeId().getName();
|
||||
DataNodeProperties dnProps = cluster.stopDataNode(0);
|
||||
|
||||
// Make sure both NNs register it as dead.
|
||||
BlockManagerTestUtil.noticeDeadDatanode(nn0, dnName);
|
||||
BlockManagerTestUtil.noticeDeadDatanode(nn1, dnName);
|
||||
|
||||
BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
|
||||
BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
|
||||
// On nn0, all 5 blocks are expected to be counted as under-replicated
// now that the only DN is dead.
assertEquals(5, nn0.getNamesystem().getUnderReplicatedBlocks());
|
||||
|
||||
// The SBN will not have any blocks in its neededReplication queue
|
||||
// since the SBN doesn't process replication.
|
||||
assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
|
||||
|
||||
// Query the standby directly; the arguments look like offset 0, length 1,
// i.e. only the first block of the file — confirm against the RPC API.
LocatedBlocks locs = nn1.getRpcServer().getBlockLocations(
|
||||
TEST_FILE, 0, 1);
|
||||
assertEquals("Standby should have registered that the block has no replicas",
|
||||
0, locs.get(0).getLocations().length);
|
||||
|
||||
cluster.restartDataNode(dnProps);
|
||||
// Wait for both NNs to re-register the DN.
|
||||
cluster.waitActive(0);
|
||||
cluster.waitActive(1);
|
||||
|
||||
// NOTE(review): nothing visible here waits for the restarted DN's block
// report to be processed by nn1 before the assertions below. Confirm that
// waitActive guarantees this; otherwise the location check may be racy.
BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
|
||||
BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
|
||||
assertEquals(0, nn0.getNamesystem().getUnderReplicatedBlocks());
|
||||
assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
|
||||
|
||||
// Same standby query as before the restart; the first block should now
// report exactly one location again.
locs = nn1.getRpcServer().getBlockLocations(
|
||||
TEST_FILE, 0, 1);
|
||||
assertEquals("Standby should have registered that the block has replicas again",
|
||||
1, locs.get(0).getLocations().length);
|
||||
} finally {
|
||||
// Always tear the mini cluster down, even when an assertion fails.
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
static void waitForBlockLocations(final MiniDFSCluster cluster,
|
||||
final NameNode nn,
|
||||
final String path, final int expectedReplicas)
|
||||
|
|
Loading…
Reference in New Issue