HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232285 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
212678f036
commit
0c1450ca5d
|
@ -111,3 +111,5 @@ HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G v
|
||||||
HDFS-2772. On transition to active, standby should not swallow ELIE. (atm)
|
HDFS-2772. On transition to active, standby should not swallow ELIE. (atm)
|
||||||
|
|
||||||
HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. (Uma Maheswara Rao G via todd)
|
HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. (Uma Maheswara Rao G via todd)
|
||||||
|
|
||||||
|
HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. (todd)
|
||||||
|
|
|
@ -2502,6 +2502,9 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
||||||
final int curReplicasDelta, int expectedReplicasDelta) {
|
final int curReplicasDelta, int expectedReplicasDelta) {
|
||||||
namesystem.writeLock();
|
namesystem.writeLock();
|
||||||
try {
|
try {
|
||||||
|
if (!namesystem.isPopulatingReplQueues()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
NumberReplicas repl = countNodes(block);
|
NumberReplicas repl = countNodes(block);
|
||||||
int curExpectedReplicas = getReplication(block);
|
int curExpectedReplicas = getReplication(block);
|
||||||
if (isNeededReplication(block, curExpectedReplicas, repl.liveReplicas())) {
|
if (isNeededReplication(block, curExpectedReplicas, repl.liveReplicas())) {
|
||||||
|
|
|
@ -24,8 +24,11 @@ import java.util.Iterator;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.hadoop.hdfs.protocol.Block;
|
import org.apache.hadoop.hdfs.protocol.Block;
|
||||||
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
import org.apache.hadoop.util.Daemon;
|
import org.apache.hadoop.util.Daemon;
|
||||||
|
import org.junit.Assert;
|
||||||
|
|
||||||
public class BlockManagerTestUtil {
|
public class BlockManagerTestUtil {
|
||||||
public static void setNodeReplicationLimit(final BlockManager blockManager,
|
public static void setNodeReplicationLimit(final BlockManager blockManager,
|
||||||
|
@ -144,4 +147,34 @@ public class BlockManagerTestUtil {
|
||||||
work += bm.computeReplicationWork(Integer.MAX_VALUE);
|
work += bm.computeReplicationWork(Integer.MAX_VALUE);
|
||||||
return work;
|
return work;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensure that the given NameNode marks the specified DataNode as
|
||||||
|
* entirely dead/expired.
|
||||||
|
* @param nn the NameNode to manipulate
|
||||||
|
* @param dnName the name of the DataNode
|
||||||
|
*/
|
||||||
|
public static void noticeDeadDatanode(NameNode nn, String dnName) {
|
||||||
|
FSNamesystem namesystem = nn.getNamesystem();
|
||||||
|
namesystem.writeLock();
|
||||||
|
try {
|
||||||
|
DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
|
||||||
|
HeartbeatManager hbm = dnm.getHeartbeatManager();
|
||||||
|
DatanodeDescriptor[] dnds = hbm.getDatanodes();
|
||||||
|
DatanodeDescriptor theDND = null;
|
||||||
|
for (DatanodeDescriptor dnd : dnds) {
|
||||||
|
if (dnd.getName().equals(dnName)) {
|
||||||
|
theDND = dnd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Assert.assertNotNull("Could not find DN with name: " + dnName, theDND);
|
||||||
|
|
||||||
|
synchronized (hbm) {
|
||||||
|
theDND.setLastUpdate(0);
|
||||||
|
hbm.heartbeatCheck();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
namesystem.writeUnlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,15 +81,8 @@ public class TestNodeCount extends TestCase {
|
||||||
DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
|
DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
|
||||||
|
|
||||||
// make sure that NN detects that the datanode is down
|
// make sure that NN detects that the datanode is down
|
||||||
try {
|
BlockManagerTestUtil.noticeDeadDatanode(
|
||||||
namesystem.writeLock();
|
cluster.getNameNode(), datanode.getName());
|
||||||
synchronized (hm) {
|
|
||||||
datanode.setLastUpdate(0); // mark it dead
|
|
||||||
hm.heartbeatCheck();
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
namesystem.writeUnlock();
|
|
||||||
}
|
|
||||||
|
|
||||||
// the block will be replicated
|
// the block will be replicated
|
||||||
DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
|
DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
|
||||||
|
@ -121,16 +114,8 @@ public class TestNodeCount extends TestCase {
|
||||||
// bring down non excessive datanode
|
// bring down non excessive datanode
|
||||||
dnprop = cluster.stopDataNode(nonExcessDN.getName());
|
dnprop = cluster.stopDataNode(nonExcessDN.getName());
|
||||||
// make sure that NN detects that the datanode is down
|
// make sure that NN detects that the datanode is down
|
||||||
|
BlockManagerTestUtil.noticeDeadDatanode(
|
||||||
try {
|
cluster.getNameNode(), nonExcessDN.getName());
|
||||||
namesystem.writeLock();
|
|
||||||
synchronized(hm) {
|
|
||||||
nonExcessDN.setLastUpdate(0); // mark it dead
|
|
||||||
hm.heartbeatCheck();
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
namesystem.writeUnlock();
|
|
||||||
}
|
|
||||||
|
|
||||||
// The block should be replicated
|
// The block should be replicated
|
||||||
initializeTimeout(TIMEOUT);
|
initializeTimeout(TIMEOUT);
|
||||||
|
|
|
@ -33,13 +33,16 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.AppendTestUtil;
|
import org.apache.hadoop.hdfs.AppendTestUtil;
|
||||||
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||||
import org.apache.hadoop.hdfs.HAUtil;
|
import org.apache.hadoop.hdfs.HAUtil;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
|
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter;
|
import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
|
@ -131,6 +134,81 @@ public class TestStandbyIsHot {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Regression test for HDFS-2795:
|
||||||
|
* - Start an HA cluster with a DN.
|
||||||
|
* - Write several blocks to the FS with replication 1.
|
||||||
|
* - Shut down the DN.
|
||||||
|
* - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
|
||||||
|
* - Restart the DN.
|
||||||
|
* Before the fix, the standby node would only very slowly notice the blocks returning
|
||||||
|
* to the cluster.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDatanodeRestarts() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
|
||||||
|
// We read from the standby to watch block locations
|
||||||
|
HAUtil.setAllowStandbyReads(conf, true);
|
||||||
|
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
|
||||||
|
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
||||||
|
.numDataNodes(1)
|
||||||
|
.build();
|
||||||
|
try {
|
||||||
|
NameNode nn0 = cluster.getNameNode(0);
|
||||||
|
NameNode nn1 = cluster.getNameNode(1);
|
||||||
|
nn1.getNamesystem().getEditLogTailer().setSleepTime(250);
|
||||||
|
nn1.getNamesystem().getEditLogTailer().interrupt();
|
||||||
|
|
||||||
|
cluster.transitionToActive(0);
|
||||||
|
|
||||||
|
// Create 5 blocks.
|
||||||
|
DFSTestUtil.createFile(cluster.getFileSystem(0),
|
||||||
|
TEST_FILE_PATH, 5*1024, (short)1, 1L);
|
||||||
|
|
||||||
|
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
|
||||||
|
|
||||||
|
// Stop the DN.
|
||||||
|
DataNode dn = cluster.getDataNodes().get(0);
|
||||||
|
String dnName = dn.getDatanodeId().getName();
|
||||||
|
DataNodeProperties dnProps = cluster.stopDataNode(0);
|
||||||
|
|
||||||
|
// Make sure both NNs register it as dead.
|
||||||
|
BlockManagerTestUtil.noticeDeadDatanode(nn0, dnName);
|
||||||
|
BlockManagerTestUtil.noticeDeadDatanode(nn1, dnName);
|
||||||
|
|
||||||
|
BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
|
||||||
|
BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
|
||||||
|
assertEquals(5, nn0.getNamesystem().getUnderReplicatedBlocks());
|
||||||
|
|
||||||
|
// The SBN will not have any blocks in its neededReplication queue
|
||||||
|
// since the SBN doesn't process replication.
|
||||||
|
assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
|
||||||
|
|
||||||
|
LocatedBlocks locs = nn1.getRpcServer().getBlockLocations(
|
||||||
|
TEST_FILE, 0, 1);
|
||||||
|
assertEquals("Standby should have registered that the block has no replicas",
|
||||||
|
0, locs.get(0).getLocations().length);
|
||||||
|
|
||||||
|
cluster.restartDataNode(dnProps);
|
||||||
|
// Wait for both NNs to re-register the DN.
|
||||||
|
cluster.waitActive(0);
|
||||||
|
cluster.waitActive(1);
|
||||||
|
|
||||||
|
BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
|
||||||
|
BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
|
||||||
|
assertEquals(0, nn0.getNamesystem().getUnderReplicatedBlocks());
|
||||||
|
assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
|
||||||
|
|
||||||
|
locs = nn1.getRpcServer().getBlockLocations(
|
||||||
|
TEST_FILE, 0, 1);
|
||||||
|
assertEquals("Standby should have registered that the block has replicas again",
|
||||||
|
1, locs.get(0).getLocations().length);
|
||||||
|
} finally {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void waitForBlockLocations(final MiniDFSCluster cluster,
|
static void waitForBlockLocations(final MiniDFSCluster cluster,
|
||||||
final NameNode nn,
|
final NameNode nn,
|
||||||
|
|
Loading…
Reference in New Issue