diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index ebcc6aa1b38..6797b51f1b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1690,6 +1690,9 @@ Release 2.8.0 - UNRELEASED HDFS-9626. TestBlockReplacement#testBlockReplacement fails occasionally. (Xiao Chen via zhz) + HDFS-9493. Test o.a.h.hdfs.server.namenode.TestMetaSave fails in trunk. + (Tony Wu via lei) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java index 6e7b14e61b5..3e41a3313a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java @@ -315,4 +315,17 @@ public class BlockManagerTestUtil { Whitebox.setInternalState(bmSafeMode, "extension", Integer.MAX_VALUE); Whitebox.setInternalState(bmSafeMode, "status", BMSafeModeStatus.EXTENSION); } + + /** + * Check if a given Datanode (specified by uuid) is removed. Removed means the + * Datanode is no longer present in HeartbeatManager and NetworkTopology. + * @param nn Namenode + * @param dnUuid Datanode UUID + * @return true if datanode is removed. + */ + public static boolean isDatanodeRemoved(NameNode nn, String dnUuid){ + final DatanodeManager dnm = + nn.getNamesystem().getBlockManager().getDatanodeManager(); + return !dnm.getNetworkTopology().contains(dnm.getDatanode(dnUuid)); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java index 786f83f3fe8..caca2178975 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java @@ -28,19 +28,25 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.Random; +import java.util.concurrent.TimeoutException; +import com.google.common.base.Supplier; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; import org.junit.Test; /** @@ -65,8 +71,8 @@ public class TestMetaSave { stm.close(); } - @BeforeClass - public static void setUp() throws IOException { + @Before + public void setUp() throws IOException { // start a cluster Configuration conf = new HdfsConfiguration(); @@ -75,6 +81,7 @@ public class TestMetaSave { conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000); conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L); conf.setLong(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1L); + conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, 5L); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build(); cluster.waitActive(); fileSys = cluster.getFileSystem(); @@ -85,15 +92,16 @@ public class TestMetaSave { * Tests metasave */ @Test - public void testMetaSave() throws IOException, InterruptedException { + public void testMetaSave() + throws IOException, InterruptedException, TimeoutException { for (int i = 0; i < 2; i++) { Path file = new Path("/filestatus" + i); createFile(fileSys, file); } - cluster.stopDataNode(1); - // wait for namenode to discover that a datanode is dead - Thread.sleep(15000); + // stop datanode and wait for namenode to discover that a datanode is dead + stopDatanodeAndWait(1); + nnRpc.setReplication("/filestatus0", (short) 4); nnRpc.metaSave("metasave.out.txt"); @@ -126,15 +134,15 @@ public class TestMetaSave { */ @Test public void testMetasaveAfterDelete() - throws IOException, InterruptedException { + throws IOException, InterruptedException, TimeoutException { for (int i = 0; i < 2; i++) { Path file = new Path("/filestatus" + i); createFile(fileSys, file); } - cluster.stopDataNode(1); - // wait for namenode to discover that a datanode is dead - Thread.sleep(15000); + // stop datanode and wait for namenode to discover that a datanode is dead + stopDatanodeAndWait(1); + nnRpc.setReplication("/filestatus0", (short) 4); nnRpc.delete("/filestatus0", true); nnRpc.delete("/filestatus1", true); @@ -211,8 +219,8 @@ public class TestMetaSave { } } - @AfterClass - public static void tearDown() throws IOException { + @After + public void tearDown() throws IOException { if (fileSys != null) fileSys.close(); if (cluster != null) @@ -228,4 +236,27 @@ public class TestMetaSave { private static File getLogFile(String name) { return new File(System.getProperty("hadoop.log.dir"), name); } + + /** + * Stop a DN, notify NN the death of DN and wait for NN to remove the DN. + * + * @param dnIdx Index of the Datanode in MiniDFSCluster + * @throws TimeoutException + * @throws InterruptedException + */ + private void stopDatanodeAndWait(final int dnIdx) + throws TimeoutException, InterruptedException { + final DataNode dnToStop = cluster.getDataNodes().get(dnIdx); + cluster.stopDataNode(dnIdx); + BlockManagerTestUtil.noticeDeadDatanode( + cluster.getNameNode(), dnToStop.getDatanodeId().getXferAddr()); + // wait for namenode to discover that a datanode is dead + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return BlockManagerTestUtil.isDatanodeRemoved( + cluster.getNameNode(), dnToStop.getDatanodeUuid()); + } + }, 1000, 30000); + } }