diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 64cab7481cd..4963b198897 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -2573,6 +2573,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-9619. SimulatedFSDataset sometimes can not find blockpool for the
     correct namenode (Wei-Chiu Chuang via vinayakumarb)
 
+    HDFS-9493. Test o.a.h.hdfs.server.namenode.TestMetaSave fails in trunk.
+    (Tony Wu via lei)
+
 Release 2.7.3 - UNRELEASED
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
index 7ea78c9b891..8a2b3dd3a48 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
@@ -323,4 +323,17 @@ public class BlockManagerTestUtil {
     Whitebox.setInternalState(bmSafeMode, "extension", Integer.MAX_VALUE);
     Whitebox.setInternalState(bmSafeMode, "status", BMSafeModeStatus.EXTENSION);
   }
+
+  /**
+   * Check if a given Datanode (specified by uuid) is removed. Removed here
+   * means the Datanode is no longer present in the NameNode's NetworkTopology.
+   * @param nn Namenode
+   * @param dnUuid Datanode UUID
+   * @return true if the datanode is no longer contained in the topology.
+   */
+  public static boolean isDatanodeRemoved(NameNode nn, String dnUuid) {
+    final DatanodeManager dnm =
+        nn.getNamesystem().getBlockManager().getDatanodeManager();
+    return !dnm.getNetworkTopology().contains(dnm.getDatanode(dnUuid));
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java
index f8189876233..690812ff7ad 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java
@@ -27,19 +27,24 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.util.concurrent.TimeoutException;
 
+import com.google.common.base.Supplier;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.junit.AfterClass;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.After;
 import org.junit.Assert;
-import org.junit.BeforeClass;
+import org.junit.Before;
 import org.junit.Test;
 
 /**
@@ -53,8 +58,8 @@ public class TestMetaSave {
   private static FileSystem fileSys = null;
   private static NamenodeProtocols nnRpc = null;
 
-  @BeforeClass
-  public static void setUp() throws IOException {
+  @Before
+  public void setUp() throws IOException {
     // start a cluster
     Configuration conf = new HdfsConfiguration();
 
@@ -63,6 +68,7 @@ public class TestMetaSave {
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);
     conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L);
     conf.setLong(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1L);
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, 5L);
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
     cluster.waitActive();
     fileSys = cluster.getFileSystem();
@@ -73,16 +79,17 @@ public class TestMetaSave {
    * Tests metasave
    */
   @Test
-  public void testMetaSave() throws IOException, InterruptedException {
+  public void testMetaSave()
+      throws IOException, InterruptedException, TimeoutException {
     for (int i = 0; i < 2; i++) {
       Path file = new Path("/filestatus" + i);
       DFSTestUtil.createFile(fileSys, file, 1024, 1024, blockSize, (short) 2,
           seed);
     }
 
-    cluster.stopDataNode(1);
-    // wait for namenode to discover that a datanode is dead
-    Thread.sleep(15000);
+    // stop datanode and wait for namenode to discover that a datanode is dead
+    stopDatanodeAndWait(1);
+
     nnRpc.setReplication("/filestatus0", (short) 4);
 
     nnRpc.metaSave("metasave.out.txt");
@@ -116,16 +123,16 @@ public class TestMetaSave {
    */
   @Test
   public void testMetasaveAfterDelete()
-      throws IOException, InterruptedException {
+      throws IOException, InterruptedException, TimeoutException {
     for (int i = 0; i < 2; i++) {
       Path file = new Path("/filestatus" + i);
       DFSTestUtil.createFile(fileSys, file, 1024, 1024, blockSize, (short) 2,
           seed);
     }
 
-    cluster.stopDataNode(1);
-    // wait for namenode to discover that a datanode is dead
-    Thread.sleep(15000);
+    // stop datanode and wait for namenode to discover that a datanode is dead
+    stopDatanodeAndWait(1);
+
     nnRpc.setReplication("/filestatus0", (short) 4);
     nnRpc.delete("/filestatus0", true);
     nnRpc.delete("/filestatus1", true);
@@ -202,8 +209,8 @@ public class TestMetaSave {
     }
   }
 
-  @AfterClass
-  public static void tearDown() throws IOException {
+  @After
+  public void tearDown() throws IOException {
     if (fileSys != null)
       fileSys.close();
     if (cluster != null)
@@ -219,4 +226,27 @@ public class TestMetaSave {
   private static File getLogFile(String name) {
     return new File(System.getProperty("hadoop.log.dir"), name);
   }
+
+  /**
+   * Stop a DN, notify NN the death of DN and wait for NN to remove the DN.
+   *
+   * @param dnIdx Index of the Datanode in MiniDFSCluster
+   * @throws TimeoutException if the NN does not remove the DN within 30s
+   * @throws InterruptedException if interrupted while waiting
+   */
+  private void stopDatanodeAndWait(final int dnIdx)
+      throws TimeoutException, InterruptedException {
+    final DataNode dnToStop = cluster.getDataNodes().get(dnIdx);
+    cluster.stopDataNode(dnIdx);
+    BlockManagerTestUtil.noticeDeadDatanode(
+        cluster.getNameNode(), dnToStop.getDatanodeId().getXferAddr());
+    // wait for namenode to discover that a datanode is dead
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return BlockManagerTestUtil.isDatanodeRemoved(
+            cluster.getNameNode(), dnToStop.getDatanodeUuid());
+      }
+    }, 1000, 30000);
+  }
 }