From 98a27d110129c7b32455035831480f1c6197260b Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Wed, 5 Aug 2015 16:35:41 -0700 Subject: [PATCH] HDFS-8772. Fix TestStandbyIsHot#testDatanodeRestarts which occasionally fails. Contributed by Walter Su. --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../apache/hadoop/hdfs/MiniDFSCluster.java | 30 ++++++++++++++++++- .../server/namenode/ha/TestStandbyIsHot.java | 2 ++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 051dc8a604e..4e97b6b7ee3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -769,6 +769,9 @@ Release 2.8.0 - UNRELEASED HDFS-8856. Make LeaseManager#countPath O(1). (Arpit Agarwal) + HDFS-8772. Fix TestStandbyIsHot#testDatanodeRestarts which occasionally fails. + (Walter Su via wang) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 0a2188609e4..70523216831 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -61,7 +61,9 @@ import java.util.Map; import java.util.Random; import java.util.Set; +import java.util.concurrent.TimeoutException; +import com.google.common.base.Supplier; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Multimap; import org.apache.commons.logging.Log; @@ -86,6 +88,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import 
org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Util; @@ -114,6 +117,7 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.ProxyUsers; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ToolRunner; @@ -2386,7 +2390,31 @@ public void waitActive(int nnIndex) throws IOException { client.close(); } - + + /** Wait until the given namenode gets first block reports from all the datanodes */ + public void waitFirstBRCompleted(int nnIndex, int timeout) throws + IOException, TimeoutException, InterruptedException { + if (namenodes.size() == 0 || getNN(nnIndex) == null || getNN(nnIndex).nameNode == null) { + return; + } + + final FSNamesystem ns = getNamesystem(nnIndex); + final DatanodeManager dm = ns.getBlockManager().getDatanodeManager(); + GenericTestUtils.waitFor(new Supplier<Boolean>() { + @Override + public Boolean get() { + List<DatanodeDescriptor> nodes = dm.getDatanodeListForReport + (DatanodeReportType.LIVE); + for (DatanodeDescriptor node : nodes) { + if (!node.checkBlockReportReceived()) { + return false; + } + } + return true; + } + }, 100, timeout); + } + /** * Wait until the cluster is active and running. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index 622ed94861e..14c9dc264d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -182,6 +182,8 @@ public void testDatanodeRestarts() throws Exception { // Wait for both NNs to re-register the DN. cluster.waitActive(0); cluster.waitActive(1); + cluster.waitFirstBRCompleted(0, 10000); + cluster.waitFirstBRCompleted(1, 10000); BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager()); BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());