HDFS-8772. Fix TestStandbyIsHot#testDatanodeRestarts which occasionally fails. Contributed by Walter Su.

This commit is contained in:
Andrew Wang 2015-08-07 09:53:04 -07:00
parent a0da1ec010
commit 0c4c7b3b62
3 changed files with 35 additions and 1 deletions

View File

@ -426,6 +426,9 @@ Release 2.8.0 - UNRELEASED
HDFS-8856. Make LeaseManager#countPath O(1). (Arpit Agarwal) HDFS-8856. Make LeaseManager#countPath O(1). (Arpit Agarwal)
HDFS-8772. Fix TestStandbyIsHot#testDatanodeRestarts which occasionally fails.
(Walter Su via wang).
OPTIMIZATIONS OPTIMIZATIONS
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than

View File

@ -62,7 +62,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.concurrent.TimeoutException;
import com.google.common.base.Supplier;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
@ -85,6 +87,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Util; import org.apache.hadoop.hdfs.server.common.Util;
@ -114,6 +117,7 @@ import org.apache.hadoop.net.StaticMapping;
import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.security.authorize.ProxyUsers;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.util.ToolRunner;
@ -2295,6 +2299,31 @@ public class MiniDFSCluster {
client.close(); client.close();
} }
/** Wait until the given namenode gets first block reports from all the datanodes */
public void waitFirstBRCompleted(int nnIndex, int timeout) throws
IOException, TimeoutException, InterruptedException {
if (nameNodes.length == 0 || nameNodes[nnIndex] == null
|| nameNodes[nnIndex].nameNode == null) {
return;
}
final FSNamesystem ns = getNamesystem(nnIndex);
final DatanodeManager dm = ns.getBlockManager().getDatanodeManager();
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
List<DatanodeDescriptor> nodes = dm.getDatanodeListForReport
(DatanodeReportType.LIVE);
for (DatanodeDescriptor node : nodes) {
if (!node.checkBlockReportReceived()) {
return false;
}
}
return true;
}
}, 100, timeout);
}
/** /**
* Wait until the cluster is active and running. * Wait until the cluster is active and running.
*/ */

View File

@ -182,6 +182,8 @@ public class TestStandbyIsHot {
// Wait for both NNs to re-register the DN. // Wait for both NNs to re-register the DN.
cluster.waitActive(0); cluster.waitActive(0);
cluster.waitActive(1); cluster.waitActive(1);
cluster.waitFirstBRCompleted(0, 10000);
cluster.waitFirstBRCompleted(1, 10000);
BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager()); BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());