diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 3b2d997445f..a2e48241cbc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -2204,6 +2204,9 @@ Release 2.8.0 - UNRELEASED HDFS-9343. Empty caller context considered invalid. (Mingliang Liu via Arpit Agarwal) + HDFS-9329. TestBootstrapStandby#testRateThrottling is flaky because fsimage + size is smaller than IO buffer size. (zhz) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java index fd45816581a..9f0d95bfa94 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java @@ -25,13 +25,16 @@ import java.io.IOException; import java.net.URI; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.base.Supplier; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature; @@ -109,12 +112,16 @@ public void testSuccessfulBaseCase() throws Exception { "storage directory does not exist or is not accessible", ioe); } + int expectedCheckpointTxId = (int)NameNodeAdapter.getNamesystem(nn0) + .getFSImage().getMostRecentCheckpointTxId(); + int rc = BootstrapStandby.run(new String[] { "-nonInteractive" }, cluster.getConfiguration(index)); assertEquals(0, rc); // Should have copied over the namespace from the active - FSImageTestUtil.assertNNHasCheckpoints(cluster, index, ImmutableList.of(0)); + FSImageTestUtil.assertNNHasCheckpoints(cluster, index, + ImmutableList.of(expectedCheckpointTxId)); } // We should now be able to start the standbys successfully. @@ -221,7 +228,7 @@ public void testOtherNodeNotActive() throws Exception { * {@link DFSConfigKeys#DFS_IMAGE_TRANSFER_BOOTSTRAP_STANDBY_RATE_KEY} * created by HDFS-8808. */ - @Test + @Test(timeout=30000) public void testRateThrottling() throws Exception { cluster.getConfiguration(0).setLong( DFSConfigKeys.DFS_IMAGE_TRANSFER_RATE_KEY, 1); @@ -229,23 +236,46 @@ public void testRateThrottling() throws Exception { cluster.waitActive(); nn0 = cluster.getNameNode(0); cluster.transitionToActive(0); - // Each edit has at least 1 byte. So the lowRate definitely should cause - // a timeout, if enforced. If lowRate is not enforced, any reasonable test - // machine should at least download an image with 5 edits in 5 seconds. - for (int i = 0; i < 5; i++) { + // Any reasonable test machine should be able to transfer 1 byte per MS + // (which is ~1K/s) + final int minXferRatePerMS = 1; + int imageXferBufferSize = DFSUtilClient.getIoFileBufferSize( + new Configuration()); + File imageFile = null; + int dirIdx = 0; + while (imageFile == null || imageFile.length() < imageXferBufferSize) { + for (int i = 0; i < 5; i++) { + cluster.getFileSystem(0).mkdirs(new Path("/foo" + dirIdx++)); + } nn0.getRpcServer().rollEditLog(); + NameNodeAdapter.enterSafeMode(nn0, false); + NameNodeAdapter.saveNamespace(nn0); + NameNodeAdapter.leaveSafeMode(nn0); + imageFile = FSImageTestUtil.findLatestImageFile(FSImageTestUtil + .getFSImage(nn0).getStorage().getStorageDir(0)); } + + final int timeOut = (int)(imageFile.length() / minXferRatePerMS) + 1; // A very low DFS_IMAGE_TRANSFER_RATE_KEY value won't affect bootstrapping + final AtomicBoolean bootStrapped = new AtomicBoolean(false); + new Thread( + new Runnable() { + @Override + public void run() { + try { + testSuccessfulBaseCase(); + bootStrapped.set(true); + } catch (Exception e) { + fail(e.getMessage()); + } + } + } + ).start(); GenericTestUtils.waitFor(new Supplier() { public Boolean get() { - try { - testSuccessfulBaseCase(); - return true; - } catch (Exception e) { - return false; - } + return bootStrapped.get(); } - }, 500, 5000); + }, 50, timeOut); shutdownCluster(); setupCluster(); @@ -257,17 +287,26 @@ public Boolean get() { cluster.transitionToActive(0); // A very low DFS_IMAGE_TRANSFER_BOOTSTRAP_STANDBY_RATE_KEY value should // cause timeout + bootStrapped.set(false); + new Thread( + new Runnable() { + @Override + public void run() { + try { + testSuccessfulBaseCase(); + bootStrapped.set(true); + } catch (Exception e) { + LOG.info(e.getMessage()); + } + } + } + ).start(); try { GenericTestUtils.waitFor(new Supplier() { public Boolean get() { - try { - testSuccessfulBaseCase(); - return true; - } catch (Exception e) { - return false; - } + return bootStrapped.get(); } - }, 500, 5000); + }, 50, timeOut); fail("Did not timeout"); } catch (TimeoutException e) { LOG.info("Encountered expected timeout.");