From 228ad84e9c9938aebbc2d8547253bcce06b53c80 Mon Sep 17 00:00:00 2001
From: Colin Patrick Mccabe
Date: Tue, 1 Dec 2015 23:21:21 -0800
Subject: [PATCH] HDFS-9429. Tests in TestDFSAdminWithHA intermittently fail with EOFException (Xiao Chen via Colin P. McCabe)

(cherry picked from commit 53e3bf7e704c332fb119f55cb92520a51b644bfc)
(cherry picked from commit 9b516a2a0501242b27f10a8f3e8551ed85a11320)
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt    |  3 ++
 .../hadoop/hdfs/TestRollingUpgrade.java        |  1 +
 .../hdfs/TestRollingUpgradeRollback.java       |  1 +
 .../hdfs/qjournal/MiniJournalCluster.java      | 38 +++++++++++++++++++
 .../hdfs/qjournal/MiniQJMHACluster.java        |  1 +
 .../hdfs/qjournal/TestMiniJournalCluster.java  |  1 +
 .../hadoop/hdfs/qjournal/TestNNWithQJM.java    |  1 +
 .../hdfs/qjournal/TestSecureNNWithQJM.java     |  1 +
 .../qjournal/client/TestEpochsAreUnique.java   |  1 +
 .../qjournal/client/TestQJMWithFaults.java     |  3 ++
 .../client/TestQuorumJournalManager.java       |  1 +
 .../server/TestJournalNodeMXBean.java          | 10 +++--
 12 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 265c3b014b3..0231f93d357 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1553,6 +1553,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-6533. TestBPOfferService#testBasicFunctionalitytest fails
     intermittently. (Wei-Chiu Chuang via Arpit Agarwal)
 
+    HDFS-9429. Tests in TestDFSAdminWithHA intermittently fail with
+    EOFException (Xiao Chen via Colin P. McCabe)
+
 Release 2.7.3 - UNRELEASED
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
index 9241851aa47..bf1b42aafb7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgrade.java
@@ -168,6 +168,7 @@ public void testRollingUpgradeWithQJM() throws Exception {
 
     final Configuration conf = new HdfsConfiguration();
     final MiniJournalCluster mjc = new MiniJournalCluster.Builder(conf).build();
+    mjc.waitActive();
     setConf(conf, nn1Dir, mjc);
 
     {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgradeRollback.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgradeRollback.java
index c2e9d7cff2a..6fdf1ac9f32 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgradeRollback.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestRollingUpgradeRollback.java
@@ -153,6 +153,7 @@ public void testRollbackWithQJM() throws Exception {
     try {
       mjc = new MiniJournalCluster.Builder(conf).numJournalNodes(
           NUM_JOURNAL_NODES).build();
+      mjc.waitActive();
       conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, mjc
           .getQuorumJournalURI(JOURNAL_ID).toString());
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java
index 202188d2b7c..7b974c3f690 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java
@@ -17,26 +17,34 @@
  */
 package org.apache.hadoop.hdfs.qjournal;
 
+import static org.apache.hadoop.hdfs.qjournal.QJMTestUtil.FAKE_NSINFO;
+import static org.junit.Assert.fail;
+
 import java.io.File;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.List;
+import java.util.concurrent.TimeoutException;
 
+import com.google.common.base.Supplier;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager;
 import org.apache.hadoop.hdfs.qjournal.server.JournalNode;
 import org.apache.hadoop.net.NetUtils;
 
 import com.google.common.base.Joiner;
 import com.google.common.collect.Lists;
+import org.apache.hadoop.test.GenericTestUtils;
 
 public class MiniJournalCluster {
+  public static final String CLUSTER_WAITACTIVE_URI = "waitactive";
   public static class Builder {
     private String baseDir;
     private int numJournalNodes = 3;
@@ -217,4 +225,34 @@ public int getNumNodes() {
     return nodes.length;
   }
 
+  /**
+   * Wait until all the journalnodes start.
+   */
+  public void waitActive() throws IOException {
+    for (int i = 0; i < nodes.length; i++) {
+      final int index = i;
+      try {
+        GenericTestUtils.waitFor(new Supplier<Boolean>() {
+          // wait until all JN's IPC server is running
+          @Override public Boolean get() {
+            try {
+              QuorumJournalManager qjm =
+                  new QuorumJournalManager(nodes[index].node.getConf(),
+                      getQuorumJournalURI(CLUSTER_WAITACTIVE_URI), FAKE_NSINFO);
+              qjm.hasSomeData();
+              qjm.close();
+            } catch (IOException e) {
+              // Exception from IPC call, likely due to server not ready yet.
+              return false;
+            }
+            return true;
+          }
+        }, 50, 3000);
+      } catch (TimeoutException e) {
+        fail("Time out while waiting for journal node " + index + " to start.");
+      } catch (InterruptedException ite) {
+        LOG.warn("Thread interrupted when waiting for node start", ite);
+      }
+    }
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java
index ef4c559f172..c642a07caa1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java
@@ -92,6 +92,7 @@ private MiniQJMHACluster(Builder builder) throws IOException {
       // start 3 journal nodes
       journalCluster = new MiniJournalCluster.Builder(conf).format(true)
           .build();
+      journalCluster.waitActive();
       URI journalURI = journalCluster.getQuorumJournalURI(NAMESERVICE);
 
       // start cluster with 2 NameNodes
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestMiniJournalCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestMiniJournalCluster.java
index fbb51e12dc8..cace7c92891 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestMiniJournalCluster.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestMiniJournalCluster.java
@@ -36,6 +36,7 @@ public void testStartStop() throws IOException {
     Configuration conf = new Configuration();
     MiniJournalCluster c = new MiniJournalCluster.Builder(conf)
       .build();
+    c.waitActive();
     try {
       URI uri = c.getQuorumJournalURI("myjournal");
       String[] addrs = uri.getAuthority().split(";");
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java
index 9f3126cda02..1961e3b36ed 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java
@@ -52,6 +52,7 @@ public void resetSystemExit() {
   @Before
   public void startJNs() throws Exception {
     mjc = new MiniJournalCluster.Builder(conf).build();
+    mjc.waitActive();
   }
 
   @After
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestSecureNNWithQJM.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestSecureNNWithQJM.java
index f95594a81ef..166f18e5a29 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestSecureNNWithQJM.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestSecureNNWithQJM.java
@@ -212,6 +212,7 @@ private void restartNameNode() throws IOException {
   private void startCluster() throws IOException {
     mjc = new MiniJournalCluster.Builder(conf)
       .build();
+    mjc.waitActive();
     conf.set(DFS_NAMENODE_EDITS_DIR_KEY,
       mjc.getQuorumJournalURI("myjournal").toString());
     cluster = new MiniDFSCluster.Builder(conf)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestEpochsAreUnique.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestEpochsAreUnique.java
index bd9cf6f7ea3..d57e0891a85 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestEpochsAreUnique.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestEpochsAreUnique.java
@@ -51,6 +51,7 @@ public class TestEpochsAreUnique {
   public void testSingleThreaded() throws IOException {
     Configuration conf = new Configuration();
     MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).build();
+    cluster.waitActive();
     URI uri = cluster.getQuorumJournalURI(JID);
     QuorumJournalManager qjm = new QuorumJournalManager(
         conf, uri, FAKE_NSINFO);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java
index aac2f49e3b4..b0a9b990502 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java
@@ -98,6 +98,7 @@ public class TestQJMWithFaults {
   private static long determineMaxIpcNumber() throws Exception {
     Configuration conf = new Configuration();
     MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).build();
+    cluster.waitActive();
     QuorumJournalManager qjm = null;
     long ret;
     try {
@@ -146,6 +147,7 @@ public void testRecoverAfterDoubleFailures() throws Exception {
 
         MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf)
           .build();
+        cluster.waitActive();
         QuorumJournalManager qjm = null;
         try {
           qjm = createInjectableQJM(cluster);
@@ -218,6 +220,7 @@ public void testRandomized() throws Exception {
 
     MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf)
       .build();
+    cluster.waitActive();
 
     // Format the cluster using a non-faulty QJM.
     QuorumJournalManager qjmForInitialFormat =
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java
index ad67debaf19..b9a0924c537 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java
@@ -94,6 +94,7 @@ public void setup() throws Exception {
 
     cluster = new MiniJournalCluster.Builder(conf)
       .build();
+    cluster.waitActive();
     qjm = createSpyingQJM();
     spies = qjm.getLoggerSetForTests().getLoggersForTests();
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeMXBean.java
index 347184870f6..498ef71bf80 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeMXBean.java
@@ -19,6 +19,7 @@
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.lang.management.ManagementFactory;
@@ -52,6 +53,7 @@ public void setup() throws IOException {
     // start 1 journal node
     jCluster = new MiniJournalCluster.Builder(new Configuration()).format(true)
       .numJournalNodes(NUM_JN).build();
+    jCluster.waitActive();
     jn = jCluster.getJournalNode(0);
   }
 
@@ -89,19 +91,19 @@ public void testJournalNodeMXBean() throws Exception {
     Map<String, String> infoMap = new HashMap<String, String>();
     infoMap.put("Formatted", "true");
     jMap.put(NAMESERVICE, infoMap);
+    Map<String, String> infoMap1 = new HashMap<>();
+    infoMap1.put("Formatted", "false");
+    jMap.put(MiniJournalCluster.CLUSTER_WAITACTIVE_URI, infoMap1);
     assertEquals(JSON.toString(jMap), journalStatus);
 
     // restart journal node without formatting
     jCluster = new MiniJournalCluster.Builder(new Configuration()).format(false)
       .numJournalNodes(NUM_JN).build();
+    jCluster.waitActive();
     jn = jCluster.getJournalNode(0);
     // re-check
     journalStatus = (String) mbs.getAttribute(mxbeanName, "JournalsStatus");
     assertEquals(jn.getJournalsStatus(), journalStatus);
-    jMap = new HashMap<String, Map<String, String>>();
-    infoMap = new HashMap<String, String>();
-    infoMap.put("Formatted", "true");
-    jMap.put(NAMESERVICE, infoMap);
     assertEquals(JSON.toString(jMap), journalStatus);
   }
 }
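
A minimal usage sketch, not part of the patch above: it shows the call pattern the patch establishes in the tests, i.e. build the MiniJournalCluster, call the new waitActive() before anything issues RPCs against the quorum, then tear down. The class and method names are hypothetical, "myjournal" is just an example journal id, and the shutdown() call is assumed from the existing MiniJournalCluster test code rather than shown in this diff.

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster;
    import org.junit.Test;

    public class MiniJournalClusterUsageSketch {
      @Test
      public void startWaitAndUse() throws Exception {
        Configuration conf = new Configuration();
        MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).build();
        // New in this patch: block until every JournalNode's IPC server answers,
        // instead of racing ahead and hitting EOFException on the first RPC.
        cluster.waitActive();
        try {
          URI uri = cluster.getQuorumJournalURI("myjournal");
          // ... point a QuorumJournalManager or NameNode at uri and run assertions ...
        } finally {
          cluster.shutdown();
        }
      }
    }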