HDFS-9429. Tests in TestDFSAdminWithHA intermittently fail with EOFException (Xiao Chen via Colin P. McCabe)

(cherry picked from commit 53e3bf7e70)
(cherry picked from commit 9b516a2a05)
This commit is contained in:
Colin Patrick Mccabe 2015-12-01 23:21:21 -08:00
parent 12fb8b4cc5
commit 228ad84e9c
12 changed files with 58 additions and 4 deletions

View File

@ -1553,6 +1553,9 @@ Release 2.8.0 - UNRELEASED
HDFS-6533. TestBPOfferService#testBasicFunctionalitytest fails HDFS-6533. TestBPOfferService#testBasicFunctionalitytest fails
intermittently. (Wei-Chiu Chuang via Arpit Agarwal) intermittently. (Wei-Chiu Chuang via Arpit Agarwal)
HDFS-9429. Tests in TestDFSAdminWithHA intermittently fail with
EOFException (Xiao Chen via Colin P. McCabe)
Release 2.7.3 - UNRELEASED Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -168,6 +168,7 @@ public void testRollingUpgradeWithQJM() throws Exception {
final Configuration conf = new HdfsConfiguration(); final Configuration conf = new HdfsConfiguration();
final MiniJournalCluster mjc = new MiniJournalCluster.Builder(conf).build(); final MiniJournalCluster mjc = new MiniJournalCluster.Builder(conf).build();
mjc.waitActive();
setConf(conf, nn1Dir, mjc); setConf(conf, nn1Dir, mjc);
{ {

View File

@ -153,6 +153,7 @@ public void testRollbackWithQJM() throws Exception {
try { try {
mjc = new MiniJournalCluster.Builder(conf).numJournalNodes( mjc = new MiniJournalCluster.Builder(conf).numJournalNodes(
NUM_JOURNAL_NODES).build(); NUM_JOURNAL_NODES).build();
mjc.waitActive();
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, mjc conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, mjc
.getQuorumJournalURI(JOURNAL_ID).toString()); .getQuorumJournalURI(JOURNAL_ID).toString());
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();

View File

@ -17,26 +17,34 @@
*/ */
package org.apache.hadoop.hdfs.qjournal; package org.apache.hadoop.hdfs.qjournal;
import static org.apache.hadoop.hdfs.qjournal.QJMTestUtil.FAKE_NSINFO;
import static org.junit.Assert.fail;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.net.URI; import java.net.URI;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeoutException;
import com.google.common.base.Supplier;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager;
import org.apache.hadoop.hdfs.qjournal.server.JournalNode; import org.apache.hadoop.hdfs.qjournal.server.JournalNode;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import org.apache.hadoop.test.GenericTestUtils;
public class MiniJournalCluster { public class MiniJournalCluster {
public static final String CLUSTER_WAITACTIVE_URI = "waitactive";
public static class Builder { public static class Builder {
private String baseDir; private String baseDir;
private int numJournalNodes = 3; private int numJournalNodes = 3;
@ -217,4 +225,34 @@ public int getNumNodes() {
return nodes.length; return nodes.length;
} }
/**
 * Wait until every journal node in this cluster responds to an RPC.
 *
 * <p>For each entry in {@code nodes}, a temporary
 * {@link QuorumJournalManager} is built from that node's configuration and
 * the {@link #CLUSTER_WAITACTIVE_URI} journal URI, and
 * {@code hasSomeData()} is called on it as a probe. The probe is retried
 * through {@code GenericTestUtils.waitFor} (arguments {@code 50} and
 * {@code 3000}: check interval and overall timeout, in milliseconds) until
 * it completes without an {@link IOException}.
 *
 * <p>If a node does not respond in time, the calling test is failed via
 * {@code Assert.fail}. If the waiting thread is interrupted, a warning is
 * logged, the thread's interrupt status is restored and the method returns
 * without checking the remaining nodes.
 *
 * @throws IOException kept in the signature for existing callers; probe
 *         failures are retried rather than propagated
 */
public void waitActive() throws IOException {
  for (int i = 0; i < nodes.length; i++) {
    final int index = i;
    try {
      GenericTestUtils.waitFor(new Supplier<Boolean>() {
        // Reports true once a probe RPC issued through a temporary QJM
        // completes without an IOException.
        @Override public Boolean get() {
          try {
            QuorumJournalManager qjm =
                new QuorumJournalManager(nodes[index].node.getConf(),
                    getQuorumJournalURI(CLUSTER_WAITACTIVE_URI), FAKE_NSINFO);
            try {
              qjm.hasSomeData();
            } finally {
              // Close on the failure path too: a failed probe is the normal
              // case while a node is still starting, and this supplier is
              // polled repeatedly, so an unclosed client would leak on
              // every retry.
              qjm.close();
            }
          } catch (IOException e) {
            // Exception from IPC call, likely due to server not ready yet.
            return false;
          }
          return true;
        }
      }, 50, 3000);
    } catch (TimeoutException e) {
      fail("Time out while waiting for journal node " + index + " to start.");
    } catch (InterruptedException ite) {
      LOG.warn("Thread interrupted when waiting for node start", ite);
      // Restore the flag so callers can observe the interruption, and stop
      // here: with the flag set, waiting on the remaining nodes would only
      // be interrupted again.
      Thread.currentThread().interrupt();
      return;
    }
  }
}
} }

View File

@ -92,6 +92,7 @@ private MiniQJMHACluster(Builder builder) throws IOException {
// start 3 journal nodes // start 3 journal nodes
journalCluster = new MiniJournalCluster.Builder(conf).format(true) journalCluster = new MiniJournalCluster.Builder(conf).format(true)
.build(); .build();
journalCluster.waitActive();
URI journalURI = journalCluster.getQuorumJournalURI(NAMESERVICE); URI journalURI = journalCluster.getQuorumJournalURI(NAMESERVICE);
// start cluster with 2 NameNodes // start cluster with 2 NameNodes

View File

@ -36,6 +36,7 @@ public void testStartStop() throws IOException {
Configuration conf = new Configuration(); Configuration conf = new Configuration();
MiniJournalCluster c = new MiniJournalCluster.Builder(conf) MiniJournalCluster c = new MiniJournalCluster.Builder(conf)
.build(); .build();
c.waitActive();
try { try {
URI uri = c.getQuorumJournalURI("myjournal"); URI uri = c.getQuorumJournalURI("myjournal");
String[] addrs = uri.getAuthority().split(";"); String[] addrs = uri.getAuthority().split(";");

View File

@ -52,6 +52,7 @@ public void resetSystemExit() {
@Before @Before
public void startJNs() throws Exception { public void startJNs() throws Exception {
mjc = new MiniJournalCluster.Builder(conf).build(); mjc = new MiniJournalCluster.Builder(conf).build();
mjc.waitActive();
} }
@After @After

View File

@ -212,6 +212,7 @@ private void restartNameNode() throws IOException {
private void startCluster() throws IOException { private void startCluster() throws IOException {
mjc = new MiniJournalCluster.Builder(conf) mjc = new MiniJournalCluster.Builder(conf)
.build(); .build();
mjc.waitActive();
conf.set(DFS_NAMENODE_EDITS_DIR_KEY, conf.set(DFS_NAMENODE_EDITS_DIR_KEY,
mjc.getQuorumJournalURI("myjournal").toString()); mjc.getQuorumJournalURI("myjournal").toString());
cluster = new MiniDFSCluster.Builder(conf) cluster = new MiniDFSCluster.Builder(conf)

View File

@ -51,6 +51,7 @@ public class TestEpochsAreUnique {
public void testSingleThreaded() throws IOException { public void testSingleThreaded() throws IOException {
Configuration conf = new Configuration(); Configuration conf = new Configuration();
MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).build(); MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).build();
cluster.waitActive();
URI uri = cluster.getQuorumJournalURI(JID); URI uri = cluster.getQuorumJournalURI(JID);
QuorumJournalManager qjm = new QuorumJournalManager( QuorumJournalManager qjm = new QuorumJournalManager(
conf, uri, FAKE_NSINFO); conf, uri, FAKE_NSINFO);

View File

@ -98,6 +98,7 @@ public class TestQJMWithFaults {
private static long determineMaxIpcNumber() throws Exception { private static long determineMaxIpcNumber() throws Exception {
Configuration conf = new Configuration(); Configuration conf = new Configuration();
MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).build(); MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).build();
cluster.waitActive();
QuorumJournalManager qjm = null; QuorumJournalManager qjm = null;
long ret; long ret;
try { try {
@ -146,6 +147,7 @@ public void testRecoverAfterDoubleFailures() throws Exception {
MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf) MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf)
.build(); .build();
cluster.waitActive();
QuorumJournalManager qjm = null; QuorumJournalManager qjm = null;
try { try {
qjm = createInjectableQJM(cluster); qjm = createInjectableQJM(cluster);
@ -218,6 +220,7 @@ public void testRandomized() throws Exception {
MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf) MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf)
.build(); .build();
cluster.waitActive();
// Format the cluster using a non-faulty QJM. // Format the cluster using a non-faulty QJM.
QuorumJournalManager qjmForInitialFormat = QuorumJournalManager qjmForInitialFormat =

View File

@ -94,6 +94,7 @@ public void setup() throws Exception {
cluster = new MiniJournalCluster.Builder(conf) cluster = new MiniJournalCluster.Builder(conf)
.build(); .build();
cluster.waitActive();
qjm = createSpyingQJM(); qjm = createSpyingQJM();
spies = qjm.getLoggerSetForTests().getLoggersForTests(); spies = qjm.getLoggerSetForTests().getLoggersForTests();

View File

@ -19,6 +19,7 @@
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException; import java.io.IOException;
import java.lang.management.ManagementFactory; import java.lang.management.ManagementFactory;
@ -52,6 +53,7 @@ public void setup() throws IOException {
// start 1 journal node // start 1 journal node
jCluster = new MiniJournalCluster.Builder(new Configuration()).format(true) jCluster = new MiniJournalCluster.Builder(new Configuration()).format(true)
.numJournalNodes(NUM_JN).build(); .numJournalNodes(NUM_JN).build();
jCluster.waitActive();
jn = jCluster.getJournalNode(0); jn = jCluster.getJournalNode(0);
} }
@ -89,19 +91,19 @@ public void testJournalNodeMXBean() throws Exception {
Map<String, String> infoMap = new HashMap<String, String>(); Map<String, String> infoMap = new HashMap<String, String>();
infoMap.put("Formatted", "true"); infoMap.put("Formatted", "true");
jMap.put(NAMESERVICE, infoMap); jMap.put(NAMESERVICE, infoMap);
Map<String, String> infoMap1 = new HashMap<>();
infoMap1.put("Formatted", "false");
jMap.put(MiniJournalCluster.CLUSTER_WAITACTIVE_URI, infoMap1);
assertEquals(JSON.toString(jMap), journalStatus); assertEquals(JSON.toString(jMap), journalStatus);
// restart journal node without formatting // restart journal node without formatting
jCluster = new MiniJournalCluster.Builder(new Configuration()).format(false) jCluster = new MiniJournalCluster.Builder(new Configuration()).format(false)
.numJournalNodes(NUM_JN).build(); .numJournalNodes(NUM_JN).build();
jCluster.waitActive();
jn = jCluster.getJournalNode(0); jn = jCluster.getJournalNode(0);
// re-check // re-check
journalStatus = (String) mbs.getAttribute(mxbeanName, "JournalsStatus"); journalStatus = (String) mbs.getAttribute(mxbeanName, "JournalsStatus");
assertEquals(jn.getJournalsStatus(), journalStatus); assertEquals(jn.getJournalsStatus(), journalStatus);
jMap = new HashMap<String, Map<String, String>>();
infoMap = new HashMap<String, String>();
infoMap.put("Formatted", "true");
jMap.put(NAMESERVICE, infoMap);
assertEquals(JSON.toString(jMap), journalStatus); assertEquals(JSON.toString(jMap), journalStatus);
} }
} }