diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt index eb5a78ee563..91979bbe686 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt @@ -19,3 +19,5 @@ HADOOP-8260. Replace ClientBaseWithFixes with our own modified copy of the class HADOOP-8246. Auto-HA: automatically scope znode by nameservice ID (todd) HADOOP-8247. Add a config to enable auto-HA, which disables manual FailoverController (todd) + +HADOOP-8306. ZKFC: improve error message when ZK is not running. (todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java index 47b6de8ad61..f87813e63f5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java @@ -34,6 +34,7 @@ import org.apache.hadoop.ha.HAZKUtil.ZKAuthInfo; import org.apache.hadoop.ha.HealthMonitor.State; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.util.Tool; +import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZooDefs.Ids; import org.apache.hadoop.util.ToolRunner; import org.apache.zookeeper.data.ACL; @@ -76,8 +77,11 @@ public abstract class ZKFailoverController implements Tool { static final int ERR_CODE_NO_FENCER = 4; /** Automatic failover is not enabled */ static final int ERR_CODE_AUTO_FAILOVER_NOT_ENABLED = 5; + /** Cannot connect to ZooKeeper */ + static final int ERR_CODE_NO_ZK = 6; private Configuration conf; + private String zkQuorum; private HealthMonitor healthMonitor; private ActiveStandbyElector elector; @@ -162,11 +166,23 @@ public abstract class ZKFailoverController implements Tool { } } - if (!elector.parentZNodeExists()) { - LOG.fatal("Unable to start failover controller. " + - "Parent znode does not exist.\n" + - "Run with -formatZK flag to initialize ZooKeeper."); - return ERR_CODE_NO_PARENT_ZNODE; + try { + if (!elector.parentZNodeExists()) { + LOG.fatal("Unable to start failover controller. " + + "Parent znode does not exist.\n" + + "Run with -formatZK flag to initialize ZooKeeper."); + return ERR_CODE_NO_PARENT_ZNODE; + } + } catch (IOException ioe) { + if (ioe.getCause() instanceof KeeperException.ConnectionLossException) { + LOG.fatal("Unable to start failover controller. Unable to connect " + + "to ZooKeeper quorum at " + zkQuorum + ". Please check the " + + "configured value for " + ZK_QUORUM_KEY + " and ensure that " + + "ZooKeeper is running."); + return ERR_CODE_NO_ZK; + } else { + throw ioe; + } } try { @@ -248,7 +264,7 @@ public abstract class ZKFailoverController implements Tool { } private void initZK() throws HadoopIllegalArgumentException, IOException { - String zkQuorum = conf.get(ZK_QUORUM_KEY); + zkQuorum = conf.get(ZK_QUORUM_KEY); int zkTimeout = conf.getInt(ZK_SESSION_TIMEOUT_KEY, ZK_SESSION_TIMEOUT_DEFAULT); // Parse ACLs from configuration. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java index 33807f9f725..c010c3d3b0f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java @@ -95,6 +95,18 @@ public class TestZKFailoverController extends ClientBaseWithFixes { assertEquals(0, runFC(svc, "-formatZK", "-force")); } + /** + * Test that if ZooKeeper is not running, the correct error + * code is returned. + */ + @Test(timeout=15000) + public void testNoZK() throws Exception { + stopServer(); + DummyHAService svc = cluster.getService(1); + assertEquals(ZKFailoverController.ERR_CODE_NO_ZK, + runFC(svc)); + } + @Test public void testFormatOneClusterLeavesOtherClustersAlone() throws Exception { DummyHAService svc = cluster.getService(1);