HADOOP-8306. ZKFC: improve error message when ZK is not running. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-3042@1329960 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2012-04-24 19:34:51 +00:00
parent ca49126e3d
commit c6e1321245
3 changed files with 36 additions and 6 deletions

View File

@ -19,3 +19,5 @@ HADOOP-8260. Replace ClientBaseWithFixes with our own modified copy of the class
HADOOP-8246. Auto-HA: automatically scope znode by nameservice ID (todd) HADOOP-8246. Auto-HA: automatically scope znode by nameservice ID (todd)
HADOOP-8247. Add a config to enable auto-HA, which disables manual FailoverController (todd) HADOOP-8247. Add a config to enable auto-HA, which disables manual FailoverController (todd)
HADOOP-8306. ZKFC: improve error message when ZK is not running. (todd)

View File

@ -34,6 +34,7 @@ import org.apache.hadoop.ha.HAZKUtil.ZKAuthInfo;
import org.apache.hadoop.ha.HealthMonitor.State; import org.apache.hadoop.ha.HealthMonitor.State;
import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.Tool;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs.Ids; import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.ACL;
@ -76,8 +77,11 @@ public abstract class ZKFailoverController implements Tool {
static final int ERR_CODE_NO_FENCER = 4; static final int ERR_CODE_NO_FENCER = 4;
/** Automatic failover is not enabled */ /** Automatic failover is not enabled */
static final int ERR_CODE_AUTO_FAILOVER_NOT_ENABLED = 5; static final int ERR_CODE_AUTO_FAILOVER_NOT_ENABLED = 5;
/** Cannot connect to ZooKeeper */
static final int ERR_CODE_NO_ZK = 6;
private Configuration conf; private Configuration conf;
private String zkQuorum;
private HealthMonitor healthMonitor; private HealthMonitor healthMonitor;
private ActiveStandbyElector elector; private ActiveStandbyElector elector;
@ -162,12 +166,24 @@ public abstract class ZKFailoverController implements Tool {
} }
} }
try {
if (!elector.parentZNodeExists()) { if (!elector.parentZNodeExists()) {
LOG.fatal("Unable to start failover controller. " + LOG.fatal("Unable to start failover controller. " +
"Parent znode does not exist.\n" + "Parent znode does not exist.\n" +
"Run with -formatZK flag to initialize ZooKeeper."); "Run with -formatZK flag to initialize ZooKeeper.");
return ERR_CODE_NO_PARENT_ZNODE; return ERR_CODE_NO_PARENT_ZNODE;
} }
} catch (IOException ioe) {
if (ioe.getCause() instanceof KeeperException.ConnectionLossException) {
LOG.fatal("Unable to start failover controller. Unable to connect " +
"to ZooKeeper quorum at " + zkQuorum + ". Please check the " +
"configured value for " + ZK_QUORUM_KEY + " and ensure that " +
"ZooKeeper is running.");
return ERR_CODE_NO_ZK;
} else {
throw ioe;
}
}
try { try {
localTarget.checkFencingConfigured(); localTarget.checkFencingConfigured();
@ -248,7 +264,7 @@ public abstract class ZKFailoverController implements Tool {
} }
private void initZK() throws HadoopIllegalArgumentException, IOException { private void initZK() throws HadoopIllegalArgumentException, IOException {
String zkQuorum = conf.get(ZK_QUORUM_KEY); zkQuorum = conf.get(ZK_QUORUM_KEY);
int zkTimeout = conf.getInt(ZK_SESSION_TIMEOUT_KEY, int zkTimeout = conf.getInt(ZK_SESSION_TIMEOUT_KEY,
ZK_SESSION_TIMEOUT_DEFAULT); ZK_SESSION_TIMEOUT_DEFAULT);
// Parse ACLs from configuration. // Parse ACLs from configuration.

View File

@ -95,6 +95,18 @@ public class TestZKFailoverController extends ClientBaseWithFixes {
assertEquals(0, runFC(svc, "-formatZK", "-force")); assertEquals(0, runFC(svc, "-formatZK", "-force"));
} }
/**
 * Verifies the failover controller's exit status when the ZooKeeper
 * server has been stopped before the controller is run: the run must
 * finish with {@link ZKFailoverController#ERR_CODE_NO_ZK}.
 */
@Test(timeout=15000)
public void testNoZK() throws Exception {
  // Take ZooKeeper down first so the controller has nothing to connect to.
  stopServer();
  final int exitCode = runFC(cluster.getService(1));
  assertEquals(ZKFailoverController.ERR_CODE_NO_ZK, exitCode);
}
@Test @Test
public void testFormatOneClusterLeavesOtherClustersAlone() throws Exception { public void testFormatOneClusterLeavesOtherClustersAlone() throws Exception {
DummyHAService svc = cluster.getService(1); DummyHAService svc = cluster.getService(1);