diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3042.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3042.txt
new file mode 100644
index 00000000000..7a345c63b73
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-3042.txt
@@ -0,0 +1,7 @@
+Changes for HDFS-3042 branch.
+
+This change list will be merged into the trunk CHANGES.txt when the HDFS-3042
+branch is merged.
+------------------------------
+
+HDFS-2185. HDFS portion of ZK-based FailoverController (todd)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
index 301d3028252..350796d3319 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
@@ -5,6 +5,9 @@
+     <Match>
+       <Package name="org.apache.hadoop.hdfs.server.namenode.ha.proto" />
+     </Match>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml
index b87c59748b3..a410cc0bad4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml
@@ -110,6 +110,33 @@
       <artifactId>ant</artifactId>
       <scope>provided</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.zookeeper</groupId>
+      <artifactId>zookeeper</artifactId>
+      <version>3.4.2</version>
+      <exclusions>
+        <exclusion>
+          <!-- otherwise seems to drag in junit 3.8.1 via jline -->
+          <groupId>junit</groupId>
+          <artifactId>junit</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jdmk</groupId>
+          <artifactId>jmxtools</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jmx</groupId>
+          <artifactId>jmxri</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.zookeeper</groupId>
+      <artifactId>zookeeper</artifactId>
+      <version>3.4.2</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
index fb409f3170c..1b386b534a1 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
@@ -30,6 +30,7 @@ function print_usage(){
   echo "  namenode -format     format the DFS filesystem"
   echo "  secondarynamenode    run the DFS secondary namenode"
   echo "  namenode             run the DFS namenode"
+  echo "  zkfc                 run the ZK Failover Controller daemon"
   echo "  datanode             run a DFS datanode"
   echo "  dfsadmin             run a DFS admin client"
   echo "  haadmin              run a DFS HA admin client"
@@ -71,6 +72,9 @@ fi
 if [ "$COMMAND" = "namenode" ] ; then
   CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
+elif [ "$COMMAND" = "zkfc" ] ; then
+  CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
+  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS"
 elif [ "$COMMAND" = "secondarynamenode" ] ; then
   CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java
new file mode 100644
index 00000000000..e04159fb4d5
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSZKFailoverController.java
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools;
+
+import java.net.InetSocketAddress;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ha.HAServiceTarget;
+import org.apache.hadoop.ha.ZKFailoverController;
+import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.HAUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.ha.proto.HAZKInfoProtos.ActiveNodeInfo;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.ToolRunner;
+
+import com.google.common.base.Preconditions;
+import com.google.protobuf.InvalidProtocolBufferException;
+
+@InterfaceAudience.Private
+public class DFSZKFailoverController extends ZKFailoverController {
+
+  private static final Log LOG =
+    LogFactory.getLog(DFSZKFailoverController.class);
+  private NNHAServiceTarget localTarget;
+  private Configuration localNNConf;
+
+  @Override
+  protected HAServiceTarget dataToTarget(byte[] data) {
+    ActiveNodeInfo proto;
+    try {
+      proto = ActiveNodeInfo.parseFrom(data);
+    } catch (InvalidProtocolBufferException e) {
+      throw new RuntimeException("Invalid data in ZK: " +
+          StringUtils.byteToHexString(data));
+    }
+    NNHAServiceTarget ret = new NNHAServiceTarget(
+        getConf(), proto.getNameserviceId(), proto.getNamenodeId());
+    InetSocketAddress addressFromProtobuf = new InetSocketAddress(
+        proto.getHostname(), proto.getPort());
+
+    if (!addressFromProtobuf.equals(ret.getAddress())) {
+      throw new RuntimeException("Mismatched address stored in ZK for " +
+          ret + ": Stored protobuf was " + proto + ", address from our own " +
+          "configuration for this NameNode was " + ret.getAddress());
+    }
+    return ret;
+  }
+
+  @Override
+  protected byte[] targetToData(HAServiceTarget target) {
+    InetSocketAddress addr = target.getAddress();
+    return ActiveNodeInfo.newBuilder()
+      .setHostname(addr.getHostName())
+      .setPort(addr.getPort())
+      .setNameserviceId(localTarget.getNameServiceId())
+      .setNamenodeId(localTarget.getNameNodeId())
+      .build()
+      .toByteArray();
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    // Use HdfsConfiguration here to force hdfs-site.xml to load
+    localNNConf = new HdfsConfiguration(conf);
+
+    String nsId = DFSUtil.getNamenodeNameServiceId(conf);
+
+    if (!HAUtil.isHAEnabled(localNNConf, nsId)) {
+      throw new HadoopIllegalArgumentException(
+          "HA is not enabled for this namenode.");
+    }
+    String nnId = HAUtil.getNameNodeId(localNNConf, nsId);
+    NameNode.initializeGenericKeys(localNNConf, nsId, nnId);
+
+    localTarget = new NNHAServiceTarget(localNNConf, nsId, nnId);
+
+    super.setConf(localNNConf);
+    LOG.info("Failover controller configured for NameNode " +
+        nsId + "." + nnId);
+  }
+
+  @Override
+  public HAServiceTarget getLocalTarget() {
+    Preconditions.checkState(localTarget != null,
+        "setConf() should have already been called");
+    return localTarget;
+  }
+
+  public static void main(String args[])
+      throws Exception {
+    System.exit(ToolRunner.run(
+        new DFSZKFailoverController(), args));
+  }
+}
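The controller is a standard Hadoop Tool: main() defers to ToolRunner, which is what the new zkfc subcommand in bin/hdfs above ends up invoking. For illustration only (not part of the patch), a minimal launch sketch; it assumes an hdfs-site.xml with HA enabled for the local nameservice is on the classpath, and uses the -formatZK setup flag that the test at the end of this patch exercises:

    // Illustration only. Runs the controller the same way main() does;
    // "-formatZK" initializes the controller's znode before the first run.
    // setConf() will throw HadoopIllegalArgumentException unless the loaded
    // configuration has HA enabled for this NameNode.
    import org.apache.hadoop.hdfs.tools.DFSZKFailoverController;
    import org.apache.hadoop.util.ToolRunner;

    public class ZKFCLaunchSketch {
      public static void main(String[] args) throws Exception {
        int ret = ToolRunner.run(new DFSZKFailoverController(),
            new String[] { "-formatZK" });
        System.exit(ret);
      }
    }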
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java
index 9e8c239e7e8..5c7748a667e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/NNHAServiceTarget.java
@@ -20,11 +20,11 @@ package org.apache.hadoop.hdfs.tools;
 import java.net.InetSocketAddress;
 
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.ha.BadFencingConfigurationException;
 import org.apache.hadoop.ha.HAServiceTarget;
 import org.apache.hadoop.ha.NodeFencer;
 import org.apache.hadoop.hdfs.DFSUtil;
-import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.net.NetUtils;
 
@@ -38,11 +38,13 @@ public class NNHAServiceTarget extends HAServiceTarget {
   private final InetSocketAddress addr;
   private NodeFencer fencer;
   private BadFencingConfigurationException fenceConfigError;
+  private final String nnId;
+  private final String nsId;
 
-  public NNHAServiceTarget(HdfsConfiguration conf,
+  public NNHAServiceTarget(Configuration localNNConf,
       String nsId, String nnId) {
     String serviceAddr =
-      DFSUtil.getNamenodeServiceAddr(conf, nsId, nnId);
+      DFSUtil.getNamenodeServiceAddr(localNNConf, nsId, nnId);
     if (serviceAddr == null) {
       throw new IllegalArgumentException(
           "Unable to determine service address for namenode '" + nnId + "'");
@@ -50,10 +52,12 @@ public class NNHAServiceTarget extends HAServiceTarget {
     this.addr = NetUtils.createSocketAddr(serviceAddr,
         NameNode.DEFAULT_PORT);
     try {
-      this.fencer = NodeFencer.create(conf);
+      this.fencer = NodeFencer.create(localNNConf);
     } catch (BadFencingConfigurationException e) {
       this.fenceConfigError = e;
     }
+    this.nnId = nnId;
+    this.nsId = nsId;
   }
 
   /**
@@ -69,6 +73,10 @@ public class NNHAServiceTarget extends HAServiceTarget {
     if (fenceConfigError != null) {
       throw fenceConfigError;
     }
+    if (fencer == null) {
+      throw new BadFencingConfigurationException(
+          "No fencer configured for " + this);
+    }
   }
 
   @Override
@@ -81,4 +89,11 @@ public class NNHAServiceTarget extends HAServiceTarget {
     return "NameNode at " + addr;
   }
 
+  public String getNameServiceId() {
+    return this.nsId;
+  }
+
+  public String getNameNodeId() {
+    return this.nnId;
+  }
 }
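Since the constructor now takes a plain Configuration rather than HdfsConfiguration, a target can be built from any conf object that carries the NameNode's address keys. A construction sketch, for illustration only (not part of the patch; "ns1", "nn1", and the host:port are placeholder values):

    // Illustration only. Builds a target for a hypothetical nameservice
    // "ns1" / NameNode "nn1".
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.tools.NNHAServiceTarget;

    public class NNTargetSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // getNamenodeServiceAddr() falls back to the RPC address when no
        // dedicated service RPC address is configured.
        conf.set("dfs.namenode.rpc-address.ns1.nn1", "nn1.example.com:8020");

        NNHAServiceTarget target = new NNHAServiceTarget(conf, "ns1", "nn1");
        System.out.println(target);  // "NameNode at nn1.example.com..."
        System.out.println(target.getNameServiceId() + "." +
            target.getNameNodeId());

        // With no fencing methods configured, the new null check makes this
        // fail fast instead of deferring the error to fence time.
        target.checkFencingConfigured();  // throws BadFencingConfigurationException
      }
    }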
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HAZKInfo.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HAZKInfo.proto
new file mode 100644
index 00000000000..7836dbe5f49
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HAZKInfo.proto
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+option java_package = "org.apache.hadoop.hdfs.server.namenode.ha.proto";
+option java_outer_classname = "HAZKInfoProtos";
+
+message ActiveNodeInfo {
+  required string nameserviceId = 1;
+  required string namenodeId = 2;
+
+  required string hostname = 3;
+  required int32 port = 4;
+}
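This message backs the targetToData()/dataToTarget() pair in DFSZKFailoverController above: the bytes stored under the active NameNode's znode are exactly a serialized ActiveNodeInfo. A round-trip sketch, for illustration only (not part of the patch; all field values are made up), using the class protoc generates from the message above:

    // Illustration only. Round-trips the payload the failover controller
    // stores in ZooKeeper.
    import org.apache.hadoop.hdfs.server.namenode.ha.proto.HAZKInfoProtos.ActiveNodeInfo;

    public class ActiveNodeInfoSketch {
      public static void main(String[] args) throws Exception {
        // What targetToData() writes for the active NameNode.
        byte[] data = ActiveNodeInfo.newBuilder()
            .setNameserviceId("ns1")
            .setNamenodeId("nn1")
            .setHostname("nn1.example.com")
            .setPort(8020)
            .build()
            .toByteArray();

        // What dataToTarget() parses back; the controller then verifies
        // the host:port against the address from local configuration.
        ActiveNodeInfo info = ActiveNodeInfo.parseFrom(data);
        System.out.println(info.getNameserviceId() + "." +
            info.getNamenodeId() + " at " +
            info.getHostname() + ":" + info.getPort());
      }
    }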
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSZKFailoverController.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSZKFailoverController.java
new file mode 100644
index 00000000000..5b4cfed667a
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSZKFailoverController.java
@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.ha.NodeFencer;
+import org.apache.hadoop.ha.ZKFailoverController;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
+import org.apache.hadoop.ha.TestNodeFencer.AlwaysSucceedFencer;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.tools.DFSZKFailoverController;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.MultithreadedTestUtil.TestContext;
+import org.apache.hadoop.test.MultithreadedTestUtil.TestingThread;
+import org.apache.zookeeper.test.ClientBase;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+
+import com.google.common.base.Supplier;
+
+public class TestDFSZKFailoverController extends ClientBase {
+  private Configuration conf;
+  private MiniDFSCluster cluster;
+  private TestContext ctx;
+  private ZKFCThread thr1, thr2;
+  private FileSystem fs;
+
+  @Override
+  public void setUp() throws Exception {
+    // build.test.dir is used by zookeeper
+    new File(System.getProperty("build.test.dir", "build")).mkdirs();
+    super.setUp();
+  }
+
+  @Before
+  public void setup() throws Exception {
+    conf = new Configuration();
+    conf.set(ZKFailoverController.ZK_QUORUM_KEY, hostPort);
+    conf.set(NodeFencer.CONF_METHODS_KEY,
+        AlwaysSucceedFencer.class.getName());
+
+    MiniDFSNNTopology topology = new MiniDFSNNTopology()
+        .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
+            .addNN(new MiniDFSNNTopology.NNConf("nn1").setIpcPort(10001))
+            .addNN(new MiniDFSNNTopology.NNConf("nn2").setIpcPort(10002)));
+    cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(topology)
+        .numDataNodes(0)
+        .build();
+    cluster.waitActive();
+
+    ctx = new TestContext();
+    ctx.addThread(thr1 = new ZKFCThread(ctx, 0));
+    assertEquals(0, thr1.zkfc.run(new String[]{"-formatZK"}));
+
+    thr1.start();
+    waitForHAState(0, HAServiceState.ACTIVE);
+
+    ctx.addThread(thr2 = new ZKFCThread(ctx, 1));
+    thr2.start();
+
+    fs = HATestUtil.configureFailoverFs(cluster, conf);
+  }
+
+  @After
+  public void shutdown() throws Exception {
+    cluster.shutdown();
+
+    if (thr1 != null) {
+      thr1.interrupt();
+    }
+    if (thr2 != null) {
+      thr2.interrupt();
+    }
+    if (ctx != null) {
+      ctx.stop();
+    }
+  }
+
+  /**
+   * Test that automatic failover is triggered by shutting the
+   * active NN down.
+   */
+  @Test(timeout=30000)
+  public void testFailoverAndBackOnNNShutdown() throws Exception {
+    Path p1 = new Path("/dir1");
+    Path p2 = new Path("/dir2");
+
+    // Write some data on the first NN
+    fs.mkdirs(p1);
+    // Shut it down, causing automatic failover
+    cluster.shutdownNameNode(0);
+    // Data should still exist. Write some on the new NN
+    assertTrue(fs.exists(p1));
+    fs.mkdirs(p2);
+    assertEquals(AlwaysSucceedFencer.getLastFencedService().getAddress(),
+        thr1.zkfc.getLocalTarget().getAddress());
+
+    // Start the first node back up
+    cluster.restartNameNode(0);
+    // This should have no effect -- the new node should be STANDBY.
+    waitForHAState(0, HAServiceState.STANDBY);
+    assertTrue(fs.exists(p1));
+    assertTrue(fs.exists(p2));
+    // Shut down the second node, which should fail back to the first
+    cluster.shutdownNameNode(1);
+    waitForHAState(0, HAServiceState.ACTIVE);
+
+    // First node should see what was written on the second node while it
+    // was down.
+    assertTrue(fs.exists(p1));
+    assertTrue(fs.exists(p2));
+    assertEquals(AlwaysSucceedFencer.getLastFencedService().getAddress(),
+        thr2.zkfc.getLocalTarget().getAddress());
+  }
+
+  private void waitForHAState(int nnidx, final HAServiceState state)
+      throws TimeoutException, InterruptedException {
+    final NameNode nn = cluster.getNameNode(nnidx);
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        try {
+          return nn.getRpcServer().getServiceStatus().getState() == state;
+        } catch (Exception e) {
+          e.printStackTrace();
+          return false;
+        }
+      }
+    }, 50, 5000);
+  }
+
+  /**
+   * Test-thread which runs a ZK Failover Controller corresponding
+   * to a given NameNode in the minicluster.
+   */
+  private class ZKFCThread extends TestingThread {
+    private final DFSZKFailoverController zkfc;
+
+    public ZKFCThread(TestContext ctx, int idx) {
+      super(ctx);
+      this.zkfc = new DFSZKFailoverController();
+      zkfc.setConf(cluster.getConfiguration(idx));
+    }
+
+    @Override
+    public void doWork() throws Exception {
+      try {
+        assertEquals(0, zkfc.run(new String[0]));
+      } catch (InterruptedException ie) {
+        // Interrupted by main thread, that's OK.
+      }
+    }
+  }
+
+}