From 929989e0d3b7db75130c3eea6a950c65b57dc8f3 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 4 Apr 2012 20:19:54 +0000 Subject: [PATCH] HDFS-3102. Add CLI tool to initialize the shared-edits dir. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1309582 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../server/common/HdfsServerConstants.java | 3 +- .../hdfs/server/namenode/FSNamesystem.java | 26 +++- .../hadoop/hdfs/server/namenode/NameNode.java | 93 +++++++++++++- .../namenode/ha/TestBootstrapStandby.java | 1 - .../ha/TestInitializeSharedEdits.java | 120 ++++++++++++++++++ 6 files changed, 233 insertions(+), 12 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index d3ec859e59e..6ac807d58cc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -75,6 +75,8 @@ Release 2.0.0 - UNRELEASED HDFS-3000. Add a public API for setting quotas. (atm) + HDFS-3102. Add CLI tool to initialize the shared-edits dir. (atm) + IMPROVEMENTS HDFS-2018. Move all journal stream management code into one place. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java index 0cd7336a1d2..710fa4df35e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java @@ -53,7 +53,8 @@ public final class HdfsServerConstants { ROLLBACK("-rollback"), FINALIZE("-finalize"), IMPORT ("-importCheckpoint"), - BOOTSTRAPSTANDBY("-bootstrapStandby"); + BOOTSTRAPSTANDBY("-bootstrapStandby"), + INITIALIZESHAREDEDITS("-initializeSharedEdits"); private String name = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 239ed293980..5e24a22fbbf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -164,6 +164,7 @@ import org.apache.hadoop.hdfs.server.namenode.ha.HAState; import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer; import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; +import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; @@ -346,10 +347,27 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @throws IOException if loading fails */ public static FSNamesystem 
loadFromDisk(Configuration conf) - throws IOException { + throws IOException { Collection namespaceDirs = FSNamesystem.getNamespaceDirs(conf); List namespaceEditsDirs = FSNamesystem.getNamespaceEditsDirs(conf); + return loadFromDisk(conf, namespaceDirs, namespaceEditsDirs); + } + + /** + * Instantiates an FSNamesystem loaded from the image and edits + * directories passed. + * + * @param conf the Configuration which specifies the storage directories + * from which to load + * @param namespaceDirs directories to load the fsimages from + * @param namespaceEditsDirs directories to load the edits from + * @return an FSNamesystem which contains the loaded namespace + * @throws IOException if loading fails + */ + public static FSNamesystem loadFromDisk(Configuration conf, + Collection namespaceDirs, List namespaceEditsDirs) + throws IOException { if (namespaceDirs.size() == 1) { LOG.warn("Only one " + DFS_NAMENODE_NAME_DIR_KEY @@ -370,8 +388,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, HAUtil.isHAEnabled(conf, nameserviceId)); long timeTakenToLoadFSImage = now() - loadStart; LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs"); - NameNode.getNameNodeMetrics().setFsImageLoadTime( - (int) timeTakenToLoadFSImage); + NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics(); + if (nnMetrics != null) { + nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage); + } return namesystem; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 4ffa98d0aea..0c3117aedc4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -58,6 +58,7 @@ import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import 
org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; +import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.NetUtils; @@ -70,6 +71,9 @@ import org.apache.hadoop.tools.GetUserMappingsProtocol; import org.apache.hadoop.util.ServicePlugin; import org.apache.hadoop.util.StringUtils; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; + /********************************************************** * NameNode serves as both directory namespace manager and * "inode table" for the Hadoop DFS. There is a single NameNode @@ -729,6 +733,67 @@ public class NameNode { + "to true in order to format this filesystem"); } } + + @VisibleForTesting + public static boolean initializeSharedEdits(Configuration conf) { + return initializeSharedEdits(conf, true); + } + + @VisibleForTesting + public static boolean initializeSharedEdits(Configuration conf, + boolean force) { + return initializeSharedEdits(conf, force, false); + } + + /** + * Format a new shared edits dir. 
+ * + * @param conf configuration + * @param force format regardless of whether or not the shared edits dir exists + * @param interactive prompt the user when a dir exists + * @return true if the command aborts, false otherwise + */ + private static boolean initializeSharedEdits(Configuration conf, + boolean force, boolean interactive) { + NNStorage existingStorage = null; + try { + FSNamesystem fsns = FSNamesystem.loadFromDisk(conf, + FSNamesystem.getNamespaceDirs(conf), + FSNamesystem.getNamespaceEditsDirs(conf, false)); + + existingStorage = fsns.getFSImage().getStorage(); + + Collection sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf); + if (!confirmFormat(sharedEditsDirs, force, interactive)) { + return true; // aborted + } + NNStorage newSharedStorage = new NNStorage(conf, + Lists.newArrayList(), + sharedEditsDirs); + + newSharedStorage.format(new NamespaceInfo( + existingStorage.getNamespaceID(), + existingStorage.getClusterID(), + existingStorage.getBlockPoolID(), + existingStorage.getCTime(), + existingStorage.getDistributedUpgradeVersion())); + } catch (Exception e) { + LOG.error("Could not format shared edits dir", e); + return true; // aborted + } finally { + // Have to unlock storage explicitly for the case when we're running in a + // unit test, which runs in the same JVM as NNs. 
+ if (existingStorage != null) { + try { + existingStorage.unlockAll(); + } catch (IOException ioe) { + LOG.warn("Could not unlock storage directories", ioe); + return true; // aborted + } + } + } + return false; // did not abort + } private static boolean finalize(Configuration conf, boolean isConfirmationNeeded @@ -763,7 +828,8 @@ public class NameNode { StartupOption.ROLLBACK.getName() + "] | [" + StartupOption.FINALIZE.getName() + "] | [" + StartupOption.IMPORT.getName() + "] | [" + - StartupOption.BOOTSTRAPSTANDBY.getName() + "]"); + StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" + + StartupOption.INITIALIZESHAREDEDITS.getName() + "]"); } private static StartupOption parseArguments(String args[]) { @@ -804,6 +870,9 @@ public class NameNode { } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) { startOpt = StartupOption.BOOTSTRAPSTANDBY; return startOpt; + } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) { + startOpt = StartupOption.INITIALIZESHAREDEDITS; + return startOpt; } else { return null; } @@ -868,29 +937,39 @@ public class NameNode { } switch (startOpt) { - case FORMAT: + case FORMAT: { boolean aborted = format(conf, false); System.exit(aborted ? 1 : 0); return null; // avoid javac warning - case GENCLUSTERID: + } + case GENCLUSTERID: { System.err.println("Generating new cluster id:"); System.out.println(NNStorage.newClusterID()); System.exit(0); return null; - case FINALIZE: - aborted = finalize(conf, true); + } + case FINALIZE: { + boolean aborted = finalize(conf, true); System.exit(aborted ? 1 : 0); return null; // avoid javac warning - case BOOTSTRAPSTANDBY: + } + case BOOTSTRAPSTANDBY: { String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length); int rc = BootstrapStandby.run(toolArgs, conf); System.exit(rc); return null; // avoid warning + } + case INITIALIZESHAREDEDITS: { + boolean aborted = initializeSharedEdits(conf, false, true); + System.exit(aborted ? 
1 : 0); + return null; // avoid warning + } case BACKUP: - case CHECKPOINT: + case CHECKPOINT: { NamenodeRole role = startOpt.toNodeRole(); DefaultMetricsSystem.initialize(role.toString().replace(" ", "")); return new BackupNode(conf, role); + } default: DefaultMetricsSystem.initialize("NameNode"); return new NameNode(conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java index a74f3434ae4..fe3fe749ce9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java @@ -36,7 +36,6 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils.LogCapturer; -import org.codehaus.jackson.sym.NameN; import org.junit.After; import org.junit.Before; import org.junit.Test; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java new file mode 100644 index 00000000000..74aeaab4f28 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.File; +import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestInitializeSharedEdits { + + private static final Log LOG = LogFactory.getLog(TestInitializeSharedEdits.class); + + private static final Path TEST_PATH = new Path("/test"); + private Configuration conf; + private MiniDFSCluster cluster; + + @Before + public void setupCluster() throws IOException { + conf = new Configuration(); + + MiniDFSNNTopology topology = MiniDFSNNTopology.simpleHATopology(); + + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(topology) + .numDataNodes(0) + .build(); + cluster.waitActive(); + + cluster.shutdownNameNode(0); + cluster.shutdownNameNode(1); + File sharedEditsDir = new File(cluster.getSharedEditsDir(0, 1)); + assertTrue(FileUtil.fullyDelete(sharedEditsDir)); + } + + 
@After + public void shutdownCluster() throws IOException { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test + public void testInitializeSharedEdits() throws Exception { + // Make sure we can't currently start either NN. + try { + cluster.restartNameNode(0, false); + fail("Should not have been able to start NN1 without shared dir"); + } catch (IOException ioe) { + LOG.info("Got expected exception", ioe); + GenericTestUtils.assertExceptionContains( + "Cannot start an HA namenode with name dirs that need recovery", ioe); + } + try { + cluster.restartNameNode(1, false); + fail("Should not have been able to start NN2 without shared dir"); + } catch (IOException ioe) { + LOG.info("Got expected exception", ioe); + GenericTestUtils.assertExceptionContains( + "Cannot start an HA namenode with name dirs that need recovery", ioe); + } + + // Initialize the shared edits dir. + assertFalse(NameNode.initializeSharedEdits(conf)); + + // Now should be able to start both NNs. Pass "false" here so that we don't + // try to waitActive on all NNs, since the second NN isn't running yet. + cluster.restartNameNode(0, false); + cluster.restartNameNode(1, true); + + // Make sure HA is working. + cluster.transitionToActive(0); + FileSystem fs = null; + try { + fs = HATestUtil.configureFailoverFs(cluster, conf); + assertTrue(fs.mkdirs(TEST_PATH)); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + assertTrue(fs.isDirectory(TEST_PATH)); + } finally { + if (fs != null) { + fs.close(); + } + } + } + + @Test + public void testDontOverWriteExistingDir() { + assertFalse(NameNode.initializeSharedEdits(conf, false)); + assertTrue(NameNode.initializeSharedEdits(conf, false)); + } +}