HDFS-3102. Add CLI tool to initialize the shared-edits dir. Contributed by Aaron T. Myers.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1309580 13f79535-47bb-0310-9956-ffa450edef68
Aaron Myers 2012-04-04 20:15:08 +00:00
parent fcacb14a1e
commit ba688e11c1
6 changed files with 233 additions and 12 deletions
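In brief: this change adds a NameNode startup option, -initializeSharedEdits, run as
"hdfs namenode -initializeSharedEdits". It loads the namespace from the local name and
edits directories, then formats the configured shared edits directory with the existing
namespace info (namespace ID, cluster ID, block pool ID, cTime) so that both NameNodes
in an HA pair can start against a common journal. The pieces below: a new StartupOption
constant, a loadFromDisk() overload in FSNamesystem, the initializeSharedEdits() logic
in NameNode, and a new TestInitializeSharedEdits test.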

View File

@@ -184,6 +184,8 @@ Release 2.0.0 - UNRELEASED
     HDFS-3000. Add a public API for setting quotas. (atm)
 
+    HDFS-3102. Add CLI tool to initialize the shared-edits dir. (atm)
+
   IMPROVEMENTS
 
     HDFS-2018. Move all journal stream management code into one place.

View File

@@ -53,7 +53,8 @@ public final class HdfsServerConstants {
     ROLLBACK("-rollback"),
     FINALIZE("-finalize"),
     IMPORT("-importCheckpoint"),
-    BOOTSTRAPSTANDBY("-bootstrapStandby");
+    BOOTSTRAPSTANDBY("-bootstrapStandby"),
+    INITIALIZESHAREDEDITS("-initializeSharedEdits");
 
     private String name = null;
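For context, each StartupOption constant couples an enum value to its command-line flag;
NameNode.parseArguments() (later in this commit) matches argv against getName()
case-insensitively. A minimal standalone sketch of the pattern follows; the parse()
helper here is hypothetical, and the real matching is the if/else chain shown further
down:

  // Hypothetical standalone sketch, not part of this commit.
  enum StartupOption {
    BOOTSTRAPSTANDBY("-bootstrapStandby"),
    INITIALIZESHAREDEDITS("-initializeSharedEdits");

    private final String name;

    StartupOption(String name) {
      this.name = name;
    }

    public String getName() {
      return name;
    }

    // Hypothetical helper: NameNode itself uses an explicit if/else chain.
    static StartupOption parse(String flag) {
      for (StartupOption opt : values()) {
        if (opt.getName().equalsIgnoreCase(flag)) {
          return opt;
        }
      }
      return null;
    }
  }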

View File

@@ -164,6 +164,7 @@ import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
 import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
 import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState;
 import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
+import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
@@ -346,10 +347,27 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
    * @throws IOException if loading fails
    */
   public static FSNamesystem loadFromDisk(Configuration conf)
-      throws IOException {
+      throws IOException {
+    Collection<URI> namespaceDirs = FSNamesystem.getNamespaceDirs(conf);
+    List<URI> namespaceEditsDirs =
+        FSNamesystem.getNamespaceEditsDirs(conf);
+    return loadFromDisk(conf, namespaceDirs, namespaceEditsDirs);
+  }
+
+  /**
+   * Instantiates an FSNamesystem loaded from the image and edits
+   * directories passed.
+   *
+   * @param conf the Configuration which specifies the storage directories
+   *             from which to load
+   * @param namespaceDirs directories to load the fsimages
+   * @param namespaceEditsDirs directories to load the edits from
+   * @return an FSNamesystem which contains the loaded namespace
+   * @throws IOException if loading fails
+   */
+  public static FSNamesystem loadFromDisk(Configuration conf,
+      Collection<URI> namespaceDirs, List<URI> namespaceEditsDirs)
+      throws IOException {
     if (namespaceDirs.size() == 1) {
       LOG.warn("Only one " + DFS_NAMENODE_NAME_DIR_KEY
@@ -370,8 +388,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
         HAUtil.isHAEnabled(conf, nameserviceId));
     long timeTakenToLoadFSImage = now() - loadStart;
     LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs");
-    NameNode.getNameNodeMetrics().setFsImageLoadTime(
-        (int) timeTakenToLoadFSImage);
+    NameNodeMetrics nnMetrics = NameNode.getNameNodeMetrics();
+    if (nnMetrics != null) {
+      nnMetrics.setFsImageLoadTime((int) timeTakenToLoadFSImage);
+    }
     return namesystem;
   }
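Why the new overload matters: the -initializeSharedEdits path has to load the namespace
from the local directories only, so it passes an edits-dir list that excludes the shared
dir. A hedged fragment showing the call shape, mirroring its use in
NameNode.initializeSharedEdits() below (the 'false' argument to getNamespaceEditsDirs is
read here as "exclude the shared edits dirs", an assumption based on its use in this
commit):

  // Load the namespace from local storage only (hypothetical helper name).
  static FSNamesystem loadLocalOnly(Configuration conf) throws IOException {
    return FSNamesystem.loadFromDisk(conf,
        FSNamesystem.getNamespaceDirs(conf),
        FSNamesystem.getNamespaceEditsDirs(conf, false));
  }

The null check on nnMetrics in the second hunk fits the same story: when loadFromDisk()
is called from the new command-line tool rather than a running NameNode, metrics have
not been initialized yet, so getNameNodeMetrics() can return null (a hedged reading, but
consistent with the tool's standalone entry point).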

View File

@@ -58,6 +58,7 @@ import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.ipc.StandbyException;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.net.NetUtils;
@@ -70,6 +71,9 @@ import org.apache.hadoop.tools.GetUserMappingsProtocol;
 import org.apache.hadoop.util.ServicePlugin;
 import org.apache.hadoop.util.StringUtils;
 
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+
 /**********************************************************
  * NameNode serves as both directory namespace manager and
  * "inode table" for the Hadoop DFS. There is a single NameNode
@@ -729,6 +733,67 @@ public class NameNode {
           + "to true in order to format this filesystem");
     }
   }
+  @VisibleForTesting
+  public static boolean initializeSharedEdits(Configuration conf) {
+    return initializeSharedEdits(conf, true);
+  }
+
+  @VisibleForTesting
+  public static boolean initializeSharedEdits(Configuration conf,
+      boolean force) {
+    return initializeSharedEdits(conf, force, false);
+  }
+
+  /**
+   * Format a new shared edits dir.
+   *
+   * @param conf configuration
+   * @param force format regardless of whether or not the shared edits dir exists
+   * @param interactive prompt the user when a dir exists
+   * @return true if the command aborts, false otherwise
+   */
+  private static boolean initializeSharedEdits(Configuration conf,
+      boolean force, boolean interactive) {
+    NNStorage existingStorage = null;
+    try {
+      FSNamesystem fsns = FSNamesystem.loadFromDisk(conf,
+          FSNamesystem.getNamespaceDirs(conf),
+          FSNamesystem.getNamespaceEditsDirs(conf, false));
+      existingStorage = fsns.getFSImage().getStorage();
+
+      Collection<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
+      if (!confirmFormat(sharedEditsDirs, force, interactive)) {
+        return true; // aborted
+      }
+      NNStorage newSharedStorage = new NNStorage(conf,
+          Lists.<URI>newArrayList(),
+          sharedEditsDirs);
+
+      newSharedStorage.format(new NamespaceInfo(
+          existingStorage.getNamespaceID(),
+          existingStorage.getClusterID(),
+          existingStorage.getBlockPoolID(),
+          existingStorage.getCTime(),
+          existingStorage.getDistributedUpgradeVersion()));
+    } catch (Exception e) {
+      LOG.error("Could not format shared edits dir", e);
+      return true; // aborted
+    } finally {
+      // Have to unlock storage explicitly for the case when we're running in a
+      // unit test, which runs in the same JVM as NNs.
+      if (existingStorage != null) {
+        try {
+          existingStorage.unlockAll();
+        } catch (IOException ioe) {
+          LOG.warn("Could not unlock storage directories", ioe);
+          return true; // aborted
+        }
+      }
+    }
+    return false; // did not abort
+  }
+
   private static boolean finalize(Configuration conf,
                                   boolean isConfirmationNeeded
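Two things worth noting in the method above. First, the new shared storage is formatted
with the existing NamespaceInfo rather than a fresh one: namespace ID, cluster ID, block
pool ID and cTime are copied from local storage, since a shared journal with a different
namespace identity would be rejected by the NameNodes (a hedged reading of the code, not
a claim from the commit message). Second, the return convention is "true means aborted,
false means success", which createNameNode() below maps to exit codes 1 and 0.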
@@ -763,7 +828,8 @@ public class NameNode {
       StartupOption.ROLLBACK.getName() + "] | [" +
       StartupOption.FINALIZE.getName() + "] | [" +
       StartupOption.IMPORT.getName() + "] | [" +
-      StartupOption.BOOTSTRAPSTANDBY.getName() + "]");
+      StartupOption.BOOTSTRAPSTANDBY.getName() + "] | [" +
+      StartupOption.INITIALIZESHAREDEDITS.getName() + "]");
   }
 
   private static StartupOption parseArguments(String args[]) {
@@ -804,6 +870,9 @@ public class NameNode {
       } else if (StartupOption.BOOTSTRAPSTANDBY.getName().equalsIgnoreCase(cmd)) {
         startOpt = StartupOption.BOOTSTRAPSTANDBY;
         return startOpt;
+      } else if (StartupOption.INITIALIZESHAREDEDITS.getName().equalsIgnoreCase(cmd)) {
+        startOpt = StartupOption.INITIALIZESHAREDEDITS;
+        return startOpt;
       } else {
         return null;
       }
@@ -868,29 +937,39 @@ public class NameNode {
     }
 
     switch (startOpt) {
-      case FORMAT:
+      case FORMAT: {
         boolean aborted = format(conf, false);
         System.exit(aborted ? 1 : 0);
         return null; // avoid javac warning
-      case GENCLUSTERID:
+      }
+      case GENCLUSTERID: {
         System.err.println("Generating new cluster id:");
         System.out.println(NNStorage.newClusterID());
         System.exit(0);
         return null;
-      case FINALIZE:
-        aborted = finalize(conf, true);
+      }
+      case FINALIZE: {
+        boolean aborted = finalize(conf, true);
         System.exit(aborted ? 1 : 0);
         return null; // avoid javac warning
-      case BOOTSTRAPSTANDBY:
+      }
+      case BOOTSTRAPSTANDBY: {
         String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
         int rc = BootstrapStandby.run(toolArgs, conf);
         System.exit(rc);
         return null; // avoid warning
+      }
+      case INITIALIZESHAREDEDITS: {
+        boolean aborted = initializeSharedEdits(conf, false, true);
+        System.exit(aborted ? 1 : 0);
+        return null; // avoid warning
+      }
       case BACKUP:
-      case CHECKPOINT:
+      case CHECKPOINT: {
         NamenodeRole role = startOpt.toNodeRole();
         DefaultMetricsSystem.initialize(role.toString().replace(" ", ""));
         return new BackupNode(conf, role);
+      }
       default:
         DefaultMetricsSystem.initialize("NameNode");
         return new NameNode(conf);
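End to end, the new option is reached as "hdfs namenode -initializeSharedEdits":
parseArguments() maps the flag to StartupOption.INITIALIZESHAREDEDITS, and the switch
above calls the private overload with force=false and interactive=true, exiting 1 on
abort and 0 on success. A hedged sketch of driving the same logic programmatically, as
the new test does (the driver class is hypothetical; HdfsConfiguration is assumed from
org.apache.hadoop.hdfs):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hdfs.HdfsConfiguration;
  import org.apache.hadoop.hdfs.server.namenode.NameNode;

  // Hypothetical driver, not part of this commit.
  public class InitSharedEditsDriver {
    public static void main(String[] args) {
      Configuration conf = new HdfsConfiguration();
      // force=false: abort (returns true) if the shared edits dir already exists.
      boolean aborted = NameNode.initializeSharedEdits(conf, false);
      System.exit(aborted ? 1 : 0);
    }
  }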

View File

@ -36,7 +36,6 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
import org.codehaus.jackson.sym.NameN;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

View File

@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+public class TestInitializeSharedEdits {
+
+  private static final Log LOG = LogFactory.getLog(TestInitializeSharedEdits.class);
+
+  private static final Path TEST_PATH = new Path("/test");
+  private Configuration conf;
+  private MiniDFSCluster cluster;
+
+  @Before
+  public void setupCluster() throws IOException {
+    conf = new Configuration();
+
+    MiniDFSNNTopology topology = MiniDFSNNTopology.simpleHATopology();
+
+    cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(topology)
+        .numDataNodes(0)
+        .build();
+    cluster.waitActive();
+
+    cluster.shutdownNameNode(0);
+    cluster.shutdownNameNode(1);
+    File sharedEditsDir = new File(cluster.getSharedEditsDir(0, 1));
+    assertTrue(FileUtil.fullyDelete(sharedEditsDir));
+  }
+
+  @After
+  public void shutdownCluster() throws IOException {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testInitializeSharedEdits() throws Exception {
+    // Make sure we can't currently start either NN.
+    try {
+      cluster.restartNameNode(0, false);
+      fail("Should not have been able to start NN1 without shared dir");
+    } catch (IOException ioe) {
+      LOG.info("Got expected exception", ioe);
+      GenericTestUtils.assertExceptionContains(
+          "Cannot start an HA namenode with name dirs that need recovery", ioe);
+    }
+    try {
+      cluster.restartNameNode(1, false);
+      fail("Should not have been able to start NN2 without shared dir");
+    } catch (IOException ioe) {
+      LOG.info("Got expected exception", ioe);
+      GenericTestUtils.assertExceptionContains(
+          "Cannot start an HA namenode with name dirs that need recovery", ioe);
+    }
+
+    // Initialize the shared edits dir.
+    assertFalse(NameNode.initializeSharedEdits(conf));
+
+    // Now should be able to start both NNs. Pass "false" here so that we don't
+    // try to waitActive on all NNs, since the second NN doesn't exist yet.
+    cluster.restartNameNode(0, false);
+    cluster.restartNameNode(1, true);
+
+    // Make sure HA is working.
+    cluster.transitionToActive(0);
+    FileSystem fs = null;
+    try {
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(TEST_PATH));
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+      assertTrue(fs.isDirectory(TEST_PATH));
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+    }
+  }
+
+  @Test
+  public void testDontOverWriteExistingDir() {
+    assertFalse(NameNode.initializeSharedEdits(conf, false));
+    assertTrue(NameNode.initializeSharedEdits(conf, false));
+  }
+}
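testDontOverWriteExistingDir() leans on the abort convention: the first call formats the
shared dir deleted in setupCluster() and returns false (success); the second finds the
dir present and, with force false, aborts with true. Re-formatting an existing dir would
take the force variant, assuming the "format regardless" semantics of the javadoc above:

  // Hedged sketch, assuming the force semantics described in the javadoc.
  assertTrue(NameNode.initializeSharedEdits(conf, false));  // dir exists: aborts
  assertFalse(NameNode.initializeSharedEdits(conf, true));  // force: re-formats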