HDFS-3695. Genericize format() to non-file JournalManagers. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1371513 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2012-08-09 22:13:54 +00:00
parent 0733adf670
commit 9d0f8792a9
13 changed files with 271 additions and 76 deletions

View File

@ -112,6 +112,8 @@ Trunk (unreleased changes)
HDFS-3768. Exception in TestJettyHelper is incorrect.
(Eli Reisman via jghoman)
HDFS-3695. Genericize format() to non-file JournalManagers. (todd)
OPTIMIZATIONS
BUG FIXES

View File

@ -271,6 +271,23 @@ public void processResult(int rc, String path, Object ctx, String name) {
}
}
/**
 * Format hook for the BookKeeper journal manager; currently a no-op.
 *
 * Nothing is cleared here. The method only logs a notice that explicit
 * reformatting is not supported.
 *
 * @param ns namespace info supplied by the caller (unused)
 */
@Override
public void format(NamespaceInfo ns) {
  // Currently, BKJM automatically formats itself when first accessed.
  // TODO: change over to explicit formatting so that the admin can
  // clear out the BK storage when reformatting a cluster.
  // Note the trailing space after "it will": without it the log line
  // reads "it willbe formatted".
  LOG.info("Not formatting " + this + " - BKJM does not currently " +
      "support reformatting. If it has not been used before, it will " +
      "be formatted automatically upon first use.");
}
/**
* Reports that this journal never needs a format confirmation.
*
* @return false, unconditionally
* @throws IOException declared by the signature; not thrown by this body
*/
@Override
public boolean hasSomeData() throws IOException {
// Don't confirm format on BKJM, since format() is currently a
// no-op anyway
return false;
}
/**
* Start a new log segment in a BookKeeper ledger.
* First ensure that we have the write lock for this journal.

View File

@ -39,6 +39,7 @@
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.VersionInfo;
@ -219,7 +220,7 @@ public String listStorageDirectories() {
* One of the storage directories.
*/
@InterfaceAudience.Private
public static class StorageDirectory {
public static class StorageDirectory implements FormatConfirmable {
final File root; // root directory
final boolean useLock; // flag to enable storage lock
final StorageDirType dirType; // storage dir type
@ -574,6 +575,32 @@ public void doRecover(StorageState curState) throws IOException {
throw new IOException("Unexpected FS state: " + curState);
}
}
/**
 * Reports whether this storage directory appears to contain data, in
 * which case the user should be prompted before it is formatted.
 *
 * @return false if the root does not exist or is an empty directory;
 *         true if the root is not a directory or has at least one entry
 * @throws IOException if the SD cannot be accessed due to an IO error
 */
@Override
public boolean hasSomeData() throws IOException {
  // A missing root is fine: there is nothing that formatting could lose.
  if (!root.exists()) {
    return false;
  }
  // A plain file where a directory is expected must not be formatted
  // silently, so it counts as data. Otherwise the directory has data
  // exactly when it is non-empty (an empty dir can format without prompt).
  return !root.isDirectory() || FileUtil.listFiles(root).length != 0;
}
/**
* Lock storage to provide exclusive access.
@ -767,6 +794,68 @@ public static void checkVersionUpgradable(int oldVersion)
}
/**
 * Walk the given {@link FormatConfirmable} items and decide whether
 * formatting may go ahead.
 *
 * Items that report no data are accepted silently. For an item that
 * does report data: with 'force' a notice is printed and the item is
 * accepted; in non-interactive mode the format is refused; otherwise
 * the user is prompted and a negative answer refuses the format.
 *
 * @param items the objects to check before formatting
 * @param force format regardless of whether data exists
 * @param interactive prompt the user when data exists
 * @return true if formatting should proceed
 * @throws IOException if some storage cannot be accessed
 */
public static boolean confirmFormat(
    Iterable<? extends FormatConfirmable> items,
    boolean force, boolean interactive) throws IOException {
  for (FormatConfirmable candidate : items) {
    if (candidate.hasSomeData()) {
      if (force) {
        // Don't confirm, always format.
        System.err.println(
            "Data exists in " + candidate + ". Formatting anyway.");
      } else if (!interactive) {
        // Don't ask - always don't format
        System.err.println(
            "Running in non-interactive mode, and data appears to exist in " +
            candidate + ". Not formatting.");
        return false;
      } else if (!ToolRunner.confirmPrompt(
          "Re-format filesystem in " + candidate + " ?")) {
        System.err.println("Format aborted in " + candidate);
        return false;
      }
    }
  }
  return true;
}
/**
 * Contract for objects whose formatting may need to be confirmed by
 * the user, e.g. during NameNode -format and similar operations.
 *
 * This is currently a storage directory or journal manager.
 */
@InterfaceAudience.Private
public interface FormatConfirmable {

  /**
   * Tells whether the storage looks like it already holds valid data.
   *
   * @return true if the user should be required to confirm the format,
   *         false otherwise
   * @throws IOException if the storage cannot be accessed at all.
   */
  boolean hasSomeData() throws IOException;

  /**
   * Describes the formattable item in a form suitable for display to
   * the user inside a prompt.
   *
   * @return the display string for this item
   */
  String toString();
}
/**
* Get common storage fields.
* Should be overloaded if additional fields need to be get.

View File

@ -22,6 +22,7 @@
import org.apache.hadoop.hdfs.server.protocol.JournalInfo;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
/**
* A JournalManager implementation that uses RPCs to log transactions
@ -38,6 +39,20 @@ class BackupJournalManager implements JournalManager {
this.bnReg = bnReg;
}
/**
 * Always rejects the call: the backup journal is never formatted.
 *
 * @param nsInfo unused
 * @throws UnsupportedOperationException always
 */
@Override
public void format(NamespaceInfo nsInfo) {
  // Formatting is only expected at startup, i.e. before any BackupNode
  // has had a chance to register with the NameNode.
  final String reason = "BackupNode journal should never get formatted";
  throw new UnsupportedOperationException(reason);
}
/**
 * Always rejects the call: the backup journal is not expected to be
 * asked about existing data.
 *
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public boolean hasSomeData() {
  // Carry a message, as format() does, so a stray call is diagnosable
  // from the stack trace alone.
  throw new UnsupportedOperationException(
      "BackupNode journal should never be checked for existing data");
}
@Override
public EditLogOutputStream startLogSegment(long txId) throws IOException {
EditLogBackupOutputStream stm = new EditLogBackupOutputStream(bnReg,

View File

@ -41,6 +41,7 @@
import static org.apache.hadoop.util.ExitUtil.terminate;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddOp;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
@ -333,6 +334,39 @@ synchronized void close() {
state = State.CLOSED;
}
/**
 * Format every configured journal that is not file-based.
 *
 * File-based journals are left alone here because the Storage format
 * code takes care of them.
 *
 * @param nsInfo namespace info handed to each journal's format()
 * @throws IOException declared by the signature
 * @throws IllegalStateException if the log is not between log segments
 */
void formatNonFileJournals(NamespaceInfo nsInfo) throws IOException {
  Preconditions.checkState(state == State.BETWEEN_LOG_SEGMENTS,
      "Bad state: %s", state);

  for (JournalManager journal : journalSet.getJournalManagers()) {
    if (journal instanceof FileJournalManager) {
      continue;
    }
    journal.format(nsInfo);
  }
}
/**
 * Collect the journals that need their own format confirmation.
 *
 * @return a new list holding every configured journal manager that is
 *         not a FileJournalManager
 * @throws IllegalStateException if the log is not between log segments
 */
List<FormatConfirmable> getFormatConfirmables() {
  Preconditions.checkState(state == State.BETWEEN_LOG_SEGMENTS,
      "Bad state: %s", state);

  List<FormatConfirmable> confirmables = Lists.newArrayList();
  for (JournalManager journal : journalSet.getJournalManagers()) {
    // The FJMs are confirmed separately since they are also
    // StorageDirectories
    if (journal instanceof FileJournalManager) {
      continue;
    }
    confirmables.add(journal);
  }
  return confirmables;
}
/**
* Write an operation to the edit log. Do not sync to persistent
* store yet.

View File

@ -40,10 +40,12 @@
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.Storage.StorageState;
import org.apache.hadoop.hdfs.server.common.Util;
import static org.apache.hadoop.util.Time.now;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
@ -139,10 +141,33 @@ void format(FSNamesystem fsn, String clusterId) throws IOException {
fileCount + " files");
NamespaceInfo ns = NNStorage.newNamespaceInfo();
ns.clusterID = clusterId;
storage.format(ns);
editLog.formatNonFileJournals(ns);
saveFSImageInAllDirs(fsn, 0);
}
/**
 * Ask for confirmation before formatting the storage directories and
 * the non-file journals.
 *
 * The storage directories are gathered first, followed by whatever the
 * edit log reports as needing confirmation; the combined list is then
 * passed to {@link Storage#confirmFormat}.
 *
 * @param force format regardless of whether data exists
 * @param interactive prompt the user when data exists
 * @return true if formatting should proceed
 * @throws IOException if some storage cannot be accessed
 */
boolean confirmFormat(boolean force, boolean interactive) throws IOException {
  final List<FormatConfirmable> candidates = Lists.newArrayList();
  for (StorageDirectory dir : storage.dirIterable(null)) {
    candidates.add(dir);
  }
  for (FormatConfirmable journal : editLog.getFormatConfirmables()) {
    candidates.add(journal);
  }
  final boolean proceed =
      Storage.confirmFormat(candidates, force, interactive);
  return proceed;
}
/**
* Analyze storage directories.
* Recover from previous transitions if required.

View File

@ -36,6 +36,7 @@
import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.EditLogValidation;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import com.google.common.annotations.VisibleForTesting;
@ -77,6 +78,22 @@ public FileJournalManager(StorageDirectory sd,
@Override
public void close() throws IOException {}
/**
* Not supported on file-based journals.
*
* @param ns unused
* @throws UnsupportedOperationException always
*/
@Override
public void format(NamespaceInfo ns) {
// Formatting file journals is done by the StorageDirectory
// format code, since they may share their directory with
// checkpoints, etc.
throw new UnsupportedOperationException();
}
/**
* Not supported on file-based journals.
*
* @return never returns normally
* @throws UnsupportedOperationException always
*/
@Override
public boolean hasSomeData() {
// As with format(), file journals are handled through the
// StorageDirectory code, since they may share their directory with
// checkpoints, etc.
throw new UnsupportedOperationException();
}
@Override
synchronized public EditLogOutputStream startLogSegment(long txid)
throws IOException {

View File

@ -23,6 +23,8 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
/**
* A JournalManager is responsible for managing a single place of storing
@ -33,7 +35,14 @@
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface JournalManager extends Closeable {
public interface JournalManager extends Closeable, FormatConfirmable {
/**
* Format the underlying storage, removing any previously
* stored data.
*/
void format(NamespaceInfo ns);
/**
* Begin writing to a new segment of the log stream, which starts at
* the given transaction ID.

View File

@ -33,6 +33,7 @@
import static org.apache.hadoop.util.ExitUtil.terminate;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
@ -172,6 +173,20 @@ public boolean isRequired() {
this.minimumRedundantJournals = minimumRedundantResources;
}
/**
* Not supported on the journal set as a whole.
*
* @param nsInfo unused
* @throws UnsupportedOperationException always
*/
@Override
public void format(NamespaceInfo nsInfo) {
// The iteration is done by FSEditLog itself
throw new UnsupportedOperationException();
}
/**
* Not supported on the journal set as a whole.
*
* @return never returns normally
* @throws UnsupportedOperationException always
*/
@Override
public boolean hasSomeData() throws IOException {
// This is called individually on the underlying journals,
// not on the JournalSet.
throw new UnsupportedOperationException();
}
@Override
public EditLogOutputStream startLogSegment(final long txId) throws IOException {
mapJournalsAndReportErrors(new JournalClosure() {

View File

@ -28,7 +28,6 @@
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.CopyOnWriteArrayList;
@ -58,7 +57,6 @@
import com.google.common.base.Preconditions;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
/**
* NNStorage is responsible for management of the StorageDirectories used by

View File

@ -67,6 +67,7 @@
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.util.AtomicFileOutputStream;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.ipc.Server;
@ -709,9 +710,6 @@ private static boolean format(Configuration conf, boolean force,
dirsToPrompt.addAll(sharedDirs);
List<URI> editDirsToFormat =
FSNamesystem.getNamespaceEditsDirs(conf);
if (!confirmFormat(dirsToPrompt, force, isInteractive)) {
return true; // aborted
}
// if clusterID is not provided - see if you can find the current one
String clusterId = StartupOption.FORMAT.getClusterId();
@ -723,62 +721,16 @@ private static boolean format(Configuration conf, boolean force,
FSImage fsImage = new FSImage(conf, nameDirsToFormat, editDirsToFormat);
FSNamesystem fsn = new FSNamesystem(conf, fsImage);
fsImage.getEditLog().initJournalsForWrite();
if (!fsImage.confirmFormat(force, isInteractive)) {
return true; // aborted
}
fsImage.format(fsn, clusterId);
return false;
}
/**
* Check whether the given storage directories already exist.
* If running in interactive mode, will prompt the user for each
* directory to allow them to format anyway. Otherwise, returns
* false, unless 'force' is specified.
*
* @param dirsToFormat the dirs to check
* @param force format regardless of whether dirs exist
* @param interactive prompt the user when a dir exists
* @return true if formatting should proceed
* @throws IOException
*/
public static boolean confirmFormat(Collection<URI> dirsToFormat,
boolean force, boolean interactive)
throws IOException {
for(Iterator<URI> it = dirsToFormat.iterator(); it.hasNext();) {
URI dirUri = it.next();
if (!dirUri.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) {
System.err.println("Skipping format for directory \"" + dirUri
+ "\". Can only format local directories with scheme \""
+ NNStorage.LOCAL_URI_SCHEME + "\".");
continue;
}
// To validate only file based schemes are formatted
assert dirUri.getScheme().equals(NNStorage.LOCAL_URI_SCHEME) :
"formatting is not supported for " + dirUri;
File curDir = new File(dirUri.getPath());
// Its alright for a dir not to exist, or to exist (properly accessible)
// and be completely empty.
if (!curDir.exists() ||
(curDir.isDirectory() && FileUtil.listFiles(curDir).length == 0))
continue;
if (force) { // Don't confirm, always format.
System.err.println(
"Storage directory exists in " + curDir + ". Formatting anyway.");
continue;
}
if (!interactive) { // Don't ask - always don't format
System.err.println(
"Running in non-interactive mode, and image appears to exist in " +
curDir + ". Not formatting.");
return false;
}
if (!confirmPrompt("Re-format filesystem in " + curDir + " ?")) {
System.err.println("Format aborted in " + curDir);
return false;
}
}
return true;
}
public static void checkAllowFormat(Configuration conf) throws IOException {
if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY,
DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
@ -822,17 +774,26 @@ private static boolean initializeSharedEdits(Configuration conf,
FSNamesystem.getNamespaceEditsDirs(conf, false));
existingStorage = fsns.getFSImage().getStorage();
NamespaceInfo nsInfo = existingStorage.getNamespaceInfo();
Collection<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
if (!confirmFormat(sharedEditsDirs, force, interactive)) {
return true; // aborted
}
NNStorage newSharedStorage = new NNStorage(conf,
List<URI> sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf);
FSImage sharedEditsImage = new FSImage(conf,
Lists.<URI>newArrayList(),
sharedEditsDirs);
sharedEditsImage.getEditLog().initJournalsForWrite();
newSharedStorage.format(existingStorage.getNamespaceInfo());
if (!sharedEditsImage.confirmFormat(force, interactive)) {
return true; // abort
}
NNStorage newSharedStorage = sharedEditsImage.getStorage();
// Call Storage.format instead of FSImage.format here, since we don't
// actually want to save a checkpoint - just prime the dirs with
// the existing namespace info
newSharedStorage.format(nsInfo);
sharedEditsImage.getEditLog().formatNonFileJournals(nsInfo);
// Need to make sure the edit log segments are in good shape to initialize
// the shared edits dir.
fsns.getFSImage().getEditLog().close();

View File

@ -37,6 +37,7 @@
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.NameNodeProxies;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
@ -55,7 +56,6 @@
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
/**
* Tool which allows the standby node's storage directories to be bootstrapped
@ -171,19 +171,18 @@ private int doRun() throws IOException {
" Layout version: " + nsInfo.getLayoutVersion() + "\n" +
"=====================================================");
long imageTxId = proxy.getMostRecentCheckpointTxId();
long curTxId = proxy.getTransactionID();
NNStorage storage = new NNStorage(conf, dirsToFormat, editUrisToFormat);
// Check with the user before blowing away data.
if (!NameNode.confirmFormat(
Sets.union(Sets.newHashSet(dirsToFormat),
Sets.newHashSet(editUrisToFormat)),
if (!Storage.confirmFormat(storage.dirIterable(null),
force, interactive)) {
return ERR_CODE_ALREADY_FORMATTED;
}
long imageTxId = proxy.getMostRecentCheckpointTxId();
long curTxId = proxy.getTransactionID();
// Format the storage (writes VERSION file)
NNStorage storage = new NNStorage(conf, dirsToFormat, editUrisToFormat);
storage.format(nsInfo);
// Load the newly formatted image, using all of the directories (including shared

View File

@ -17,9 +17,7 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.fail;
import static org.junit.Assert.*;
import static org.mockito.Mockito.mock;
import java.io.IOException;
@ -29,6 +27,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.junit.Test;
@ -125,6 +124,8 @@ public void testDummyJournalManager() throws Exception {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
cluster.waitActive();
assertTrue(DummyJournalManager.shouldPromptCalled);
assertTrue(DummyJournalManager.formatCalled);
assertNotNull(DummyJournalManager.conf);
assertEquals(new URI(DUMMY_URI), DummyJournalManager.uri);
assertNotNull(DummyJournalManager.nsInfo);
@ -141,6 +142,8 @@ public static class DummyJournalManager implements JournalManager {
static Configuration conf = null;
static URI uri = null;
static NamespaceInfo nsInfo = null;
static boolean formatCalled = false;
static boolean shouldPromptCalled = false;
public DummyJournalManager(Configuration conf, URI u,
NamespaceInfo nsInfo) {
@ -150,6 +153,11 @@ public DummyJournalManager(Configuration conf, URI u,
DummyJournalManager.nsInfo = nsInfo;
}
/**
* Test stub: records that format() was invoked, nothing else.
*
* @param nsInfo unused
*/
@Override
public void format(NamespaceInfo nsInfo) {
// Static flag so the test can assert on it after cluster startup.
formatCalled = true;
}
@Override
public EditLogOutputStream startLogSegment(long txId) throws IOException {
return mock(EditLogOutputStream.class);
@ -178,6 +186,12 @@ public void recoverUnfinalizedSegments() throws IOException {}
@Override
public void close() throws IOException {}
/**
* Test stub: records that the existing-data check was invoked and
* reports that there is no data.
*
* @return false, unconditionally
*/
@Override
public boolean hasSomeData() throws IOException {
// The flag keeps its "shouldPrompt" name, but it is set by this
// hasSomeData() call.
shouldPromptCalled = true;
return false;
}
}
public static class BadConstructorJournalManager extends DummyJournalManager {