HDFS-7185. The active NameNode will not accept an fsimage sent from the standby during rolling upgrade. Contributed by Jing Zhao.

This commit is contained in:
Jing Zhao 2014-10-15 10:27:30 -07:00
parent 1179a0cb07
commit e68fef4d81
7 changed files with 86 additions and 25 deletions

View File

@ -604,6 +604,9 @@ Release 2.6.0 - UNRELEASED
HDFS-7237. The command "hdfs namenode -rollingUpgrade" throws HDFS-7237. The command "hdfs namenode -rollingUpgrade" throws
ArrayIndexOutOfBoundsException. (szetszwo) ArrayIndexOutOfBoundsException. (szetszwo)
HDFS-7185. The active NameNode will not accept an fsimage sent from the
standby during rolling upgrade. (jing9)
BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS
HDFS-6387. HDFS CLI admin tool for creating & deleting an HDFS-6387. HDFS CLI admin tool for creating & deleting an

View File

@ -322,7 +322,7 @@ public class FSImage implements Closeable {
if (curState != StorageState.NOT_FORMATTED if (curState != StorageState.NOT_FORMATTED
&& startOpt != StartupOption.ROLLBACK) { && startOpt != StartupOption.ROLLBACK) {
// read and verify consistency with other directories // read and verify consistency with other directories
storage.readProperties(sd); storage.readProperties(sd, startOpt);
isFormatted = true; isFormatted = true;
} }
if (startOpt == StartupOption.IMPORT && isFormatted) if (startOpt == StartupOption.IMPORT && isFormatted)
@ -563,7 +563,7 @@ public class FSImage implements Closeable {
assert editLog != null : "editLog must be initialized"; assert editLog != null : "editLog must be initialized";
editLog.openForWrite(); editLog.openForWrite();
storage.writeTransactionIdFileToStorage(editLog.getCurSegmentTxId()); storage.writeTransactionIdFileToStorage(editLog.getCurSegmentTxId());
}; }
/** /**
* Toss the current image and namesystem, reloading from the specified * Toss the current image and namesystem, reloading from the specified
@ -572,7 +572,7 @@ public class FSImage implements Closeable {
void reloadFromImageFile(File file, FSNamesystem target) throws IOException { void reloadFromImageFile(File file, FSNamesystem target) throws IOException {
target.clear(); target.clear();
LOG.debug("Reloading namespace from " + file); LOG.debug("Reloading namespace from " + file);
loadFSImage(file, target, null); loadFSImage(file, target, null, false);
} }
/** /**
@ -603,7 +603,8 @@ public class FSImage implements Closeable {
// otherwise we can load from both IMAGE and IMAGE_ROLLBACK // otherwise we can load from both IMAGE and IMAGE_ROLLBACK
nnfs = EnumSet.of(NameNodeFile.IMAGE, NameNodeFile.IMAGE_ROLLBACK); nnfs = EnumSet.of(NameNodeFile.IMAGE, NameNodeFile.IMAGE_ROLLBACK);
} }
final FSImageStorageInspector inspector = storage.readAndInspectDirs(nnfs); final FSImageStorageInspector inspector = storage
.readAndInspectDirs(nnfs, startOpt);
isUpgradeFinalized = inspector.isUpgradeFinalized(); isUpgradeFinalized = inspector.isUpgradeFinalized();
List<FSImageFile> imageFiles = inspector.getLatestImages(); List<FSImageFile> imageFiles = inspector.getLatestImages();
@ -659,7 +660,7 @@ public class FSImage implements Closeable {
for (int i = 0; i < imageFiles.size(); i++) { for (int i = 0; i < imageFiles.size(); i++) {
try { try {
imageFile = imageFiles.get(i); imageFile = imageFiles.get(i);
loadFSImageFile(target, recovery, imageFile); loadFSImageFile(target, recovery, imageFile, startOpt);
break; break;
} catch (IOException ioe) { } catch (IOException ioe) {
LOG.error("Failed to load image from " + imageFile, ioe); LOG.error("Failed to load image from " + imageFile, ioe);
@ -712,16 +713,18 @@ public class FSImage implements Closeable {
} }
void loadFSImageFile(FSNamesystem target, MetaRecoveryContext recovery, void loadFSImageFile(FSNamesystem target, MetaRecoveryContext recovery,
FSImageFile imageFile) throws IOException { FSImageFile imageFile, StartupOption startupOption) throws IOException {
LOG.debug("Planning to load image :\n" + imageFile); LOG.debug("Planning to load image :\n" + imageFile);
StorageDirectory sdForProperties = imageFile.sd; StorageDirectory sdForProperties = imageFile.sd;
storage.readProperties(sdForProperties); storage.readProperties(sdForProperties, startupOption);
if (NameNodeLayoutVersion.supports( if (NameNodeLayoutVersion.supports(
LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) { LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
// For txid-based layout, we should have a .md5 file // For txid-based layout, we should have a .md5 file
// next to the image file // next to the image file
loadFSImage(imageFile.getFile(), target, recovery); boolean isRollingRollback = RollingUpgradeStartupOption.ROLLBACK
.matches(startupOption);
loadFSImage(imageFile.getFile(), target, recovery, isRollingRollback);
} else if (NameNodeLayoutVersion.supports( } else if (NameNodeLayoutVersion.supports(
LayoutVersion.Feature.FSIMAGE_CHECKSUM, getLayoutVersion())) { LayoutVersion.Feature.FSIMAGE_CHECKSUM, getLayoutVersion())) {
// In 0.22, we have the checksum stored in the VERSION file. // In 0.22, we have the checksum stored in the VERSION file.
@ -733,10 +736,11 @@ public class FSImage implements Closeable {
NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY + NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY +
" not set for storage directory " + sdForProperties.getRoot()); " not set for storage directory " + sdForProperties.getRoot());
} }
loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery); loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery,
false);
} else { } else {
// We don't have any record of the md5sum // We don't have any record of the md5sum
loadFSImage(imageFile.getFile(), null, target, recovery); loadFSImage(imageFile.getFile(), null, target, recovery, false);
} }
} }
@ -894,13 +898,15 @@ public class FSImage implements Closeable {
* it against the MD5 sum stored in its associated .md5 file. * it against the MD5 sum stored in its associated .md5 file.
*/ */
private void loadFSImage(File imageFile, FSNamesystem target, private void loadFSImage(File imageFile, FSNamesystem target,
MetaRecoveryContext recovery) throws IOException { MetaRecoveryContext recovery, boolean requireSameLayoutVersion)
throws IOException {
MD5Hash expectedMD5 = MD5FileUtils.readStoredMd5ForFile(imageFile); MD5Hash expectedMD5 = MD5FileUtils.readStoredMd5ForFile(imageFile);
if (expectedMD5 == null) { if (expectedMD5 == null) {
throw new IOException("No MD5 file found corresponding to image file " throw new IOException("No MD5 file found corresponding to image file "
+ imageFile); + imageFile);
} }
loadFSImage(imageFile, expectedMD5, target, recovery); loadFSImage(imageFile, expectedMD5, target, recovery,
requireSameLayoutVersion);
} }
/** /**
@ -908,13 +914,14 @@ public class FSImage implements Closeable {
* filenames and blocks. * filenames and blocks.
*/ */
private void loadFSImage(File curFile, MD5Hash expectedMd5, private void loadFSImage(File curFile, MD5Hash expectedMd5,
FSNamesystem target, MetaRecoveryContext recovery) throws IOException { FSNamesystem target, MetaRecoveryContext recovery,
boolean requireSameLayoutVersion) throws IOException {
// BlockPoolId is required when the FsImageLoader loads the rolling upgrade // BlockPoolId is required when the FsImageLoader loads the rolling upgrade
// information. Make sure the ID is properly set. // information. Make sure the ID is properly set.
target.setBlockPoolId(this.getBlockPoolID()); target.setBlockPoolId(this.getBlockPoolID());
FSImageFormat.LoaderDelegator loader = FSImageFormat.newLoader(conf, target); FSImageFormat.LoaderDelegator loader = FSImageFormat.newLoader(conf, target);
loader.load(curFile); loader.load(curFile, requireSameLayoutVersion);
// Check that the image digest we loaded matches up with what // Check that the image digest we loaded matches up with what
// we expected // we expected
@ -1033,7 +1040,7 @@ public class FSImage implements Closeable {
} }
/** /**
* @see #saveNamespace(FSNamesystem, Canceler) * @see #saveNamespace(FSNamesystem, NameNodeFile, Canceler)
*/ */
public synchronized void saveNamespace(FSNamesystem source) public synchronized void saveNamespace(FSNamesystem source)
throws IOException { throws IOException {
@ -1072,7 +1079,7 @@ public class FSImage implements Closeable {
} }
/** /**
* @see #saveFSImageInAllDirs(FSNamesystem, long, Canceler) * @see #saveFSImageInAllDirs(FSNamesystem, NameNodeFile, long, Canceler)
*/ */
protected synchronized void saveFSImageInAllDirs(FSNamesystem source, long txid) protected synchronized void saveFSImageInAllDirs(FSNamesystem source, long txid)
throws IOException { throws IOException {

View File

@ -210,7 +210,8 @@ public class FSImageFormat {
return impl.getLoadedImageTxId(); return impl.getLoadedImageTxId();
} }
public void load(File file) throws IOException { public void load(File file, boolean requireSameLayoutVersion)
throws IOException {
Preconditions.checkState(impl == null, "Image already loaded!"); Preconditions.checkState(impl == null, "Image already loaded!");
FileInputStream is = null; FileInputStream is = null;
@ -220,7 +221,7 @@ public class FSImageFormat {
IOUtils.readFully(is, magic, 0, magic.length); IOUtils.readFully(is, magic, 0, magic.length);
if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) { if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) {
FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader( FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader(
conf, fsn); conf, fsn, requireSameLayoutVersion);
impl = loader; impl = loader;
loader.load(file); loader.load(file);
} else { } else {
@ -228,7 +229,6 @@ public class FSImageFormat {
impl = loader; impl = loader;
loader.load(file); loader.load(file);
} }
} finally { } finally {
IOUtils.cleanup(LOG, is); IOUtils.cleanup(LOG, is);
} }

View File

@ -42,9 +42,12 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
@ -139,11 +142,19 @@ public final class FSImageFormatProtobuf {
private MD5Hash imgDigest; private MD5Hash imgDigest;
/** The transaction ID of the last edit represented by the loaded file */ /** The transaction ID of the last edit represented by the loaded file */
private long imgTxId; private long imgTxId;
/**
* Whether the image's layout version must be the same as
* {@link HdfsConstants#NAMENODE_LAYOUT_VERSION}. This is only set to true
* when we're doing a rollingUpgrade rollback.
*/
private final boolean requireSameLayoutVersion;
Loader(Configuration conf, FSNamesystem fsn) { Loader(Configuration conf, FSNamesystem fsn,
boolean requireSameLayoutVersion) {
this.conf = conf; this.conf = conf;
this.fsn = fsn; this.fsn = fsn;
this.ctx = new LoaderContext(); this.ctx = new LoaderContext();
this.requireSameLayoutVersion = requireSameLayoutVersion;
} }
@Override @Override
@ -181,6 +192,12 @@ public final class FSImageFormatProtobuf {
throw new IOException("Unrecognized file format"); throw new IOException("Unrecognized file format");
} }
FileSummary summary = FSImageUtil.loadSummary(raFile); FileSummary summary = FSImageUtil.loadSummary(raFile);
if (requireSameLayoutVersion && summary.getLayoutVersion() !=
HdfsConstants.NAMENODE_LAYOUT_VERSION) {
throw new IOException("Image version " + summary.getLayoutVersion() +
" is not equal to the software version " +
HdfsConstants.NAMENODE_LAYOUT_VERSION);
}
FileChannel channel = fin.getChannel(); FileChannel channel = fin.getChannel();

View File

@ -998,7 +998,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
MetaRecoveryContext recovery = startOpt.createRecoveryContext(); MetaRecoveryContext recovery = startOpt.createRecoveryContext();
final boolean staleImage final boolean staleImage
= fsImage.recoverTransitionRead(startOpt, this, recovery); = fsImage.recoverTransitionRead(startOpt, this, recovery);
if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt)) { if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt) ||
RollingUpgradeStartupOption.DOWNGRADE.matches(startOpt)) {
rollingUpgradeInfo = null; rollingUpgradeInfo = null;
} }
final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade(); final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
@ -1008,6 +1009,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
if (needToSave) { if (needToSave) {
fsImage.saveNamespace(this); fsImage.saveNamespace(this);
} else { } else {
updateStorageVersionForRollingUpgrade(fsImage.getLayoutVersion(),
startOpt);
// No need to save, so mark the phase done. // No need to save, so mark the phase done.
StartupProgress prog = NameNode.getStartupProgress(); StartupProgress prog = NameNode.getStartupProgress();
prog.beginPhase(Phase.SAVING_CHECKPOINT); prog.beginPhase(Phase.SAVING_CHECKPOINT);
@ -1029,6 +1032,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
imageLoadComplete(); imageLoadComplete();
} }
/**
 * Triggers {@code fsImage.updateStorageVersion()} when the startup option
 * calls for it during a rolling upgrade.
 *
 * <p>The storage version is updated in either of two cases:
 * <ul>
 *   <li>the startup option matches rolling-upgrade {@code STARTED} and
 *       {@code layoutVersion} is greater than
 *       {@link HdfsConstants#NAMENODE_LAYOUT_VERSION}; or</li>
 *   <li>the startup option matches rolling-upgrade {@code ROLLBACK}.</li>
 * </ul>
 * In every other case this method does nothing.
 *
 * <p>NOTE(review): "greater than" presumably means the loaded layout is
 * older than the running software, assuming HDFS layout versions are
 * negative and decrease over time -- confirm.
 *
 * @param layoutVersion layout version to compare against the software's
 *                      NameNode layout version
 * @param startOpt the option the NameNode was started with
 * @throws IOException declared for the storage-version update
 */
private void updateStorageVersionForRollingUpgrade(final long layoutVersion,
    StartupOption startOpt) throws IOException {
  // STARTED only counts when the supplied layout version differs in the
  // "greater than" direction from the software's layout version.
  boolean startedWithGreaterLayout = false;
  if (RollingUpgradeStartupOption.STARTED.matches(startOpt)) {
    startedWithGreaterLayout =
        layoutVersion > HdfsConstants.NAMENODE_LAYOUT_VERSION;
  }
  final boolean rollingBack =
      RollingUpgradeStartupOption.ROLLBACK.matches(startOpt);

  if (!rollingBack && !startedWithGreaterLayout) {
    return;
  }
  fsImage.updateStorageVersion();
}
private void startSecretManager() { private void startSecretManager() {
if (dtSecretManager != null) { if (dtSecretManager != null) {
try { try {

View File

@ -39,9 +39,11 @@ import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException; import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter; import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
import org.apache.hadoop.hdfs.server.common.Util; import org.apache.hadoop.hdfs.server.common.Util;
@ -620,6 +622,23 @@ public class NNStorage extends Storage implements Closeable,
setDeprecatedPropertiesForUpgrade(props); setDeprecatedPropertiesForUpgrade(props);
} }
/**
 * Reads the VERSION file of the given storage directory and applies its
 * properties to this storage via {@code setFieldsFromProperties}.
 *
 * <p>When the startup option matches rolling-upgrade {@code ROLLBACK}, the
 * stored {@code layoutVersion} property is first parsed and compared with
 * {@code getServiceLayoutVersion()}. If the stored value is greater, the
 * read is rejected. Otherwise the {@code layoutVersion} property is
 * overridden with {@link HdfsConstants#NAMENODE_LAYOUT_VERSION} before the
 * properties are applied.
 *
 * @param sd storage directory whose VERSION file is read
 * @param startupOption the option the NameNode was started with
 * @throws IncorrectVersionException on rolling-upgrade rollback, if the
 *         stored layout version is greater than the service layout version
 * @throws IOException declared for reading and applying the properties
 */
void readProperties(StorageDirectory sd, StartupOption startupOption)
    throws IOException {
  final Properties versionProps = readPropertiesFile(sd.getVersionFile());
  final boolean rollingRollback =
      HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK
          .matches(startupOption);

  if (rollingRollback) {
    final int storedLayoutVersion =
        Integer.parseInt(getProperty(versionProps, sd, "layoutVersion"));
    // Rollback must not be run by newer software than the storage expects.
    // NOTE(review): this relies on how layout versions are ordered
    // (presumably negative, decreasing with newer releases) -- confirm.
    if (storedLayoutVersion > getServiceLayoutVersion()) {
      throw new IncorrectVersionException(getServiceLayoutVersion(),
          storedLayoutVersion,
          "storage directory " + sd.getRoot().getAbsolutePath());
    }
    // Apply the software's own layout version instead of the stored one.
    final String softwareLayoutVersion =
        Integer.toString(HdfsConstants.NAMENODE_LAYOUT_VERSION);
    versionProps.setProperty("layoutVersion", softwareLayoutVersion);
  }

  setFieldsFromProperties(versionProps, sd);
}
/** /**
* Pull any properties out of the VERSION file that are from older * Pull any properties out of the VERSION file that are from older
* versions of HDFS and only necessary during upgrade. * versions of HDFS and only necessary during upgrade.
@ -1002,8 +1021,8 @@ public class NNStorage extends Storage implements Closeable,
* <b>Note:</b> this can mutate the storage info fields (ctime, version, etc). * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
* @throws IOException if no valid storage dirs are found or no valid layout version * @throws IOException if no valid storage dirs are found or no valid layout version
*/ */
FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes) FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes,
throws IOException { StartupOption startupOption) throws IOException {
Integer layoutVersion = null; Integer layoutVersion = null;
boolean multipleLV = false; boolean multipleLV = false;
StringBuilder layoutVersions = new StringBuilder(); StringBuilder layoutVersions = new StringBuilder();
@ -1016,7 +1035,7 @@ public class NNStorage extends Storage implements Closeable,
FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping..."); FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
continue; continue;
} }
readProperties(sd); // sets layoutVersion readProperties(sd, startupOption); // sets layoutVersion
int lv = getLayoutVersion(); int lv = getLayoutVersion();
if (layoutVersion == null) { if (layoutVersion == null) {
layoutVersion = Integer.valueOf(lv); layoutVersion = Integer.valueOf(lv);

View File

@ -157,7 +157,7 @@ public class TestFSImageWithSnapshot {
fsn.writeLock(); fsn.writeLock();
fsn.getFSDirectory().writeLock(); fsn.getFSDirectory().writeLock();
try { try {
loader.load(imageFile); loader.load(imageFile, false);
FSImage.updateCountForQuota( FSImage.updateCountForQuota(
INodeDirectory.valueOf(fsn.getFSDirectory().getINode("/"), "/")); INodeDirectory.valueOf(fsn.getFSDirectory().getINode("/"), "/"));
} finally { } finally {