HDFS-6000. Avoid saving namespace when starting rolling upgrade. Contributed by Jing Zhao.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5535@1571840 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
abc9a6dad5
commit
e3d2e4c156
|
@ -97,3 +97,5 @@ HDFS-5535 subtasks:
|
||||||
HDFS-5924. Utilize OOB upgrade message processing for writes. (kihwal)
|
HDFS-5924. Utilize OOB upgrade message processing for writes. (kihwal)
|
||||||
|
|
||||||
HDFS-5498. Improve datanode startup time. (kihwal)
|
HDFS-5498. Improve datanode startup time. (kihwal)
|
||||||
|
|
||||||
|
HDFS-6000. Avoid saving namespace when starting rolling upgrade. (jing9)
|
||||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.hadoop.classification.InterfaceStability;
|
||||||
public class RollingUpgradeInfo extends RollingUpgradeStatus {
|
public class RollingUpgradeInfo extends RollingUpgradeStatus {
|
||||||
private final long startTime;
|
private final long startTime;
|
||||||
private final long finalizeTime;
|
private final long finalizeTime;
|
||||||
private final boolean createdRollbackImages;
|
private boolean createdRollbackImages;
|
||||||
|
|
||||||
public RollingUpgradeInfo(String blockPoolId, boolean createdRollbackImages,
|
public RollingUpgradeInfo(String blockPoolId, boolean createdRollbackImages,
|
||||||
long startTime, long finalizeTime) {
|
long startTime, long finalizeTime) {
|
||||||
|
@ -44,6 +44,10 @@ public class RollingUpgradeInfo extends RollingUpgradeStatus {
|
||||||
return createdRollbackImages;
|
return createdRollbackImages;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setCreatedRollbackImages(boolean created) {
|
||||||
|
this.createdRollbackImages = created;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isStarted() {
|
public boolean isStarted() {
|
||||||
return startTime != 0;
|
return startTime != 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,6 +93,12 @@ public final class HdfsServerConstants {
|
||||||
== RollingUpgradeStartupOption.ROLLBACK;
|
== RollingUpgradeStartupOption.ROLLBACK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static boolean isRollingUpgradeDowngrade(StartupOption option) {
|
||||||
|
return option == ROLLINGUPGRADE
|
||||||
|
&& option.getRollingUpgradeStartupOption()
|
||||||
|
== RollingUpgradeStartupOption.DOWNGRADE;
|
||||||
|
}
|
||||||
|
|
||||||
private final String name;
|
private final String name;
|
||||||
|
|
||||||
// Used only with format and upgrade options
|
// Used only with format and upgrade options
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hdfs.DFSUtil;
|
import org.apache.hadoop.hdfs.DFSUtil;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.CheckpointCommand;
|
import org.apache.hadoop.hdfs.server.protocol.CheckpointCommand;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
|
import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
|
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
|
||||||
|
@ -220,9 +221,9 @@ class Checkpointer extends Daemon {
|
||||||
LOG.info("Unable to roll forward using only logs. Downloading " +
|
LOG.info("Unable to roll forward using only logs. Downloading " +
|
||||||
"image with txid " + sig.mostRecentCheckpointTxId);
|
"image with txid " + sig.mostRecentCheckpointTxId);
|
||||||
MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
|
MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
|
||||||
backupNode.nnHttpAddress, sig.mostRecentCheckpointTxId,
|
backupNode.nnHttpAddress, NameNodeFile.IMAGE,
|
||||||
bnStorage, true);
|
sig.mostRecentCheckpointTxId, bnStorage, true);
|
||||||
bnImage.saveDigestAndRenameCheckpointImage(
|
bnImage.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE,
|
||||||
sig.mostRecentCheckpointTxId, downloadedHash);
|
sig.mostRecentCheckpointTxId, downloadedHash);
|
||||||
lastApplied = sig.mostRecentCheckpointTxId;
|
lastApplied = sig.mostRecentCheckpointTxId;
|
||||||
needReloadImage = true;
|
needReloadImage = true;
|
||||||
|
@ -240,7 +241,8 @@ class Checkpointer extends Daemon {
|
||||||
|
|
||||||
if(needReloadImage) {
|
if(needReloadImage) {
|
||||||
LOG.info("Loading image with txid " + sig.mostRecentCheckpointTxId);
|
LOG.info("Loading image with txid " + sig.mostRecentCheckpointTxId);
|
||||||
File file = bnStorage.findImageFile(sig.mostRecentCheckpointTxId);
|
File file = bnStorage.findImageFile(NameNodeFile.IMAGE,
|
||||||
|
sig.mostRecentCheckpointTxId);
|
||||||
bnImage.reloadFromImageFile(file, backupNode.getNamesystem());
|
bnImage.reloadFromImageFile(file, backupNode.getNamesystem());
|
||||||
}
|
}
|
||||||
rollForwardByApplyingLogs(manifest, bnImage, backupNode.getNamesystem());
|
rollForwardByApplyingLogs(manifest, bnImage, backupNode.getNamesystem());
|
||||||
|
@ -263,7 +265,7 @@ class Checkpointer extends Daemon {
|
||||||
if(cpCmd.needToReturnImage()) {
|
if(cpCmd.needToReturnImage()) {
|
||||||
TransferFsImage.uploadImageFromStorage(
|
TransferFsImage.uploadImageFromStorage(
|
||||||
backupNode.nnHttpAddress, getImageListenAddress(),
|
backupNode.nnHttpAddress, getImageListenAddress(),
|
||||||
bnStorage, txid);
|
bnStorage, NameNodeFile.IMAGE, txid);
|
||||||
}
|
}
|
||||||
|
|
||||||
getRemoteNamenodeProxy().endCheckpoint(backupNode.getRegistration(), sig);
|
getRemoteNamenodeProxy().endCheckpoint(backupNode.getRegistration(), sig);
|
||||||
|
|
|
@ -727,10 +727,10 @@ public class FSEditLogLoader {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// start rolling upgrade
|
||||||
// save namespace if there is no rollback image existing
|
|
||||||
final long startTime = ((RollingUpgradeOp) op).getTime();
|
final long startTime = ((RollingUpgradeOp) op).getTime();
|
||||||
fsNamesys.startRollingUpgradeInternal(startTime, op.txid - 2);
|
fsNamesys.startRollingUpgradeInternal(startTime);
|
||||||
|
fsNamesys.triggerRollbackCheckpoint();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OP_ROLLING_UPGRADE_FINALIZE: {
|
case OP_ROLLING_UPGRADE_FINALIZE: {
|
||||||
|
|
|
@ -21,11 +21,13 @@ import static org.apache.hadoop.util.Time.now;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -328,18 +330,19 @@ public class FSImage implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if there is rollback fsimage (for rolling upgrade) for the
|
* @return true if there is rollback fsimage (for rolling upgrade) in NameNode
|
||||||
* given txid in storage.
|
* directory.
|
||||||
*/
|
*/
|
||||||
boolean hasRollbackFSImage(long txid) {
|
public boolean hasRollbackFSImage() throws IOException {
|
||||||
for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.IMAGE)) {
|
final FSImageStorageInspector inspector = new FSImageTransactionalStorageInspector(
|
||||||
final File rollbackImageFile = NNStorage.getStorageFile(sd,
|
EnumSet.of(NameNodeFile.IMAGE_ROLLBACK));
|
||||||
NameNodeFile.IMAGE_ROLLBACK, txid);
|
storage.inspectStorageDirs(inspector);
|
||||||
if (rollbackImageFile.exists()) {
|
try {
|
||||||
return true;
|
List<FSImageFile> images = inspector.getLatestImages();
|
||||||
}
|
return images != null && !images.isEmpty();
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void doUpgrade(FSNamesystem target) throws IOException {
|
void doUpgrade(FSNamesystem target) throws IOException {
|
||||||
|
@ -566,9 +569,15 @@ public class FSImage implements Closeable {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final boolean rollingRollback = StartupOption
|
final boolean rollingRollback = StartupOption
|
||||||
.isRollingUpgradeRollback(startOpt);
|
.isRollingUpgradeRollback(startOpt);
|
||||||
final NameNodeFile nnf = rollingRollback ? NameNodeFile.IMAGE_ROLLBACK
|
final EnumSet<NameNodeFile> nnfs;
|
||||||
: NameNodeFile.IMAGE;
|
if (rollingRollback) {
|
||||||
final FSImageStorageInspector inspector = storage.readAndInspectDirs(nnf);
|
// if it is rollback of rolling upgrade, only load from the rollback image
|
||||||
|
nnfs = EnumSet.of(NameNodeFile.IMAGE_ROLLBACK);
|
||||||
|
} else {
|
||||||
|
// otherwise we can load from both IMAGE and IMAGE_ROLLBACK
|
||||||
|
nnfs = EnumSet.of(NameNodeFile.IMAGE, NameNodeFile.IMAGE_ROLLBACK);
|
||||||
|
}
|
||||||
|
final FSImageStorageInspector inspector = storage.readAndInspectDirs(nnfs);
|
||||||
|
|
||||||
isUpgradeFinalized = inspector.isUpgradeFinalized();
|
isUpgradeFinalized = inspector.isUpgradeFinalized();
|
||||||
List<FSImageFile> imageFiles = inspector.getLatestImages();
|
List<FSImageFile> imageFiles = inspector.getLatestImages();
|
||||||
|
@ -643,6 +652,10 @@ public class FSImage implements Closeable {
|
||||||
long txnsAdvanced = loadEdits(editStreams, target, startOpt, recovery);
|
long txnsAdvanced = loadEdits(editStreams, target, startOpt, recovery);
|
||||||
needToSave |= needsResaveBasedOnStaleCheckpoint(imageFile.getFile(),
|
needToSave |= needsResaveBasedOnStaleCheckpoint(imageFile.getFile(),
|
||||||
txnsAdvanced);
|
txnsAdvanced);
|
||||||
|
if (StartupOption.isRollingUpgradeDowngrade(startOpt)) {
|
||||||
|
// purge rollback image if it is downgrade
|
||||||
|
archivalManager.purgeCheckpoints(NameNodeFile.IMAGE_ROLLBACK);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Trigger the rollback for rolling upgrade. Here lastAppliedTxId equals
|
// Trigger the rollback for rolling upgrade. Here lastAppliedTxId equals
|
||||||
// to the last txid in rollback fsimage.
|
// to the last txid in rollback fsimage.
|
||||||
|
@ -973,10 +986,10 @@ public class FSImage implements Closeable {
|
||||||
/**
|
/**
|
||||||
* Save the contents of the FS image to a new image file in each of the
|
* Save the contents of the FS image to a new image file in each of the
|
||||||
* current storage directories.
|
* current storage directories.
|
||||||
* @param canceler
|
* @param canceler
|
||||||
*/
|
*/
|
||||||
public synchronized void saveNamespace(FSNamesystem source,
|
public synchronized void saveNamespace(FSNamesystem source, NameNodeFile nnf,
|
||||||
NameNodeFile nnf, Canceler canceler) throws IOException {
|
Canceler canceler) throws IOException {
|
||||||
assert editLog != null : "editLog must be initialized";
|
assert editLog != null : "editLog must be initialized";
|
||||||
LOG.info("Save namespace ...");
|
LOG.info("Save namespace ...");
|
||||||
storage.attemptRestoreRemovedStorage();
|
storage.attemptRestoreRemovedStorage();
|
||||||
|
@ -1222,13 +1235,13 @@ public class FSImage implements Closeable {
|
||||||
* renames the image from fsimage_N.ckpt to fsimage_N and also
|
* renames the image from fsimage_N.ckpt to fsimage_N and also
|
||||||
* saves the related .md5 file into place.
|
* saves the related .md5 file into place.
|
||||||
*/
|
*/
|
||||||
public synchronized void saveDigestAndRenameCheckpointImage(
|
public synchronized void saveDigestAndRenameCheckpointImage(NameNodeFile nnf,
|
||||||
long txid, MD5Hash digest) throws IOException {
|
long txid, MD5Hash digest) throws IOException {
|
||||||
// Write and rename MD5 file
|
// Write and rename MD5 file
|
||||||
List<StorageDirectory> badSds = Lists.newArrayList();
|
List<StorageDirectory> badSds = Lists.newArrayList();
|
||||||
|
|
||||||
for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.IMAGE)) {
|
for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.IMAGE)) {
|
||||||
File imageFile = NNStorage.getImageFile(sd, txid);
|
File imageFile = NNStorage.getImageFile(sd, nnf, txid);
|
||||||
try {
|
try {
|
||||||
MD5FileUtils.saveMD5File(imageFile, digest);
|
MD5FileUtils.saveMD5File(imageFile, digest);
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
|
@ -1240,7 +1253,7 @@ public class FSImage implements Closeable {
|
||||||
CheckpointFaultInjector.getInstance().afterMD5Rename();
|
CheckpointFaultInjector.getInstance().afterMD5Rename();
|
||||||
|
|
||||||
// Rename image from tmp file
|
// Rename image from tmp file
|
||||||
renameCheckpoint(txid, NameNodeFile.IMAGE_NEW, NameNodeFile.IMAGE, false);
|
renameCheckpoint(txid, NameNodeFile.IMAGE_NEW, nnf, false);
|
||||||
// So long as this is the newest image available,
|
// So long as this is the newest image available,
|
||||||
// advertise it as such to other checkpointers
|
// advertise it as such to other checkpointers
|
||||||
// from now on
|
// from now on
|
||||||
|
|
|
@ -280,7 +280,10 @@ public final class FSImageFormatProtobuf {
|
||||||
fsn.setGenerationStampV1Limit(s.getGenstampV1Limit());
|
fsn.setGenerationStampV1Limit(s.getGenstampV1Limit());
|
||||||
fsn.setLastAllocatedBlockId(s.getLastAllocatedBlockId());
|
fsn.setLastAllocatedBlockId(s.getLastAllocatedBlockId());
|
||||||
imgTxId = s.getTransactionId();
|
imgTxId = s.getTransactionId();
|
||||||
if (s.hasRollingUpgradeStartTime()) {
|
if (s.hasRollingUpgradeStartTime()
|
||||||
|
&& fsn.getFSImage().hasRollbackFSImage()) {
|
||||||
|
// we set the rollingUpgradeInfo only when we make sure we have the
|
||||||
|
// rollback image
|
||||||
fsn.setRollingUpgradeInfo(true, s.getRollingUpgradeStartTime());
|
fsn.setRollingUpgradeInfo(true, s.getRollingUpgradeStartTime());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.EnumSet;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
|
@ -33,7 +34,9 @@ import org.apache.hadoop.fs.FileUtil;
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableList;
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
class FSImageTransactionalStorageInspector extends FSImageStorageInspector {
|
class FSImageTransactionalStorageInspector extends FSImageStorageInspector {
|
||||||
public static final Log LOG = LogFactory.getLog(
|
public static final Log LOG = LogFactory.getLog(
|
||||||
|
@ -45,14 +48,27 @@ class FSImageTransactionalStorageInspector extends FSImageStorageInspector {
|
||||||
List<FSImageFile> foundImages = new ArrayList<FSImageFile>();
|
List<FSImageFile> foundImages = new ArrayList<FSImageFile>();
|
||||||
private long maxSeenTxId = 0;
|
private long maxSeenTxId = 0;
|
||||||
|
|
||||||
private final Pattern namePattern;
|
private final List<Pattern> namePatterns = Lists.newArrayList();
|
||||||
|
|
||||||
FSImageTransactionalStorageInspector() {
|
FSImageTransactionalStorageInspector() {
|
||||||
this(NameNodeFile.IMAGE);
|
this(EnumSet.of(NameNodeFile.IMAGE));
|
||||||
}
|
}
|
||||||
|
|
||||||
FSImageTransactionalStorageInspector(NameNodeFile nnf) {
|
FSImageTransactionalStorageInspector(EnumSet<NameNodeFile> nnfs) {
|
||||||
namePattern = Pattern.compile(nnf.getName() + "_(\\d+)");
|
for (NameNodeFile nnf : nnfs) {
|
||||||
|
Pattern pattern = Pattern.compile(nnf.getName() + "_(\\d+)");
|
||||||
|
namePatterns.add(pattern);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Matcher matchPattern(String name) {
|
||||||
|
for (Pattern p : namePatterns) {
|
||||||
|
Matcher m = p.matcher(name);
|
||||||
|
if (m.matches()) {
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -88,8 +104,8 @@ class FSImageTransactionalStorageInspector extends FSImageStorageInspector {
|
||||||
String name = f.getName();
|
String name = f.getName();
|
||||||
|
|
||||||
// Check for fsimage_*
|
// Check for fsimage_*
|
||||||
Matcher imageMatch = namePattern.matcher(name);
|
Matcher imageMatch = this.matchPattern(name);
|
||||||
if (imageMatch.matches()) {
|
if (imageMatch != null) {
|
||||||
if (sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
|
if (sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
|
||||||
try {
|
try {
|
||||||
long txid = Long.valueOf(imageMatch.group(1));
|
long txid = Long.valueOf(imageMatch.group(1));
|
||||||
|
|
|
@ -885,6 +885,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
MetaRecoveryContext recovery = startOpt.createRecoveryContext();
|
MetaRecoveryContext recovery = startOpt.createRecoveryContext();
|
||||||
final boolean staleImage
|
final boolean staleImage
|
||||||
= fsImage.recoverTransitionRead(startOpt, this, recovery);
|
= fsImage.recoverTransitionRead(startOpt, this, recovery);
|
||||||
|
if (StartupOption.isRollingUpgradeRollback(startOpt)) {
|
||||||
|
rollingUpgradeInfo = null;
|
||||||
|
}
|
||||||
final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
|
final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
|
||||||
LOG.info("Need to save fs image? " + needToSave
|
LOG.info("Need to save fs image? " + needToSave
|
||||||
+ " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
|
+ " (staleImage=" + staleImage + ", haEnabled=" + haEnabled
|
||||||
|
@ -1141,6 +1144,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when the NN is in Standby state and the editlog tailer tails the
|
||||||
|
* OP_ROLLING_UPGRADE_START.
|
||||||
|
*/
|
||||||
|
void triggerRollbackCheckpoint() {
|
||||||
|
if (standbyCheckpointer != null) {
|
||||||
|
standbyCheckpointer.triggerRollbackCheckpoint();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called while the NN is in Standby state, but just about to be
|
* Called while the NN is in Standby state, but just about to be
|
||||||
|
@ -7131,6 +7143,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
checkOperation(OperationCategory.READ);
|
checkOperation(OperationCategory.READ);
|
||||||
readLock();
|
readLock();
|
||||||
try {
|
try {
|
||||||
|
if (rollingUpgradeInfo != null) {
|
||||||
|
boolean hasRollbackImage = this.getFSImage().hasRollbackFSImage();
|
||||||
|
rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage);
|
||||||
|
}
|
||||||
return rollingUpgradeInfo;
|
return rollingUpgradeInfo;
|
||||||
} finally {
|
} finally {
|
||||||
readUnlock();
|
readUnlock();
|
||||||
|
@ -7143,15 +7159,24 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
writeLock();
|
writeLock();
|
||||||
try {
|
try {
|
||||||
checkOperation(OperationCategory.WRITE);
|
checkOperation(OperationCategory.WRITE);
|
||||||
checkNameNodeSafeMode("Failed to start rolling upgrade");
|
long startTime = now();
|
||||||
startRollingUpgradeInternal(now(), -1);
|
if (!haEnabled) { // for non-HA, we require NN to be in safemode
|
||||||
|
startRollingUpgradeInternalForNonHA(startTime);
|
||||||
|
} else { // for HA, NN cannot be in safemode
|
||||||
|
checkNameNodeSafeMode("Failed to start rolling upgrade");
|
||||||
|
startRollingUpgradeInternal(startTime);
|
||||||
|
}
|
||||||
|
|
||||||
getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
|
getEditLog().logStartRollingUpgrade(rollingUpgradeInfo.getStartTime());
|
||||||
|
if (haEnabled) {
|
||||||
|
// roll the edit log to make sure the standby NameNode can tail
|
||||||
|
getFSImage().rollEditLog();
|
||||||
|
}
|
||||||
} finally {
|
} finally {
|
||||||
writeUnlock();
|
writeUnlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
getEditLog().logSync();
|
getEditLog().logSync();
|
||||||
|
|
||||||
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
|
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
|
||||||
logAuditEvent(true, "startRollingUpgrade", null, null, null);
|
logAuditEvent(true, "startRollingUpgrade", null, null, null);
|
||||||
}
|
}
|
||||||
|
@ -7160,19 +7185,35 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update internal state to indicate that a rolling upgrade is in progress.
|
* Update internal state to indicate that a rolling upgrade is in progress.
|
||||||
* Optionally create a checkpoint before starting the RU.
|
|
||||||
* @param startTime
|
* @param startTime
|
||||||
*/
|
*/
|
||||||
void startRollingUpgradeInternal(long startTime, long txid)
|
void startRollingUpgradeInternal(long startTime)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
checkRollingUpgrade("start rolling upgrade");
|
checkRollingUpgrade("start rolling upgrade");
|
||||||
getFSImage().checkUpgrade(this);
|
getFSImage().checkUpgrade(this);
|
||||||
|
setRollingUpgradeInfo(false, startTime);
|
||||||
|
}
|
||||||
|
|
||||||
// if we have not made a rollback image, do it
|
/**
|
||||||
if (txid < 0 || !getFSImage().hasRollbackFSImage(txid)) {
|
* Update internal state to indicate that a rolling upgrade is in progress for
|
||||||
getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
|
* a non-HA setup. This requires the namesystem to be in SafeMode; after saving a
|
||||||
LOG.info("Successfully saved namespace for preparing rolling upgrade.");
|
* rollback checkpoint, the namesystem leaves SafeMode automatically.
|
||||||
|
*/
|
||||||
|
private void startRollingUpgradeInternalForNonHA(long startTime)
|
||||||
|
throws IOException {
|
||||||
|
Preconditions.checkState(!haEnabled);
|
||||||
|
if (!isInSafeMode()) {
|
||||||
|
throw new IOException("Safe mode should be turned ON "
|
||||||
|
+ "in order to create namespace image.");
|
||||||
}
|
}
|
||||||
|
checkRollingUpgrade("start rolling upgrade");
|
||||||
|
getFSImage().checkUpgrade(this);
|
||||||
|
// in non-HA setup, we do an extra ckpt to generate a rollback image
|
||||||
|
getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
|
||||||
|
LOG.info("Successfully saved namespace for preparing rolling upgrade.");
|
||||||
|
|
||||||
|
// leave SafeMode automatically
|
||||||
|
setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||||
setRollingUpgradeInfo(true, startTime);
|
setRollingUpgradeInfo(true, startTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7181,7 +7222,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
createdRollbackImages, startTime, 0L);
|
createdRollbackImages, startTime, 0L);
|
||||||
}
|
}
|
||||||
|
|
||||||
RollingUpgradeInfo getRollingUpgradeInfo() {
|
public void setCreatedRollbackImages(boolean created) {
|
||||||
|
if (rollingUpgradeInfo != null) {
|
||||||
|
rollingUpgradeInfo.setCreatedRollbackImages(created);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public RollingUpgradeInfo getRollingUpgradeInfo() {
|
||||||
return rollingUpgradeInfo;
|
return rollingUpgradeInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7232,7 +7279,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
|
|
||||||
final long startTime = rollingUpgradeInfo.getStartTime();
|
final long startTime = rollingUpgradeInfo.getStartTime();
|
||||||
rollingUpgradeInfo = null;
|
rollingUpgradeInfo = null;
|
||||||
return new RollingUpgradeInfo(blockPoolId, true, startTime, finalizeTime);
|
return new RollingUpgradeInfo(blockPoolId, false, startTime, finalizeTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags)
|
long addCacheDirective(CacheDirectiveInfo directive, EnumSet<CacheFlag> flags)
|
||||||
|
|
|
@ -78,7 +78,8 @@ public class GetImageServlet extends HttpServlet {
|
||||||
private static final String END_TXID_PARAM = "endTxId";
|
private static final String END_TXID_PARAM = "endTxId";
|
||||||
private static final String STORAGEINFO_PARAM = "storageInfo";
|
private static final String STORAGEINFO_PARAM = "storageInfo";
|
||||||
private static final String LATEST_FSIMAGE_VALUE = "latest";
|
private static final String LATEST_FSIMAGE_VALUE = "latest";
|
||||||
|
private static final String IMAGE_FILE_TYPE = "imageFile";
|
||||||
|
|
||||||
private static Set<Long> currentlyDownloadingCheckpoints =
|
private static Set<Long> currentlyDownloadingCheckpoints =
|
||||||
Collections.<Long>synchronizedSet(new HashSet<Long>());
|
Collections.<Long>synchronizedSet(new HashSet<Long>());
|
||||||
|
|
||||||
|
@ -87,7 +88,7 @@ public class GetImageServlet extends HttpServlet {
|
||||||
final HttpServletResponse response
|
final HttpServletResponse response
|
||||||
) throws ServletException, IOException {
|
) throws ServletException, IOException {
|
||||||
try {
|
try {
|
||||||
ServletContext context = getServletContext();
|
final ServletContext context = getServletContext();
|
||||||
final FSImage nnImage = NameNodeHttpServer.getFsImageFromContext(context);
|
final FSImage nnImage = NameNodeHttpServer.getFsImageFromContext(context);
|
||||||
final GetImageParams parsedParams = new GetImageParams(request, response);
|
final GetImageParams parsedParams = new GetImageParams(request, response);
|
||||||
final Configuration conf = (Configuration) context
|
final Configuration conf = (Configuration) context
|
||||||
|
@ -121,13 +122,14 @@ public class GetImageServlet extends HttpServlet {
|
||||||
public Void run() throws Exception {
|
public Void run() throws Exception {
|
||||||
if (parsedParams.isGetImage()) {
|
if (parsedParams.isGetImage()) {
|
||||||
long txid = parsedParams.getTxId();
|
long txid = parsedParams.getTxId();
|
||||||
|
final NameNodeFile nnf = parsedParams.getNameNodeFile();
|
||||||
File imageFile = null;
|
File imageFile = null;
|
||||||
String errorMessage = "Could not find image";
|
String errorMessage = "Could not find image";
|
||||||
if (parsedParams.shouldFetchLatest()) {
|
if (parsedParams.shouldFetchLatest()) {
|
||||||
imageFile = nnImage.getStorage().getHighestFsImageName();
|
imageFile = nnImage.getStorage().getHighestFsImageName();
|
||||||
} else {
|
} else {
|
||||||
errorMessage += " with txid " + txid;
|
errorMessage += " with txid " + txid;
|
||||||
imageFile = nnImage.getStorage().getFsImageName(txid);
|
imageFile = nnImage.getStorage().getFsImageName(txid, nnf);
|
||||||
}
|
}
|
||||||
if (imageFile == null) {
|
if (imageFile == null) {
|
||||||
throw new IOException(errorMessage);
|
throw new IOException(errorMessage);
|
||||||
|
@ -155,6 +157,7 @@ public class GetImageServlet extends HttpServlet {
|
||||||
}
|
}
|
||||||
} else if (parsedParams.isPutImage()) {
|
} else if (parsedParams.isPutImage()) {
|
||||||
final long txid = parsedParams.getTxId();
|
final long txid = parsedParams.getTxId();
|
||||||
|
final NameNodeFile nnf = parsedParams.getNameNodeFile();
|
||||||
|
|
||||||
if (! currentlyDownloadingCheckpoints.add(txid)) {
|
if (! currentlyDownloadingCheckpoints.add(txid)) {
|
||||||
response.sendError(HttpServletResponse.SC_CONFLICT,
|
response.sendError(HttpServletResponse.SC_CONFLICT,
|
||||||
|
@ -164,7 +167,7 @@ public class GetImageServlet extends HttpServlet {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (nnImage.getStorage().findImageFile(txid) != null) {
|
if (nnImage.getStorage().findImageFile(nnf, txid) != null) {
|
||||||
response.sendError(HttpServletResponse.SC_CONFLICT,
|
response.sendError(HttpServletResponse.SC_CONFLICT,
|
||||||
"Another checkpointer already uploaded an checkpoint " +
|
"Another checkpointer already uploaded an checkpoint " +
|
||||||
"for txid " + txid);
|
"for txid " + txid);
|
||||||
|
@ -178,11 +181,15 @@ public class GetImageServlet extends HttpServlet {
|
||||||
|
|
||||||
long start = now();
|
long start = now();
|
||||||
// issue a HTTP get request to download the new fsimage
|
// issue a HTTP get request to download the new fsimage
|
||||||
MD5Hash downloadImageDigest =
|
MD5Hash downloadImageDigest = TransferFsImage
|
||||||
TransferFsImage.downloadImageToStorage(
|
.downloadImageToStorage(parsedParams.getInfoServer(conf),
|
||||||
parsedParams.getInfoServer(conf), txid,
|
nnf, txid, nnImage.getStorage(), true);
|
||||||
nnImage.getStorage(), true);
|
nnImage.saveDigestAndRenameCheckpointImage(nnf, txid,
|
||||||
nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest);
|
downloadImageDigest);
|
||||||
|
if (nnf == NameNodeFile.IMAGE_ROLLBACK) {
|
||||||
|
NameNodeHttpServer.getNameNodeFromContext(context)
|
||||||
|
.getNamesystem().setCreatedRollbackImages(true);
|
||||||
|
}
|
||||||
|
|
||||||
if (metrics != null) { // Metrics non-null only when used inside name node
|
if (metrics != null) { // Metrics non-null only when used inside name node
|
||||||
long elapsed = now() - start;
|
long elapsed = now() - start;
|
||||||
|
@ -191,7 +198,7 @@ public class GetImageServlet extends HttpServlet {
|
||||||
|
|
||||||
// Now that we have a new checkpoint, we might be able to
|
// Now that we have a new checkpoint, we might be able to
|
||||||
// remove some old ones.
|
// remove some old ones.
|
||||||
nnImage.purgeOldStorage(NameNodeFile.IMAGE);
|
nnImage.purgeOldStorage(nnf);
|
||||||
} finally {
|
} finally {
|
||||||
currentlyDownloadingCheckpoints.remove(txid);
|
currentlyDownloadingCheckpoints.remove(txid);
|
||||||
}
|
}
|
||||||
|
@ -315,9 +322,10 @@ public class GetImageServlet extends HttpServlet {
|
||||||
return "getimage=1&" + TXID_PARAM + "=" + LATEST_FSIMAGE_VALUE;
|
return "getimage=1&" + TXID_PARAM + "=" + LATEST_FSIMAGE_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static String getParamStringForImage(long txid,
|
static String getParamStringForImage(NameNodeFile nnf, long txid,
|
||||||
StorageInfo remoteStorageInfo) {
|
StorageInfo remoteStorageInfo) {
|
||||||
return "getimage=1&" + TXID_PARAM + "=" + txid
|
return "getimage=1&" + TXID_PARAM + "=" + txid
|
||||||
|
+ "&" + IMAGE_FILE_TYPE + "=" + nnf.name()
|
||||||
+ "&" + STORAGEINFO_PARAM + "=" +
|
+ "&" + STORAGEINFO_PARAM + "=" +
|
||||||
remoteStorageInfo.toColonSeparatedString();
|
remoteStorageInfo.toColonSeparatedString();
|
||||||
}
|
}
|
||||||
|
@ -330,7 +338,7 @@ public class GetImageServlet extends HttpServlet {
|
||||||
remoteStorageInfo.toColonSeparatedString();
|
remoteStorageInfo.toColonSeparatedString();
|
||||||
}
|
}
|
||||||
|
|
||||||
static String getParamStringToPutImage(long txid,
|
static String getParamStringToPutImage(NameNodeFile nnf, long txid,
|
||||||
URL url, Storage storage) {
|
URL url, Storage storage) {
|
||||||
InetSocketAddress imageListenAddress = NetUtils.createSocketAddr(url
|
InetSocketAddress imageListenAddress = NetUtils.createSocketAddr(url
|
||||||
.getAuthority());
|
.getAuthority());
|
||||||
|
@ -339,6 +347,7 @@ public class GetImageServlet extends HttpServlet {
|
||||||
: imageListenAddress.getHostName();
|
: imageListenAddress.getHostName();
|
||||||
return "putimage=1" +
|
return "putimage=1" +
|
||||||
"&" + TXID_PARAM + "=" + txid +
|
"&" + TXID_PARAM + "=" + txid +
|
||||||
|
"&" + IMAGE_FILE_TYPE + "=" + nnf.name() +
|
||||||
"&port=" + imageListenAddress.getPort() +
|
"&port=" + imageListenAddress.getPort() +
|
||||||
(machine != null ? "&machine=" + machine : "")
|
(machine != null ? "&machine=" + machine : "")
|
||||||
+ "&" + STORAGEINFO_PARAM + "=" +
|
+ "&" + STORAGEINFO_PARAM + "=" +
|
||||||
|
@ -352,6 +361,7 @@ public class GetImageServlet extends HttpServlet {
|
||||||
private boolean isPutImage;
|
private boolean isPutImage;
|
||||||
private int remoteport;
|
private int remoteport;
|
||||||
private String machineName;
|
private String machineName;
|
||||||
|
private NameNodeFile nnf;
|
||||||
private long startTxId, endTxId, txId;
|
private long startTxId, endTxId, txId;
|
||||||
private String storageInfoString;
|
private String storageInfoString;
|
||||||
private boolean fetchLatest;
|
private boolean fetchLatest;
|
||||||
|
@ -376,6 +386,9 @@ public class GetImageServlet extends HttpServlet {
|
||||||
isGetImage = true;
|
isGetImage = true;
|
||||||
try {
|
try {
|
||||||
txId = ServletUtil.parseLongParam(request, TXID_PARAM);
|
txId = ServletUtil.parseLongParam(request, TXID_PARAM);
|
||||||
|
String imageType = ServletUtil.getParameter(request, IMAGE_FILE_TYPE);
|
||||||
|
nnf = imageType == null ? NameNodeFile.IMAGE : NameNodeFile
|
||||||
|
.valueOf(imageType);
|
||||||
} catch (NumberFormatException nfe) {
|
} catch (NumberFormatException nfe) {
|
||||||
if (request.getParameter(TXID_PARAM).equals(LATEST_FSIMAGE_VALUE)) {
|
if (request.getParameter(TXID_PARAM).equals(LATEST_FSIMAGE_VALUE)) {
|
||||||
fetchLatest = true;
|
fetchLatest = true;
|
||||||
|
@ -390,6 +403,9 @@ public class GetImageServlet extends HttpServlet {
|
||||||
} else if (key.equals("putimage")) {
|
} else if (key.equals("putimage")) {
|
||||||
isPutImage = true;
|
isPutImage = true;
|
||||||
txId = ServletUtil.parseLongParam(request, TXID_PARAM);
|
txId = ServletUtil.parseLongParam(request, TXID_PARAM);
|
||||||
|
String imageType = ServletUtil.getParameter(request, IMAGE_FILE_TYPE);
|
||||||
|
nnf = imageType == null ? NameNodeFile.IMAGE : NameNodeFile
|
||||||
|
.valueOf(imageType);
|
||||||
} else if (key.equals("port")) {
|
} else if (key.equals("port")) {
|
||||||
remoteport = new Integer(val[0]).intValue();
|
remoteport = new Integer(val[0]).intValue();
|
||||||
} else if (key.equals("machine")) {
|
} else if (key.equals("machine")) {
|
||||||
|
@ -420,7 +436,12 @@ public class GetImageServlet extends HttpServlet {
|
||||||
Preconditions.checkState(isGetImage || isPutImage);
|
Preconditions.checkState(isGetImage || isPutImage);
|
||||||
return txId;
|
return txId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public NameNodeFile getNameNodeFile() {
|
||||||
|
Preconditions.checkState(isPutImage || isGetImage);
|
||||||
|
return nnf;
|
||||||
|
}
|
||||||
|
|
||||||
public long getStartTxId() {
|
public long getStartTxId() {
|
||||||
Preconditions.checkState(isGetEdit);
|
Preconditions.checkState(isGetEdit);
|
||||||
return startTxId;
|
return startTxId;
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.net.URI;
|
||||||
import java.net.UnknownHostException;
|
import java.net.UnknownHostException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -50,8 +51,8 @@ import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.net.DNS;
|
import org.apache.hadoop.net.DNS;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
|
|
||||||
import com.google.common.base.Preconditions;
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -499,21 +500,24 @@ public class NNStorage extends Storage implements Closeable,
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the name of the image file.
|
* @return The first image file with the given txid and image type.
|
||||||
* @return The name of the first image file.
|
|
||||||
*/
|
*/
|
||||||
public File getFsImageName(long txid) {
|
public File getFsImageName(long txid, NameNodeFile nnf) {
|
||||||
StorageDirectory sd = null;
|
for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
|
||||||
for (Iterator<StorageDirectory> it =
|
it.hasNext();) {
|
||||||
dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
|
StorageDirectory sd = it.next();
|
||||||
sd = it.next();
|
File fsImage = getStorageFile(sd, nnf, txid);
|
||||||
File fsImage = getStorageFile(sd, NameNodeFile.IMAGE, txid);
|
if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
|
||||||
if(FileUtil.canRead(sd.getRoot()) && fsImage.exists())
|
|
||||||
return fsImage;
|
return fsImage;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public File getFsImageName(long txid) {
|
||||||
|
return getFsImageName(txid, NameNodeFile.IMAGE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public File getHighestFsImageName() {
|
public File getHighestFsImageName() {
|
||||||
return getFsImageName(getMostRecentCheckpointTxId());
|
return getFsImageName(getMostRecentCheckpointTxId());
|
||||||
}
|
}
|
||||||
|
@ -697,12 +701,11 @@ public class NNStorage extends Storage implements Closeable,
|
||||||
return new File(sd.getCurrentDir(),
|
return new File(sd.getCurrentDir(),
|
||||||
getTemporaryEditsFileName(startTxId, endTxId, timestamp));
|
getTemporaryEditsFileName(startTxId, endTxId, timestamp));
|
||||||
}
|
}
|
||||||
|
|
||||||
static File getImageFile(StorageDirectory sd, long txid) {
|
static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
|
||||||
return new File(sd.getCurrentDir(),
|
return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid));
|
||||||
getImageFileName(txid));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
|
public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
|
||||||
return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
|
return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
|
||||||
|
@ -730,12 +733,12 @@ public class NNStorage extends Storage implements Closeable,
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the first readable image file for the given txid, or null
|
* Return the first readable image file for the given txid and image type, or
|
||||||
* if no such image can be found
|
* null if no such image can be found
|
||||||
*/
|
*/
|
||||||
File findImageFile(long txid) {
|
File findImageFile(NameNodeFile nnf, long txid) {
|
||||||
return findFile(NameNodeDirType.IMAGE,
|
return findFile(NameNodeDirType.IMAGE,
|
||||||
getImageFileName(txid));
|
getNameNodeFileName(nnf, txid));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -980,7 +983,8 @@ public class NNStorage extends Storage implements Closeable,
|
||||||
* <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
|
* <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
|
||||||
* @throws IOException if no valid storage dirs are found or no valid layout version
|
* @throws IOException if no valid storage dirs are found or no valid layout version
|
||||||
*/
|
*/
|
||||||
FSImageStorageInspector readAndInspectDirs(NameNodeFile nnf) throws IOException {
|
FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes)
|
||||||
|
throws IOException {
|
||||||
Integer layoutVersion = null;
|
Integer layoutVersion = null;
|
||||||
boolean multipleLV = false;
|
boolean multipleLV = false;
|
||||||
StringBuilder layoutVersions = new StringBuilder();
|
StringBuilder layoutVersions = new StringBuilder();
|
||||||
|
@ -1017,7 +1021,7 @@ public class NNStorage extends Storage implements Closeable,
|
||||||
FSImageStorageInspector inspector;
|
FSImageStorageInspector inspector;
|
||||||
if (NameNodeLayoutVersion.supports(
|
if (NameNodeLayoutVersion.supports(
|
||||||
LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
|
LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
|
||||||
inspector = new FSImageTransactionalStorageInspector(nnf);
|
inspector = new FSImageTransactionalStorageInspector(fileTypes);
|
||||||
} else {
|
} else {
|
||||||
inspector = new FSImagePreTransactionalStorageInspector();
|
inspector = new FSImagePreTransactionalStorageInspector();
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.EnumSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
@ -96,7 +97,7 @@ public class NNStorageRetentionManager {
|
||||||
void purgeCheckpoinsAfter(NameNodeFile nnf, long fromTxId)
|
void purgeCheckpoinsAfter(NameNodeFile nnf, long fromTxId)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
FSImageTransactionalStorageInspector inspector =
|
FSImageTransactionalStorageInspector inspector =
|
||||||
new FSImageTransactionalStorageInspector(nnf);
|
new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
|
||||||
storage.inspectStorageDirs(inspector);
|
storage.inspectStorageDirs(inspector);
|
||||||
for (FSImageFile image : inspector.getFoundImages()) {
|
for (FSImageFile image : inspector.getFoundImages()) {
|
||||||
if (image.getCheckpointTxId() > fromTxId) {
|
if (image.getCheckpointTxId() > fromTxId) {
|
||||||
|
@ -107,7 +108,7 @@ public class NNStorageRetentionManager {
|
||||||
|
|
||||||
void purgeOldStorage(NameNodeFile nnf) throws IOException {
|
void purgeOldStorage(NameNodeFile nnf) throws IOException {
|
||||||
FSImageTransactionalStorageInspector inspector =
|
FSImageTransactionalStorageInspector inspector =
|
||||||
new FSImageTransactionalStorageInspector(nnf);
|
new FSImageTransactionalStorageInspector(EnumSet.of(nnf));
|
||||||
storage.inspectStorageDirs(inspector);
|
storage.inspectStorageDirs(inspector);
|
||||||
|
|
||||||
long minImageTxId = getImageTxIdToRetain(inspector);
|
long minImageTxId = getImageTxIdToRetain(inspector);
|
||||||
|
|
|
@ -445,8 +445,9 @@ public class SecondaryNameNode implements Runnable {
|
||||||
} else {
|
} else {
|
||||||
LOG.info("Image has changed. Downloading updated image from NN.");
|
LOG.info("Image has changed. Downloading updated image from NN.");
|
||||||
MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
|
MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
|
||||||
nnHostPort, sig.mostRecentCheckpointTxId, dstImage.getStorage(), true);
|
nnHostPort, NameNodeFile.IMAGE, sig.mostRecentCheckpointTxId,
|
||||||
dstImage.saveDigestAndRenameCheckpointImage(
|
dstImage.getStorage(), true);
|
||||||
|
dstImage.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE,
|
||||||
sig.mostRecentCheckpointTxId, downloadedHash);
|
sig.mostRecentCheckpointTxId, downloadedHash);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -555,7 +556,7 @@ public class SecondaryNameNode implements Runnable {
|
||||||
//
|
//
|
||||||
long txid = checkpointImage.getLastAppliedTxId();
|
long txid = checkpointImage.getLastAppliedTxId();
|
||||||
TransferFsImage.uploadImageFromStorage(fsName, getImageListenAddress(),
|
TransferFsImage.uploadImageFromStorage(fsName, getImageListenAddress(),
|
||||||
dstStorage, txid);
|
dstStorage, NameNodeFile.IMAGE, txid);
|
||||||
|
|
||||||
// error simulation code for junit test
|
// error simulation code for junit test
|
||||||
CheckpointFaultInjector.getInstance().afterSecondaryUploadsNewImage();
|
CheckpointFaultInjector.getInstance().afterSecondaryUploadsNewImage();
|
||||||
|
@ -997,7 +998,8 @@ public class SecondaryNameNode implements Runnable {
|
||||||
|
|
||||||
dstStorage.setStorageInfo(sig);
|
dstStorage.setStorageInfo(sig);
|
||||||
if (loadImage) {
|
if (loadImage) {
|
||||||
File file = dstStorage.findImageFile(sig.mostRecentCheckpointTxId);
|
File file = dstStorage.findImageFile(NameNodeFile.IMAGE,
|
||||||
|
sig.mostRecentCheckpointTxId);
|
||||||
if (file == null) {
|
if (file == null) {
|
||||||
throw new IOException("Couldn't find image file at txid " +
|
throw new IOException("Couldn't find image file at txid " +
|
||||||
sig.mostRecentCheckpointTxId + " even though it should have " +
|
sig.mostRecentCheckpointTxId + " even though it should have " +
|
||||||
|
|
|
@ -23,7 +23,6 @@ import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.net.HttpURLConnection;
|
import java.net.HttpURLConnection;
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.security.DigestInputStream;
|
import java.security.DigestInputStream;
|
||||||
import java.security.MessageDigest;
|
import java.security.MessageDigest;
|
||||||
|
@ -39,10 +38,6 @@ import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
import org.apache.hadoop.http.HttpConfig;
|
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
|
||||||
import org.apache.hadoop.security.authentication.client.AuthenticationException;
|
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
|
@ -50,11 +45,13 @@ import org.apache.hadoop.hdfs.server.common.Storage;
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||||
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
|
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
|
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
|
||||||
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
|
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
|
||||||
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
|
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
|
||||||
import org.apache.hadoop.io.MD5Hash;
|
import org.apache.hadoop.io.MD5Hash;
|
||||||
import org.apache.hadoop.security.SecurityUtil;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.apache.hadoop.security.authentication.client.AuthenticationException;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
@ -90,10 +87,10 @@ public class TransferFsImage {
|
||||||
null, false);
|
null, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static MD5Hash downloadImageToStorage(
|
public static MD5Hash downloadImageToStorage(URL fsName, NameNodeFile nnf,
|
||||||
URL fsName, long imageTxId, Storage dstStorage, boolean needDigest)
|
long imageTxId, Storage dstStorage, boolean needDigest)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
String fileid = GetImageServlet.getParamStringForImage(
|
String fileid = GetImageServlet.getParamStringForImage(nnf,
|
||||||
imageTxId, dstStorage);
|
imageTxId, dstStorage);
|
||||||
String fileName = NNStorage.getCheckpointImageFileName(imageTxId);
|
String fileName = NNStorage.getCheckpointImageFileName(imageTxId);
|
||||||
|
|
||||||
|
@ -166,14 +163,14 @@ public class TransferFsImage {
|
||||||
* @param myNNAddress the host/port where the local node is running an
|
* @param myNNAddress the host/port where the local node is running an
|
||||||
* HTTPServer hosting GetImageServlet
|
* HTTPServer hosting GetImageServlet
|
||||||
* @param storage the storage directory to transfer the image from
|
* @param storage the storage directory to transfer the image from
|
||||||
|
* @param nnf the NameNodeFile type of the image
|
||||||
* @param txid the transaction ID of the image to be uploaded
|
* @param txid the transaction ID of the image to be uploaded
|
||||||
*/
|
*/
|
||||||
public static void uploadImageFromStorage(URL fsName,
|
public static void uploadImageFromStorage(URL fsName, URL myNNAddress,
|
||||||
URL myNNAddress,
|
Storage storage, NameNodeFile nnf, long txid) throws IOException {
|
||||||
Storage storage, long txid) throws IOException {
|
|
||||||
|
|
||||||
String fileid = GetImageServlet.getParamStringToPutImage(
|
String fileid = GetImageServlet.getParamStringToPutImage(nnf, txid,
|
||||||
txid, myNNAddress, storage);
|
myNNAddress, storage);
|
||||||
// this doesn't directly upload an image, but rather asks the NN
|
// this doesn't directly upload an image, but rather asks the NN
|
||||||
// to connect back to the 2NN to download the specified image.
|
// to connect back to the 2NN to download the specified image.
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -46,6 +46,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
|
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
|
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
||||||
import org.apache.hadoop.hdfs.tools.DFSHAAdmin;
|
import org.apache.hadoop.hdfs.tools.DFSHAAdmin;
|
||||||
|
@ -207,9 +208,10 @@ public class BootstrapStandby implements Tool, Configurable {
|
||||||
|
|
||||||
// Download that checkpoint into our storage directories.
|
// Download that checkpoint into our storage directories.
|
||||||
MD5Hash hash = TransferFsImage.downloadImageToStorage(
|
MD5Hash hash = TransferFsImage.downloadImageToStorage(
|
||||||
otherHttpAddr, imageTxId,
|
otherHttpAddr, NameNodeFile.IMAGE, imageTxId,
|
||||||
storage, true);
|
storage, true);
|
||||||
image.saveDigestAndRenameCheckpointImage(imageTxId, hash);
|
image.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE, imageTxId,
|
||||||
|
hash);
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
image.close();
|
image.close();
|
||||||
throw ioe;
|
throw ioe;
|
||||||
|
|
|
@ -142,9 +142,15 @@ public class StandbyCheckpointer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void triggerRollbackCheckpoint() {
|
||||||
|
thread.setNeedRollbackCheckpoint(true);
|
||||||
|
thread.interrupt();
|
||||||
|
}
|
||||||
|
|
||||||
private void doCheckpoint() throws InterruptedException, IOException {
|
private void doCheckpoint() throws InterruptedException, IOException {
|
||||||
assert canceler != null;
|
assert canceler != null;
|
||||||
final long txid;
|
final long txid;
|
||||||
|
final NameNodeFile imageType;
|
||||||
|
|
||||||
namesystem.writeLockInterruptibly();
|
namesystem.writeLockInterruptibly();
|
||||||
try {
|
try {
|
||||||
|
@ -164,7 +170,15 @@ public class StandbyCheckpointer {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
img.saveNamespace(namesystem, NameNodeFile.IMAGE, canceler);
|
if (namesystem.isRollingUpgrade()
|
||||||
|
&& !namesystem.getFSImage().hasRollbackFSImage()) {
|
||||||
|
// if we will do rolling upgrade but have not created the rollback image
|
||||||
|
// yet, name this checkpoint as fsimage_rollback
|
||||||
|
imageType = NameNodeFile.IMAGE_ROLLBACK;
|
||||||
|
} else {
|
||||||
|
imageType = NameNodeFile.IMAGE;
|
||||||
|
}
|
||||||
|
img.saveNamespace(namesystem, imageType, canceler);
|
||||||
txid = img.getStorage().getMostRecentCheckpointTxId();
|
txid = img.getStorage().getMostRecentCheckpointTxId();
|
||||||
assert txid == thisCheckpointTxId : "expected to save checkpoint at txid=" +
|
assert txid == thisCheckpointTxId : "expected to save checkpoint at txid=" +
|
||||||
thisCheckpointTxId + " but instead saved at txid=" + txid;
|
thisCheckpointTxId + " but instead saved at txid=" + txid;
|
||||||
|
@ -181,7 +195,7 @@ public class StandbyCheckpointer {
|
||||||
@Override
|
@Override
|
||||||
public Void call() throws IOException {
|
public Void call() throws IOException {
|
||||||
TransferFsImage.uploadImageFromStorage(activeNNAddress, myNNAddress,
|
TransferFsImage.uploadImageFromStorage(activeNNAddress, myNNAddress,
|
||||||
namesystem.getFSImage().getStorage(), txid);
|
namesystem.getFSImage().getStorage(), imageType, txid);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -228,6 +242,9 @@ public class StandbyCheckpointer {
|
||||||
private class CheckpointerThread extends Thread {
|
private class CheckpointerThread extends Thread {
|
||||||
private volatile boolean shouldRun = true;
|
private volatile boolean shouldRun = true;
|
||||||
private volatile long preventCheckpointsUntil = 0;
|
private volatile long preventCheckpointsUntil = 0;
|
||||||
|
// Indicate that a rollback checkpoint is required immediately. It will be
|
||||||
|
// reset to false after the checkpoint is done
|
||||||
|
private volatile boolean needRollbackCheckpoint = false;
|
||||||
|
|
||||||
private CheckpointerThread() {
|
private CheckpointerThread() {
|
||||||
super("Standby State Checkpointer");
|
super("Standby State Checkpointer");
|
||||||
|
@ -237,6 +254,10 @@ public class StandbyCheckpointer {
|
||||||
this.shouldRun = shouldRun;
|
this.shouldRun = shouldRun;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void setNeedRollbackCheckpoint(boolean need) {
|
||||||
|
this.needRollbackCheckpoint = need;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
// We have to make sure we're logged in as far as JAAS
|
// We have to make sure we're logged in as far as JAAS
|
||||||
|
@ -266,16 +287,19 @@ public class StandbyCheckpointer {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doWork() {
|
private void doWork() {
|
||||||
|
final long checkPeriod = 1000 * checkpointConf.getCheckPeriod();
|
||||||
// Reset checkpoint time so that we don't always checkpoint
|
// Reset checkpoint time so that we don't always checkpoint
|
||||||
// on startup.
|
// on startup.
|
||||||
lastCheckpointTime = now();
|
lastCheckpointTime = now();
|
||||||
while (shouldRun) {
|
while (shouldRun) {
|
||||||
try {
|
if (!needRollbackCheckpoint) {
|
||||||
Thread.sleep(1000 * checkpointConf.getCheckPeriod());
|
try {
|
||||||
} catch (InterruptedException ie) {
|
Thread.sleep(checkPeriod);
|
||||||
}
|
} catch (InterruptedException ie) {
|
||||||
if (!shouldRun) {
|
}
|
||||||
break;
|
if (!shouldRun) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
// We may have lost our ticket since last checkpoint, log in again, just in case
|
// We may have lost our ticket since last checkpoint, log in again, just in case
|
||||||
|
@ -287,8 +311,10 @@ public class StandbyCheckpointer {
|
||||||
long uncheckpointed = countUncheckpointedTxns();
|
long uncheckpointed = countUncheckpointedTxns();
|
||||||
long secsSinceLast = (now - lastCheckpointTime)/1000;
|
long secsSinceLast = (now - lastCheckpointTime)/1000;
|
||||||
|
|
||||||
boolean needCheckpoint = false;
|
boolean needCheckpoint = needRollbackCheckpoint;
|
||||||
if (uncheckpointed >= checkpointConf.getTxnCount()) {
|
if (needCheckpoint) {
|
||||||
|
LOG.info("Triggering a rollback fsimage for rolling upgrade.");
|
||||||
|
} else if (uncheckpointed >= checkpointConf.getTxnCount()) {
|
||||||
LOG.info("Triggering checkpoint because there have been " +
|
LOG.info("Triggering checkpoint because there have been " +
|
||||||
uncheckpointed + " txns since the last checkpoint, which " +
|
uncheckpointed + " txns since the last checkpoint, which " +
|
||||||
"exceeds the configured threshold " +
|
"exceeds the configured threshold " +
|
||||||
|
@ -313,6 +339,13 @@ public class StandbyCheckpointer {
|
||||||
|
|
||||||
if (needCheckpoint) {
|
if (needCheckpoint) {
|
||||||
doCheckpoint();
|
doCheckpoint();
|
||||||
|
// reset needRollbackCheckpoint to false only when we finish a ckpt
|
||||||
|
// for rollback image
|
||||||
|
if (needRollbackCheckpoint
|
||||||
|
&& namesystem.getFSImage().hasRollbackFSImage()) {
|
||||||
|
namesystem.setCreatedRollbackImages(true);
|
||||||
|
needRollbackCheckpoint = false;
|
||||||
|
}
|
||||||
lastCheckpointTime = now;
|
lastCheckpointTime = now;
|
||||||
}
|
}
|
||||||
} catch (SaveNamespaceCancelledException ce) {
|
} catch (SaveNamespaceCancelledException ce) {
|
||||||
|
|
|
@ -83,7 +83,9 @@ public class TestRollingUpgrade {
|
||||||
runCmd(dfsadmin, true, "-rollingUpgrade");
|
runCmd(dfsadmin, true, "-rollingUpgrade");
|
||||||
|
|
||||||
//start rolling upgrade
|
//start rolling upgrade
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||||
runCmd(dfsadmin, true, "-rollingUpgrade", "prepare");
|
runCmd(dfsadmin, true, "-rollingUpgrade", "prepare");
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||||
|
|
||||||
//query rolling upgrade
|
//query rolling upgrade
|
||||||
runCmd(dfsadmin, true, "-rollingUpgrade", "query");
|
runCmd(dfsadmin, true, "-rollingUpgrade", "query");
|
||||||
|
@ -182,7 +184,9 @@ public class TestRollingUpgrade {
|
||||||
dfs.mkdirs(foo);
|
dfs.mkdirs(foo);
|
||||||
|
|
||||||
//start rolling upgrade
|
//start rolling upgrade
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||||
info1 = dfs.rollingUpgrade(RollingUpgradeAction.PREPARE);
|
info1 = dfs.rollingUpgrade(RollingUpgradeAction.PREPARE);
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||||
LOG.info("START\n" + info1);
|
LOG.info("START\n" + info1);
|
||||||
|
|
||||||
//query rolling upgrade
|
//query rolling upgrade
|
||||||
|
@ -293,7 +297,9 @@ public class TestRollingUpgrade {
|
||||||
final DistributedFileSystem dfs = cluster.getFileSystem();
|
final DistributedFileSystem dfs = cluster.getFileSystem();
|
||||||
|
|
||||||
//start rolling upgrade
|
//start rolling upgrade
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||||
dfs.rollingUpgrade(RollingUpgradeAction.PREPARE);
|
dfs.rollingUpgrade(RollingUpgradeAction.PREPARE);
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||||
|
|
||||||
dfs.mkdirs(bar);
|
dfs.mkdirs(bar);
|
||||||
|
|
||||||
|
@ -378,26 +384,33 @@ public class TestRollingUpgrade {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean existRollbackFsImage(NNStorage storage)
|
public static boolean existRollbackFsImage(NNStorage storage)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final FilenameFilter filter = new FilenameFilter() {
|
final FilenameFilter filter = new FilenameFilter() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean accept(File dir, String name) {
|
public boolean accept(File dir, String name) {
|
||||||
return name.indexOf(NNStorage.NameNodeFile.IMAGE_ROLLBACK.getName()) != -1;
|
return name.indexOf(NNStorage.NameNodeFile.IMAGE_ROLLBACK.getName()) != -1;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
for (int i = 0; i < storage.getNumStorageDirs(); i++) {
|
final int total = 10;
|
||||||
File dir = storage.getStorageDir(i).getCurrentDir();
|
int retry = 0;
|
||||||
int l = dir.list(filter).length;
|
while (retry++ < total) {
|
||||||
if (l > 0) {
|
for (int i = 0; i < storage.getNumStorageDirs(); i++) {
|
||||||
return true;
|
File dir = storage.getStorageDir(i).getCurrentDir();
|
||||||
|
int l = dir.list(filter).length;
|
||||||
|
if (l > 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test (timeout = 300000)
|
||||||
public void testFinalize() throws Exception {
|
public void testFinalize() throws Exception {
|
||||||
final Configuration conf = new HdfsConfiguration();
|
final Configuration conf = new HdfsConfiguration();
|
||||||
MiniQJMHACluster cluster = null;
|
MiniQJMHACluster cluster = null;
|
||||||
|
@ -409,6 +422,11 @@ public class TestRollingUpgrade {
|
||||||
MiniDFSCluster dfsCluster = cluster.getDfsCluster();
|
MiniDFSCluster dfsCluster = cluster.getDfsCluster();
|
||||||
dfsCluster.waitActive();
|
dfsCluster.waitActive();
|
||||||
|
|
||||||
|
// let NN1 tail editlog every 1s
|
||||||
|
dfsCluster.getConfiguration(1).setInt(
|
||||||
|
DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
|
||||||
|
dfsCluster.restartNameNode(1);
|
||||||
|
|
||||||
dfsCluster.transitionToActive(0);
|
dfsCluster.transitionToActive(0);
|
||||||
DistributedFileSystem dfs = dfsCluster.getFileSystem(0);
|
DistributedFileSystem dfs = dfsCluster.getFileSystem(0);
|
||||||
dfs.mkdirs(foo);
|
dfs.mkdirs(foo);
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.List;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||||
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
|
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
|
||||||
import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster;
|
import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster;
|
||||||
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
|
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
|
||||||
|
@ -101,8 +102,10 @@ public class TestRollingUpgradeRollback {
|
||||||
dfs.mkdirs(foo);
|
dfs.mkdirs(foo);
|
||||||
|
|
||||||
// start rolling upgrade
|
// start rolling upgrade
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||||
Assert.assertEquals(0,
|
Assert.assertEquals(0,
|
||||||
dfsadmin.run(new String[] { "-rollingUpgrade", "prepare" }));
|
dfsadmin.run(new String[] { "-rollingUpgrade", "prepare" }));
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||||
// create new directory
|
// create new directory
|
||||||
dfs.mkdirs(bar);
|
dfs.mkdirs(bar);
|
||||||
|
|
||||||
|
@ -160,8 +163,10 @@ public class TestRollingUpgradeRollback {
|
||||||
dfs.mkdirs(foo);
|
dfs.mkdirs(foo);
|
||||||
|
|
||||||
// start rolling upgrade
|
// start rolling upgrade
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||||
Assert.assertEquals(0,
|
Assert.assertEquals(0,
|
||||||
dfsadmin.run(new String[] { "-rollingUpgrade", "prepare" }));
|
dfsadmin.run(new String[] { "-rollingUpgrade", "prepare" }));
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||||
// create new directory
|
// create new directory
|
||||||
dfs.mkdirs(bar);
|
dfs.mkdirs(bar);
|
||||||
dfs.close();
|
dfs.close();
|
||||||
|
@ -206,9 +211,9 @@ public class TestRollingUpgradeRollback {
|
||||||
MiniDFSCluster dfsCluster = cluster.getDfsCluster();
|
MiniDFSCluster dfsCluster = cluster.getDfsCluster();
|
||||||
dfsCluster.waitActive();
|
dfsCluster.waitActive();
|
||||||
|
|
||||||
// let NN1 do checkpoints as fast as possible
|
// let NN1 tail editlog every 1s
|
||||||
dfsCluster.getConfiguration(1).setInt(
|
dfsCluster.getConfiguration(1).setInt(
|
||||||
DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0);
|
DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
|
||||||
dfsCluster.restartNameNode(1);
|
dfsCluster.restartNameNode(1);
|
||||||
|
|
||||||
dfsCluster.transitionToActive(0);
|
dfsCluster.transitionToActive(0);
|
||||||
|
@ -223,6 +228,11 @@ public class TestRollingUpgradeRollback {
|
||||||
dfs.mkdirs(bar);
|
dfs.mkdirs(bar);
|
||||||
dfs.close();
|
dfs.close();
|
||||||
|
|
||||||
|
NNStorage storage0 = dfsCluster.getNameNode(0).getFSImage().getStorage();
|
||||||
|
NNStorage storage1 = dfsCluster.getNameNode(1).getFSImage().getStorage();
|
||||||
|
Assert.assertTrue(TestRollingUpgrade.existRollbackFsImage(storage0));
|
||||||
|
Assert.assertTrue(TestRollingUpgrade.existRollbackFsImage(storage1));
|
||||||
|
|
||||||
// rollback NN0
|
// rollback NN0
|
||||||
dfsCluster.restartNameNode(0, true, "-rollingUpgrade",
|
dfsCluster.restartNameNode(0, true, "-rollingUpgrade",
|
||||||
"rollback");
|
"rollback");
|
||||||
|
@ -238,21 +248,24 @@ public class TestRollingUpgradeRollback {
|
||||||
// check the details of NNStorage
|
// check the details of NNStorage
|
||||||
NNStorage storage = dfsCluster.getNamesystem(0).getFSImage()
|
NNStorage storage = dfsCluster.getNamesystem(0).getFSImage()
|
||||||
.getStorage();
|
.getStorage();
|
||||||
// (startSegment, upgrade marker, mkdir, endSegment)
|
// segments:(startSegment, mkdir, start upgrade endSegment),
|
||||||
checkNNStorage(storage, 3, 7);
|
// (startSegment, mkdir, endSegment)
|
||||||
|
checkNNStorage(storage, 4, 7);
|
||||||
|
|
||||||
// check storage in JNs
|
// check storage in JNs
|
||||||
for (int i = 0; i < NUM_JOURNAL_NODES; i++) {
|
for (int i = 0; i < NUM_JOURNAL_NODES; i++) {
|
||||||
File dir = cluster.getJournalCluster().getCurrentDir(0,
|
File dir = cluster.getJournalCluster().getCurrentDir(0,
|
||||||
MiniQJMHACluster.NAMESERVICE);
|
MiniQJMHACluster.NAMESERVICE);
|
||||||
// segments:(startSegment, mkdir, endSegment), (startSegment, upgrade
|
checkJNStorage(dir, 5, 7);
|
||||||
// marker, mkdir, endSegment)
|
|
||||||
checkJNStorage(dir, 4, 7);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// restart NN0 again to make sure we can start using the new fsimage and
|
// restart NN0 again to make sure we can start using the new fsimage and
|
||||||
// the corresponding md5 checksum
|
// the corresponding md5 checksum
|
||||||
dfsCluster.restartNameNode(0);
|
dfsCluster.restartNameNode(0);
|
||||||
|
// start the rolling upgrade again to make sure we do not load upgrade
|
||||||
|
// status after the rollback
|
||||||
|
dfsCluster.transitionToActive(0);
|
||||||
|
dfs.rollingUpgrade(RollingUpgradeAction.PREPARE);
|
||||||
} finally {
|
} finally {
|
||||||
if (cluster != null) {
|
if (cluster != null) {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
|
|
|
@ -1454,7 +1454,7 @@ public class TestCheckpoint {
|
||||||
|
|
||||||
for (StorageDirectory sd :
|
for (StorageDirectory sd :
|
||||||
image.getStorage().dirIterable(NameNodeDirType.IMAGE)) {
|
image.getStorage().dirIterable(NameNodeDirType.IMAGE)) {
|
||||||
File imageFile = NNStorage.getImageFile(sd,
|
File imageFile = NNStorage.getImageFile(sd, NameNodeFile.IMAGE,
|
||||||
expectedTxIdToDownload + 5);
|
expectedTxIdToDownload + 5);
|
||||||
assertTrue("Image size increased",
|
assertTrue("Image size increased",
|
||||||
imageFile.length() > fsimageLength);
|
imageFile.length() > fsimageLength);
|
||||||
|
@ -1980,7 +1980,8 @@ public class TestCheckpoint {
|
||||||
.when(dstImage).toColonSeparatedString();
|
.when(dstImage).toColonSeparatedString();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
TransferFsImage.downloadImageToStorage(fsName, 0, dstImage, false);
|
TransferFsImage.downloadImageToStorage(fsName, NameNodeFile.IMAGE, 0,
|
||||||
|
dstImage, false);
|
||||||
fail("Storage info was not verified");
|
fail("Storage info was not verified");
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
String msg = StringUtils.stringifyException(ioe);
|
String msg = StringUtils.stringifyException(ioe);
|
||||||
|
@ -1996,7 +1997,8 @@ public class TestCheckpoint {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
TransferFsImage.uploadImageFromStorage(fsName, new URL("http://localhost:1234"), dstImage, 0);
|
TransferFsImage.uploadImageFromStorage(fsName, new URL(
|
||||||
|
"http://localhost:1234"), dstImage, NameNodeFile.IMAGE, 0);
|
||||||
fail("Storage info was not verified");
|
fail("Storage info was not verified");
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
String msg = StringUtils.stringifyException(ioe);
|
String msg = StringUtils.stringifyException(ioe);
|
||||||
|
|
Loading…
Reference in New Issue