HDFS-5945. Add rolling upgrade information to fsimage; and disallow upgrade and rolling upgrade to be started simultaneously. Contributed by szetszwo & jing9

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5535@1569515 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2014-02-18 20:12:17 +00:00
parent 470d4253b2
commit bc962d6df4
10 changed files with 61 additions and 35 deletions

View File

@ -43,3 +43,6 @@ HDFS-5535 subtasks:
HDFS-5920. Support rollback of rolling upgrade in NameNode and JournalNodes.
(jing9)
HDFS-5945. Add rolling upgrade information to fsimage; and disallow upgrade
and rolling upgrade to be started simultaneously. (szetszwo & jing9)

View File

@ -314,8 +314,9 @@ public class FSImage implements Closeable {
return isFormatted;
}
void doUpgrade(FSNamesystem target) throws IOException {
// Upgrade is allowed only if there are
/** Check that an upgrade or rolling upgrade may be started, i.e. that no previous fs state exists in any storage directory. */
void checkUpgrade(FSNamesystem target) throws IOException {
// Upgrade or rolling upgrade is allowed only if there are
// no previous fs states in any of the local directories
for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
StorageDirectory sd = it.next();
@ -324,11 +325,16 @@ public class FSImage implements Closeable {
"previous fs state should not exist during upgrade. "
+ "Finalize or rollback first.");
}
}
void doUpgrade(FSNamesystem target) throws IOException {
checkUpgrade(target);
// load the latest image
// Do upgrade for each directory
this.loadFSImage(target, StartupOption.UPGRADE, null);
target.checkRollingUpgrade("upgrade namenode");
long oldCTime = storage.getCTime();
storage.cTime = now(); // generate new cTime for the state

View File

@ -42,7 +42,6 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
@ -279,6 +278,9 @@ public final class FSImageFormatProtobuf {
fsn.setGenerationStampV1Limit(s.getGenstampV1Limit());
fsn.setLastAllocatedBlockId(s.getLastAllocatedBlockId());
imgTxId = s.getTransactionId();
if (s.hasRollingUpgradeStartTime()) {
fsn.setRollingUpgradeInfo(s.getRollingUpgradeStartTime());
}
}
private void loadStringTableSection(InputStream in) throws IOException {
@ -519,6 +521,9 @@ public final class FSImageFormatProtobuf {
// from the actual saver thread, there's a potential of a
// fairness-related deadlock. See the comments on HDFS-2223.
b.setNamespaceId(fsn.unprotectedGetNamespaceInfo().getNamespaceID());
if (fsn.isRollingUpgrade()) {
b.setRollingUpgradeStartTime(fsn.getRollingUpgradeInfo().getStartTime());
}
NameSystemSection s = b.build();
s.writeDelimitedTo(out);

View File

@ -25,7 +25,6 @@ import java.util.Arrays;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.Loader;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;

View File

@ -200,7 +200,6 @@ import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection.PersistToken;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
@ -4512,7 +4511,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
readLock();
try {
checkOperation(OperationCategory.UNCHECKED);
checkRollingUpgrade("save namespace");
if (!isInSafeMode()) {
throw new IOException("Safe mode should be turned ON "
@ -7146,6 +7144,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
final String action = "start rolling upgrade";
checkNameNodeSafeMode("Failed to " + action);
checkRollingUpgrade(action);
getFSImage().checkUpgrade(this);
getFSImage().saveNamespace(this, NameNodeFile.IMAGE_ROLLBACK, null);
LOG.info("Successfully saved namespace for preparing rolling upgrade.");
@ -7167,12 +7166,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
rollingUpgradeInfo = new RollingUpgradeInfo(blockPoolId, startTime);
}
/**
 * Expose the rolling upgrade state currently held by this namesystem.
 *
 * @return the current {@link RollingUpgradeInfo}; {@code null} when no
 *         rolling upgrade is in progress (see {@link #isRollingUpgrade()})
 */
RollingUpgradeInfo getRollingUpgradeInfo() {
  return this.rollingUpgradeInfo;
}
/**
 * Report whether a rolling upgrade is currently under way.
 *
 * @return {@code true} if rolling upgrade info is present (the
 *         {@code rollingUpgradeInfo} field is non-null), {@code false} otherwise
 */
public boolean isRollingUpgrade() {
  final RollingUpgradeInfo current = rollingUpgradeInfo;
  return null != current;
}
private void checkRollingUpgrade(String action) throws RollingUpgradeException {
void checkRollingUpgrade(String action) throws RollingUpgradeException {
if (isRollingUpgrade()) {
throw new RollingUpgradeException("Failed to " + action
+ " since a rolling upgrade is already in progress."
@ -7197,13 +7200,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
returnInfo = new RollingUpgradeInfo(blockPoolId,
rollingUpgradeInfo.getStartTime(), now());
getFSImage().purgeCheckpoints(NameNodeFile.IMAGE_ROLLBACK);
rollingUpgradeInfo = null;
getEditLog().logFinalizeRollingUpgrade(returnInfo.getFinalizeTime());
getFSImage().saveNamespace(this);
getFSImage().purgeCheckpoints(NameNodeFile.IMAGE_ROLLBACK);
} finally {
writeUnlock();
}
getEditLog().logSync();
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
logAuditEvent(true, "finalizeRollingUpgrade", null, null, null);

View File

@ -63,7 +63,7 @@ public class NameNodeLayoutVersion {
* </ul>
*/
public static enum Feature implements LayoutFeature {
ROLLING_UPGRADE_MARKER(-52, "Upgrade marker for rolling upgrade");
ROLLING_UPGRADE(-52, "Support rolling upgrade");
private final FeatureInfo info;

View File

@ -70,6 +70,7 @@ message NameSystemSection {
optional uint64 genstampV1Limit = 4;
optional uint64 lastAllocatedBlockId = 5;
optional uint64 transactionId = 6;
optional uint64 rollingUpgradeStartTime = 7;
}
/**

View File

@ -35,11 +35,15 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.TestParallelImageWrite;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.util.StringUtils;
import org.junit.BeforeClass;
import org.junit.Ignore;
@ -214,7 +218,7 @@ public class TestDFSUpgrade {
* This test attempts to upgrade the NameNode and DataNode under
* a number of valid and invalid conditions.
*/
@Test
@Test(timeout = 60000)
public void testUpgrade() throws Exception {
File[] baseDirs;
StorageInfo storageInfo = null;
@ -227,6 +231,19 @@ public class TestDFSUpgrade {
log("Normal NameNode upgrade", numDirs);
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
cluster = createCluster();
// make sure that rolling upgrade cannot be started
try {
final DistributedFileSystem dfs = cluster.getFileSystem();
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
dfs.rollingUpgrade(RollingUpgradeAction.START);
fail();
} catch(RemoteException re) {
assertEquals(InconsistentFSStateException.class.getName(),
re.getClassName());
LOG.info("The exception is expected.", re);
}
checkNameNode(nameNodeDirs, EXPECTED_TXID);
if (numDirs > 1)
TestParallelImageWrite.checkImages(cluster.getNamesystem(), numDirs);

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs;
import java.io.File;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -27,15 +28,13 @@ import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeException;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo;
import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode;
import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ExitUtil.ExitException;
import org.junit.Assert;
@ -100,9 +99,13 @@ public class TestRollingUpgrade {
Assert.assertTrue(dfs.exists(foo));
Assert.assertTrue(dfs.exists(bar));
Assert.assertTrue(dfs.exists(baz));
dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
dfs.saveNamespace();
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
}
cluster.restartNameNode();
cluster.restartNameNode("-rollingupgrade", "started");
{
final DistributedFileSystem dfs = cluster.getFileSystem();
Assert.assertTrue(dfs.exists(foo));
@ -182,24 +185,6 @@ public class TestRollingUpgrade {
Assert.assertEquals(info1, dfs.rollingUpgrade(RollingUpgradeAction.QUERY));
dfs.mkdirs(bar);
//save namespace should fail
try {
dfs.saveNamespace();
Assert.fail();
} catch(RemoteException re) {
Assert.assertEquals(RollingUpgradeException.class.getName(),
re.getClassName());
LOG.info("The exception is expected.", re);
}
//start checkpoint should fail
try {
NameNodeAdapter.startCheckpoint(cluster.getNameNode(), null, null);
Assert.fail();
} catch(RollingUpgradeException re) {
LOG.info("The exception is expected.", re);
}
}
// cluster2 takes over QJM
@ -231,8 +216,15 @@ public class TestRollingUpgrade {
Assert.assertTrue(dfs2.exists(bar));
Assert.assertTrue(dfs2.exists(baz));
//restart cluster with -upgrade should fail.
try {
cluster2.restartNameNode("-upgrade");
} catch(IOException e) {
LOG.info("The exception is expected.", e);
}
LOG.info("RESTART cluster 2 with -rollingupgrade started again");
cluster2.restartNameNode();
cluster2.restartNameNode("-rollingupgrade", "started");
Assert.assertEquals(info1, dfs2.rollingUpgrade(RollingUpgradeAction.QUERY));
Assert.assertTrue(dfs2.exists(foo));
Assert.assertTrue(dfs2.exists(bar));

View File

@ -78,7 +78,7 @@ public class TestLayoutVersion {
@Test
public void testNameNodeFeature() {
assertTrue(NameNodeLayoutVersion.supports(LayoutVersion.Feature.CACHING,
NameNodeLayoutVersion.Feature.ROLLING_UPGRADE_MARKER.getInfo().getLayoutVersion()));
NameNodeLayoutVersion.Feature.ROLLING_UPGRADE.getInfo().getLayoutVersion()));
}
/**