HDFS-4179. BackupNode: allow reads, fix checkpointing, safeMode. Contributed by Konstantin Shvachko.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1411509 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f1653b3ea8
commit
4688f839d2
|
@ -258,6 +258,9 @@ Release 2.0.3-alpha - Unreleased
|
|||
|
||||
HDFS-4178. Shell scripts should not close stderr (Andy Isaacson via daryn)
|
||||
|
||||
HDFS-4179. BackupNode: allow reads, fix checkpointing, safeMode. (shv)
|
||||
|
||||
|
||||
Release 2.0.2-alpha - 2012-09-07
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -68,6 +68,8 @@ public class BackupNode extends NameNode {
|
|||
private static final String BN_HTTP_ADDRESS_NAME_KEY = DFSConfigKeys.DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY;
|
||||
private static final String BN_HTTP_ADDRESS_DEFAULT = DFSConfigKeys.DFS_NAMENODE_BACKUP_HTTP_ADDRESS_DEFAULT;
|
||||
private static final String BN_SERVICE_RPC_ADDRESS_KEY = DFSConfigKeys.DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY;
|
||||
private static final float BN_SAFEMODE_THRESHOLD_PCT_DEFAULT = 1.5f;
|
||||
private static final int BN_SAFEMODE_EXTENSION_DEFAULT = Integer.MAX_VALUE;
|
||||
|
||||
/** Name-node proxy */
|
||||
NamenodeProtocol namenode;
|
||||
|
@ -126,6 +128,10 @@ public class BackupNode extends NameNode {
|
|||
|
||||
@Override // NameNode
|
||||
protected void loadNamesystem(Configuration conf) throws IOException {
|
||||
conf.setFloat(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
|
||||
BN_SAFEMODE_THRESHOLD_PCT_DEFAULT);
|
||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY,
|
||||
BN_SAFEMODE_EXTENSION_DEFAULT);
|
||||
BackupImage bnImage = new BackupImage(conf);
|
||||
this.namesystem = new FSNamesystem(conf, bnImage);
|
||||
bnImage.setNamesystem(namesystem);
|
||||
|
@ -413,9 +419,9 @@ public class BackupNode extends NameNode {
|
|||
return;
|
||||
}
|
||||
if (OperationCategory.JOURNAL != op &&
|
||||
!(OperationCategory.READ == op && allowStaleStandbyReads)) {
|
||||
!(OperationCategory.READ == op && !isRole(NamenodeRole.CHECKPOINT))) {
|
||||
String msg = "Operation category " + op
|
||||
+ " is not supported at the BackupNode";
|
||||
+ " is not supported at " + getRole();
|
||||
throw new StandbyException(msg);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -206,6 +206,7 @@ class Checkpointer extends Daemon {
|
|||
RemoteEditLogManifest manifest =
|
||||
getRemoteNamenodeProxy().getEditLogManifest(bnImage.getLastAppliedTxId() + 1);
|
||||
|
||||
boolean needReloadImage = false;
|
||||
if (!manifest.getLogs().isEmpty()) {
|
||||
RemoteEditLog firstRemoteLog = manifest.getLogs().get(0);
|
||||
// we don't have enough logs to roll forward using only logs. Need
|
||||
|
@ -218,13 +219,10 @@ class Checkpointer extends Daemon {
|
|||
bnStorage, true);
|
||||
bnImage.saveDigestAndRenameCheckpointImage(
|
||||
sig.mostRecentCheckpointTxId, downloadedHash);
|
||||
|
||||
LOG.info("Loading image with txid " + sig.mostRecentCheckpointTxId);
|
||||
File file = bnStorage.findImageFile(sig.mostRecentCheckpointTxId);
|
||||
bnImage.reloadFromImageFile(file, backupNode.getNamesystem());
|
||||
lastApplied = sig.mostRecentCheckpointTxId;
|
||||
needReloadImage = true;
|
||||
}
|
||||
|
||||
lastApplied = bnImage.getLastAppliedTxId();
|
||||
if (firstRemoteLog.getStartTxId() > lastApplied + 1) {
|
||||
throw new IOException("No logs to roll forward from " + lastApplied);
|
||||
}
|
||||
|
@ -235,6 +233,11 @@ class Checkpointer extends Daemon {
|
|||
backupNode.nnHttpAddress, log, bnStorage);
|
||||
}
|
||||
|
||||
if(needReloadImage) {
|
||||
LOG.info("Loading image with txid " + sig.mostRecentCheckpointTxId);
|
||||
File file = bnStorage.findImageFile(sig.mostRecentCheckpointTxId);
|
||||
bnImage.reloadFromImageFile(file, backupNode.getNamesystem());
|
||||
}
|
||||
rollForwardByApplyingLogs(manifest, bnImage, backupNode.getNamesystem());
|
||||
}
|
||||
|
||||
|
@ -243,8 +246,9 @@ class Checkpointer extends Daemon {
|
|||
backupNode.namesystem.writeLock();
|
||||
try {
|
||||
backupNode.namesystem.dir.setReady();
|
||||
if(backupNode.namesystem.getBlocksTotal() > 0) {
|
||||
backupNode.namesystem.setBlockTotal();
|
||||
|
||||
}
|
||||
bnImage.saveFSImageInAllDirs(backupNode.getNamesystem(), txid);
|
||||
bnStorage.writeAll();
|
||||
} finally {
|
||||
|
@ -284,9 +288,9 @@ class Checkpointer extends Daemon {
|
|||
|
||||
List<EditLogInputStream> editsStreams = Lists.newArrayList();
|
||||
for (RemoteEditLog log : manifest.getLogs()) {
|
||||
if (log.getEndTxId() > dstImage.getLastAppliedTxId()) {
|
||||
File f = dstStorage.findFinalizedEditsFile(
|
||||
log.getStartTxId(), log.getEndTxId());
|
||||
if (log.getStartTxId() > dstImage.getLastAppliedTxId()) {
|
||||
editsStreams.add(new EditLogFileInputStream(f, log.getStartTxId(),
|
||||
log.getEndTxId(), true));
|
||||
}
|
||||
|
|
|
@ -39,7 +39,6 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
|||
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
|
||||
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
|
||||
import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
|
||||
import org.apache.hadoop.hdfs.server.common.GenerationStamp;
|
||||
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
|
||||
import org.apache.hadoop.hdfs.server.common.Storage;
|
||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||
|
@ -1019,6 +1018,7 @@ public class FSImage implements Closeable {
|
|||
NamenodeCommand startCheckpoint(NamenodeRegistration bnReg, // backup node
|
||||
NamenodeRegistration nnReg) // active name-node
|
||||
throws IOException {
|
||||
LOG.info("Start checkpoint at txid " + getEditLog().getLastWrittenTxId());
|
||||
String msg = null;
|
||||
// Verify that checkpoint is allowed
|
||||
if(bnReg.getNamespaceID() != storage.getNamespaceID())
|
||||
|
@ -1058,6 +1058,7 @@ public class FSImage implements Closeable {
|
|||
* @throws IOException if the checkpoint fields are inconsistent
|
||||
*/
|
||||
void endCheckpoint(CheckpointSignature sig) throws IOException {
|
||||
LOG.info("End checkpoint at txid " + getEditLog().getLastWrittenTxId());
|
||||
sig.validateStorageInfo(this);
|
||||
}
|
||||
|
||||
|
|
|
@ -3970,7 +3970,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
// of the number of total blocks in the system.
|
||||
this.shouldIncrementallyTrackBlocks = true;
|
||||
}
|
||||
|
||||
if(blockSafe < 0)
|
||||
this.blockSafe = 0;
|
||||
checkMode();
|
||||
}
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
|
|||
import org.apache.hadoop.hdfs.HAUtil;
|
||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
|
||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
|
||||
|
@ -94,7 +95,10 @@ public class TestBackupNode {
|
|||
c.set(DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY,
|
||||
"127.0.0.1:0");
|
||||
|
||||
return (BackupNode)NameNode.createNameNode(new String[]{startupOpt.getName()}, c);
|
||||
BackupNode bn = (BackupNode)NameNode.createNameNode(
|
||||
new String[]{startupOpt.getName()}, c);
|
||||
assertTrue(bn.getRole() + " must be in SafeMode.", bn.isInSafeMode());
|
||||
return bn;
|
||||
}
|
||||
|
||||
void waitCheckpointDone(MiniDFSCluster cluster, long txid) {
|
||||
|
@ -342,11 +346,22 @@ public class TestBackupNode {
|
|||
try {
|
||||
TestCheckpoint.writeFile(bnFS, file3, replication);
|
||||
} catch (IOException eio) {
|
||||
LOG.info("Write to BN failed as expected: ", eio);
|
||||
LOG.info("Write to " + backup.getRole() + " failed as expected: ", eio);
|
||||
canWrite = false;
|
||||
}
|
||||
assertFalse("Write to BackupNode must be prohibited.", canWrite);
|
||||
|
||||
// Reads are allowed for BackupNode, but not for CheckpointNode
|
||||
boolean canRead = true;
|
||||
try {
|
||||
bnFS.exists(file2);
|
||||
} catch (IOException eio) {
|
||||
LOG.info("Read from " + backup.getRole() + " failed: ", eio);
|
||||
canRead = false;
|
||||
}
|
||||
assertEquals("Reads to BackupNode are allowed, but not CheckpointNode.",
|
||||
canRead, backup.isRole(NamenodeRole.BACKUP));
|
||||
|
||||
TestCheckpoint.writeFile(fileSys, file3, replication);
|
||||
TestCheckpoint.checkFile(fileSys, file3, replication);
|
||||
// should also be on BN right away
|
||||
|
|
Loading…
Reference in New Issue