diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index b7b3cda0a6e..dcaff35358a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -36,6 +36,9 @@ Release 2.7.3 - UNRELEASED HDFS-9516. Truncate file fails with data dirs on multiple disks. (Plamen Jeliazkov via shv) + HDFS-9533. seen_txid in the shared edits directory is modified during + bootstrapping (kihwal) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java index dbb2c50da9d..98699e223dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java @@ -473,8 +473,24 @@ public class NNStorage extends Storage implements Closeable, * @param txid the txid that has been reached */ public void writeTransactionIdFileToStorage(long txid) { + writeTransactionIdFileToStorage(txid, null); + } + + /** + * Write a small file in all available storage directories that + * indicates that the namespace has reached some given transaction ID. + * + * This is used when the image is loaded to avoid accidental rollbacks + * in the case where an edit log is fully deleted but there is no + * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure() + * @param txid the txid that has been reached + * @param type the type of directory + */ + public void writeTransactionIdFileToStorage(long txid, + NameNodeDirType type) { // Write txid marker in all storage directories - for (StorageDirectory sd : storageDirs) { + for (Iterator it = dirIterator(type); it.hasNext();) { + StorageDirectory sd = it.next(); try { writeTransactionIdFile(sd, txid); } catch(IOException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java index 4bd53a5585b..2d83cbae04a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; +import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.hdfs.server.namenode.NNUpgradeUtil; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; @@ -312,13 +313,14 @@ public class BootstrapStandby implements Tool, Configurable { return ERR_CODE_LOGS_UNAVAILABLE; } - image.getStorage().writeTransactionIdFileToStorage(curTxId); - // Download that checkpoint into our storage directories. MD5Hash hash = TransferFsImage.downloadImageToStorage( otherHttpAddr, imageTxId, storage, true); image.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE, imageTxId, hash); + + // Write seen_txid to the formatted image directories. + storage.writeTransactionIdFileToStorage(imageTxId, NameNodeDirType.IMAGE); } catch (IOException ioe) { image.close(); throw ioe; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index dd370fa71b5..32a272f6e3c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -568,4 +568,11 @@ public abstract class FSImageTestUtil { FSImageTestUtil.assertParallelFilesAreIdentical(curDirs, ignoredFiles); } + + public static long getStorageTxId(NameNode node, URI storageUri) + throws IOException { + StorageDirectory sDir = getFSImage(node).getStorage(). + getStorageDirectory(storageUri); + return NNStorage.readTransactionIdFile(sDir); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java index 7abc5024a9b..85753ade589 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java @@ -29,6 +29,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature; @@ -126,6 +127,13 @@ public class TestBootstrapStandby { .getFSImage().getMostRecentCheckpointTxId(); assertEquals(6, expectedCheckpointTxId); + // advance the current txid + cluster.getFileSystem(0).create(new Path("/test_txid"), (short)1).close(); + + // obtain the content of seen_txid + URI editsUri = cluster.getSharedEditsDir(0, 1); + long seen_txid_shared = FSImageTestUtil.getStorageTxId(nn0, editsUri); + int rc = BootstrapStandby.run( new String[]{"-force"}, cluster.getConfiguration(1)); @@ -136,6 +144,10 @@ public class TestBootstrapStandby { ImmutableList.of((int)expectedCheckpointTxId)); FSImageTestUtil.assertNNFilesMatch(cluster); + // Make sure the seen_txid was not modified by the standby + assertEquals(seen_txid_shared, + FSImageTestUtil.getStorageTxId(nn0, editsUri)); + // We should now be able to start the standby successfully. cluster.restartNameNode(1); }