From e5e5ba495f8f68c546b70c5e8ad2c81ebfb27588 Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Thu, 17 Dec 2015 17:23:55 -0600 Subject: [PATCH] HDFS-9533. seen_txid in the shared edits directory is modified during bootstrapping. Contributed by Kihwal Lee. (cherry picked from commit 5cb1e0118b173a95c1f7bdfae1e58d7833d61c26) Conflicts: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java (cherry picked from commit 3c0ae5914ca673b055a9673b4c17c8aaa570c258) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hadoop/hdfs/server/namenode/NNStorage.java | 18 +++++++++++++++++- .../server/namenode/ha/BootstrapStandby.java | 6 ++++-- .../hdfs/server/namenode/FSImageTestUtil.java | 7 +++++++ .../namenode/ha/TestBootstrapStandby.java | 11 +++++++++++ 5 files changed, 42 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index e8b960206fd..e0fe04edd3d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1629,6 +1629,9 @@ Release 2.7.3 - UNRELEASED HDFS-9516. Truncate file fails with data dirs on multiple disks. (Plamen Jeliazkov via shv) + HDFS-9533. 
seen_txid in the shared edits directory is modified during + bootstrapping (kihwal) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java index 9b63e728b1c..ed36b27d2e1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java @@ -482,8 +482,24 @@ public class NNStorage extends Storage implements Closeable, * @param txid the txid that has been reached */ public void writeTransactionIdFileToStorage(long txid) { + writeTransactionIdFileToStorage(txid, null); + } + + /** + * Write a small file in all available storage directories that + * indicates that the namespace has reached some given transaction ID. + * + * This is used when the image is loaded to avoid accidental rollbacks + * in the case where an edit log is fully deleted but there is no + * checkpoint. 
See TestNameEditsConfigs.testNameEditsConfigsFailure() + * @param txid the txid that has been reached + * @param type the type of directory + */ + public void writeTransactionIdFileToStorage(long txid, + NameNodeDirType type) { // Write txid marker in all storage directories - for (StorageDirectory sd : storageDirs) { + for (Iterator<StorageDirectory> it = dirIterator(type); it.hasNext();) { + StorageDirectory sd = it.next(); try { writeTransactionIdFile(sd, txid); } catch(IOException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java index 031170822c6..213141c2183 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java @@ -52,6 +52,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; +import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.hdfs.server.namenode.NNUpgradeUtil; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; @@ -313,13 +314,14 @@ public class BootstrapStandby implements Tool, Configurable { return ERR_CODE_LOGS_UNAVAILABLE; } - image.getStorage().writeTransactionIdFileToStorage(curTxId); - // Download that checkpoint into our storage directories. MD5Hash hash = TransferFsImage.downloadImageToStorage( otherHttpAddr, imageTxId, storage, true, true); image.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE, imageTxId, hash); + + // Write seen_txid to the formatted image directories. 
+ storage.writeTransactionIdFileToStorage(imageTxId, NameNodeDirType.IMAGE); } catch (IOException ioe) { throw ioe; } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index 7ee6f12c42b..fafeae2fc90 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -568,4 +568,11 @@ public abstract class FSImageTestUtil { FSImageTestUtil.assertParallelFilesAreIdentical(curDirs, ignoredFiles); } + + public static long getStorageTxId(NameNode node, URI storageUri) + throws IOException { + StorageDirectory sDir = getFSImage(node).getStorage(). + getStorageDirectory(storageUri); + return NNStorage.readTransactionIdFile(sDir); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java index 6bcfb496cc6..9dfad12040d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java @@ -135,6 +135,13 @@ public class TestBootstrapStandby { .getFSImage().getMostRecentCheckpointTxId(); assertEquals(6, expectedCheckpointTxId); + // advance the current txid + cluster.getFileSystem(0).create(new Path("/test_txid"), (short)1).close(); + + // obtain the content of seen_txid + URI editsUri = cluster.getSharedEditsDir(0, 1); + long seen_txid_shared = FSImageTestUtil.getStorageTxId(nn0, editsUri); + int rc = BootstrapStandby.run( new String[]{"-force"}, 
cluster.getConfiguration(1)); @@ -145,6 +152,10 @@ public class TestBootstrapStandby { ImmutableList.of((int)expectedCheckpointTxId)); FSImageTestUtil.assertNNFilesMatch(cluster); + // Make sure the seen_txid was not modified by the standby + assertEquals(seen_txid_shared, + FSImageTestUtil.getStorageTxId(nn0, editsUri)); + // We should now be able to start the standby successfully. cluster.restartNameNode(1); }