From 821729905ed8439ee5f15372faaf6b40aff7f155 Mon Sep 17 00:00:00 2001
From: Manoj Govindassamy
Date: Mon, 8 Jan 2018 15:34:00 -0800
Subject: [PATCH] HDFS-12985. NameNode crashes during restart after an
 OpenForWrite file present in the Snapshot got deleted.

(cherry picked from commit 73ff09b79a5cf9932edc21c58f3a730f7379086b)
---
 .../hdfs/server/namenode/INodeFile.java     | 16 ++++---
 .../snapshot/TestOpenFilesWithSnapshot.java | 45 +++++++++++++++++++
 2 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
index 906a940cfda..90659f3e21a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
@@ -727,6 +727,13 @@ public void clearBlocks() {
     this.blocks = BlockInfo.EMPTY_ARRAY;
   }
 
+  private void updateRemovedUnderConstructionFiles(
+      ReclaimContext reclaimContext) {
+    if (isUnderConstruction() && reclaimContext.removedUCFiles != null) {
+      reclaimContext.removedUCFiles.add(getId());
+    }
+  }
+
   @Override
   public void cleanSubtree(ReclaimContext reclaimContext, final int snapshot,
       int priorSnapshotId) {
@@ -735,6 +742,7 @@ public void cleanSubtree(ReclaimContext reclaimContext,
       // TODO: avoid calling getStoragePolicyID
       sf.cleanFile(reclaimContext, this, snapshot, priorSnapshotId,
           getStoragePolicyID());
+      updateRemovedUnderConstructionFiles(reclaimContext);
     } else {
       if (snapshot == CURRENT_STATE_ID) {
         if (priorSnapshotId == NO_SNAPSHOT_ID) {
@@ -747,9 +755,7 @@ public void cleanSubtree(ReclaimContext reclaimContext,
           // clean the 0-sized block if the file is UC
           if (uc != null) {
             uc.cleanZeroSizeBlock(this, reclaimContext.collectedBlocks);
-            if (reclaimContext.removedUCFiles != null) {
-              reclaimContext.removedUCFiles.add(getId());
-            }
+            updateRemovedUnderConstructionFiles(reclaimContext);
           }
         }
       }
@@ -768,9 +774,7 @@ public void destroyAndCollectBlocks(ReclaimContext reclaimContext) {
           reclaimContext.collectedBlocks);
       sf.clearDiffs();
     }
-    if (isUnderConstruction() && reclaimContext.removedUCFiles != null) {
-      reclaimContext.removedUCFiles.add(getId());
-    }
+    updateRemovedUnderConstructionFiles(reclaimContext);
   }
 
   public void clearFile(ReclaimContext reclaimContext) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestOpenFilesWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestOpenFilesWithSnapshot.java
index 537612ca29a..a35e1410498 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestOpenFilesWithSnapshot.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestOpenFilesWithSnapshot.java
@@ -630,6 +630,51 @@ public void testSnapshotsForOpenFilesAndDeletion() throws Exception {
     hbaseOutputStream.close();
   }
 
+  /**
+   * Verify if the NameNode can restart properly after an OpenForWrite
+   * file and the only snapshot it was present in were deleted.
+   *
+   * @throws Exception
+   */
+  @Test (timeout = 600000)
+  public void testOpenFileDeletionAndNNRestart() throws Exception {
+    // Construct the directory tree
+    final Path snapRootDir = new Path("/level_0_A/test");
+    final String hbaseFileName = "hbase.log";
+    final String snap1Name = "snap_1";
+
+    // Create a file with few blocks. Get its output stream
+    // for append.
+    final Path hbaseFile = new Path(snapRootDir, hbaseFileName);
+    createFile(hbaseFile);
+    FSDataOutputStream hbaseOutputStream = fs.append(hbaseFile);
+
+    int newWriteLength = (int) (BLOCKSIZE * 1.5);
+    byte[] buf = new byte[newWriteLength];
+    Random random = new Random();
+    random.nextBytes(buf);
+
+    // Write more data to the file
+    writeToStream(hbaseOutputStream, buf);
+
+    // Take a snapshot while the file is open for write
+    final Path snap1Dir = SnapshotTestHelper.createSnapshot(
+        fs, snapRootDir, snap1Name);
+    LOG.info("Open file status in snap: "
+        + fs.getFileStatus(new Path(snap1Dir, hbaseFileName)));
+
+    // Delete the open file and the snapshot while
+    // its output stream is still open.
+    fs.delete(hbaseFile, true);
+    fs.deleteSnapshot(snapRootDir, snap1Name);
+    Assert.assertFalse(fs.exists(hbaseFile));
+
+    // Verify file existence after the NameNode restart
+    cluster.restartNameNode();
+    cluster.waitActive();
+    Assert.assertFalse(fs.exists(hbaseFile));
+  }
+
   /**
    * Test client writing to open files are not interrupted when snapshots
    * that captured open files get deleted.