diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index c29911bd927..aa4d3d2a0d4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -550,6 +550,9 @@ Release 2.3.0 - UNRELEASED HDFS-5075. httpfs-config.sh calls out incorrect env script name (Timothy St. Clair via stevel) + HDFS-5504. In HA mode, OP_DELETE_SNAPSHOT is not decrementing the safemode threshold, + leads to NN safemode. (Vinay via jing9) + Release 2.2.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index aaa82cfa870..ec11e5d8f61 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -593,7 +593,7 @@ public class FSEditLogLoader { fsNamesys.getSnapshotManager().deleteSnapshot( deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName, collectedBlocks, removedINodes); - fsNamesys.removeBlocks(collectedBlocks); + fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks); collectedBlocks.clear(); fsNamesys.dir.removeFromInodeMap(removedINodes); removedINodes.clear(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index da5654bc674..b502a0ed83c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3316,6 +3316,18 @@ public class FSNamesystem implements Namesystem, 
FSClusterStats, return; } + removeBlocksAndUpdateSafemodeTotal(blocks); + } + + /** + * Removes the blocks from blocksmap and updates the safemode blocks total + * + * @param blocks + * An instance of {@link BlocksMapUpdateInfo} which contains a list + * of blocks that need to be removed from blocksMap + */ + void removeBlocksAndUpdateSafemodeTotal(BlocksMapUpdateInfo blocks) { + assert hasWriteLock(); // In the case that we are a Standby tailing edits from the // active while in safe-mode, we need to track the total number // of blocks and safe blocks in the system. @@ -3336,9 +3348,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } if (trackBlockCounts) { if (LOG.isDebugEnabled()) { - LOG.debug("Adjusting safe-mode totals for deletion of " + src + ":" + - "decreasing safeBlocks by " + numRemovedSafe + - ", totalBlocks by " + numRemovedComplete); + LOG.debug("Adjusting safe-mode totals for deletion." + + "decreasing safeBlocks by " + numRemovedSafe + + ", totalBlocks by " + numRemovedComplete); } adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java index 442cc63aaee..0acad2bdf6d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import 
org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; @@ -47,7 +48,10 @@ import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryWithQuota; import org.apache.hadoop.hdfs.server.namenode.INodeFile; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.Quota; +import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot.DirectoryDiffList; import org.apache.hadoop.hdfs.util.ReadOnlyList; import org.apache.hadoop.io.IOUtils; @@ -949,4 +953,54 @@ public class TestSnapshotDeletion { psOut.close(); out.close(); } + + /* + * OP_DELETE_SNAPSHOT edits op was not decrementing the safemode threshold on + * restart in HA mode. 
HDFS-5504
+   *
+   * Scenario: bring up an HA mini cluster with the standby stopped, create
+   * files under a snapshottable directory, snapshot it, delete the files,
+   * roll the edit log, delete the snapshot, then restart NameNode 0 and
+   * expect the cluster to come up again.
+   */
+  @Test(timeout = 60000)
+  public void testHANNRestartAfterSnapshotDeletion() throws Exception {
+    // Replace the cluster held by this test class with an HA one.
+    hdfs.close();
+    cluster.shutdown();
+    conf = new Configuration();
+    MiniDFSCluster.Builder haBuilder = new MiniDFSCluster.Builder(conf);
+    haBuilder.nnTopology(MiniDFSNNTopology.simpleHATopology());
+    haBuilder.numDataNodes(1);
+    cluster = haBuilder.build();
+    cluster.transitionToActive(0);
+
+    // The NameNode at index 1 (the standby) is not needed; stop it.
+    cluster.getNameNode(1).stop();
+
+    hdfs = (DistributedFileSystem) HATestUtil.configureFailoverFs(cluster,
+        conf);
+
+    // Populate /dir/sub with five files below a snapshottable root.
+    Path snapshotRoot = new Path("/dir");
+    Path child = new Path(snapshotRoot, "sub");
+    hdfs.mkdirs(snapshotRoot);
+    hdfs.allowSnapshot(snapshotRoot);
+    int fileIndex = 0;
+    while (fileIndex < 5) {
+      Path file = new Path(child, "" + fileIndex);
+      DFSTestUtil.createFile(hdfs, file, 100, (short) 1, 1024L);
+      fileIndex++;
+    }
+
+    // Snapshot the directory, then remove the files so that only the
+    // snapshot still refers to them.
+    hdfs.createSnapshot(snapshotRoot, "s0");
+    hdfs.delete(child, true);
+
+    // Roll the edit log before the snapshot deletion is issued.
+    NameNode active = cluster.getNameNode(0);
+    active.getRpcServer().rollEditLog();
+
+    hdfs.deleteSnapshot(snapshotRoot, "s0");
+    // Give the namenode time to process the block deletions.
+    Thread.sleep(2000);
+
+    // Abort the edit logs and restart NameNode 0.
+    NameNodeAdapter.abortEditLogs(active);
+    cluster.restartNameNode(0, false);
+    cluster.transitionToActive(0);
+
+    // Block until the cluster reports that it is up.
+    cluster.waitClusterUp();
+  }
 }