From 25119a39c05e00505b35e7fc607563c2fb029b29 Mon Sep 17 00:00:00 2001
From: Uma Maheswara Rao G
Date: Wed, 27 Nov 2013 11:01:02 +0000
Subject: [PATCH] Merge HDFS-5568. Support includeSnapshots option with Fsck
 command. Contributed by Vinay

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1545991 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  2 ++
 .../hdfs/server/namenode/NamenodeFsck.java    | 24 +++++++++++++++++
 .../org/apache/hadoop/hdfs/tools/DFSck.java   | 16 ++++++++---
 .../hadoop/hdfs/server/namenode/TestFsck.java | 27 +++++++++++++++++++
 4 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 64aa5f32a38..172070f7ac1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -238,6 +238,8 @@ Release 2.2.1 - UNRELEASED
     HDFS-5544. Adding Test case For Checking dfs.checksum type as NULL value.
     (Sathish via umamahesh)
 
+    HDFS-5568. Support includeSnapshots option with Fsck command. (Vinayakumar B via umamahesh)
+
   OPTIMIZATIONS
 
   BUG FIXES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index b933387a31b..7ed77585853 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -36,6 +36,7 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.hdfs.BlockReader;
 import org.apache.hadoop.hdfs.BlockReaderFactory;
@@ -46,9 +47,11 @@
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
@@ -139,6 +142,7 @@ public class NamenodeFsck {
 
   private final Configuration conf;
   private final PrintWriter out;
+  private List<String> snapshottableDirs = null;
 
   /**
    * Filesystem checker.
@@ -178,6 +182,8 @@ else if (key.equals("listcorruptfileblocks")) {
       }
       else if (key.equals("startblockafter")) {
         this.currentCookie[0] = pmap.get("startblockafter")[0];
+      } else if (key.equals("includeSnapshots")) {
+        this.snapshottableDirs = new ArrayList<String>();
       }
     }
   }
@@ -194,6 +200,16 @@ public void fsck() {
       out.println(msg);
       namenode.getNamesystem().logFsckEvent(path, remoteAddress);
 
+      if (snapshottableDirs != null) {
+        SnapshottableDirectoryStatus[] snapshotDirs = namenode.getRpcServer()
+            .getSnapshottableDirListing();
+        if (snapshotDirs != null) {
+          for (SnapshottableDirectoryStatus dir : snapshotDirs) {
+            snapshottableDirs.add(dir.getFullPath().toString());
+          }
+        }
+      }
+
       final HdfsFileStatus file = namenode.getRpcServer().getFileInfo(path);
 
       if (file != null) {
@@ -272,6 +288,14 @@ void check(String parent, HdfsFileStatus file, Result res) throws IOException {
     boolean isOpen = false;
 
     if (file.isDir()) {
+      if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
+        String snapshotPath = (path.endsWith(Path.SEPARATOR) ? path : path
+            + Path.SEPARATOR)
+            + HdfsConstants.DOT_SNAPSHOT_DIR;
+        HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(
+            snapshotPath);
+        check(snapshotPath, snapshotFileInfo, res);
+      }
       byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
       DirectoryListing thisListing;
       if (showFiles) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
index c3238f0de30..59985da289c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
@@ -82,15 +82,23 @@ public class DFSck extends Configured implements Tool {
       + "\t-delete\tdelete corrupted files\n"
       + "\t-files\tprint out files being checked\n"
       + "\t-openforwrite\tprint out files opened for write\n"
+      + "\t-includeSnapshots\tinclude snapshot data if the given path"
+      + " indicates a snapshottable directory or there are "
+      + "snapshottable directories under it\n"
       + "\t-list-corruptfileblocks\tprint out list of missing "
       + "blocks and files they belong to\n"
       + "\t-blocks\tprint out block report\n"
       + "\t-locations\tprint out locations for every block\n"
-      + "\t-racks\tprint out network topology for data-node locations\n"
-      + "\t\tBy default fsck ignores files opened for write, "
+      + "\t-racks\tprint out network topology for data-node locations\n\n"
+      + "Please Note:\n"
+      + "\t1. By default fsck ignores files opened for write, "
       + "use -openforwrite to report such files. They are usually "
       + " tagged CORRUPT or HEALTHY depending on their block "
-      + "allocation status";
+      + "allocation status\n"
+      + "\t2. The -includeSnapshots option should not be used for comparing"
+      + " stats; use it only for a HEALTH check, as the report may contain"
+      + " duplicates if the same file is present in both the original fs"
+      + " tree and inside snapshots.";
 
   private final UserGroupInformation ugi;
   private final PrintStream out;
@@ -255,6 +263,8 @@ private int doWork(final String[] args) throws IOException {
       else if (args[idx].equals("-list-corruptfileblocks")) {
         url.append("&listcorruptfileblocks=1");
         doListCorruptFileBlocks = true;
+      } else if (args[idx].equals("-includeSnapshots")) {
+        url.append("&includeSnapshots=1");
       } else if (!args[idx].startsWith("-")) {
         if (null == dir) {
           dir = args[idx];
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index bcebce4e201..a6dd4fea1b1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -1058,4 +1058,31 @@ public void testFsckSymlink() throws Exception {
       if (cluster != null) { cluster.shutdown(); }
     }
   }
+
+  /**
+   * Test that snapshot files are included in the fsck report.
+   */
+  @Test
+  public void testFsckForSnapshotFiles() throws Exception {
+    final Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+        .build();
+    try {
+      String runFsck = runFsck(conf, 0, true, "/", "-includeSnapshots",
+          "-files");
+      assertTrue(runFsck.contains("HEALTHY"));
+      final String fileName = "/srcdat";
+      DistributedFileSystem hdfs = cluster.getFileSystem();
+      Path file1 = new Path(fileName);
+      DFSTestUtil.createFile(hdfs, file1, 1024, (short) 1, 1000L);
+      hdfs.allowSnapshot(new Path("/"));
+      hdfs.createSnapshot(new Path("/"), "mySnapShot");
+      runFsck = runFsck(conf, 0, true, "/", "-includeSnapshots", "-files");
+      assertTrue(runFsck.contains("/.snapshot/mySnapShot/srcdat"));
+      runFsck = runFsck(conf, 0, true, "/", "-files");
+      assertFalse(runFsck.contains("mySnapShot"));
+    } finally {
+      cluster.shutdown();
+    }
+  }
 }
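
Usage note (not part of the patch): testFsckForSnapshotFiles above exercises
the new flag end to end. With this change applied, a health check that also
walks snapshot directories can be run as:

    hdfs fsck / -includeSnapshots -files

Snapshot copies then appear in the report under paths such as
/.snapshot/mySnapShot/srcdat, as the test asserts. Since a file may exist both
in the live tree and in one or more snapshots, it can be counted more than
once, which is why the usage text warns against using -includeSnapshots when
comparing stats.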