From 25119a39c05e00505b35e7fc607563c2fb029b29 Mon Sep 17 00:00:00 2001
From: Uma Maheswara Rao G
Date: Wed, 27 Nov 2013 11:01:02 +0000
Subject: [PATCH] Merge HDFS-5568. Support includeSnapshots option with Fsck
 command. Contributed by Vinay

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1545991 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  2 ++
 .../hdfs/server/namenode/NamenodeFsck.java    | 24 +++++++++++++++++
 .../org/apache/hadoop/hdfs/tools/DFSck.java   | 16 ++++++++---
 .../hadoop/hdfs/server/namenode/TestFsck.java | 27 +++++++++++++++++++
 4 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 64aa5f32a38..172070f7ac1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -238,6 +238,8 @@ Release 2.2.1 - UNRELEASED
     HDFS-5544. Adding Test case For Checking dfs.checksum type as NULL value.
     (Sathish via umamahesh)
 
+    HDFS-5568. Support includeSnapshots option with Fsck command. (Vinayakumar B via umamahesh)
+
   OPTIMIZATIONS
 
   BUG FIXES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index b933387a31b..7ed77585853 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -36,6 +36,7 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.hdfs.BlockReader;
 import org.apache.hadoop.hdfs.BlockReaderFactory;
@@ -46,9 +47,11 @@
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
@@ -139,6 +142,7 @@ public class NamenodeFsck {
 
   private final Configuration conf;
   private final PrintWriter out;
+  private List<String> snapshottableDirs = null;
 
   /**
    * Filesystem checker.
@@ -178,6 +182,8 @@ else if (key.equals("listcorruptfileblocks")) {
       }
       else if (key.equals("startblockafter")) {
         this.currentCookie[0] = pmap.get("startblockafter")[0];
+      } else if (key.equals("includeSnapshots")) {
+        this.snapshottableDirs = new ArrayList<String>();
       }
     }
   }
@@ -194,6 +200,16 @@ public void fsck() {
       out.println(msg);
       namenode.getNamesystem().logFsckEvent(path, remoteAddress);
 
+      if (snapshottableDirs != null) {
+        SnapshottableDirectoryStatus[] snapshotDirs = namenode.getRpcServer()
+            .getSnapshottableDirListing();
+        if (snapshotDirs != null) {
+          for (SnapshottableDirectoryStatus dir : snapshotDirs) {
+            snapshottableDirs.add(dir.getFullPath().toString());
+          }
+        }
+      }
+
       final HdfsFileStatus file = namenode.getRpcServer().getFileInfo(path);
 
       if (file != null) {
@@ -272,6 +288,14 @@ void check(String parent, HdfsFileStatus file, Result res) throws IOException {
     boolean isOpen = false;
 
     if (file.isDir()) {
+      if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
+        String snapshotPath = (path.endsWith(Path.SEPARATOR) ? path : path
+            + Path.SEPARATOR)
+            + HdfsConstants.DOT_SNAPSHOT_DIR;
+        HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(
+            snapshotPath);
+        check(snapshotPath, snapshotFileInfo, res);
+      }
       byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
       DirectoryListing thisListing;
       if (showFiles) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
index c3238f0de30..59985da289c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java
@@ -82,15 +82,23 @@ public class DFSck extends Configured implements Tool {
       + "\t-delete\tdelete corrupted files\n"
       + "\t-files\tprint out files being checked\n"
       + "\t-openforwrite\tprint out files opened for write\n"
+      + "\t-includeSnapshots\tinclude snapshot data if the given path"
+      + " indicates a snapshottable directory or there are "
+      + "snapshottable directories under it\n"
       + "\t-list-corruptfileblocks\tprint out list of missing "
       + "blocks and files they belong to\n"
       + "\t-blocks\tprint out block report\n"
       + "\t-locations\tprint out locations for every block\n"
-      + "\t-racks\tprint out network topology for data-node locations\n"
-      + "\t\tBy default fsck ignores files opened for write, "
+      + "\t-racks\tprint out network topology for data-node locations\n\n"
+      + "Please Note:\n"
+      + "\t1. By default fsck ignores files opened for write, "
       + "use -openforwrite to report such files. They are usually "
       + " tagged CORRUPT or HEALTHY depending on their block "
-      + "allocation status";
+      + "allocation status\n"
+      + "\t2. The -includeSnapshots option should not be used for comparing"
+      + " stats; use it only for a HEALTH check, as the report may contain"
+      + " duplicates if the same file is present in both the original fs"
+      + " tree and inside snapshots.";
 
   private final UserGroupInformation ugi;
   private final PrintStream out;
@@ -255,6 +263,8 @@ private int doWork(final String[] args) throws IOException {
       else if (args[idx].equals("-list-corruptfileblocks")) {
         url.append("&listcorruptfileblocks=1");
         doListCorruptFileBlocks = true;
+      } else if (args[idx].equals("-includeSnapshots")) {
+        url.append("&includeSnapshots=1");
       } else if (!args[idx].startsWith("-")) {
         if (null == dir) {
           dir = args[idx];
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index bcebce4e201..a6dd4fea1b1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -1058,4 +1058,31 @@ public void testFsckSymlink() throws Exception {
       if (cluster != null) { cluster.shutdown(); }
     }
   }
+
+  /**
+   * Test that snapshot files are included in the fsck report.
+   */
+  @Test
+  public void testFsckForSnapshotFiles() throws Exception {
+    final Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+        .build();
+    try {
+      String runFsck = runFsck(conf, 0, true, "/", "-includeSnapshots",
+          "-files");
+      assertTrue(runFsck.contains("HEALTHY"));
+      final String fileName = "/srcdat";
+      DistributedFileSystem hdfs = cluster.getFileSystem();
+      Path file1 = new Path(fileName);
+      DFSTestUtil.createFile(hdfs, file1, 1024, (short) 1, 1000L);
+      hdfs.allowSnapshot(new Path("/"));
+      hdfs.createSnapshot(new Path("/"), "mySnapShot");
+      runFsck = runFsck(conf, 0, true, "/", "-includeSnapshots", "-files");
+      assertTrue(runFsck.contains("/.snapshot/mySnapShot/srcdat"));
+      runFsck = runFsck(conf, 0, true, "/", "-files");
+      assertFalse(runFsck.contains("mySnapShot"));
+    } finally {
+      cluster.shutdown();
+    }
+  }
 }
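
Usage note (not part of the patch): testFsckForSnapshotFiles above exercises
the new flag end to end. With this change applied, a health check that also
walks snapshot directories can be run as:

    hdfs fsck / -includeSnapshots -files

Snapshot copies then appear in the report under paths such as
/.snapshot/mySnapShot/srcdat, as the test asserts. Since a file may exist both
in the live tree and in one or more snapshots, it can be counted more than
once, which is why the usage text warns against using -includeSnapshots when
comparing stats.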