HDFS-5568. Support includeSnapshots option with Fsck command. Contributed by Vinay

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1545987 13f79535-47bb-0310-9956-ffa450edef68
Uma Maheswara Rao G 2013-11-27 10:59:54 +00:00
parent 05a9a80bd4
commit b6d483b122
4 changed files with 66 additions and 3 deletions

hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -649,6 +649,8 @@ Release 2.2.1 - UNRELEASED
     HDFS-5544. Adding Test case For Checking dfs.checksum type as NULL value. (Sathish via umamahesh)
 
+    HDFS-5568. Support includeSnapshots option with Fsck command. (Vinayakumar B via umamahesh)
+
   OPTIMIZATIONS
 
   BUG FIXES

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java

@@ -36,6 +36,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.hdfs.BlockReader;
 import org.apache.hadoop.hdfs.BlockReaderFactory;
@@ -46,9 +47,11 @@ import org.apache.hadoop.hdfs.net.TcpPeerServer;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
@@ -139,6 +142,7 @@ public class NamenodeFsck {
 
   private final Configuration conf;
   private final PrintWriter out;
+  private List<String> snapshottableDirs = null;
 
   /**
    * Filesystem checker.
@@ -178,6 +182,8 @@ public class NamenodeFsck {
       }
       else if (key.equals("startblockafter")) {
         this.currentCookie[0] = pmap.get("startblockafter")[0];
+      } else if (key.equals("includeSnapshots")) {
+        this.snapshottableDirs = new ArrayList<String>();
       }
     }
   }
@@ -194,6 +200,16 @@ public class NamenodeFsck {
     out.println(msg);
     namenode.getNamesystem().logFsckEvent(path, remoteAddress);
+
+    if (snapshottableDirs != null) {
+      SnapshottableDirectoryStatus[] snapshotDirs = namenode.getRpcServer()
+          .getSnapshottableDirListing();
+      if (snapshotDirs != null) {
+        for (SnapshottableDirectoryStatus dir : snapshotDirs) {
+          snapshottableDirs.add(dir.getFullPath().toString());
+        }
+      }
+    }
 
     final HdfsFileStatus file = namenode.getRpcServer().getFileInfo(path);
     if (file != null) {
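
For context, this new block resolves the includeSnapshots flag into the full list of snapshottable directory paths before the fsck walk starts. A minimal client-side sketch of the same listing call, assuming a reachable HDFS instance (the class name and setup here are illustrative, not part of this patch):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;

    public class ListSnapshottableDirs {
      public static void main(String[] args) throws Exception {
        // Assumes fs.defaultFS in the loaded configuration points at HDFS.
        Configuration conf = new Configuration();
        DistributedFileSystem dfs =
            (DistributedFileSystem) FileSystem.get(conf);
        // Client-side counterpart of the getSnapshottableDirListing() RPC
        // used above; the result is null when nothing is snapshottable.
        SnapshottableDirectoryStatus[] dirs = dfs.getSnapshottableDirListing();
        if (dirs != null) {
          for (SnapshottableDirectoryStatus dir : dirs) {
            System.out.println(dir.getFullPath().toString());
          }
        }
      }
    }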
@@ -272,6 +288,14 @@ public class NamenodeFsck {
     boolean isOpen = false;
 
     if (file.isDir()) {
+      if (snapshottableDirs != null && snapshottableDirs.contains(path)) {
+        String snapshotPath = (path.endsWith(Path.SEPARATOR) ? path : path
+            + Path.SEPARATOR)
+            + HdfsConstants.DOT_SNAPSHOT_DIR;
+        HdfsFileStatus snapshotFileInfo = namenode.getRpcServer().getFileInfo(
+            snapshotPath);
+        check(snapshotPath, snapshotFileInfo, res);
+      }
       byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;
       DirectoryListing thisListing;
       if (showFiles) {
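
HdfsConstants.DOT_SNAPSHOT_DIR is the literal ".snapshot", so for a snapshottable directory such as /data the extra check() pass starts at /data/.snapshot, and for the root at /.snapshot; the endsWith() test avoids a doubled slash. A standalone sketch of that joining logic, with hypothetical sample paths:

    // Mirrors the snapshotPath construction above; Path.SEPARATOR is "/".
    static String toSnapshotRoot(String path) {
      return (path.endsWith("/") ? path : path + "/") + ".snapshot";
    }
    // toSnapshotRoot("/data") => "/data/.snapshot"
    // toSnapshotRoot("/")     => "/.snapshot"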

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java

@@ -83,15 +83,23 @@ public class DFSck extends Configured implements Tool {
       + "\t-delete\tdelete corrupted files\n"
       + "\t-files\tprint out files being checked\n"
       + "\t-openforwrite\tprint out files opened for write\n"
+      + "\t-includeSnapshots\tinclude snapshot data if the given path"
+      + " indicates a snapshottable directory or there are "
+      + "snapshottable directories under it\n"
       + "\t-list-corruptfileblocks\tprint out list of missing "
       + "blocks and files they belong to\n"
       + "\t-blocks\tprint out block report\n"
       + "\t-locations\tprint out locations for every block\n"
-      + "\t-racks\tprint out network topology for data-node locations\n"
-      + "\t\tBy default fsck ignores files opened for write, "
+      + "\t-racks\tprint out network topology for data-node locations\n\n"
+      + "Please Note:\n"
+      + "\t1. By default fsck ignores files opened for write, "
       + "use -openforwrite to report such files. They are usually "
       + " tagged CORRUPT or HEALTHY depending on their block "
-      + "allocation status";
+      + "allocation status\n"
+      + "\t2. Option -includeSnapshots should not be used for comparing stats,"
+      + " should be used only for HEALTH check, as this may contain duplicates"
+      + " if the same file present in both original fs tree "
+      + "and inside snapshots.";
 
   private final UserGroupInformation ugi;
   private final PrintStream out;
@@ -266,6 +274,8 @@ public class DFSck extends Configured implements Tool {
       else if (args[idx].equals("-list-corruptfileblocks")) {
         url.append("&listcorruptfileblocks=1");
         doListCorruptFileBlocks = true;
+      } else if (args[idx].equals("-includeSnapshots")) {
+        url.append("&includeSnapshots=1");
       } else if (!args[idx].startsWith("-")) {
         if (null == dir) {
           dir = args[idx];
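
With the new flag mapped to the includeSnapshots=1 query parameter above, a typical invocation (the target path / is illustrative) would be:

    hdfs fsck / -includeSnapshots -files

which is the same combination the new test below drives through runFsck().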

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java

@@ -1058,4 +1058,31 @@ public class TestFsck {
       if (cluster != null) { cluster.shutdown(); }
     }
   }
+
+  /**
+   * Test for including the snapshot files in fsck report
+   */
+  @Test
+  public void testFsckForSnapshotFiles() throws Exception {
+    final Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+        .build();
+    try {
+      String runFsck = runFsck(conf, 0, true, "/", "-includeSnapshots",
+          "-files");
+      assertTrue(runFsck.contains("HEALTHY"));
+      final String fileName = "/srcdat";
+      DistributedFileSystem hdfs = cluster.getFileSystem();
+      Path file1 = new Path(fileName);
+      DFSTestUtil.createFile(hdfs, file1, 1024, (short) 1, 1000L);
+      hdfs.allowSnapshot(new Path("/"));
+      hdfs.createSnapshot(new Path("/"), "mySnapShot");
+      runFsck = runFsck(conf, 0, true, "/", "-includeSnapshots", "-files");
+      assertTrue(runFsck.contains("/.snapshot/mySnapShot/srcdat"));
+      runFsck = runFsck(conf, 0, true, "/", "-files");
+      assertFalse(runFsck.contains("mySnapShot"));
+    } finally {
+      cluster.shutdown();
+    }
+  }
 }
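
The test class should be runnable on its own from the hadoop-hdfs module directory, assuming a standard Maven build of the tree (command shown as a sketch, not verified against this exact revision):

    mvn -Dtest=TestFsck test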