From a0887132080a0972466630d4696c009a65f420fb Mon Sep 17 00:00:00 2001
From: Aaron Myers
Date: Sat, 2 Jun 2012 00:42:09 +0000
Subject: [PATCH] HDFS-3442. Incorrect count for Missing Replicas in FSCK
 report. Contributed by Andrew Wang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1345410 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |   2 +
 .../hdfs/server/namenode/NamenodeFsck.java    |  50 +++++----
 .../hadoop/hdfs/server/namenode/TestFsck.java | 100 +++++++++++++++++-
 3 files changed, 125 insertions(+), 27 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 9447f7e9ac6..60fdc2d58f3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -144,6 +144,8 @@ Release 2.0.1-alpha - UNRELEASED
     HDFS-3487. offlineimageviewer should give byte offset information
     when it encounters an exception. (Colin Patrick McCabe via eli)
 
+    HDFS-3442. Incorrect count for Missing Replicas in FSCK report. (Andrew Wang via atm)
+
 Release 2.0.0-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index 7cb868b179b..2100b2c0cfc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -53,6 +53,8 @@ import org.apache.hadoop.net.NetworkTopology;
 import org.apache.hadoop.net.NodeBase;
 import org.apache.hadoop.security.UserGroupInformation;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * This class provides rudimentary checking of DFS volumes for errors and
  * sub-optimal conditions.
@@ -244,7 +246,8 @@ public class NamenodeFsck {
     out.println();
   }
 
-  private void check(String parent, HdfsFileStatus file, Result res) throws IOException {
+  @VisibleForTesting
+  void check(String parent, HdfsFileStatus file, Result res) throws IOException {
     String path = file.getFullName(parent);
     boolean isOpen = false;
 
@@ -313,6 +316,7 @@ public class NamenodeFsck {
       DatanodeInfo[] locs = lBlk.getLocations();
       res.totalReplicas += locs.length;
       short targetFileReplication = file.getReplication();
+      res.numExpectedReplicas += targetFileReplication;
       if (locs.length > targetFileReplication) {
         res.excessiveReplicas += (locs.length - targetFileReplication);
         res.numOverReplicatedBlocks += 1;
@@ -608,29 +612,31 @@ public class NamenodeFsck {
   /**
    * FsckResult of checking, plus overall DFS statistics.
   */
-  private static class Result {
-    private List<String> missingIds = new ArrayList<String>();
-    private long missingSize = 0L;
-    private long corruptFiles = 0L;
-    private long corruptBlocks = 0L;
-    private long excessiveReplicas = 0L;
-    private long missingReplicas = 0L;
-    private long numOverReplicatedBlocks = 0L;
-    private long numUnderReplicatedBlocks = 0L;
-    private long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
-    private long numMinReplicatedBlocks = 0L; // minimally replicatedblocks
-    private long totalBlocks = 0L;
-    private long totalOpenFilesBlocks = 0L;
-    private long totalFiles = 0L;
-    private long totalOpenFiles = 0L;
-    private long totalDirs = 0L;
-    private long totalSize = 0L;
-    private long totalOpenFilesSize = 0L;
-    private long totalReplicas = 0L;
+  @VisibleForTesting
+  static class Result {
+    List<String> missingIds = new ArrayList<String>();
+    long missingSize = 0L;
+    long corruptFiles = 0L;
+    long corruptBlocks = 0L;
+    long excessiveReplicas = 0L;
+    long missingReplicas = 0L;
+    long numOverReplicatedBlocks = 0L;
+    long numUnderReplicatedBlocks = 0L;
+    long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
+    long numMinReplicatedBlocks = 0L; // minimally replicatedblocks
+    long totalBlocks = 0L;
+    long numExpectedReplicas = 0L;
+    long totalOpenFilesBlocks = 0L;
+    long totalFiles = 0L;
+    long totalOpenFiles = 0L;
+    long totalDirs = 0L;
+    long totalSize = 0L;
+    long totalOpenFilesSize = 0L;
+    long totalReplicas = 0L;
     final short replication;
 
-    private Result(Configuration conf) {
+    Result(Configuration conf) {
       this.replication = (short)conf.getInt(DFSConfigKeys.DFS_REPLICATION_KEY,
                                             DFSConfigKeys.DFS_REPLICATION_DEFAULT);
     }
 
@@ -726,7 +732,7 @@ public class NamenodeFsck {
           missingReplicas);
       if (totalReplicas > 0) {
         res.append(" (").append(
-            ((float) (missingReplicas * 100) / (float) totalReplicas)).append(
+            ((float) (missingReplicas * 100) / (float) numExpectedReplicas)).append(
             " %)");
       }
       return res.toString();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
index f7626ad3025..9b59802b1d1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -18,21 +18,27 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.junit.Assert.*;
+
 import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.io.PrintWriter;
 import java.io.RandomAccessFile;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.nio.channels.FileChannel;
 import java.security.PrivilegedExceptionAction;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Random;
 import java.util.regex.Pattern;
 
-import junit.framework.TestCase;
-
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -42,25 +48,30 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck.Result;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.tools.DFSck;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.net.NetworkTopology;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.PatternLayout;
 import org.apache.log4j.RollingFileAppender;
+import org.junit.Test;
 
 /**
  * A JUnit test for doing fsck
  */
-public class TestFsck extends TestCase {
+public class TestFsck {
   static final String auditLogFile = System.getProperty("test.build.dir",
       "build/test") + "/audit.log";
 
@@ -79,13 +90,15 @@
     PrintStream out = new PrintStream(bStream, true);
     ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.ALL);
     int errCode = ToolRunner.run(new DFSck(conf, out), path);
-    if (checkErrorCode)
+    if (checkErrorCode) {
       assertEquals(expectedErrCode, errCode);
+    }
     ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.INFO);
     return bStream.toString();
   }
 
   /** do fsck */
+  @Test
   public void testFsck() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -158,6 +171,7 @@
     assertNull("Unexpected event in audit log", reader.readLine());
   }
 
+  @Test
   public void testFsckNonExistent() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -180,6 +194,7 @@
   }
 
   /** Test fsck with permission set on inodes */
+  @Test
   public void testFsckPermission() throws Exception {
     final DFSTestUtil util = new DFSTestUtil(getClass().getSimpleName(), 20, 3, 8*1024);
     final Configuration conf = new HdfsConfiguration();
@@ -227,6 +242,7 @@
     }
   }
 
+  @Test
   public void testFsckMoveAndDelete() throws Exception {
     final int MAX_MOVE_TRIES = 5;
     DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024);
@@ -300,6 +316,7 @@
     }
   }
 
+  @Test
   public void testFsckOpenFiles() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 4, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -350,6 +367,7 @@
     }
   }
 
+  @Test
   public void testCorruptBlock() throws Exception {
     Configuration conf = new HdfsConfiguration();
     conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
@@ -426,6 +444,7 @@
    *
    * @throws Exception
    */
+  @Test
   public void testFsckError() throws Exception {
     MiniDFSCluster cluster = null;
     try {
@@ -460,6 +479,7 @@
   }
 
   /** check if option -list-corruptfiles of fsck command works properly */
+  @Test
   public void testFsckListCorruptFilesBlocks() throws Exception {
     Configuration conf = new Configuration();
     conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
@@ -529,6 +549,7 @@
    * Test for checking fsck command on illegal arguments should print the proper
    * usage.
    */
+  @Test
   public void testToCheckTheFsckCommandOnIllegalArguments() throws Exception {
     MiniDFSCluster cluster = null;
     try {
@@ -560,4 +581,73 @@
       }
     }
   }
+
+  /**
+   * Tests that the # of missing block replicas and expected replicas is correct
+   * @throws IOException
+   */
+  @Test
+  public void testFsckMissingReplicas() throws IOException {
+    // Desired replication factor
+    // Set this higher than NUM_REPLICAS so it's under-replicated
+    final short REPL_FACTOR = 2;
+    // Number of replicas to actually start
+    final short NUM_REPLICAS = 1;
+    // Number of blocks to write
+    final short NUM_BLOCKS = 3;
+    // Set a small-ish blocksize
+    final long blockSize = 512;
+
+    Configuration conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+
+    MiniDFSCluster cluster = null;
+    DistributedFileSystem dfs = null;
+
+    try {
+      // Startup a minicluster
+      cluster =
+          new MiniDFSCluster.Builder(conf).numDataNodes(NUM_REPLICAS).build();
+      assertNotNull("Failed Cluster Creation", cluster);
+      cluster.waitClusterUp();
+      dfs = (DistributedFileSystem) cluster.getFileSystem();
+      assertNotNull("Failed to get FileSystem", dfs);
+
+      // Create a file that will be intentionally under-replicated
+      final String pathString = new String("/testfile");
+      final Path path = new Path(pathString);
+      long fileLen = blockSize * NUM_BLOCKS;
+      DFSTestUtil.createFile(dfs, path, fileLen, REPL_FACTOR, 1);
+
+      // Create an under-replicated file
+      NameNode namenode = cluster.getNameNode();
+      NetworkTopology nettop = cluster.getNamesystem().getBlockManager()
+          .getDatanodeManager().getNetworkTopology();
+      Map<String,String[]> pmap = new HashMap<String, String[]>();
+      Writer result = new StringWriter();
+      PrintWriter out = new PrintWriter(result, true);
+      InetAddress remoteAddress = InetAddress.getLocalHost();
+      NamenodeFsck fsck = new NamenodeFsck(conf, namenode, nettop, pmap, out,
+          NUM_REPLICAS, (short)1, remoteAddress);
+
+      // Run the fsck and check the Result
+      final HdfsFileStatus file =
+          namenode.getRpcServer().getFileInfo(pathString);
+      assertNotNull(file);
+      Result res = new Result(conf);
+      fsck.check(pathString, file, res);
+      // Also print the output from the fsck, for ex post facto sanity checks
+      System.out.println(result.toString());
+      assertEquals(res.missingReplicas,
+          (NUM_BLOCKS*REPL_FACTOR) - (NUM_BLOCKS*NUM_REPLICAS));
+      assertEquals(res.numExpectedReplicas, NUM_BLOCKS*REPL_FACTOR);
+    } finally {
+      if(dfs != null) {
+        dfs.close();
+      }
+      if(cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }
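
The arithmetic this patch corrects is easy to see in isolation. Before the change, fsck reported the missing-replica percentage as missingReplicas divided by totalReplicas, i.e. measured against the replicas that actually exist; the patch accumulates numExpectedReplicas (each block contributes its file's replication factor, since replication is configured per file) and divides by that instead. The following standalone sketch is illustrative only and is not part of the commit; the variable names merely mirror the NamenodeFsck.Result fields, using the same numbers as the new test (3 blocks, replication factor 2, 1 live replica per block):

// Standalone illustration of the HDFS-3442 fix (not part of the patch above).
// Variable names mirror the NamenodeFsck.Result fields they stand in for.
public class MissingReplicaPercent {
  public static void main(String[] args) {
    final long numBlocks = 3;        // blocks in the file
    final short replFactor = 2;      // desired replication factor (per file)
    final short liveReplicas = 1;    // replicas actually present per block

    long totalReplicas = numBlocks * liveReplicas;               // 3: replicas found
    long numExpectedReplicas = numBlocks * replFactor;           // 6: replicas wanted
    long missingReplicas = numExpectedReplicas - totalReplicas;  // 3: replicas missing

    // Before the fix: missing replicas measured against what exists -> 100.0 %
    float before = (float) (missingReplicas * 100) / (float) totalReplicas;
    // After the fix: measured against what should exist -> 50.0 %
    float after = (float) (missingReplicas * 100) / (float) numExpectedReplicas;

    System.out.println("before: " + before + " %, after: " + after + " %");
  }
}

Run as written, this prints "before: 100.0 %, after: 50.0 %", matching the assertions in testFsckMissingReplicas, where half of the expected replicas (3 of 6) are missing.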