From 871bf6a765b56215fc88c3dcfb52be4c209b82c1 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Wed, 15 Apr 2015 08:43:42 -0700 Subject: [PATCH] HDFS-8117. More accurate verification in SimulatedFSDataset: replace DEFAULT_DATABYTE with patterned data. Contributed by Zhe Zhang. --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../org/apache/hadoop/hdfs/DFSTestUtil.java | 25 +++++++++++++++++++ .../apache/hadoop/hdfs/TestFileAppend.java | 12 ++++----- .../org/apache/hadoop/hdfs/TestPread.java | 19 ++++++++------ .../apache/hadoop/hdfs/TestSmallBlock.java | 14 ++++++----- .../server/datanode/SimulatedFSDataset.java | 25 ++++++++++--------- .../datanode/TestSimulatedFSDataset.java | 3 ++- 7 files changed, 69 insertions(+), 32 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index bf9a63441c7..32df2f76265 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -113,6 +113,9 @@ Release 2.8.0 - UNRELEASED HDFS-8083. Move dfs.client.write.* conf from DFSConfigKeys to HdfsClientConfigKeys.Write. (szetszwo) + HDFS-8117. More accurate verification in SimulatedFSDataset: replace + DEFAULT_DATABYTE with patterned data. (Zhe Zhang via wang) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index c3cefdf2ce9..aa73499babd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -91,6 +91,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; +import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; import org.apache.hadoop.hdfs.protocol.DatanodeID; @@ -117,6 +118,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeLayoutVersion; +import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.hdfs.server.datanode.TestTransferRbw; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.namenode.FSEditLog; @@ -1776,4 +1778,27 @@ public class DFSTestUtil { dn.setLastUpdate(Time.now() + offset); dn.setLastUpdateMonotonic(Time.monotonicNow() + offset); } + + /** + * This method takes a set of block locations and fills the provided buffer + * with expected bytes based on simulated content from + * {@link SimulatedFSDataset}. + * + * @param lbs The block locations of a file + * @param expected The buffer to be filled with expected bytes on the above + * locations. 
+ */ + public static void fillExpectedBuf(LocatedBlocks lbs, byte[] expected) { + Block[] blks = new Block[lbs.getLocatedBlocks().size()]; + for (int i = 0; i < lbs.getLocatedBlocks().size(); i++) { + blks[i] = lbs.getLocatedBlocks().get(i).getBlock().getLocalBlock(); + } + int bufPos = 0; + for (Block b : blks) { + for (long blkPos = 0; blkPos < b.getNumBytes(); blkPos++) { + assert bufPos < expected.length; + expected[bufPos++] = SimulatedFSDataset.simulatedByte(b, blkPos); + } + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend.java index ff0b9d7c245..04f523eba03 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend.java @@ -68,7 +68,7 @@ public class TestFileAppend{ // // verify that the data written to the full blocks are sane // - private void checkFile(FileSystem fileSys, Path name, int repl) + private void checkFile(DistributedFileSystem fileSys, Path name, int repl) throws IOException { boolean done = false; @@ -96,9 +96,9 @@ public class TestFileAppend{ byte[] expected = new byte[AppendTestUtil.NUM_BLOCKS * AppendTestUtil.BLOCK_SIZE]; if (simulatedStorage) { - for (int i= 0; i < expected.length; i++) { - expected[i] = SimulatedFSDataset.DEFAULT_DATABYTE; - } + LocatedBlocks lbs = fileSys.getClient().getLocatedBlocks(name.toString(), + 0, AppendTestUtil.FILE_SIZE); + DFSTestUtil.fillExpectedBuf(lbs, expected); } else { System.arraycopy(fileContents, 0, expected, 0, expected.length); } @@ -193,7 +193,7 @@ public class TestFileAppend{ } fileContents = AppendTestUtil.initBuffer(AppendTestUtil.FILE_SIZE); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); - FileSystem fs = cluster.getFileSystem(); + DistributedFileSystem fs = cluster.getFileSystem(); try { // create a new file. @@ -249,7 +249,7 @@ public class TestFileAppend{ } fileContents = AppendTestUtil.initBuffer(AppendTestUtil.FILE_SIZE); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); - FileSystem fs = cluster.getFileSystem(); + DistributedFileSystem fs = cluster.getFileSystem(); try { // create a new file. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java index 007e7a7b1d2..d7a9a80ecfa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java @@ -38,6 +38,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol; import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.io.IOUtils; @@ -55,6 +56,8 @@ import org.mockito.stubbing.Answer; public class TestPread { static final long seed = 0xDEADBEEFL; static final int blockSize = 4096; + static final int numBlocksPerFile = 12; + static final int fileSize = numBlocksPerFile * blockSize; boolean simulatedStorage; boolean isHedgedRead; @@ -72,7 +75,7 @@ public class TestPread { // test empty file open and read stm.close(); FSDataInputStream in = fileSys.open(name); - byte[] buffer = new byte[12 * blockSize]; + byte[] buffer = new byte[fileSize]; in.readFully(0, buffer, 0, 0); IOException res = null; try { // read beyond the end of the file @@ -87,7 +90,7 @@ public class TestPread { assertTrue("Cannot delete file", false); // now create the real file - DFSTestUtil.createFile(fileSys, name, 12 * blockSize, 12 * blockSize, + DFSTestUtil.createFile(fileSys, name, fileSize, fileSize, blockSize, (short) replication, seed); } @@ -131,11 +134,13 @@ public class TestPread { private void pReadFile(FileSystem fileSys, Path name) throws IOException { FSDataInputStream stm = fileSys.open(name); - byte[] expected = new byte[12 * blockSize]; + byte[] expected = new byte[fileSize]; if (simulatedStorage) { - for (int i= 0; i < expected.length; i++) { - expected[i] = SimulatedFSDataset.DEFAULT_DATABYTE; - } + assert fileSys instanceof DistributedFileSystem; + DistributedFileSystem dfs = (DistributedFileSystem) fileSys; + LocatedBlocks lbs = dfs.getClient().getLocatedBlocks(name.toString(), + 0, fileSize); + DFSTestUtil.fillExpectedBuf(lbs, expected); } else { Random rand = new Random(seed); rand.nextBytes(expected); @@ -450,7 +455,7 @@ public class TestPread { FileSystem fileSys = cluster.getFileSystem(); fileSys.setVerifyChecksum(verifyChecksum); try { - Path file1 = new Path("preadtest.dat"); + Path file1 = new Path("/preadtest.dat"); writeFile(fileSys, file1); pReadFile(fileSys, file1); datanodeRestartTest(cluster, fileSys, file1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSmallBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSmallBlock.java index 8cbb4fd3172..7782bd2504e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSmallBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSmallBlock.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.junit.Test; @@ -64,16 +65,17 @@ public class TestSmallBlock { } } - 
private void checkFile(FileSystem fileSys, Path name) throws IOException { + private void checkFile(DistributedFileSystem fileSys, Path name) + throws IOException { BlockLocation[] locations = fileSys.getFileBlockLocations( fileSys.getFileStatus(name), 0, fileSize); assertEquals("Number of blocks", fileSize, locations.length); FSDataInputStream stm = fileSys.open(name); byte[] expected = new byte[fileSize]; if (simulatedStorage) { - for (int i = 0; i < expected.length; ++i) { - expected[i] = SimulatedFSDataset.DEFAULT_DATABYTE; - } + LocatedBlocks lbs = fileSys.getClient().getLocatedBlocks(name.toString(), + 0, fileSize); + DFSTestUtil.fillExpectedBuf(lbs, expected); } else { Random rand = new Random(seed); rand.nextBytes(expected); @@ -102,9 +104,9 @@ public class TestSmallBlock { } conf.set(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, "1"); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); - FileSystem fileSys = cluster.getFileSystem(); + DistributedFileSystem fileSys = cluster.getFileSystem(); try { - Path file1 = new Path("smallblocktest.dat"); + Path file1 = new Path("/smallblocktest.dat"); writeFile(fileSys, file1); checkFile(fileSys, file1); cleanupFile(fileSys, file1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java index a358e2256fe..344d1fedee5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java @@ -97,12 +97,16 @@ public class SimulatedFSDataset implements FsDatasetSpi { conf.set(DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY, Factory.class.getName()); } + + public static byte simulatedByte(Block b, long offsetInBlk) { + byte firstByte = (byte) (b.getBlockId() % Byte.MAX_VALUE); + return (byte) ((firstByte + offsetInBlk) % Byte.MAX_VALUE); + } public static final String CONFIG_PROPERTY_CAPACITY = "dfs.datanode.simulateddatastorage.capacity"; public static final long DEFAULT_CAPACITY = 2L<<40; // 1 terabyte - public static final byte DEFAULT_DATABYTE = 9; public static final String CONFIG_PROPERTY_STATE = "dfs.datanode.simulateddatastorage.state"; @@ -182,9 +186,9 @@ public class SimulatedFSDataset implements FsDatasetSpi { synchronized SimulatedInputStream getIStream() { if (!finalized) { // throw new IOException("Trying to read an unfinalized block"); - return new SimulatedInputStream(oStream.getLength(), DEFAULT_DATABYTE); + return new SimulatedInputStream(oStream.getLength(), theBlock); } else { - return new SimulatedInputStream(theBlock.getNumBytes(), DEFAULT_DATABYTE); + return new SimulatedInputStream(theBlock.getNumBytes(), theBlock); } } @@ -991,21 +995,19 @@ public class SimulatedFSDataset implements FsDatasetSpi { * */ static private class SimulatedInputStream extends java.io.InputStream { - - - byte theRepeatedData = 7; final long length; // bytes int currentPos = 0; byte[] data = null; + Block theBlock = null; /** * An input stream of size l with repeated bytes * @param l size of the stream * @param iRepeatedData byte that is repeated in the stream */ - SimulatedInputStream(long l, byte iRepeatedData) { + SimulatedInputStream(long l, Block b) { length = l; - theRepeatedData = iRepeatedData; + theBlock = b; } /** @@ -1031,8 +1033,7 @@ public class SimulatedFSDataset implements FsDatasetSpi 
{ if (data !=null) { return data[currentPos++]; } else { - currentPos++; - return theRepeatedData; + return simulatedByte(theBlock, currentPos++); } } @@ -1052,8 +1053,8 @@ public class SimulatedFSDataset implements FsDatasetSpi { if (data != null) { System.arraycopy(data, currentPos, b, 0, bytesRead); } else { // all data is zero - for (int i : b) { - b[i] = theRepeatedData; + for (int i = 0; i < bytesRead; i++) { + b[i] = simulatedByte(theBlock, currentPos + i); } } currentPos += bytesRead; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestSimulatedFSDataset.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestSimulatedFSDataset.java index dd246853463..f76781de4df 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestSimulatedFSDataset.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestSimulatedFSDataset.java @@ -144,7 +144,8 @@ public class TestSimulatedFSDataset { long lengthRead = 0; int data; while ((data = input.read()) != -1) { - assertEquals(SimulatedFSDataset.DEFAULT_DATABYTE, data); + assertEquals(SimulatedFSDataset.simulatedByte(b.getLocalBlock(), + lengthRead), data); lengthRead++; } assertEquals(expectedLen, lengthRead);
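
Note (not part of the patch): the core of the change is that every simulated byte is now a deterministic function of the block ID and the offset within the block, so a read from the wrong block or the wrong offset can no longer match the expected data by accident, which a single repeated DEFAULT_DATABYTE could. The standalone sketch below mirrors the simulatedByte arithmetic added to SimulatedFSDataset; the class and field names (PatternedDataDemo, BlockStub) are stand-ins for illustration only and do not appear in the patch.

  // Standalone illustration of the patterned-data scheme introduced above.
  // BlockStub stands in for org.apache.hadoop.hdfs.protocol.Block.
  public class PatternedDataDemo {
    static final class BlockStub {
      final long blockId;
      final long numBytes;
      BlockStub(long blockId, long numBytes) {
        this.blockId = blockId;
        this.numBytes = numBytes;
      }
    }

    // Same arithmetic as SimulatedFSDataset#simulatedByte in the patch:
    // the first byte is derived from the block ID, and each subsequent
    // byte advances with the offset inside the block.
    static byte simulatedByte(BlockStub b, long offsetInBlk) {
      byte firstByte = (byte) (b.blockId % Byte.MAX_VALUE);
      return (byte) ((firstByte + offsetInBlk) % Byte.MAX_VALUE);
    }

    public static void main(String[] args) {
      BlockStub blk = new BlockStub(1073741825L, 16);

      // What a SimulatedInputStream-style reader would hand back for this block.
      byte[] read = new byte[(int) blk.numBytes];
      for (int i = 0; i < read.length; i++) {
        read[i] = simulatedByte(blk, i);
      }

      // Verification is positional: each byte is checked against the value
      // expected for this block and this offset, the same way
      // TestSimulatedFSDataset and DFSTestUtil#fillExpectedBuf do above.
      for (int i = 0; i < read.length; i++) {
        if (read[i] != simulatedByte(blk, i)) {
          throw new AssertionError("mismatch at offset " + i);
        }
      }
      System.out.println("all " + read.length + " bytes match the pattern");
    }
  }

In the patch itself, the same idea shows up in three places: SimulatedInputStream serves simulatedByte(theBlock, pos) instead of a constant, DFSTestUtil#fillExpectedBuf builds the expected buffer from the located blocks of a file, and the read tests (TestFileAppend, TestPread, TestSmallBlock, TestSimulatedFSDataset) compare what they read against that per-block, per-offset pattern.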