From d60e22152ac098da103fd37fb81f8758e68d1efa Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Mon, 13 Apr 2015 13:01:10 -0700 Subject: [PATCH] HDFS-8117. More accurate verification in SimulatedFSDataset: replace DEFAULT_DATABYTE with patterned data. Contributed by Zhe Zhang. --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../org/apache/hadoop/hdfs/DFSTestUtil.java | 25 +++++++++++++++++++ .../apache/hadoop/hdfs/TestFileAppend.java | 12 ++++----- .../org/apache/hadoop/hdfs/TestPread.java | 21 ++++++++++------ .../apache/hadoop/hdfs/TestSmallBlock.java | 16 ++++++------ .../server/datanode/SimulatedFSDataset.java | 25 ++++++++++--------- .../datanode/TestSimulatedFSDataset.java | 3 ++- 7 files changed, 70 insertions(+), 35 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 2a26544d54a..1aaf42c02a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -431,6 +431,9 @@ Release 2.8.0 - UNRELEASED HDFS-8083. Move dfs.client.write.* conf from DFSConfigKeys to HdfsClientConfigKeys.Write. (szetszwo) + HDFS-8117. More accurate verification in SimulatedFSDataset: replace + DEFAULT_DATABYTE with patterned data. (Zhe Zhang via wang) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index 1b3b62dc3fa..ae2d403e67e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -91,6 +91,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; +import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; import org.apache.hadoop.hdfs.protocol.DatanodeID; @@ -117,6 +118,7 @@ import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeLayoutVersion; +import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.hdfs.server.datanode.TestTransferRbw; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.namenode.FSEditLog; @@ -1769,4 +1771,27 @@ public static void resetLastUpdatesWithOffset(DatanodeInfo dn, long offset) { dn.setLastUpdateMonotonic(Time.monotonicNow() + offset); } + /** + * This method takes a set of block locations and fills the provided buffer + * with expected bytes based on simulated content from + * {@link SimulatedFSDataset}. + * + * @param lbs The block locations of a file + * @param expected The buffer to be filled with expected bytes on the above + * locations. 
+ */ + public static void fillExpectedBuf(LocatedBlocks lbs, byte[] expected) { + Block[] blks = new Block[lbs.getLocatedBlocks().size()]; + for (int i = 0; i < lbs.getLocatedBlocks().size(); i++) { + blks[i] = lbs.getLocatedBlocks().get(i).getBlock().getLocalBlock(); + } + int bufPos = 0; + for (Block b : blks) { + for (long blkPos = 0; blkPos < b.getNumBytes(); blkPos++) { + assert bufPos < expected.length; + expected[bufPos++] = SimulatedFSDataset.simulatedByte(b, blkPos); + } + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend.java index af404cd82c8..6a7c3eaa587 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppend.java @@ -68,7 +68,7 @@ private void writeFile(FSDataOutputStream stm) throws IOException { // // verify that the data written to the full blocks are sane // - private void checkFile(FileSystem fileSys, Path name, int repl) + private void checkFile(DistributedFileSystem fileSys, Path name, int repl) throws IOException { boolean done = false; @@ -96,9 +96,9 @@ private void checkFile(FileSystem fileSys, Path name, int repl) byte[] expected = new byte[AppendTestUtil.NUM_BLOCKS * AppendTestUtil.BLOCK_SIZE]; if (simulatedStorage) { - for (int i= 0; i < expected.length; i++) { - expected[i] = SimulatedFSDataset.DEFAULT_DATABYTE; - } + LocatedBlocks lbs = fileSys.getClient().getLocatedBlocks(name.toString(), + 0, AppendTestUtil.FILE_SIZE); + DFSTestUtil.fillExpectedBuf(lbs, expected); } else { System.arraycopy(fileContents, 0, expected, 0, expected.length); } @@ -193,7 +193,7 @@ public void testSimpleFlush() throws IOException { } fileContents = AppendTestUtil.initBuffer(AppendTestUtil.FILE_SIZE); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); - FileSystem fs = cluster.getFileSystem(); + DistributedFileSystem fs = cluster.getFileSystem(); try { // create a new file. @@ -249,7 +249,7 @@ public void testComplexFlush() throws IOException { } fileContents = AppendTestUtil.initBuffer(AppendTestUtil.FILE_SIZE); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); - FileSystem fs = cluster.getFileSystem(); + DistributedFileSystem fs = cluster.getFileSystem(); try { // create a new file. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java index 62f6c0600e8..1a2840432f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java @@ -37,6 +37,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol; import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.io.IOUtils; @@ -54,6 +55,8 @@ public class TestPread { static final long seed = 0xDEADBEEFL; static final int blockSize = 4096; + static final int numBlocksPerFile = 12; + static final int fileSize = numBlocksPerFile * blockSize; boolean simulatedStorage; boolean isHedgedRead; @@ -66,10 +69,10 @@ public void setup() { private void writeFile(FileSystem fileSys, Path name) throws IOException { int replication = 3;// We need > 1 blocks to test out the hedged reads. // test empty file open and read - DFSTestUtil.createFile(fileSys, name, 12 * blockSize, 0, + DFSTestUtil.createFile(fileSys, name, fileSize, 0, blockSize, (short)replication, seed); FSDataInputStream in = fileSys.open(name); - byte[] buffer = new byte[12 * blockSize]; + byte[] buffer = new byte[fileSize]; in.readFully(0, buffer, 0, 0); IOException res = null; try { // read beyond the end of the file @@ -84,7 +87,7 @@ private void writeFile(FileSystem fileSys, Path name) throws IOException { assertTrue("Cannot delete file", false); // now create the real file - DFSTestUtil.createFile(fileSys, name, 12 * blockSize, 12 * blockSize, + DFSTestUtil.createFile(fileSys, name, fileSize, fileSize, blockSize, (short) replication, seed); } @@ -128,11 +131,13 @@ private void doPread(FSDataInputStream stm, long position, byte[] buffer, private void pReadFile(FileSystem fileSys, Path name) throws IOException { FSDataInputStream stm = fileSys.open(name); - byte[] expected = new byte[12 * blockSize]; + byte[] expected = new byte[fileSize]; if (simulatedStorage) { - for (int i= 0; i < expected.length; i++) { - expected[i] = SimulatedFSDataset.DEFAULT_DATABYTE; - } + assert fileSys instanceof DistributedFileSystem; + DistributedFileSystem dfs = (DistributedFileSystem) fileSys; + LocatedBlocks lbs = dfs.getClient().getLocatedBlocks(name.toString(), + 0, fileSize); + DFSTestUtil.fillExpectedBuf(lbs, expected); } else { Random rand = new Random(seed); rand.nextBytes(expected); @@ -447,7 +452,7 @@ private void dfsPreadTest(Configuration conf, boolean disableTransferTo, boolean FileSystem fileSys = cluster.getFileSystem(); fileSys.setVerifyChecksum(verifyChecksum); try { - Path file1 = new Path("preadtest.dat"); + Path file1 = new Path("/preadtest.dat"); writeFile(fileSys, file1); pReadFile(fileSys, file1); datanodeRestartTest(cluster, fileSys, file1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSmallBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSmallBlock.java index 90f47e58333..6983cde954b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSmallBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSmallBlock.java @@ -25,11 +25,10 @@ import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.junit.Test; @@ -52,16 +51,17 @@ private void checkAndEraseData(byte[] actual, int from, byte[] expected, String } } - private void checkFile(FileSystem fileSys, Path name) throws IOException { + private void checkFile(DistributedFileSystem fileSys, Path name) + throws IOException { BlockLocation[] locations = fileSys.getFileBlockLocations( fileSys.getFileStatus(name), 0, fileSize); assertEquals("Number of blocks", fileSize, locations.length); FSDataInputStream stm = fileSys.open(name); byte[] expected = new byte[fileSize]; if (simulatedStorage) { - for (int i = 0; i < expected.length; ++i) { - expected[i] = SimulatedFSDataset.DEFAULT_DATABYTE; - } + LocatedBlocks lbs = fileSys.getClient().getLocatedBlocks(name.toString(), + 0, fileSize); + DFSTestUtil.fillExpectedBuf(lbs, expected); } else { Random rand = new Random(seed); rand.nextBytes(expected); @@ -90,9 +90,9 @@ public void testSmallBlock() throws IOException { } conf.set(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, "1"); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); - FileSystem fileSys = cluster.getFileSystem(); + DistributedFileSystem fileSys = cluster.getFileSystem(); try { - Path file1 = new Path("smallblocktest.dat"); + Path file1 = new Path("/smallblocktest.dat"); DFSTestUtil.createFile(fileSys, file1, fileSize, fileSize, blockSize, (short) 1, seed); checkFile(fileSys, file1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java index a358e2256fe..344d1fedee5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java @@ -97,12 +97,16 @@ public static void setFactory(Configuration conf) { conf.set(DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY, Factory.class.getName()); } + + public static byte simulatedByte(Block b, long offsetInBlk) { + byte firstByte = (byte) (b.getBlockId() % Byte.MAX_VALUE); + return (byte) ((firstByte + offsetInBlk) % Byte.MAX_VALUE); + } public static final String CONFIG_PROPERTY_CAPACITY = "dfs.datanode.simulateddatastorage.capacity"; public static final long DEFAULT_CAPACITY = 2L<<40; // 1 terabyte - public static final byte DEFAULT_DATABYTE = 9; public static final String CONFIG_PROPERTY_STATE = "dfs.datanode.simulateddatastorage.state"; @@ -182,9 +186,9 @@ synchronized public void setNumBytes(long length) { synchronized SimulatedInputStream getIStream() { if (!finalized) { // throw new IOException("Trying to read an unfinalized block"); - return new SimulatedInputStream(oStream.getLength(), DEFAULT_DATABYTE); + return new SimulatedInputStream(oStream.getLength(), theBlock); } else { - return new SimulatedInputStream(theBlock.getNumBytes(), DEFAULT_DATABYTE); + return new SimulatedInputStream(theBlock.getNumBytes(), theBlock); } } @@ -991,21 +995,19 @@ public synchronized void 
adjustCrcChannelPosition(ExtendedBlock b, * */ static private class SimulatedInputStream extends java.io.InputStream { - - - byte theRepeatedData = 7; final long length; // bytes int currentPos = 0; byte[] data = null; + Block theBlock = null; /** * An input stream of size l with repeated bytes * @param l size of the stream * @param iRepeatedData byte that is repeated in the stream */ - SimulatedInputStream(long l, byte iRepeatedData) { + SimulatedInputStream(long l, Block b) { length = l; - theRepeatedData = iRepeatedData; + theBlock = b; } /** @@ -1031,8 +1033,7 @@ public int read() throws IOException { if (data !=null) { return data[currentPos++]; } else { - currentPos++; - return theRepeatedData; + return simulatedByte(theBlock, currentPos++); } } @@ -1052,8 +1053,8 @@ public int read(byte[] b) throws IOException { if (data != null) { System.arraycopy(data, currentPos, b, 0, bytesRead); } else { // all data is zero - for (int i : b) { - b[i] = theRepeatedData; + for (int i = 0; i < bytesRead; i++) { + b[i] = simulatedByte(theBlock, currentPos + i); } } currentPos += bytesRead; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestSimulatedFSDataset.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestSimulatedFSDataset.java index dd246853463..f76781de4df 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestSimulatedFSDataset.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestSimulatedFSDataset.java @@ -144,7 +144,8 @@ void checkBlockDataAndSize(SimulatedFSDataset fsdataset, ExtendedBlock b, long lengthRead = 0; int data; while ((data = input.read()) != -1) { - assertEquals(SimulatedFSDataset.DEFAULT_DATABYTE, data); + assertEquals(SimulatedFSDataset.simulatedByte(b.getLocalBlock(), + lengthRead), data); lengthRead++; } assertEquals(expectedLen, lengthRead);
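
For reviewers, below is a minimal, self-contained sketch of the patterned-data scheme this patch introduces. It mirrors the formula in SimulatedFSDataset#simulatedByte and the walk that DFSTestUtil#fillExpectedBuf does over a file's blocks, but uses raw (blockId, length) pairs so it compiles without the Hadoop test jars; the class name PatternedDataSketch and the array-based fillExpectedBuf signature are illustrative only, not part of the patch.

/**
 * Sketch of the patterned data added by HDFS-8117: the byte at offset
 * off of block b is ((b.blockId % Byte.MAX_VALUE) + off) % Byte.MAX_VALUE,
 * replacing the old constant DEFAULT_DATABYTE (9).
 */
public class PatternedDataSketch {

  // Mirrors SimulatedFSDataset#simulatedByte, taking a raw block id instead
  // of an org.apache.hadoop.hdfs.protocol.Block so the sketch stands alone.
  static byte simulatedByte(long blockId, long offsetInBlk) {
    byte firstByte = (byte) (blockId % Byte.MAX_VALUE);
    return (byte) ((firstByte + offsetInBlk) % Byte.MAX_VALUE);
  }

  // Fills an expected buffer for a sequence of (blockId, numBytes) pairs,
  // the same way DFSTestUtil#fillExpectedBuf walks a file's LocatedBlocks.
  static void fillExpectedBuf(long[][] blocks, byte[] expected) {
    int bufPos = 0;
    for (long[] blk : blocks) {
      long blockId = blk[0];
      long numBytes = blk[1];
      for (long blkPos = 0; blkPos < numBytes; blkPos++) {
        assert bufPos < expected.length;
        expected[bufPos++] = simulatedByte(blockId, blkPos);
      }
    }
  }

  public static void main(String[] args) {
    // Two hypothetical 8-byte blocks with consecutive block ids.
    long[][] blocks = { { 1073741825L, 8 }, { 1073741826L, 8 } };
    byte[] expected = new byte[16];
    fillExpectedBuf(blocks, expected);
    for (byte b : expected) {
      System.out.print(b + " ");
    }
    System.out.println();
  }
}

Because each block's pattern starts from its own block id and advances with the offset, adjacent blocks of a file now carry distinct byte sequences; this is what lets the updated tests catch reads served from the wrong block or the wrong offset, which the single constant byte could not.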