From eb612b0b70f1439194d3c1cdf151f3301e88522b Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Thu, 23 Apr 2015 15:43:04 -0700 Subject: [PATCH] HDFS-8233. Fix DFSStripedOutputStream#getCurrentBlockGroupBytes when the last stripe is at the block group boundary. Contributed by Jing Zhao. --- .../hadoop-hdfs/CHANGES-HDFS-EC-7285.txt | 5 +- .../hadoop/hdfs/DFSStripedOutputStream.java | 51 +++++++++---------- .../hdfs/TestDFSStripedOutputStream.java | 6 +++ 3 files changed, 34 insertions(+), 28 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt index 8977c46989a..48791b134a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt @@ -121,4 +121,7 @@ schema. (Kai Zheng via Zhe Zhang) HDFS-8136. Client gets and uses EC schema when reads and writes a stripping - file. (Kai Sasaki via Kai Zheng) \ No newline at end of file + file. (Kai Sasaki via Kai Zheng) + + HDFS-8233. Fix DFSStripedOutputStream#getCurrentBlockGroupBytes when the last + stripe is at the block group boundary. (jing9) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java index eeb9d7ea96b..245dfc10b6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSStripedOutputStream.java @@ -36,7 +36,6 @@ import org.apache.hadoop.hdfs.protocol.ECInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.util.StripedBlockUtil; import org.apache.hadoop.io.erasurecode.rawcoder.RSRawEncoder; import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder; import org.apache.hadoop.util.DataChecksum; @@ -278,14 +277,6 @@ public class DFSStripedOutputStream extends DFSOutputStream { return numDataBlocks * cellSize; } - private long getCurrentBlockGroupBytes() { - long sum = 0; - for (int i = 0; i < numDataBlocks; i++) { - sum += streamers.get(i).getBytesCurBlock(); - } - return sum; - } - private void notSupported(String headMsg) throws IOException{ throw new IOException( @@ -347,37 +338,43 @@ public class DFSStripedOutputStream extends DFSOutputStream { } } + /** + * Simply add bytesCurBlock together. Note that this result is not accurately + * the size of the block group. + */ + private long getCurrentSumBytes() { + long sum = 0; + for (int i = 0; i < numDataBlocks; i++) { + sum += streamers.get(i).getBytesCurBlock(); + } + return sum; + } + private void writeParityCellsForLastStripe() throws IOException { - final long currentBlockGroupBytes = getCurrentBlockGroupBytes(); - long parityBlkSize = StripedBlockUtil.getInternalBlockLength( - currentBlockGroupBytes, cellSize, numDataBlocks, - numDataBlocks + 1); - if (parityBlkSize == 0 || currentBlockGroupBytes % stripeDataSize() == 0) { + final long currentBlockGroupBytes = getCurrentSumBytes(); + if (currentBlockGroupBytes % stripeDataSize() == 0) { return; } - int parityCellSize = parityBlkSize % cellSize == 0 ? cellSize : - (int) (parityBlkSize % cellSize); + long firstCellSize = getLeadingStreamer().getBytesCurBlock() % cellSize; + long parityCellSize = firstCellSize > 0 && firstCellSize < cellSize ? + firstCellSize : cellSize; for (int i = 0; i < numAllBlocks; i++) { - long internalBlkLen = StripedBlockUtil.getInternalBlockLength( - currentBlockGroupBytes, cellSize, numDataBlocks, i); // Pad zero bytes to make all cells exactly the size of parityCellSize // If internal block is smaller than parity block, pad zero bytes. // Also pad zero bytes to all parity cells - if (internalBlkLen < parityBlkSize || i >= numDataBlocks) { - int position = cellBuffers[i].position(); - assert position <= parityCellSize : "If an internal block is smaller" + - " than parity block, then its last cell should be small than last" + - " parity cell"; - for (int j = 0; j < parityCellSize - position; j++) { - cellBuffers[i].put((byte) 0); - } + int position = cellBuffers[i].position(); + assert position <= parityCellSize : "If an internal block is smaller" + + " than parity block, then its last cell should be small than last" + + " parity cell"; + for (int j = 0; j < parityCellSize - position; j++) { + cellBuffers[i].put((byte) 0); } cellBuffers[i].flip(); } encode(cellBuffers); - //write parity cells + // write parity cells curIdx = numDataBlocks; refreshStreamer(); for (int i = numDataBlocks; i < numAllBlocks; i++) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java index 26f6d2c22dc..5ce94ee1a41 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSStripedOutputStream.java @@ -114,6 +114,12 @@ public class TestDFSStripedOutputStream { + cellSize * dataBlocks + 123); } + @Test + public void testFileLessThanFullBlockGroup() throws IOException { + testOneFile("/LessThanFullBlockGroup", + cellSize * dataBlocks * (stripesPerBlock - 1) + cellSize); + } + @Test public void testFileFullBlockGroup() throws IOException { testOneFile("/FullBlockGroup", blockSize * dataBlocks);