HDFS-8481. Erasure coding: remove workarounds in client side stripped blocks recovering. Contributed by Zhe Zhang.

Zhe Zhang 2015-06-01 09:37:29 -07:00
parent 1299357a05
commit 014bd32c58
5 changed files with 60 additions and 36 deletions

View File

@@ -262,3 +262,6 @@
 
     HDFS-8479. Erasure coding: fix striping related logic in FSDirWriteFileOp to
     sync with HDFS-8421. (Zhe Zhang via jing9)
+
+    HDFS-8481. Erasure coding: remove workarounds in client side stripped blocks
+    recovering. (zhz)

View File

@@ -31,6 +31,7 @@ import org.apache.hadoop.io.ByteBufferPool;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.divideByteRangeIntoStripes;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.initDecodeInputs;
+import static org.apache.hadoop.hdfs.util.StripedBlockUtil.finalizeDecodeInputs;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.decodeAndFillBuffer;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.getNextCompletedStripedRead;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.getStartOffsetsForInternalBlocks;
@@ -41,6 +42,8 @@ import static org.apache.hadoop.hdfs.util.StripedBlockUtil.StripingChunkReadResult;
 import org.apache.hadoop.io.erasurecode.ECSchema;
+import org.apache.hadoop.io.erasurecode.rawcoder.RSRawDecoder;
+import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.htrace.Span;
 import org.apache.htrace.Trace;
@@ -117,6 +120,8 @@ public class DFSStripedInputStream extends DFSInputStream {
   /** the buffer for a complete stripe */
   private ByteBuffer curStripeBuf;
   private final ECSchema schema;
+  private final RawErasureDecoder decoder;
+
   /**
    * indicate the start/end offset of the current buffered stripe in the
    * block group
@@ -139,6 +144,7 @@ public class DFSStripedInputStream extends DFSInputStream {
     curStripeRange = new StripeRange(0, 0);
     readingService =
         new ExecutorCompletionService<>(dfsClient.getStripedReadsThreadPool());
+    decoder = new RSRawDecoder(dataBlkNum, parityBlkNum);
     if (DFSClient.LOG.isDebugEnabled()) {
       DFSClient.LOG.debug("Creating an striped input stream for file " + src);
     }
@@ -591,8 +597,9 @@ public class DFSStripedInputStream extends DFSInputStream {
       }
       if (alignedStripe.missingChunksNum > 0) {
-        decodeAndFillBuffer(decodeInputs, buf, alignedStripe,
-            dataBlkNum, parityBlkNum);
+        finalizeDecodeInputs(decodeInputs, alignedStripe);
+        decodeAndFillBuffer(decodeInputs, buf, alignedStripe, parityBlkNum,
+            decoder);
       }
     }
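Taken together, the DFSStripedInputStream changes above wire the new StripedBlockUtil helpers into the positional-read path. The fragment below is only a sketch of that flow using the names from this patch (alignedStripe, decodeInputs, buf, dataBlkNum, parityBlkNum, decoder); the read scheduling and error handling of the real method are simplified away, and the "stripes" loop variable is illustrative rather than part of the patch.

    // Sketch of the recovery flow after this change (not a compilable unit).
    for (AlignedStripe alignedStripe : stripes) {      // "stripes" is illustrative
      // Allocate one decode input buffer per internal block; chunks that were
      // never requested are pointed at these buffers so follow-up reads fill them.
      byte[][] decodeInputs =
          initDecodeInputs(alignedStripe, dataBlkNum, parityBlkNum);

      // ... issue the chunk reads for this stripe and wait for completion ...

      if (alignedStripe.missingChunksNum > 0) {
        // Copy FETCHED chunks into the decode inputs, zero-fill ALLZERO chunks,
        // and null out the inputs that are still MISSING.
        finalizeDecodeInputs(decodeInputs, alignedStripe);
        // Decode with the stream's shared RawErasureDecoder and copy recovered
        // bytes into the application buffer.
        decodeAndFillBuffer(decodeInputs, buf, alignedStripe, parityBlkNum, decoder);
      }
    }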

View File

@@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
 import com.google.common.base.Preconditions;
 import org.apache.hadoop.io.erasurecode.ECSchema;
 import org.apache.hadoop.io.erasurecode.rawcoder.RSRawDecoder;
+import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
 
 import java.util.*;
 import java.io.IOException;
@@ -246,19 +247,36 @@ public class StripedBlockUtil {
   /**
    * Initialize the decoding input buffers based on the chunk states in an
-   * AlignedStripe
+   * {@link AlignedStripe}. For each chunk that was not initially requested,
+   * schedule a new fetch request with the decoding input buffer as transfer
+   * destination.
    */
   public static byte[][] initDecodeInputs(AlignedStripe alignedStripe,
       int dataBlkNum, int parityBlkNum) {
     byte[][] decodeInputs =
         new byte[dataBlkNum + parityBlkNum][(int) alignedStripe.getSpanInBlock()];
     for (int i = 0; i < alignedStripe.chunks.length; i++) {
-      StripingChunk chunk = alignedStripe.chunks[i];
-      if (chunk == null) {
+      if (alignedStripe.chunks[i] == null) {
         alignedStripe.chunks[i] = new StripingChunk(decodeInputs[i]);
         alignedStripe.chunks[i].offsetsInBuf.add(0);
         alignedStripe.chunks[i].lengthsInBuf.add((int) alignedStripe.getSpanInBlock());
-      } else if (chunk.state == StripingChunk.FETCHED) {
+      }
+    }
+    return decodeInputs;
+  }
+
+  /**
+   * Some fetched {@link StripingChunk} might be stored in original application
+   * buffer instead of prepared decode input buffers. Some others are beyond
+   * the range of the internal blocks and should correspond to all zero bytes.
+   * When all pending requests have returned, this method should be called to
+   * finalize decode input buffers.
+   */
+  public static void finalizeDecodeInputs(final byte[][] decodeInputs,
+      AlignedStripe alignedStripe) {
+    for (int i = 0; i < alignedStripe.chunks.length; i++) {
+      StripingChunk chunk = alignedStripe.chunks[i];
+      if (chunk.state == StripingChunk.FETCHED) {
         int posInBuf = 0;
         for (int j = 0; j < chunk.offsetsInBuf.size(); j++) {
           System.arraycopy(chunk.buf, chunk.offsetsInBuf.get(j),
               decodeInputs[i], posInBuf, chunk.lengthsInBuf.get(j));
@@ -267,39 +285,41 @@ public class StripedBlockUtil {
         }
       } else if (chunk.state == StripingChunk.ALLZERO) {
         Arrays.fill(decodeInputs[i], (byte)0);
+      } else {
+        decodeInputs[i] = null;
       }
     }
-    return decodeInputs;
   }
 
   /**
-   * Decode based on the given input buffers and schema
+   * Decode based on the given input buffers and schema.
    */
-  public static void decodeAndFillBuffer(final byte[][] decodeInputs, byte[] buf,
-      AlignedStripe alignedStripe, int dataBlkNum, int parityBlkNum) {
+  public static void decodeAndFillBuffer(final byte[][] decodeInputs,
+      byte[] buf, AlignedStripe alignedStripe, int parityBlkNum,
+      RawErasureDecoder decoder) {
+    // Step 1: prepare indices and output buffers for missing data units
     int[] decodeIndices = new int[parityBlkNum];
     int pos = 0;
     for (int i = 0; i < alignedStripe.chunks.length; i++) {
-      if (alignedStripe.chunks[i].state != StripingChunk.FETCHED &&
-          alignedStripe.chunks[i].state != StripingChunk.ALLZERO) {
+      if (alignedStripe.chunks[i].state == StripingChunk.MISSING){
         decodeIndices[pos++] = i;
       }
     }
+    decodeIndices = Arrays.copyOf(decodeIndices, pos);
+    byte[][] decodeOutputs =
+        new byte[decodeIndices.length][(int) alignedStripe.getSpanInBlock()];
 
-    byte[][] outputs = new byte[parityBlkNum][(int) alignedStripe.getSpanInBlock()];
-    RSRawDecoder rsRawDecoder = new RSRawDecoder(dataBlkNum, parityBlkNum);
-    rsRawDecoder.decode(decodeInputs, decodeIndices, outputs);
+    // Step 2: decode into prepared output buffers
+    decoder.decode(decodeInputs, decodeIndices, decodeOutputs);
 
-    for (int i = 0; i < dataBlkNum + parityBlkNum; i++) {
-      StripingChunk chunk = alignedStripe.chunks[i];
+    // Step 3: fill original application buffer with decoded data
+    for (int i = 0; i < decodeIndices.length; i++) {
+      int missingBlkIdx = decodeIndices[i];
+      StripingChunk chunk = alignedStripe.chunks[missingBlkIdx];
       if (chunk.state == StripingChunk.MISSING) {
         int srcPos = 0;
         for (int j = 0; j < chunk.offsetsInBuf.size(); j++) {
-          //TODO: workaround (filling fixed bytes), to remove after HADOOP-11938
-//          System.arraycopy(outputs[i], srcPos, buf, chunk.offsetsInBuf.get(j),
-//              chunk.lengthsInBuf.get(j));
-          Arrays.fill(buf, chunk.offsetsInBuf.get(j),
-              chunk.offsetsInBuf.get(j) + chunk.lengthsInBuf.get(j), (byte)7);
+          System.arraycopy(decodeOutputs[i], srcPos, buf, chunk.offsetsInBuf.get(j),
+              chunk.lengthsInBuf.get(j));
          srcPos += chunk.lengthsInBuf.get(j);
         }
       }
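As a rough illustration of the buffer shapes the new decodeAndFillBuffer expects, consider a single AlignedStripe of an RS (6 data + 3 parity) block group in which internal block #2 could not be read. The concrete values below (6, 3, a 64 KB span, block #2 failing) are example assumptions and not part of the patch; the calls and conventions follow the code above.

    int dataBlkNum = 6, parityBlkNum = 3;        // example schema
    int spanInBlock = 64 * 1024;                 // example stripe span

    // initDecodeInputs: one buffer per internal block (9 in total).
    byte[][] decodeInputs = new byte[dataBlkNum + parityBlkNum][spanInBlock];

    // finalizeDecodeInputs leaves FETCHED data copied in, ALLZERO buffers zeroed,
    // and MISSING entries set to null -- here internal block #2 is missing.
    decodeInputs[2] = null;

    // Step 1: one decode index per missing unit, trimmed to the actual count.
    int[] decodeIndices = new int[]{2};
    byte[][] decodeOutputs = new byte[decodeIndices.length][spanInBlock];

    // Step 2: the caller-supplied decoder reconstructs the erased unit(s).
    RawErasureDecoder decoder = new RSRawDecoder(dataBlkNum, parityBlkNum);
    decoder.decode(decodeInputs, decodeIndices, decodeOutputs);

    // Step 3: decodeOutputs[0] corresponds to block #2 and is copied into the
    // application buffer at the offsets/lengths recorded in the MISSING chunk.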

View File

@@ -221,13 +221,13 @@ public class TestDFSStripedInputStream {
         decodeInputs[DATA_BLK_NUM][k] = SimulatedFSDataset.simulatedByte(
             new Block(bg.getBlock().getBlockId() + DATA_BLK_NUM), posInBlk);
       }
-//      RSRawDecoder rsRawDecoder = new RSRawDecoder();
-//      rsRawDecoder.initialize(DATA_BLK_NUM, PARITY_BLK_NUM, CELLSIZE);
-//      rsRawDecoder.decode(decodeInputs, missingBlkIdx, decodeOutputs);
+      for (int m : missingBlkIdx) {
+        decodeInputs[m] = null;
+      }
+      RSRawDecoder rsRawDecoder = new RSRawDecoder(DATA_BLK_NUM, PARITY_BLK_NUM);
+      rsRawDecoder.decode(decodeInputs, missingBlkIdx, decodeOutputs);
       int posInBuf = i * CELLSIZE * DATA_BLK_NUM + failedDNIdx * CELLSIZE;
-//      System.arraycopy(decodeOutputs[0], 0, expected, posInBuf, CELLSIZE);
-      //TODO: workaround (filling fixed bytes), to remove after HADOOP-11938
-      Arrays.fill(expected, posInBuf, posInBuf + CELLSIZE, (byte)7);
+      System.arraycopy(decodeOutputs[0], 0, expected, posInBuf, CELLSIZE);
     }
     int delta = 10;
     int done = 0;

View File

@@ -382,16 +382,10 @@ public class TestWriteReadStripedFile {
     Assert.assertEquals("The length of file should be the same to write size",
         length - startOffsetInFile, readLen);
 
-    RSRawDecoder rsRawDecoder = new RSRawDecoder(dataBlocks, parityBlocks);
     byte[] expected = new byte[readLen];
     for (int i = startOffsetInFile; i < length; i++) {
-      //TODO: workaround (filling fixed bytes), to remove after HADOOP-11938
-      if ((i / cellSize) % dataBlocks == failedDNIdx) {
-        expected[i - startOffsetInFile] = (byte)7;
-      } else {
-        expected[i - startOffsetInFile] = getByte(i);
-      }
+      expected[i - startOffsetInFile] = getByte(i);
     }
     for (int i = startOffsetInFile; i < length; i++) {
       Assert.assertEquals("Byte at " + i + " should be the same",
           expected[i - startOffsetInFile], buf[i - startOffsetInFile]);