HDFS-8481. Erasure coding: remove workarounds in client side stripped blocks recovering. Contributed by Zhe Zhang.
parent 1299357a05
commit 014bd32c58
CHANGES-HDFS-EC-7285.txt

@@ -262,3 +262,6 @@
     HDFS-8479. Erasure coding: fix striping related logic in FSDirWriteFileOp to
     sync with HDFS-8421. (Zhe Zhang via jing9)
 
+    HDFS-8481. Erasure coding: remove workarounds in client side stripped blocks
+    recovering. (zhz)
+
DFSStripedInputStream.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.io.ByteBufferPool;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.divideByteRangeIntoStripes;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.initDecodeInputs;
+import static org.apache.hadoop.hdfs.util.StripedBlockUtil.finalizeDecodeInputs;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.decodeAndFillBuffer;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.getNextCompletedStripedRead;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.getStartOffsetsForInternalBlocks;
@@ -41,6 +42,8 @@ import static org.apache.hadoop.hdfs.util.StripedBlockUtil.StripingChunkReadResult;
 import org.apache.hadoop.io.erasurecode.ECSchema;
 
+import org.apache.hadoop.io.erasurecode.rawcoder.RSRawDecoder;
+import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.htrace.Span;
 import org.apache.htrace.Trace;
@@ -117,6 +120,8 @@ public class DFSStripedInputStream extends DFSInputStream {
   /** the buffer for a complete stripe */
   private ByteBuffer curStripeBuf;
   private final ECSchema schema;
+  private final RawErasureDecoder decoder;
+
   /**
    * indicate the start/end offset of the current buffered stripe in the
    * block group
@@ -139,6 +144,7 @@ public class DFSStripedInputStream extends DFSInputStream {
     curStripeRange = new StripeRange(0, 0);
     readingService =
         new ExecutorCompletionService<>(dfsClient.getStripedReadsThreadPool());
+    decoder = new RSRawDecoder(dataBlkNum, parityBlkNum);
     if (DFSClient.LOG.isDebugEnabled()) {
       DFSClient.LOG.debug("Creating an striped input stream for file " + src);
     }
@@ -591,8 +597,9 @@ public class DFSStripedInputStream extends DFSInputStream {
       }
 
       if (alignedStripe.missingChunksNum > 0) {
-        decodeAndFillBuffer(decodeInputs, buf, alignedStripe,
-            dataBlkNum, parityBlkNum);
+        finalizeDecodeInputs(decodeInputs, alignedStripe);
+        decodeAndFillBuffer(decodeInputs, buf, alignedStripe, parityBlkNum,
+            decoder);
       }
     }
 
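Note: the hunk above is the core read-path change. Recovery now runs in three explicit phases: initDecodeInputs prepares one buffer per internal block, finalizeDecodeInputs consolidates what the reads actually fetched, and decodeAndFillBuffer decodes with the stream's long-lived decoder instead of allocating a fresh RSRawDecoder and filling placeholder bytes. Below is a minimal, self-contained sketch of that phase ordering; the XOR decode() and all toy data are illustrative stand-ins, not commit code.

import java.util.Arrays;

public class RecoverySketch {
  // Toy decoder with the same call shape as RawErasureDecoder.decode():
  // inputs may hold null at erased positions; outputs receive the units
  // named in erasedIndexes. Single XOR parity stands in for Reed-Solomon.
  static void decode(byte[][] inputs, int[] erasedIndexes, byte[][] outputs) {
    for (int e = 0; e < erasedIndexes.length; e++) {
      byte[] out = outputs[e];
      Arrays.fill(out, (byte) 0);
      for (byte[] in : inputs) {
        if (in == null) continue;            // erased unit, skip
        for (int k = 0; k < out.length; k++) {
          out[k] ^= in[k];                   // XOR of all surviving units
        }
      }
    }
  }

  public static void main(String[] args) {
    int span = 4;
    byte[][] units = { {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12} };
    byte[] parity = new byte[span];
    for (byte[] u : units) {
      for (int k = 0; k < span; k++) parity[k] ^= u[k];
    }

    // Phase 1 (initDecodeInputs analogue): one buffer slot per block.
    byte[][] decodeInputs = new byte[4][];
    decodeInputs[0] = units[0];
    decodeInputs[2] = units[2];
    decodeInputs[3] = parity;
    // Phase 2 (finalizeDecodeInputs analogue): unit 1 was never fetched,
    // so its slot stays null to mark it as erased.
    decodeInputs[1] = null;

    // Phase 3 (decodeAndFillBuffer analogue): reconstruct only the missing unit.
    byte[][] outputs = new byte[1][span];
    decode(decodeInputs, new int[]{1}, outputs);
    System.out.println(Arrays.toString(outputs[0])); // [5, 6, 7, 8]
  }
}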
StripedBlockUtil.java

@@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
 import com.google.common.base.Preconditions;
 import org.apache.hadoop.io.erasurecode.ECSchema;
 import org.apache.hadoop.io.erasurecode.rawcoder.RSRawDecoder;
+import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
 
 import java.util.*;
 import java.io.IOException;
@@ -246,19 +247,36 @@ public class StripedBlockUtil {
 
   /**
    * Initialize the decoding input buffers based on the chunk states in an
-   * AlignedStripe
+   * {@link AlignedStripe}. For each chunk that was not initially requested,
+   * schedule a new fetch request with the decoding input buffer as transfer
+   * destination.
    */
   public static byte[][] initDecodeInputs(AlignedStripe alignedStripe,
       int dataBlkNum, int parityBlkNum) {
     byte[][] decodeInputs =
         new byte[dataBlkNum + parityBlkNum][(int) alignedStripe.getSpanInBlock()];
     for (int i = 0; i < alignedStripe.chunks.length; i++) {
-      StripingChunk chunk = alignedStripe.chunks[i];
-      if (chunk == null) {
+      if (alignedStripe.chunks[i] == null) {
         alignedStripe.chunks[i] = new StripingChunk(decodeInputs[i]);
         alignedStripe.chunks[i].offsetsInBuf.add(0);
         alignedStripe.chunks[i].lengthsInBuf.add((int) alignedStripe.getSpanInBlock());
-      } else if (chunk.state == StripingChunk.FETCHED) {
+      }
+    }
+    return decodeInputs;
+  }
+
+  /**
+   * Some fetched {@link StripingChunk} might be stored in original application
+   * buffer instead of prepared decode input buffers. Some others are beyond
+   * the range of the internal blocks and should correspond to all zero bytes.
+   * When all pending requests have returned, this method should be called to
+   * finalize decode input buffers.
+   */
+  public static void finalizeDecodeInputs(final byte[][] decodeInputs,
+      AlignedStripe alignedStripe) {
+    for (int i = 0; i < alignedStripe.chunks.length; i++) {
+      StripingChunk chunk = alignedStripe.chunks[i];
+      if (chunk.state == StripingChunk.FETCHED) {
         int posInBuf = 0;
         for (int j = 0; j < chunk.offsetsInBuf.size(); j++) {
           System.arraycopy(chunk.buf, chunk.offsetsInBuf.get(j),
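Note: the javadoc added above spells out what finalizeDecodeInputs has to handle: a FETCHED chunk's bytes may sit scattered in the caller's buffer, tracked by offsetsInBuf and lengthsInBuf, and must be gathered into one contiguous decode input before decoding. A small sketch of just that gather step under simplified types; gather and appBuf are hypothetical names, not from the commit.

import java.util.Arrays;
import java.util.List;

public class FinalizeSketch {
  // Copy the chunk's scattered segments (offset/length pairs in the
  // application buffer) back-to-back into the per-block decode input.
  static void gather(byte[] appBuf, List<Integer> offsets,
      List<Integer> lengths, byte[] decodeInput) {
    int posInBuf = 0;                        // write cursor in decode input
    for (int j = 0; j < offsets.size(); j++) {
      System.arraycopy(appBuf, offsets.get(j), decodeInput, posInBuf,
          lengths.get(j));
      posInBuf += lengths.get(j);
    }
  }

  public static void main(String[] args) {
    // Fetched bytes landed at offsets 2 and 5 of the application buffer.
    byte[] appBuf = {9, 9, 1, 2, 9, 3, 4, 9};
    byte[] decodeInput = new byte[4];
    gather(appBuf, List.of(2, 5), List.of(2, 2), decodeInput);
    System.out.println(Arrays.toString(decodeInput)); // [1, 2, 3, 4]
  }
}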
@@ -267,39 +285,41 @@ public class StripedBlockUtil {
         }
       } else if (chunk.state == StripingChunk.ALLZERO) {
         Arrays.fill(decodeInputs[i], (byte)0);
+      } else {
+        decodeInputs[i] = null;
       }
     }
-    return decodeInputs;
   }
 
   /**
-   * Decode based on the given input buffers and schema
+   * Decode based on the given input buffers and schema.
    */
-  public static void decodeAndFillBuffer(final byte[][] decodeInputs, byte[] buf,
-      AlignedStripe alignedStripe, int dataBlkNum, int parityBlkNum) {
+  public static void decodeAndFillBuffer(final byte[][] decodeInputs,
+      byte[] buf, AlignedStripe alignedStripe, int parityBlkNum,
+      RawErasureDecoder decoder) {
+    // Step 1: prepare indices and output buffers for missing data units
     int[] decodeIndices = new int[parityBlkNum];
     int pos = 0;
     for (int i = 0; i < alignedStripe.chunks.length; i++) {
-      if (alignedStripe.chunks[i].state != StripingChunk.FETCHED &&
-          alignedStripe.chunks[i].state != StripingChunk.ALLZERO) {
+      if (alignedStripe.chunks[i].state == StripingChunk.MISSING){
        decodeIndices[pos++] = i;
       }
     }
+    decodeIndices = Arrays.copyOf(decodeIndices, pos);
+    byte[][] decodeOutputs =
+        new byte[decodeIndices.length][(int) alignedStripe.getSpanInBlock()];
 
-    byte[][] outputs = new byte[parityBlkNum][(int) alignedStripe.getSpanInBlock()];
-    RSRawDecoder rsRawDecoder = new RSRawDecoder(dataBlkNum, parityBlkNum);
-    rsRawDecoder.decode(decodeInputs, decodeIndices, outputs);
+    // Step 2: decode into prepared output buffers
+    decoder.decode(decodeInputs, decodeIndices, decodeOutputs);
 
-    for (int i = 0; i < dataBlkNum + parityBlkNum; i++) {
-      StripingChunk chunk = alignedStripe.chunks[i];
+    // Step 3: fill original application buffer with decoded data
+    for (int i = 0; i < decodeIndices.length; i++) {
+      int missingBlkIdx = decodeIndices[i];
+      StripingChunk chunk = alignedStripe.chunks[missingBlkIdx];
       if (chunk.state == StripingChunk.MISSING) {
         int srcPos = 0;
         for (int j = 0; j < chunk.offsetsInBuf.size(); j++) {
-          //TODO: workaround (filling fixed bytes), to remove after HADOOP-11938
-          // System.arraycopy(outputs[i], srcPos, buf, chunk.offsetsInBuf.get(j),
-          //     chunk.lengthsInBuf.get(j));
-          Arrays.fill(buf, chunk.offsetsInBuf.get(j),
-              chunk.offsetsInBuf.get(j) + chunk.lengthsInBuf.get(j), (byte)7);
+          System.arraycopy(decodeOutputs[i], srcPos, buf, chunk.offsetsInBuf.get(j),
+              chunk.lengthsInBuf.get(j));
           srcPos += chunk.lengthsInBuf.get(j);
         }
       }
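Note: Step 1 of the rewritten decodeAndFillBuffer sizes decodeIndices for the worst case (at most parityBlkNum units can be lost and still be recoverable) and trims it with Arrays.copyOf once the MISSING chunks are counted, so Step 2 allocates exactly one output buffer per actual erasure. A compact sketch of that pattern; the enum and values are illustrative only.

import java.util.Arrays;

public class DecodeIndicesSketch {
  enum State { FETCHED, MISSING, ALLZERO }

  public static void main(String[] args) {
    State[] chunks =
        { State.FETCHED, State.MISSING, State.FETCHED, State.ALLZERO };
    int parityBlkNum = 2;
    int span = 4;

    int[] decodeIndices = new int[parityBlkNum]; // worst case: all parity used
    int pos = 0;
    for (int i = 0; i < chunks.length; i++) {
      if (chunks[i] == State.MISSING) {
        decodeIndices[pos++] = i;
      }
    }
    decodeIndices = Arrays.copyOf(decodeIndices, pos); // trim to actual count

    // One output buffer per erased unit, each spanning the aligned stripe.
    byte[][] decodeOutputs = new byte[decodeIndices.length][span];
    System.out.println(Arrays.toString(decodeIndices)); // [1]
    System.out.println(decodeOutputs.length);           // 1
  }
}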
TestDFSStripedInputStream.java

@@ -221,13 +221,13 @@ public class TestDFSStripedInputStream {
         decodeInputs[DATA_BLK_NUM][k] = SimulatedFSDataset.simulatedByte(
             new Block(bg.getBlock().getBlockId() + DATA_BLK_NUM), posInBlk);
       }
-      // RSRawDecoder rsRawDecoder = new RSRawDecoder();
-      // rsRawDecoder.initialize(DATA_BLK_NUM, PARITY_BLK_NUM, CELLSIZE);
-      // rsRawDecoder.decode(decodeInputs, missingBlkIdx, decodeOutputs);
+      for (int m : missingBlkIdx) {
+        decodeInputs[m] = null;
+      }
+      RSRawDecoder rsRawDecoder = new RSRawDecoder(DATA_BLK_NUM, PARITY_BLK_NUM);
+      rsRawDecoder.decode(decodeInputs, missingBlkIdx, decodeOutputs);
       int posInBuf = i * CELLSIZE * DATA_BLK_NUM + failedDNIdx * CELLSIZE;
-      // System.arraycopy(decodeOutputs[0], 0, expected, posInBuf, CELLSIZE);
-      //TODO: workaround (filling fixed bytes), to remove after HADOOP-11938
-      Arrays.fill(expected, posInBuf, posInBuf + CELLSIZE, (byte)7);
+      System.arraycopy(decodeOutputs[0], 0, expected, posInBuf, CELLSIZE);
     }
     int delta = 10;
     int done = 0;
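Note: the test change above follows the decoder contract rather than the old (byte)7 placeholder: every input slot named in missingBlkIdx must be nulled before decode, otherwise the decoder cannot tell which units are erased. A hedged sketch of that pattern; the Decoder interface and checkRecovery are hypothetical stand-ins for RawErasureDecoder and the real test.

import java.util.Arrays;

public class TestPatternSketch {
  interface Decoder {
    void decode(byte[][] inputs, int[] erasedIndexes, byte[][] outputs);
  }

  static void checkRecovery(Decoder decoder, byte[][] decodeInputs,
      int[] missingBlkIdx, byte[][] expected) {
    for (int m : missingBlkIdx) {
      decodeInputs[m] = null;              // mark erased units for the decoder
    }
    byte[][] decodeOutputs =
        new byte[missingBlkIdx.length][expected[0].length];
    decoder.decode(decodeInputs, missingBlkIdx, decodeOutputs);
    for (int i = 0; i < missingBlkIdx.length; i++) {
      if (!Arrays.equals(expected[i], decodeOutputs[i])) {
        throw new AssertionError("unit " + missingBlkIdx[i] + " not recovered");
      }
    }
  }

  public static void main(String[] args) {
    // Toy replication code: the second unit is a copy of the first, so
    // recovery just copies whichever input survived.
    Decoder copyDecoder = (inputs, erased, outputs) -> {
      for (int e = 0; e < erased.length; e++) {
        for (byte[] in : inputs) {
          if (in != null) {
            System.arraycopy(in, 0, outputs[e], 0, in.length);
          }
        }
      }
    };
    byte[][] inputs = { {1, 2, 3}, {1, 2, 3} };
    checkRecovery(copyDecoder, inputs, new int[]{0}, new byte[][]{{1, 2, 3}});
    System.out.println("recovered OK");
  }
}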
TestWriteReadStripedFile.java

@@ -382,16 +382,10 @@ public class TestWriteReadStripedFile {
     Assert.assertEquals("The length of file should be the same to write size",
         length - startOffsetInFile, readLen);
 
-    RSRawDecoder rsRawDecoder = new RSRawDecoder(dataBlocks, parityBlocks);
     byte[] expected = new byte[readLen];
     for (int i = startOffsetInFile; i < length; i++) {
-      //TODO: workaround (filling fixed bytes), to remove after HADOOP-11938
-      if ((i / cellSize) % dataBlocks == failedDNIdx) {
-        expected[i - startOffsetInFile] = (byte)7;
-      } else {
-        expected[i - startOffsetInFile] = getByte(i);
-      }
+      expected[i - startOffsetInFile] = getByte(i);
     }
     for (int i = startOffsetInFile; i < length; i++) {
       Assert.assertEquals("Byte at " + i + " should be the same",
           expected[i - startOffsetInFile], buf[i - startOffsetInFile]);