HDFS-8901. Use ByteBuffer in striping positional read. Contributed by Sammi Chen and Kai Zheng.
parent 20a20c2f6e
commit 401db4fc65
@@ -304,7 +304,7 @@ public class DataChecksum implements Checksum {
           bytesPerChecksum, checksums.array(), crcsOffset, fileName, basePos);
       return;
     }
-    if (NativeCrc32.isAvailable()) {
+    if (NativeCrc32.isAvailable() && data.isDirect()) {
       NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
           fileName, basePos);
     } else {
@@ -533,7 +533,8 @@ public class DFSInputStream extends FSInputStream
    * Open a DataInputStream to a DataNode so that it can be read from.
    * We get block ID and the IDs of the destinations at startup, from the namenode.
    */
-  private synchronized DatanodeInfo blockSeekTo(long target) throws IOException {
+  private synchronized DatanodeInfo blockSeekTo(long target)
+      throws IOException {
     if (target >= getFileLength()) {
       throw new IOException("Attempted to read past end of file");
     }
@@ -962,14 +963,14 @@ public class DFSInputStream extends FSInputStream
   }
 
   protected void fetchBlockByteRange(LocatedBlock block, long start, long end,
-      byte[] buf, int offset, CorruptedBlocks corruptedBlocks)
+      ByteBuffer buf, CorruptedBlocks corruptedBlocks)
       throws IOException {
     block = refreshLocatedBlock(block);
     while (true) {
       DNAddrPair addressPair = chooseDataNode(block, null);
       try {
         actualGetFromOneDataNode(addressPair, block, start, end,
-            buf, offset, corruptedBlocks);
+            buf, corruptedBlocks);
         return;
       } catch (IOException e) {
         checkInterrupted(e); // check if the read has been interrupted
@@ -988,12 +989,10 @@ public class DFSInputStream extends FSInputStream
     return new Callable<ByteBuffer>() {
       @Override
       public ByteBuffer call() throws Exception {
-        byte[] buf = bb.array();
-        int offset = bb.position();
         try (TraceScope ignored = dfsClient.getTracer().
             newScope("hedgedRead" + hedgedReadId, parentSpanId)) {
-          actualGetFromOneDataNode(datanode, block, start, end, buf,
-              offset, corruptedBlocks);
+          actualGetFromOneDataNode(datanode, block, start, end, bb,
+              corruptedBlocks);
           return bb;
         }
       }
@@ -1007,13 +1006,12 @@ public class DFSInputStream extends FSInputStream
    * @param block the located block containing the requested data
    * @param startInBlk the startInBlk offset of the block
    * @param endInBlk the endInBlk offset of the block
-   * @param buf the given byte array into which the data is read
-   * @param offset the offset in buf
+   * @param buf the given byte buffer into which the data is read
    * @param corruptedBlocks map recording list of datanodes with corrupted
    *                        block replica
    */
   void actualGetFromOneDataNode(final DNAddrPair datanode, LocatedBlock block,
-      final long startInBlk, final long endInBlk, byte[] buf, int offset,
+      final long startInBlk, final long endInBlk, ByteBuffer buf,
       CorruptedBlocks corruptedBlocks)
       throws IOException {
     DFSClientFaultInjector.get().startFetchFromDatanode();
@@ -1031,7 +1029,22 @@ public class DFSInputStream extends FSInputStream
       DFSClientFaultInjector.get().fetchFromDatanodeException();
       reader = getBlockReader(block, startInBlk, len, datanode.addr,
           datanode.storageType, datanode.info);
-      int nread = reader.readAll(buf, offset, len);
+
+      //Behave exactly as the readAll() call
+      ByteBuffer tmp = buf.duplicate();
+      tmp.limit(tmp.position() + len);
+      tmp = tmp.slice();
+      int nread = 0;
+      int ret;
+      while (true) {
+        ret = reader.read(tmp);
+        if (ret <= 0) {
+          break;
+        }
+        nread += ret;
+      }
+      buf.position(buf.position() + nread);
+
       IOUtilsClient.updateReadStatistics(readStatistics, nread, reader);
       dfsClient.updateFileSystemReadStats(
           reader.getNetworkDistance(), nread);
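The loop above replaces BlockReader#readAll(byte[], int, int) with ByteBuffer reads. A minimal, self-contained sketch of the same duplicate/limit/slice idiom follows; it uses plain java.nio with a ReadableByteChannel standing in for the block reader, and the class and method names are illustrative rather than HDFS APIs.

    import java.io.IOException;
    import java.nio.ByteBuffer;
    import java.nio.channels.ReadableByteChannel;

    public class SliceReadExample {
      /**
       * Read up to len bytes from the channel into buf, advancing buf's
       * position by the number of bytes actually read, mirroring the
       * readAll()-style loop in the patch. The caller's limit is never
       * touched because the reads go through a bounded slice.
       */
      static int readFully(ReadableByteChannel ch, ByteBuffer buf, int len)
          throws IOException {
        ByteBuffer tmp = buf.duplicate();     // shares content, independent position/limit
        tmp.limit(tmp.position() + len);      // bound the read to len bytes
        tmp = tmp.slice();                    // slice so position 0 is the first target byte
        int nread = 0;
        while (tmp.hasRemaining()) {
          int ret = ch.read(tmp);
          if (ret <= 0) {
            break;                            // EOF or no progress
          }
          nread += ret;
        }
        buf.position(buf.position() + nread); // reflect progress in the caller's buffer
        return nread;
      }
    }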
@@ -1098,7 +1111,7 @@ public class DFSInputStream extends FSInputStream
    * time. We then wait on which ever read returns first.
    */
   private void hedgedFetchBlockByteRange(LocatedBlock block, long start,
-      long end, byte[] buf, int offset, CorruptedBlocks corruptedBlocks)
+      long end, ByteBuffer buf, CorruptedBlocks corruptedBlocks)
       throws IOException {
     final DfsClientConf conf = dfsClient.getConf();
     ArrayList<Future<ByteBuffer>> futures = new ArrayList<>();
@@ -1130,8 +1143,8 @@ public class DFSInputStream extends FSInputStream
             conf.getHedgedReadThresholdMillis(), TimeUnit.MILLISECONDS);
         if (future != null) {
           ByteBuffer result = future.get();
-          System.arraycopy(result.array(), result.position(), buf, offset,
-              len);
+          result.flip();
+          buf.put(result);
           return;
         }
         DFSClient.LOG.debug("Waited {}ms to read from {}; spawning hedged "
@@ -1173,8 +1186,8 @@ public class DFSInputStream extends FSInputStream
         // cancel the rest.
         cancelAll(futures);
         dfsClient.getHedgedReadMetrics().incHedgedReadWins();
-        System.arraycopy(result.array(), result.position(), buf, offset,
-            len);
+        result.flip();
+        buf.put(result);
         return;
       } catch (InterruptedException ie) {
         // Ignore and retry
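Both hedged-read completion paths now copy the winning ByteBuffer with flip()/put() instead of System.arraycopy over a backing array, which a direct buffer may not expose. A standalone sketch of that copy, under the assumption that the source buffer was just filled by a reader (names are illustrative):

    import java.nio.ByteBuffer;

    public class FlipPutExample {
      public static void main(String[] args) {
        ByteBuffer result = ByteBuffer.allocate(16);
        result.put(new byte[] {1, 2, 3, 4});  // filled by some reader; position == 4
        ByteBuffer dest = ByteBuffer.allocate(16);

        result.flip();                        // limit = bytes written, position = 0
        dest.put(result);                     // copies exactly the bytes that were read

        System.out.println("copied " + dest.position() + " bytes"); // prints "copied 4 bytes"
      }
    }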
@@ -1244,7 +1257,8 @@ public class DFSInputStream extends FSInputStream
      * access key from its memory since it's considered expired based on
      * the estimated expiration date.
      */
-    if (ex instanceof InvalidBlockTokenException || ex instanceof InvalidToken) {
+    if (ex instanceof InvalidBlockTokenException ||
+        ex instanceof InvalidToken) {
       DFSClient.LOG.info("Access token was invalid when connecting to "
           + targetAddr + " : " + ex);
       return true;
@@ -1272,7 +1286,8 @@ public class DFSInputStream extends FSInputStream
     try (TraceScope scope = dfsClient.
         newReaderTraceScope("DFSInputStream#byteArrayPread",
             src, position, length)) {
-      int retLen = pread(position, buffer, offset, length);
+      ByteBuffer bb = ByteBuffer.wrap(buffer, offset, length);
+      int retLen = pread(position, bb);
       if (retLen < length) {
         dfsClient.addRetLenToReaderScope(scope, retLen);
       }
@@ -1280,7 +1295,7 @@ public class DFSInputStream extends FSInputStream
     }
   }
 
-  private int pread(long position, byte[] buffer, int offset, int length)
+  private int pread(long position, ByteBuffer buffer)
       throws IOException {
     // sanity checks
     dfsClient.checkOpen();
@@ -1292,6 +1307,7 @@ public class DFSInputStream extends FSInputStream
     if ((position < 0) || (position >= filelen)) {
       return -1;
     }
+    int length = buffer.remaining();
     int realLen = length;
     if ((position + length) > filelen) {
       realLen = (int)(filelen - position);
@@ -1304,14 +1320,16 @@ public class DFSInputStream extends FSInputStream
     CorruptedBlocks corruptedBlocks = new CorruptedBlocks();
     for (LocatedBlock blk : blockRange) {
       long targetStart = position - blk.getStartOffset();
-      long bytesToRead = Math.min(remaining, blk.getBlockSize() - targetStart);
+      int bytesToRead = (int) Math.min(remaining,
+          blk.getBlockSize() - targetStart);
+      long targetEnd = targetStart + bytesToRead - 1;
       try {
         if (dfsClient.isHedgedReadsEnabled() && !blk.isStriped()) {
           hedgedFetchBlockByteRange(blk, targetStart,
-              targetStart + bytesToRead - 1, buffer, offset, corruptedBlocks);
+              targetEnd, buffer, corruptedBlocks);
         } else {
-          fetchBlockByteRange(blk, targetStart, targetStart + bytesToRead - 1,
-              buffer, offset, corruptedBlocks);
+          fetchBlockByteRange(blk, targetStart, targetEnd,
+              buffer, corruptedBlocks);
         }
       } finally {
         // Check and report if any block replicas are corrupted.
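pread() now takes the request length from buffer.remaining() and no longer tracks a separate destination offset, since each per-block fetch advances the buffer's position itself. A simplified sketch of how the byte range is carved into per-block reads; a fixed block size stands in for the LocatedBlock metadata, so this is an illustration of the arithmetic only:

    import java.nio.ByteBuffer;

    public class PreadSplitExample {
      /** Print the per-block sub-ranges a positional read would cover. */
      static void planRead(long position, ByteBuffer buffer, long blockSize) {
        int remaining = buffer.remaining();          // total bytes requested
        while (remaining > 0) {
          // fixed blockSize is an assumption standing in for blk.getBlockSize()
          long blockStart = (position / blockSize) * blockSize;
          long targetStart = position - blockStart;  // offset inside this block
          int bytesToRead = (int) Math.min(remaining, blockSize - targetStart);
          long targetEnd = targetStart + bytesToRead - 1;
          System.out.println("block@" + blockStart + " read ["
              + targetStart + ", " + targetEnd + "]");
          remaining -= bytesToRead;
          position += bytesToRead;                   // no separate offset counter is needed:
                                                     // each fetch advances buffer.position()
        }
      }

      public static void main(String[] args) {
        planRead(100, ByteBuffer.allocate(300), 128);
      }
    }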
@@ -1323,7 +1341,6 @@ public class DFSInputStream extends FSInputStream
 
       remaining -= bytesToRead;
       position += bytesToRead;
-      offset += bytesToRead;
     }
     assert remaining == 0 : "Wrong number of bytes read.";
     return realLen;
@@ -1457,7 +1474,8 @@ public class DFSInputStream extends FSInputStream
    * If another node could not be found, then returns false.
    */
   @Override
-  public synchronized boolean seekToNewSource(long targetPos) throws IOException {
+  public synchronized boolean seekToNewSource(long targetPos)
+      throws IOException {
     if (currentNode == null) {
       return seekToBlockSource(targetPos);
     }
@@ -307,8 +307,8 @@ public class DFSStripedInputStream extends DFSInputStream {
         stripeLimit - stripeBufOffset);
 
     LocatedStripedBlock blockGroup = (LocatedStripedBlock) currentLocatedBlock;
-    AlignedStripe[] stripes = StripedBlockUtil.divideOneStripe(ecPolicy, cellSize,
-        blockGroup, offsetInBlockGroup,
+    AlignedStripe[] stripes = StripedBlockUtil.divideOneStripe(ecPolicy,
+        cellSize, blockGroup, offsetInBlockGroup,
         offsetInBlockGroup + stripeRange.length - 1, curStripeBuf);
     final LocatedBlock[] blks = StripedBlockUtil.parseStripedBlockGroup(
         blockGroup, cellSize, dataBlkNum, parityBlkNum);
@@ -523,13 +523,13 @@ public class DFSStripedInputStream extends DFSInputStream {
    */
   @Override
   protected void fetchBlockByteRange(LocatedBlock block, long start,
-      long end, byte[] buf, int offset, CorruptedBlocks corruptedBlocks)
+      long end, ByteBuffer buf, CorruptedBlocks corruptedBlocks)
       throws IOException {
     // Refresh the striped block group
     LocatedStripedBlock blockGroup = getBlockGroupAt(block.getStartOffset());
 
     AlignedStripe[] stripes = StripedBlockUtil.divideByteRangeIntoStripes(
-        ecPolicy, cellSize, blockGroup, start, end, buf, offset);
+        ecPolicy, cellSize, blockGroup, start, end, buf);
     CompletionService<Void> readService = new ExecutorCompletionService<>(
         dfsClient.getStripedReadsThreadPool());
     final LocatedBlock[] blks = StripedBlockUtil.parseStripedBlockGroup(
@@ -542,6 +542,7 @@ public class DFSStripedInputStream extends DFSInputStream {
             blks, preaderInfos, corruptedBlocks);
         preader.readStripe();
       }
+      buf.position(buf.position() + (int)(end - start + 1));
     } finally {
       for (BlockReaderInfo preaderInfo : preaderInfos) {
         closeReader(preaderInfo);
@@ -698,16 +699,15 @@ public class DFSStripedInputStream extends DFSInputStream {
   }
 
   private ByteBufferStrategy[] getReadStrategies(StripingChunk chunk) {
-    if (chunk.byteBuffer != null) {
-      ByteBufferStrategy strategy =
-          new ByteBufferStrategy(chunk.byteBuffer, readStatistics, dfsClient);
+    if (chunk.useByteBuffer()) {
+      ByteBufferStrategy strategy = new ByteBufferStrategy(
+          chunk.getByteBuffer(), readStatistics, dfsClient);
       return new ByteBufferStrategy[]{strategy};
     } else {
       ByteBufferStrategy[] strategies =
-          new ByteBufferStrategy[chunk.byteArray.getOffsets().length];
+          new ByteBufferStrategy[chunk.getChunkBuffer().getSlices().size()];
       for (int i = 0; i < strategies.length; i++) {
-        ByteBuffer buffer = ByteBuffer.wrap(chunk.byteArray.buf(),
-            chunk.byteArray.getOffsets()[i], chunk.byteArray.getLengths()[i]);
+        ByteBuffer buffer = chunk.getChunkBuffer().getSlice(i);
         strategies[i] =
             new ByteBufferStrategy(buffer, readStatistics, dfsClient);
       }
@@ -814,7 +814,7 @@ public class DFSStripedInputStream extends DFSInputStream {
   }
 
   class PositionStripeReader extends StripeReader {
-    private byte[][] decodeInputs = null;
+    private ByteBuffer[] decodeInputs = null;
 
     PositionStripeReader(CompletionService<Void> service,
         AlignedStripe alignedStripe, LocatedBlock[] targetBlocks,
@@ -836,8 +836,6 @@ public class DFSStripedInputStream extends DFSInputStream {
       Preconditions.checkState(index >= dataBlkNum &&
           alignedStripe.chunks[index] == null);
       alignedStripe.chunks[index] = new StripingChunk(decodeInputs[index]);
-      alignedStripe.chunks[index].addByteArraySlice(0,
-          (int) alignedStripe.getSpanInBlock());
       return true;
     }
 
@@ -73,7 +73,8 @@ import java.util.concurrent.TimeUnit;
 @InterfaceAudience.Private
 public class StripedBlockUtil {
 
-  public static final Logger LOG = LoggerFactory.getLogger(StripedBlockUtil.class);
+  public static final Logger LOG =
+      LoggerFactory.getLogger(StripedBlockUtil.class);
 
   /**
    * Parses a striped block group into individual blocks.
@@ -312,16 +313,17 @@ public class StripedBlockUtil {
    * schedule a new fetch request with the decoding input buffer as transfer
    * destination.
    */
-  public static byte[][] initDecodeInputs(AlignedStripe alignedStripe,
+  public static ByteBuffer[] initDecodeInputs(AlignedStripe alignedStripe,
       int dataBlkNum, int parityBlkNum) {
-    byte[][] decodeInputs =
-        new byte[dataBlkNum + parityBlkNum][(int) alignedStripe.getSpanInBlock()];
+    ByteBuffer[] decodeInputs = new ByteBuffer[dataBlkNum + parityBlkNum];
+    for (int i = 0; i < decodeInputs.length; i++) {
+      decodeInputs[i] = ByteBuffer.allocate(
+          (int) alignedStripe.getSpanInBlock());
+    }
     // read the full data aligned stripe
     for (int i = 0; i < dataBlkNum; i++) {
       if (alignedStripe.chunks[i] == null) {
         alignedStripe.chunks[i] = new StripingChunk(decodeInputs[i]);
-        alignedStripe.chunks[i].addByteArraySlice(0,
-            (int) alignedStripe.getSpanInBlock());
       }
     }
     return decodeInputs;
@@ -334,14 +336,21 @@ public class StripedBlockUtil {
    * When all pending requests have returned, this method should be called to
    * finalize decode input buffers.
    */
-  public static void finalizeDecodeInputs(final byte[][] decodeInputs,
+  public static void finalizeDecodeInputs(final ByteBuffer[] decodeInputs,
       AlignedStripe alignedStripe) {
     for (int i = 0; i < alignedStripe.chunks.length; i++) {
       final StripingChunk chunk = alignedStripe.chunks[i];
       if (chunk != null && chunk.state == StripingChunk.FETCHED) {
-        chunk.copyTo(decodeInputs[i]);
+        if (chunk.useChunkBuffer()) {
+          chunk.getChunkBuffer().copyTo(decodeInputs[i]);
+        } else {
+          chunk.getByteBuffer().flip();
+        }
       } else if (chunk != null && chunk.state == StripingChunk.ALLZERO) {
-        Arrays.fill(decodeInputs[i], (byte) 0);
+        //ZERO it. Will be better handled in other following issue.
+        byte[] emptyBytes = new byte[decodeInputs[i].limit()];
+        decodeInputs[i].put(emptyBytes);
+        decodeInputs[i].flip();
       } else {
         decodeInputs[i] = null;
       }
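With byte[] decode inputs the ALLZERO case was a single Arrays.fill; for a ByteBuffer the buffer is zero-filled through put() and then flipped so the decoder sees it in a ready-to-read state, while a FETCHED buffer only needs the flip. A small standalone sketch of both preparations (an assumed span of 8 bytes, illustrative only):

    import java.nio.ByteBuffer;

    public class DecodeInputPrepExample {
      public static void main(String[] args) {
        int span = 8;  // stands in for alignedStripe.getSpanInBlock()

        // An all-zero input: fill, then flip so position=0 and limit=span.
        ByteBuffer zero = ByteBuffer.allocate(span);
        zero.put(new byte[span]);   // a freshly allocated byte[] is already zeroed
        zero.flip();

        // A fetched input: the reader left position at the end of the data,
        // so it only needs a flip before being handed to the decoder.
        ByteBuffer fetched = ByteBuffer.allocate(span);
        fetched.put(new byte[] {1, 2, 3, 4, 5, 6, 7, 8});
        fetched.flip();

        System.out.println(zero.remaining() + " / " + fetched.remaining()); // prints "8 / 8"
      }
    }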
@@ -351,7 +360,7 @@ public class StripedBlockUtil {
   /**
    * Decode based on the given input buffers and erasure coding policy.
    */
-  public static void decodeAndFillBuffer(final byte[][] decodeInputs,
+  public static void decodeAndFillBuffer(final ByteBuffer[] decodeInputs,
       AlignedStripe alignedStripe, int dataBlkNum, int parityBlkNum,
       RawErasureDecoder decoder) {
     // Step 1: prepare indices and output buffers for missing data units
@@ -364,8 +373,11 @@ public class StripedBlockUtil {
       }
     }
     decodeIndices = Arrays.copyOf(decodeIndices, pos);
-    byte[][] decodeOutputs =
-        new byte[decodeIndices.length][(int) alignedStripe.getSpanInBlock()];
+    ByteBuffer[] decodeOutputs = new ByteBuffer[decodeIndices.length];
+    for (int i = 0; i < decodeOutputs.length; i++) {
+      decodeOutputs[i] = ByteBuffer.allocate(
+          (int) alignedStripe.getSpanInBlock());
+    }
 
     // Step 2: decode into prepared output buffers
     decoder.decode(decodeInputs, decodeIndices, decodeOutputs);
@@ -374,8 +386,8 @@ public class StripedBlockUtil {
     for (int i = 0; i < decodeIndices.length; i++) {
       int missingBlkIdx = decodeIndices[i];
       StripingChunk chunk = alignedStripe.chunks[missingBlkIdx];
-      if (chunk.state == StripingChunk.MISSING) {
-        chunk.copyFrom(decodeOutputs[i]);
+      if (chunk.state == StripingChunk.MISSING && chunk.useChunkBuffer()) {
+        chunk.getChunkBuffer().copyFrom(decodeOutputs[i]);
       }
     }
   }
@@ -402,7 +414,8 @@ public class StripedBlockUtil {
 
     // Step 4: calculate each chunk's position in destination buffer. Since the
    // whole read range is within a single stripe, the logic is simpler here.
-    int bufOffset = (int) (rangeStartInBlockGroup % ((long) cellSize * dataBlkNum));
+    int bufOffset =
+        (int) (rangeStartInBlockGroup % ((long) cellSize * dataBlkNum));
     for (StripingCell cell : cells) {
       long cellStart = cell.idxInInternalBlk * cellSize + cell.offset;
       long cellEnd = cellStart + cell.size - 1;
@@ -437,15 +450,14 @@ public class StripedBlockUtil {
    * @param rangeStartInBlockGroup The byte range's start offset in block group
    * @param rangeEndInBlockGroup The byte range's end offset in block group
    * @param buf Destination buffer of the read operation for the byte range
-   * @param offsetInBuf Start offset into the destination buffer
    *
    * At most 5 stripes will be generated from each logical range, as
    * demonstrated in the header of {@link AlignedStripe}.
    */
-  public static AlignedStripe[] divideByteRangeIntoStripes(ErasureCodingPolicy ecPolicy,
+  public static AlignedStripe[] divideByteRangeIntoStripes(
+      ErasureCodingPolicy ecPolicy,
       int cellSize, LocatedStripedBlock blockGroup,
-      long rangeStartInBlockGroup, long rangeEndInBlockGroup, byte[] buf,
-      int offsetInBuf) {
+      long rangeStartInBlockGroup, long rangeEndInBlockGroup, ByteBuffer buf) {
 
     // Step 0: analyze range and calculate basic parameters
     final int dataBlkNum = ecPolicy.getNumDataUnits();
@@ -462,7 +474,7 @@ public class StripedBlockUtil {
     AlignedStripe[] stripes = mergeRangesForInternalBlocks(ecPolicy, ranges);
 
     // Step 4: calculate each chunk's position in destination buffer
-    calcualteChunkPositionsInBuf(cellSize, stripes, cells, buf, offsetInBuf);
+    calcualteChunkPositionsInBuf(cellSize, stripes, cells, buf);
 
     // Step 5: prepare ALLZERO blocks
     prepareAllZeroChunks(blockGroup, stripes, cellSize, dataBlkNum);
@@ -476,7 +488,8 @@ public class StripedBlockUtil {
    * used by {@link DFSStripedOutputStream} in encoding
    */
   @VisibleForTesting
-  private static StripingCell[] getStripingCellsOfByteRange(ErasureCodingPolicy ecPolicy,
+  private static StripingCell[] getStripingCellsOfByteRange(
+      ErasureCodingPolicy ecPolicy,
       int cellSize, LocatedStripedBlock blockGroup,
       long rangeStartInBlockGroup, long rangeEndInBlockGroup) {
     Preconditions.checkArgument(
@@ -511,7 +524,8 @@ public class StripedBlockUtil {
    * the physical byte range (inclusive) on each stored internal block.
    */
   @VisibleForTesting
-  private static VerticalRange[] getRangesForInternalBlocks(ErasureCodingPolicy ecPolicy,
+  private static VerticalRange[] getRangesForInternalBlocks(
+      ErasureCodingPolicy ecPolicy,
       int cellSize, StripingCell[] cells) {
     int dataBlkNum = ecPolicy.getNumDataUnits();
     int parityBlkNum = ecPolicy.getNumParityUnits();
@@ -575,8 +589,7 @@ public class StripedBlockUtil {
   }
 
   private static void calcualteChunkPositionsInBuf(int cellSize,
-      AlignedStripe[] stripes, StripingCell[] cells, byte[] buf,
-      int offsetInBuf) {
+      AlignedStripe[] stripes, StripingCell[] cells, ByteBuffer buf) {
     /**
      * | <--------------- AlignedStripe --------------->|
      *
@@ -598,6 +611,7 @@ public class StripedBlockUtil {
     for (StripingCell cell : cells) {
       long cellStart = cell.idxInInternalBlk * cellSize + cell.offset;
       long cellEnd = cellStart + cell.size - 1;
+      StripingChunk chunk;
       for (AlignedStripe s : stripes) {
         long stripeEnd = s.getOffsetInBlock() + s.getSpanInBlock() - 1;
         long overlapStart = Math.max(cellStart, s.getOffsetInBlock());
@@ -606,11 +620,13 @@ public class StripedBlockUtil {
         if (overLapLen <= 0) {
           continue;
         }
-        if (s.chunks[cell.idxInStripe] == null) {
-          s.chunks[cell.idxInStripe] = new StripingChunk(buf);
+        chunk = s.chunks[cell.idxInStripe];
+        if (chunk == null) {
+          chunk = new StripingChunk();
+          s.chunks[cell.idxInStripe] = chunk;
         }
-        s.chunks[cell.idxInStripe].addByteArraySlice(
-            (int)(offsetInBuf + done + overlapStart - cellStart), overLapLen);
+        chunk.getChunkBuffer().addSlice(buf,
+            (int) (done + overlapStart - cellStart), overLapLen);
       }
       done += cell.size;
     }
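calcualteChunkPositionsInBuf() above now records each cell's overlap as a bounded slice of the destination ByteBuffer; the ChunkByteBuffer helper that keeps those slices is introduced in the next hunk. A self-contained sketch of the underlying slice bookkeeping, using plain java.nio and hypothetical names:

    import java.nio.ByteBuffer;
    import java.util.ArrayList;
    import java.util.List;

    public class SliceBookkeepingExample {
      public static void main(String[] args) {
        ByteBuffer dest = ByteBuffer.allocate(12);   // the caller's destination buffer
        List<ByteBuffer> slices = new ArrayList<>();

        // addSlice(dest, offset, len): a bounded window relative to dest.position().
        addSlice(slices, dest, 0, 4);
        addSlice(slices, dest, 8, 4);

        // Writing through a slice lands in the right region of dest,
        // without ever moving dest's own position.
        slices.get(0).put(new byte[] {1, 2, 3, 4});
        slices.get(1).put(new byte[] {9, 9, 9, 9});
        System.out.println(dest.get(0) + " " + dest.get(8));   // prints "1 9"
      }

      static void addSlice(List<ByteBuffer> slices, ByteBuffer buffer,
          int offset, int len) {
        ByteBuffer tmp = buffer.duplicate();
        tmp.position(buffer.position() + offset);
        tmp.limit(buffer.position() + offset + len);
        slices.add(tmp.slice());
      }
    }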
@@ -833,88 +849,89 @@ public class StripedBlockUtil {
      */
     public int state = REQUESTED;
 
-    public final ChunkByteArray byteArray;
-    public final ByteBuffer byteBuffer;
+    private final ChunkByteBuffer chunkBuffer;
+    private final ByteBuffer byteBuffer;
 
-    public StripingChunk(byte[] buf) {
-      this.byteArray = new ChunkByteArray(buf);
+    public StripingChunk() {
+      this.chunkBuffer = new ChunkByteBuffer();
       byteBuffer = null;
     }
 
     public StripingChunk(ByteBuffer buf) {
-      this.byteArray = null;
+      this.chunkBuffer = null;
       this.byteBuffer = buf;
     }
 
     public StripingChunk(int state) {
-      this.byteArray = null;
+      this.chunkBuffer = null;
       this.byteBuffer = null;
       this.state = state;
     }
 
-    public void addByteArraySlice(int offset, int length) {
-      assert byteArray != null;
-      byteArray.offsetsInBuf.add(offset);
-      byteArray.lengthsInBuf.add(length);
+    public boolean useByteBuffer(){
+      return byteBuffer != null;
     }
 
-    void copyTo(byte[] target) {
-      assert byteArray != null;
-      byteArray.copyTo(target);
+    public boolean useChunkBuffer() {
+      return chunkBuffer != null;
     }
 
-    void copyFrom(byte[] src) {
-      assert byteArray != null;
-      byteArray.copyFrom(src);
+    public ByteBuffer getByteBuffer() {
+      assert byteBuffer != null;
+      return byteBuffer;
+    }
+
+    public ChunkByteBuffer getChunkBuffer() {
+      assert chunkBuffer != null;
+      return chunkBuffer;
     }
   }
 
-  public static class ChunkByteArray {
-    private final byte[] buf;
-    private final List<Integer> offsetsInBuf;
-    private final List<Integer> lengthsInBuf;
+  /**
+   * A utility to manage ByteBuffer slices for a reader.
+   */
+  public static class ChunkByteBuffer {
+    private final List<ByteBuffer> slices;
 
-    ChunkByteArray(byte[] buf) {
-      this.buf = buf;
-      this.offsetsInBuf = new ArrayList<>();
-      this.lengthsInBuf = new ArrayList<>();
+    ChunkByteBuffer() {
+      this.slices = new ArrayList<>();
     }
 
-    public int[] getOffsets() {
-      int[] offsets = new int[offsetsInBuf.size()];
-      for (int i = 0; i < offsets.length; i++) {
-        offsets[i] = offsetsInBuf.get(i);
-      }
-      return offsets;
+    public void addSlice(ByteBuffer buffer, int offset, int len) {
+      ByteBuffer tmp = buffer.duplicate();
+      tmp.position(buffer.position() + offset);
+      tmp.limit(buffer.position() + offset + len);
+      slices.add(tmp.slice());
     }
 
-    public int[] getLengths() {
-      int[] lens = new int[this.lengthsInBuf.size()];
-      for (int i = 0; i < lens.length; i++) {
-        lens[i] = this.lengthsInBuf.get(i);
-      }
-      return lens;
+    public ByteBuffer getSlice(int i) {
+      return slices.get(i);
     }
 
-    public byte[] buf() {
-      return buf;
+    public List<ByteBuffer> getSlices() {
+      return slices;
     }
 
-    void copyTo(byte[] target) {
-      int posInBuf = 0;
-      for (int i = 0; i < offsetsInBuf.size(); i++) {
-        System.arraycopy(buf, offsetsInBuf.get(i),
-            target, posInBuf, lengthsInBuf.get(i));
-        posInBuf += lengthsInBuf.get(i);
+    /**
+     * Note: target will be ready-to-read state after the call.
+     */
+    void copyTo(ByteBuffer target) {
+      for (ByteBuffer slice : slices) {
+        slice.flip();
+        target.put(slice);
       }
+      target.flip();
     }
 
-    void copyFrom(byte[] src) {
-      int srcPos = 0;
-      for (int j = 0; j < offsetsInBuf.size(); j++) {
-        System.arraycopy(src, srcPos, buf, offsetsInBuf.get(j),
-            lengthsInBuf.get(j));
-        srcPos += lengthsInBuf.get(j);
+    void copyFrom(ByteBuffer src) {
+      ByteBuffer tmp;
+      int len;
+      for (ByteBuffer slice : slices) {
+        len = slice.remaining();
+        tmp = src.duplicate();
+        tmp.limit(tmp.position() + len);
+        slice.put(tmp);
+        src.position(src.position() + len);
       }
     }
   }
@@ -57,7 +57,8 @@ import static org.junit.Assert.assertTrue;
 
 public class TestDFSStripedInputStream {
 
-  public static final Log LOG = LogFactory.getLog(TestDFSStripedInputStream.class);
+  public static final Log LOG =
+      LogFactory.getLog(TestDFSStripedInputStream.class);
 
   private MiniDFSCluster cluster;
   private Configuration conf = new Configuration();
@@ -272,12 +273,16 @@ public class TestDFSStripedInputStream {
     // |10 |
     done += in.read(0, readBuffer, 0, delta);
     assertEquals(delta, done);
+    assertArrayEquals(Arrays.copyOf(expected, done),
+        Arrays.copyOf(readBuffer, done));
     // both head and trail cells are partial
     // |c_0 |c_1 |c_2 |c_3 |c_4 |c_5 |
     // |256K - 10|missing|256K|256K|256K - 10|not in range|
     done += in.read(delta, readBuffer, delta,
         CELLSIZE * (DATA_BLK_NUM - 1) - 2 * delta);
     assertEquals(CELLSIZE * (DATA_BLK_NUM - 1) - delta, done);
+    assertArrayEquals(Arrays.copyOf(expected, done),
+        Arrays.copyOf(readBuffer, done));
     // read the rest
     done += in.read(done, readBuffer, done, readSize - done);
     assertEquals(readSize, done);
@@ -291,8 +296,8 @@ public class TestDFSStripedInputStream {
     testStatefulRead(true, true);
   }
 
-  private void testStatefulRead(boolean useByteBuffer, boolean cellMisalignPacket)
-      throws Exception {
+  private void testStatefulRead(boolean useByteBuffer,
+      boolean cellMisalignPacket) throws Exception {
     final int numBlocks = 2;
     final int fileSize = numBlocks * BLOCK_GROUP_SIZE;
     if (cellMisalignPacket) {
@@ -302,7 +307,8 @@ public class TestDFSStripedInputStream {
     }
     DFSTestUtil.createStripedFile(cluster, filePath, null, numBlocks,
         NUM_STRIPE_PER_BLOCK, false);
-    LocatedBlocks lbs = fs.getClient().namenode.getBlockLocations(filePath.toString(), 0, fileSize);
+    LocatedBlocks lbs = fs.getClient().namenode.
+        getBlockLocations(filePath.toString(), 0, fileSize);
 
     assert lbs.getLocatedBlocks().size() == numBlocks;
     for (LocatedBlock lb : lbs.getLocatedBlocks()) {
@@ -360,4 +366,111 @@ public class TestDFSStripedInputStream {
     }
     fs.delete(filePath, true);
   }
+
+  @Test
+  public void testStatefulReadWithDNFailure() throws Exception {
+    final int numBlocks = 4;
+    final int failedDNIdx = DATA_BLK_NUM - 1;
+    DFSTestUtil.createStripedFile(cluster, filePath, null, numBlocks,
+        NUM_STRIPE_PER_BLOCK, false);
+    LocatedBlocks lbs = fs.getClient().namenode.getBlockLocations(
+        filePath.toString(), 0, BLOCK_GROUP_SIZE);
+
+    assert lbs.get(0) instanceof LocatedStripedBlock;
+    LocatedStripedBlock bg = (LocatedStripedBlock) (lbs.get(0));
+    for (int i = 0; i < DATA_BLK_NUM + PARITY_BLK_NUM; i++) {
+      Block blk = new Block(bg.getBlock().getBlockId() + i,
+          NUM_STRIPE_PER_BLOCK * CELLSIZE,
+          bg.getBlock().getGenerationStamp());
+      blk.setGenerationStamp(bg.getBlock().getGenerationStamp());
+      cluster.injectBlocks(i, Arrays.asList(blk),
+          bg.getBlock().getBlockPoolId());
+    }
+    DFSStripedInputStream in =
+        new DFSStripedInputStream(fs.getClient(), filePath.toString(), false,
+            ecPolicy, null);
+    int readSize = BLOCK_GROUP_SIZE;
+    byte[] readBuffer = new byte[readSize];
+    byte[] expected = new byte[readSize];
+    /** A variation of {@link DFSTestUtil#fillExpectedBuf} for striped blocks */
+    for (int i = 0; i < NUM_STRIPE_PER_BLOCK; i++) {
+      for (int j = 0; j < DATA_BLK_NUM; j++) {
+        for (int k = 0; k < CELLSIZE; k++) {
+          int posInBlk = i * CELLSIZE + k;
+          int posInFile = i * CELLSIZE * DATA_BLK_NUM + j * CELLSIZE + k;
+          expected[posInFile] = SimulatedFSDataset.simulatedByte(
+              new Block(bg.getBlock().getBlockId() + j), posInBlk);
+        }
+      }
+    }
+
+    ErasureCoderOptions coderOptions = new ErasureCoderOptions(
+        DATA_BLK_NUM, PARITY_BLK_NUM);
+    RawErasureDecoder rawDecoder = CodecUtil.createRawDecoder(conf,
+        ecPolicy.getCodecName(), coderOptions);
+
+    // Update the expected content for decoded data
+    int[] missingBlkIdx = new int[PARITY_BLK_NUM];
+    for (int i = 0; i < missingBlkIdx.length; i++) {
+      if (i == 0) {
+        missingBlkIdx[i] = failedDNIdx;
+      } else {
+        missingBlkIdx[i] = DATA_BLK_NUM + i;
+      }
+    }
+    cluster.stopDataNode(failedDNIdx);
+    for (int i = 0; i < NUM_STRIPE_PER_BLOCK; i++) {
+      byte[][] decodeInputs = new byte[DATA_BLK_NUM + PARITY_BLK_NUM][CELLSIZE];
+      byte[][] decodeOutputs = new byte[missingBlkIdx.length][CELLSIZE];
+      for (int j = 0; j < DATA_BLK_NUM; j++) {
+        int posInBuf = i * CELLSIZE * DATA_BLK_NUM + j * CELLSIZE;
+        if (j != failedDNIdx) {
+          System.arraycopy(expected, posInBuf, decodeInputs[j], 0, CELLSIZE);
+        }
+      }
+      for (int j = DATA_BLK_NUM; j < DATA_BLK_NUM + PARITY_BLK_NUM; j++) {
+        for (int k = 0; k < CELLSIZE; k++) {
+          int posInBlk = i * CELLSIZE + k;
+          decodeInputs[j][k] = SimulatedFSDataset.simulatedByte(
+              new Block(bg.getBlock().getBlockId() + j), posInBlk);
+        }
+      }
+      for (int m : missingBlkIdx) {
+        decodeInputs[m] = null;
+      }
+      rawDecoder.decode(decodeInputs, missingBlkIdx, decodeOutputs);
+      int posInBuf = i * CELLSIZE * DATA_BLK_NUM + failedDNIdx * CELLSIZE;
+      System.arraycopy(decodeOutputs[0], 0, expected, posInBuf, CELLSIZE);
+    }
+
+    int delta = 10;
+    int done = 0;
+    // read a small delta, shouldn't trigger decode
+    // |cell_0 |
+    // |10 |
+    done += in.read(readBuffer, 0, delta);
+    assertEquals(delta, done);
+    // both head and trail cells are partial
+    // |c_0 |c_1 |c_2 |c_3 |c_4 |c_5 |
+    // |256K - 10|missing|256K|256K|256K - 10|not in range|
+    while (done < (CELLSIZE * (DATA_BLK_NUM - 1) - 2 * delta)) {
+      int ret = in.read(readBuffer, delta,
+          CELLSIZE * (DATA_BLK_NUM - 1) - 2 * delta);
+      assertTrue(ret > 0);
+      done += ret;
+    }
+    assertEquals(CELLSIZE * (DATA_BLK_NUM - 1) - delta, done);
+    // read the rest
+
+    int restSize;
+    restSize = readSize - done;
+    while (done < restSize) {
+      int ret = in.read(readBuffer, done, restSize);
+      assertTrue(ret > 0);
+      done += ret;
+    }
+
+    assertEquals(readSize, done);
+    assertArrayEquals(expected, readBuffer);
+  }
 }
@@ -36,6 +36,7 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.Timeout;
 
+import java.nio.ByteBuffer;
 import java.util.Random;
 
 import static org.junit.Assert.assertEquals;
@@ -242,7 +243,8 @@ public class TestStripedBlockUtil {
    */
   @Test
   public void testDivideByteRangeIntoStripes() {
-    byte[] assembled = new byte[BLK_GROUP_STRIPE_NUM * FULL_STRIPE_SIZE];
+    ByteBuffer assembled =
+        ByteBuffer.allocate(BLK_GROUP_STRIPE_NUM * FULL_STRIPE_SIZE);
     for (int bgSize : blockGroupSizes) {
       LocatedStripedBlock blockGroup = createDummyLocatedBlock(bgSize);
       byte[][] internalBlkBufs = createInternalBlkBuffers(bgSize);
@@ -252,7 +254,7 @@ public class TestStripedBlockUtil {
           continue;
         }
         AlignedStripe[] stripes = divideByteRangeIntoStripes(EC_POLICY,
-            CELLSIZE, blockGroup, brStart, brStart + brSize - 1, assembled, 0);
+            CELLSIZE, blockGroup, brStart, brStart + brSize - 1, assembled);
 
         for (AlignedStripe stripe : stripes) {
           for (int i = 0; i < DATA_BLK_NUM; i++) {
@@ -261,21 +263,21 @@ public class TestStripedBlockUtil {
               continue;
             }
             int done = 0;
-            for (int j = 0; j < chunk.byteArray.getLengths().length; j++) {
-              System.arraycopy(internalBlkBufs[i],
-                  (int) stripe.getOffsetInBlock() + done, assembled,
-                  chunk.byteArray.getOffsets()[j],
-                  chunk.byteArray.getLengths()[j]);
-              done += chunk.byteArray.getLengths()[j];
+            int len;
+            for (ByteBuffer slice : chunk.getChunkBuffer().getSlices()) {
+              len = slice.remaining();
+              slice.put(internalBlkBufs[i],
+                  (int) stripe.getOffsetInBlock() + done, len);
+              done += len;
            }
           }
         }
         for (int i = 0; i < brSize; i++) {
-          if (hashIntToByte(brStart + i) != assembled[i]) {
+          if (hashIntToByte(brStart + i) != assembled.get(i)) {
             System.out.println("Oops");
           }
           assertEquals("Byte at " + (brStart + i) + " should be the same",
-              hashIntToByte(brStart + i), assembled[i]);
+              hashIntToByte(brStart + i), assembled.get(i));
         }
       }
     }