HDFS-8481. Erasure coding: remove workarounds in client side stripped blocks recovering. Contributed by Zhe Zhang.
parent 1299357a05
commit 014bd32c58
CHANGES-HDFS-EC-7285.txt

@@ -262,3 +262,6 @@
     HDFS-8479. Erasure coding: fix striping related logic in FSDirWriteFileOp to
     sync with HDFS-8421. (Zhe Zhang via jing9)
 
+    HDFS-8481. Erasure coding: remove workarounds in client side stripped blocks
+    recovering. (zhz)
+
DFSStripedInputStream.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.io.ByteBufferPool;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.divideByteRangeIntoStripes;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.initDecodeInputs;
+import static org.apache.hadoop.hdfs.util.StripedBlockUtil.finalizeDecodeInputs;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.decodeAndFillBuffer;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.getNextCompletedStripedRead;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.getStartOffsetsForInternalBlocks;
@@ -41,6 +42,8 @@ import static org.apache.hadoop.hdfs.util.StripedBlockUtil.StripingChunkReadResult;
 import org.apache.hadoop.io.erasurecode.ECSchema;
 
+import org.apache.hadoop.io.erasurecode.rawcoder.RSRawDecoder;
+import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.htrace.Span;
 import org.apache.htrace.Trace;
@@ -117,6 +120,8 @@ public class DFSStripedInputStream extends DFSInputStream {
   /** the buffer for a complete stripe */
   private ByteBuffer curStripeBuf;
   private final ECSchema schema;
+  private final RawErasureDecoder decoder;
+
   /**
    * indicate the start/end offset of the current buffered stripe in the
    * block group
@@ -139,6 +144,7 @@ public class DFSStripedInputStream extends DFSInputStream {
     curStripeRange = new StripeRange(0, 0);
     readingService =
         new ExecutorCompletionService<>(dfsClient.getStripedReadsThreadPool());
+    decoder = new RSRawDecoder(dataBlkNum, parityBlkNum);
     if (DFSClient.LOG.isDebugEnabled()) {
       DFSClient.LOG.debug("Creating an striped input stream for file " + src);
     }
@@ -591,8 +597,9 @@ public class DFSStripedInputStream extends DFSInputStream {
       }
 
       if (alignedStripe.missingChunksNum > 0) {
-        decodeAndFillBuffer(decodeInputs, buf, alignedStripe,
-            dataBlkNum, parityBlkNum);
+        finalizeDecodeInputs(decodeInputs, alignedStripe);
+        decodeAndFillBuffer(decodeInputs, buf, alignedStripe, parityBlkNum,
+            decoder);
       }
     }
 
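Note: the hunk above is the core read-path change. Recovery now runs in three explicit phases: initDecodeInputs prepares one buffer per internal block, finalizeDecodeInputs consolidates what the reads actually fetched, and decodeAndFillBuffer decodes with the stream's long-lived decoder instead of allocating a fresh RSRawDecoder and filling placeholder bytes. Below is a minimal, self-contained sketch of that phase ordering; the XOR decode() and all toy data are illustrative stand-ins, not commit code.

import java.util.Arrays;

public class RecoverySketch {
  // Toy decoder with the same call shape as RawErasureDecoder.decode():
  // inputs may hold null at erased positions; outputs receive the units
  // named in erasedIndexes. Single XOR parity stands in for Reed-Solomon.
  static void decode(byte[][] inputs, int[] erasedIndexes, byte[][] outputs) {
    for (int e = 0; e < erasedIndexes.length; e++) {
      byte[] out = outputs[e];
      Arrays.fill(out, (byte) 0);
      for (byte[] in : inputs) {
        if (in == null) continue;            // erased unit, skip
        for (int k = 0; k < out.length; k++) {
          out[k] ^= in[k];                   // XOR of all surviving units
        }
      }
    }
  }

  public static void main(String[] args) {
    int span = 4;
    byte[][] units = { {1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12} };
    byte[] parity = new byte[span];
    for (byte[] u : units) {
      for (int k = 0; k < span; k++) parity[k] ^= u[k];
    }

    // Phase 1 (initDecodeInputs analogue): one buffer slot per block.
    byte[][] decodeInputs = new byte[4][];
    decodeInputs[0] = units[0];
    decodeInputs[2] = units[2];
    decodeInputs[3] = parity;
    // Phase 2 (finalizeDecodeInputs analogue): unit 1 was never fetched,
    // so its slot stays null to mark it as erased.
    decodeInputs[1] = null;

    // Phase 3 (decodeAndFillBuffer analogue): reconstruct only the missing unit.
    byte[][] outputs = new byte[1][span];
    decode(decodeInputs, new int[]{1}, outputs);
    System.out.println(Arrays.toString(outputs[0])); // [5, 6, 7, 8]
  }
}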
StripedBlockUtil.java

@@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
 import com.google.common.base.Preconditions;
 import org.apache.hadoop.io.erasurecode.ECSchema;
 import org.apache.hadoop.io.erasurecode.rawcoder.RSRawDecoder;
+import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
 
 import java.util.*;
 import java.io.IOException;
@@ -246,19 +247,36 @@ public class StripedBlockUtil {
 
   /**
    * Initialize the decoding input buffers based on the chunk states in an
-   * AlignedStripe
+   * {@link AlignedStripe}. For each chunk that was not initially requested,
+   * schedule a new fetch request with the decoding input buffer as transfer
+   * destination.
    */
   public static byte[][] initDecodeInputs(AlignedStripe alignedStripe,
       int dataBlkNum, int parityBlkNum) {
     byte[][] decodeInputs =
         new byte[dataBlkNum + parityBlkNum][(int) alignedStripe.getSpanInBlock()];
     for (int i = 0; i < alignedStripe.chunks.length; i++) {
-      StripingChunk chunk = alignedStripe.chunks[i];
-      if (chunk == null) {
+      if (alignedStripe.chunks[i] == null) {
         alignedStripe.chunks[i] = new StripingChunk(decodeInputs[i]);
         alignedStripe.chunks[i].offsetsInBuf.add(0);
         alignedStripe.chunks[i].lengthsInBuf.add((int) alignedStripe.getSpanInBlock());
-      } else if (chunk.state == StripingChunk.FETCHED) {
+      }
+    }
+    return decodeInputs;
+  }
+
+  /**
+   * Some fetched {@link StripingChunk} might be stored in original application
+   * buffer instead of prepared decode input buffers. Some others are beyond
+   * the range of the internal blocks and should correspond to all zero bytes.
+   * When all pending requests have returned, this method should be called to
+   * finalize decode input buffers.
+   */
+  public static void finalizeDecodeInputs(final byte[][] decodeInputs,
+      AlignedStripe alignedStripe) {
+    for (int i = 0; i < alignedStripe.chunks.length; i++) {
+      StripingChunk chunk = alignedStripe.chunks[i];
+      if (chunk.state == StripingChunk.FETCHED) {
         int posInBuf = 0;
         for (int j = 0; j < chunk.offsetsInBuf.size(); j++) {
           System.arraycopy(chunk.buf, chunk.offsetsInBuf.get(j),
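Note: the javadoc added above spells out what finalizeDecodeInputs has to handle: a FETCHED chunk's bytes may sit scattered in the caller's buffer, tracked by offsetsInBuf and lengthsInBuf, and must be gathered into one contiguous decode input before decoding. A small sketch of just that gather step under simplified types; gather and appBuf are hypothetical names, not from the commit.

import java.util.Arrays;
import java.util.List;

public class FinalizeSketch {
  // Copy the chunk's scattered segments (offset/length pairs in the
  // application buffer) back-to-back into the per-block decode input.
  static void gather(byte[] appBuf, List<Integer> offsets,
      List<Integer> lengths, byte[] decodeInput) {
    int posInBuf = 0;                        // write cursor in decode input
    for (int j = 0; j < offsets.size(); j++) {
      System.arraycopy(appBuf, offsets.get(j), decodeInput, posInBuf,
          lengths.get(j));
      posInBuf += lengths.get(j);
    }
  }

  public static void main(String[] args) {
    // Fetched bytes landed at offsets 2 and 5 of the application buffer.
    byte[] appBuf = {9, 9, 1, 2, 9, 3, 4, 9};
    byte[] decodeInput = new byte[4];
    gather(appBuf, List.of(2, 5), List.of(2, 2), decodeInput);
    System.out.println(Arrays.toString(decodeInput)); // [1, 2, 3, 4]
  }
}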
@@ -267,39 +285,41 @@ public class StripedBlockUtil {
         }
       } else if (chunk.state == StripingChunk.ALLZERO) {
         Arrays.fill(decodeInputs[i], (byte)0);
+      } else {
+        decodeInputs[i] = null;
       }
     }
-    return decodeInputs;
   }
 
   /**
-   * Decode based on the given input buffers and schema
+   * Decode based on the given input buffers and schema.
    */
-  public static void decodeAndFillBuffer(final byte[][] decodeInputs, byte[] buf,
-      AlignedStripe alignedStripe, int dataBlkNum, int parityBlkNum) {
+  public static void decodeAndFillBuffer(final byte[][] decodeInputs,
+      byte[] buf, AlignedStripe alignedStripe, int parityBlkNum,
+      RawErasureDecoder decoder) {
+    // Step 1: prepare indices and output buffers for missing data units
     int[] decodeIndices = new int[parityBlkNum];
     int pos = 0;
     for (int i = 0; i < alignedStripe.chunks.length; i++) {
-      if (alignedStripe.chunks[i].state != StripingChunk.FETCHED &&
-          alignedStripe.chunks[i].state != StripingChunk.ALLZERO) {
+      if (alignedStripe.chunks[i].state == StripingChunk.MISSING){
        decodeIndices[pos++] = i;
       }
     }
+    decodeIndices = Arrays.copyOf(decodeIndices, pos);
+    byte[][] decodeOutputs =
+        new byte[decodeIndices.length][(int) alignedStripe.getSpanInBlock()];
 
-    byte[][] outputs = new byte[parityBlkNum][(int) alignedStripe.getSpanInBlock()];
-    RSRawDecoder rsRawDecoder = new RSRawDecoder(dataBlkNum, parityBlkNum);
-    rsRawDecoder.decode(decodeInputs, decodeIndices, outputs);
+    // Step 2: decode into prepared output buffers
+    decoder.decode(decodeInputs, decodeIndices, decodeOutputs);
 
-    for (int i = 0; i < dataBlkNum + parityBlkNum; i++) {
-      StripingChunk chunk = alignedStripe.chunks[i];
+    // Step 3: fill original application buffer with decoded data
+    for (int i = 0; i < decodeIndices.length; i++) {
+      int missingBlkIdx = decodeIndices[i];
+      StripingChunk chunk = alignedStripe.chunks[missingBlkIdx];
       if (chunk.state == StripingChunk.MISSING) {
         int srcPos = 0;
         for (int j = 0; j < chunk.offsetsInBuf.size(); j++) {
-          //TODO: workaround (filling fixed bytes), to remove after HADOOP-11938
-          // System.arraycopy(outputs[i], srcPos, buf, chunk.offsetsInBuf.get(j),
-          //     chunk.lengthsInBuf.get(j));
-          Arrays.fill(buf, chunk.offsetsInBuf.get(j),
-              chunk.offsetsInBuf.get(j) + chunk.lengthsInBuf.get(j), (byte)7);
+          System.arraycopy(decodeOutputs[i], srcPos, buf, chunk.offsetsInBuf.get(j),
+              chunk.lengthsInBuf.get(j));
           srcPos += chunk.lengthsInBuf.get(j);
         }
       }
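Note: Step 1 of the rewritten decodeAndFillBuffer sizes decodeIndices for the worst case (at most parityBlkNum units can be lost and still be recoverable) and trims it with Arrays.copyOf once the MISSING chunks are counted, so Step 2 allocates exactly one output buffer per actual erasure. A compact sketch of that pattern; the enum and values are illustrative only.

import java.util.Arrays;

public class DecodeIndicesSketch {
  enum State { FETCHED, MISSING, ALLZERO }

  public static void main(String[] args) {
    State[] chunks =
        { State.FETCHED, State.MISSING, State.FETCHED, State.ALLZERO };
    int parityBlkNum = 2;
    int span = 4;

    int[] decodeIndices = new int[parityBlkNum]; // worst case: all parity used
    int pos = 0;
    for (int i = 0; i < chunks.length; i++) {
      if (chunks[i] == State.MISSING) {
        decodeIndices[pos++] = i;
      }
    }
    decodeIndices = Arrays.copyOf(decodeIndices, pos); // trim to actual count

    // One output buffer per erased unit, each spanning the aligned stripe.
    byte[][] decodeOutputs = new byte[decodeIndices.length][span];
    System.out.println(Arrays.toString(decodeIndices)); // [1]
    System.out.println(decodeOutputs.length);           // 1
  }
}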
TestDFSStripedInputStream.java

@@ -221,13 +221,13 @@ public class TestDFSStripedInputStream {
         decodeInputs[DATA_BLK_NUM][k] = SimulatedFSDataset.simulatedByte(
             new Block(bg.getBlock().getBlockId() + DATA_BLK_NUM), posInBlk);
       }
-      // RSRawDecoder rsRawDecoder = new RSRawDecoder();
-      // rsRawDecoder.initialize(DATA_BLK_NUM, PARITY_BLK_NUM, CELLSIZE);
-      // rsRawDecoder.decode(decodeInputs, missingBlkIdx, decodeOutputs);
+      for (int m : missingBlkIdx) {
+        decodeInputs[m] = null;
+      }
+      RSRawDecoder rsRawDecoder = new RSRawDecoder(DATA_BLK_NUM, PARITY_BLK_NUM);
+      rsRawDecoder.decode(decodeInputs, missingBlkIdx, decodeOutputs);
       int posInBuf = i * CELLSIZE * DATA_BLK_NUM + failedDNIdx * CELLSIZE;
-      // System.arraycopy(decodeOutputs[0], 0, expected, posInBuf, CELLSIZE);
-      //TODO: workaround (filling fixed bytes), to remove after HADOOP-11938
-      Arrays.fill(expected, posInBuf, posInBuf + CELLSIZE, (byte)7);
+      System.arraycopy(decodeOutputs[0], 0, expected, posInBuf, CELLSIZE);
     }
     int delta = 10;
     int done = 0;
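Note: the test change above follows the decoder contract rather than the old (byte)7 placeholder: every input slot named in missingBlkIdx must be nulled before decode, otherwise the decoder cannot tell which units are erased. A hedged sketch of that pattern; the Decoder interface and checkRecovery are hypothetical stand-ins for RawErasureDecoder and the real test.

import java.util.Arrays;

public class TestPatternSketch {
  interface Decoder {
    void decode(byte[][] inputs, int[] erasedIndexes, byte[][] outputs);
  }

  static void checkRecovery(Decoder decoder, byte[][] decodeInputs,
      int[] missingBlkIdx, byte[][] expected) {
    for (int m : missingBlkIdx) {
      decodeInputs[m] = null;              // mark erased units for the decoder
    }
    byte[][] decodeOutputs =
        new byte[missingBlkIdx.length][expected[0].length];
    decoder.decode(decodeInputs, missingBlkIdx, decodeOutputs);
    for (int i = 0; i < missingBlkIdx.length; i++) {
      if (!Arrays.equals(expected[i], decodeOutputs[i])) {
        throw new AssertionError("unit " + missingBlkIdx[i] + " not recovered");
      }
    }
  }

  public static void main(String[] args) {
    // Toy replication code: the second unit is a copy of the first, so
    // recovery just copies whichever input survived.
    Decoder copyDecoder = (inputs, erased, outputs) -> {
      for (int e = 0; e < erased.length; e++) {
        for (byte[] in : inputs) {
          if (in != null) {
            System.arraycopy(in, 0, outputs[e], 0, in.length);
          }
        }
      }
    };
    byte[][] inputs = { {1, 2, 3}, {1, 2, 3} };
    checkRecovery(copyDecoder, inputs, new int[]{0}, new byte[][]{{1, 2, 3}});
    System.out.println("recovered OK");
  }
}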
TestWriteReadStripedFile.java

@@ -382,16 +382,10 @@ public class TestWriteReadStripedFile {
     Assert.assertEquals("The length of file should be the same to write size",
         length - startOffsetInFile, readLen);
 
-    RSRawDecoder rsRawDecoder = new RSRawDecoder(dataBlocks, parityBlocks);
     byte[] expected = new byte[readLen];
     for (int i = startOffsetInFile; i < length; i++) {
-      //TODO: workaround (filling fixed bytes), to remove after HADOOP-11938
-      if ((i / cellSize) % dataBlocks == failedDNIdx) {
-        expected[i - startOffsetInFile] = (byte)7;
-      } else {
-        expected[i - startOffsetInFile] = getByte(i);
-      }
+      expected[i - startOffsetInFile] = getByte(i);
     }
     for (int i = startOffsetInFile; i < length; i++) {
       Assert.assertEquals("Byte at " + i + " should be the same",
           expected[i - startOffsetInFile], buf[i - startOffsetInFile]);