HADOOP-11921. Enhance tests for erasure coders. Contributed by Kai Zheng.

This commit is contained in:
Zhe Zhang 2015-05-18 10:06:56 -07:00 committed by Zhe Zhang
parent a919726914
commit 9c7a78c874
9 changed files with 277 additions and 154 deletions

View File

@ -44,3 +44,5 @@
HADOOP-11818. Minor improvements for erasurecode classes. (Rakesh R via Kai Zheng)
HADOOP-11841. Remove unused ecschema-def.xml files. (szetszwo)
HADOOP-11921. Enhance tests for erasure coders. (Kai Zheng via Zhe Zhang)

View File

@ -49,15 +49,15 @@ public abstract class TestCoderBase {
* Prepare before running the case.
* @param numDataUnits
* @param numParityUnits
* @param erasedIndexes
* @param erasedDataIndexes
*/
protected void prepare(Configuration conf, int numDataUnits,
int numParityUnits, int[] erasedIndexes) {
int numParityUnits, int[] erasedDataIndexes) {
this.conf = conf;
this.numDataUnits = numDataUnits;
this.numParityUnits = numParityUnits;
this.erasedDataIndexes = erasedIndexes != null ?
erasedIndexes : new int[] {0};
this.erasedDataIndexes = erasedDataIndexes != null ?
erasedDataIndexes : new int[] {0};
}
/**
@ -82,15 +82,19 @@ protected void compareAndVerify(ECChunk[] erasedChunks,
}
/**
* Adjust and return erased indexes based on the array of the input chunks (
* parity chunks + data chunks).
* @return
* Adjust and return erased indexes altogether, including erased data indexes
* and parity indexes.
* @return erased indexes altogether
*/
protected int[] getErasedIndexesForDecoding() {
int[] erasedIndexesForDecoding = new int[erasedDataIndexes.length];
int idx = 0;
for (int i = 0; i < erasedDataIndexes.length; i++) {
erasedIndexesForDecoding[i] = erasedDataIndexes[i] + numParityUnits;
erasedIndexesForDecoding[idx ++] = erasedDataIndexes[i] + numParityUnits;
}
return erasedIndexesForDecoding;
}
@ -116,30 +120,23 @@ protected ECChunk[] prepareInputChunksForDecoding(ECChunk[] dataChunks,
}
/**
* Have a copy of the data chunks that's to be erased thereafter. The copy
* will be used to compare and verify with the to be recovered chunks.
* Erase chunks to test recovering them. The chunks are cloned before
* erasure so that the clones can be returned.
* @param dataChunks
* @return
* @return clone of erased chunks
*/
protected ECChunk[] copyDataChunksToErase(ECChunk[] dataChunks) {
ECChunk[] copiedChunks = new ECChunk[erasedDataIndexes.length];
protected ECChunk[] backupAndEraseChunks(ECChunk[] dataChunks) {
ECChunk[] toEraseChunks = new ECChunk[erasedDataIndexes.length];
int idx = 0;
int j = 0;
for (int i = 0; i < erasedDataIndexes.length; i++) {
copiedChunks[j ++] = cloneChunkWithData(dataChunks[erasedDataIndexes[i]]);
ECChunk chunk = dataChunks[erasedDataIndexes[i]];
toEraseChunks[idx ++] = cloneChunkWithData(chunk);
eraseDataFromChunk(chunk);
}
return copiedChunks;
}
/**
* Erase some data chunks to test the recovering of them
* @param dataChunks
*/
protected void eraseSomeDataBlocks(ECChunk[] dataChunks) {
for (int i = 0; i < erasedDataIndexes.length; i++) {
eraseDataFromChunk(dataChunks[erasedDataIndexes[i]]);
}
return toEraseChunks;
}
/**
@ -277,6 +274,7 @@ protected ECChunk[] prepareParityChunksForEncoding() {
*/
protected ECChunk[] prepareOutputChunksForDecoding() {
ECChunk[] chunks = new ECChunk[erasedDataIndexes.length];
for (int i = 0; i < chunks.length; i++) {
chunks[i] = allocateOutputChunk();
}

View File

@ -29,6 +29,9 @@ public abstract class TestErasureCoderBase extends TestCoderBase {
protected Class<? extends ErasureCoder> encoderClass;
protected Class<? extends ErasureCoder> decoderClass;
private ErasureCoder encoder;
private ErasureCoder decoder;
protected int numChunksInBlock = 16;
/**
@ -54,39 +57,27 @@ public TestBlock(ECChunk[] chunks) {
*/
protected void testCoding(boolean usingDirectBuffer) {
this.usingDirectBuffer = usingDirectBuffer;
ErasureCoder encoder = createEncoder();
prepareCoders();
// Generate data and encode
ECBlockGroup blockGroup = prepareBlockGroupForEncoding();
// Backup all the source chunks for later recovering because some coders
// may affect the source data.
TestBlock[] clonedDataBlocks = cloneBlocksWithData((TestBlock[])
blockGroup.getDataBlocks());
// Make a copy of a strip for later comparing
TestBlock[] toEraseBlocks = copyDataBlocksToErase(clonedDataBlocks);
TestBlock[] clonedDataBlocks = cloneBlocksWithData((TestBlock[]) blockGroup.getDataBlocks());
ErasureCodingStep codingStep;
try {
codingStep = encoder.calculateCoding(blockGroup);
performCodingStep(codingStep);
} finally {
encoder.release();
}
// Erase the copied sources
eraseSomeDataBlocks(clonedDataBlocks);
// Erase specified sources but return copies of them for later comparing
TestBlock[] backupBlocks = backupAndEraseBlocks(clonedDataBlocks);
//Decode
// Decode
blockGroup = new ECBlockGroup(clonedDataBlocks, blockGroup.getParityBlocks());
ErasureCoder decoder = createDecoder();
try {
codingStep = decoder.calculateCoding(blockGroup);
performCodingStep(codingStep);
} finally {
decoder.release();
}
//Compare
compareAndVerify(toEraseBlocks, codingStep.getOutputBlocks());
// Compare
compareAndVerify(backupBlocks, codingStep.getOutputBlocks());
}
/**
@ -129,8 +120,7 @@ private void performCodingStep(ErasureCodingStep codingStep) {
protected void compareAndVerify(ECBlock[] erasedBlocks,
ECBlock[] recoveredBlocks) {
for (int i = 0; i < erasedBlocks.length; ++i) {
compareAndVerify(((TestBlock) erasedBlocks[i]).chunks,
((TestBlock) recoveredBlocks[i]).chunks);
compareAndVerify(((TestBlock) erasedBlocks[i]).chunks, ((TestBlock) recoveredBlocks[i]).chunks);
}
}
@ -151,6 +141,16 @@ private ErasureCoder createEncoder() {
return encoder;
}
/**
 * Make sure both coder fields are populated before a coding round.
 * A coder is created only when its field is still null; an existing
 * instance is kept as is.
 */
private void prepareCoders() {
  encoder = (encoder != null) ? encoder : createEncoder();
  decoder = (decoder != null) ? decoder : createDecoder();
}
/**
* Create the erasure decoder for the test.
* @return
@ -201,6 +201,26 @@ protected ECBlock generateDataBlock() {
return new TestBlock(chunks);
}
/**
 * Erase the blocks selected by erasedDataIndexes so that recovering them
 * can be tested. Each selected block is cloned before it is erased, and the
 * clones are returned in the same order as erasedDataIndexes.
 * @param dataBlocks the data blocks, some of which are erased in place
 * @return clones of the erased blocks, taken before erasure
 */
protected TestBlock[] backupAndEraseBlocks(TestBlock[] dataBlocks) {
  final int[] erasedIndexes = erasedDataIndexes;
  TestBlock[] backups = new TestBlock[erasedIndexes.length];

  for (int pos = 0; pos < erasedIndexes.length; pos++) {
    TestBlock target = dataBlocks[erasedIndexes[pos]];
    // Clone first: erasing afterwards modifies the block in place.
    backups[pos] = cloneBlockWithData(target);
    eraseDataFromBlock(target);
  }

  return backups;
}
/**
* Copy the data blocks that are to be erased, for later comparison and
* verification.
@ -255,22 +275,9 @@ protected static TestBlock cloneBlockWithData(TestBlock block) {
}
/**
* Erase some data blocks specified by the indexes from the data blocks.
* @param dataBlocks
* Erase data from a block.
*/
protected void eraseSomeDataBlocks(TestBlock[] dataBlocks) {
for (int i = 0; i < erasedDataIndexes.length; ++i) {
eraseDataFromBlock(dataBlocks, erasedDataIndexes[i]);
}
}
/**
* Erase data from a block specified by erased index.
* @param blocks
* @param erasedIndex
*/
protected void eraseDataFromBlock(TestBlock[] blocks, int erasedIndex) {
TestBlock theBlock = blocks[erasedIndex];
protected void eraseDataFromBlock(TestBlock theBlock) {
eraseDataFromChunks(theBlock.chunks);
theBlock.setErased(true);
}

View File

@ -40,19 +40,18 @@ public void setup() {
}
@Test
public void testCodingNoDirectBuffer_10x4() {
prepare(null, 10, 4, null);
public void testCodingNoDirectBuffer_10x4_erasing_d0() {
prepare(null, 10, 4, new int[] {0});
/**
* Run the coding twice to test whether the coders can be reused repeatedly.
* This matters because the underlying coding buffers are shared, which may
* expose bugs.
*/
testCoding(false);
testCoding(false);
}
@Test
public void testCodingDirectBuffer_10x4() {
prepare(null, 10, 4, null);
testCoding(true);
}
@Test
public void testCodingDirectBufferWithConf_10x4() {
public void testCodingDirectBufferWithConf_10x4_erasing_d0() {
/**
* This tests if the two configuration items work or not.
*/
@ -61,31 +60,62 @@ public void testCodingDirectBufferWithConf_10x4() {
RSRawErasureCoderFactory.class.getCanonicalName());
conf.setBoolean(
CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_USEXOR_KEY, false);
prepare(conf, 10, 4, null);
prepare(conf, 10, 4, new int[]{0});
testCoding(true);
}
@Test
public void testCodingDirectBuffer_10x4_erasure_of_2_4() {
public void testCodingDirectBuffer_10x4_erasing_d2() {
prepare(null, 10, 4, new int[] {2});
testCoding(true);
testCoding(true);
}
@Test
public void testCodingDirectBuffer_10x4_erasing_d0() {
  final int[] erasedData = {0};
  prepare(null, 10, 4, erasedData);

  // Two consecutive rounds with direct buffers.
  for (int round = 0; round < 2; round++) {
    testCoding(true);
  }
}
@Test
public void testCodingBothBuffers_10x4_erasing_d0() {
  prepare(null, 10, 4, new int[] {0});

  /*
   * Alternate between direct and non-direct buffers to test whether the
   * coders can be reused repeatedly across different buffer usage models.
   * This matters because the underlying coding buffers are shared, which
   * may expose bugs.
   */
  final boolean[] directBufferRounds = {true, false, true, false};
  for (boolean useDirectBuffer : directBufferRounds) {
    testCoding(useDirectBuffer);
  }
}
@Test
public void testCodingDirectBuffer_10x4_erasure_of_d2_d4() {
  // Erase two data units (indexes 2 and 4) out of 10 data + 4 parity units.
  final int[] erasedData = {2, 4};
  prepare(null, 10, 4, erasedData);

  final boolean useDirectBuffer = true;
  testCoding(useDirectBuffer);
}
@Test
public void testCodingDirectBuffer_10x4_erasing_all() {
prepare(null, 10, 4, new int[] {0, 1, 2, 3});
public void testCodingDirectBuffer_10x4_erasing_d0_d1() {
prepare(null, 10, 4, new int[] {0, 1});
testCoding(true);
}
@Test
public void testCodingNoDirectBuffer_3x3() {
prepare(null, 3, 3, null);
public void testCodingNoDirectBuffer_3x3_erasing_d0() {
prepare(null, 3, 3, new int[] {0});
testCoding(false);
}
@Test
public void testCodingDirectBuffer_3x3() {
prepare(null, 3, 3, null);
public void testCodingDirectBuffer_3x3_erasing_d0() {
prepare(null, 3, 3, new int[] {0});
testCoding(true);
}

View File

@ -32,19 +32,33 @@ public void setup() {
this.numDataUnits = 10;
this.numParityUnits = 1;
this.erasedDataIndexes = new int[] {0};
this.numChunksInBlock = 10;
}
@Test
public void testCodingNoDirectBuffer() {
public void testCodingNoDirectBuffer_erasing_d0() {
prepare(null, 10, 1, new int[] {0});
/**
* Run the coding twice to test whether the coders can be reused repeatedly.
* This matters because the underlying coding buffers are shared, which may
* expose bugs.
*/
testCoding(false);
testCoding(false);
}
@Test
public void testCodingDirectBuffer() {
testCoding(true);
}
public void testCodingBothBuffers_erasing_d5() {
  final int[] erasedData = {5};
  prepare(null, 10, 1, erasedData);

  /*
   * Alternate between direct and non-direct buffers to test whether the
   * coders can be reused repeatedly across different buffer usage models.
   * This matters because the underlying coding buffers are shared, which
   * may expose bugs.
   */
  final boolean[] directBufferRounds = {true, false, true, false};
  for (boolean useDirectBuffer : directBufferRounds) {
    testCoding(useDirectBuffer);
  }
}
}

View File

@ -17,26 +17,13 @@
*/
package org.apache.hadoop.io.erasurecode.rawcoder;
import org.apache.hadoop.io.erasurecode.ECChunk;
import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil;
import org.junit.Before;
import org.junit.Test;
import java.nio.ByteBuffer;
/**
* Test raw Reed-solomon encoding and decoding.
* Test the raw Reed-Solomon coder implemented in Java.
*/
public class TestRSRawCoder extends TestRawCoderBase {
private static int symbolSize = 0;
private static int symbolMax = 0;
static {
symbolSize = (int) Math.round(Math.log(
RSUtil.GF.getFieldSize()) / Math.log(2));
symbolMax = (int) Math.pow(2, symbolSize);
}
public class TestRSRawCoder extends TestRSRawCoderBase {
@Before
public void setup() {
@ -45,49 +32,66 @@ public void setup() {
}
@Test
public void testCodingNoDirectBuffer_10x4() {
prepare(null, 10, 4, null);
public void testCodingNoDirectBuffer_10x4_erasing_d0() {
prepare(null, 10, 4, new int[] {0});
/**
* Run the coding twice to test whether the coders can be reused repeatedly.
* This matters because the underlying coding buffers are shared, which may
* expose bugs.
*/
testCoding(false);
testCoding(false);
}
@Test
public void testCodingDirectBuffer_10x4() {
prepare(null, 10, 4, null);
public void testCodingDirectBuffer_10x4_erasing_d2() {
prepare(null, 10, 4, new int[] {2});
testCoding(true);
testCoding(true);
}
@Test
public void testCodingDirectBuffer_10x4_erasure_of_2_4() {
public void testCodingDirectBuffer_10x4_erasing_d0() {
prepare(null, 10, 4, new int[] {0});
testCoding(true);
testCoding(true);
}
@Test
public void testCodingBothBuffers_10x4_erasing_d0() {
  prepare(null, 10, 4, new int[] {0});

  /*
   * Alternate between direct and non-direct buffers to test whether the
   * coders can be reused repeatedly across different buffer usage models.
   * This matters because the underlying coding buffers are shared, which
   * may expose bugs.
   */
  final boolean[] directBufferRounds = {true, false, true, false};
  for (boolean useDirectBuffer : directBufferRounds) {
    testCoding(useDirectBuffer);
  }
}
@Test
public void testCodingDirectBuffer_10x4_erasure_of_d2_d4() {
  // Erase two data units (indexes 2 and 4) out of 10 data + 4 parity units.
  final int[] erasedData = {2, 4};
  prepare(null, 10, 4, erasedData);

  final boolean useDirectBuffer = true;
  testCoding(useDirectBuffer);
}
@Test
public void testCodingDirectBuffer_10x4_erasing_all() {
prepare(null, 10, 4, new int[] {0, 1, 2, 3});
public void testCodingDirectBuffer_10x4_erasing_d0_d1() {
prepare(null, 10, 4, new int[] {0, 1});
testCoding(true);
}
@Test
public void testCodingNoDirectBuffer_3x3() {
prepare(null, 3, 3, null);
public void testCodingNoDirectBuffer_3x3_erasing_d0() {
prepare(null, 3, 3, new int[] {0});
testCoding(false);
}
@Test
public void testCodingDirectBuffer_3x3() {
prepare(null, 3, 3, null);
public void testCodingDirectBuffer_3x3_erasing_d0() {
prepare(null, 3, 3, new int[] {0});
testCoding(true);
}
@Override
protected ECChunk generateDataChunk() {
ByteBuffer buffer = allocateOutputBuffer();
for (int i = 0; i < chunkSize; i++) {
buffer.put((byte) RAND.nextInt(symbolMax));
}
buffer.flip();
return new ECChunk(buffer);
}
}

View File

@ -0,0 +1,51 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io.erasurecode.rawcoder;
import org.apache.hadoop.io.erasurecode.ECChunk;
import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil;
import org.junit.Before;
import org.junit.Test;
import java.nio.ByteBuffer;
/**
 * Test base for raw Reed-Solomon coders.
 *
 * Overrides data chunk generation so that every generated byte is derived
 * from a random value bounded by the size of the field exposed as
 * {@code RSUtil.GF}.
 */
public abstract class TestRSRawCoderBase extends TestRawCoderBase {

  /** Rounded base-2 logarithm of the field size reported by RSUtil.GF. */
  private static final int SYMBOL_BITS = (int) Math.round(
      Math.log(RSUtil.GF.getFieldSize()) / Math.log(2));

  /** Exclusive upper bound passed to the random generator: 2^SYMBOL_BITS. */
  private static final int SYMBOL_LIMIT = (int) Math.pow(2, SYMBOL_BITS);

  /**
   * Generate one data chunk: put chunkSize random bytes into a buffer
   * obtained from allocateOutputBuffer(), flip the buffer, and wrap it.
   * @return a chunk wrapping the flipped buffer
   */
  @Override
  protected ECChunk generateDataChunk() {
    ByteBuffer chunkBuffer = allocateOutputBuffer();

    int bytesLeft = chunkSize;
    while (bytesLeft-- > 0) {
      chunkBuffer.put((byte) RAND.nextInt(SYMBOL_LIMIT));
    }

    // Flip so that the written bytes become readable from position 0.
    chunkBuffer.flip();
    return new ECChunk(chunkBuffer);
  }
}

View File

@ -26,6 +26,8 @@
public abstract class TestRawCoderBase extends TestCoderBase {
protected Class<? extends RawErasureEncoder> encoderClass;
protected Class<? extends RawErasureDecoder> decoderClass;
private RawErasureEncoder encoder;
private RawErasureDecoder decoder;
/**
* Generating source data, encoding, recovering and then verifying.
@ -37,40 +39,41 @@ public abstract class TestRawCoderBase extends TestCoderBase {
*/
protected void testCoding(boolean usingDirectBuffer) {
this.usingDirectBuffer = usingDirectBuffer;
prepareCoders();
// Generate data and encode
ECChunk[] dataChunks = prepareDataChunksForEncoding();
ECChunk[] parityChunks = prepareParityChunksForEncoding();
RawErasureEncoder encoder = createEncoder();
// Backup all the source chunks for later recovering because some coders
// may affect the source data.
ECChunk[] clonedDataChunks = cloneChunksWithData(dataChunks);
// Make a copy of a strip for later comparing
ECChunk[] toEraseDataChunks = copyDataChunksToErase(clonedDataChunks);
try {
encoder.encode(dataChunks, parityChunks);
} finally {
encoder.release();
}
// Erase the copied sources
eraseSomeDataBlocks(clonedDataChunks);
//Decode
ECChunk[] inputChunks = prepareInputChunksForDecoding(clonedDataChunks,
parityChunks);
// Backup and erase some chunks
ECChunk[] backupChunks = backupAndEraseChunks(clonedDataChunks);
// Decode
ECChunk[] inputChunks = prepareInputChunksForDecoding(
clonedDataChunks, parityChunks);
ECChunk[] recoveredChunks = prepareOutputChunksForDecoding();
RawErasureDecoder decoder = createDecoder();
try {
decoder.decode(inputChunks,
getErasedIndexesForDecoding(), recoveredChunks);
} finally {
decoder.release();
decoder.decode(inputChunks, getErasedIndexesForDecoding(), recoveredChunks);
// Compare
compareAndVerify(backupChunks, recoveredChunks);
}
//Compare
compareAndVerify(toEraseDataChunks, recoveredChunks);
/**
 * Make sure both raw coder fields are populated before a coding round.
 * A coder is created only when its field is still null; an existing
 * instance is kept as is.
 */
private void prepareCoders() {
  encoder = (encoder != null) ? encoder : createEncoder();
  decoder = (decoder != null) ? decoder : createDecoder();
}
/**

View File

@ -32,18 +32,32 @@ public void setup() {
this.numDataUnits = 10;
this.numParityUnits = 1;
this.erasedDataIndexes = new int[] {0};
}
@Test
public void testCodingNoDirectBuffer() {
public void testCodingNoDirectBuffer_erasing_d0() {
prepare(null, 10, 1, new int[] {0});
/**
* Run the coding twice to test whether the coders can be reused repeatedly.
* This matters because the underlying coding buffers are shared, which may
* expose bugs.
*/
testCoding(false);
testCoding(false);
}
@Test
public void testCodingDirectBuffer() {
testCoding(true);
}
public void testCodingBothBuffers_erasing_d5() {
  final int[] erasedData = {5};
  prepare(null, 10, 1, erasedData);

  /*
   * Alternate between direct and non-direct buffers to test whether the
   * coders can be reused repeatedly across different buffer usage models.
   * This matters because the underlying coding buffers are shared, which
   * may expose bugs.
   */
  final boolean[] directBufferRounds = {true, false, true, false};
  for (boolean useDirectBuffer : directBufferRounds) {
    testCoding(useDirectBuffer);
  }
}
}