HADOOP-11647. Reed-Solomon ErasureCoder. Contributed by Kai Zheng
This commit is contained in:
parent
90d332d6be
commit
df297245a7
|
@ -26,3 +26,6 @@
|
|||
|
||||
HADOOP-11707. Add factory to create raw erasure coder. Contributed by Kai Zheng
|
||||
( Kai Zheng )
|
||||
|
||||
HADOOP-11647. Reed-Solomon ErasureCoder. Contributed by Kai Zheng
|
||||
( Kai Zheng )
|
||||
|
|
|
@ -136,6 +136,21 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
|
|||
public static final boolean IO_COMPRESSION_CODEC_LZ4_USELZ4HC_DEFAULT =
|
||||
false;
|
||||
|
||||
/**
|
||||
* Erasure Coding configuration family
|
||||
*/
|
||||
|
||||
/** Supported erasure codec classes */
|
||||
public static final String IO_ERASURECODE_CODECS_KEY = "io.erasurecode.codecs";
|
||||
|
||||
/** Use XOR raw coder when possible for the RS codec */
|
||||
public static final String IO_ERASURECODE_CODEC_RS_USEXOR_KEY =
|
||||
"io.erasurecode.codec.rs.usexor";
|
||||
|
||||
/** Raw coder factory for the RS codec */
|
||||
public static final String IO_ERASURECODE_CODEC_RS_RAWCODER_KEY =
|
||||
"io.erasurecode.codec.rs.rawcoder";
|
||||
|
||||
/**
|
||||
* Service Authorization
|
||||
*/
|
||||
|
|
|
@ -17,7 +17,12 @@
|
|||
*/
|
||||
package org.apache.hadoop.io.erasurecode.coder;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoder;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoderFactory;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder;
|
||||
|
||||
/**
|
||||
* A common class of basic facilities to be shared by encoder and decoder
|
||||
|
@ -31,6 +36,66 @@ public abstract class AbstractErasureCoder
|
|||
private int numParityUnits;
|
||||
private int chunkSize;
|
||||
|
||||
/**
|
||||
* Create raw decoder using the factory specified by rawCoderFactoryKey
|
||||
* @param rawCoderFactoryKey
|
||||
* @return raw decoder
|
||||
*/
|
||||
protected RawErasureDecoder createRawDecoder(String rawCoderFactoryKey) {
|
||||
RawErasureCoder rawCoder = createRawCoder(getConf(),
|
||||
rawCoderFactoryKey, false);
|
||||
return (RawErasureDecoder) rawCoder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create raw encoder using the factory specified by rawCoderFactoryKey
|
||||
* @param rawCoderFactoryKey
|
||||
* @return raw encoder
|
||||
*/
|
||||
protected RawErasureEncoder createRawEncoder(String rawCoderFactoryKey) {
|
||||
RawErasureCoder rawCoder = createRawCoder(getConf(),
|
||||
rawCoderFactoryKey, true);
|
||||
return (RawErasureEncoder) rawCoder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create raw coder using specified conf and raw coder factory key.
|
||||
* @param conf
|
||||
* @param rawCoderFactoryKey
|
||||
* @param isEncoder
|
||||
* @return raw coder
|
||||
*/
|
||||
protected static RawErasureCoder createRawCoder(Configuration conf,
|
||||
String rawCoderFactoryKey, boolean isEncoder) {
|
||||
|
||||
if (conf == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Class<? extends RawErasureCoderFactory> factClass = null;
|
||||
factClass = conf.getClass(rawCoderFactoryKey,
|
||||
factClass, RawErasureCoderFactory.class);
|
||||
|
||||
if (factClass == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
RawErasureCoderFactory fact;
|
||||
try {
|
||||
fact = factClass.newInstance();
|
||||
} catch (InstantiationException e) {
|
||||
throw new RuntimeException("Failed to create raw coder", e);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new RuntimeException("Failed to create raw coder", e);
|
||||
}
|
||||
|
||||
if (fact != null) {
|
||||
return isEncoder ? fact.createEncoder() : fact.createDecoder();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize(int numDataUnits, int numParityUnits,
|
||||
int chunkSize) {
|
||||
|
|
|
@ -30,7 +30,8 @@ public abstract class AbstractErasureDecoder extends AbstractErasureCoder
|
|||
|
||||
@Override
|
||||
public ErasureCodingStep decode(ECBlockGroup blockGroup) {
|
||||
return performDecoding(blockGroup);
|
||||
// We may have more than this when considering complicated cases. HADOOP-11550
|
||||
return prepareDecodingStep(blockGroup);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -38,7 +39,8 @@ public abstract class AbstractErasureDecoder extends AbstractErasureCoder
|
|||
* @param blockGroup
|
||||
* @return decoding step for caller to do the real work
|
||||
*/
|
||||
protected abstract ErasureCodingStep performDecoding(ECBlockGroup blockGroup);
|
||||
protected abstract ErasureCodingStep prepareDecodingStep(
|
||||
ECBlockGroup blockGroup);
|
||||
|
||||
/**
|
||||
* We have all the data blocks and parity blocks as input blocks for
|
||||
|
|
|
@ -30,7 +30,8 @@ public abstract class AbstractErasureEncoder extends AbstractErasureCoder
|
|||
|
||||
@Override
|
||||
public ErasureCodingStep encode(ECBlockGroup blockGroup) {
|
||||
return performEncoding(blockGroup);
|
||||
// We may have more than this when considering complicated cases. HADOOP-11550
|
||||
return prepareEncodingStep(blockGroup);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -38,7 +39,8 @@ public abstract class AbstractErasureEncoder extends AbstractErasureCoder
|
|||
* @param blockGroup
|
||||
* @return encoding step for caller to do the real work
|
||||
*/
|
||||
protected abstract ErasureCodingStep performEncoding(ECBlockGroup blockGroup);
|
||||
protected abstract ErasureCodingStep prepareEncodingStep(
|
||||
ECBlockGroup blockGroup);
|
||||
|
||||
protected ECBlock[] getInputBlocks(ECBlockGroup blockGroup) {
|
||||
return blockGroup.getDataBlocks();
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
package org.apache.hadoop.io.erasurecode.coder;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||
import org.apache.hadoop.io.erasurecode.ECBlock;
|
||||
import org.apache.hadoop.io.erasurecode.ECBlockGroup;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.JRSRawDecoder;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.XorRawDecoder;
|
||||
|
||||
/**
|
||||
* Reed-Solomon erasure decoder that decodes a block group.
|
||||
*
|
||||
* It implements {@link ErasureDecoder}.
|
||||
*/
|
||||
public class RSErasureDecoder extends AbstractErasureDecoder {
|
||||
private RawErasureDecoder rsRawDecoder;
|
||||
private RawErasureDecoder xorRawDecoder;
|
||||
private boolean useXorWhenPossible = true;
|
||||
|
||||
@Override
|
||||
public void setConf(Configuration conf) {
|
||||
super.setConf(conf);
|
||||
|
||||
if (conf != null) {
|
||||
this.useXorWhenPossible = conf.getBoolean(
|
||||
CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_USEXOR_KEY, true);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ErasureCodingStep prepareDecodingStep(final ECBlockGroup blockGroup) {
|
||||
|
||||
RawErasureDecoder rawDecoder;
|
||||
|
||||
ECBlock[] inputBlocks = getInputBlocks(blockGroup);
|
||||
ECBlock[] outputBlocks = getOutputBlocks(blockGroup);
|
||||
|
||||
/**
|
||||
* Optimization: according to some benchmark, when only one block is erased
|
||||
* and to be recovering, the most simple XOR scheme can be much efficient.
|
||||
* We will have benchmark tests to verify this opt is effect or not.
|
||||
*/
|
||||
if (outputBlocks.length == 1 && useXorWhenPossible) {
|
||||
rawDecoder = checkCreateXorRawDecoder();
|
||||
} else {
|
||||
rawDecoder = checkCreateRSRawDecoder();
|
||||
}
|
||||
|
||||
return new ErasureDecodingStep(inputBlocks,
|
||||
getErasedIndexes(inputBlocks), outputBlocks, rawDecoder);
|
||||
}
|
||||
|
||||
private RawErasureDecoder checkCreateRSRawDecoder() {
|
||||
if (rsRawDecoder == null) {
|
||||
rsRawDecoder = createRawDecoder(
|
||||
CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_RAWCODER_KEY);
|
||||
if (rsRawDecoder == null) {
|
||||
rsRawDecoder = new JRSRawDecoder();
|
||||
}
|
||||
rsRawDecoder.initialize(getNumDataUnits(),
|
||||
getNumParityUnits(), getChunkSize());
|
||||
}
|
||||
return rsRawDecoder;
|
||||
}
|
||||
|
||||
private RawErasureDecoder checkCreateXorRawDecoder() {
|
||||
if (xorRawDecoder == null) {
|
||||
xorRawDecoder = new XorRawDecoder();
|
||||
xorRawDecoder.initialize(getNumDataUnits(), 1, getChunkSize());
|
||||
}
|
||||
return xorRawDecoder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void release() {
|
||||
if (xorRawDecoder != null) {
|
||||
xorRawDecoder.release();
|
||||
} else if (rsRawDecoder != null) {
|
||||
rsRawDecoder.release();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package org.apache.hadoop.io.erasurecode.coder;
|
||||
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||
import org.apache.hadoop.io.erasurecode.ECBlock;
|
||||
import org.apache.hadoop.io.erasurecode.ECBlockGroup;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.JRSRawEncoder;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder;
|
||||
|
||||
/**
|
||||
* Reed-Solomon erasure encoder that encodes a block group.
|
||||
*
|
||||
* It implements {@link ErasureEncoder}.
|
||||
*/
|
||||
public class RSErasureEncoder extends AbstractErasureEncoder {
|
||||
private RawErasureEncoder rawEncoder;
|
||||
|
||||
@Override
|
||||
protected ErasureCodingStep prepareEncodingStep(final ECBlockGroup blockGroup) {
|
||||
|
||||
RawErasureEncoder rawEncoder = checkCreateRSRawEncoder();
|
||||
|
||||
ECBlock[] inputBlocks = getInputBlocks(blockGroup);
|
||||
|
||||
return new ErasureEncodingStep(inputBlocks,
|
||||
getOutputBlocks(blockGroup), rawEncoder);
|
||||
}
|
||||
|
||||
private RawErasureEncoder checkCreateRSRawEncoder() {
|
||||
if (rawEncoder == null) {
|
||||
rawEncoder = createRawEncoder(
|
||||
CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_RAWCODER_KEY);
|
||||
if (rawEncoder == null) {
|
||||
rawEncoder = new JRSRawEncoder();
|
||||
}
|
||||
rawEncoder.initialize(getNumDataUnits(),
|
||||
getNumParityUnits(), getChunkSize());
|
||||
}
|
||||
return rawEncoder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void release() {
|
||||
if (rawEncoder != null) {
|
||||
rawEncoder.release();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -30,7 +30,7 @@ import org.apache.hadoop.io.erasurecode.rawcoder.XorRawDecoder;
|
|||
public class XorErasureDecoder extends AbstractErasureDecoder {
|
||||
|
||||
@Override
|
||||
protected ErasureCodingStep performDecoding(final ECBlockGroup blockGroup) {
|
||||
protected ErasureCodingStep prepareDecodingStep(final ECBlockGroup blockGroup) {
|
||||
// May be configured
|
||||
RawErasureDecoder rawDecoder = new XorRawDecoder();
|
||||
rawDecoder.initialize(getNumDataUnits(),
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.hadoop.io.erasurecode.rawcoder.XorRawEncoder;
|
|||
public class XorErasureEncoder extends AbstractErasureEncoder {
|
||||
|
||||
@Override
|
||||
protected ErasureCodingStep performEncoding(final ECBlockGroup blockGroup) {
|
||||
protected ErasureCodingStep prepareEncodingStep(final ECBlockGroup blockGroup) {
|
||||
// May be configured
|
||||
RawErasureEncoder rawEncoder = new XorRawEncoder();
|
||||
rawEncoder.initialize(getNumDataUnits(),
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.io.erasurecode.coder;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.JRSRawErasureCoderFactory;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Test Reed-Solomon encoding and decoding.
|
||||
*/
|
||||
public class TestRSErasureCoder extends TestErasureCoderBase {
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
this.encoderClass = RSErasureEncoder.class;
|
||||
this.decoderClass = RSErasureDecoder.class;
|
||||
|
||||
this.numDataUnits = 10;
|
||||
this.numParityUnits = 1;
|
||||
|
||||
this.numChunksInBlock = 10;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCodingNoDirectBuffer_10x4() {
|
||||
prepare(null, 10, 4, null);
|
||||
testCoding(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCodingDirectBuffer_10x4() {
|
||||
prepare(null, 10, 4, null);
|
||||
testCoding(true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCodingDirectBufferWithConf_10x4() {
|
||||
/**
|
||||
* This tests if the two configuration items work or not.
|
||||
*/
|
||||
Configuration conf = new Configuration();
|
||||
conf.set(CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_RAWCODER_KEY,
|
||||
JRSRawErasureCoderFactory.class.getCanonicalName());
|
||||
conf.setBoolean(
|
||||
CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_USEXOR_KEY, false);
|
||||
prepare(conf, 10, 4, null);
|
||||
testCoding(true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCodingDirectBuffer_10x4_erasure_of_2_4() {
|
||||
prepare(null, 10, 4, new int[] {2, 4});
|
||||
testCoding(true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCodingDirectBuffer_10x4_erasing_all() {
|
||||
prepare(null, 10, 4, new int[] {0, 1, 2, 3});
|
||||
testCoding(true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCodingNoDirectBuffer_3x3() {
|
||||
prepare(null, 3, 3, null);
|
||||
testCoding(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCodingDirectBuffer_3x3() {
|
||||
prepare(null, 3, 3, null);
|
||||
testCoding(true);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue