HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code ( Contributed by Kai Zheng)

This commit is contained in:
Vinayakumar B 2015-04-07 16:05:22 +05:30 committed by Zhe Zhang
parent aac73c21c3
commit 99502cbbe2
9 changed files with 352 additions and 0 deletions

View File

@ -37,3 +37,6 @@
HADOOP-11805 Better to rename some raw erasure coders. Contributed by Kai Zheng
( Kai Zheng )
HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code
( Kai Zheng via vinayakumarb )

View File

@ -79,4 +79,22 @@ public class ECBlockGroup {
return false;
}
/**
 * Count the erased blocks in this group, looking at both the data blocks
 * and the parity blocks.
 * @return number of data and parity blocks whose {@code isErased()} is true
 */
public int getErasedCount() {
  int numErased = 0;

  // Data blocks first, then parity blocks; each erased block counts once.
  for (ECBlock block : dataBlocks) {
    if (block.isErased()) {
      numErased++;
    }
  }

  for (ECBlock block : parityBlocks) {
    if (block.isErased()) {
      numErased++;
    }
  }

  return numErased;
}
}

View File

@ -0,0 +1,88 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io.erasurecode.codec;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.erasurecode.ECSchema;
import org.apache.hadoop.io.erasurecode.coder.*;
import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
/**
 * Abstract Erasure Codec that implements {@link ErasureCodec}.
 *
 * It keeps the EC schema, creates the block grouper, and wraps encoder and
 * decoder creation so that every coder returned to the caller has been
 * configured and initialized with the schema. Subclasses only supply the
 * concrete coder instances.
 */
public abstract class AbstractErasureCodec extends Configured
    implements ErasureCodec {

  private ECSchema schema;

  @Override
  public void setSchema(ECSchema schema) {
    this.schema = schema;
  }

  /**
   * Get the codec name as recorded in the schema.
   * @return codec name of the current schema
   * @throws NullPointerException if no schema has been set yet
   */
  public String getName() {
    return schema.getCodecName();
  }

  /**
   * Get the EC schema currently set on this codec.
   * @return the schema, or null if none has been set
   */
  protected ECSchema getSchema() {
    return schema;
  }

  @Override
  public BlockGrouper createBlockGrouper() {
    BlockGrouper blockGrouper = new BlockGrouper();
    blockGrouper.setSchema(getSchema());

    return blockGrouper;
  }

  @Override
  public ErasureCoder createEncoder() {
    ErasureCoder encoder = doCreateEncoder();
    prepareErasureCoder(encoder);
    return encoder;
  }

  /**
   * Create a new encoder instance to be initialized afterwards.
   * @return encoder
   */
  protected abstract ErasureCoder doCreateEncoder();

  @Override
  public ErasureCoder createDecoder() {
    ErasureCoder decoder = doCreateDecoder();
    prepareErasureCoder(decoder);
    return decoder;
  }

  /**
   * Create a new decoder instance to be initialized afterwards.
   * @return decoder
   */
  protected abstract ErasureCoder doCreateDecoder();

  /**
   * Hand the codec's configuration and schema over to a freshly created
   * coder.
   * @param erasureCoder the coder to configure and initialize
   * @throws IllegalStateException if no schema has been set on this codec
   */
  private void prepareErasureCoder(ErasureCoder erasureCoder) {
    ECSchema currentSchema = getSchema();
    if (currentSchema == null) {
      // A missing schema is a wrong-state error of this codec, not a generic
      // runtime failure. IllegalStateException is still a RuntimeException,
      // so existing catch sites keep working.
      throw new IllegalStateException("No schema has been set yet");
    }

    erasureCoder.setConf(getConf());
    erasureCoder.initialize(currentSchema);
  }
}

View File

@ -0,0 +1,56 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io.erasurecode.codec;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.io.erasurecode.ECSchema;
import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
/**
 * Erasure Codec API covering the essential, code-specific aspects of an
 * erasure code. Currently it covers only the block grouper and the erasure
 * coders. More aspects may be added here in future to make the behaviors
 * customizable.
 */
public interface ErasureCodec extends Configurable {

  /**
   * Set the EC schema to be used by this codec.
   * @param schema the EC schema this codec should use
   */
  void setSchema(ECSchema schema);

  /**
   * Create a block grouper.
   * @return block grouper
   */
  BlockGrouper createBlockGrouper();

  /**
   * Create an erasure encoder.
   * @return erasure encoder
   */
  ErasureCoder createEncoder();

  /**
   * Create an erasure decoder.
   * @return erasure decoder
   */
  ErasureCoder createDecoder();
}

View File

@ -0,0 +1,38 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io.erasurecode.codec;
import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
import org.apache.hadoop.io.erasurecode.coder.RSErasureDecoder;
import org.apache.hadoop.io.erasurecode.coder.RSErasureEncoder;
/**
 * A Reed-Solomon erasure codec. It supplies the Reed-Solomon encoder and
 * decoder instances to {@link AbstractErasureCodec}.
 */
public class RSErasureCodec extends AbstractErasureCodec {

  /**
   * Create a new Reed-Solomon encoder instance.
   * @return a new {@link RSErasureEncoder}
   */
  @Override
  protected ErasureCoder doCreateEncoder() {
    ErasureCoder rsEncoder = new RSErasureEncoder();
    return rsEncoder;
  }

  /**
   * Create a new Reed-Solomon decoder instance.
   * @return a new {@link RSErasureDecoder}
   */
  @Override
  protected ErasureCoder doCreateDecoder() {
    ErasureCoder rsDecoder = new RSErasureDecoder();
    return rsDecoder;
  }
}

View File

@ -0,0 +1,45 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io.erasurecode.codec;
import org.apache.hadoop.io.erasurecode.ECSchema;
import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
import org.apache.hadoop.io.erasurecode.coder.XORErasureDecoder;
import org.apache.hadoop.io.erasurecode.coder.XORErasureEncoder;
/**
 * A XOR erasure codec. It only accepts schemas with exactly one parity unit
 * and supplies the XOR encoder and decoder instances to
 * {@link AbstractErasureCodec}.
 */
public class XORErasureCodec extends AbstractErasureCodec {

  /**
   * Set the EC schema to be used by this codec.
   * @param schema the EC schema; it must declare exactly one parity unit
   * @throws IllegalArgumentException if the schema does not have exactly one
   *         parity unit
   * @throws NullPointerException if schema is null
   */
  @Override
  public void setSchema(ECSchema schema) {
    // Validate before storing so a rejected schema is never retained, and use
    // an explicit check rather than assert so the rule also holds when
    // assertions are disabled (the JVM default).
    int numParityUnits = schema.getNumParityUnits();
    if (numParityUnits != 1) {
      throw new IllegalArgumentException(
          "XOR codec requires exactly 1 parity unit, but the schema has "
              + numParityUnits);
    }

    super.setSchema(schema);
  }

  /**
   * Create a new XOR encoder instance.
   * @return a new {@link XORErasureEncoder}
   */
  @Override
  protected ErasureCoder doCreateEncoder() {
    return new XORErasureEncoder();
  }

  /**
   * Create a new XOR decoder instance.
   * @return a new {@link XORErasureDecoder}
   */
  @Override
  protected ErasureCoder doCreateDecoder() {
    return new XORErasureDecoder();
  }
}

View File

@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.coder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.erasurecode.ECSchema;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoder;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoderFactory;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
@ -104,6 +105,12 @@ public abstract class AbstractErasureCoder
this.chunkSize = chunkSize;
}
/**
 * Initialize from an EC schema by reading its data unit count, parity unit
 * count and chunk size and passing them to
 * {@link #initialize(int, int, int)}.
 * @param schema the EC schema to read the coding parameters from
 */
@Override
public void initialize(ECSchema schema) {
  int dataUnits = schema.getNumDataUnits();
  int parityUnits = schema.getNumParityUnits();
  int chunkLength = schema.getChunkSize();

  initialize(dataUnits, parityUnits, chunkLength);
}
@Override
public int getNumDataUnits() {
return numDataUnits;

View File

@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.coder;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.io.erasurecode.ECBlockGroup;
import org.apache.hadoop.io.erasurecode.ECSchema;
/**
* An erasure coder to perform encoding or decoding given a group. Generally it
@ -44,6 +45,12 @@ public interface ErasureCoder extends Configurable {
*/
public void initialize(int numDataUnits, int numParityUnits, int chunkSize);
/**
 * Initialize with an EC schema.
 * @param schema the EC schema to initialize this coder from; the
 *               AbstractErasureCoder implementation reads the number of
 *               data units, number of parity units and chunk size from it
 */
public void initialize(ECSchema schema);
/**
* The number of data input units for the coding. A unit can be a byte,
* chunk or buffer or even a block.

View File

@ -0,0 +1,90 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io.erasurecode.grouper;
import org.apache.hadoop.io.erasurecode.ECBlock;
import org.apache.hadoop.io.erasurecode.ECBlockGroup;
import org.apache.hadoop.io.erasurecode.ECSchema;
/**
 * As part of a codec, handles how a block group is formed for encoding and
 * tells whether erased blocks in a block group can be recovered.
 */
public class BlockGrouper {

  private ECSchema schema;

  /**
   * Set EC schema.
   * @param schema the EC schema this grouper takes its unit counts from
   */
  public void setSchema(ECSchema schema) {
    this.schema = schema;
  }

  /**
   * Get EC schema.
   * @return the schema previously set, or null if none has been set
   */
  protected ECSchema getSchema() {
    return this.schema;
  }

  /**
   * Get required data blocks count in a BlockGroup.
   * @return count of required data blocks, i.e. the schema's data unit count
   */
  public int getRequiredNumDataBlocks() {
    return this.schema.getNumDataUnits();
  }

  /**
   * Get required parity blocks count in a BlockGroup.
   * @return count of required parity blocks, i.e. the schema's parity unit
   *         count
   */
  public int getRequiredNumParityBlocks() {
    return this.schema.getNumParityUnits();
  }

  /**
   * Organize the given blocks into a BlockGroup, to be called by ECManager.
   * @param dataBlocks Data blocks to compute parity blocks against
   * @param parityBlocks To be computed parity blocks
   * @return a new block group wrapping the given data and parity blocks
   */
  public ECBlockGroup makeBlockGroup(ECBlock[] dataBlocks,
                                     ECBlock[] parityBlocks) {
    return new ECBlockGroup(dataBlocks, parityBlocks);
  }

  /**
   * Given a BlockGroup, tell if any of the missing blocks can be recovered,
   * to be called by ECManager.
   * @param blockGroup a blockGroup that may contain erased blocks, which may
   *                   or may not be recoverable
   * @return true if at least one block is erased and the erased count does
   *         not exceed the required parity block count, false otherwise
   */
  public boolean anyRecoverable(ECBlockGroup blockGroup) {
    int numErased = blockGroup.getErasedCount();

    // Nothing erased means there is nothing to recover.
    if (numErased <= 0) {
      return false;
    }

    return numErased <= getRequiredNumParityBlocks();
  }
}