HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code (Contributed by Kai Zheng)
This commit is contained in:
parent
aac73c21c3
commit
99502cbbe2
|
@ -37,3 +37,6 @@
|
|||
|
||||
HADOOP-11805 Better to rename some raw erasure coders. Contributed by Kai Zheng
|
||||
( Kai Zheng )
|
||||
|
||||
HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code
|
||||
( Kai Zheng via vinayakumarb )
|
||||
|
|
|
@ -79,4 +79,22 @@ public class ECBlockGroup {
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get erased blocks count
|
||||
* @return
|
||||
*/
|
||||
public int getErasedCount() {
|
||||
int erasedCount = 0;
|
||||
|
||||
for (ECBlock dataBlock : dataBlocks) {
|
||||
if (dataBlock.isErased()) erasedCount++;
|
||||
}
|
||||
|
||||
for (ECBlock parityBlock : parityBlocks) {
|
||||
if (parityBlock.isErased()) erasedCount++;
|
||||
}
|
||||
|
||||
return erasedCount;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.io.erasurecode.codec;
|
||||
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||
import org.apache.hadoop.io.erasurecode.coder.*;
|
||||
import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
|
||||
|
||||
/**
|
||||
* Abstract Erasure Codec that implements {@link ErasureCodec}.
|
||||
*/
|
||||
public abstract class AbstractErasureCodec extends Configured
|
||||
implements ErasureCodec {
|
||||
|
||||
private ECSchema schema;
|
||||
|
||||
@Override
|
||||
public void setSchema(ECSchema schema) {
|
||||
this.schema = schema;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return schema.getCodecName();
|
||||
}
|
||||
|
||||
protected ECSchema getSchema() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BlockGrouper createBlockGrouper() {
|
||||
BlockGrouper blockGrouper = new BlockGrouper();
|
||||
blockGrouper.setSchema(getSchema());
|
||||
|
||||
return blockGrouper;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ErasureCoder createEncoder() {
|
||||
ErasureCoder encoder = doCreateEncoder();
|
||||
prepareErasureCoder(encoder);
|
||||
return encoder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new encoder instance to be initialized afterwards.
|
||||
* @return encoder
|
||||
*/
|
||||
protected abstract ErasureCoder doCreateEncoder();
|
||||
|
||||
@Override
|
||||
public ErasureCoder createDecoder() {
|
||||
ErasureCoder decoder = doCreateDecoder();
|
||||
prepareErasureCoder(decoder);
|
||||
return decoder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new decoder instance to be initialized afterwards.
|
||||
* @return decoder
|
||||
*/
|
||||
protected abstract ErasureCoder doCreateDecoder();
|
||||
|
||||
private void prepareErasureCoder(ErasureCoder erasureCoder) {
|
||||
if (getSchema() == null) {
|
||||
throw new RuntimeException("No schema been set yet");
|
||||
}
|
||||
|
||||
erasureCoder.setConf(getConf());
|
||||
erasureCoder.initialize(getSchema());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.io.erasurecode.codec;
|
||||
|
||||
import org.apache.hadoop.conf.Configurable;
|
||||
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||
import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
|
||||
import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
|
||||
|
||||
/**
|
||||
* Erasure Codec API that's to cover the essential specific aspects of a code.
|
||||
* Currently it cares only block grouper and erasure coder. In future we may
|
||||
* add more aspects here to make the behaviors customizable.
|
||||
*/
|
||||
public interface ErasureCodec extends Configurable {
|
||||
|
||||
/**
|
||||
* Set EC schema to be used by this codec.
|
||||
* @param schema
|
||||
*/
|
||||
public void setSchema(ECSchema schema);
|
||||
|
||||
/**
|
||||
* Create block grouper
|
||||
* @return block grouper
|
||||
*/
|
||||
public BlockGrouper createBlockGrouper();
|
||||
|
||||
/**
|
||||
* Create Erasure Encoder
|
||||
* @return erasure encoder
|
||||
*/
|
||||
public ErasureCoder createEncoder();
|
||||
|
||||
/**
|
||||
* Create Erasure Decoder
|
||||
* @return erasure decoder
|
||||
*/
|
||||
public ErasureCoder createDecoder();
|
||||
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.io.erasurecode.codec;
|
||||
|
||||
import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
|
||||
import org.apache.hadoop.io.erasurecode.coder.RSErasureDecoder;
|
||||
import org.apache.hadoop.io.erasurecode.coder.RSErasureEncoder;
|
||||
|
||||
/**
|
||||
* A Reed-Solomon erasure codec.
|
||||
*/
|
||||
public class RSErasureCodec extends AbstractErasureCodec {
|
||||
|
||||
@Override
|
||||
protected ErasureCoder doCreateEncoder() {
|
||||
return new RSErasureEncoder();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ErasureCoder doCreateDecoder() {
|
||||
return new RSErasureDecoder();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.io.erasurecode.codec;
|
||||
|
||||
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||
import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
|
||||
import org.apache.hadoop.io.erasurecode.coder.XORErasureDecoder;
|
||||
import org.apache.hadoop.io.erasurecode.coder.XORErasureEncoder;
|
||||
|
||||
/**
|
||||
* A XOR erasure codec.
|
||||
*/
|
||||
public class XORErasureCodec extends AbstractErasureCodec {
|
||||
|
||||
@Override
|
||||
public void setSchema(ECSchema schema) {
|
||||
super.setSchema(schema);
|
||||
assert(schema.getNumParityUnits() == 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ErasureCoder doCreateEncoder() {
|
||||
return new XORErasureEncoder();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ErasureCoder doCreateDecoder() {
|
||||
return new XORErasureDecoder();
|
||||
}
|
||||
}
|
|
@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.coder;
|
|||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoder;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoderFactory;
|
||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
|
||||
|
@ -104,6 +105,12 @@ public abstract class AbstractErasureCoder
|
|||
this.chunkSize = chunkSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize(ECSchema schema) {
|
||||
initialize(schema.getNumDataUnits(), schema.getNumParityUnits(),
|
||||
schema.getChunkSize());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumDataUnits() {
|
||||
return numDataUnits;
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.coder;
|
|||
|
||||
import org.apache.hadoop.conf.Configurable;
|
||||
import org.apache.hadoop.io.erasurecode.ECBlockGroup;
|
||||
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||
|
||||
/**
|
||||
* An erasure coder to perform encoding or decoding given a group. Generally it
|
||||
|
@ -44,6 +45,12 @@ public interface ErasureCoder extends Configurable {
|
|||
*/
|
||||
public void initialize(int numDataUnits, int numParityUnits, int chunkSize);
|
||||
|
||||
/**
|
||||
* Initialize with an EC schema.
|
||||
* @param schema the EC schema to initialize this coder with
|
||||
*/
|
||||
public void initialize(ECSchema schema);
|
||||
|
||||
/**
|
||||
* The number of data input units for the coding. A unit can be a byte,
|
||||
* chunk or buffer or even a block.
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.io.erasurecode.grouper;
|
||||
|
||||
import org.apache.hadoop.io.erasurecode.ECBlock;
|
||||
import org.apache.hadoop.io.erasurecode.ECBlockGroup;
|
||||
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||
|
||||
/**
|
||||
* As part of a codec, to handle how to form a block group for encoding
|
||||
* and provide instructions on how to recover erased blocks from a block group
|
||||
*/
|
||||
public class BlockGrouper {
|
||||
|
||||
private ECSchema schema;
|
||||
|
||||
/**
|
||||
* Set EC schema.
|
||||
* @param schema
|
||||
*/
|
||||
public void setSchema(ECSchema schema) {
|
||||
this.schema = schema;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get EC schema.
|
||||
* @return
|
||||
*/
|
||||
protected ECSchema getSchema() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get required data blocks count in a BlockGroup.
|
||||
* @return count of required data blocks
|
||||
*/
|
||||
public int getRequiredNumDataBlocks() {
|
||||
return schema.getNumDataUnits();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get required parity blocks count in a BlockGroup.
|
||||
* @return count of required parity blocks
|
||||
*/
|
||||
public int getRequiredNumParityBlocks() {
|
||||
return schema.getNumParityUnits();
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculating and organizing BlockGroup, to be called by ECManager
|
||||
* @param dataBlocks Data blocks to compute parity blocks against
|
||||
* @param parityBlocks To be computed parity blocks
|
||||
* @return
|
||||
*/
|
||||
public ECBlockGroup makeBlockGroup(ECBlock[] dataBlocks,
|
||||
ECBlock[] parityBlocks) {
|
||||
|
||||
ECBlockGroup blockGroup = new ECBlockGroup(dataBlocks, parityBlocks);
|
||||
return blockGroup;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a BlockGroup, tell if any of the missing blocks can be recovered,
|
||||
* to be called by ECManager
|
||||
* @param blockGroup a blockGroup that may contain erased blocks but not sure
|
||||
* recoverable or not
|
||||
* @return true if any erased block recoverable, false otherwise
|
||||
*/
|
||||
public boolean anyRecoverable(ECBlockGroup blockGroup) {
|
||||
int erasedCount = blockGroup.getErasedCount();
|
||||
|
||||
return erasedCount > 0 && erasedCount <= getRequiredNumParityBlocks();
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue