diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
index 02af483f171..18d598424ff 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
@@ -35,7 +35,6 @@ import org.apache.hadoop.hbase.util.ClassSize;
 public class HFileContext implements HeapSize, Cloneable {
 
   public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
-  public static final ChecksumType DEFAULT_CHECKSUM_TYPE = ChecksumType.CRC32;
 
   /** Whether checksum is enabled or not**/
   private boolean usesHBaseChecksum = true;
@@ -48,7 +47,7 @@ public class HFileContext implements HeapSize, Cloneable {
   /** Whether tags to be compressed or not**/
   private boolean compressTags;
   /** the checksum type **/
-  private ChecksumType checksumType = DEFAULT_CHECKSUM_TYPE;
+  private ChecksumType checksumType = ChecksumType.getDefaultChecksumType();
   /** the number of bytes per checksum value **/
   private int bytesPerChecksum = DEFAULT_BYTES_PER_CHECKSUM;
   /** Number of uncompressed bytes we allow per block. */
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java
index 7416f4ea67d..4d8bede6baf 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java
@@ -31,7 +31,6 @@ import org.apache.hadoop.hbase.util.ChecksumType;
 public class HFileContextBuilder {
 
   public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
-  public static final ChecksumType DEFAULT_CHECKSUM_TYPE = ChecksumType.CRC32;
 
   /** Whether checksum is enabled or not **/
   private boolean usesHBaseChecksum = true;
@@ -44,7 +43,7 @@ public class HFileContextBuilder {
   /** Whether tags to be compressed or not **/
   private boolean compressTags = false;
   /** the checksum type **/
-  private ChecksumType checksumType = DEFAULT_CHECKSUM_TYPE;
+  private ChecksumType checksumType = ChecksumType.getDefaultChecksumType();
   /** the number of bytes per checksum value **/
   private int bytesPerChecksum = DEFAULT_BYTES_PER_CHECKSUM;
   /** Number of uncompressed bytes we allow per block. */
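Note on the two context classes above: with the per-class DEFAULT_CHECKSUM_TYPE
constants gone, a context built without an explicit checksum type falls through
to the enum's default. A minimal sketch (the demo class name is illustrative,
not part of the patch):

    import org.apache.hadoop.hbase.io.hfile.HFileContext;
    import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
    import org.apache.hadoop.hbase.util.ChecksumType;

    public class DefaultContextCheck {
      public static void main(String[] args) {
        // No withChecksumType(...) call: the context picks up the
        // enum-provided default rather than a duplicated constant.
        HFileContext ctx = new HFileContextBuilder().build();
        System.out.println(
            ctx.getChecksumType() == ChecksumType.getDefaultChecksumType()); // true
      }
    }
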
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java
index 95df7693590..d862b365554 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java
@@ -18,13 +18,8 @@
 package org.apache.hadoop.hbase.util;
 
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.util.zip.Checksum;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.util.DataChecksum;
 
 /**
  * Checksum types. The Checksum type is a one byte number
@@ -40,112 +35,50 @@ public enum ChecksumType {
     public String getName() {
       return "NULL";
     }
-    @Override
-    public void initialize() {
-      // do nothing
-    }
-    @Override
-    public Checksum getChecksumObject() throws IOException {
-      return null; // checksums not used
+
+    @Override public DataChecksum.Type getDataChecksumType() {
+      return DataChecksum.Type.NULL;
     }
   },
 
   CRC32((byte)1) {
-    private transient Constructor<?> ctor;
-
     @Override
     public String getName() {
       return "CRC32";
     }
 
-    @Override
-    public void initialize() {
-      final String PURECRC32 = "org.apache.hadoop.util.PureJavaCrc32";
-      final String JDKCRC = "java.util.zip.CRC32";
-      LOG = LogFactory.getLog(ChecksumType.class);
-
-      // check if hadoop library is available
-      try {
-        ctor = ChecksumFactory.newConstructor(PURECRC32);
-        LOG.debug(PURECRC32 + " available");
-      } catch (Exception e) {
-        LOG.trace(PURECRC32 + " not available.");
-      }
-      try {
-        // The default checksum class name is java.util.zip.CRC32.
-        // This is available on all JVMs.
-        if (ctor == null) {
-          ctor = ChecksumFactory.newConstructor(JDKCRC);
-          LOG.debug(JDKCRC + " available");
-        }
-      } catch (Exception e) {
-        LOG.trace(JDKCRC + " not available.");
-      }
-    }
-
-    @Override
-    public Checksum getChecksumObject() throws IOException {
-      if (ctor == null) {
-        throw new IOException("Bad constructor for " + getName());
-      }
-      try {
-        return (Checksum)ctor.newInstance();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
+    @Override public DataChecksum.Type getDataChecksumType() {
+      return DataChecksum.Type.CRC32;
     }
   },
 
   CRC32C((byte)2) {
-    private transient Constructor<?> ctor;
-
     @Override
     public String getName() {
       return "CRC32C";
     }
 
-    @Override
-    public void initialize() {
-      final String PURECRC32C = "org.apache.hadoop.util.PureJavaCrc32C";
-      LOG = LogFactory.getLog(ChecksumType.class);
-      try {
-        ctor = ChecksumFactory.newConstructor(PURECRC32C);
-        LOG.debug(PURECRC32C + " available");
-      } catch (Exception e) {
-        LOG.trace(PURECRC32C + " not available.");
-      }
-    }
-
-    @Override
-    public Checksum getChecksumObject() throws IOException {
-      if (ctor == null) {
-        throw new IOException("Bad constructor for " + getName());
-      }
-      try {
-        return (Checksum)ctor.newInstance();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
+    @Override public DataChecksum.Type getDataChecksumType() {
+      return DataChecksum.Type.CRC32C;
     }
   };
 
   private final byte code;
-  protected Log LOG;
 
-  /** initializes the relevant checksum class object */
-  abstract void initialize();
+  public static ChecksumType getDefaultChecksumType() {
+    return ChecksumType.CRC32C;
+  }
 
   /** returns the name of this checksum type */
   public abstract String getName();
 
+  /** Function to get corresponding {@link org.apache.hadoop.util.DataChecksum.Type}. */
+  public abstract DataChecksum.Type getDataChecksumType();
+
   private ChecksumType(final byte c) {
     this.code = c;
-    initialize();
   }
 
-  /** returns a object that can be used to generate/validate checksums */
-  public abstract Checksum getChecksumObject() throws IOException;
-
   public byte getCode() {
     return this.code;
   }
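For context on the API the slimmed-down enum now delegates to: Hadoop's
DataChecksum selects an implementation internally (native where available,
pure-Java otherwise), which is what makes the reflection-based ChecksumFactory
plumbing removable. A small usage sketch (the demo class name is illustrative):

    import org.apache.hadoop.util.DataChecksum;

    public class DataChecksumDemo {
      public static void main(String[] args) {
        // DataChecksum implements java.util.zip.Checksum, so the familiar
        // update/getValue calls work; implementation choice stays internal.
        DataChecksum sum =
            DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 16 * 1024);
        byte[] payload = "hello hfile".getBytes();
        sum.update(payload, 0, payload.length);
        System.out.printf("crc32c = 0x%08x%n", sum.getValue());
      }
    }
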
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index 2d05e96f1ea..ba8e99719ea 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -1201,7 +1201,7 @@ possible configurations would overwhelm and obscure the important.
   <property>
     <name>hbase.hstore.checksum.algorithm</name>
-    <value>CRC32</value>
+    <value>CRC32C</value>
     <description>
      Name of an algorithm that is used to compute checksums. Possible values
      are NULL, CRC32, CRC32C.
    </description>
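The shipped default changes here, but a site file still wins over
hbase-default.xml; a hypothetical hbase-site.xml override for a deployment that
needs to stay on the old algorithm:

    <property>
      <name>hbase.hstore.checksum.algorithm</name>
      <value>CRC32</value>
    </property>
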
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index 0e03a423fc7..61862eb3192 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -20,19 +20,21 @@ package org.apache.hadoop.hbase.io.hfile;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.util.zip.Checksum;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.ChecksumException;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.util.ByteBufferUtils;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ChecksumType;
+import org.apache.hadoop.util.DataChecksum;
 
 /**
  * Utility methods to compute and validate checksums.
  */
 @InterfaceAudience.Private
 public class ChecksumUtil {
+  public static final Log LOG = LogFactory.getLog(ChecksumUtil.class);
 
   /** This is used to reserve space in a byte buffer */
   private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
@@ -60,33 +62,20 @@ public class ChecksumUtil {
    * @param checksumType type of checksum
    * @param bytesPerChecksum number of bytes per checksum value
    */
-  static void generateChecksums(byte[] indata,
-    int startOffset, int endOffset,
-    byte[] outdata, int outOffset,
-    ChecksumType checksumType,
+  static void generateChecksums(byte[] indata, int startOffset, int endOffset,
+    byte[] outdata, int outOffset, ChecksumType checksumType,
     int bytesPerChecksum) throws IOException {
 
     if (checksumType == ChecksumType.NULL) {
-      return; // No checkums for this block.
+      return; // No checksum for this block.
     }
 
-    Checksum checksum = checksumType.getChecksumObject();
-    int bytesLeft = endOffset - startOffset;
-    int chunkNum = 0;
+    DataChecksum checksum = DataChecksum.newDataChecksum(
+        checksumType.getDataChecksumType(), bytesPerChecksum);
 
-    while (bytesLeft > 0) {
-      // generate the checksum for one chunk
-      checksum.reset();
-      int count = Math.min(bytesLeft, bytesPerChecksum);
-      checksum.update(indata, startOffset, count);
-
-      // write the checksum value to the output buffer.
-      int cksumValue = (int)checksum.getValue();
-      outOffset = Bytes.putInt(outdata, outOffset, cksumValue);
-      chunkNum++;
-      startOffset += count;
-      bytesLeft -= count;
-    }
+    checksum.calculateChunkedSums(
+        ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
+        ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
   }
 
   /**
@@ -98,7 +87,7 @@ public class ChecksumUtil {
    * The header is extracted from the specified HFileBlock while the
    * data-to-be-verified is extracted from 'data'.
    */
-  static boolean validateBlockChecksum(Path path, HFileBlock block, 
+  static boolean validateBlockChecksum(Path path, HFileBlock block,
     byte[] data, int hdrSize) throws IOException {
 
     // If this is an older version of the block that does not have
@@ -117,65 +106,32 @@ public class ChecksumUtil {
     // always return true.
     ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
     if (cktype == ChecksumType.NULL) {
-      return true; // No checkums validations needed for this block.
+      return true; // No checksum validations needed for this block.
     }
-    Checksum checksumObject = cktype.getChecksumObject();
-    checksumObject.reset();
 
     // read in the stored value of the checksum size from the header.
     int bytesPerChecksum = block.getBytesPerChecksum();
 
-    // bytesPerChecksum is always larger than the size of the header
-    if (bytesPerChecksum < hdrSize) {
-      String msg = "Unsupported value of bytesPerChecksum. " +
-                   " Minimum is " + hdrSize +
-                   " but the configured value is " + bytesPerChecksum;
-      HFile.LOG.warn(msg);
-      return false; // cannot happen case, unable to verify checksum
+    DataChecksum dataChecksum = DataChecksum.newDataChecksum(
+        cktype.getDataChecksumType(), bytesPerChecksum);
+    assert dataChecksum != null;
+    int sizeWithHeader = block.getOnDiskDataSizeWithHeader();
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("length of data = " + data.length
+          + " OnDiskDataSizeWithHeader = " + sizeWithHeader
+          + " checksum type = " + cktype.getName()
+          + " file = " + path.toString()
+          + " header size = " + hdrSize
+          + " bytesPerChecksum = " + bytesPerChecksum);
     }
-
-    // Extract the header and compute checksum for the header.
-    ByteBuffer hdr = block.getBufferWithHeader();
-    if (hdr.hasArray()) {
-      checksumObject.update(hdr.array(), hdr.arrayOffset(), hdrSize);
-    } else {
-      checksumObject.update(ByteBufferUtils.toBytes(hdr, 0, hdrSize), 0, hdrSize);
+    try {
+      dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
+          ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader),
+          path.toString(), 0);
+    } catch (ChecksumException e) {
+      return false;
     }
-
-    int off = hdrSize;
-    int consumed = hdrSize;
-    int bytesLeft = block.getOnDiskDataSizeWithHeader() - off;
-    int cksumOffset = block.getOnDiskDataSizeWithHeader();
-
-    // validate each chunk
-    while (bytesLeft > 0) {
-      int thisChunkSize = bytesPerChecksum - consumed;
-      int count = Math.min(bytesLeft, thisChunkSize);
-      checksumObject.update(data, off, count);
-
-      int storedChecksum = Bytes.toInt(data, cksumOffset);
-      if (storedChecksum != (int)checksumObject.getValue()) {
-        String msg = "File " + path +
-                     " Stored checksum value of " + storedChecksum +
-                     " at offset " + cksumOffset +
-                     " does not match computed checksum " +
-                     checksumObject.getValue() +
-                     ", total data size " + data.length +
-                     " Checksum data range offset " + off + " len " + count +
-                     HFileBlock.toStringHeader(block.getBufferReadOnly());
-        HFile.LOG.warn(msg);
-        if (generateExceptions) {
-          throw new IOException(msg); // this is only for unit tests
-        } else {
-          return false; // checksum validation failure
-        }
-      }
-      cksumOffset += HFileBlock.CHECKSUM_SIZE;
-      bytesLeft -= count;
-      off += count;
-      consumed = 0;
-      checksumObject.reset();
-    }
-    return true; // checksum is valid
+    return true; // checksum is valid
   }
 
   /**
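The rewrite above trades the manual reset/update/putInt loop for DataChecksum's
chunked APIs: one call checksums every bytesPerChecksum-sized chunk, and
verification signals failure with a ChecksumException rather than a return
value, hence the try/catch in validateBlockChecksum. A self-contained
round-trip sketch (buffer sizes and the demo name are illustrative):

    import java.nio.ByteBuffer;
    import org.apache.hadoop.fs.ChecksumException;
    import org.apache.hadoop.util.DataChecksum;

    public class ChunkedSumsRoundTrip {
      public static void main(String[] args) throws ChecksumException {
        int bytesPerChecksum = 16 * 1024;
        DataChecksum sum = DataChecksum.newDataChecksum(
            DataChecksum.Type.CRC32C, bytesPerChecksum);

        byte[] data = new byte[3 * bytesPerChecksum + 100]; // last chunk partial
        int chunks = (data.length + bytesPerChecksum - 1) / bytesPerChecksum;
        byte[] sums = new byte[chunks * sum.getChecksumSize()];

        // One call checksums all chunks into the output buffer.
        sum.calculateChunkedSums(ByteBuffer.wrap(data), ByteBuffer.wrap(sums));

        // Throws ChecksumException on any mismatch; returns normally otherwise.
        sum.verifyChunkedSums(ByteBuffer.wrap(data), ByteBuffer.wrap(sums),
            "demo-block", 0);
        System.out.println("verified " + chunks + " chunks");
      }
    }
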
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index 08fc9e43be0..fbc614a8d65 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -62,7 +62,6 @@ import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
 import org.apache.hadoop.hbase.util.BloomFilterWriter;
 import org.apache.hadoop.hbase.util.ByteStringer;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.ChecksumType;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.io.Writable;
 
@@ -178,9 +177,6 @@ public class HFile {
    * The number of bytes per checksum.
    */
   public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
-  // TODO: This define is done in three places. Fix.
-  public static final ChecksumType DEFAULT_CHECKSUM_TYPE = ChecksumType.CRC32;
-
   // For measuring number of checksum failures
   static final AtomicLong checksumFailures = new AtomicLong();
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index cf93d596134..98dab42dc9d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -432,7 +432,7 @@ public class HStore implements Store {
   public static ChecksumType getChecksumType(Configuration conf) {
     String checksumName = conf.get(HConstants.CHECKSUM_TYPE_NAME);
     if (checksumName == null) {
-      return HFile.DEFAULT_CHECKSUM_TYPE;
+      return ChecksumType.getDefaultChecksumType();
    } else {
      return ChecksumType.nameToType(checksumName);
    }
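With HFile.DEFAULT_CHECKSUM_TYPE removed, HStore.getChecksumType is the single
resolution point: an explicit setting wins, otherwise the enum default applies.
A sketch of the precedence (assuming HConstants.CHECKSUM_TYPE_NAME is the
"hbase.hstore.checksum.algorithm" key used above; the demo class is
illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.regionserver.HStore;

    public class ChecksumResolutionDemo {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        conf.unset("hbase.hstore.checksum.algorithm");
        System.out.println(HStore.getChecksumType(conf).getName()); // CRC32C
        conf.set("hbase.hstore.checksum.algorithm", "CRC32");
        System.out.println(HStore.getChecksumType(conf).getName()); // CRC32
      }
    }
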
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index 011ddbf74df..9f19251405b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -22,12 +22,18 @@ package org.apache.hadoop.hbase.io.hfile;
 import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.GZ;
 import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.NONE;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.nio.BufferUnderflowException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -67,6 +73,73 @@ public class TestChecksum {
     hfs = (HFileSystem)fs;
   }
 
+  @Test
+  public void testNewBlocksHaveDefaultChecksum() throws IOException {
+    Path path = new Path(TEST_UTIL.getDataTestDir(), "default_checksum");
+    FSDataOutputStream os = fs.create(path);
+    HFileContext meta = new HFileContextBuilder().build();
+    HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta);
+    DataOutputStream dos = hbw.startWriting(BlockType.DATA);
+    for (int i = 0; i < 1000; ++i)
+      dos.writeInt(i);
+    hbw.writeHeaderAndData(os);
+    int totalSize = hbw.getOnDiskSizeWithHeader();
+    os.close();
+
+    // Use hbase checksums.
+    assertEquals(true, hfs.useHBaseChecksum());
+
+    FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path);
+    meta = new HFileContextBuilder().withHBaseCheckSum(true).build();
+    HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(
+        is, totalSize, (HFileSystem) fs, path, meta);
+    HFileBlock b = hbr.readBlockData(0, -1, -1, false);
+    assertEquals(b.getChecksumType(), ChecksumType.getDefaultChecksumType().getCode());
+  }
+
+  /**
+   * Test all checksum types by writing and reading back blocks.
+   */
+  @Test
+  public void testAllChecksumTypes() throws IOException {
+    List<ChecksumType> cktypes = new ArrayList<>(Arrays.asList(ChecksumType.values()));
+    for (Iterator<ChecksumType> itr = cktypes.iterator(); itr.hasNext(); ) {
+      ChecksumType cktype = itr.next();
+      Path path = new Path(TEST_UTIL.getDataTestDir(), "checksum" + cktype.getName());
+      FSDataOutputStream os = fs.create(path);
+      HFileContext meta = new HFileContextBuilder()
+          .withChecksumType(cktype).build();
+      HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta);
+      DataOutputStream dos = hbw.startWriting(BlockType.DATA);
+      for (int i = 0; i < 1000; ++i)
+        dos.writeInt(i);
+      hbw.writeHeaderAndData(os);
+      int totalSize = hbw.getOnDiskSizeWithHeader();
+      os.close();
+
+      // Use hbase checksums.
+      assertEquals(true, hfs.useHBaseChecksum());
+
+      FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path);
+      meta = new HFileContextBuilder().withHBaseCheckSum(true).build();
+      HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(
+          is, totalSize, (HFileSystem) fs, path, meta);
+      HFileBlock b = hbr.readBlockData(0, -1, -1, false);
+      ByteBuffer data = b.getBufferWithoutHeader();
+      for (int i = 0; i < 1000; i++) {
+        assertEquals(i, data.getInt());
+      }
+      boolean exception_thrown = false;
+      try {
+        data.getInt();
+      } catch (BufferUnderflowException e) {
+        exception_thrown = true;
+      }
+      assertTrue(exception_thrown);
+      assertEquals(0, HFile.getChecksumFailuresCount());
+    }
+  }
+
   /**
    * Introduce checksum failures and check that we can still read
    * the data
@@ -255,16 +328,6 @@ public class TestChecksum {
     }
   }
 
-  /**
-   * Test to ensure that these is at least one valid checksum implementation
-   */
-  @Test
-  public void testChecksumAlgorithm() throws IOException {
-    ChecksumType type = ChecksumType.CRC32;
-    assertEquals(ChecksumType.nameToType(type.getName()), type);
-    assertEquals(ChecksumType.valueOf(type.toString()), type);
-  }
-
   private void validateData(DataInputStream in) throws IOException {
     // validate data
     for (int i = 0; i < 1234; i++) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
index 766ddf9c519..981044870dd 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
@@ -248,7 +248,8 @@ public class TestHFileBlock {
     final String correctTestBlockStr =
         "DATABLK*\\x00\\x00\\x00>\\x00\\x00\\x0F\\xA0\\xFF\\xFF\\xFF\\xFF"
             + "\\xFF\\xFF\\xFF\\xFF"
-            + "\\x01\\x00\\x00@\\x00\\x00\\x00\\x00["
+            + "\\x0" + ChecksumType.getDefaultChecksumType().getCode()
+            + "\\x00\\x00@\\x00\\x00\\x00\\x00["
             // gzip-compressed block: http://www.gzip.org/zlib/rfc-gzip.html
             + "\\x1F\\x8B" // gzip magic signature
             + "\\x08" // Compression method: 8 = "deflate"
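Why the expected hex string in TestHFileBlock changes: byte 24 of the block
header (8-byte magic + 4-byte onDiskSizeWithoutHeader + 4-byte
uncompressedSizeWithoutHeader + 8-byte prevBlockOffset) stores the
checksum-type code, so the old literal had CRC32's \x01 baked in. Deriving the
byte keeps the expectation in sync with whatever the default is (sketch; demo
class illustrative):

    import org.apache.hadoop.hbase.util.ChecksumType;

    public class HeaderByteDemo {
      public static void main(String[] args) {
        byte code = ChecksumType.getDefaultChecksumType().getCode(); // 2
        // Mirrors the concatenation in the test; valid for codes 0-9.
        System.out.println("\\x0" + code); // prints \x02
      }
    }
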
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
index ab8a570548e..d1aabfb70d2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
@@ -73,7 +73,7 @@ public class TestStoreFile extends HBaseTestCase {
   private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
   private CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
   private static String ROOT_DIR =
       TEST_UTIL.getDataTestDir("TestStoreFile").toString();
-  private static final ChecksumType CKTYPE = ChecksumType.CRC32;
+  private static final ChecksumType CKTYPE = ChecksumType.CRC32C;
   private static final int CKBYTES = 512;
   private static String TEST_FAMILY = "cf";
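A closing compatibility note grounded in ChecksumUtil above: verification uses
ChecksumType.codeToType on the code byte persisted in each block header, not
the cluster-wide default, so HFiles written with CRC32 before this change
remain readable after the default moves to CRC32C. A sketch (demo class
illustrative):

    import org.apache.hadoop.hbase.util.ChecksumType;

    public class OldBlockCompatDemo {
      public static void main(String[] args) {
        byte storedCode = ChecksumType.CRC32.getCode(); // as read from an old block
        ChecksumType onDisk = ChecksumType.codeToType(storedCode);
        System.out.println("verify with " + onDisk.getName()
            + ", write new blocks with "
            + ChecksumType.getDefaultChecksumType().getName());
      }
    }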