HADOOP-8648. libhadoop: native CRC32 validation crashes when io.bytes.per.checksum=1. Contributed by Colin Patrick McCabe

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1381419 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Eli Collins 2012-09-05 22:19:40 +00:00
parent 91e832e7b4
commit eebbeb5434
6 changed files with 178 additions and 23 deletions

View File

@ -467,6 +467,9 @@ Branch-2 ( Unreleased changes )
HADOOP-8770. NN should not RPC to self to find trash defaults. (eli)
HADOOP-8648. libhadoop: native CRC32 validation crashes when
io.bytes.per.checksum=1. (Colin Patrick McCabe via eli)
BREAKDOWN OF HDFS-3042 SUBTASKS
HADOOP-8220. ZKFailoverController doesn't handle failure to become active

View File

@ -535,6 +535,20 @@
</target>
</configuration>
</execution>
<execution>
<id>native_tests</id>
<phase>test</phase>
<goals><goal>run</goal></goals>
<configuration>
<target>
<exec executable="sh" failonerror="true" dir="${project.build.directory}/native">
<arg value="-c"/>
<arg value="[ x$SKIPTESTS = xtrue ] || ${project.build.directory}/native/test_bulk_crc32"/>
<env key="SKIPTESTS" value="${skipTests}"/>
</exec>
</target>
</configuration>
</execution>
</executions>
</plugin>
</plugins>

View File

@ -60,6 +60,7 @@ find_package(ZLIB REQUIRED)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall -O2")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_REENTRANT -D_FILE_OFFSET_BITS=64")
set(D main/native/src/org/apache/hadoop)
set(T main/native/src/test/org/apache/hadoop)
GET_FILENAME_COMPONENT(HADOOP_ZLIB_LIBRARY ${ZLIB_LIBRARIES} NAME)
@ -98,9 +99,16 @@ include_directories(
${JNI_INCLUDE_DIRS}
${ZLIB_INCLUDE_DIRS}
${SNAPPY_INCLUDE_DIR}
${D}/util
)
CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/config.h.cmake ${CMAKE_BINARY_DIR}/config.h)
add_executable(test_bulk_crc32
${D}/util/bulk_crc32.c
${T}/util/test_bulk_crc32.c
)
set_property(SOURCE main.cpp PROPERTY INCLUDE_DIRECTORIES "\"-Werror\" \"-Wall\"")
add_dual_library(hadoop
${D}/io/compress/lz4/Lz4Compressor.c
${D}/io/compress/lz4/Lz4Decompressor.c

View File

@ -23,6 +23,7 @@
*/
#include <assert.h>
#include <arpa/inet.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
@ -33,9 +34,10 @@
#define USE_PIPELINED
#define CRC_INITIAL_VAL 0xffffffff
typedef uint32_t (*crc_update_func_t)(uint32_t, const uint8_t *, size_t);
static uint32_t crc_init();
static uint32_t crc_val(uint32_t crc);
static inline uint32_t crc_val(uint32_t crc);
static uint32_t crc32_zlib_sb8(uint32_t crc, const uint8_t *buf, size_t length);
static uint32_t crc32c_sb8(uint32_t crc, const uint8_t *buf, size_t length);
@ -45,6 +47,35 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
static int cached_cpu_supports_crc32; // initialized by constructor below
static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* data, size_t length);
int bulk_calculate_crc(const uint8_t *data, size_t data_len,
uint32_t *sums, int checksum_type,
int bytes_per_checksum) {
uint32_t crc;
crc_update_func_t crc_update_func;
switch (checksum_type) {
case CRC32_ZLIB_POLYNOMIAL:
crc_update_func = crc32_zlib_sb8;
break;
case CRC32C_POLYNOMIAL:
crc_update_func = crc32c_sb8;
break;
default:
return -EINVAL;
break;
}
while (likely(data_len > 0)) {
int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : data_len;
crc = CRC_INITIAL_VAL;
crc = crc_update_func(crc, data, len);
*sums = ntohl(crc_val(crc));
data += len;
data_len -= len;
sums++;
}
return 0;
}
int bulk_verify_crc(const uint8_t *data, size_t data_len,
const uint32_t *sums, int checksum_type,
int bytes_per_checksum,
@ -80,7 +111,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
if (do_pipelined) {
/* Process three blocks at a time */
while (likely(n_blocks >= 3)) {
crc1 = crc2 = crc3 = crc_init();
crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, 3);
crc = ntohl(crc_val(crc1));
@ -101,7 +132,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
/* One or two blocks */
if (n_blocks) {
crc1 = crc2 = crc_init();
crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, n_blocks);
if ((crc = ntohl(crc_val(crc1))) != *sums)
@ -118,7 +149,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
/* For something smaller than a block */
if (remainder) {
crc1 = crc_init();
crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
pipelined_crc32c(&crc1, &crc2, &crc3, data, remainder, 1);
if ((crc = ntohl(crc_val(crc1))) != *sums)
@ -130,7 +161,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
while (likely(data_len > 0)) {
int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : data_len;
crc = crc_init();
crc = CRC_INITIAL_VAL;
crc = crc_update_func(crc, data, len);
crc = ntohl(crc_val(crc));
if (unlikely(crc != *sums)) {
@ -151,18 +182,10 @@ return_crc_error:
return INVALID_CHECKSUM_DETECTED;
}
/**
* Initialize a CRC
*/
static uint32_t crc_init() {
return 0xffffffff;
}
/**
* Extract the final result of a CRC
*/
static uint32_t crc_val(uint32_t crc) {
static inline uint32_t crc_val(uint32_t crc) {
return ~crc;
}
@ -398,7 +421,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
counter--;
}
/* Take care of the remainder. They are only up to three bytes,
/* Take care of the remainder. They are only up to seven bytes,
* so performing byte-level crc32 won't take much time.
*/
bdata = (uint8_t*)data;
@ -433,7 +456,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
"crc32b (%5), %0;\n\t"
"crc32b (%5,%4,1), %1;\n\t"
: "=r"(c1), "=r"(c2)
: "r"(c1), "r"(c2), "r"(c3), "r"(block_size), "r"(bdata)
: "r"(c1), "r"(c2), "r"(block_size), "r"(bdata)
);
bdata++;
remainder--;
@ -593,7 +616,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
"crc32b (%5), %0;\n\t"
"crc32b (%5,%4,1), %1;\n\t"
: "=r"(c1), "=r"(c2)
: "r"(c1), "r"(c2), "r"(c3), "r"(block_size), "r"(bdata)
: "r"(c1), "r"(c2), "r"(block_size), "r"(bdata)
);
bdata++;
remainder--;

View File

@ -19,6 +19,7 @@
#define BULK_CRC32_H_INCLUDED
#include <stdint.h>
#include <unistd.h> /* for size_t */
// Constants for different CRC algorithms
#define CRC32C_POLYNOMIAL 1
@ -42,11 +43,17 @@ typedef struct crc32_error {
* of bytes_per_checksum bytes. The checksums are each 32 bits
* and are stored in sequential indexes of the 'sums' array.
*
* checksum_type - one of the CRC32 constants defined above
* error_info - if non-NULL, will be filled in if an error
* @param data The data to checksum
* @param dataLen Length of the data buffer
* @param sums (out param) buffer to write checksums into.
* It must contain at least dataLen * 4 bytes.
* @param checksum_type One of the CRC32 algorithm constants defined
* above
* @param bytes_per_checksum How many bytes of data to process per checksum.
* @param error_info If non-NULL, will be filled in if an error
* is detected
*
* Returns: 0 for success, non-zero for an error, result codes
* @return 0 for success, non-zero for an error, result codes
* for which are defined above
*/
extern int bulk_verify_crc(const uint8_t *data, size_t data_len,
@ -54,4 +61,27 @@ extern int bulk_verify_crc(const uint8_t *data, size_t data_len,
int bytes_per_checksum,
crc32_error_t *error_info);
/**
* Calculate checksums for some data.
*
* The checksums are each 32 bits and are stored in sequential indexes of the
* 'sums' array.
*
* This function is not (yet) optimized. It is provided for testing purposes
* only.
*
* @param data The data to checksum
* @param dataLen Length of the data buffer
* @param sums (out param) buffer to write checksums into.
* It must contain at least dataLen * 4 bytes.
* @param checksum_type One of the CRC32 algorithm constants defined
* above
* @param bytesPerChecksum How many bytes of data to process per checksum.
*
* @return 0 for success, non-zero for an error
*/
int bulk_calculate_crc(const uint8_t *data, size_t data_len,
uint32_t *sums, int checksum_type,
int bytes_per_checksum);
#endif

View File

@ -0,0 +1,77 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "bulk_crc32.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#define EXPECT_ZERO(x) \
do { \
int __my_ret__ = x; \
if (__my_ret__) { \
fprintf(stderr, "TEST_ERROR: failed on line %d with return " \
"code %d: got nonzero from %s\n", __LINE__, __my_ret__, #x); \
return __my_ret__; \
} \
} while (0);
static int testBulkVerifyCrc(int dataLen, int crcType, int bytesPerChecksum)
{
int i;
uint8_t *data;
uint32_t *sums;
crc32_error_t errorData;
data = malloc(dataLen);
for (i = 0; i < dataLen; i++) {
data[i] = (i % 16) + 1;
}
sums = calloc(sizeof(uint32_t),
(dataLen + bytesPerChecksum - 1) / bytesPerChecksum);
EXPECT_ZERO(bulk_calculate_crc(data, dataLen, sums, crcType,
bytesPerChecksum));
EXPECT_ZERO(bulk_verify_crc(data, dataLen, sums, crcType,
bytesPerChecksum, &errorData));
free(data);
free(sums);
return 0;
}
int main(int argc, char **argv)
{
/* Test running bulk_calculate_crc with some different algorithms and
* bytePerChecksum values. */
EXPECT_ZERO(testBulkVerifyCrc(4096, CRC32C_POLYNOMIAL, 512));
EXPECT_ZERO(testBulkVerifyCrc(4096, CRC32_ZLIB_POLYNOMIAL, 512));
EXPECT_ZERO(testBulkVerifyCrc(256, CRC32C_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(256, CRC32_ZLIB_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(1, CRC32C_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(1, CRC32_ZLIB_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(2, CRC32C_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 2));
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32_ZLIB_POLYNOMIAL, 2));
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 4));
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32_ZLIB_POLYNOMIAL, 4));
fprintf(stderr, "%s: SUCCESS.\n", argv[0]);
return EXIT_SUCCESS;
}