HADOOP-8648. libhadoop: native CRC32 validation crashes when io.bytes.per.checksum=1. Contributed by Colin Patrick McCabe

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1381423 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Eli Collins 2012-09-05 22:24:44 +00:00
parent 1c71954df8
commit bcf79c727e
6 changed files with 178 additions and 23 deletions

View File

@ -254,6 +254,9 @@ Release 2.0.1-alpha - UNRELEASED
HADOOP-8764. CMake: HADOOP-8737 broke ARM build. (Trevor Robinson via eli) HADOOP-8764. CMake: HADOOP-8737 broke ARM build. (Trevor Robinson via eli)
HADOOP-8648. libhadoop: native CRC32 validation crashes when
io.bytes.per.checksum=1. (Colin Patrick McCabe via eli)
BREAKDOWN OF HDFS-3042 SUBTASKS BREAKDOWN OF HDFS-3042 SUBTASKS
HADOOP-8220. ZKFailoverController doesn't handle failure to become active HADOOP-8220. ZKFailoverController doesn't handle failure to become active

View File

@ -525,6 +525,20 @@
</target> </target>
</configuration> </configuration>
</execution> </execution>
<!-- Runs the native test_bulk_crc32 binary during the "test" phase.
     The skipTests property is forwarded through the SKIPTESTS environment
     variable; when it equals "true" the shell short-circuits and the
     binary is not run. -->
<execution>
  <id>native_tests</id>
  <phase>test</phase>
  <goals><goal>run</goal></goals>
  <configuration>
    <target>
      <exec executable="sh" failonerror="true" dir="${project.build.directory}/native">
        <arg value="-c"/>
        <!-- Both expansions are quoted so that a build directory (or a
             property value) containing whitespace is not word-split by
             the shell. -->
        <arg value="[ &quot;x$SKIPTESTS&quot; = xtrue ] || &quot;${project.build.directory}/native/test_bulk_crc32&quot;"/>
        <env key="SKIPTESTS" value="${skipTests}"/>
      </exec>
    </target>
  </configuration>
</execution>
</executions> </executions>
</plugin> </plugin>
</plugins> </plugins>

View File

@ -60,6 +60,7 @@ find_package(ZLIB REQUIRED)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall -O2") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall -O2")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_REENTRANT -D_FILE_OFFSET_BITS=64") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_REENTRANT -D_FILE_OFFSET_BITS=64")
set(D main/native/src/org/apache/hadoop) set(D main/native/src/org/apache/hadoop)
set(T main/native/src/test/org/apache/hadoop)
GET_FILENAME_COMPONENT(HADOOP_ZLIB_LIBRARY ${ZLIB_LIBRARIES} NAME) GET_FILENAME_COMPONENT(HADOOP_ZLIB_LIBRARY ${ZLIB_LIBRARIES} NAME)
@ -98,9 +99,16 @@ include_directories(
${JNI_INCLUDE_DIRS} ${JNI_INCLUDE_DIRS}
${ZLIB_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS}
${SNAPPY_INCLUDE_DIR} ${SNAPPY_INCLUDE_DIR}
${D}/util
) )
CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/config.h.cmake ${CMAKE_BINARY_DIR}/config.h) CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/config.h.cmake ${CMAKE_BINARY_DIR}/config.h)
# Stand-alone native test binary for the bulk CRC32 code. It compiles the
# CRC implementation directly into the executable together with the test
# driver.
add_executable(test_bulk_crc32
    ${D}/util/bulk_crc32.c
    ${T}/util/test_bulk_crc32.c
)
# Treat warnings in the test driver as errors. The property must name a
# source file that is actually part of a target, and compiler options belong
# in COMPILE_FLAGS (INCLUDE_DIRECTORIES holds include paths, not flags).
set_property(SOURCE ${T}/util/test_bulk_crc32.c
    PROPERTY COMPILE_FLAGS "-Werror -Wall")
add_dual_library(hadoop add_dual_library(hadoop
${D}/io/compress/lz4/Lz4Compressor.c ${D}/io/compress/lz4/Lz4Compressor.c
${D}/io/compress/lz4/Lz4Decompressor.c ${D}/io/compress/lz4/Lz4Decompressor.c

View File

@ -23,6 +23,7 @@
*/ */
#include <assert.h> #include <assert.h>
#include <arpa/inet.h> #include <arpa/inet.h>
#include <errno.h>
#include <stdint.h> #include <stdint.h>
#include <unistd.h> #include <unistd.h>
@ -33,9 +34,10 @@
#define USE_PIPELINED #define USE_PIPELINED
#define CRC_INITIAL_VAL 0xffffffff
typedef uint32_t (*crc_update_func_t)(uint32_t, const uint8_t *, size_t); typedef uint32_t (*crc_update_func_t)(uint32_t, const uint8_t *, size_t);
static uint32_t crc_init(); static inline uint32_t crc_val(uint32_t crc);
static uint32_t crc_val(uint32_t crc);
static uint32_t crc32_zlib_sb8(uint32_t crc, const uint8_t *buf, size_t length); static uint32_t crc32_zlib_sb8(uint32_t crc, const uint8_t *buf, size_t length);
static uint32_t crc32c_sb8(uint32_t crc, const uint8_t *buf, size_t length); static uint32_t crc32c_sb8(uint32_t crc, const uint8_t *buf, size_t length);
@ -45,6 +47,35 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
static int cached_cpu_supports_crc32; // initialized by constructor below static int cached_cpu_supports_crc32; // initialized by constructor below
static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* data, size_t length); static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* data, size_t length);
/**
 * Calculate one CRC per bytes_per_checksum-sized chunk of 'data'.
 *
 * Every chunk is checksummed independently: the CRC starts from
 * CRC_INITIAL_VAL, is finalized with crc_val(), passed through ntohl() and
 * stored in the next slot of 'sums'. A trailing chunk shorter than
 * bytes_per_checksum still produces its own checksum.
 *
 * @param data                The bytes to checksum.
 * @param data_len            Number of bytes in 'data'.
 * @param sums                (out) Receives one 32-bit checksum per chunk;
 *                            the caller must provide room for
 *                            ceil(data_len / bytes_per_checksum) entries.
 * @param checksum_type       CRC32_ZLIB_POLYNOMIAL or CRC32C_POLYNOMIAL.
 * @param bytes_per_checksum  Chunk size in bytes; must be positive.
 *
 * @return 0 on success, -EINVAL if checksum_type is not recognized or
 *         bytes_per_checksum is not positive.
 */
int bulk_calculate_crc(const uint8_t *data, size_t data_len,
                       uint32_t *sums, int checksum_type,
                       int bytes_per_checksum) {
  crc_update_func_t crc_update_func;
  size_t chunk_size;

  /* A chunk size of zero would never advance through the buffer (endless
   * loop), and a negative one would be compared as a huge unsigned value. */
  if (bytes_per_checksum <= 0) {
    return -EINVAL;
  }
  chunk_size = (size_t)bytes_per_checksum;

  switch (checksum_type) {
    case CRC32_ZLIB_POLYNOMIAL:
      crc_update_func = crc32_zlib_sb8;
      break;
    case CRC32C_POLYNOMIAL:
      crc_update_func = crc32c_sb8;
      break;
    default:
      return -EINVAL;
  }

  while (likely(data_len > 0)) {
    /* The last chunk may be shorter than chunk_size. */
    size_t len = likely(data_len >= chunk_size) ? chunk_size : data_len;
    uint32_t crc = crc_update_func(CRC_INITIAL_VAL, data, len);

    *sums = ntohl(crc_val(crc));
    data += len;
    data_len -= len;
    sums++;
  }
  return 0;
}
int bulk_verify_crc(const uint8_t *data, size_t data_len, int bulk_verify_crc(const uint8_t *data, size_t data_len,
const uint32_t *sums, int checksum_type, const uint32_t *sums, int checksum_type,
int bytes_per_checksum, int bytes_per_checksum,
@ -80,7 +111,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
if (do_pipelined) { if (do_pipelined) {
/* Process three blocks at a time */ /* Process three blocks at a time */
while (likely(n_blocks >= 3)) { while (likely(n_blocks >= 3)) {
crc1 = crc2 = crc3 = crc_init(); crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, 3); pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, 3);
crc = ntohl(crc_val(crc1)); crc = ntohl(crc_val(crc1));
@ -101,7 +132,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
/* One or two blocks */ /* One or two blocks */
if (n_blocks) { if (n_blocks) {
crc1 = crc2 = crc_init(); crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, n_blocks); pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, n_blocks);
if ((crc = ntohl(crc_val(crc1))) != *sums) if ((crc = ntohl(crc_val(crc1))) != *sums)
@ -118,7 +149,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
/* For something smaller than a block */ /* For something smaller than a block */
if (remainder) { if (remainder) {
crc1 = crc_init(); crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
pipelined_crc32c(&crc1, &crc2, &crc3, data, remainder, 1); pipelined_crc32c(&crc1, &crc2, &crc3, data, remainder, 1);
if ((crc = ntohl(crc_val(crc1))) != *sums) if ((crc = ntohl(crc_val(crc1))) != *sums)
@ -130,7 +161,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
while (likely(data_len > 0)) { while (likely(data_len > 0)) {
int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : data_len; int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : data_len;
crc = crc_init(); crc = CRC_INITIAL_VAL;
crc = crc_update_func(crc, data, len); crc = crc_update_func(crc, data, len);
crc = ntohl(crc_val(crc)); crc = ntohl(crc_val(crc));
if (unlikely(crc != *sums)) { if (unlikely(crc != *sums)) {
@ -151,18 +182,10 @@ return_crc_error:
return INVALID_CHECKSUM_DETECTED; return INVALID_CHECKSUM_DETECTED;
} }
/**
* Initialize a CRC
*/
static uint32_t crc_init() {
return 0xffffffff;
}
/** /**
* Extract the final result of a CRC * Extract the final result of a CRC
*/ */
static uint32_t crc_val(uint32_t crc) { static inline uint32_t crc_val(uint32_t crc) {
return ~crc; return ~crc;
} }
@ -398,7 +421,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
counter--; counter--;
} }
/* Take care of the remainder. They are only up to three bytes, /* Take care of the remainder. They are only up to seven bytes,
* so performing byte-level crc32 won't take much time. * so performing byte-level crc32 won't take much time.
*/ */
bdata = (uint8_t*)data; bdata = (uint8_t*)data;
@ -433,7 +456,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
"crc32b (%5), %0;\n\t" "crc32b (%5), %0;\n\t"
"crc32b (%5,%4,1), %1;\n\t" "crc32b (%5,%4,1), %1;\n\t"
: "=r"(c1), "=r"(c2) : "=r"(c1), "=r"(c2)
: "r"(c1), "r"(c2), "r"(c3), "r"(block_size), "r"(bdata) : "r"(c1), "r"(c2), "r"(block_size), "r"(bdata)
); );
bdata++; bdata++;
remainder--; remainder--;
@ -593,7 +616,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
"crc32b (%5), %0;\n\t" "crc32b (%5), %0;\n\t"
"crc32b (%5,%4,1), %1;\n\t" "crc32b (%5,%4,1), %1;\n\t"
: "=r"(c1), "=r"(c2) : "=r"(c1), "=r"(c2)
: "r"(c1), "r"(c2), "r"(c3), "r"(block_size), "r"(bdata) : "r"(c1), "r"(c2), "r"(block_size), "r"(bdata)
); );
bdata++; bdata++;
remainder--; remainder--;

View File

@ -19,6 +19,7 @@
#define BULK_CRC32_H_INCLUDED #define BULK_CRC32_H_INCLUDED
#include <stdint.h> #include <stdint.h>
#include <unistd.h> /* for size_t */
// Constants for different CRC algorithms // Constants for different CRC algorithms
#define CRC32C_POLYNOMIAL 1 #define CRC32C_POLYNOMIAL 1
@ -42,11 +43,17 @@ typedef struct crc32_error {
* of bytes_per_checksum bytes. The checksums are each 32 bits * of bytes_per_checksum bytes. The checksums are each 32 bits
* and are stored in sequential indexes of the 'sums' array. * and are stored in sequential indexes of the 'sums' array.
* *
* checksum_type - one of the CRC32 constants defined above * @param data The data to checksum
* error_info - if non-NULL, will be filled in if an error * @param dataLen Length of the data buffer
* @param sums The expected checksums to verify the data against, one
* 32-bit value per bytes_per_checksum-sized chunk of data.
* @param checksum_type One of the CRC32 algorithm constants defined
* above
* @param bytes_per_checksum How many bytes of data to process per checksum.
* @param error_info If non-NULL, will be filled in if an error
* is detected * is detected
* *
* Returns: 0 for success, non-zero for an error, result codes * @return 0 for success, non-zero for an error, result codes
* for which are defined above * for which are defined above
*/ */
extern int bulk_verify_crc(const uint8_t *data, size_t data_len, extern int bulk_verify_crc(const uint8_t *data, size_t data_len,
@ -54,4 +61,27 @@ extern int bulk_verify_crc(const uint8_t *data, size_t data_len,
int bytes_per_checksum, int bytes_per_checksum,
crc32_error_t *error_info); crc32_error_t *error_info);
/**
 * Calculate checksums for some data.
 *
 * The data is processed in chunks of bytes_per_checksum bytes (the final
 * chunk may be shorter). The checksums are each 32 bits and are stored in
 * sequential indexes of the 'sums' array, one per chunk.
 *
 * This function is not (yet) optimized. It is provided for testing purposes
 * only.
 *
 * @param data The data to checksum
 * @param data_len Length of the data buffer, in bytes
 * @param sums (out param) buffer to write checksums into.
 * It needs room for one 32-bit entry per chunk;
 * data_len * 4 bytes is always sufficient.
 * @param checksum_type One of the CRC32 algorithm constants defined
 * above
 * @param bytes_per_checksum How many bytes of data to process per checksum.
 *
 * @return 0 for success, non-zero for an error (a negated errno
 * value such as -EINVAL for an unrecognized checksum_type)
 */
int bulk_calculate_crc(const uint8_t *data, size_t data_len,
uint32_t *sums, int checksum_type,
int bytes_per_checksum);
#endif #endif

View File

@ -0,0 +1,77 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "bulk_crc32.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
/**
 * Evaluate x exactly once. If the result is nonzero, report the failing
 * line, the return code and the expression text on stderr, then return that
 * code from the enclosing function (which must therefore return int).
 *
 * The expansion intentionally carries no trailing semicolon, so that
 * "EXPECT_ZERO(f());" is exactly one statement and remains usable as the
 * body of an unbraced if/else.
 */
#define EXPECT_ZERO(x) \
    do { \
        int expect_zero_ret_ = (x); \
        if (expect_zero_ret_) { \
            fprintf(stderr, "TEST_ERROR: failed on line %d with return " \
                "code %d: got nonzero from %s\n", __LINE__, \
                expect_zero_ret_, #x); \
            return expect_zero_ret_; \
        } \
    } while (0)
/**
 * Compute checksums for an already-filled buffer and then verify the buffer
 * against them.
 *
 * Kept separate from testBulkVerifyCrc() so that the early returns taken by
 * EXPECT_ZERO cannot bypass the caller's cleanup of 'data' and 'sums'.
 *
 * @return 0 on success, otherwise the nonzero code of the failing call.
 */
static int checkCrcRoundTrip(const uint8_t *data, int dataLen, uint32_t *sums,
                             int crcType, int bytesPerChecksum)
{
    crc32_error_t errorData;

    EXPECT_ZERO(bulk_calculate_crc(data, dataLen, sums, crcType,
                                   bytesPerChecksum));
    EXPECT_ZERO(bulk_verify_crc(data, dataLen, sums, crcType,
                                bytesPerChecksum, &errorData));
    return 0;
}

/**
 * Round-trip test: fill a buffer of dataLen bytes with a repeating 1..16
 * pattern, checksum it with bulk_calculate_crc() and confirm that
 * bulk_verify_crc() accepts the result.
 *
 * @param dataLen           Number of data bytes to generate (>= 0).
 * @param crcType           One of the CRC32 algorithm constants.
 * @param bytesPerChecksum  Chunk size covered by each checksum (> 0).
 *
 * @return 0 on success; EXIT_FAILURE for invalid arguments or allocation
 *         failure; otherwise the nonzero code returned by the failing
 *         bulk_* call.
 */
static int testBulkVerifyCrc(int dataLen, int crcType, int bytesPerChecksum)
{
    int i, ret, numSums;
    uint8_t *data;
    uint32_t *sums;

    /* bytesPerChecksum is used as a divisor below. */
    if (dataLen < 0 || bytesPerChecksum <= 0) {
        fprintf(stderr, "TEST_ERROR: invalid arguments: dataLen=%d, "
                "bytesPerChecksum=%d\n", dataLen, bytesPerChecksum);
        return EXIT_FAILURE;
    }
    /* One checksum per chunk, counting a trailing partial chunk. */
    numSums = dataLen / bytesPerChecksum + (dataLen % bytesPerChecksum != 0);

    /* Always request at least one element so that a NULL result can only
     * mean allocation failure, never a zero-sized request. */
    data = malloc(dataLen > 0 ? (size_t)dataLen : 1);
    sums = calloc(numSums > 0 ? (size_t)numSums : 1, sizeof(*sums));
    if (!data || !sums) {
        fprintf(stderr, "TEST_ERROR: out of memory (dataLen=%d)\n", dataLen);
        free(data);
        free(sums);
        return EXIT_FAILURE;
    }

    for (i = 0; i < dataLen; i++) {
        data[i] = (i % 16) + 1;
    }

    ret = checkCrcRoundTrip(data, dataLen, sums, crcType, bytesPerChecksum);

    free(data);
    free(sums);
    return ret;
}
/**
 * Entry point of the bulk CRC32 native test.
 *
 * Runs testBulkVerifyCrc() over a fixed list of (data length, algorithm,
 * bytes-per-checksum) combinations. The first failing case makes
 * EXPECT_ZERO print a diagnostic and return that case's nonzero code from
 * main(). If every case passes, a success line is written to stderr.
 *
 * @param argc Unused.
 * @param argv argv[0] is used as the prefix of the success message.
 *
 * @return EXIT_SUCCESS when all cases pass, otherwise the nonzero code of
 *         the first failing case.
 */
int main(int argc, char **argv)
{
/* Exercise the calculate-then-verify round trip for both CRC algorithms
 * with several data lengths (4096, 256, 17, 2, 1) and bytes-per-checksum
 * values (512, 4, 2, 1). The cases with a bytes-per-checksum of 1 cover the
 * configuration that HADOOP-8648 reported as crashing; the 17-byte cases
 * leave a trailing partial chunk when the chunk size is 2 or 4. */
EXPECT_ZERO(testBulkVerifyCrc(4096, CRC32C_POLYNOMIAL, 512));
EXPECT_ZERO(testBulkVerifyCrc(4096, CRC32_ZLIB_POLYNOMIAL, 512));
EXPECT_ZERO(testBulkVerifyCrc(256, CRC32C_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(256, CRC32_ZLIB_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(1, CRC32C_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(1, CRC32_ZLIB_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(2, CRC32C_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 1));
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 2));
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32_ZLIB_POLYNOMIAL, 2));
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 4));
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32_ZLIB_POLYNOMIAL, 4));
/* Only reached when every case above returned 0. */
fprintf(stderr, "%s: SUCCESS.\n", argv[0]);
return EXIT_SUCCESS;
}