HADOOP-8648. libhadoop: native CRC32 validation crashes when io.bytes.per.checksum=1. Contributed by Colin Patrick McCabe
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1381423 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1c71954df8
commit
bcf79c727e
|
@ -254,6 +254,9 @@ Release 2.0.1-alpha - UNRELEASED
|
||||||
|
|
||||||
HADOOP-8764. CMake: HADOOP-8737 broke ARM build. (Trevor Robinson via eli)
|
HADOOP-8764. CMake: HADOOP-8737 broke ARM build. (Trevor Robinson via eli)
|
||||||
|
|
||||||
|
HADOOP-8648. libhadoop: native CRC32 validation crashes when
|
||||||
|
io.bytes.per.checksum=1. (Colin Patrick McCabe via eli)
|
||||||
|
|
||||||
BREAKDOWN OF HDFS-3042 SUBTASKS
|
BREAKDOWN OF HDFS-3042 SUBTASKS
|
||||||
|
|
||||||
HADOOP-8220. ZKFailoverController doesn't handle failure to become active
|
HADOOP-8220. ZKFailoverController doesn't handle failure to become active
|
||||||
|
|
|
@ -525,6 +525,20 @@
|
||||||
</target>
|
</target>
|
||||||
</configuration>
|
</configuration>
|
||||||
</execution>
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>native_tests</id>
|
||||||
|
<phase>test</phase>
|
||||||
|
<goals><goal>run</goal></goals>
|
||||||
|
<configuration>
|
||||||
|
<target>
|
||||||
|
<exec executable="sh" failonerror="true" dir="${project.build.directory}/native">
|
||||||
|
<arg value="-c"/>
|
||||||
|
<arg value="[ x$SKIPTESTS = xtrue ] || ${project.build.directory}/native/test_bulk_crc32"/>
|
||||||
|
<env key="SKIPTESTS" value="${skipTests}"/>
|
||||||
|
</exec>
|
||||||
|
</target>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
</plugin>
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
|
|
|
@ -60,6 +60,7 @@ find_package(ZLIB REQUIRED)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall -O2")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall -O2")
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_REENTRANT -D_FILE_OFFSET_BITS=64")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_REENTRANT -D_FILE_OFFSET_BITS=64")
|
||||||
set(D main/native/src/org/apache/hadoop)
|
set(D main/native/src/org/apache/hadoop)
|
||||||
|
set(T main/native/src/test/org/apache/hadoop)
|
||||||
|
|
||||||
GET_FILENAME_COMPONENT(HADOOP_ZLIB_LIBRARY ${ZLIB_LIBRARIES} NAME)
|
GET_FILENAME_COMPONENT(HADOOP_ZLIB_LIBRARY ${ZLIB_LIBRARIES} NAME)
|
||||||
|
|
||||||
|
@ -98,9 +99,16 @@ include_directories(
|
||||||
${JNI_INCLUDE_DIRS}
|
${JNI_INCLUDE_DIRS}
|
||||||
${ZLIB_INCLUDE_DIRS}
|
${ZLIB_INCLUDE_DIRS}
|
||||||
${SNAPPY_INCLUDE_DIR}
|
${SNAPPY_INCLUDE_DIR}
|
||||||
|
${D}/util
|
||||||
)
|
)
|
||||||
CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/config.h.cmake ${CMAKE_BINARY_DIR}/config.h)
|
CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/config.h.cmake ${CMAKE_BINARY_DIR}/config.h)
|
||||||
|
|
||||||
|
add_executable(test_bulk_crc32
|
||||||
|
${D}/util/bulk_crc32.c
|
||||||
|
${T}/util/test_bulk_crc32.c
|
||||||
|
)
|
||||||
|
set_property(SOURCE main.cpp PROPERTY INCLUDE_DIRECTORIES "\"-Werror\" \"-Wall\"")
|
||||||
|
|
||||||
add_dual_library(hadoop
|
add_dual_library(hadoop
|
||||||
${D}/io/compress/lz4/Lz4Compressor.c
|
${D}/io/compress/lz4/Lz4Compressor.c
|
||||||
${D}/io/compress/lz4/Lz4Decompressor.c
|
${D}/io/compress/lz4/Lz4Decompressor.c
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
*/
|
*/
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
|
#include <errno.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
|
@ -33,9 +34,10 @@
|
||||||
|
|
||||||
#define USE_PIPELINED
|
#define USE_PIPELINED
|
||||||
|
|
||||||
|
#define CRC_INITIAL_VAL 0xffffffff
|
||||||
|
|
||||||
typedef uint32_t (*crc_update_func_t)(uint32_t, const uint8_t *, size_t);
|
typedef uint32_t (*crc_update_func_t)(uint32_t, const uint8_t *, size_t);
|
||||||
static uint32_t crc_init();
|
static inline uint32_t crc_val(uint32_t crc);
|
||||||
static uint32_t crc_val(uint32_t crc);
|
|
||||||
static uint32_t crc32_zlib_sb8(uint32_t crc, const uint8_t *buf, size_t length);
|
static uint32_t crc32_zlib_sb8(uint32_t crc, const uint8_t *buf, size_t length);
|
||||||
static uint32_t crc32c_sb8(uint32_t crc, const uint8_t *buf, size_t length);
|
static uint32_t crc32c_sb8(uint32_t crc, const uint8_t *buf, size_t length);
|
||||||
|
|
||||||
|
@ -45,6 +47,35 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
|
||||||
static int cached_cpu_supports_crc32; // initialized by constructor below
|
static int cached_cpu_supports_crc32; // initialized by constructor below
|
||||||
static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* data, size_t length);
|
static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* data, size_t length);
|
||||||
|
|
||||||
|
int bulk_calculate_crc(const uint8_t *data, size_t data_len,
|
||||||
|
uint32_t *sums, int checksum_type,
|
||||||
|
int bytes_per_checksum) {
|
||||||
|
uint32_t crc;
|
||||||
|
crc_update_func_t crc_update_func;
|
||||||
|
|
||||||
|
switch (checksum_type) {
|
||||||
|
case CRC32_ZLIB_POLYNOMIAL:
|
||||||
|
crc_update_func = crc32_zlib_sb8;
|
||||||
|
break;
|
||||||
|
case CRC32C_POLYNOMIAL:
|
||||||
|
crc_update_func = crc32c_sb8;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return -EINVAL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
while (likely(data_len > 0)) {
|
||||||
|
int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : data_len;
|
||||||
|
crc = CRC_INITIAL_VAL;
|
||||||
|
crc = crc_update_func(crc, data, len);
|
||||||
|
*sums = ntohl(crc_val(crc));
|
||||||
|
data += len;
|
||||||
|
data_len -= len;
|
||||||
|
sums++;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int bulk_verify_crc(const uint8_t *data, size_t data_len,
|
int bulk_verify_crc(const uint8_t *data, size_t data_len,
|
||||||
const uint32_t *sums, int checksum_type,
|
const uint32_t *sums, int checksum_type,
|
||||||
int bytes_per_checksum,
|
int bytes_per_checksum,
|
||||||
|
@ -80,7 +111,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
|
||||||
if (do_pipelined) {
|
if (do_pipelined) {
|
||||||
/* Process three blocks at a time */
|
/* Process three blocks at a time */
|
||||||
while (likely(n_blocks >= 3)) {
|
while (likely(n_blocks >= 3)) {
|
||||||
crc1 = crc2 = crc3 = crc_init();
|
crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
|
||||||
pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, 3);
|
pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, 3);
|
||||||
|
|
||||||
crc = ntohl(crc_val(crc1));
|
crc = ntohl(crc_val(crc1));
|
||||||
|
@ -101,7 +132,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
|
||||||
|
|
||||||
/* One or two blocks */
|
/* One or two blocks */
|
||||||
if (n_blocks) {
|
if (n_blocks) {
|
||||||
crc1 = crc2 = crc_init();
|
crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
|
||||||
pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, n_blocks);
|
pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, n_blocks);
|
||||||
|
|
||||||
if ((crc = ntohl(crc_val(crc1))) != *sums)
|
if ((crc = ntohl(crc_val(crc1))) != *sums)
|
||||||
|
@ -118,7 +149,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
|
||||||
|
|
||||||
/* For something smaller than a block */
|
/* For something smaller than a block */
|
||||||
if (remainder) {
|
if (remainder) {
|
||||||
crc1 = crc_init();
|
crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
|
||||||
pipelined_crc32c(&crc1, &crc2, &crc3, data, remainder, 1);
|
pipelined_crc32c(&crc1, &crc2, &crc3, data, remainder, 1);
|
||||||
|
|
||||||
if ((crc = ntohl(crc_val(crc1))) != *sums)
|
if ((crc = ntohl(crc_val(crc1))) != *sums)
|
||||||
|
@ -130,7 +161,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
|
||||||
|
|
||||||
while (likely(data_len > 0)) {
|
while (likely(data_len > 0)) {
|
||||||
int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : data_len;
|
int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : data_len;
|
||||||
crc = crc_init();
|
crc = CRC_INITIAL_VAL;
|
||||||
crc = crc_update_func(crc, data, len);
|
crc = crc_update_func(crc, data, len);
|
||||||
crc = ntohl(crc_val(crc));
|
crc = ntohl(crc_val(crc));
|
||||||
if (unlikely(crc != *sums)) {
|
if (unlikely(crc != *sums)) {
|
||||||
|
@ -151,18 +182,10 @@ return_crc_error:
|
||||||
return INVALID_CHECKSUM_DETECTED;
|
return INVALID_CHECKSUM_DETECTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize a CRC
|
|
||||||
*/
|
|
||||||
static uint32_t crc_init() {
|
|
||||||
return 0xffffffff;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract the final result of a CRC
|
* Extract the final result of a CRC
|
||||||
*/
|
*/
|
||||||
static uint32_t crc_val(uint32_t crc) {
|
static inline uint32_t crc_val(uint32_t crc) {
|
||||||
return ~crc;
|
return ~crc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -398,7 +421,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
|
||||||
counter--;
|
counter--;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Take care of the remainder. They are only up to three bytes,
|
/* Take care of the remainder. They are only up to seven bytes,
|
||||||
* so performing byte-level crc32 won't take much time.
|
* so performing byte-level crc32 won't take much time.
|
||||||
*/
|
*/
|
||||||
bdata = (uint8_t*)data;
|
bdata = (uint8_t*)data;
|
||||||
|
@ -433,7 +456,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
|
||||||
"crc32b (%5), %0;\n\t"
|
"crc32b (%5), %0;\n\t"
|
||||||
"crc32b (%5,%4,1), %1;\n\t"
|
"crc32b (%5,%4,1), %1;\n\t"
|
||||||
: "=r"(c1), "=r"(c2)
|
: "=r"(c1), "=r"(c2)
|
||||||
: "r"(c1), "r"(c2), "r"(c3), "r"(block_size), "r"(bdata)
|
: "r"(c1), "r"(c2), "r"(block_size), "r"(bdata)
|
||||||
);
|
);
|
||||||
bdata++;
|
bdata++;
|
||||||
remainder--;
|
remainder--;
|
||||||
|
@ -593,7 +616,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
|
||||||
"crc32b (%5), %0;\n\t"
|
"crc32b (%5), %0;\n\t"
|
||||||
"crc32b (%5,%4,1), %1;\n\t"
|
"crc32b (%5,%4,1), %1;\n\t"
|
||||||
: "=r"(c1), "=r"(c2)
|
: "=r"(c1), "=r"(c2)
|
||||||
: "r"(c1), "r"(c2), "r"(c3), "r"(block_size), "r"(bdata)
|
: "r"(c1), "r"(c2), "r"(block_size), "r"(bdata)
|
||||||
);
|
);
|
||||||
bdata++;
|
bdata++;
|
||||||
remainder--;
|
remainder--;
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#define BULK_CRC32_H_INCLUDED
|
#define BULK_CRC32_H_INCLUDED
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <unistd.h> /* for size_t */
|
||||||
|
|
||||||
// Constants for different CRC algorithms
|
// Constants for different CRC algorithms
|
||||||
#define CRC32C_POLYNOMIAL 1
|
#define CRC32C_POLYNOMIAL 1
|
||||||
|
@ -42,11 +43,17 @@ typedef struct crc32_error {
|
||||||
* of bytes_per_checksum bytes. The checksums are each 32 bits
|
* of bytes_per_checksum bytes. The checksums are each 32 bits
|
||||||
* and are stored in sequential indexes of the 'sums' array.
|
* and are stored in sequential indexes of the 'sums' array.
|
||||||
*
|
*
|
||||||
* checksum_type - one of the CRC32 constants defined above
|
* @param data The data to checksum
|
||||||
* error_info - if non-NULL, will be filled in if an error
|
* @param dataLen Length of the data buffer
|
||||||
|
* @param sums (out param) buffer to write checksums into.
|
||||||
|
* It must contain at least dataLen * 4 bytes.
|
||||||
|
* @param checksum_type One of the CRC32 algorithm constants defined
|
||||||
|
* above
|
||||||
|
* @param bytes_per_checksum How many bytes of data to process per checksum.
|
||||||
|
* @param error_info If non-NULL, will be filled in if an error
|
||||||
* is detected
|
* is detected
|
||||||
*
|
*
|
||||||
* Returns: 0 for success, non-zero for an error, result codes
|
* @return 0 for success, non-zero for an error, result codes
|
||||||
* for which are defined above
|
* for which are defined above
|
||||||
*/
|
*/
|
||||||
extern int bulk_verify_crc(const uint8_t *data, size_t data_len,
|
extern int bulk_verify_crc(const uint8_t *data, size_t data_len,
|
||||||
|
@ -54,4 +61,27 @@ extern int bulk_verify_crc(const uint8_t *data, size_t data_len,
|
||||||
int bytes_per_checksum,
|
int bytes_per_checksum,
|
||||||
crc32_error_t *error_info);
|
crc32_error_t *error_info);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate checksums for some data.
|
||||||
|
*
|
||||||
|
* The checksums are each 32 bits and are stored in sequential indexes of the
|
||||||
|
* 'sums' array.
|
||||||
|
*
|
||||||
|
* This function is not (yet) optimized. It is provided for testing purposes
|
||||||
|
* only.
|
||||||
|
*
|
||||||
|
* @param data The data to checksum
|
||||||
|
* @param dataLen Length of the data buffer
|
||||||
|
* @param sums (out param) buffer to write checksums into.
|
||||||
|
* It must contain at least dataLen * 4 bytes.
|
||||||
|
* @param checksum_type One of the CRC32 algorithm constants defined
|
||||||
|
* above
|
||||||
|
* @param bytesPerChecksum How many bytes of data to process per checksum.
|
||||||
|
*
|
||||||
|
* @return 0 for success, non-zero for an error
|
||||||
|
*/
|
||||||
|
int bulk_calculate_crc(const uint8_t *data, size_t data_len,
|
||||||
|
uint32_t *sums, int checksum_type,
|
||||||
|
int bytes_per_checksum);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "bulk_crc32.h"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#define EXPECT_ZERO(x) \
|
||||||
|
do { \
|
||||||
|
int __my_ret__ = x; \
|
||||||
|
if (__my_ret__) { \
|
||||||
|
fprintf(stderr, "TEST_ERROR: failed on line %d with return " \
|
||||||
|
"code %d: got nonzero from %s\n", __LINE__, __my_ret__, #x); \
|
||||||
|
return __my_ret__; \
|
||||||
|
} \
|
||||||
|
} while (0);
|
||||||
|
|
||||||
|
static int testBulkVerifyCrc(int dataLen, int crcType, int bytesPerChecksum)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
uint8_t *data;
|
||||||
|
uint32_t *sums;
|
||||||
|
crc32_error_t errorData;
|
||||||
|
|
||||||
|
data = malloc(dataLen);
|
||||||
|
for (i = 0; i < dataLen; i++) {
|
||||||
|
data[i] = (i % 16) + 1;
|
||||||
|
}
|
||||||
|
sums = calloc(sizeof(uint32_t),
|
||||||
|
(dataLen + bytesPerChecksum - 1) / bytesPerChecksum);
|
||||||
|
|
||||||
|
EXPECT_ZERO(bulk_calculate_crc(data, dataLen, sums, crcType,
|
||||||
|
bytesPerChecksum));
|
||||||
|
EXPECT_ZERO(bulk_verify_crc(data, dataLen, sums, crcType,
|
||||||
|
bytesPerChecksum, &errorData));
|
||||||
|
free(data);
|
||||||
|
free(sums);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
/* Test running bulk_calculate_crc with some different algorithms and
|
||||||
|
* bytePerChecksum values. */
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(4096, CRC32C_POLYNOMIAL, 512));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(4096, CRC32_ZLIB_POLYNOMIAL, 512));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(256, CRC32C_POLYNOMIAL, 1));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(256, CRC32_ZLIB_POLYNOMIAL, 1));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(1, CRC32C_POLYNOMIAL, 1));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(1, CRC32_ZLIB_POLYNOMIAL, 1));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(2, CRC32C_POLYNOMIAL, 1));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 1));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 2));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32_ZLIB_POLYNOMIAL, 2));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 4));
|
||||||
|
EXPECT_ZERO(testBulkVerifyCrc(17, CRC32_ZLIB_POLYNOMIAL, 4));
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: SUCCESS.\n", argv[0]);
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
Loading…
Reference in New Issue