diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 93ad985375a..15e5d59acbf 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -467,6 +467,9 @@ Branch-2 ( Unreleased changes ) HADOOP-8770. NN should not RPC to self to find trash defaults. (eli) + HADOOP-8648. libhadoop: native CRC32 validation crashes when + io.bytes.per.checksum=1. (Colin Patrick McCabe via eli) + BREAKDOWN OF HDFS-3042 SUBTASKS HADOOP-8220. ZKFailoverController doesn't handle failure to become active diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index d1714b1a9f9..47f3c995575 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -535,6 +535,20 @@ + + native_tests + test + run + + + + + + + + + + diff --git a/hadoop-common-project/hadoop-common/src/CMakeLists.txt b/hadoop-common-project/hadoop-common/src/CMakeLists.txt index 8ff2f12e6d2..5c3d77db4cd 100644 --- a/hadoop-common-project/hadoop-common/src/CMakeLists.txt +++ b/hadoop-common-project/hadoop-common/src/CMakeLists.txt @@ -60,6 +60,7 @@ find_package(ZLIB REQUIRED) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall -O2") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_REENTRANT -D_FILE_OFFSET_BITS=64") set(D main/native/src/org/apache/hadoop) +set(T main/native/src/test/org/apache/hadoop) GET_FILENAME_COMPONENT(HADOOP_ZLIB_LIBRARY ${ZLIB_LIBRARIES} NAME) @@ -98,9 +99,16 @@ include_directories( ${JNI_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} ${SNAPPY_INCLUDE_DIR} + ${D}/util ) CONFIGURE_FILE(${CMAKE_SOURCE_DIR}/config.h.cmake ${CMAKE_BINARY_DIR}/config.h) +add_executable(test_bulk_crc32 + ${D}/util/bulk_crc32.c + ${T}/util/test_bulk_crc32.c +) +set_property(SOURCE main.cpp PROPERTY INCLUDE_DIRECTORIES "\"-Werror\" \"-Wall\"") + add_dual_library(hadoop ${D}/io/compress/lz4/Lz4Compressor.c ${D}/io/compress/lz4/Lz4Decompressor.c diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c index d2491d7344f..7009bf1f5cc 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c @@ -23,6 +23,7 @@ */ #include #include +#include #include #include @@ -33,9 +34,10 @@ #define USE_PIPELINED +#define CRC_INITIAL_VAL 0xffffffff + typedef uint32_t (*crc_update_func_t)(uint32_t, const uint8_t *, size_t); -static uint32_t crc_init(); -static uint32_t crc_val(uint32_t crc); +static inline uint32_t crc_val(uint32_t crc); static uint32_t crc32_zlib_sb8(uint32_t crc, const uint8_t *buf, size_t length); static uint32_t crc32c_sb8(uint32_t crc, const uint8_t *buf, size_t length); @@ -45,6 +47,35 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con static int cached_cpu_supports_crc32; // initialized by constructor below static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* data, size_t length); +int bulk_calculate_crc(const uint8_t *data, size_t data_len, + uint32_t *sums, int checksum_type, + int bytes_per_checksum) { + uint32_t crc; + crc_update_func_t crc_update_func; + + switch (checksum_type) { + case CRC32_ZLIB_POLYNOMIAL: + crc_update_func = crc32_zlib_sb8; + break; + case CRC32C_POLYNOMIAL: + crc_update_func = crc32c_sb8; + break; + default: + return -EINVAL; + break; + } + while (likely(data_len > 0)) { + int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : data_len; + crc = CRC_INITIAL_VAL; + crc = crc_update_func(crc, data, len); + *sums = ntohl(crc_val(crc)); + data += len; + data_len -= len; + sums++; + } + return 0; +} + int bulk_verify_crc(const uint8_t *data, size_t data_len, const uint32_t *sums, int checksum_type, int bytes_per_checksum, @@ -80,7 +111,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len, if (do_pipelined) { /* Process three blocks at a time */ while (likely(n_blocks >= 3)) { - crc1 = crc2 = crc3 = crc_init(); + crc1 = crc2 = crc3 = CRC_INITIAL_VAL; pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, 3); crc = ntohl(crc_val(crc1)); @@ -101,7 +132,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len, /* One or two blocks */ if (n_blocks) { - crc1 = crc2 = crc_init(); + crc1 = crc2 = crc3 = CRC_INITIAL_VAL; pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, n_blocks); if ((crc = ntohl(crc_val(crc1))) != *sums) @@ -118,7 +149,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len, /* For something smaller than a block */ if (remainder) { - crc1 = crc_init(); + crc1 = crc2 = crc3 = CRC_INITIAL_VAL; pipelined_crc32c(&crc1, &crc2, &crc3, data, remainder, 1); if ((crc = ntohl(crc_val(crc1))) != *sums) @@ -130,7 +161,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len, while (likely(data_len > 0)) { int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : data_len; - crc = crc_init(); + crc = CRC_INITIAL_VAL; crc = crc_update_func(crc, data, len); crc = ntohl(crc_val(crc)); if (unlikely(crc != *sums)) { @@ -151,18 +182,10 @@ return_crc_error: return INVALID_CHECKSUM_DETECTED; } - -/** - * Initialize a CRC - */ -static uint32_t crc_init() { - return 0xffffffff; -} - /** * Extract the final result of a CRC */ -static uint32_t crc_val(uint32_t crc) { +static inline uint32_t crc_val(uint32_t crc) { return ~crc; } @@ -398,7 +421,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con counter--; } - /* Take care of the remainder. They are only up to three bytes, + /* Take care of the remainder. They are only up to seven bytes, * so performing byte-level crc32 won't take much time. */ bdata = (uint8_t*)data; @@ -433,7 +456,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con "crc32b (%5), %0;\n\t" "crc32b (%5,%4,1), %1;\n\t" : "=r"(c1), "=r"(c2) - : "r"(c1), "r"(c2), "r"(c3), "r"(block_size), "r"(bdata) + : "r"(c1), "r"(c2), "r"(block_size), "r"(bdata) ); bdata++; remainder--; @@ -593,7 +616,7 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con "crc32b (%5), %0;\n\t" "crc32b (%5,%4,1), %1;\n\t" : "=r"(c1), "=r"(c2) - : "r"(c1), "r"(c2), "r"(c3), "r"(block_size), "r"(bdata) + : "r"(c1), "r"(c2), "r"(block_size), "r"(bdata) ); bdata++; remainder--; diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.h index 2ab1bd3c402..44cf52eaeca 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.h +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.h @@ -19,6 +19,7 @@ #define BULK_CRC32_H_INCLUDED #include +#include /* for size_t */ // Constants for different CRC algorithms #define CRC32C_POLYNOMIAL 1 @@ -42,16 +43,45 @@ typedef struct crc32_error { * of bytes_per_checksum bytes. The checksums are each 32 bits * and are stored in sequential indexes of the 'sums' array. * - * checksum_type - one of the CRC32 constants defined above - * error_info - if non-NULL, will be filled in if an error - * is detected + * @param data The data to checksum + * @param dataLen Length of the data buffer + * @param sums (out param) buffer to write checksums into. + * It must contain at least dataLen * 4 bytes. + * @param checksum_type One of the CRC32 algorithm constants defined + * above + * @param bytes_per_checksum How many bytes of data to process per checksum. + * @param error_info If non-NULL, will be filled in if an error + * is detected * - * Returns: 0 for success, non-zero for an error, result codes - * for which are defined above + * @return 0 for success, non-zero for an error, result codes + * for which are defined above */ extern int bulk_verify_crc(const uint8_t *data, size_t data_len, const uint32_t *sums, int checksum_type, int bytes_per_checksum, crc32_error_t *error_info); +/** + * Calculate checksums for some data. + * + * The checksums are each 32 bits and are stored in sequential indexes of the + * 'sums' array. + * + * This function is not (yet) optimized. It is provided for testing purposes + * only. + * + * @param data The data to checksum + * @param dataLen Length of the data buffer + * @param sums (out param) buffer to write checksums into. + * It must contain at least dataLen * 4 bytes. + * @param checksum_type One of the CRC32 algorithm constants defined + * above + * @param bytesPerChecksum How many bytes of data to process per checksum. + * + * @return 0 for success, non-zero for an error + */ +int bulk_calculate_crc(const uint8_t *data, size_t data_len, + uint32_t *sums, int checksum_type, + int bytes_per_checksum); + #endif diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/util/test_bulk_crc32.c b/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/util/test_bulk_crc32.c new file mode 100644 index 00000000000..ff7753718c5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/util/test_bulk_crc32.c @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bulk_crc32.h" + +#include +#include +#include + +#define EXPECT_ZERO(x) \ + do { \ + int __my_ret__ = x; \ + if (__my_ret__) { \ + fprintf(stderr, "TEST_ERROR: failed on line %d with return " \ + "code %d: got nonzero from %s\n", __LINE__, __my_ret__, #x); \ + return __my_ret__; \ + } \ + } while (0); + +static int testBulkVerifyCrc(int dataLen, int crcType, int bytesPerChecksum) +{ + int i; + uint8_t *data; + uint32_t *sums; + crc32_error_t errorData; + + data = malloc(dataLen); + for (i = 0; i < dataLen; i++) { + data[i] = (i % 16) + 1; + } + sums = calloc(sizeof(uint32_t), + (dataLen + bytesPerChecksum - 1) / bytesPerChecksum); + + EXPECT_ZERO(bulk_calculate_crc(data, dataLen, sums, crcType, + bytesPerChecksum)); + EXPECT_ZERO(bulk_verify_crc(data, dataLen, sums, crcType, + bytesPerChecksum, &errorData)); + free(data); + free(sums); + return 0; +} + +int main(int argc, char **argv) +{ + /* Test running bulk_calculate_crc with some different algorithms and + * bytePerChecksum values. */ + EXPECT_ZERO(testBulkVerifyCrc(4096, CRC32C_POLYNOMIAL, 512)); + EXPECT_ZERO(testBulkVerifyCrc(4096, CRC32_ZLIB_POLYNOMIAL, 512)); + EXPECT_ZERO(testBulkVerifyCrc(256, CRC32C_POLYNOMIAL, 1)); + EXPECT_ZERO(testBulkVerifyCrc(256, CRC32_ZLIB_POLYNOMIAL, 1)); + EXPECT_ZERO(testBulkVerifyCrc(1, CRC32C_POLYNOMIAL, 1)); + EXPECT_ZERO(testBulkVerifyCrc(1, CRC32_ZLIB_POLYNOMIAL, 1)); + EXPECT_ZERO(testBulkVerifyCrc(2, CRC32C_POLYNOMIAL, 1)); + EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 1)); + EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 2)); + EXPECT_ZERO(testBulkVerifyCrc(17, CRC32_ZLIB_POLYNOMIAL, 2)); + EXPECT_ZERO(testBulkVerifyCrc(17, CRC32C_POLYNOMIAL, 4)); + EXPECT_ZERO(testBulkVerifyCrc(17, CRC32_ZLIB_POLYNOMIAL, 4)); + + fprintf(stderr, "%s: SUCCESS.\n", argv[0]); + return EXIT_SUCCESS; +}