HADOOP-11887. Introduce Intel ISA-L erasure coding library for native erasure encoding support (Kai Zheng via Colin P. McCabe)

Colin Patrick Mccabe 2015-11-05 10:31:01 -08:00
parent efc73d7896
commit 482e35c55a
18 changed files with 1122 additions and 11 deletions

View File

@ -74,6 +74,8 @@ Optional packages:
* Snappy compression
$ sudo apt-get install snappy libsnappy-dev
* Intel ISA-L library for erasure coding
Please refer to https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version
* Bzip2
$ sudo apt-get install bzip2 libbz2-dev
* Jansson (C Library for JSON)
@ -179,6 +181,29 @@ Maven build goals:
* -Dtest.exclude=<TESTCLASSNAME>
* -Dtest.exclude.pattern=**/<TESTCLASSNAME1>.java,**/<TESTCLASSNAME2>.java
Intel ISA-L build options:
Intel ISA-L is an erasure coding library that can be utilized by the native code.
It is currently an optional component, meaning that Hadoop can be built with
or without this dependency. Note that the library is loaded as a dynamic module
at runtime. Please refer to the official site for details about the library:
https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version
* Use -Drequire.isal to fail the build if libisal.so is not found.
If this option is not specified and the ISA-L library is missing,
we silently build a version of libhadoop.so that cannot make use of ISA-L and
the native raw erasure coders.
This option is recommended if you plan on making use of the native raw erasure
coders and want more repeatable builds.
* Use -Disal.prefix to specify a nonstandard installation prefix for the
libisal library files. You do not need this option if you have installed
ISA-L to the system library path.
* Use -Disal.lib to specify a nonstandard directory that directly contains
the libisal library files.
* Use -Dbundle.isal to copy the contents of the isal.lib directory into
the final tar file. This option requires that -Disal.lib is also given,
and it ignores the -Disal.prefix option. See the example invocation after
this list.
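For example, a native build that requires ISA-L and bundles it into the
distribution tar might be invoked as follows; this is a sketch only, and the
library path /usr/local/lib is an illustrative placeholder rather than a default:
$ mvn package -Pdist,native -DskipTests -Dtar \
      -Drequire.isal -Disal.lib=/usr/local/lib -Dbundle.isal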
----------------------------------------------------------------------------------
Building components separately

View File

@ -948,6 +948,9 @@ Release 2.8.0 - UNRELEASED
HADOOP-12040. Adjust inputs order for the decode API in raw erasure coder.
(Kai Zheng via yliu)
HADOOP-11887. Introduce Intel ISA-L erasure coding library for native
erasure encoding support (Kai Zheng via Colin P. McCabe)
OPTIMIZATIONS
HADOOP-11785. Reduce the number of listStatus operation in distcp

View File

@ -567,6 +567,9 @@
<openssl.prefix></openssl.prefix>
<openssl.lib></openssl.lib>
<openssl.include></openssl.include>
<require.isal>false</require.isal>
<isal.prefix></isal.prefix>
<isal.lib></isal.lib>
<require.openssl>false</require.openssl>
<runningWithNative>true</runningWithNative>
<bundle.openssl.in.bin>false</bundle.openssl.in.bin>
@ -620,6 +623,7 @@
<javahClassName>org.apache.hadoop.io.compress.snappy.SnappyDecompressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Compressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Decompressor</javahClassName>
<javahClassName>org.apache.hadoop.io.erasurecode.ErasureCodeNative</javahClassName>
<javahClassName>org.apache.hadoop.crypto.OpensslCipher</javahClassName>
<javahClassName>org.apache.hadoop.crypto.random.OpensslSecureRandom</javahClassName>
<javahClassName>org.apache.hadoop.util.NativeCrc32</javahClassName>
@ -642,7 +646,7 @@
<configuration>
<target>
<exec executable="cmake" dir="${project.build.directory}/native" failonerror="true">
<arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model} -DREQUIRE_BZIP2=${require.bzip2} -DREQUIRE_SNAPPY=${require.snappy} -DCUSTOM_SNAPPY_PREFIX=${snappy.prefix} -DCUSTOM_SNAPPY_LIB=${snappy.lib} -DCUSTOM_SNAPPY_INCLUDE=${snappy.include} -DREQUIRE_ISAL=${require.isal} -DCUSTOM_ISAL_PREFIX=${isal.prefix} -DCUSTOM_ISAL_LIB=${isal.lib} -DREQUIRE_OPENSSL=${require.openssl} -DCUSTOM_OPENSSL_PREFIX=${openssl.prefix} -DCUSTOM_OPENSSL_LIB=${openssl.lib} -DCUSTOM_OPENSSL_INCLUDE=${openssl.include} -DEXTRA_LIBHADOOP_RPATH=${extra.libhadoop.rpath}"/>
</exec>
<exec executable="make" dir="${project.build.directory}/native" failonerror="true">
<arg line="VERBOSE=1"/>
@ -664,7 +668,13 @@
<arg value="[ x$SKIPTESTS = xtrue ] || ${project.build.directory}/native/test_bulk_crc32"/> <arg value="[ x$SKIPTESTS = xtrue ] || ${project.build.directory}/native/test_bulk_crc32"/>
<env key="SKIPTESTS" value="${skipTests}"/> <env key="SKIPTESTS" value="${skipTests}"/>
</exec> </exec>
</target> <exec executable="${shell-executable}" failonerror="true" dir="${project.build.directory}/native">
<arg value="-c"/>
<arg value="[ ! -f ${project.build.directory}/native/erasure_code_test ] || ${project.build.directory}/native/erasure_code_test"/>
<env key="SKIPTESTS" value="${skipTests}"/>
<env key="LD_LIBRARY_PATH" value="${LD_LIBRARY_PATH}:${isal.lib}:${isal.prefix}"/>
</exec>
</target>
</configuration> </configuration>
</execution> </execution>
</executions> </executions>
@ -684,6 +694,9 @@
<snappy.prefix></snappy.prefix>
<snappy.lib></snappy.lib>
<snappy.include></snappy.include>
<require.isal>false</require.isal>
<isal.prefix></isal.prefix>
<isal.lib></isal.lib>
<require.snappy>false</require.snappy>
<bundle.snappy.in.bin>true</bundle.snappy.in.bin>
<openssl.prefix></openssl.prefix>
@ -737,6 +750,7 @@
<javahClassName>org.apache.hadoop.io.compress.snappy.SnappyDecompressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Compressor</javahClassName>
<javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Decompressor</javahClassName>
<javahClassName>org.apache.hadoop.io.erasurecode.ErasureCodeNative</javahClassName>
<javahClassName>org.apache.hadoop.crypto.OpensslCipher</javahClassName>
<javahClassName>org.apache.hadoop.crypto.random.OpensslSecureRandom</javahClassName>
<javahClassName>org.apache.hadoop.util.NativeCrc32</javahClassName>
@ -790,6 +804,9 @@
<argument>/p:CustomOpensslLib=${openssl.lib}</argument>
<argument>/p:CustomOpensslInclude=${openssl.include}</argument>
<argument>/p:RequireOpenssl=${require.openssl}</argument>
<argument>/p:RequireIsal=${require.isal}</argument>
<argument>/p:CustomIsalPrefix=${isal.prefix}</argument>
<argument>/p:CustomIsalLib=${isal.lib}</argument>
</arguments>
</configuration>
</execution>

View File

@ -94,6 +94,29 @@ else()
endif()
endif()
set(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
hadoop_set_find_shared_library_version("2")
find_library(ISAL_LIBRARY
NAMES isal
PATHS ${CUSTOM_ISAL_PREFIX} ${CUSTOM_ISAL_PREFIX}/lib
${CUSTOM_ISAL_PREFIX}/lib64 ${CUSTOM_ISAL_LIB})
set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES})
if (ISAL_LIBRARY)
GET_FILENAME_COMPONENT(HADOOP_ISAL_LIBRARY ${ISAL_LIBRARY} NAME)
set(ISAL_INCLUDE_DIR ${SRC}/io/erasurecode/include)
set(ISAL_SOURCE_FILES
${SRC}/io/erasurecode/erasure_code.c)
add_executable(erasure_code_test
${SRC}/io/erasurecode/erasure_code.c
${TST}/io/erasurecode/erasure_code_test.c
)
target_link_libraries(erasure_code_test ${CMAKE_DL_LIBS})
else (ISAL_LIBRARY)
IF(REQUIRE_ISAL)
MESSAGE(FATAL_ERROR "Required ISA-L library could not be found. ISAL_LIBRARY=${ISAL_LIBRARY}, CUSTOM_ISAL_PREFIX=${CUSTOM_ISAL_PREFIX}")
ENDIF(REQUIRE_ISAL)
endif (ISAL_LIBRARY)
# Build hardware CRC32 acceleration, if supported on the platform.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
set(BULK_CRC_ARCH_SOURCE_FIlE "${SRC}/util/bulk_crc32_x86.c")
@ -169,6 +192,7 @@ include_directories(
${ZLIB_INCLUDE_DIRS}
${BZIP2_INCLUDE_DIR}
${SNAPPY_INCLUDE_DIR}
${ISAL_INCLUDE_DIR}
${OPENSSL_INCLUDE_DIR}
${SRC}/util
)
@ -181,6 +205,7 @@ hadoop_add_dual_library(hadoop
${SRC}/io/compress/lz4/Lz4Decompressor.c
${SRC}/io/compress/lz4/lz4.c
${SRC}/io/compress/lz4/lz4hc.c
${ISAL_SOURCE_FILES}
${SNAPPY_SOURCE_FILES}
${OPENSSL_SOURCE_FILES}
${SRC}/io/compress/zlib/ZlibCompressor.c

View File

@ -22,6 +22,7 @@
#cmakedefine HADOOP_BZIP2_LIBRARY "@HADOOP_BZIP2_LIBRARY@"
#cmakedefine HADOOP_SNAPPY_LIBRARY "@HADOOP_SNAPPY_LIBRARY@"
#cmakedefine HADOOP_OPENSSL_LIBRARY "@HADOOP_OPENSSL_LIBRARY@"
#cmakedefine HADOOP_ISAL_LIBRARY "@HADOOP_ISAL_LIBRARY@"
#cmakedefine HAVE_SYNC_FILE_RANGE
#cmakedefine HAVE_POSIX_FADVISE

View File

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io.erasurecode;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.NativeCodeLoader;
/**
* Utilities related to the erasure code native libraries (for now, Intel ISA-L).
*/
public final class ErasureCodeNative {
private static final Log LOG =
LogFactory.getLog(ErasureCodeNative.class.getName());
/**
* The reason why ISA-L library is not available, or null if it is available.
*/
private static final String LOADING_FAILURE_REASON;
static {
if (!NativeCodeLoader.isNativeCodeLoaded()) {
LOADING_FAILURE_REASON = "hadoop native library cannot be loaded.";
} else if (!NativeCodeLoader.buildSupportsIsal()) {
LOADING_FAILURE_REASON = "libhadoop was built without ISA-L support";
} else {
String problem = null;
try {
loadLibrary();
} catch (Throwable t) {
problem = "Loading ISA-L failed: " + t.getMessage();
LOG.error("Loading ISA-L failed", t);
}
LOADING_FAILURE_REASON = problem;
}
}
private ErasureCodeNative() {}
/**
* Are native libraries loaded?
*/
public static boolean isNativeCodeLoaded() {
return LOADING_FAILURE_REASON == null;
}
/**
* Is the native ISA-L library loaded and initialized? Throw exception if not.
*/
public static void checkNativeCodeLoaded() {
if (LOADING_FAILURE_REASON != null) {
throw new RuntimeException(LOADING_FAILURE_REASON);
}
}
/**
* Load the native library, if it is available and supported.
*/
public static native void loadLibrary();
/**
* Get the native library name that's available or supported.
*/
public static native String getLibraryName();
public static String getLoadingFailureReason() {
return LOADING_FAILURE_REASON;
}
}
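As a usage sketch (not part of this commit), client code can probe this facade
before choosing between native and pure-Java coders. Only the ErasureCodeNative
calls below come from the class above; the wrapper program itself is an
illustrative assumption.

import org.apache.hadoop.io.erasurecode.ErasureCodeNative;

public class IsalProbe {
  public static void main(String[] args) {
    if (ErasureCodeNative.isNativeCodeLoaded()) {
      // The native ISA-L path is usable; report which shared library was loaded.
      System.out.println("ISA-L coder available via "
          + ErasureCodeNative.getLibraryName());
    } else {
      // Explains why the native path is unavailable (e.g. libhadoop missing,
      // or libhadoop built without ISA-L support); fall back to Java coders.
      System.out.println("Falling back to pure-Java coders: "
          + ErasureCodeNative.getLoadingFailureReason());
    }
  }
}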

View File

@ -31,7 +31,7 @@ import org.apache.hadoop.classification.InterfaceStability;
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public final class NativeCodeLoader {
private static final Log LOG =
LogFactory.getLog(NativeCodeLoader.class);
@ -62,6 +62,8 @@ public class NativeCodeLoader {
}
}
private NativeCodeLoader() {}
/**
* Check if native-hadoop code is loaded for this platform.
*
@ -77,6 +79,11 @@ public class NativeCodeLoader {
*/
public static native boolean buildSupportsSnappy();
/**
* Returns true only if this build was compiled with support for ISA-L.
*/
public static native boolean buildSupportsIsal();
/**
* Returns true only if this build was compiled with support for openssl.
*/

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.util;
import org.apache.hadoop.io.erasurecode.ErasureCodeNative;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.crypto.OpensslCipher;
import org.apache.hadoop.io.compress.Lz4Codec;
@ -65,6 +66,7 @@ public class NativeLibraryChecker {
boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
boolean zlibLoaded = false;
boolean snappyLoaded = false;
boolean isalLoaded = false;
// lz4 is linked within libhadoop
boolean lz4Loaded = nativeHadoopLoaded;
boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf);
@ -75,6 +77,7 @@ public class NativeLibraryChecker {
String hadoopLibraryName = "";
String zlibLibraryName = "";
String snappyLibraryName = "";
String isalDetail = "";
String lz4LibraryName = "";
String bzip2LibraryName = "";
String winutilsPath = null;
@ -85,18 +88,29 @@ public class NativeLibraryChecker {
if (zlibLoaded) {
zlibLibraryName = ZlibFactory.getLibraryName();
}
snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
SnappyCodec.isNativeCodeLoaded();
if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) {
snappyLibraryName = SnappyCodec.getLibraryName();
}
isalDetail = ErasureCodeNative.getLoadingFailureReason();
if (isalDetail != null) {
isalLoaded = false;
} else {
isalDetail = ErasureCodeNative.getLibraryName();
isalLoaded = true;
}
openSslDetail = OpensslCipher.getLoadingFailureReason();
if (openSslDetail != null) {
openSslLoaded = false;
} else {
openSslDetail = OpensslCipher.getLibraryName();
openSslLoaded = true;
}
if (lz4Loaded) {
lz4LibraryName = Lz4Codec.getLibraryName();
}
@ -125,6 +139,8 @@ public class NativeLibraryChecker {
System.out.printf("lz4: %b %s%n", lz4Loaded, lz4LibraryName); System.out.printf("lz4: %b %s%n", lz4Loaded, lz4LibraryName);
System.out.printf("bzip2: %b %s%n", bzip2Loaded, bzip2LibraryName); System.out.printf("bzip2: %b %s%n", bzip2Loaded, bzip2LibraryName);
System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail); System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail);
System.out.printf("ISA-L: %b %s%n", isalLoaded, isalDetail);
if (Shell.WINDOWS) { if (Shell.WINDOWS) {
System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath); System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath);
} }
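A note on verification: the hadoop checknative command drives this
NativeLibraryChecker, so after this change it also reports ISA-L status. As an
illustrative example (assumed output, not taken from a real run), a libhadoop
built without ISA-L support would print a line of the form:
ISA-L: false libhadoop was built without ISA-L support
where the trailing message is ErasureCodeNative's loading-failure reason.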

View File

@ -17,7 +17,7 @@
limitations under the License.
-->
<Project DefaultTargets="CheckRequireSnappy;CheckRequireIsal;Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
@ -79,11 +79,22 @@
<IncludePath Condition="'$(SnappyEnabled)' == 'true'">$(SnappyInclude);$(IncludePath)</IncludePath>
<IncludePath Condition="Exists('$(ZLIB_HOME)')">$(ZLIB_HOME);$(IncludePath)</IncludePath>
</PropertyGroup>
<PropertyGroup>
<IsalLib Condition="Exists('$(CustomIsalPrefix)\isa-l.dll')">$(CustomIsalPrefix)</IsalLib>
<IsalLib Condition="Exists('$(CustomIsalPrefix)\lib\isa-l.dll') And '$(IsalLib)' == ''">$(CustomIsalPrefix)\lib</IsalLib>
<IsalLib Condition="Exists('$(CustomIsalLib)') And '$(IsalLib)' == ''">$(CustomIsalLib)</IsalLib>
<IsalEnabled Condition="'$(IsalLib)' != ''">true</IsalEnabled>
</PropertyGroup>
<Target Name="CheckRequireSnappy">
<Error
Text="Required snappy library could not be found. SnappyLibrary=$(SnappyLibrary), SnappyInclude=$(SnappyInclude), CustomSnappyLib=$(CustomSnappyLib), CustomSnappyInclude=$(CustomSnappyInclude), CustomSnappyPrefix=$(CustomSnappyPrefix)"
Condition="'$(RequireSnappy)' == 'true' And '$(SnappyEnabled)' != 'true'" />
</Target>
<Target Name="CheckRequireIsal">
<Error
Text="Required ISA-L library could not be found. CustomIsalLib=$(CustomIsalLib), CustomIsalPrefix=$(CustomIsalPrefix)"
Condition="'$(RequireIsal)' == 'true' And '$(IsalEnabled)' != 'true'" />
</Target>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
@ -92,7 +103,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;NATIVE_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>..\winutils\include;..\native\src\org\apache\hadoop\io\erasurecode\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<CompileAs>CompileAsC</CompileAs>
<DisableSpecificWarnings>4244</DisableSpecificWarnings>
</ClCompile>
@ -113,7 +124,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;NATIVE_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>..\winutils\include;..\native\src\org\apache\hadoop\io\erasurecode\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<CompileAs>CompileAsC</CompileAs>
<DisableSpecificWarnings>4244</DisableSpecificWarnings>
</ClCompile>
@ -145,11 +156,16 @@
<ClCompile Include="src\org\apache\hadoop\util\bulk_crc32.c" /> <ClCompile Include="src\org\apache\hadoop\util\bulk_crc32.c" />
<ClCompile Include="src\org\apache\hadoop\util\NativeCodeLoader.c"> <ClCompile Include="src\org\apache\hadoop\util\NativeCodeLoader.c">
<AdditionalOptions Condition="'$(SnappyEnabled)' == 'true'">/D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\"</AdditionalOptions> <AdditionalOptions Condition="'$(SnappyEnabled)' == 'true'">/D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\"</AdditionalOptions>
<AdditionalOptions Condition="'$(IsalEnabled)' == 'true'">/D HADOOP_ISAL_LIBRARY=\"isa-l.dll\"</AdditionalOptions>
</ClCompile> </ClCompile>
<ClCompile Include="src\org\apache\hadoop\util\NativeCrc32.c" /> <ClCompile Include="src\org\apache\hadoop\util\NativeCrc32.c" />
<ClCompile Include="src\org\apache\hadoop\yarn\server\nodemanager\windows_secure_container_executor.c"> <ClCompile Include="src\org\apache\hadoop\yarn\server\nodemanager\windows_secure_container_executor.c">
<AdditionalIncludeDirectories>src\org\apache\hadoop\io\nativeio;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>src\org\apache\hadoop\io\nativeio;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile> </ClCompile>
<ClCompile Include="src\org\apache\hadoop\io\erasurecode\erasure_code.c" Condition="'$(IsalEnabled)' == 'true'">
<AdditionalOptions>/D HADOOP_ISAL_LIBRARY=\"isa-l.dll\"</AdditionalOptions>
</ClCompile>
<ClCompile Include="src\org\apache\hadoop\io\erasurecode\coder\erasure_code_native.c" Condition="'$(IsalEnabled)' == 'true'"/>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\src\org\apache\hadoop\util\crc32c_tables.h" /> <ClInclude Include="..\src\org\apache\hadoop\util\crc32c_tables.h" />

View File

@ -0,0 +1,49 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "org_apache_hadoop.h"
#include "../include/erasure_code.h"
#include "org_apache_hadoop_io_erasurecode_ErasureCodeNative.h"
#ifdef UNIX
#include "config.h"
#endif
JNIEXPORT void JNICALL
Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_loadLibrary
(JNIEnv *env, jclass myclass) {
char errMsg[1024];
load_erasurecode_lib(errMsg, sizeof(errMsg));
if (strlen(errMsg) > 0) {
THROW(env, "java/lang/UnsatisfiedLinkError", errMsg);
}
}
JNIEXPORT jstring JNICALL
Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_getLibraryName
(JNIEnv *env, jclass myclass) {
char* libName = get_library_name();
if (libName == NULL) {
libName = "Unavailable";
}
return (*env)->NewStringUTF(env, libName);
}

View File

@ -0,0 +1,29 @@
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class org_apache_hadoop_io_erasurecode_ErasureCodeNative */
#ifndef _Included_org_apache_hadoop_io_erasurecode_ErasureCodeNative
#define _Included_org_apache_hadoop_io_erasurecode_ErasureCodeNative
#ifdef __cplusplus
extern "C" {
#endif
/*
* Class: org_apache_hadoop_io_erasurecode_ErasureCodeNative
* Method: loadLibrary
* Signature: ()V
*/
JNIEXPORT void JNICALL Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_loadLibrary
(JNIEnv *, jclass);
/*
* Class: org_apache_hadoop_io_erasurecode_ErasureCodeNative
* Method: getLibraryName
* Signature: ()Ljava/lang/String;
*/
JNIEXPORT jstring JNICALL Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_getLibraryName
(JNIEnv *, jclass);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,271 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "org_apache_hadoop.h"
#include "../include/gf_util.h"
#include "../include/erasure_code.h"
#ifdef UNIX
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dlfcn.h>
#include "config.h"
#endif
#ifdef WINDOWS
#include <Windows.h>
#endif
/**
* erasure_code.c
* Implementation of erasure code utilities on top of the ISA-L shared library.
* Building this code does not rely on any ISA-L source code, but running it
* relies on successfully loading the dynamic library at runtime.
*
*/
/**
* The loaded library handle.
*/
static void* libec = NULL;
/**
* A helper function to dlsym a 'symbol' from a given library-handle.
*/
#ifdef UNIX
static __attribute__ ((unused))
void *my_dlsym(void *handle, const char *symbol) {
void *func_ptr = dlsym(handle, symbol);
return func_ptr;
}
/* A helper macro to dlsym the requisite dynamic symbol in NON-JNI env. */
#define EC_LOAD_DYNAMIC_SYMBOL(func_ptr, handle, symbol) \
if ((func_ptr = my_dlsym(handle, symbol)) == NULL) { \
return "Failed to load symbol" symbol; \
}
#endif
#ifdef WINDOWS
static FARPROC WINAPI my_dlsym(HMODULE handle, LPCSTR symbol) {
FARPROC func_ptr = GetProcAddress(handle, symbol);
return func_ptr;
}
/* A helper macro to dlsym the requisite dynamic symbol in NON-JNI env. */
#define EC_LOAD_DYNAMIC_SYMBOL(func_type, func_ptr, handle, symbol) \
if ((func_ptr = (func_type)my_dlsym(handle, symbol)) == NULL) { \
return "Failed to load symbol" symbol; \
}
#endif
#ifdef UNIX
// For gf_util.h
static unsigned char (*d_gf_mul)(unsigned char, unsigned char);
static unsigned char (*d_gf_inv)(unsigned char);
static void (*d_gf_gen_rs_matrix)(unsigned char *, int, int);
static void (*d_gf_gen_cauchy_matrix)(unsigned char *, int, int);
static int (*d_gf_invert_matrix)(unsigned char *, unsigned char *, const int);
static int (*d_gf_vect_mul)(int, unsigned char *, void *, void *);
// For erasure_code.h
static void (*d_ec_init_tables)(int, int, unsigned char*, unsigned char*);
static void (*d_ec_encode_data)(int, int, int, unsigned char*,
unsigned char**, unsigned char**);
static void (*d_ec_encode_data_update)(int, int, int, int, unsigned char*,
unsigned char*, unsigned char**);
#endif
#ifdef WINDOWS
// For erasure_code.h
typedef unsigned char (__cdecl *__d_gf_mul)(unsigned char, unsigned char);
static __d_gf_mul d_gf_mul;
typedef unsigned char (__cdecl *__d_gf_inv)(unsigned char);
static __d_gf_inv d_gf_inv;
typedef void (__cdecl *__d_gf_gen_rs_matrix)(unsigned char *, int, int);
static __d_gf_gen_rs_matrix d_gf_gen_rs_matrix;
typedef void (__cdecl *__d_gf_gen_cauchy_matrix)(unsigned char *, int, int);
static __d_gf_gen_cauchy_matrix d_gf_gen_cauchy_matrix;
typedef int (__cdecl *__d_gf_invert_matrix)(unsigned char *,
unsigned char *, const int);
static __d_gf_invert_matrix d_gf_invert_matrix;
typedef int (__cdecl *__d_gf_vect_mul)(int, unsigned char *, void *, void *);
static __d_gf_vect_mul d_gf_vect_mul;
// For erasure_code.h
typedef void (__cdecl *__d_ec_init_tables)(int, int,
unsigned char*, unsigned char*);
static __d_ec_init_tables d_ec_init_tables;
typedef void (__cdecl *__d_ec_encode_data)(int, int, int, unsigned char*,
unsigned char**, unsigned char**);
static __d_ec_encode_data d_ec_encode_data;
typedef void (__cdecl *__d_ec_encode_data_update)(int, int, int, int, unsigned char*,
unsigned char*, unsigned char**);
static __d_ec_encode_data_update d_ec_encode_data_update;
#endif
static const char* load_functions(void* libec) {
#ifdef UNIX
EC_LOAD_DYNAMIC_SYMBOL(d_gf_mul, libec, "gf_mul");
EC_LOAD_DYNAMIC_SYMBOL(d_gf_inv, libec, "gf_inv");
EC_LOAD_DYNAMIC_SYMBOL(d_gf_gen_rs_matrix, libec, "gf_gen_rs_matrix");
EC_LOAD_DYNAMIC_SYMBOL(d_gf_gen_cauchy_matrix, libec, "gf_gen_cauchy1_matrix");
EC_LOAD_DYNAMIC_SYMBOL(d_gf_invert_matrix, libec, "gf_invert_matrix");
EC_LOAD_DYNAMIC_SYMBOL(d_gf_vect_mul, libec, "gf_vect_mul");
EC_LOAD_DYNAMIC_SYMBOL(d_ec_init_tables, libec, "ec_init_tables");
EC_LOAD_DYNAMIC_SYMBOL(d_ec_encode_data, libec, "ec_encode_data");
EC_LOAD_DYNAMIC_SYMBOL(d_ec_encode_data_update, libec, "ec_encode_data_update");
#endif
#ifdef WINDOWS
EC_LOAD_DYNAMIC_SYMBOL(__d_gf_mul, d_gf_mul, libec, "gf_mul");
EC_LOAD_DYNAMIC_SYMBOL(__d_gf_inv, d_gf_inv, libec, "gf_inv");
EC_LOAD_DYNAMIC_SYMBOL(__d_gf_gen_rs_matrix, d_gf_gen_rs_matrix, libec, "gf_gen_rs_matrix");
EC_LOAD_DYNAMIC_SYMBOL(__d_gf_gen_cauchy_matrix, d_gf_gen_cauchy_matrix, libec, "gf_gen_cauchy1_matrix");
EC_LOAD_DYNAMIC_SYMBOL(__d_gf_invert_matrix, d_gf_invert_matrix, libec, "gf_invert_matrix");
EC_LOAD_DYNAMIC_SYMBOL(__d_gf_vect_mul, d_gf_vect_mul, libec, "gf_vect_mul");
EC_LOAD_DYNAMIC_SYMBOL(__d_ec_init_tables, d_ec_init_tables, libec, "ec_init_tables");
EC_LOAD_DYNAMIC_SYMBOL(__d_ec_encode_data, d_ec_encode_data, libec, "ec_encode_data");
EC_LOAD_DYNAMIC_SYMBOL(__d_ec_encode_data_update, d_ec_encode_data_update, libec, "ec_encode_data_update");
#endif
return NULL;
}
void load_erasurecode_lib(char* err, size_t err_len) {
const char* errMsg;
err[0] = '\0';
if (libec != NULL) {
return;
}
// Load Intel ISA-L
#ifdef UNIX
libec = dlopen(HADOOP_ISAL_LIBRARY, RTLD_LAZY | RTLD_GLOBAL);
if (libec == NULL) {
snprintf(err, err_len, "Failed to load %s (%s)",
HADOOP_ISAL_LIBRARY, dlerror());
return;
}
// Clear any existing error
dlerror();
#endif
#ifdef WINDOWS
libec = LoadLibrary(HADOOP_ISAL_LIBRARY);
if (libec == NULL) {
snprintf(err, err_len, "Failed to load %s", HADOOP_ISAL_LIBRARY);
return;
}
#endif
errMsg = load_functions(libec);
if (errMsg != NULL) {
snprintf(err, err_len, "Loading functions from ISA-L failed: %s", errMsg);
}
}
int build_support_erasurecode() {
#ifdef HADOOP_ISAL_LIBRARY
return 1;
#else
return 0;
#endif
}
const char* get_library_name() {
#ifdef UNIX
Dl_info dl_info;
if (d_ec_encode_data == NULL) {
return HADOOP_ISAL_LIBRARY;
}
if(dladdr(d_ec_encode_data, &dl_info)) {
return dl_info.dli_fname;
}
#else
LPTSTR filename = NULL;
if (libec == NULL) {
return HADOOP_ISAL_LIBRARY;
}
if (GetModuleFileName(libec, filename, 256) > 0) {
return filename;
}
#endif
return NULL;
}
unsigned char h_gf_mul(unsigned char a, unsigned char b) {
return d_gf_mul(a, b);
}
unsigned char h_gf_inv(unsigned char a) {
return d_gf_inv(a);
}
void h_gf_gen_rs_matrix(unsigned char *a, int m, int k) {
d_gf_gen_rs_matrix(a, m, k);
}
void h_gf_gen_cauchy_matrix(unsigned char *a, int m, int k) {
d_gf_gen_cauchy_matrix(a, m, k);
}
int h_gf_invert_matrix(unsigned char *in, unsigned char *out, const int n) {
return d_gf_invert_matrix(in, out, n);
}
int h_gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest) {
return d_gf_vect_mul(len, gftbl, src, dest);
}
void h_ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls) {
d_ec_init_tables(k, rows, a, gftbls);
}
void h_ec_encode_data(int len, int k, int rows, unsigned char *gftbls,
unsigned char **data, unsigned char **coding) {
d_ec_encode_data(len, k, rows, gftbls, data, coding);
}
void h_ec_encode_data_update(int len, int k, int rows, int vec_i,
unsigned char *gftbls, unsigned char *data, unsigned char **coding) {
d_ec_encode_data_update(len, k, rows, vec_i, gftbls, data, coding);
}

View File

@ -0,0 +1,125 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _ERASURE_CODE_H_
#define _ERASURE_CODE_H_
#include <stddef.h>
/**
* Interface to functions supporting erasure code encode and decode.
*
* This file defines the interface to optimized functions used in erasure
* codes. Encode and decode of erasures in GF(2^8) are made by calculating the
* dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
* set of coefficients. Values for the coefficients are determined by the type
* of erasure code. Using a general dot product means that any sequence of
* coefficients may be used including erasure codes based on random
* coefficients.
* Multiple versions of dot product are supplied to calculate 1-6 output
* vectors in one pass.
* Base GF multiply and divide functions can be sped up by defining
* GF_LARGE_TABLES at the expense of memory size.
*
*/
/**
* Return 0 if not supported, 1 otherwise.
*/
int build_support_erasurecode();
/**
* Get the library name, possibly with its full path.
*/
const char* get_library_name();
/**
* Initialize and load erasure code library, returning error message if any.
*
* @param err The err message buffer.
* @param err_len The length of the message buffer.
*/
void load_erasurecode_lib(char* err, size_t err_len);
/**
* Initialize tables for fast Erasure Code encode and decode.
*
* Generates the expanded tables needed for fast encode or decode for erasure
* codes on blocks of data. 32bytes is generated for each input coefficient.
*
* @param k The number of vector sources or rows in the generator matrix
* for coding.
* @param rows The number of output vectors to concurrently encode/decode.
* @param a Pointer to sets of arrays of input coefficients used to encode
* or decode data.
* @param gftbls Pointer to start of space for concatenated output tables
* generated from input coefficients. Must be of size 32*k*rows.
* @returns none
*/
void h_ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
/**
* Generate or decode erasure codes on blocks of data, runs appropriate version.
*
* Given a list of source data blocks, generate one or multiple blocks of
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
* suitable set of coefficients, this function will perform the fast generation
* or decoding of Reed-Solomon type erasure codes.
*
* This function determines what instruction sets are enabled and
* selects the appropriate version at runtime.
*
* @param len Length of each block of data (vector) of source or dest data.
* @param k The number of vector sources or rows in the generator matrix
* for coding.
* @param rows The number of output vectors to concurrently encode/decode.
* @param gftbls Pointer to array of input tables generated from coding
* coefficients in ec_init_tables(). Must be of size 32*k*rows
* @param data Array of pointers to source input buffers.
* @param coding Array of pointers to coded output buffers.
* @returns none
*/
void h_ec_encode_data(int len, int k, int rows, unsigned char *gftbls,
unsigned char **data, unsigned char **coding);
/**
* @brief Generate update for encode or decode of erasure codes from single
* source, runs appropriate version.
*
* Given one source data block, update one or multiple blocks of encoded data as
* specified by a matrix of GF(2^8) coefficients. When given a suitable set of
* coefficients, this function will perform the fast generation or decoding of
* Reed-Solomon type erasure codes from one input source at a time.
*
* This function determines what instruction sets are enabled and selects the
* appropriate version at runtime.
*
* @param len Length of each block of data (vector) of source or dest data.
* @param k The number of vector sources or rows in the generator matrix
* for coding.
* @param rows The number of output vectors to concurrently encode/decode.
* @param vec_i The vector index corresponding to the single input source.
* @param gftbls Pointer to array of input tables generated from coding
* coefficients in ec_init_tables(). Must be of size 32*k*rows
* @param data Pointer to single input source used to update output parity.
* @param coding Array of pointers to coded output buffers.
* @returns none
*/
void h_ec_encode_data_update(int len, int k, int rows, int vec_i,
unsigned char *gftbls, unsigned char *data, unsigned char **coding);
#endif //_ERASURE_CODE_H_

View File

@ -0,0 +1,111 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _GF_UTIL_H
#define _GF_UTIL_H
/**
* gf_util.h
* Interface to functions for vector (block) multiplication in GF(2^8).
*
* This file defines the interface to routines used in fast RAID rebuild and
* erasure codes.
*/
/**
* Single element GF(2^8) multiply.
*
* @param a Multiplicand a
* @param b Multiplicand b
* @returns Product of a and b in GF(2^8)
*/
unsigned char h_gf_mul(unsigned char a, unsigned char b);
/**
* Single element GF(2^8) inverse.
*
* @param a Input element
* @returns Field element b such that a x b = {1}
*/
unsigned char h_gf_inv(unsigned char a);
/**
* Generate a matrix of coefficients to be used for encoding.
*
* Vandermonde matrix example of encoding coefficients where high portion of
* matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
* i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
* erasure encoding but does not guarantee invertibility for every sub matrix. For
* large k it is possible to find cases where the decode matrix chosen from
* sources and parity not in erasure are not invertible. Users may want to
* adjust for k > 5.
*
* @param a [mxk] array to hold coefficients
* @param m number of rows in matrix corresponding to srcs + parity.
* @param k number of columns in matrix corresponding to srcs.
* @returns none
*/
void h_gf_gen_rs_matrix(unsigned char *a, int m, int k);
/**
* Generate a Cauchy matrix of coefficients to be used for encoding.
*
* Cauchy matrix example of encoding coefficients where high portion of matrix
* is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
* i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertible.
*
* @param a [mxk] array to hold coefficients
* @param m number of rows in matrix corresponding to srcs + parity.
* @param k number of columns in matrix corresponding to srcs.
* @returns none
*/
void h_gf_gen_cauchy_matrix(unsigned char *a, int m, int k);
/**
* Invert a matrix in GF(2^8)
*
* @param in input matrix
* @param out output matrix such that [in] x [out] = [I] - identity matrix
* @param n size of matrix [nxn]
* @returns 0 successful, other fail on singular input matrix
*/
int h_gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
/**
* GF(2^8) vector multiply by constant, runs appropriate version.
*
* Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
* is a single field element in GF(2^8). Can be used for RAID6 rebuild
* and partial write functions. Function requires pre-calculation of a
* 32-element constant array based on constant C. gftbl(C) = {C{00},
* C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }.
* Len and src must be aligned to 32B.
*
* This function determines what instruction sets are enabled
* and selects the appropriate version at runtime.
*
* @param len Length of vector in bytes. Must be aligned to 32B.
* @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
* @param src Pointer to src data array. Must be aligned to 32B.
* @param dest Pointer to destination data array. Must be aligned to 32B.
* @returns 0 pass, other fail
*/
int h_gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
#endif //_GF_UTIL_H

View File

@ -49,6 +49,16 @@ JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_buildSup
#endif
}
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_buildSupportsIsal
(JNIEnv *env, jclass clazz)
{
#ifdef HADOOP_ISAL_LIBRARY
return JNI_TRUE;
#else
return JNI_FALSE;
#endif
}
JNIEXPORT jstring JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_getLibraryName
(JNIEnv *env, jclass clazz)
{

View File

@ -0,0 +1,310 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This is a lightweight version of the same file in Intel ISA-L library to test
* and verify the basic functions of ISA-L integration. Note it's not serving as
* a complete ISA-L library test nor as any sample to write an erasure coder
* using the library. A sample is to be written and provided separately.
*/
#include "org_apache_hadoop.h"
#include "erasure_code.h"
#include "gf_util.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TEST_LEN 8192
#define TEST_SOURCES 127
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES
#define TEST_SEED 11
static void dump(unsigned char *buf, int len)
{
int i;
for (i = 0; i < len;) {
printf(" %2x", 0xff & buf[i++]);
if (i % 32 == 0)
printf("\n");
}
printf("\n");
}
static void dump_matrix(unsigned char **s, int k, int m)
{
int i, j;
for (i = 0; i < k; i++) {
for (j = 0; j < m; j++) {
printf(" %2x", s[i][j]);
}
printf("\n");
}
printf("\n");
}
static void dump_u8xu8(unsigned char *s, int k, int m)
{
int i, j;
for (i = 0; i < k; i++) {
for (j = 0; j < m; j++) {
printf(" %2x", 0xff & s[j + (i * m)]);
}
printf("\n");
}
printf("\n");
}
// Generate Random errors
static void gen_err_list(unsigned char *src_err_list,
unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
{
int i, err;
int nerrs = 0, nsrcerrs = 0;
for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
err = 1 & rand();
src_in_err[i] = err;
if (err) {
src_err_list[nerrs++] = i;
if (i < k) {
nsrcerrs++;
}
}
}
if (nerrs == 0) { // should have at least one error
while ((err = (rand() % KMAX)) >= m) ;
src_err_list[nerrs++] = err;
src_in_err[err] = 1;
if (err < k)
nsrcerrs = 1;
}
*pnerrs = nerrs;
*pnsrcerrs = nsrcerrs;
return;
}
#define NO_INVERT_MATRIX -2
// Generate decode matrix from encode matrix
static int gf_gen_decode_matrix(unsigned char *encode_matrix,
unsigned char *decode_matrix,
unsigned char *invert_matrix,
unsigned int *decode_index,
unsigned char *src_err_list,
unsigned char *src_in_err,
int nerrs, int nsrcerrs, int k, int m)
{
int i, j, p;
int r;
unsigned char *backup, *b, s;
int incr = 0;
b = malloc(MMAX * KMAX);
backup = malloc(MMAX * KMAX);
if (b == NULL || backup == NULL) {
printf("Test failure! Error with malloc\n");
free(b);
free(backup);
return -1;
}
// Construct matrix b by removing error rows
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r])
r++;
for (j = 0; j < k; j++) {
b[k * i + j] = encode_matrix[k * r + j];
backup[k * i + j] = encode_matrix[k * r + j];
}
decode_index[i] = r;
}
incr = 0;
while (h_gf_invert_matrix(b, invert_matrix, k) < 0) {
if (nerrs == (m - k)) {
free(b);
free(backup);
printf("BAD MATRIX\n");
return NO_INVERT_MATRIX;
}
incr++;
memcpy(b, backup, MMAX * KMAX);
for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
if (src_err_list[i] == (decode_index[k - 1] + incr)) {
// skip the erased parity line
incr++;
continue;
}
}
if (decode_index[k - 1] + incr >= m) {
free(b);
free(backup);
printf("BAD MATRIX\n");
return NO_INVERT_MATRIX;
}
decode_index[k - 1] += incr;
for (j = 0; j < k; j++)
b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
};
for (i = 0; i < nsrcerrs; i++) {
for (j = 0; j < k; j++) {
decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
}
}
/* src_err_list from encode_matrix * invert of b for parity decoding */
for (p = nsrcerrs; p < nerrs; p++) {
for (i = 0; i < k; i++) {
s = 0;
for (j = 0; j < k; j++)
s ^= h_gf_mul(invert_matrix[j * k + i],
encode_matrix[k * src_err_list[p] + j]);
decode_matrix[k * p + i] = s;
}
}
free(b);
free(backup);
return 0;
}
int main(int argc, char *argv[])
{
char err[256];
size_t err_len = sizeof(err);
int re, i, j, p, m, k;
int nerrs, nsrcerrs;
unsigned int decode_index[MMAX];
unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
unsigned char *recov[TEST_SOURCES];
if (0 == build_support_erasurecode()) {
printf("The native library isn't available, skipping this test\n");
return 0; // Normal, not an error
}
load_erasurecode_lib(err, err_len);
if (strlen(err) > 0) {
printf("Loading erasurecode library failed: %s\n", err);
return -1;
}
printf("Performing erasure code test\n");
srand(TEST_SEED);
// Allocate the arrays
for (i = 0; i < TEST_SOURCES; i++) {
buffs[i] = malloc(TEST_LEN);
}
for (i = 0; i < TEST_SOURCES; i++) {
temp_buffs[i] = malloc(TEST_LEN);
}
// Test erasure code by encode and recovery
encode_matrix = malloc(MMAX * KMAX);
decode_matrix = malloc(MMAX * KMAX);
invert_matrix = malloc(MMAX * KMAX);
g_tbls = malloc(KMAX * TEST_SOURCES * 32);
if (encode_matrix == NULL || decode_matrix == NULL
|| invert_matrix == NULL || g_tbls == NULL) {
snprintf(err, err_len, "%s", "allocating test matrix buffers error");
return -1;
}
m = 9;
k = 5;
if (m > MMAX || k > KMAX)
return -1;
// Make random data
for (i = 0; i < k; i++)
for (j = 0; j < TEST_LEN; j++)
buffs[i][j] = rand();
// The matrix generated by gf_gen_cauchy1_matrix
// is always invertible.
h_gf_gen_cauchy_matrix(encode_matrix, m, k);
// Generate g_tbls from encode matrix encode_matrix
h_ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
// Perform matrix dot_prod for EC encoding
// using g_tbls from encode matrix encode_matrix
h_ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
// Choose random buffers to be in erasure
memset(src_in_err, 0, TEST_SOURCES);
gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
// Generate decode matrix
re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
invert_matrix, decode_index, src_err_list, src_in_err,
nerrs, nsrcerrs, k, m);
if (re != 0) {
snprintf(err, err_len, "%s", "gf_gen_decode_matrix failed");
return -1;
}
// Pack recovery array as list of valid sources
// Its order must be the same as the order
// to generate matrix b in gf_gen_decode_matrix
for (i = 0; i < k; i++) {
recov[i] = buffs[decode_index[i]];
}
// Recover data
h_ec_init_tables(k, nerrs, decode_matrix, g_tbls);
h_ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
for (i = 0; i < nerrs; i++) {
if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
snprintf(err, err_len, "%s", "Error recovery failed");
printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
printf(" - erase list = ");
for (j = 0; j < nerrs; j++) {
printf(" %d", src_err_list[j]);
}
printf(" - Index = ");
for (p = 0; p < k; p++) {
printf(" %d", decode_index[p]);
}
printf("\nencode_matrix:\n");
dump_u8xu8((unsigned char *) encode_matrix, m, k);
printf("inv b:\n");
dump_u8xu8((unsigned char *) invert_matrix, k, k);
printf("\ndecode_matrix:\n");
dump_u8xu8((unsigned char *) decode_matrix, m, k);
printf("recov %d:", src_err_list[i]);
dump(temp_buffs[k + i], 25);
printf("orig :");
dump(buffs[src_err_list[i]], 25);
return -1;
}
}
printf("done EC tests: Pass\n");
return 0;
}

View File

@ -41,6 +41,7 @@
<hadoop.component>UNDEF</hadoop.component>
<bundle.snappy>false</bundle.snappy>
<bundle.snappy.in.bin>false</bundle.snappy.in.bin>
<bundle.isal>true</bundle.isal>
<bundle.openssl>false</bundle.openssl>
<bundle.openssl.in.bin>false</bundle.openssl.in.bin>
</properties>
@ -332,14 +333,22 @@
mkdir -p $${TARGET_DIR}
cd $${LIB_DIR}
$$TAR lib* | (cd $${TARGET_DIR}/; $$UNTAR)
if [ "X${bundle.snappy}" = "Xtrue" ] ; then
cd "${snappy.lib}"
$$TAR *snappy* | (cd $${TARGET_DIR}/; $$UNTAR)
fi
if [ "X${bundle.openssl}" = "Xtrue" ] ; then
cd "${openssl.lib}"
$$TAR *crypto* | (cd $${TARGET_DIR}/; $$UNTAR)
fi
if [ "X${bundle.isal}" = "Xtrue" ] ; then
if [ "X${isal.lib}" != "X" ]; then
cd "${isal.lib}"
$$TAR *isa* | (cd $${TARGET_DIR}/; $$UNTAR)
else
echo "The required option isal.lib isn't given, bundling ISA-L skipped"
fi
fi
fi
BIN_DIR="${BUILD_DIR}/bin"
if [ -d $${BIN_DIR} ] ; then

View File

@ -1245,6 +1245,7 @@
<!-- Specify where to look for the native DLL on Windows -->
<PATH>${env.PATH};${hadoop.common.build.dir}/bin;${snappy.lib}</PATH>
<PATH>${env.PATH};${hadoop.common.build.dir}/bin;${openssl.lib}</PATH>
<PATH>${env.PATH};${hadoop.common.build.dir}/bin;${isal.lib}</PATH>
</environmentVariables>
</configuration>
</plugin>