HADOOP-9802. Support Snappy codec on Windows. Contributed by Chris Nauroth.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1512872 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris Nauroth 2013-08-11 05:51:34 +00:00
parent 5726834cae
commit 1fafb1e1e0
11 changed files with 195 additions and 41 deletions

View File

@ -340,6 +340,8 @@ Release 2.1.1-beta - UNRELEASED
HADOOP-8814. Replace string equals "" by String#isEmpty().
(Brandon Li via suresh)
HADOOP-9802. Support Snappy codec on Windows. (cnauroth)
OPTIMIZATIONS
BUG FIXES

View File

@ -586,6 +586,13 @@
<family>Windows</family>
</os>
</activation>
<properties>
<snappy.prefix></snappy.prefix>
<snappy.lib></snappy.lib>
<snappy.include></snappy.include>
<require.snappy>false</require.snappy>
<bundle.snappy.in.bin>true</bundle.snappy.in.bin>
</properties>
<build>
<plugins>
<plugin>
@ -670,6 +677,10 @@
<argument>/nologo</argument>
<argument>/p:Configuration=Release</argument>
<argument>/p:OutDir=${project.build.directory}/bin/</argument>
<argument>/p:CustomSnappyPrefix=${snappy.prefix}</argument>
<argument>/p:CustomSnappyLib=${snappy.lib}</argument>
<argument>/p:CustomSnappyInclude=${snappy.include}</argument>
<argument>/p:RequireSnappy=${require.snappy}</argument>
</arguments>
</configuration>
</execution>

View File

@ -17,7 +17,7 @@
limitations under the License.
-->
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Project DefaultTargets="CheckRequireSnappy;Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
@ -49,6 +49,21 @@
<IntDir>..\..\..\target\native\$(Configuration)\</IntDir>
<TargetName>hadoop</TargetName>
</PropertyGroup>
<PropertyGroup>
<SnappyLib Condition="Exists('$(CustomSnappyPrefix)\snappy.dll')">$(CustomSnappyPrefix)</SnappyLib>
<SnappyLib Condition="Exists('$(CustomSnappyPrefix)\lib\snappy.dll') And '$(SnappyLib)' == ''">$(CustomSnappyPrefix)\lib</SnappyLib>
<SnappyLib Condition="Exists('$(CustomSnappyLib)') And '$(SnappyLib)' == ''">$(CustomSnappyLib)</SnappyLib>
<SnappyInclude Condition="Exists('$(CustomSnappyPrefix)\snappy.h')">$(CustomSnappyPrefix)</SnappyInclude>
<SnappyInclude Condition="Exists('$(CustomSnappyPrefix)\include\snappy.h') And '$(SnappyInclude)' == ''">$(CustomSnappyPrefix)\include</SnappyInclude>
<SnappyInclude Condition="Exists('$(CustomSnappyInclude)') And '$(SnappyInclude)' == ''">$(CustomSnappyInclude)</SnappyInclude>
<SnappyEnabled Condition="'$(SnappyLib)' != '' And '$(SnappyInclude)' != ''">true</SnappyEnabled>
<IncludePath Condition="'$(SnappyEnabled)' == 'true'">$(SnappyInclude);$(IncludePath)</IncludePath>
</PropertyGroup>
<Target Name="CheckRequireSnappy">
<Error
Text="Required snappy library could not be found. SnappyLibrary=$(SnappyLibrary), SnappyInclude=$(SnappyInclude), CustomSnappyLib=$(CustomSnappyLib), CustomSnappyInclude=$(CustomSnappyInclude), CustomSnappyPrefix=$(CustomSnappyPrefix)"
Condition="'$(RequireSnappy)' == 'true' And '$(SnappyEnabled)' != 'true'" />
</Target>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
@ -71,6 +86,12 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="src\org\apache\hadoop\io\compress\snappy\SnappyCompressor.c" Condition="'$(SnappyEnabled)' == 'true'">
<AdditionalOptions>/D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\"</AdditionalOptions>
</ClCompile>
<ClCompile Include="src\org\apache\hadoop\io\compress\snappy\SnappyDecompressor.c" Condition="'$(SnappyEnabled)' == 'true'">
<AdditionalOptions>/D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\"</AdditionalOptions>
</ClCompile>
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\lz4.c" />
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\lz4hc.c" />
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\Lz4Compressor.c" />
@ -79,12 +100,15 @@
<ClCompile Include="src\org\apache\hadoop\io\nativeio\NativeIO.c" />
<ClCompile Include="src\org\apache\hadoop\security\JniBasedUnixGroupsMappingWin.c" />
<ClCompile Include="src\org\apache\hadoop\util\bulk_crc32.c" />
<ClCompile Include="src\org\apache\hadoop\util\NativeCodeLoader.c" />
<ClCompile Include="src\org\apache\hadoop\util\NativeCodeLoader.c">
<AdditionalOptions Condition="'$(SnappyEnabled)' == 'true'">/D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\"</AdditionalOptions>
</ClCompile>
<ClCompile Include="src\org\apache\hadoop\util\NativeCrc32.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\src\org\apache\hadoop\util\crc32c_tables.h" />
<ClInclude Include="..\src\org\apache\hadoop\util\crc32_zlib_polynomial_tables.h" />
<ClInclude Include="src\org\apache\hadoop\io\compress\snappy\org_apache_hadoop_io_compress_snappy.h" />
<ClInclude Include="src\org\apache\hadoop\io\nativeio\file_descriptor.h" />
<ClInclude Include="src\org\apache\hadoop\util\bulk_crc32.h" />
<ClInclude Include="src\org\apache\hadoop\util\crc32c_tables.h" />

View File

@ -30,6 +30,10 @@
#include "config.h"
#endif // UNIX
#ifdef WINDOWS
#include "winutils.h"
#endif
#include "org_apache_hadoop_io_compress_snappy_SnappyCompressor.h"
#define JINT_MAX 0x7fffffff
@ -40,11 +44,18 @@ static jfieldID SnappyCompressor_uncompressedDirectBufLen;
static jfieldID SnappyCompressor_compressedDirectBuf;
static jfieldID SnappyCompressor_directBufferSize;
#ifdef UNIX
static snappy_status (*dlsym_snappy_compress)(const char*, size_t, char*, size_t*);
#endif
#ifdef WINDOWS
typedef snappy_status (__cdecl *__dlsym_snappy_compress)(const char*, size_t, char*, size_t*);
static __dlsym_snappy_compress dlsym_snappy_compress;
#endif
JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_initIDs
(JNIEnv *env, jclass clazz){
#ifdef UNIX
// Load libsnappy.so
void *libsnappy = dlopen(HADOOP_SNAPPY_LIBRARY, RTLD_LAZY | RTLD_GLOBAL);
if (!libsnappy) {
@ -53,10 +64,25 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompresso
THROW(env, "java/lang/UnsatisfiedLinkError", msg);
return;
}
#endif
#ifdef WINDOWS
HMODULE libsnappy = LoadLibrary(HADOOP_SNAPPY_LIBRARY);
if (!libsnappy) {
THROW(env, "java/lang/UnsatisfiedLinkError", "Cannot load snappy.dll");
return;
}
#endif
// Locate the requisite symbols from libsnappy.so
#ifdef UNIX
dlerror(); // Clear any existing error
LOAD_DYNAMIC_SYMBOL(dlsym_snappy_compress, env, libsnappy, "snappy_compress");
#endif
#ifdef WINDOWS
LOAD_DYNAMIC_SYMBOL(__dlsym_snappy_compress, dlsym_snappy_compress, env, libsnappy, "snappy_compress");
#endif
SnappyCompressor_clazz = (*env)->GetStaticFieldID(env, clazz, "clazz",
"Ljava/lang/Class;");
@ -74,6 +100,9 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompresso
JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_compressBytesDirect
(JNIEnv *env, jobject thisj){
const char* uncompressed_bytes;
char* compressed_bytes;
snappy_status ret;
// Get members of SnappyCompressor
jobject clazz = (*env)->GetStaticObjectField(env, thisj, SnappyCompressor_clazz);
jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, SnappyCompressor_uncompressedDirectBuf);
@ -84,7 +113,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompresso
// Get the input direct buffer
LOCK_CLASS(env, clazz, "SnappyCompressor");
const char* uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf);
uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf);
UNLOCK_CLASS(env, clazz, "SnappyCompressor");
if (uncompressed_bytes == 0) {
@ -93,7 +122,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompresso
// Get the output direct buffer
LOCK_CLASS(env, clazz, "SnappyCompressor");
char* compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf);
compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf);
UNLOCK_CLASS(env, clazz, "SnappyCompressor");
if (compressed_bytes == 0) {
@ -102,8 +131,8 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompresso
/* size_t should always be 4 bytes or larger. */
buf_len = (size_t)compressed_direct_buf_len;
snappy_status ret = dlsym_snappy_compress(uncompressed_bytes,
uncompressed_direct_buf_len, compressed_bytes, &buf_len);
ret = dlsym_snappy_compress(uncompressed_bytes, uncompressed_direct_buf_len,
compressed_bytes, &buf_len);
if (ret != SNAPPY_OK){
THROW(env, "java/lang/InternalError", "Could not compress data. Buffer length is too small.");
return 0;
@ -128,8 +157,18 @@ Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_getLibraryName(JNIEnv
return (*env)->NewStringUTF(env, dl_info.dli_fname);
}
}
#endif
return (*env)->NewStringUTF(env, HADOOP_SNAPPY_LIBRARY);
}
return (*env)->NewStringUTF(env, HADOOP_SNAPPY_LIBRARY);
#endif
#ifdef WINDOWS
LPWSTR filename = NULL;
GetLibraryName(dlsym_snappy_compress, &filename);
if (filename != NULL) {
return (*env)->NewString(env, filename, (jsize) wcslen(filename));
} else {
return (*env)->NewStringUTF(env, "Unavailable");
}
#endif
}
#endif //define HADOOP_SNAPPY_LIBRARY

View File

@ -37,12 +37,20 @@ static jfieldID SnappyDecompressor_compressedDirectBufLen;
static jfieldID SnappyDecompressor_uncompressedDirectBuf;
static jfieldID SnappyDecompressor_directBufferSize;
#ifdef UNIX
static snappy_status (*dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*);
#endif
#ifdef WINDOWS
typedef snappy_status (__cdecl *__dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*);
static __dlsym_snappy_uncompress dlsym_snappy_uncompress;
#endif
JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_initIDs
(JNIEnv *env, jclass clazz){
// Load libsnappy.so
#ifdef UNIX
void *libsnappy = dlopen(HADOOP_SNAPPY_LIBRARY, RTLD_LAZY | RTLD_GLOBAL);
if (!libsnappy) {
char* msg = (char*)malloc(1000);
@ -50,11 +58,27 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompres
THROW(env, "java/lang/UnsatisfiedLinkError", msg);
return;
}
#endif
#ifdef WINDOWS
HMODULE libsnappy = LoadLibrary(HADOOP_SNAPPY_LIBRARY);
if (!libsnappy) {
THROW(env, "java/lang/UnsatisfiedLinkError", "Cannot load snappy.dll");
return;
}
#endif
// Locate the requisite symbols from libsnappy.so
#ifdef UNIX
dlerror(); // Clear any existing error
LOAD_DYNAMIC_SYMBOL(dlsym_snappy_uncompress, env, libsnappy, "snappy_uncompress");
#endif
#ifdef WINDOWS
LOAD_DYNAMIC_SYMBOL(__dlsym_snappy_uncompress, dlsym_snappy_uncompress, env, libsnappy, "snappy_uncompress");
#endif
SnappyDecompressor_clazz = (*env)->GetStaticFieldID(env, clazz, "clazz",
"Ljava/lang/Class;");
SnappyDecompressor_compressedDirectBuf = (*env)->GetFieldID(env,clazz,
@ -71,6 +95,9 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompres
JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_decompressBytesDirect
(JNIEnv *env, jobject thisj){
const char* compressed_bytes = NULL;
char* uncompressed_bytes = NULL;
snappy_status ret;
// Get members of SnappyDecompressor
jobject clazz = (*env)->GetStaticObjectField(env,thisj, SnappyDecompressor_clazz);
jobject compressed_direct_buf = (*env)->GetObjectField(env,thisj, SnappyDecompressor_compressedDirectBuf);
@ -80,7 +107,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompres
// Get the input direct buffer
LOCK_CLASS(env, clazz, "SnappyDecompressor");
const char* compressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, compressed_direct_buf);
compressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, compressed_direct_buf);
UNLOCK_CLASS(env, clazz, "SnappyDecompressor");
if (compressed_bytes == 0) {
@ -89,14 +116,15 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompres
// Get the output direct buffer
LOCK_CLASS(env, clazz, "SnappyDecompressor");
char* uncompressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf);
uncompressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf);
UNLOCK_CLASS(env, clazz, "SnappyDecompressor");
if (uncompressed_bytes == 0) {
return (jint)0;
}
snappy_status ret = dlsym_snappy_uncompress(compressed_bytes, compressed_direct_buf_len, uncompressed_bytes, &uncompressed_direct_buf_len);
ret = dlsym_snappy_uncompress(compressed_bytes, compressed_direct_buf_len,
uncompressed_bytes, &uncompressed_direct_buf_len);
if (ret == SNAPPY_BUFFER_TOO_SMALL){
THROW(env, "java/lang/InternalError", "Could not decompress data. Buffer length is too small.");
} else if (ret == SNAPPY_INVALID_INPUT){

View File

@ -21,7 +21,11 @@
#define ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H
#include "org_apache_hadoop.h"
#ifdef UNIX
#include <dlfcn.h>
#endif
#include <jni.h>
#include <snappy-c.h>
#include <stddef.h>

View File

@ -23,6 +23,10 @@
#include "config.h"
#endif // UNIX
#ifdef WINDOWS
#include "winutils.h"
#endif
#include <jni.h>
JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_buildSupportsSnappy
@ -47,32 +51,16 @@ JNIEXPORT jstring JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_getLibrar
#endif
#ifdef WINDOWS
SIZE_T ret = 0;
DWORD size = MAX_PATH;
LPWSTR filename = NULL;
HMODULE mod = NULL;
DWORD err = ERROR_SUCCESS;
MEMORY_BASIC_INFORMATION mbi;
ret = VirtualQuery(Java_org_apache_hadoop_util_NativeCodeLoader_getLibraryName,
&mbi, sizeof(mbi));
if (ret == 0) goto cleanup;
mod = mbi.AllocationBase;
do {
filename = (LPWSTR) realloc(filename, size * sizeof(WCHAR));
if (filename == NULL) goto cleanup;
GetModuleFileName(mod, filename, size);
size <<= 1;
err = GetLastError();
} while (err == ERROR_INSUFFICIENT_BUFFER);
if (err != ERROR_SUCCESS) goto cleanup;
return (*env)->NewString(env, filename, (jsize) wcslen(filename));
cleanup:
if (filename != NULL) free(filename);
return (*env)->NewStringUTF(env, "Unavailable");
GetLibraryName(Java_org_apache_hadoop_util_NativeCodeLoader_getLibraryName,
&filename);
if (filename != NULL)
{
return (*env)->NewString(env, filename, (jsize) wcslen(filename));
}
else
{
return (*env)->NewStringUTF(env, "Unavailable");
}
#endif
}

View File

@ -153,4 +153,6 @@ DWORD ChangeFileModeByMask(__in LPCWSTR path, INT mode);
DWORD GetLocalGroupsForUser(__in LPCWSTR user,
__out LPLOCALGROUP_USERS_INFO_0 *groups, __out LPDWORD entries);
BOOL EnablePrivilege(__in LPCWSTR privilegeName);
BOOL EnablePrivilege(__in LPCWSTR privilegeName);
void GetLibraryName(__in LPCVOID lpAddress, __out LPWSTR *filename);

View File

@ -1709,3 +1709,51 @@ void ReportErrorCode(LPCWSTR func, DWORD err)
}
if (msg != NULL) LocalFree(msg);
}
//----------------------------------------------------------------------------
// Function: GetLibraryName
//
// Description:
// Given an address, get the file name of the library from which it was loaded.
//
// Returns:
// None
//
// Notes:
// - The function allocates heap memory and points the filename out parameter to
// the newly allocated memory, which will contain the name of the file.
//
// - If there is any failure, then the function frees the heap memory it
// allocated and sets the filename out parameter to NULL.
//
void GetLibraryName(LPCVOID lpAddress, LPWSTR *filename)
{
SIZE_T ret = 0;
DWORD size = MAX_PATH;
HMODULE mod = NULL;
DWORD err = ERROR_SUCCESS;
MEMORY_BASIC_INFORMATION mbi;
ret = VirtualQuery(lpAddress, &mbi, sizeof(mbi));
if (ret == 0) goto cleanup;
mod = mbi.AllocationBase;
do {
*filename = (LPWSTR) realloc(*filename, size * sizeof(WCHAR));
if (*filename == NULL) goto cleanup;
GetModuleFileName(mod, *filename, size);
size <<= 1;
err = GetLastError();
} while (err == ERROR_INSUFFICIENT_BUFFER);
if (err != ERROR_SUCCESS) goto cleanup;
return;
cleanup:
if (*filename != NULL)
{
free(*filename);
*filename = NULL;
}
}

View File

@ -40,6 +40,7 @@
<hadoop.component>UNDEF</hadoop.component>
<bundle.snappy>false</bundle.snappy>
<bundle.snappy.in.bin>false</bundle.snappy.in.bin>
</properties>
<dependencies>
@ -355,6 +356,12 @@
mkdir -p $${TARGET_BIN_DIR}
cd $${BIN_DIR}
$$TAR * | (cd $${TARGET_BIN_DIR}/; $$UNTAR)
if [ "${bundle.snappy.in.bin}" = "true" ] ; then
if [ "${bundle.snappy}" = "true" ] ; then
cd ${snappy.lib}
$$TAR *snappy* | (cd $${TARGET_BIN_DIR}/; $$UNTAR)
fi
fi
fi
</echo>
<exec executable="sh" dir="${project.build.directory}" failonerror="true">

View File

@ -892,6 +892,7 @@
<!-- will use a native entropy provider. This will not really -->
<!-- attempt to open a file at this path. -->
<java.security.egd>file:/dev/urandom</java.security.egd>
<bundle.snappy.in.bin>true</bundle.snappy.in.bin>
</properties>
<build>
<plugins>
@ -901,7 +902,7 @@
<configuration>
<environmentVariables>
<!-- Specify where to look for the native DLL on Windows -->
<PATH>${env.PATH};${hadoop.common.build.dir}/bin</PATH>
<PATH>${env.PATH};${hadoop.common.build.dir}/bin;${snappy.lib}</PATH>
</environmentVariables>
</configuration>
</plugin>