From 5257afb1531be9ee72865d16ce3a383ff203cb5c Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Tue, 4 Feb 2020 12:24:12 -0600 Subject: [PATCH] HDFS-12491. Support wildcard in CLASSPATH for libhdfs. Contributed by Muhammad Samir Khan. (cherry picked from commit 10a60fbe20bb08cdd71076ea9bf2ebb3a2f6226e) --- .../src/main/native/libhdfs/jni_helper.c | 277 +++++++++++++++++- .../src/main/native/libhdfs/jni_helper.h | 10 +- .../hadoop-hdfs/src/site/markdown/LibHdfs.md | 3 +- 3 files changed, 287 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c index c45d598961f..91a3c1cafc8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c @@ -24,6 +24,8 @@ #include "os/mutexes.h" #include "os/thread_local_storage.h" +#include <dirent.h> +#include <errno.h> #include <stdio.h> #include <string.h> @@ -357,6 +359,277 @@ done: } +/** + * For the given path, expand it by filling in with all *.jar or *.JAR files, + * separated by PATH_SEPARATOR. Assumes that expanded is big enough to hold the + * string, eg allocated after using this function with expanded=NULL to get the + * right size. Also assumes that the path ends with a "/.". The length of the + * expanded path is returned, which includes space at the end for either a + * PATH_SEPARATOR or null terminator.
+ *
+ * Directory entries whose name contains PATH_SEPARATOR are skipped, because
+ * embedding such a name would split it into bogus classpath entries.
+ *
+ * If the directory can not be opened because of EACCES, ENOENT or ENOTDIR, or
+ * if it holds no usable jar, the entry is emitted unchanged with its '*'
+ * restored. Any other opendir()/readdir() failure is reported on stderr and
+ * -1 is returned.
+ *
+ * @param path      directory to scan; the trailing '*' of the classpath entry
+ *                  has already been replaced by '.'
+ * @param expanded  destination buffer, or NULL to only compute the length
+ * @return          number of bytes needed/written, or -1 on error
+ */
+static ssize_t wildcard_expandPath(const char* path, char* expanded)
+{
+    struct dirent* file;
+    char* dest = expanded;
+    ssize_t length = 0;
+    size_t pathLength = strlen(path);
+    DIR* dir;
+
+    dir = opendir(path);
+    if (dir != NULL) {
+        int readdirErrno;
+
+        // can open dir so try to match with all *.jar and *.JAR entries
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+        printf("wildcard_expandPath: %s\n", path);
+#endif
+
+        for (;;) {
+            const char* filename;
+            size_t filenameLength;
+            const char* jarExtension;
+
+            // readdir() returns NULL both at end of directory and on error;
+            // only errno tells the two apart, so reset it before every call
+            // (the debug printf()s in this loop could otherwise leave it set)
+            errno = 0;
+            file = readdir(dir);
+            if (file == NULL) {
+                break;
+            }
+
+            filename = file->d_name;
+            filenameLength = strlen(filename);
+
+            // If filename is smaller than 4 characters then it can not
+            // possibly have extension ".jar" or ".JAR"
+            if (filenameLength < 4) {
+                continue;
+            }
+
+            jarExtension = &filename[filenameLength-4];
+            if ((strcmp(jarExtension, ".jar") != 0) &&
+                (strcmp(jarExtension, ".JAR") != 0)) {
+                continue;
+            }
+
+            // A name containing PATH_SEPARATOR (eg "dir:foo.jar") would be
+            // read back as two separate entries once it is part of the
+            // classpath, so leave it out; this mirrors the check made by the
+            // JDK launcher's own wildcard expansion
+            if (strchr(filename, PATH_SEPARATOR) != NULL) {
+                continue;
+            }
+
+            // pathLength includes an extra '.' which we'll use for either
+            // separator or null termination
+            length += pathLength + filenameLength;
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+            printf("wildcard_scanPath:\t%s\t:\t%zd\n", filename, length);
+#endif
+
+            if (expanded != NULL) {
+                // pathLength includes an extra '.', which is not copied.
+                // Both lengths are known and no terminator is wanted here,
+                // so plain memcpy() is the right tool
+                memcpy(dest, path, pathLength-1);
+                dest += pathLength - 1;
+                memcpy(dest, filename, filenameLength);
+                dest += filenameLength;
+                *dest = PATH_SEPARATOR;
+                dest++;
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+                printf("wildcard_expandPath:\t%s\t:\t%s\n",
+                       filename, expanded);
+#endif
+            }
+        }
+
+        // errno still holds the outcome of the readdir() call that ended the
+        // loop; save it before anything else can overwrite it
+        readdirErrno = errno;
+        if (readdirErrno != 0) {
+            fprintf(stderr, "wildcard_expandPath: on readdir %s: %s\n",
+                    path, strerror(readdirErrno));
+            length = -1;
+        }
+
+        if (closedir(dir) != 0) {
+            fprintf(stderr, "wildcard_expandPath: on closedir %s: %s\n",
+                    path, strerror(errno));
+        }
+    } else if ((errno != EACCES) && (errno != ENOENT) && (errno != ENOTDIR)) {
+        // can not opendir due to an error we can not handle
+        fprintf(stderr, "wildcard_expandPath: on opendir %s: %s\n", path,
+                strerror(errno));
+        length = -1;
+    }
+
+    if (length == 0) {
+        // either we failed to open dir due to EACCES, ENOENT, or ENOTDIR, or
+        // we did not find any usable file that matches *.jar or *.JAR
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+        fprintf(stderr, "wildcard_expandPath: can not expand %.*s*: %s\n",
+                (int)(pathLength-1), path, strerror(errno));
+#endif
+
+        // in this case, the wildcard expansion is the same as the original
+        // +1 for PATH_SEPARATOR or null termination
+        length = pathLength + 1;
+        if (expanded != NULL) {
+            // pathLength includes an extra '.'
+            memcpy(dest, path, pathLength-1);
+            dest += pathLength-1;
+            *dest = '*'; // restore wildcard
+            dest++;
+            *dest = PATH_SEPARATOR;
+            dest++;
+        }
+    }
+
+    return length;
+}
+
+/**
+ * Helper to expand classpaths. Returns the total length of the expanded
+ * classpath. If expandedClasspath is not NULL, then fills that with the
+ * expanded classpath. It assumes that expandedClasspath is of correct size, eg
+ * allocated after using this function with expandedClasspath=NULL to get the
+ * right size.
+ *
+ * Empty entries (leading, trailing or repeated separators) are dropped.
+ * Returns -1 if the classpath can not be duplicated or if expanding one of
+ * its wildcard entries fails.
+ */
+static ssize_t getClassPath_helper(const char *classpath, char* expandedClasspath)
+{
+    ssize_t length;
+    ssize_t retval;
+    char* expandedCP_curr;
+    char* cp_token;
+    char* classpath_dup;
+
+    // work on a private copy: tokens are terminated in place and the '*' of
+    // a wildcard entry is overwritten
+    classpath_dup = strdup(classpath);
+    if (classpath_dup == NULL) {
+        fprintf(stderr, "getClassPath_helper: failed strdup: %s\n",
+                strerror(errno));
+        return -1;
+    }
+
+    length = 0;
+
+    // expandedCP_curr is the current pointer
+    expandedCP_curr = expandedClasspath;
+
+    // Tokenize with strspn()/strcspn() rather than strtok(): strtok() keeps
+    // hidden static state, which is not safe in a library that gets loaded
+    // into multi-threaded programs. Semantics are the same as strtok():
+    // runs of separators are skipped, so empty entries never become tokens.
+    cp_token = classpath_dup + strspn(classpath_dup, PATH_SEPARATOR_STR);
+    while (*cp_token != '\0') {
+        size_t tokenlen = strcspn(cp_token, PATH_SEPARATOR_STR);
+        char* cp_next = cp_token + tokenlen;
+
+        // terminate the token in place so it can be handed to opendir()
+        if (*cp_next != '\0') {
+            *cp_next = '\0';
+            cp_next++;
+        }
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+        printf("%s\n", cp_token);
+#endif
+
+        // We only expand if token ends with "/*"
+        if ((tokenlen > 1) &&
+            (cp_token[tokenlen-1] == '*') && (cp_token[tokenlen-2] == '/')) {
+            // replace the '*' with '.' so that we don't have to allocate another
+            // string for passing to opendir() in wildcard_expandPath()
+            cp_token[tokenlen-1] = '.';
+            retval = wildcard_expandPath(cp_token, expandedCP_curr);
+            if (retval < 0) {
+                free(classpath_dup);
+                return -1;
+            }
+
+            length += retval;
+            if (expandedCP_curr != NULL) {
+                expandedCP_curr += retval;
+            }
+        } else {
+            // +1 for path separator or null terminator
+            length += tokenlen + 1;
+            if (expandedCP_curr != NULL) {
+                memcpy(expandedCP_curr, cp_token, tokenlen);
+                expandedCP_curr += tokenlen;
+                *expandedCP_curr = PATH_SEPARATOR;
+                expandedCP_curr++;
+            }
+        }
+
+        // move on to the next non-empty entry
+        cp_token = cp_next + strspn(cp_next, PATH_SEPARATOR_STR);
+    }
+
+    // Fix the last ':' and use it to null terminate. If no entry was written
+    // at all there is no trailing separator, and stepping back would write
+    // one byte in front of the caller's buffer, so leave it untouched.
+    if ((expandedCP_curr != NULL) && (expandedCP_curr > expandedClasspath)) {
+        expandedCP_curr--;
+        *expandedCP_curr = '\0';
+    }
+
+    free(classpath_dup);
+    return length;
+}
+
+/**
+ * Gets the classpath. Wild card entries are resolved only if the entry ends
+ * with "/\*" (backslash to escape commenting) to match against .jar and .JAR.
+ * All other wild card entries (eg /path/to/dir/\*foo*) are not resolved,
+ * following JAVA default behavior, see:
+ * https://docs.oracle.com/javase/8/docs/technotes/tools/unix/classpath.html
+ *
+ * Returns a newly allocated string that the caller must free(), or NULL if
+ * CLASSPATH is not set, memory can not be allocated, or the expansion fails.
+ */
+static char* getClassPath(void)
+{
+    char* classpath;
+    char* expandedClasspath;
+    ssize_t length;
+    ssize_t retval;
+
+    classpath = getenv("CLASSPATH");
+    if (classpath == NULL) {
+        return NULL;
+    }
+
+    // First, get the total size of the string we will need for the expanded
+    // classpath
+    length = getClassPath_helper(classpath, NULL);
+    if (length < 0) {
+        return NULL;
+    }
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+    printf("+++++++++++++++++\n");
+#endif
+
+    // length == 0 means CLASSPATH holds no entry at all (it is empty or made
+    // of separators only), so there is nothing to expand and a duplicate of
+    // the original is returned.
+    // Every other value goes through the expansion below, even when no
+    // wildcard is present. Comparing length with strlen(classpath) to detect
+    // "nothing was expanded" is not reliable: dropped empty entries can
+    // shrink the result by exactly as much as a wildcard grows it (eg
+    // "::::/d/*" with a single a.jar), which would return the wildcard
+    // unexpanded.
+    if (length == 0) {
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+        if (strlen(classpath) != 0) {
+            fprintf(stderr, "Something went wrong with getting the wildcard "
+                    "expansion length\n");
+        }
+#endif
+
+        expandedClasspath = strdup(classpath);
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+        printf("Expanded classpath=%s\n", expandedClasspath);
+#endif
+
+        return expandedClasspath;
+    }
+
+    // Allocate memory for expanded classpath string; length already includes
+    // room for the null terminator
+    expandedClasspath = calloc((size_t)length, sizeof(char));
+    if (expandedClasspath == NULL) {
+        fprintf(stderr, "getClassPath: failed calloc: %s\n", strerror(errno));
+        return NULL;
+    }
+
+    // Actual expansion
+    retval = getClassPath_helper(classpath, expandedClasspath);
+    if (retval < 0) {
+        free(expandedClasspath);
+        return NULL;
+    }
+
+    // This should not happen, but dotting i's and crossing t's
+    if (retval != length) {
+        // both values are ssize_t, so the conversion must be %zd
+        fprintf(stderr,
+                "Expected classpath expansion length to be %zd but instead got %zd\n",
+                length, retval);
+        free(expandedClasspath);
+        return NULL;
+    }
+
+#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
+    printf("===============\n");
+    printf("Allocated %zd for expanding classpath\n", length);
+    printf("Used %zu for expanding classpath\n", strlen(expandedClasspath) + 1);
+    printf("Expanded classpath=%s\n", expandedClasspath);
+#endif
+
+    return expandedClasspath;
+}
+
+
 /** * Get the global JNI environemnt. * @@ -393,7 +666,7 @@ static JNIEnv* getGlobalJNIEnv(void) if (noVMs == 0) { //Get the environment variables for initializing the JVM - hadoopClassPath = getenv("CLASSPATH"); + hadoopClassPath = getClassPath(); if (hadoopClassPath == NULL) { fprintf(stderr, "Environment variable CLASSPATH not set!\n"); return NULL; @@ -404,6 +677,8 @@ static JNIEnv* getGlobalJNIEnv(void) snprintf(optHadoopClassPath, optHadoopClassPathLen, "%s%s", hadoopClassPathVMArg, hadoopClassPath); + free(hadoopClassPath); + // Determine the # of LIBHDFS_OPTS args hadoopJvmArgs = getenv("LIBHDFS_OPTS"); if (hadoopJvmArgs != NULL) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h index e63ce5306cd..f0d06d72fc0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.h @@ -26,7 +26,15 @@ #include #include -#define PATH_SEPARATOR ':' +#ifdef WIN32 + #define PATH_SEPARATOR ';' + #define PATH_SEPARATOR_STR ";" +#else + #define PATH_SEPARATOR ':' + #define PATH_SEPARATOR_STR ":" +#endif + +// #define _LIBHDFS_JNI_HELPER_DEBUGGING_ON_ /** Denote the method we want to invoke as STATIC or INSTANCE */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md index 7049dcbce2a..ab0376efc84 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/LibHdfs.md @@ -61,7 +61,8 @@ See the CMake file for `test_libhdfs_ops.c` in the libhdfs source directory (`ha Common Problems --------------- -The most common problem is the `CLASSPATH` is not set properly when calling a program that uses libhdfs. Make sure you set it to all the Hadoop jars needed to run Hadoop itself as well as the right configuration directory containing `hdfs-site.xml`. It is not valid to use wildcard syntax for specifying multiple jars. It may be useful to run `hadoop classpath --glob` or `hadoop classpath --jar <path>` to generate the correct classpath for your deployment. See [Hadoop Commands Reference](../hadoop-common/CommandsManual.html#classpath) for more information on this command. +The most common problem is that the `CLASSPATH` is not set properly when calling a program that uses libhdfs. Make sure you set it to all the Hadoop jars needed to run Hadoop itself as well as the right configuration directory containing `hdfs-site.xml`. +Wildcard entries in the `CLASSPATH` are now supported by libhdfs: an entry ending in `/*` is expanded to all `.jar` and `.JAR` files in that directory. Thread Safe -----------