HDFS-12491. Support wildcard in CLASSPATH for libhdfs. Contributed by Muhammad Samir Khan.
(cherry picked from commit 10a60fbe20
)
This commit is contained in:
parent
8ea4787f9f
commit
a55a0a1f6d
|
@ -24,6 +24,8 @@
|
|||
#include "os/mutexes.h"
|
||||
#include "os/thread_local_storage.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <dirent.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
@ -357,6 +359,277 @@ done:
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* For the given path, expand it by filling in with all *.jar or *.JAR files,
|
||||
* separated by PATH_SEPARATOR. Assumes that expanded is big enough to hold the
|
||||
* string, eg allocated after using this function with expanded=NULL to get the
|
||||
* right size. Also assumes that the path ends with a "/.". The length of the
|
||||
* expanded path is returned, which includes space at the end for either a
|
||||
* PATH_SEPARATOR or null terminator.
|
||||
*/
|
||||
static ssize_t wildcard_expandPath(const char* path, char* expanded)
|
||||
{
|
||||
struct dirent* file;
|
||||
char* dest = expanded;
|
||||
ssize_t length = 0;
|
||||
size_t pathLength = strlen(path);
|
||||
DIR* dir;
|
||||
|
||||
dir = opendir(path);
|
||||
if (dir != NULL) {
|
||||
// can open dir so try to match with all *.jar and *.JAR entries
|
||||
|
||||
#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
|
||||
printf("wildcard_expandPath: %s\n", path);
|
||||
#endif
|
||||
|
||||
errno = 0;
|
||||
while ((file = readdir(dir)) != NULL) {
|
||||
const char* filename = file->d_name;
|
||||
const size_t filenameLength = strlen(filename);
|
||||
const char* jarExtension;
|
||||
|
||||
// If filename is smaller than 4 characters then it can not possibly
|
||||
// have extension ".jar" or ".JAR"
|
||||
if (filenameLength < 4) {
|
||||
continue;
|
||||
}
|
||||
|
||||
jarExtension = &filename[filenameLength-4];
|
||||
if ((strcmp(jarExtension, ".jar") == 0) ||
|
||||
(strcmp(jarExtension, ".JAR") == 0)) {
|
||||
|
||||
// pathLength includes an extra '.' which we'll use for either
|
||||
// separator or null termination
|
||||
length += pathLength + filenameLength;
|
||||
|
||||
#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
|
||||
printf("wildcard_scanPath:\t%s\t:\t%zd\n", filename, length);
|
||||
#endif
|
||||
|
||||
if (expanded != NULL) {
|
||||
// pathLength includes an extra '.'
|
||||
strncpy(dest, path, pathLength-1);
|
||||
dest += pathLength - 1;
|
||||
strncpy(dest, filename, filenameLength);
|
||||
dest += filenameLength;
|
||||
*dest = PATH_SEPARATOR;
|
||||
dest++;
|
||||
|
||||
#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
|
||||
printf("wildcard_expandPath:\t%s\t:\t%s\n",
|
||||
filename, expanded);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (errno != 0) {
|
||||
fprintf(stderr, "wildcard_expandPath: on readdir %s: %s\n",
|
||||
path, strerror(errno));
|
||||
length = -1;
|
||||
}
|
||||
|
||||
if (closedir(dir) != 0) {
|
||||
fprintf(stderr, "wildcard_expandPath: on closedir %s: %s\n",
|
||||
path, strerror(errno));
|
||||
}
|
||||
} else if ((errno != EACCES) && (errno != ENOENT) && (errno != ENOTDIR)) {
|
||||
// can not opendir due to an error we can not handle
|
||||
fprintf(stderr, "wildcard_expandPath: on opendir %s: %s\n", path,
|
||||
strerror(errno));
|
||||
length = -1;
|
||||
}
|
||||
|
||||
if (length == 0) {
|
||||
// either we failed to open dir due to EACCESS, ENOENT, or ENOTDIR, or
|
||||
// we did not find any file that matches *.jar or *.JAR
|
||||
|
||||
#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
|
||||
fprintf(stderr, "wildcard_expandPath: can not expand %.*s*: %s\n",
|
||||
(int)(pathLength-1), path, strerror(errno));
|
||||
#endif
|
||||
|
||||
// in this case, the wildcard expansion is the same as the original
|
||||
// +1 for PATH_SEPARTOR or null termination
|
||||
length = pathLength + 1;
|
||||
if (expanded != NULL) {
|
||||
// pathLength includes an extra '.'
|
||||
strncpy(dest, path, pathLength-1);
|
||||
dest += pathLength-1;
|
||||
*dest = '*'; // restore wildcard
|
||||
dest++;
|
||||
*dest = PATH_SEPARATOR;
|
||||
dest++;
|
||||
}
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to expand classpaths. Returns the total length of the expanded
|
||||
* classpath. If expandedClasspath is not NULL, then fills that with the
|
||||
* expanded classpath. It assumes that expandedClasspath is of correct size, eg
|
||||
* allocated after using this function with expandedClasspath=NULL to get the
|
||||
* right size.
|
||||
*/
|
||||
static ssize_t getClassPath_helper(const char *classpath, char* expandedClasspath)
|
||||
{
|
||||
ssize_t length;
|
||||
ssize_t retval;
|
||||
char* expandedCP_curr;
|
||||
char* cp_token;
|
||||
char* classpath_dup;
|
||||
|
||||
classpath_dup = strdup(classpath);
|
||||
if (classpath_dup == NULL) {
|
||||
fprintf(stderr, "getClassPath_helper: failed strdup: %s\n",
|
||||
strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
length = 0;
|
||||
|
||||
// expandedCP_curr is the current pointer
|
||||
expandedCP_curr = expandedClasspath;
|
||||
|
||||
cp_token = strtok(classpath_dup, PATH_SEPARATOR_STR);
|
||||
while (cp_token != NULL) {
|
||||
size_t tokenlen;
|
||||
|
||||
#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
|
||||
printf("%s\n", cp_token);
|
||||
#endif
|
||||
|
||||
tokenlen = strlen(cp_token);
|
||||
// We only expand if token ends with "/*"
|
||||
if ((tokenlen > 1) &&
|
||||
(cp_token[tokenlen-1] == '*') && (cp_token[tokenlen-2] == '/')) {
|
||||
// replace the '*' with '.' so that we don't have to allocate another
|
||||
// string for passing to opendir() in wildcard_expandPath()
|
||||
cp_token[tokenlen-1] = '.';
|
||||
retval = wildcard_expandPath(cp_token, expandedCP_curr);
|
||||
if (retval < 0) {
|
||||
free(classpath_dup);
|
||||
return -1;
|
||||
}
|
||||
|
||||
length += retval;
|
||||
if (expandedCP_curr != NULL) {
|
||||
expandedCP_curr += retval;
|
||||
}
|
||||
} else {
|
||||
// +1 for path separator or null terminator
|
||||
length += tokenlen + 1;
|
||||
if (expandedCP_curr != NULL) {
|
||||
strncpy(expandedCP_curr, cp_token, tokenlen);
|
||||
expandedCP_curr += tokenlen;
|
||||
*expandedCP_curr = PATH_SEPARATOR;
|
||||
expandedCP_curr++;
|
||||
}
|
||||
}
|
||||
|
||||
cp_token = strtok(NULL, PATH_SEPARATOR_STR);
|
||||
}
|
||||
|
||||
// Fix the last ':' and use it to null terminate
|
||||
if (expandedCP_curr != NULL) {
|
||||
expandedCP_curr--;
|
||||
*expandedCP_curr = '\0';
|
||||
}
|
||||
|
||||
free(classpath_dup);
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the classpath. Wild card entries are resolved only if the entry ends
|
||||
* with "/\*" (backslash to escape commenting) to match against .jar and .JAR.
|
||||
* All other wild card entries (eg /path/to/dir/\*foo*) are not resolved,
|
||||
* following JAVA default behavior, see:
|
||||
* https://docs.oracle.com/javase/8/docs/technotes/tools/unix/classpath.html
|
||||
*/
|
||||
static char* getClassPath()
|
||||
{
|
||||
char* classpath;
|
||||
char* expandedClasspath;
|
||||
ssize_t length;
|
||||
ssize_t retval;
|
||||
|
||||
classpath = getenv("CLASSPATH");
|
||||
if (classpath == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// First, get the total size of the string we will need for the expanded
|
||||
// classpath
|
||||
length = getClassPath_helper(classpath, NULL);
|
||||
if (length < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
|
||||
printf("+++++++++++++++++\n");
|
||||
#endif
|
||||
|
||||
// we don't have to do anything if classpath has no valid wildcards
|
||||
// we get length = 0 when CLASSPATH is set but empty
|
||||
// if CLASSPATH is not empty, then length includes null terminator
|
||||
// if length of expansion is same as original, then return a duplicate of
|
||||
// original since expansion can only be longer
|
||||
if ((length == 0) || ((length - 1) == strlen(classpath))) {
|
||||
|
||||
#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
|
||||
if ((length == 0) && (strlen(classpath) != 0)) {
|
||||
fprintf(stderr, "Something went wrong with getting the wildcard \
|
||||
expansion length\n" );
|
||||
}
|
||||
#endif
|
||||
|
||||
expandedClasspath = strdup(classpath);
|
||||
|
||||
#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
|
||||
printf("Expanded classpath=%s\n", expandedClasspath);
|
||||
#endif
|
||||
|
||||
return expandedClasspath;
|
||||
}
|
||||
|
||||
// Allocte memory for expanded classpath string
|
||||
expandedClasspath = calloc(length, sizeof(char));
|
||||
if (expandedClasspath == NULL) {
|
||||
fprintf(stderr, "getClassPath: failed calloc: %s\n", strerror(errno));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Actual expansion
|
||||
retval = getClassPath_helper(classpath, expandedClasspath);
|
||||
if (retval < 0) {
|
||||
free(expandedClasspath);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// This should not happen, but dotting i's and crossing t's
|
||||
if (retval != length) {
|
||||
fprintf(stderr,
|
||||
"Expected classpath expansion length to be %zu but instead got %zu\n",
|
||||
length, retval);
|
||||
free(expandedClasspath);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
|
||||
printf("===============\n");
|
||||
printf("Allocated %zd for expanding classpath\n", length);
|
||||
printf("Used %zu for expanding classpath\n", strlen(expandedClasspath) + 1);
|
||||
printf("Expanded classpath=%s\n", expandedClasspath);
|
||||
#endif
|
||||
|
||||
return expandedClasspath;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the global JNI environemnt.
|
||||
*
|
||||
|
@ -393,7 +666,7 @@ static JNIEnv* getGlobalJNIEnv(void)
|
|||
|
||||
if (noVMs == 0) {
|
||||
//Get the environment variables for initializing the JVM
|
||||
hadoopClassPath = getenv("CLASSPATH");
|
||||
hadoopClassPath = getClassPath();
|
||||
if (hadoopClassPath == NULL) {
|
||||
fprintf(stderr, "Environment variable CLASSPATH not set!\n");
|
||||
return NULL;
|
||||
|
@ -404,6 +677,8 @@ static JNIEnv* getGlobalJNIEnv(void)
|
|||
snprintf(optHadoopClassPath, optHadoopClassPathLen,
|
||||
"%s%s", hadoopClassPathVMArg, hadoopClassPath);
|
||||
|
||||
free(hadoopClassPath);
|
||||
|
||||
// Determine the # of LIBHDFS_OPTS args
|
||||
hadoopJvmArgs = getenv("LIBHDFS_OPTS");
|
||||
if (hadoopJvmArgs != NULL) {
|
||||
|
|
|
@ -26,7 +26,15 @@
|
|||
#include <stdarg.h>
|
||||
#include <errno.h>
|
||||
|
||||
#define PATH_SEPARATOR ':'
|
||||
#ifdef WIN32
|
||||
#define PATH_SEPARATOR ';'
|
||||
#define PATH_SEPARATOR_STR ";"
|
||||
#else
|
||||
#define PATH_SEPARATOR ':'
|
||||
#define PATH_SEPARATOR_STR ":"
|
||||
#endif
|
||||
|
||||
// #define _LIBHDFS_JNI_HELPER_DEBUGGING_ON_
|
||||
|
||||
|
||||
/** Denote the method we want to invoke as STATIC or INSTANCE */
|
||||
|
|
|
@ -61,7 +61,8 @@ See the CMake file for `test_libhdfs_ops.c` in the libhdfs source directory (`ha
|
|||
Common Problems
|
||||
---------------
|
||||
|
||||
The most common problem is the `CLASSPATH` is not set properly when calling a program that uses libhdfs. Make sure you set it to all the Hadoop jars needed to run Hadoop itself as well as the right configuration directory containing `hdfs-site.xml`. It is not valid to use wildcard syntax for specifying multiple jars. It may be useful to run `hadoop classpath --glob` or `hadoop classpath --jar <path`\> to generate the correct classpath for your deployment. See [Hadoop Commands Reference](../hadoop-common/CommandsManual.html#classpath) for more information on this command.
|
||||
The most common problem is the `CLASSPATH` is not set properly when calling a program that uses libhdfs. Make sure you set it to all the Hadoop jars needed to run Hadoop itself as well as the right configuration directory containing `hdfs-site.xml`.
|
||||
Wildcard entries in the `CLASSPATH` are now supported by libhdfs.
|
||||
|
||||
Thread Safe
|
||||
-----------
|
||||
|
|
Loading…
Reference in New Issue