HADOOP-12747. support wildcard in libjars argument (sjlee)

(cherry picked from commit 0ad48aa2c8)
This commit is contained in:
Sangjin Lee 2016-08-08 17:34:56 -07:00
parent e7c701586d
commit 660e321523
5 changed files with 269 additions and 90 deletions

View File

@ -18,7 +18,14 @@
package org.apache.hadoop.fs;
import java.io.*;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
@ -37,6 +44,8 @@ import java.util.zip.ZipFile;
import org.apache.commons.collections.map.CaseInsensitiveMap;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@ -44,11 +53,9 @@ import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.StringUtils;
/**
* A collection of file-processing util methods
@ -1247,19 +1254,13 @@ public class FileUtil {
continue;
}
if (classPathEntry.endsWith("*")) {
boolean foundWildCardJar = false;
// Append all jars that match the wildcard
Path globPath = new Path(classPathEntry).suffix("{.jar,.JAR}");
FileStatus[] wildcardJars = FileContext.getLocalFSFileContext().util()
.globStatus(globPath);
if (wildcardJars != null) {
for (FileStatus wildcardJar: wildcardJars) {
foundWildCardJar = true;
classPathEntryList.add(wildcardJar.getPath().toUri().toURL()
.toExternalForm());
List<Path> jars = getJarsInDirectory(classPathEntry);
if (!jars.isEmpty()) {
for (Path jar: jars) {
classPathEntryList.add(jar.toUri().toURL().toExternalForm());
}
}
if (!foundWildCardJar) {
} else {
unexpandedWildcardClasspath.append(File.pathSeparator);
unexpandedWildcardClasspath.append(classPathEntry);
}
@ -1315,6 +1316,48 @@ public class FileUtil {
return jarCp;
}
/**
* Returns all jars that are in the directory. It is useful in expanding a
* wildcard path to return all jars from the directory to use in a classpath.
* It operates only on local paths.
*
* @param path the path to the directory. The path may include the wildcard.
* @return the list of jars as URLs, or an empty list if there are no jars, or
* the directory does not exist locally
*/
public static List<Path> getJarsInDirectory(String path) {
return getJarsInDirectory(path, true);
}
/**
* Returns all jars that are in the directory. It is useful in expanding a
* wildcard path to return all jars from the directory to use in a classpath.
*
* @param path the path to the directory. The path may include the wildcard.
* @return the list of jars as URLs, or an empty list if there are no jars, or
* the directory does not exist
*/
public static List<Path> getJarsInDirectory(String path, boolean useLocal) {
List<Path> paths = new ArrayList<>();
try {
// add the wildcard if it is not provided
if (!path.endsWith("*")) {
path += File.separator + "*";
}
Path globPath = new Path(path).suffix("{.jar,.JAR}");
FileContext context = useLocal ?
FileContext.getLocalFSFileContext() :
FileContext.getFileContext(globPath.toUri());
FileStatus[] files = context.util().globStatus(globPath);
if (files != null) {
for (FileStatus file: files) {
paths.add(file.getPath());
}
}
} catch (IOException ignore) {} // return the empty list
return paths;
}
public static boolean compareFs(FileSystem srcFs, FileSystem destFs) {
if (srcFs==null || destFs==null) {
return false;

View File

@ -19,7 +19,6 @@
package org.apache.hadoop.util;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
@ -34,6 +33,8 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
/**
* A {@link URLClassLoader} for application isolation. Classes from the
@ -58,14 +59,6 @@ public class ApplicationClassLoader extends URLClassLoader {
private static final Log LOG =
LogFactory.getLog(ApplicationClassLoader.class.getName());
private static final FilenameFilter JAR_FILENAME_FILTER =
new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.endsWith(".jar") || name.endsWith(".JAR");
}
};
static {
try (InputStream is = ApplicationClassLoader.class.getClassLoader()
.getResourceAsStream(PROPERTIES_FILE);) {
@ -116,11 +109,10 @@ public class ApplicationClassLoader extends URLClassLoader {
List<URL> urls = new ArrayList<URL>();
for (String element : classpath.split(File.pathSeparator)) {
if (element.endsWith("/*")) {
String dir = element.substring(0, element.length() - 1);
File[] files = new File(dir).listFiles(JAR_FILENAME_FILTER);
if (files != null) {
for (File file : files) {
urls.add(file.toURI().toURL());
List<Path> jars = FileUtil.getJarsInDirectory(element);
if (!jars.isEmpty()) {
for (Path jar: jars) {
urls.add(jar.toUri().toURL());
}
}
} else {

View File

@ -17,6 +17,7 @@
*/
package org.apache.hadoop.util;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
@ -41,6 +42,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
@ -167,8 +169,8 @@ public class GenericOptionsParser {
*/
public GenericOptionsParser(Configuration conf,
Options options, String[] args) throws IOException {
parseGeneralOptions(options, conf, args);
this.conf = conf;
parseGeneralOptions(options, args);
}
/**
@ -259,12 +261,11 @@ public class GenericOptionsParser {
}
/**
* Modify configuration according user-specified generic options
* @param conf Configuration to be modified
* Modify configuration according user-specified generic options.
*
* @param line User-specified generic options
*/
private void processGeneralOptions(Configuration conf,
CommandLine line) throws IOException {
private void processGeneralOptions(CommandLine line) throws IOException {
if (line.hasOption("fs")) {
FileSystem.setDefaultUri(conf, line.getOptionValue("fs"));
}
@ -296,8 +297,9 @@ public class GenericOptionsParser {
}
if (line.hasOption("libjars")) {
conf.set("tmpjars",
validateFiles(line.getOptionValue("libjars"), conf),
// for libjars, we allow expansion of wildcards
conf.set("tmpjars",
validateFiles(line.getOptionValue("libjars"), true),
"from -libjars command line option");
//setting libjars in client classpath
URL[] libjars = getLibJars(conf);
@ -310,12 +312,12 @@ public class GenericOptionsParser {
}
if (line.hasOption("files")) {
conf.set("tmpfiles",
validateFiles(line.getOptionValue("files"), conf),
validateFiles(line.getOptionValue("files")),
"from -files command line option");
}
if (line.hasOption("archives")) {
conf.set("tmparchives",
validateFiles(line.getOptionValue("archives"), conf),
validateFiles(line.getOptionValue("archives")),
"from -archives command line option");
}
conf.setBoolean("mapreduce.client.genericoptionsparser.used", true);
@ -348,7 +350,7 @@ public class GenericOptionsParser {
*/
public static URL[] getLibJars(Configuration conf) throws IOException {
String jars = conf.get("tmpjars");
if(jars==null) {
if (jars == null || jars.trim().isEmpty()) {
return null;
}
String[] files = jars.split(",");
@ -359,7 +361,7 @@ public class GenericOptionsParser {
cp.add(FileSystem.getLocal(conf).pathToFile(tmp).toURI().toURL());
} else {
LOG.warn("The libjars file " + tmp + " is not on the local " +
"filesystem. Ignoring.");
"filesystem. It will not be added to the local classpath.");
}
}
return cp.toArray(new URL[0]);
@ -371,28 +373,62 @@ public class GenericOptionsParser {
* if the files specified do not have a scheme.
* it returns the paths uri converted defaulting to file:///.
* So an input of /home/user/file1,/home/user/file2 would return
* file:///home/user/file1,file:///home/user/file2
* @param files
* @return
* file:///home/user/file1,file:///home/user/file2.
*
* This method does not recognize wildcards.
*
* @param files the input files argument
* @return a comma-separated list of validated and qualified paths, or null
* if the input files argument is null
*/
private String validateFiles(String files, Configuration conf)
throws IOException {
if (files == null)
private String validateFiles(String files) throws IOException {
return validateFiles(files, false);
}
/**
* takes input as a comma separated list of files
* and verifies if they exist. It defaults for file:///
* if the files specified do not have a scheme.
* it returns the paths uri converted defaulting to file:///.
* So an input of /home/user/file1,/home/user/file2 would return
* file:///home/user/file1,file:///home/user/file2.
*
* @param files the input files argument
* @param expandWildcard whether a wildcard entry is allowed and expanded. If
* true, any directory followed by a wildcard is a valid entry and is replaced
* with the list of jars in that directory. It is used to support the wildcard
* notation in a classpath.
* @return a comma-separated list of validated and qualified paths, or null
* if the input files argument is null
*/
private String validateFiles(String files, boolean expandWildcard)
throws IOException {
if (files == null) {
return null;
}
String[] fileArr = files.split(",");
if (fileArr.length == 0) {
throw new IllegalArgumentException("File name can't be empty string");
}
String[] finalArr = new String[fileArr.length];
List<String> finalPaths = new ArrayList<>(fileArr.length);
for (int i =0; i < fileArr.length; i++) {
String tmp = fileArr[i];
if (tmp.isEmpty()) {
throw new IllegalArgumentException("File name can't be empty string");
}
String finalPath;
URI pathURI;
final String wildcard = "*";
boolean isWildcard = tmp.endsWith(wildcard) && expandWildcard;
try {
pathURI = new URI(tmp);
if (isWildcard) {
// strip the wildcard
tmp = tmp.substring(0, tmp.length() - 1);
}
// handle the case where a wildcard alone ("*") or the wildcard on the
// current directory ("./*") is specified
pathURI = matchesCurrentDirectory(tmp) ?
new File(Path.CUR_DIR).toURI() :
new URI(tmp);
} catch (URISyntaxException e) {
throw new IllegalArgumentException(e);
}
@ -404,10 +440,13 @@ public class GenericOptionsParser {
if (!localFs.exists(path)) {
throw new FileNotFoundException("File " + tmp + " does not exist.");
}
finalPath = path.makeQualified(localFs.getUri(),
localFs.getWorkingDirectory()).toString();
}
else {
if (isWildcard) {
expandWildcard(finalPaths, path, localFs);
} else {
finalPaths.add(path.makeQualified(localFs.getUri(),
localFs.getWorkingDirectory()).toString());
}
} else {
// check if the file exists in this file system
// we need to recreate this filesystem object to copy
// these files to the file system ResourceManager is running
@ -416,12 +455,41 @@ public class GenericOptionsParser {
if (!fs.exists(path)) {
throw new FileNotFoundException("File " + tmp + " does not exist.");
}
finalPath = path.makeQualified(fs.getUri(),
fs.getWorkingDirectory()).toString();
if (isWildcard) {
expandWildcard(finalPaths, path, fs);
} else {
finalPaths.add(path.makeQualified(fs.getUri(),
fs.getWorkingDirectory()).toString());
}
}
}
if (finalPaths.isEmpty()) {
throw new IllegalArgumentException("Path " + files + " cannot be empty.");
}
return StringUtils.join(",", finalPaths);
}
private boolean matchesCurrentDirectory(String path) {
return path.isEmpty() || path.equals(Path.CUR_DIR) ||
path.equals(Path.CUR_DIR + File.separator);
}
private void expandWildcard(List<String> finalPaths, Path path, FileSystem fs)
throws IOException {
if (!fs.isDirectory(path)) {
throw new FileNotFoundException(path + " is not a directory.");
}
// get all the jars in the directory
List<Path> jars = FileUtil.getJarsInDirectory(path.toString(),
fs.equals(FileSystem.getLocal(conf)));
if (jars.isEmpty()) {
LOG.warn(path + " does not have jars in it. It will be ignored.");
} else {
for (Path jar: jars) {
finalPaths.add(jar.makeQualified(fs.getUri(),
fs.getWorkingDirectory()).toString());
}
finalArr[i] = finalPath;
}
return StringUtils.arrayToString(finalArr);
}
/**
@ -473,18 +541,18 @@ public class GenericOptionsParser {
/**
* Parse the user-specified options, get the generic options, and modify
* configuration accordingly
* configuration accordingly.
*
* @param opts Options to use for parsing args.
* @param conf Configuration to be modified
* @param args User-specified arguments
*/
private void parseGeneralOptions(Options opts, Configuration conf,
String[] args) throws IOException {
private void parseGeneralOptions(Options opts, String[] args)
throws IOException {
opts = buildGeneralOptions(opts);
CommandLineParser parser = new GnuParser();
try {
commandLine = parser.parse(opts, preProcessForWindows(args), true);
processGeneralOptions(conf, commandLine);
processGeneralOptions(commandLine);
} catch(ParseException e) {
LOG.warn("options parsing failed: "+e.getMessage());

View File

@ -17,8 +17,13 @@
*/
package org.apache.hadoop.fs;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.BufferedReader;
import java.io.File;
@ -27,10 +32,11 @@ import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.InetAddress;
import java.net.URI;
import java.io.PrintWriter;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
@ -45,16 +51,16 @@ import java.util.zip.ZipOutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils;
import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarOutputStream;
import javax.print.attribute.URISyntax;
import static org.junit.Assert.*;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
public class TestFileUtil {
private static final Log LOG = LogFactory.getLog(TestFileUtil.class);
@ -1117,6 +1123,40 @@ public class TestFileUtil {
}
}
@Test
public void testGetJarsInDirectory() throws Exception {
List<Path> jars = FileUtil.getJarsInDirectory("/foo/bar/bogus/");
assertTrue("no jars should be returned for a bogus path",
jars.isEmpty());
// setup test directory for files
assertFalse(tmp.exists());
assertTrue(tmp.mkdirs());
// create jar files to be returned
File jar1 = new File(tmp, "wildcard1.jar");
File jar2 = new File(tmp, "wildcard2.JAR");
List<File> matches = Arrays.asList(jar1, jar2);
for (File match: matches) {
assertTrue("failure creating file: " + match, match.createNewFile());
}
// create non-jar files, which we expect to not be included in the result
assertTrue(new File(tmp, "text.txt").createNewFile());
assertTrue(new File(tmp, "executable.exe").createNewFile());
assertTrue(new File(tmp, "README").createNewFile());
// pass in the directory
String directory = tmp.getCanonicalPath();
jars = FileUtil.getJarsInDirectory(directory);
assertEquals("there should be 2 jars", 2, jars.size());
for (Path jar: jars) {
URL url = jar.toUri().toURL();
assertTrue("the jar should match either of the jars",
url.equals(jar1.toURI().toURL()) || url.equals(jar2.toURI().toURL()));
}
}
@Ignore
public void setupCompareFs() {
// Set up Strings

View File

@ -17,6 +17,13 @@
*/
package org.apache.hadoop.util;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
@ -26,8 +33,9 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
import junit.framework.TestCase;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.math3.util.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@ -38,20 +46,18 @@ import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.junit.Assert;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import com.google.common.collect.Maps;
import static org.junit.Assert.fail;
public class TestGenericOptionsParser extends TestCase {
public class TestGenericOptionsParser {
File testDir;
Configuration conf;
FileSystem localFs;
@Test
public void testFilesOption() throws Exception {
File tmpFile = new File(testDir, "tmpfile");
Path tmpPath = new Path(tmpFile.toString());
@ -97,10 +103,38 @@ public class TestGenericOptionsParser extends TestCase {
assertNull("files is not null", files);
}
@Test
public void testLibjarsOption() throws Exception {
File tmpJar = new File(testDir, "tmp.jar");
Path tmpJarPath = new Path(tmpJar.toString());
localFs.create(tmpJarPath);
String[] args = new String[2];
// pass a libjars option
// first, pass the jar directly
args[0] = "-libjars";
// Convert a file to a URI as File.toString() is not a valid URI on
// all platforms and GenericOptionsParser accepts only valid URIs
args[1] = tmpJar.toURI().toString();
new GenericOptionsParser(conf, args);
String libjars = conf.get("tmpjars");
assertNotNull("libjars is null", libjars);
assertEquals("libjars does not match",
localFs.makeQualified(tmpJarPath).toString(), libjars);
// now test the wildcard
args[1] = testDir.toURI().toString() + "*";
new GenericOptionsParser(conf, args);
libjars = conf.get("tmpjars");
assertNotNull("libjars is null", libjars);
assertEquals("libjars does not match",
localFs.makeQualified(tmpJarPath).toString(), libjars);
}
/**
* Test the case where the libjars, files and archives arguments
* contains an empty token, which should create an IllegalArgumentException.
*/
@Test
public void testEmptyFilenames() throws Exception {
List<Pair<String, String>> argsAndConfNames = new ArrayList<Pair<String, String>>();
argsAndConfNames.add(new Pair<String, String>("-libjars", "tmpjars"));
@ -108,7 +142,6 @@ public class TestGenericOptionsParser extends TestCase {
argsAndConfNames.add(new Pair<String, String>("-archives", "tmparchives"));
for (Pair<String, String> argAndConfName : argsAndConfNames) {
String arg = argAndConfName.getFirst();
String configName = argAndConfName.getSecond();
File tmpFileOne = new File(testDir, "tmpfile1");
Path tmpPathOne = new Path(tmpFileOne.toString());
@ -162,6 +195,7 @@ public class TestGenericOptionsParser extends TestCase {
* Test that options passed to the constructor are used.
*/
@SuppressWarnings("static-access")
@Test
public void testCreateWithOptions() throws Exception {
// Create new option newOpt
Option opt = OptionBuilder.withArgName("int")
@ -183,6 +217,7 @@ public class TestGenericOptionsParser extends TestCase {
/**
* Test that multiple conf arguments can be used.
*/
@Test
public void testConfWithMultipleOpts() throws Exception {
String[] args = new String[2];
args[0] = "--conf=foo";
@ -193,10 +228,9 @@ public class TestGenericOptionsParser extends TestCase {
assertEquals("2st conf param is incorrect",
"bar", g.getCommandLine().getOptionValues("conf")[1]);
}
@Override
protected void setUp() throws Exception {
super.setUp();
@Before
public void setUp() throws Exception {
conf = new Configuration();
localFs = FileSystem.getLocal(conf);
testDir = GenericTestUtils.getTestDir("generic");
@ -204,9 +238,8 @@ public class TestGenericOptionsParser extends TestCase {
localFs.delete(new Path(testDir.toString()), true);
}
@Override
protected void tearDown() throws Exception {
super.tearDown();
@After
public void tearDown() throws Exception {
if(testDir.exists()) {
localFs.delete(new Path(testDir.toString()), true);
}
@ -216,6 +249,7 @@ public class TestGenericOptionsParser extends TestCase {
* testing -fileCache option
* @throws IOException
*/
@Test
public void testTokenCacheOption() throws IOException {
FileSystem localFs = FileSystem.getLocal(conf);
@ -264,6 +298,7 @@ public class TestGenericOptionsParser extends TestCase {
}
/** Test -D parsing */
@Test
public void testDOptionParsing() throws Exception {
String[] args;
Map<String,String> expectedMap;
@ -344,7 +379,7 @@ public class TestGenericOptionsParser extends TestCase {
assertEquals(entry.getValue(), conf.get(entry.getKey()));
}
Assert.assertArrayEquals(
assertArrayEquals(
Arrays.toString(remainingArgs) + Arrays.toString(expectedRemainingArgs),
expectedRemainingArgs, remainingArgs);
}
@ -352,6 +387,7 @@ public class TestGenericOptionsParser extends TestCase {
/** Test passing null as args. Some classes still call
* Tool interface from java passing null.
*/
@Test
public void testNullArgs() throws IOException {
GenericOptionsParser parser = new GenericOptionsParser(conf, null);
parser.getRemainingArgs();