MAPREDUCE-6454. Fixed MapReduce to modify HADOOP_CLASSPATH to have distributed cache files so that child processes running hadoop scripts can access these files. Contributed by Junping Du.
This commit is contained in:
parent
45be1240d9
commit
9da81a3955
|
@ -557,6 +557,10 @@ Release 2.6.2 - UNRELEASED
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
||||||
|
MAPREDUCE-6454. Fixed MapReduce to modify HADOOP_CLASSPATH to have distributed
|
||||||
|
cache files so that child processes running hadoop scripts can access these
|
||||||
|
files. (Junping Du via vinodkv)
|
||||||
|
|
||||||
Release 2.6.1 - UNRELEASED
|
Release 2.6.1 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -173,6 +173,7 @@ public abstract class TaskAttemptImpl implements
|
||||||
private static AtomicBoolean initialClasspathFlag = new AtomicBoolean();
|
private static AtomicBoolean initialClasspathFlag = new AtomicBoolean();
|
||||||
private static String initialClasspath = null;
|
private static String initialClasspath = null;
|
||||||
private static String initialAppClasspath = null;
|
private static String initialAppClasspath = null;
|
||||||
|
private static String initialHadoopClasspath = null;
|
||||||
private static Object commonContainerSpecLock = new Object();
|
private static Object commonContainerSpecLock = new Object();
|
||||||
private static ContainerLaunchContext commonContainerSpec = null;
|
private static ContainerLaunchContext commonContainerSpec = null;
|
||||||
private static final Object classpathLock = new Object();
|
private static final Object classpathLock = new Object();
|
||||||
|
@ -742,6 +743,7 @@ public abstract class TaskAttemptImpl implements
|
||||||
MRApps.setClasspath(env, conf);
|
MRApps.setClasspath(env, conf);
|
||||||
initialClasspath = env.get(Environment.CLASSPATH.name());
|
initialClasspath = env.get(Environment.CLASSPATH.name());
|
||||||
initialAppClasspath = env.get(Environment.APP_CLASSPATH.name());
|
initialAppClasspath = env.get(Environment.APP_CLASSPATH.name());
|
||||||
|
initialHadoopClasspath = env.get(Environment.HADOOP_CLASSPATH.name());
|
||||||
initialClasspathFlag.set(true);
|
initialClasspathFlag.set(true);
|
||||||
return initialClasspath;
|
return initialClasspath;
|
||||||
}
|
}
|
||||||
|
@ -879,6 +881,13 @@ public abstract class TaskAttemptImpl implements
|
||||||
Environment.CLASSPATH.name(),
|
Environment.CLASSPATH.name(),
|
||||||
getInitialClasspath(conf), conf);
|
getInitialClasspath(conf), conf);
|
||||||
|
|
||||||
|
if (initialHadoopClasspath != null) {
|
||||||
|
MRApps.addToEnvironment(
|
||||||
|
environment,
|
||||||
|
Environment.HADOOP_CLASSPATH.name(),
|
||||||
|
initialHadoopClasspath, conf);
|
||||||
|
}
|
||||||
|
|
||||||
if (initialAppClasspath != null) {
|
if (initialAppClasspath != null) {
|
||||||
MRApps.addToEnvironment(
|
MRApps.addToEnvironment(
|
||||||
environment,
|
environment,
|
||||||
|
|
|
@ -245,11 +245,30 @@ public class MRApps extends Apps {
|
||||||
conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false)
|
conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false)
|
||||||
? Environment.APP_CLASSPATH.name() : Environment.CLASSPATH.name();
|
? Environment.APP_CLASSPATH.name() : Environment.CLASSPATH.name();
|
||||||
|
|
||||||
|
String hadoopClasspathEnvVar = Environment.HADOOP_CLASSPATH.name();
|
||||||
|
|
||||||
MRApps.addToEnvironment(environment,
|
MRApps.addToEnvironment(environment,
|
||||||
classpathEnvVar, crossPlatformifyMREnv(conf, Environment.PWD), conf);
|
classpathEnvVar, crossPlatformifyMREnv(conf, Environment.PWD), conf);
|
||||||
|
|
||||||
|
MRApps.addToEnvironment(environment,
|
||||||
|
hadoopClasspathEnvVar, crossPlatformifyMREnv(conf, Environment.PWD),
|
||||||
|
conf);
|
||||||
|
|
||||||
if (!userClassesTakesPrecedence) {
|
if (!userClassesTakesPrecedence) {
|
||||||
MRApps.setMRFrameworkClasspath(environment, conf);
|
MRApps.setMRFrameworkClasspath(environment, conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addClasspathToEnv(environment, classpathEnvVar, conf);
|
||||||
|
addClasspathToEnv(environment, hadoopClasspathEnvVar, conf);
|
||||||
|
|
||||||
|
if (userClassesTakesPrecedence) {
|
||||||
|
MRApps.setMRFrameworkClasspath(environment, conf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
|
public static void addClasspathToEnv(Map<String, String> environment,
|
||||||
|
String classpathEnvVar, Configuration conf) throws IOException {
|
||||||
MRApps.addToEnvironment(
|
MRApps.addToEnvironment(
|
||||||
environment,
|
environment,
|
||||||
classpathEnvVar,
|
classpathEnvVar,
|
||||||
|
@ -257,15 +276,21 @@ public class MRApps extends Apps {
|
||||||
MRApps.addToEnvironment(
|
MRApps.addToEnvironment(
|
||||||
environment,
|
environment,
|
||||||
classpathEnvVar,
|
classpathEnvVar,
|
||||||
MRJobConfig.JOB_JAR + Path.SEPARATOR + "classes" + Path.SEPARATOR, conf);
|
MRJobConfig.JOB_JAR + Path.SEPARATOR + "classes" + Path.SEPARATOR,
|
||||||
|
conf);
|
||||||
|
|
||||||
MRApps.addToEnvironment(
|
MRApps.addToEnvironment(
|
||||||
environment,
|
environment,
|
||||||
classpathEnvVar,
|
classpathEnvVar,
|
||||||
MRJobConfig.JOB_JAR + Path.SEPARATOR + "lib" + Path.SEPARATOR + "*", conf);
|
MRJobConfig.JOB_JAR + Path.SEPARATOR + "lib" + Path.SEPARATOR + "*",
|
||||||
|
conf);
|
||||||
|
|
||||||
MRApps.addToEnvironment(
|
MRApps.addToEnvironment(
|
||||||
environment,
|
environment,
|
||||||
classpathEnvVar,
|
classpathEnvVar,
|
||||||
crossPlatformifyMREnv(conf, Environment.PWD) + Path.SEPARATOR + "*", conf);
|
crossPlatformifyMREnv(conf, Environment.PWD) + Path.SEPARATOR + "*",
|
||||||
|
conf);
|
||||||
|
|
||||||
// a * in the classpath will only find a .jar, so we need to filter out
|
// a * in the classpath will only find a .jar, so we need to filter out
|
||||||
// all .jars and add everything else
|
// all .jars and add everything else
|
||||||
addToClasspathIfNotJar(DistributedCache.getFileClassPaths(conf),
|
addToClasspathIfNotJar(DistributedCache.getFileClassPaths(conf),
|
||||||
|
@ -276,9 +301,6 @@ public class MRApps extends Apps {
|
||||||
DistributedCache.getCacheArchives(conf),
|
DistributedCache.getCacheArchives(conf),
|
||||||
conf,
|
conf,
|
||||||
environment, classpathEnvVar);
|
environment, classpathEnvVar);
|
||||||
if (userClassesTakesPrecedence) {
|
|
||||||
MRApps.setMRFrameworkClasspath(environment, conf);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -446,6 +468,7 @@ public class MRApps extends Apps {
|
||||||
return startCommitFile;
|
return startCommitFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
public static void setupDistributedCache(
|
public static void setupDistributedCache(
|
||||||
Configuration conf,
|
Configuration conf,
|
||||||
Map<String, LocalResource> localResources)
|
Map<String, LocalResource> localResources)
|
||||||
|
@ -478,6 +501,7 @@ public class MRApps extends Apps {
|
||||||
* @param conf
|
* @param conf
|
||||||
* @throws java.io.IOException
|
* @throws java.io.IOException
|
||||||
*/
|
*/
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
public static void setupDistributedCacheLocal(Configuration conf)
|
public static void setupDistributedCacheLocal(Configuration conf)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
|
@ -545,6 +569,7 @@ public class MRApps extends Apps {
|
||||||
// TODO - Move this to MR!
|
// TODO - Move this to MR!
|
||||||
// Use TaskDistributedCacheManager.CacheFiles.makeCacheFiles(URI[],
|
// Use TaskDistributedCacheManager.CacheFiles.makeCacheFiles(URI[],
|
||||||
// long[], boolean[], Path[], FileType)
|
// long[], boolean[], Path[], FileType)
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
private static void parseDistributedCacheArtifacts(
|
private static void parseDistributedCacheArtifacts(
|
||||||
Configuration conf,
|
Configuration conf,
|
||||||
Map<String, LocalResource> localResources,
|
Map<String, LocalResource> localResources,
|
||||||
|
|
|
@ -36,6 +36,8 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.FilterFileSystem;
|
import org.apache.hadoop.fs.FilterFileSystem;
|
||||||
|
@ -67,6 +69,9 @@ import org.junit.Test;
|
||||||
public class TestMRApps {
|
public class TestMRApps {
|
||||||
private static File testWorkDir = null;
|
private static File testWorkDir = null;
|
||||||
|
|
||||||
|
private static final Log LOG =
|
||||||
|
LogFactory.getLog(TestMRApps.class);
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void setupTestDirs() throws IOException {
|
public static void setupTestDirs() throws IOException {
|
||||||
testWorkDir = new File("target", TestMRApps.class.getCanonicalName());
|
testWorkDir = new File("target", TestMRApps.class.getCanonicalName());
|
||||||
|
@ -193,8 +198,8 @@ public class TestMRApps {
|
||||||
Job job = Job.getInstance(conf);
|
Job job = Job.getInstance(conf);
|
||||||
Map<String, String> environment = new HashMap<String, String>();
|
Map<String, String> environment = new HashMap<String, String>();
|
||||||
MRApps.setClasspath(environment, job.getConfiguration());
|
MRApps.setClasspath(environment, job.getConfiguration());
|
||||||
assertTrue(environment.get("CLASSPATH").startsWith(
|
assertTrue(environment.get(ApplicationConstants.Environment.CLASSPATH.
|
||||||
ApplicationConstants.Environment.PWD.$$()
|
name()).startsWith(ApplicationConstants.Environment.PWD.$$()
|
||||||
+ ApplicationConstants.CLASS_PATH_SEPARATOR));
|
+ ApplicationConstants.CLASS_PATH_SEPARATOR));
|
||||||
String yarnAppClasspath = job.getConfiguration().get(
|
String yarnAppClasspath = job.getConfiguration().get(
|
||||||
YarnConfiguration.YARN_APPLICATION_CLASSPATH,
|
YarnConfiguration.YARN_APPLICATION_CLASSPATH,
|
||||||
|
@ -205,7 +210,8 @@ public class TestMRApps {
|
||||||
yarnAppClasspath.replaceAll(",\\s*",
|
yarnAppClasspath.replaceAll(",\\s*",
|
||||||
ApplicationConstants.CLASS_PATH_SEPARATOR).trim();
|
ApplicationConstants.CLASS_PATH_SEPARATOR).trim();
|
||||||
}
|
}
|
||||||
assertTrue(environment.get("CLASSPATH").contains(yarnAppClasspath));
|
assertTrue(environment.get(ApplicationConstants.Environment.
|
||||||
|
CLASSPATH.name()).contains(yarnAppClasspath));
|
||||||
String mrAppClasspath =
|
String mrAppClasspath =
|
||||||
job.getConfiguration().get(
|
job.getConfiguration().get(
|
||||||
MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH,
|
MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH,
|
||||||
|
@ -215,7 +221,8 @@ public class TestMRApps {
|
||||||
mrAppClasspath.replaceAll(",\\s*",
|
mrAppClasspath.replaceAll(",\\s*",
|
||||||
ApplicationConstants.CLASS_PATH_SEPARATOR).trim();
|
ApplicationConstants.CLASS_PATH_SEPARATOR).trim();
|
||||||
}
|
}
|
||||||
assertTrue(environment.get("CLASSPATH").contains(mrAppClasspath));
|
assertTrue(environment.get(ApplicationConstants.Environment.CLASSPATH.
|
||||||
|
name()).contains(mrAppClasspath));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (timeout = 120000)
|
@Test (timeout = 120000)
|
||||||
|
@ -234,8 +241,12 @@ public class TestMRApps {
|
||||||
conf.set(MRJobConfig.CACHE_ARCHIVES, testTGZQualifiedPath + "#testTGZ");
|
conf.set(MRJobConfig.CACHE_ARCHIVES, testTGZQualifiedPath + "#testTGZ");
|
||||||
Map<String, String> environment = new HashMap<String, String>();
|
Map<String, String> environment = new HashMap<String, String>();
|
||||||
MRApps.setClasspath(environment, conf);
|
MRApps.setClasspath(environment, conf);
|
||||||
assertTrue(environment.get("CLASSPATH").startsWith(
|
assertTrue(environment.get(ApplicationConstants.Environment.CLASSPATH.name()).startsWith(
|
||||||
ApplicationConstants.Environment.PWD.$$() + ApplicationConstants.CLASS_PATH_SEPARATOR));
|
ApplicationConstants.Environment.PWD.$$() + ApplicationConstants.CLASS_PATH_SEPARATOR));
|
||||||
|
assertTrue(environment.get(ApplicationConstants.Environment.
|
||||||
|
HADOOP_CLASSPATH.name()).startsWith(
|
||||||
|
ApplicationConstants.Environment.PWD.$$() +
|
||||||
|
ApplicationConstants.CLASS_PATH_SEPARATOR));
|
||||||
String confClasspath = job.getConfiguration().get(
|
String confClasspath = job.getConfiguration().get(
|
||||||
YarnConfiguration.YARN_APPLICATION_CLASSPATH,
|
YarnConfiguration.YARN_APPLICATION_CLASSPATH,
|
||||||
StringUtils.join(",",
|
StringUtils.join(",",
|
||||||
|
@ -244,8 +255,19 @@ public class TestMRApps {
|
||||||
confClasspath = confClasspath.replaceAll(",\\s*", ApplicationConstants.CLASS_PATH_SEPARATOR)
|
confClasspath = confClasspath.replaceAll(",\\s*", ApplicationConstants.CLASS_PATH_SEPARATOR)
|
||||||
.trim();
|
.trim();
|
||||||
}
|
}
|
||||||
assertTrue(environment.get("CLASSPATH").contains(confClasspath));
|
LOG.info("CLASSPATH: "+ environment.get(
|
||||||
assertTrue(environment.get("CLASSPATH").contains("testTGZ"));
|
ApplicationConstants.Environment.CLASSPATH.name()));
|
||||||
|
LOG.info("confClasspath: " + confClasspath);
|
||||||
|
assertTrue(environment.get(
|
||||||
|
ApplicationConstants.Environment.CLASSPATH.name()).contains(
|
||||||
|
confClasspath));
|
||||||
|
LOG.info("HADOOP_CLASSPATH: " + environment.get(
|
||||||
|
ApplicationConstants.Environment.HADOOP_CLASSPATH.name()));
|
||||||
|
assertTrue(environment.get(
|
||||||
|
ApplicationConstants.Environment.CLASSPATH.name()).contains("testTGZ"));
|
||||||
|
assertTrue(environment.get(
|
||||||
|
ApplicationConstants.Environment.HADOOP_CLASSPATH.name()).
|
||||||
|
contains("testTGZ"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (timeout = 120000)
|
@Test (timeout = 120000)
|
||||||
|
@ -259,7 +281,7 @@ public class TestMRApps {
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
fail("Got exception while setting classpath");
|
fail("Got exception while setting classpath");
|
||||||
}
|
}
|
||||||
String env_str = env.get("CLASSPATH");
|
String env_str = env.get(ApplicationConstants.Environment.CLASSPATH.name());
|
||||||
String expectedClasspath = StringUtils.join(ApplicationConstants.CLASS_PATH_SEPARATOR,
|
String expectedClasspath = StringUtils.join(ApplicationConstants.CLASS_PATH_SEPARATOR,
|
||||||
Arrays.asList(ApplicationConstants.Environment.PWD.$$(), "job.jar/job.jar",
|
Arrays.asList(ApplicationConstants.Environment.PWD.$$(), "job.jar/job.jar",
|
||||||
"job.jar/classes/", "job.jar/lib/*",
|
"job.jar/classes/", "job.jar/lib/*",
|
||||||
|
@ -279,7 +301,7 @@ public class TestMRApps {
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
fail("Got exception while setting classpath");
|
fail("Got exception while setting classpath");
|
||||||
}
|
}
|
||||||
String env_str = env.get("CLASSPATH");
|
String env_str = env.get(ApplicationConstants.Environment.CLASSPATH.name());
|
||||||
String expectedClasspath = StringUtils.join(ApplicationConstants.CLASS_PATH_SEPARATOR,
|
String expectedClasspath = StringUtils.join(ApplicationConstants.CLASS_PATH_SEPARATOR,
|
||||||
Arrays.asList("job.jar/job.jar", "job.jar/classes/", "job.jar/lib/*",
|
Arrays.asList("job.jar/job.jar", "job.jar/classes/", "job.jar/lib/*",
|
||||||
ApplicationConstants.Environment.PWD.$$() + "/*"));
|
ApplicationConstants.Environment.PWD.$$() + "/*"));
|
||||||
|
@ -296,8 +318,9 @@ public class TestMRApps {
|
||||||
conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true);
|
conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true);
|
||||||
Map<String, String> env = new HashMap<String, String>();
|
Map<String, String> env = new HashMap<String, String>();
|
||||||
MRApps.setClasspath(env, conf);
|
MRApps.setClasspath(env, conf);
|
||||||
String cp = env.get("CLASSPATH");
|
String cp = env.get(ApplicationConstants.Environment.CLASSPATH.name());
|
||||||
String appCp = env.get("APP_CLASSPATH");
|
String appCp = env.get(ApplicationConstants.Environment.
|
||||||
|
APP_CLASSPATH.name());
|
||||||
assertFalse("MAPREDUCE_JOB_CLASSLOADER true, but job.jar is in the"
|
assertFalse("MAPREDUCE_JOB_CLASSLOADER true, but job.jar is in the"
|
||||||
+ " classpath!", cp.contains("jar" + ApplicationConstants.CLASS_PATH_SEPARATOR + "job"));
|
+ " classpath!", cp.contains("jar" + ApplicationConstants.CLASS_PATH_SEPARATOR + "job"));
|
||||||
assertFalse("MAPREDUCE_JOB_CLASSLOADER true, but PWD is in the classpath!",
|
assertFalse("MAPREDUCE_JOB_CLASSLOADER true, but PWD is in the classpath!",
|
||||||
|
@ -338,7 +361,8 @@ public class TestMRApps {
|
||||||
Arrays.asList(ApplicationConstants.Environment.PWD.$$(),
|
Arrays.asList(ApplicationConstants.Environment.PWD.$$(),
|
||||||
FRAMEWORK_CLASSPATH, stdClasspath));
|
FRAMEWORK_CLASSPATH, stdClasspath));
|
||||||
assertEquals("Incorrect classpath with framework and no user precedence",
|
assertEquals("Incorrect classpath with framework and no user precedence",
|
||||||
expectedClasspath, env.get("CLASSPATH"));
|
expectedClasspath, env.get(ApplicationConstants.Environment.
|
||||||
|
CLASSPATH.name()));
|
||||||
|
|
||||||
env.clear();
|
env.clear();
|
||||||
conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);
|
conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);
|
||||||
|
@ -347,7 +371,8 @@ public class TestMRApps {
|
||||||
Arrays.asList(ApplicationConstants.Environment.PWD.$$(),
|
Arrays.asList(ApplicationConstants.Environment.PWD.$$(),
|
||||||
stdClasspath, FRAMEWORK_CLASSPATH));
|
stdClasspath, FRAMEWORK_CLASSPATH));
|
||||||
assertEquals("Incorrect classpath with framework and user precedence",
|
assertEquals("Incorrect classpath with framework and user precedence",
|
||||||
expectedClasspath, env.get("CLASSPATH"));
|
expectedClasspath, env.get(ApplicationConstants.Environment.CLASSPATH.
|
||||||
|
name()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (timeout = 30000)
|
@Test (timeout = 30000)
|
||||||
|
|
|
@ -160,6 +160,11 @@ public interface ApplicationConstants {
|
||||||
*/
|
*/
|
||||||
APP_CLASSPATH("APP_CLASSPATH"),
|
APP_CLASSPATH("APP_CLASSPATH"),
|
||||||
|
|
||||||
|
/**
|
||||||
|
* $HADOOP_CLASSPATH.
|
||||||
|
*/
|
||||||
|
HADOOP_CLASSPATH("HADOOP_CLASSPATH"),
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* $LD_LIBRARY_PATH
|
* $LD_LIBRARY_PATH
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue