From 0ba7078ef4ee127a47c5042c82db0b113a967b23 Mon Sep 17 00:00:00 2001 From: Thomas White Date: Wed, 9 Jan 2013 16:12:58 +0000 Subject: [PATCH] MAPREDUCE-1700. User supplied dependencies may conflict with MapReduce system JARs. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1430929 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../org/apache/hadoop/mapred/YarnChild.java | 6 +- .../hadoop/mapreduce/v2/app/MRAppMaster.java | 2 + .../v2/app/job/impl/TaskAttemptImpl.java | 9 ++ .../hadoop/mapreduce/v2/util/MRApps.java | 83 +++++++++++++++++-- .../hadoop/mapreduce/v2/util/TestMRApps.java | 16 ++++ .../apache/hadoop/mapreduce/MRJobConfig.java | 4 + .../src/main/resources/mapred-default.xml | 17 ++++ 8 files changed, 130 insertions(+), 10 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index de9e597170d..eb7fbb04c72 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -247,6 +247,9 @@ Release 2.0.3-alpha - Unreleased MAPREDUCE-4278. Cannot run two local jobs in parallel from the same gateway. (Sandy Ryza via tomwhite) + MAPREDUCE-1700. User supplied dependencies may conflict with MapReduce + system JARs. (tomwhite) + Release 2.0.2-alpha - 2012-09-07 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java index 84676f26e70..e312e682bec 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java @@ -47,6 +47,7 @@ import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; +import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.net.NetUtils; @@ -253,7 +254,10 @@ private static JobConf configureTask(Task task, Credentials credentials, Token jt) throws IOException { final JobConf job = new JobConf(MRJobConfig.JOB_CONF_FILE); job.setCredentials(credentials); - + + // set job classloader if configured + MRApps.setJobClassLoader(job); + String appAttemptIdEnv = System .getenv(MRJobConfig.APPLICATION_ATTEMPT_ID_ENV); LOG.debug("APPLICATION_ATTEMPT_ID: " + appAttemptIdEnv); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 89feb8e26e5..405c4748032 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -1223,6 +1223,8 @@ public static void main(String[] args) { // SIGTERM I have a chance to write out the job history. I'll be closing // the objects myself. conf.setBoolean("fs.automatic.close", false); + // set job classloader if configured + MRApps.setJobClassLoader(conf); initAndStartAppMaster(appMaster, conf, jobUserName); } catch (Throwable t) { LOG.fatal("Error starting MRAppMaster", t); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index 31ea2f2ac1a..c9535a79a29 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -165,6 +165,7 @@ public abstract class TaskAttemptImpl implements private Token jobToken; private static AtomicBoolean initialClasspathFlag = new AtomicBoolean(); private static String initialClasspath = null; + private static String initialAppClasspath = null; private static Object commonContainerSpecLock = new Object(); private static ContainerLaunchContext commonContainerSpec = null; private static final Object classpathLock = new Object(); @@ -599,6 +600,7 @@ private static String getInitialClasspath(Configuration conf) throws IOException Map env = new HashMap(); MRApps.setClasspath(env, conf); initialClasspath = env.get(Environment.CLASSPATH.name()); + initialAppClasspath = env.get(Environment.APP_CLASSPATH.name()); initialClasspathFlag.set(true); return initialClasspath; } @@ -697,6 +699,13 @@ private static ContainerLaunchContext createCommonContainerLaunchContext( environment, Environment.CLASSPATH.name(), getInitialClasspath(conf)); + + if (initialAppClasspath != null) { + Apps.addToEnvironment( + environment, + Environment.APP_CLASSPATH.name(), + initialAppClasspath); + } } catch (IOException e) { throw new YarnException(e); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java index 82f4bbcd57e..975d9f8c010 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java @@ -19,12 +19,18 @@ package org.apache.hadoop.mapreduce.v2.util; import java.io.IOException; +import java.net.MalformedURLException; import java.net.URI; +import java.security.AccessController; +import java.security.PrivilegedActionException; +import java.security.PrivilegedExceptionAction; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; @@ -50,6 +56,7 @@ import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.util.ApplicationClassLoader; import org.apache.hadoop.yarn.util.Apps; import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; @@ -62,6 +69,8 @@ @Private @Unstable public class MRApps extends Apps { + public static final Log LOG = LogFactory.getLog(MRApps.class); + public static String toString(JobId jid) { return jid.toString(); } @@ -157,38 +166,42 @@ public static void setClasspath(Map environment, boolean userClassesTakesPrecedence = conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false); + String classpathEnvVar = + conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false) + ? Environment.APP_CLASSPATH.name() : Environment.CLASSPATH.name(); + Apps.addToEnvironment(environment, - Environment.CLASSPATH.name(), + classpathEnvVar, Environment.PWD.$()); if (!userClassesTakesPrecedence) { MRApps.setMRFrameworkClasspath(environment, conf); } Apps.addToEnvironment( environment, - Environment.CLASSPATH.name(), + classpathEnvVar, MRJobConfig.JOB_JAR + Path.SEPARATOR + MRJobConfig.JOB_JAR); Apps.addToEnvironment( environment, - Environment.CLASSPATH.name(), + classpathEnvVar, MRJobConfig.JOB_JAR + Path.SEPARATOR + "classes" + Path.SEPARATOR); Apps.addToEnvironment( environment, - Environment.CLASSPATH.name(), + classpathEnvVar, MRJobConfig.JOB_JAR + Path.SEPARATOR + "lib" + Path.SEPARATOR + "*"); Apps.addToEnvironment( environment, - Environment.CLASSPATH.name(), + classpathEnvVar, Environment.PWD.$() + Path.SEPARATOR + "*"); // a * in the classpath will only find a .jar, so we need to filter out // all .jars and add everything else addToClasspathIfNotJar(DistributedCache.getFileClassPaths(conf), DistributedCache.getCacheFiles(conf), conf, - environment); + environment, classpathEnvVar); addToClasspathIfNotJar(DistributedCache.getArchiveClassPaths(conf), DistributedCache.getCacheArchives(conf), conf, - environment); + environment, classpathEnvVar); if (userClassesTakesPrecedence) { MRApps.setMRFrameworkClasspath(environment, conf); } @@ -204,7 +217,8 @@ public static void setClasspath(Map environment, */ private static void addToClasspathIfNotJar(Path[] paths, URI[] withLinks, Configuration conf, - Map environment) throws IOException { + Map environment, + String classpathEnvVar) throws IOException { if (paths != null) { HashMap linkLookup = new HashMap(); if (withLinks != null) { @@ -232,13 +246,64 @@ private static void addToClasspathIfNotJar(Path[] paths, if(!name.toLowerCase().endsWith(".jar")) { Apps.addToEnvironment( environment, - Environment.CLASSPATH.name(), + classpathEnvVar, Environment.PWD.$() + Path.SEPARATOR + name); } } } } + /** + * Sets a {@link ApplicationClassLoader} on the given configuration and as + * the context classloader, if + * {@link MRJobConfig#MAPREDUCE_JOB_CLASSLOADER} is set to true, and + * the APP_CLASSPATH environment variable is set. + * @param conf + * @throws IOException + */ + public static void setJobClassLoader(Configuration conf) + throws IOException { + if (conf.getBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false)) { + String appClasspath = System.getenv(Environment.APP_CLASSPATH.key()); + if (appClasspath == null) { + LOG.warn("Not using job classloader since APP_CLASSPATH is not set."); + } else { + LOG.info("Using job classloader"); + if (LOG.isDebugEnabled()) { + LOG.debug("APP_CLASSPATH=" + appClasspath); + } + String[] systemClasses = conf.getStrings( + MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES); + ClassLoader jobClassLoader = createJobClassLoader(appClasspath, + systemClasses); + if (jobClassLoader != null) { + conf.setClassLoader(jobClassLoader); + Thread.currentThread().setContextClassLoader(jobClassLoader); + } + } + } + } + + private static ClassLoader createJobClassLoader(final String appClasspath, + final String[] systemClasses) throws IOException { + try { + return AccessController.doPrivileged( + new PrivilegedExceptionAction() { + @Override + public ClassLoader run() throws MalformedURLException { + return new ApplicationClassLoader(appClasspath, + MRApps.class.getClassLoader(), Arrays.asList(systemClasses)); + } + }); + } catch (PrivilegedActionException e) { + Throwable t = e.getCause(); + if (t instanceof MalformedURLException) { + throw (MalformedURLException) t; + } + throw new IOException(e); + } + } + private static final String STAGING_CONSTANT = ".staging"; public static Path getStagingAreaDir(Configuration conf, String user) { return new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java index 5cf515bb25e..b196c18f129 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java @@ -235,6 +235,22 @@ private static void delete(File dir) throws IOException { index, 0); } + @Test public void testSetClasspathWithJobClassloader() throws IOException { + Configuration conf = new Configuration(); + conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, true); + Map env = new HashMap(); + MRApps.setClasspath(env, conf); + String cp = env.get("CLASSPATH"); + String appCp = env.get("APP_CLASSPATH"); + assertSame("MAPREDUCE_JOB_CLASSLOADER true, but job.jar is" + + " in the classpath!", cp.indexOf("jar:job"), -1); + assertSame("MAPREDUCE_JOB_CLASSLOADER true, but PWD is" + + " in the classpath!", cp.indexOf("PWD"), -1); + assertEquals("MAPREDUCE_JOB_CLASSLOADER true, but job.jar is not" + + " in the app classpath!", + "$PWD:job.jar/job.jar:job.jar/classes/:job.jar/lib/*:$PWD/*", appCp); + } + @Test public void testSetupDistributedCacheEmpty() throws IOException { Configuration conf = new Configuration(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index f4ff04b111a..5b154671605 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -127,6 +127,10 @@ public interface MRJobConfig { public static final String MAPREDUCE_JOB_USER_CLASSPATH_FIRST = "mapreduce.job.user.classpath.first"; + public static final String MAPREDUCE_JOB_CLASSLOADER = "mapreduce.job.classloader"; + + public static final String MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES = "mapreduce.job.classloader.system.classes"; + public static final String IO_SORT_FACTOR = "mapreduce.task.io.sort.factor"; public static final String IO_SORT_MB = "mapreduce.task.io.sort.mb"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index a08e261b00f..bf575bc9816 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -961,6 +961,23 @@ $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/* + + mapreduce.job.classloader + false + Whether to use a separate (isolated) classloader for + user classes in the task JVM. + + + + mapreduce.job.classloader.system.classes + java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop. + A comma-separated list of classes that should be loaded from the + system classpath, not the user-supplied JARs, when mapreduce.job.classloader + is enabled. Names ending in '.' (period) are treated as package names, + and names starting with a '-' are treated as negative matches. + + +