diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java index 936dc5a00d1..d305f9ff076 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java @@ -19,7 +19,6 @@ package org.apache.hadoop.mapred; import java.net.InetSocketAddress; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Vector; @@ -28,7 +27,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.TaskLog.LogName; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskType; -import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; @@ -42,50 +40,53 @@ public class MapReduceChildJVM { filter.toString(); } - private static String getChildEnv(JobConf jobConf, boolean isMap) { + private static String getChildEnvProp(JobConf jobConf, boolean isMap) { if (isMap) { - return jobConf.get(JobConf.MAPRED_MAP_TASK_ENV, - jobConf.get(JobConf.MAPRED_TASK_ENV)); + return JobConf.MAPRED_MAP_TASK_ENV; } - return jobConf.get(JobConf.MAPRED_REDUCE_TASK_ENV, - jobConf.get(JobConf.MAPRED_TASK_ENV)); + return JobConf.MAPRED_REDUCE_TASK_ENV; + } + + private static String getChildEnvDefaultValue(JobConf jobConf) { + // There is no default value for these - use the fallback value instead. + return jobConf.get(JobConf.MAPRED_TASK_ENV); } public static void setVMEnv(Map environment, Task task) { JobConf conf = task.conf; - // Add the env variables passed by the user - String mapredChildEnv = getChildEnv(conf, task.isMapTask()); - MRApps.setEnvFromInputString(environment, mapredChildEnv, conf); + boolean isMap = task.isMapTask(); - // Set logging level in the environment. - // This is so that, if the child forks another "bin/hadoop" (common in - // streaming) it will have the correct loglevel. - environment.put( - "HADOOP_ROOT_LOGGER", - MRApps.getChildLogLevel(conf, task.isMapTask()) + ",console"); + // Remove these before adding the user variables to prevent + // MRApps.setEnvFromInputProperty() from appending to them. + String hadoopRootLoggerKey = "HADOOP_ROOT_LOGGER"; + String hadoopClientOptsKey = "HADOOP_CLIENT_OPTS"; + environment.remove(hadoopRootLoggerKey); + environment.remove(hadoopClientOptsKey); - // TODO: The following is useful for instance in streaming tasks. Should be - // set in ApplicationMaster's env by the RM. - String hadoopClientOpts = System.getenv("HADOOP_CLIENT_OPTS"); - if (hadoopClientOpts == null) { - hadoopClientOpts = ""; - } else { - hadoopClientOpts = hadoopClientOpts + " "; + // Add the environment variables passed by the user + MRApps.setEnvFromInputProperty(environment, getChildEnvProp(conf, isMap), + getChildEnvDefaultValue(conf), conf); + + // Set HADOOP_ROOT_LOGGER and HADOOP_CLIENTS if the user did not set them. + if (!environment.containsKey(hadoopRootLoggerKey)) { + // Set the value for logging level in the environment. + // This is so that, if the child forks another "bin/hadoop" (common in + // streaming) it will have the correct loglevel. + environment.put(hadoopRootLoggerKey, + MRApps.getChildLogLevel(conf, task.isMapTask()) + ",console"); } - environment.put("HADOOP_CLIENT_OPTS", hadoopClientOpts); - - // setEnvFromInputString above will add env variable values from - // mapredChildEnv to existing variables. We want to overwrite - // HADOOP_ROOT_LOGGER and HADOOP_CLIENT_OPTS if the user set it explicitly. - Map tmpEnv = new HashMap(); - MRApps.setEnvFromInputString(tmpEnv, mapredChildEnv, conf); - String[] keys = { "HADOOP_ROOT_LOGGER", "HADOOP_CLIENT_OPTS" }; - for (String key : keys) { - if (tmpEnv.containsKey(key)) { - environment.put(key, tmpEnv.get(key)); + if (!environment.containsKey(hadoopClientOptsKey)) { + // TODO: The following is useful for instance in streaming tasks. + // Should be set in ApplicationMaster's env by the RM. + String hadoopClientOptsValue = System.getenv(hadoopClientOptsKey); + if (hadoopClientOptsValue == null) { + hadoopClientOptsValue = ""; + } else { + hadoopClientOptsValue = hadoopClientOptsValue + " "; } + environment.put(hadoopClientOptsKey, hadoopClientOptsValue); } // Add stdout/stderr env diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index ad63cece080..6d1ac61250a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -961,10 +961,10 @@ public abstract class TaskAttemptImpl implements MRApps.crossPlatformifyMREnv(conf, Environment.PWD), conf); // Add the env variables passed by the admin - MRApps.setEnvFromInputString(environment, - conf.get(MRJobConfig.MAPRED_ADMIN_USER_ENV, - MRJobConfig.DEFAULT_MAPRED_ADMIN_USER_ENV), - conf); + MRApps.setEnvFromInputProperty(environment, + MRJobConfig.MAPRED_ADMIN_USER_ENV, + MRJobConfig.DEFAULT_MAPRED_ADMIN_USER_ENV, conf); + return environment; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestMapReduceChildJVM.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestMapReduceChildJVM.java index 4a5d480133c..f00ff281f30 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestMapReduceChildJVM.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestMapReduceChildJVM.java @@ -289,11 +289,11 @@ public class TestMapReduceChildJVM { app.verifyCompleted(); Assert.assertTrue("HADOOP_ROOT_LOGGER not set for job", - app.cmdEnvironment.containsKey("HADOOP_ROOT_LOGGER")); + app.cmdEnvironment.containsKey("HADOOP_ROOT_LOGGER")); Assert.assertEquals("WARN,console", - app.cmdEnvironment.get("HADOOP_ROOT_LOGGER")); + app.cmdEnvironment.get("HADOOP_ROOT_LOGGER")); Assert.assertTrue("HADOOP_CLIENT_OPTS not set for job", - app.cmdEnvironment.containsKey("HADOOP_CLIENT_OPTS")); + app.cmdEnvironment.containsKey("HADOOP_CLIENT_OPTS")); Assert.assertEquals("test", app.cmdEnvironment.get("HADOOP_CLIENT_OPTS")); // Try one more. @@ -305,8 +305,22 @@ public class TestMapReduceChildJVM { app.verifyCompleted(); Assert.assertTrue("HADOOP_ROOT_LOGGER not set for job", - app.cmdEnvironment.containsKey("HADOOP_ROOT_LOGGER")); + app.cmdEnvironment.containsKey("HADOOP_ROOT_LOGGER")); Assert.assertEquals("trace", - app.cmdEnvironment.get("HADOOP_ROOT_LOGGER")); + app.cmdEnvironment.get("HADOOP_ROOT_LOGGER")); + + // Try one using the mapreduce.task.env.var=value syntax + app = new MyMRApp(1, 0, true, this.getClass().getName(), true); + conf = new Configuration(); + conf.set(JobConf.MAPRED_MAP_TASK_ENV + ".HADOOP_ROOT_LOGGER", + "DEBUG,console"); + job = app.submit(conf); + app.waitForState(job, JobState.SUCCEEDED); + app.verifyCompleted(); + + Assert.assertTrue("HADOOP_ROOT_LOGGER not set for job", + app.cmdEnvironment.containsKey("HADOOP_ROOT_LOGGER")); + Assert.assertEquals("DEBUG,console", + app.cmdEnvironment.get("HADOOP_ROOT_LOGGER")); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java index d435a1fa3d4..1b087a760cb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java @@ -708,6 +708,16 @@ public class MRApps extends Apps { Apps.setEnvFromInputString(env, envString, classPathSeparator); } + public static void setEnvFromInputProperty(Map env, + String propName, String defaultPropValue, Configuration conf) { + String classPathSeparator = + conf.getBoolean(MRConfig.MAPREDUCE_APP_SUBMISSION_CROSS_PLATFORM, + MRConfig.DEFAULT_MAPREDUCE_APP_SUBMISSION_CROSS_PLATFORM) + ? ApplicationConstants.CLASS_PATH_SEPARATOR : File.pathSeparator; + Apps.setEnvFromInputProperty(env, propName, defaultPropValue, conf, + classPathSeparator); + } + @Public @Unstable public static void addToEnvironment(Map environment, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java index 8c57d1bed03..c0dd6502f07 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java @@ -312,6 +312,15 @@ public class JobConf extends Configuration { *
    *
  • A=foo - This will set the env variable A to foo.
  • *
+ * + * You can also add environment variables individually by appending + * .VARNAME to this configuration key, where VARNAME is + * the name of the environment variable. + * + * Example: + *
    + *
  • mapreduce.map.env.VARNAME=value
  • + *
*/ public static final String MAPRED_MAP_TASK_ENV = JobContext.MAP_ENV; @@ -326,6 +335,15 @@ public class JobConf extends Configuration { *
    *
  • A=foo - This will set the env variable A to foo.
  • *
+ * + * You can also add environment variables individually by appending + * .VARNAME to this configuration key, where VARNAME is + * the name of the environment variable. + * + * Example: + *
    + *
  • mapreduce.reduce.env.VARNAME=value
  • + *
*/ public static final String MAPRED_REDUCE_TASK_ENV = JobContext.REDUCE_ENV; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index c7d6cd2d49a..e5da41f9e8b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -350,11 +350,22 @@ mapred.child.env - User added environment variables for the task processes. + User added environment variables for the task processes, + specified as a comma separated list. Example : 1) A=foo This will set the env variable A to foo 2) B=$B:c This is inherit nodemanager's B env variable on Unix. 3) B=%B%;c This is inherit nodemanager's B env variable on Windows. + + To specify a comma separated list of environment variables specifically for + map or reduce tasks, use the mapreduce.map.env or mapreduce.reduce.env + properties. + + To define environment variables individually for map or reduce tasks, + you can specify multiple properties of the form mapreduce.map.env.VARNAME + or mapreduce.reduce.env.VARNAME, where VARNAME is the name of the + environment variable. This is the only way to add a variable when its value + contains commas. @@ -362,7 +373,15 @@ mapreduce.map.env - User added environment variables for the map task processes. + User added environment variables for the map task processes, + specified as a comma separated list. + Example: + VAR1=value1,VAR2=value2 + + To define environment variables individually, you can specify + multiple properties of the form mapreduce.map.env.VARNAME, + where VARNAME is the name of the environment variable. This is the only + way to add a variable when its value contains commas. --> @@ -371,7 +390,16 @@ mapreduce.reduce.env - User added environment variables for the reduce task processes. + User added environment variables for the reduce task processes, + specified as a comma separated list. + Example: + VAR1=value1,VAR2=value2 + + To define environment variables individually, you can specify + multiple properties of the form mapreduce.reduce.env.VARNAME, + where VARNAME is the name of the environment variable. This is the only + way to add a variable when its value contains commas. + contains commas. --> @@ -385,9 +413,14 @@ You must preserve the original value if you want your map and reduce tasks to have access to native libraries (compression, etc). When this value is empty, the command to set execution - envrionment will be OS dependent: + environment will be OS dependent: For linux, use LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native. For windows, use PATH = %PATH%;%HADOOP_COMMON_HOME%\\bin. + + To define environment variables individually, you can specify + multiple properties of the form mapreduce.admin.user.env.VARNAME, + where VARNAME is the name of the environment variable. This is the only + way to add a variable when its value contains commas. @@ -1365,20 +1398,32 @@ yarn.app.mapreduce.am.env User added environment variables for the MR App Master - processes. Example : + processes, specified as a comma separated list. + Example : 1) A=foo This will set the env variable A to foo 2) B=$B:c This is inherit tasktracker's B env variable. + + To define environment variables individually, you can specify + multiple properties of the form yarn.app.mapreduce.am.env.VARNAME, + where VARNAME is the name of the environment variable. This is the only + way to add a variable when its value contains commas. yarn.app.mapreduce.am.admin.user.env - Environment variables for the MR App Master - processes for admin purposes. These values are set first and can be - overridden by the user env (yarn.app.mapreduce.am.env) Example : + Environment variables for the MR App Master + processes for admin purposes, specified as a comma separated list + These values are set first and can be overridden by the user env + (yarn.app.mapreduce.am.env). Example : 1) A=foo This will set the env variable A to foo 2) B=$B:c This is inherit app master's B env variable. + + To define environment variables individually, you can specify + multiple properties of the form yarn.app.mapreduce.am.admin.user.env.VARNAME, + where VARNAME is the name of the environment variable. This is the only + way to add a variable when its value contains commas. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md index fc3f020ea23..b585e84bac3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md @@ -206,6 +206,12 @@ For example, Here, the files dir1/dict.txt and dir2/dict.txt can be accessed by tasks using the symbolic names dict1 and dict2 respectively. The archive mytar.tgz will be placed and unarchived into a directory by the name "tgzdir". +Applications can specify environment variables for mapper, reducer, and application master tasks by specifying them on the command line using the options -Dmapreduce.map.env, -Dmapreduce.reduce.env, and -Dyarn.app.mapreduce.am.env, respectively. + +For example the following sets environment variables FOO_VAR=bar and LIST_VAR=a,b,c for the mappers and reducers, + + bin/hadoop jar hadoop-mapreduce-examples-.jar wordcount -Dmapreduce.map.env.FOO_VAR=bar -Dmapreduce.map.env.LIST_VAR=a,b,c -Dmapreduce.reduce.env.FOO_VAR=bar -Dmapreduce.reduce.env.LIST_VAR=a,b,c input output + ### Walk-through The `WordCount` application is quite straight-forward. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index 127e1dc0f79..496ff10d60a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -533,12 +533,13 @@ public class YARNRunner implements ClientProtocol { MRApps.crossPlatformifyMREnv(conf, Environment.PWD), conf); // Setup the environment variables for Admin first - MRApps.setEnvFromInputString(environment, - conf.get(MRJobConfig.MR_AM_ADMIN_USER_ENV, - MRJobConfig.DEFAULT_MR_AM_ADMIN_USER_ENV), conf); + MRApps.setEnvFromInputProperty(environment, + MRJobConfig.MR_AM_ADMIN_USER_ENV, + MRJobConfig.DEFAULT_MR_AM_ADMIN_USER_ENV, + conf); // Setup the environment variables (LD_LIBRARY_PATH, etc) - MRApps.setEnvFromInputString(environment, - conf.get(MRJobConfig.MR_AM_ENV), conf); + MRApps.setEnvFromInputProperty(environment, MRJobConfig.MR_AM_ENV, null, + conf); // Parse distributed cache MRApps.setupDistributedCache(jobConf, localResources); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java index 7b3b5bee50f..babf22b6399 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java @@ -812,15 +812,22 @@ public class TestYARNRunner { @Test public void testAMStandardEnvWithDefaultLibPath() throws Exception { - testAMStandardEnv(false); + testAMStandardEnv(false, false); } @Test public void testAMStandardEnvWithCustomLibPath() throws Exception { - testAMStandardEnv(true); + testAMStandardEnv(true, false); } - private void testAMStandardEnv(boolean customLibPath) throws Exception { + @Test + public void testAMStandardEnvWithCustomLibPathWithSeparateEnvProps() + throws Exception { + testAMStandardEnv(true, true); + } + + private void testAMStandardEnv(boolean customLibPath, + boolean useSeparateEnvProps) throws Exception { // the Windows behavior is different and this test currently doesn't really // apply // MAPREDUCE-6588 should revisit this test @@ -833,9 +840,16 @@ public class TestYARNRunner { String pathKey = Environment.LD_LIBRARY_PATH.name(); if (customLibPath) { - jobConf.set(MRJobConfig.MR_AM_ADMIN_USER_ENV, pathKey + "=" + - ADMIN_LIB_PATH); - jobConf.set(MRJobConfig.MR_AM_ENV, pathKey + "=" + USER_LIB_PATH); + if (useSeparateEnvProps) { + // Specify these as individual variables instead of k=v lists + jobConf.set(MRJobConfig.MR_AM_ADMIN_USER_ENV + "." + pathKey, + ADMIN_LIB_PATH); + jobConf.set(MRJobConfig.MR_AM_ENV + "." + pathKey, USER_LIB_PATH); + } else { + jobConf.set(MRJobConfig.MR_AM_ADMIN_USER_ENV, pathKey + "=" + + ADMIN_LIB_PATH); + jobConf.set(MRJobConfig.MR_AM_ENV, pathKey + "=" + USER_LIB_PATH); + } } jobConf.set(MRJobConfig.MAPRED_ADMIN_USER_SHELL, USER_SHELL); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java index 1c90d551b7b..7a02874c4f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java @@ -24,6 +24,7 @@ import static org.apache.hadoop.yarn.util.StringHelper.sjoin; import java.io.File; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.regex.Matcher; @@ -32,6 +33,7 @@ import java.util.regex.Pattern; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.yarn.api.ApplicationConstants; @@ -80,33 +82,112 @@ public class Apps { throw new YarnRuntimeException(join("Error parsing ", name, ": ", s)); } + private static void setEnvFromString(Map env, + String envVar, String varString, String classPathSeparator) { + Matcher m = VAR_SUBBER.matcher(varString); + StringBuffer sb = new StringBuffer(); + while (m.find()) { + String var = m.group(1); + // do variable substitution of $var from passed in environment or from + // system environment and default to empty string if undefined in both. + String replace = env.get(var); + if (replace == null) { + replace = System.getenv(var); + } + if (replace == null) { + replace = ""; + } + m.appendReplacement(sb, Matcher.quoteReplacement(replace)); + } + m.appendTail(sb); + addToEnvironment(env, envVar, sb.toString(), classPathSeparator); + } + public static void setEnvFromInputString(Map env, String envString, String classPathSeparator) { if (envString != null && envString.length() > 0) { Matcher varValMatcher = VARVAL_SPLITTER.matcher(envString); while (varValMatcher.find()) { String envVar = varValMatcher.group(1); - Matcher m = VAR_SUBBER.matcher(varValMatcher.group(2)); - StringBuffer sb = new StringBuffer(); - while (m.find()) { - String var = m.group(1); - // replace $env with the child's env constructed by tt's - String replace = env.get(var); - // if this key is not configured by the tt for the child .. get it - // from the tt's env - if (replace == null) - replace = System.getenv(var); - // the env key is note present anywhere .. simply set it - if (replace == null) - replace = ""; - m.appendReplacement(sb, Matcher.quoteReplacement(replace)); - } - m.appendTail(sb); - addToEnvironment(env, envVar, sb.toString(), classPathSeparator); + String varString = varValMatcher.group(2); + setEnvFromString(env, envVar, varString, classPathSeparator); } } } + /** + * Set environment from string without doing any variable substitution. + * Used internally to avoid double expansion. + * @param env environment to set + * @param envString comma-separated k=v pairs. + * @param classPathSeparator Separator to use when appending to an existing + * environment variable. + */ + private static void setEnvFromInputStringNoExpand(Map env, + String envString, String classPathSeparator) { + if (envString != null && envString.length() > 0) { + Matcher varValMatcher = VARVAL_SPLITTER.matcher(envString); + while (varValMatcher.find()) { + String envVar = varValMatcher.group(1); + String varString = varValMatcher.group(2); + addToEnvironment(env, envVar, varString, classPathSeparator); + } + } + } + + /** + * Set environment variables from map of input properties. + * @param env environment to update + * @param inputMap environment variable property keys and values + * @param classPathSeparator separator to use when appending to an existing + * environment variable + */ + private static void setEnvFromInputStringMap(Map env, + Map inputMap, String classPathSeparator) { + for(Map.Entry inputVar: inputMap.entrySet()) { + String envVar = inputVar.getKey(); + String varString = inputVar.getValue(); + setEnvFromString(env, envVar, varString, classPathSeparator); + } + } + + /** + * Set environment variables from the given environment input property. + * For example, given the property mapreduce.map.env, this method + * will extract environment variables from: + * the comma-separated string value of mapreduce.map.env, and + * the values of any properties of the form mapreduce.map.env.VAR_NAME + * Variables specified via the latter syntax take precedence over those + * specified using the former syntax. + * @param env the environment to update + * @param propName the name of the property + * @param defaultPropValue the default value for propName + * @param conf configuration containing properties + * @param classPathSeparator Separator used when appending to an existing var + */ + public static void setEnvFromInputProperty(Map env, + String propName, String defaultPropValue, Configuration conf, + String classPathSeparator) { + + String envString = conf.get(propName, defaultPropValue); + + // Get k,v pairs from string into a tmp env. Note that we don't want + // to expand the env var values, because we will do that below - + // don't want to do it twice. + Map tmpEnv = new HashMap(); + Apps.setEnvFromInputStringNoExpand(tmpEnv, envString, classPathSeparator); + + // Get map of props with prefix propName. + // (e.g., map.reduce.env.ENV_VAR_NAME=value) + Map inputMap = conf.getPropsWithPrefix(propName + "."); + + // Entries from map should override entries from input string. + tmpEnv.putAll(inputMap); + + // Add them to the environment + setEnvFromInputStringMap(env, tmpEnv, classPathSeparator); + } + /** * * @param envString String containing env variable definitions diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApps.java index 66d2d231344..0d65d659f42 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApps.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.yarn.util; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Shell; import org.junit.Test; @@ -25,6 +26,8 @@ import java.util.HashMap; import java.util.Map; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; public class TestApps { @Test @@ -58,4 +61,137 @@ public class TestApps { assertEquals("=", environment.get("e1")); assertEquals("a1=a2", environment.get("e2")); } + + @Test + public void testSetEnvFromInputProperty() { + Configuration conf = new Configuration(false); + Map env = new HashMap<>(); + String propName = "mapreduce.map.env"; + String defaultPropName = "mapreduce.child.env"; + // Setup environment input properties + conf.set(propName, "env1=env1_val,env2=env2_val,env3=env3_val"); + conf.set(propName + ".env4", "env4_val"); + conf.set(propName + ".env2", "new_env2_val"); + // Setup some default values - we shouldn't see these values + conf.set(defaultPropName, "env1=def1_val,env2=def2_val,env3=def3_val"); + String defaultPropValue = conf.get(defaultPropName); + // These should never be referenced. + conf.set(defaultPropName + ".env4", "def4_val"); + conf.set(defaultPropName + ".env2", "new_def2_val"); + Apps.setEnvFromInputProperty(env, propName, defaultPropValue, conf, + File.pathSeparator); + // Check values from string + assertEquals("env1_val", env.get("env1")); + assertEquals("env3_val", env.get("env3")); + // Check individual value + assertEquals("env4_val", env.get("env4")); + // Check individual value that eclipses one in string + assertEquals("new_env2_val", env.get("env2")); + } + + @Test + public void testSetEnvFromInputPropertyDefault() { + Configuration conf = new Configuration(false); + Map env = new HashMap<>(); + String propName = "mapreduce.map.env"; + String defaultPropName = "mapreduce.child.env"; + // Setup environment input properties + conf.set(propName, "env1=env1_val,env2=env2_val,env3=env3_val"); + conf.set(propName + ".env4", "env4_val"); + conf.set(propName + ".env2", "new_env2_val"); + // Setup some default values + conf.set(defaultPropName, "env1=def1_val,env2=def2_val,env3=def3_val"); + String defaultPropValue = conf.get(defaultPropName); + // These should never be referenced. + conf.set(defaultPropName + ".env4", "def4_val"); + conf.set(defaultPropName + ".env2", "new_def2_val"); + + // Test using default value for the string. + // Individually specified env properties do not have defaults, + // so this should just get things from the defaultPropName string. + String bogusProp = propName + "bogus"; + Apps.setEnvFromInputProperty(env, bogusProp, defaultPropValue, conf, + File.pathSeparator); + // Check values from string + assertEquals("def1_val", env.get("env1")); + assertEquals("def2_val", env.get("env2")); + assertEquals("def3_val", env.get("env3")); + // Check individual value is not set. + assertNull(env.get("env4")); + } + + @Test + public void testSetEnvFromInputPropertyOverrideDefault() { + Configuration conf = new Configuration(false); + Map env = new HashMap<>(); + + // Try using default value, but specify some individual values using + // the main propName, but no main value, so it should get values from + // the default string, and then the individual values. + String propName = "mapreduce.reduce.env"; + conf.set(propName + ".env2", "new2_val"); + conf.set(propName + ".env4", "new4_val"); + // Setup some default values - we shouldn't see these values + String defaultPropName = "mapreduce.child.env"; + conf.set(defaultPropName, "env1=def1_val,env2=def2_val,env3=def3_val"); + String defaultPropValue = conf.get(defaultPropName); + // These should never be referenced. + conf.set(defaultPropName + ".env4", "def4_val"); + conf.set(defaultPropName + ".env2", "new_def2_val"); + Apps.setEnvFromInputProperty(env, propName, defaultPropValue, conf, + File.pathSeparator); + + // Check values from string + assertEquals("def1_val", env.get("env1")); + assertEquals("def3_val", env.get("env3")); + // Check individual value + assertEquals("new4_val", env.get("env4")); + // Check individual value that eclipses one in string + assertEquals("new2_val", env.get("env2")); + } + + @Test + public void testSetEnvFromInputPropertyCommas() { + Configuration conf = new Configuration(false); + Map env = new HashMap<>(); + String propName = "mapreduce.reduce.env"; + conf.set(propName, "env1=env1_val,env2=env2_val,env3=env3_val"); + conf.set(propName + ".env2", "new2_val1,new2_val2,new2_val3"); + conf.set(propName + ".env4", "new4_valwith=equals"); + // Setup some default values - we shouldn't see these values + String defaultPropName = "mapreduce.child.env"; + conf.set(defaultPropName, "env1=def1_val,env2=def2_val,env3=def3_val"); + String defaultPropValue = conf.get(defaultPropName); + + Apps.setEnvFromInputProperty(env, propName, defaultPropValue, conf, + File.pathSeparator); + // Check values from string + assertEquals("env1_val", env.get("env1")); + assertEquals("env3_val", env.get("env3")); + // Check individual value + assertEquals("new4_valwith=equals", env.get("env4")); + // Check individual value that eclipses one in string + assertEquals("new2_val1,new2_val2,new2_val3", env.get("env2")); + } + + @Test + public void testSetEnvFromInputPropertyNull() { + Configuration conf = new Configuration(false); + Map env = new HashMap<>(); + String propName = "mapreduce.map.env"; + String defaultPropName = "mapreduce.child.env"; + // Setup environment input properties + conf.set(propName, "env1=env1_val,env2=env2_val,env3=env3_val"); + conf.set(propName + ".env4", "env4_val"); + conf.set(propName + ".env2", "new_env2_val"); + // Setup some default values - we shouldn't see these values + conf.set(defaultPropName, "env1=def1_val,env2=def2_val,env3=def3_val"); + String defaultPropValue = conf.get(defaultPropName); + // These should never be referenced. + conf.set(defaultPropName + ".env4", "def4_val"); + conf.set(defaultPropName + ".env2", "new_def2_val"); + // Try with null inputs + Apps.setEnvFromInputProperty(env, "bogus1", null, conf, File.pathSeparator); + assertTrue(env.isEmpty()); + } }