From 23c3ff85a9e73d8f0755e14f12cc7c89b72acddd Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Tue, 28 Jun 2016 05:53:03 -0700 Subject: [PATCH] HADOOP-13209. replace slaves with workers (John Smith via aw) --- .../hadoop-common/src/main/bin/hadoop | 6 +- .../src/main/bin/hadoop-config.cmd | 4 +- .../src/main/bin/hadoop-config.sh | 4 + .../src/main/bin/hadoop-daemons.sh | 6 +- .../src/main/bin/hadoop-functions.sh | 75 ++++++----- .../src/main/bin/{slaves.sh => workers.sh} | 10 +- .../hadoop-common/src/main/conf/hadoop-env.sh | 4 +- .../conf/hadoop-user-functions.sh.example | 4 +- .../hadoop-common/src/main/conf/workers | 0 .../hadoop-common/src/main/java/overview.html | 46 +++---- .../src/site/markdown/ClusterSetup.md | 18 +-- .../src/site/markdown/CommandsManual.md | 6 +- .../src/site/markdown/RackAwareness.md | 4 +- .../src/test/scripts/hadoop_ssh.bats | 18 +-- ...hadoop_slaves.bats => hadoop_workers.bats} | 20 +-- .../hadoop-hdfs/src/main/bin/hdfs | 10 +- .../hadoop-hdfs/src/main/bin/start-dfs.sh | 12 +- .../src/main/bin/start-secure-dns.sh | 2 +- .../hadoop-hdfs/src/main/bin/stop-dfs.sh | 12 +- .../src/main/bin/stop-secure-dns.sh | 2 +- .../token/block/BlockTokenSecretManager.java | 47 +++---- .../hadoop-hdfs/src/main/java/overview.html | 46 +++---- .../src/site/markdown/Federation.md | 2 +- .../markdown/HDFSHighAvailabilityWithQJM.md | 2 +- .../hadoop/filecache/DistributedCache.java | 70 +++++------ .../mapreduce/filecache/DistributedCache.java | 108 ++++++++-------- .../src/site/markdown/MapReduceTutorial.md | 22 ++-- .../apache/hadoop/mapred/ReliabilityTest.java | 119 +++++++++--------- .../apache/hadoop/mapred/TestLazyOutput.java | 31 ++--- .../apache/hadoop/mapred/pipes/TestPipes.java | 105 ++++++++-------- .../mapreduce/TestMapReduceLazyOutput.java | 44 ++++--- .../security/TestBinaryTokenFile.java | 68 +++++----- .../mapreduce/security/TestMRCredentials.java | 6 +- .../hadoop-yarn/bin/start-yarn.sh | 6 +- .../hadoop-yarn/bin/stop-yarn.sh | 6 +- hadoop-yarn-project/hadoop-yarn/bin/yarn | 12 +- .../hadoop-yarn/bin/yarn-config.cmd | 2 +- .../hadoop-yarn/bin/yarn-config.sh | 20 +-- .../hadoop-yarn/bin/yarn-daemons.sh | 6 +- hadoop-yarn-project/hadoop-yarn/pom.xml | 2 +- 40 files changed, 505 insertions(+), 482 deletions(-) rename hadoop-common-project/hadoop-common/src/main/bin/{slaves.sh => workers.sh} (86%) rename hadoop-yarn-project/hadoop-yarn/conf/slaves => hadoop-common-project/hadoop-common/src/main/conf/workers (100%) rename hadoop-common-project/hadoop-common/src/test/scripts/{hadoop_slaves.bats => hadoop_workers.bats} (70%) diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop index 6cf872c5eb4..b57a4c13053 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop @@ -28,7 +28,7 @@ function hadoop_usage hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in slave mode" hadoop_add_option "loglevel level" "set the log4j level for this command" hadoop_add_option "hosts filename" "list of hosts to use in slave mode" - hadoop_add_option "slaves" "turn on slave mode" + hadoop_add_option "workers" "turn on worker mode" hadoop_add_subcommand "checknative" "check native Hadoop and compression libraries availability" hadoop_add_subcommand "classpath" "prints the class path needed to get the Hadoop jar and the required libraries" @@ -205,8 +205,8 @@ fi hadoop_verify_user "${HADOOP_SUBCMD}" -if [[ ${HADOOP_SLAVE_MODE} = true 
]]; then - hadoop_common_slave_mode_execute "${HADOOP_COMMON_HOME}/bin/hadoop" "${HADOOP_USER_PARAMS[@]}" +if [[ ${HADOOP_WORKER_MODE} = true ]]; then + hadoop_common_worker_mode_execute "${HADOOP_COMMON_HOME}/bin/hadoop" "${HADOOP_USER_PARAMS[@]}" exit $? fi diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd index 8d4b897eaac..d77dc5346a1 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd @@ -80,12 +80,12 @@ if "%1" == "--config" ( ) @rem -@rem check to see it is specified whether to use the slaves or the +@rem check to see it is specified whether to use the workers or the @rem masters file @rem if "%1" == "--hosts" ( - set HADOOP_SLAVES=%HADOOP_CONF_DIR%\%2 + set HADOOP_WORKERS=%HADOOP_CONF_DIR%\%2 shift shift ) diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh index 104247adf71..ba8d69d3826 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh @@ -113,6 +113,10 @@ hadoop_exec_userfuncs hadoop_exec_user_hadoopenv hadoop_verify_confdir +hadoop_deprecate_envvar HADOOP_SLAVES HADOOP_WORKERS +hadoop_deprecate_envvar HADOOP_SLAVE_NAMES HADOOP_WORKER_NAMES +hadoop_deprecate_envvar HADOOP_SLAVE_SLEEP HADOOP_WORKER_SLEEP + # do all the OS-specific startup bits here # this allows us to get a decent JAVA_HOME, # call crle for LD_LIBRARY_PATH, etc. diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh index ae1e3248238..55304916ad1 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh @@ -57,13 +57,13 @@ else fi hadoop_error "WARNING: Use of this script to ${daemonmode} HDFS daemons is deprecated." -hadoop_error "WARNING: Attempting to execute replacement \"hdfs --slaves --daemon ${daemonmode}\" instead." +hadoop_error "WARNING: Attempting to execute replacement \"hdfs --workers --daemon ${daemonmode}\" instead." # # Original input was usually: # hadoop-daemons.sh (shell options) (start|stop) (datanode|...) 
(daemon options) # we're going to turn this into -# hdfs --slaves --daemon (start|stop) (rest of options) +# hdfs --workers --daemon (start|stop) (rest of options) # for (( i = 0; i < ${#HADOOP_USER_PARAMS[@]}; i++ )) do @@ -74,4 +74,4 @@ do fi done -${hdfsscript} --slaves --daemon "${daemonmode}" "${HADOOP_USER_PARAMS[@]}" +${hdfsscript} --workers --daemon "${daemonmode}" "${HADOOP_USER_PARAMS[@]}" diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh index 99c47384bd2..c38059903b0 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh @@ -602,25 +602,25 @@ function hadoop_basic_init HADOOP_SSH_PARALLEL=${HADOOP_SSH_PARALLEL:-10} } -## @description Set the slave support information to the contents +## @description Set the worker support information to the contents ## @description of `filename` ## @audience public ## @stability stable ## @replaceable no ## @param filename ## @return will exit if file does not exist -function hadoop_populate_slaves_file +function hadoop_populate_workers_file { - local slavesfile=$1 + local workersfile=$1 shift - if [[ -f "${slavesfile}" ]]; then + if [[ -f "${workersfile}" ]]; then # shellcheck disable=2034 - HADOOP_SLAVES="${slavesfile}" - elif [[ -f "${HADOOP_CONF_DIR}/${slavesfile}" ]]; then + HADOOP_WORKERS="${workersfile}" + elif [[ -f "${HADOOP_CONF_DIR}/${workersfile}" ]]; then # shellcheck disable=2034 - HADOOP_SLAVES="${HADOOP_CONF_DIR}/${slavesfile}" + HADOOP_WORKERS="${HADOOP_CONF_DIR}/${workersfile}" else - hadoop_error "ERROR: Cannot find hosts file \"${slavesfile}\"" + hadoop_error "ERROR: Cannot find hosts file \"${workersfile}\"" hadoop_exit_with_usage 1 fi } @@ -669,14 +669,14 @@ function hadoop_actual_ssh { # we are passing this function to xargs # should get hostname followed by rest of command line - local slave=$1 + local worker=$1 shift # shellcheck disable=SC2086 - ssh ${HADOOP_SSH_OPTS} ${slave} $"${@// /\\ }" 2>&1 | sed "s/^/$slave: /" + ssh ${HADOOP_SSH_OPTS} ${worker} $"${@// /\\ }" 2>&1 | sed "s/^/$worker: /" } -## @description Connect to ${HADOOP_SLAVES} or ${HADOOP_SLAVE_NAMES} +## @description Connect to ${HADOOP_WORKERS} or ${HADOOP_WORKER_NAMES} ## @description and execute command. ## @audience private ## @stability evolving @@ -687,45 +687,52 @@ function hadoop_connect_to_hosts { # shellcheck disable=SC2124 local params="$@" - local slave_file + local worker_file local tmpslvnames # # ssh (or whatever) to a host # # User can specify hostnames or a file where the hostnames are (not both) - if [[ -n "${HADOOP_SLAVES}" && -n "${HADOOP_SLAVE_NAMES}" ]] ; then - hadoop_error "ERROR: Both HADOOP_SLAVES and HADOOP_SLAVE_NAME were defined. Aborting." + if [[ -n "${HADOOP_WORKERS}" && -n "${HADOOP_WORKER_NAMES}" ]] ; then + hadoop_error "ERROR: Both HADOOP_WORKERS and HADOOP_WORKER_NAME were defined. Aborting." exit 1 - elif [[ -z "${HADOOP_SLAVE_NAMES}" ]]; then - slave_file=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves} + elif [[ -z "${HADOOP_WORKER_NAMES}" ]]; then + if [[ -n "${HADOOP_WORKERS}" ]]; then + worker_file=${HADOOP_WORKERS} + elif [[ -f "${HADOOP_CONF_DIR}/workers" ]]; then + worker_file=${HADOOP_CONF_DIR}/workers + elif [[ -f "${HADOOP_CONF_DIR}/slaves" ]]; then + hadoop_error "WARNING: 'slaves' file has been deprecated. Please use 'workers' file instead." 
+ worker_file=${HADOOP_CONF_DIR}/slaves + fi fi # if pdsh is available, let's use it. otherwise default # to a loop around ssh. (ugh) if [[ -e '/usr/bin/pdsh' ]]; then - if [[ -z "${HADOOP_SLAVE_NAMES}" ]] ; then + if [[ -z "${HADOOP_WORKER_NAMES}" ]] ; then # if we were given a file, just let pdsh deal with it. # shellcheck disable=SC2086 PDSH_SSH_ARGS_APPEND="${HADOOP_SSH_OPTS}" pdsh \ - -f "${HADOOP_SSH_PARALLEL}" -w ^"${slave_file}" $"${@// /\\ }" 2>&1 + -f "${HADOOP_SSH_PARALLEL}" -w ^"${worker_file}" $"${@// /\\ }" 2>&1 else # no spaces allowed in the pdsh arg host list # shellcheck disable=SC2086 - tmpslvnames=$(echo ${SLAVE_NAMES} | tr -s ' ' ,) + tmpslvnames=$(echo ${HADOOP_WORKER_NAMES} | tr -s ' ' ,) PDSH_SSH_ARGS_APPEND="${HADOOP_SSH_OPTS}" pdsh \ -f "${HADOOP_SSH_PARALLEL}" \ -w "${tmpslvnames}" $"${@// /\\ }" 2>&1 fi else - if [[ -z "${HADOOP_SLAVE_NAMES}" ]]; then - HADOOP_SLAVE_NAMES=$(sed 's/#.*$//;/^$/d' "${slave_file}") + if [[ -z "${HADOOP_WORKER_NAMES}" ]]; then + HADOOP_WORKER_NAMES=$(sed 's/#.*$//;/^$/d' "${worker_file}") fi hadoop_connect_to_hosts_without_pdsh "${params}" fi } -## @description Connect to ${SLAVE_NAMES} and execute command +## @description Connect to ${HADOOP_WORKER_NAMES} and execute command ## @description under the environment which does not support pdsh. ## @audience private ## @stability evolving @@ -736,24 +743,24 @@ function hadoop_connect_to_hosts_without_pdsh { # shellcheck disable=SC2124 local params="$@" - local slaves=(${HADOOP_SLAVE_NAMES}) - for (( i = 0; i < ${#slaves[@]}; i++ )) + local workers=(${HADOOP_WORKER_NAMES}) + for (( i = 0; i < ${#workers[@]}; i++ )) do if (( i != 0 && i % HADOOP_SSH_PARALLEL == 0 )); then wait fi # shellcheck disable=SC2086 - hadoop_actual_ssh "${slaves[$i]}" ${params} & + hadoop_actual_ssh "${workers[$i]}" ${params} & done wait } -## @description Utility routine to handle --slaves mode +## @description Utility routine to handle --workers mode ## @audience private ## @stability evolving ## @replaceable yes ## @param commandarray -function hadoop_common_slave_mode_execute +function hadoop_common_worker_mode_execute { # # input should be the command line as given by the user @@ -761,13 +768,13 @@ function hadoop_common_slave_mode_execute # local argv=("$@") - # if --slaves is still on the command line, remove it + # if --workers is still on the command line, remove it # to prevent loops # Also remove --hostnames and --hosts along with arg values local argsSize=${#argv[@]}; for (( i = 0; i < argsSize; i++ )) do - if [[ "${argv[$i]}" =~ ^--slaves$ ]]; then + if [[ "${argv[$i]}" =~ ^--workers$ ]]; then unset argv[$i] elif [[ "${argv[$i]}" =~ ^--hostnames$ ]] || [[ "${argv[$i]}" =~ ^--hosts$ ]]; then @@ -2051,13 +2058,13 @@ function hadoop_parse_args --hostnames) shift # shellcheck disable=SC2034 - HADOOP_SLAVE_NAMES="$1" + HADOOP_WORKER_NAMES="$1" shift ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2)) ;; --hosts) shift - hadoop_populate_slaves_file "$1" + hadoop_populate_workers_file "$1" shift ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2)) ;; @@ -2068,10 +2075,10 @@ function hadoop_parse_args shift ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2)) ;; - --slaves) + --workers) shift # shellcheck disable=SC2034 - HADOOP_SLAVE_MODE=true + HADOOP_WORKER_MODE=true ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+1)) ;; *) @@ -2104,4 +2111,4 @@ function hadoop_xml_escape function hadoop_sed_escape { sed -e 's/[\/&]/\\&/g' <<< "$1" -} \ No newline at end of file +} diff --git 
a/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh b/hadoop-common-project/hadoop-common/src/main/bin/workers.sh similarity index 86% rename from hadoop-common-project/hadoop-common/src/main/bin/slaves.sh rename to hadoop-common-project/hadoop-common/src/main/bin/workers.sh index 34bf0ebb2b8..84ffabd857e 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/workers.sh @@ -16,20 +16,20 @@ # limitations under the License. -# Run a shell command on all slave hosts. +# Run a shell command on all worker hosts. # # Environment Variables # -# HADOOP_SLAVES File naming remote hosts. -# Default is ${HADOOP_CONF_DIR}/slaves. +# HADOOP_WORKERS File naming remote hosts. +# Default is ${HADOOP_CONF_DIR}/workers. # HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_HOME}/conf. -# HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands. +# HADOOP_WORKER_SLEEP Seconds to sleep between spawning remote commands. # HADOOP_SSH_OPTS Options passed to ssh when running remote commands. ## function hadoop_usage { - echo "Usage: slaves.sh [--config confdir] command..." + echo "Usage: workers.sh [--config confdir] command..." } # let's locate libexec... diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh index 3f19e459c4d..6565d1d6a76 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh @@ -169,8 +169,8 @@ esac # export HADOOP_SSH_PARALLEL=10 # Filename which contains all of the hosts for any remote execution -# helper scripts # such as slaves.sh, start-dfs.sh, etc. -# export HADOOP_SLAVES="${HADOOP_CONF_DIR}/slaves" +# helper scripts # such as workers.sh, start-dfs.sh, etc. +# export HADOOP_WORKERS="${HADOOP_CONF_DIR}/workers" ### # Options for all daemons diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example index 91a2d8bdf63..0eeae3c8043 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example @@ -87,7 +87,7 @@ #} # -# Example: efficient command execution for the slaves +# Example: efficient command execution for the workers # # To improve performance, you can use xargs -P # instead of the for loop, if supported. @@ -108,7 +108,7 @@ # # list with each hostname read from stdin/pipe. But it consider one # # line as one argument while reading from stdin/pipe. So place each # # hostname in different lines while passing via pipe. 
-# tmpslvnames=$(echo "${HADOOP_SLAVE_NAMES}" | tr ' ' '\n' ) +# tmpslvnames=$(echo "${HADOOP_WORKER_NAMES}" | tr ' ' '\n' ) # echo "${tmpslvnames}" | \ # xargs -n 1 -P"${HADOOP_SSH_PARALLEL}" \ # -I {} bash -c -- "hadoop_actual_ssh {} ${params}" diff --git a/hadoop-yarn-project/hadoop-yarn/conf/slaves b/hadoop-common-project/hadoop-common/src/main/conf/workers similarity index 100% rename from hadoop-yarn-project/hadoop-yarn/conf/slaves rename to hadoop-common-project/hadoop-common/src/main/conf/workers diff --git a/hadoop-common-project/hadoop-common/src/main/java/overview.html b/hadoop-common-project/hadoop-common/src/main/java/overview.html index 5868617709b..2c64121831f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/overview.html +++ b/hadoop-common-project/hadoop-common/src/main/java/overview.html @@ -23,33 +23,33 @@ Hadoop is a distributed computing platform. -

Hadoop primarily consists of the Hadoop Distributed FileSystem -(HDFS) and an +

Hadoop primarily consists of the Hadoop Distributed FileSystem +(HDFS) and an implementation of the Map-Reduce programming paradigm.

-

Hadoop is a software framework that lets one easily write and run applications +

Hadoop is a software framework that lets one easily write and run applications that process vast amounts of data. Here's what makes Hadoop especially useful:

+

Requirements

@@ -61,15 +61,15 @@ that process vast amounts of data. Here's what makes Hadoop especially useful:
 Windows is also a supported platform.

    Requisite Software

    1. - Java 1.6.x, preferably from - Sun. + Java 1.6.x, preferably from + Sun. Set JAVA_HOME to the root of your Java installation.
    2. @@ -141,8 +141,8 @@ host and port. This is specified with the configuration property href="../mapred-default.html#mapred.job.tracker">mapred.job.tracker.
-A slaves file that lists the names of all the hosts in
-the cluster. The default slaves file is conf/slaves.
+A workers file that lists the names of all the hosts in
+the cluster. The default workers file is conf/workers.
    @@ -242,31 +242,31 @@ as master.example.com:port in conf/mapred-site.xm
 Directories for dfs.name.dir and
-dfs.data.dir
+dfs.data.dir
 in conf/hdfs-site.xml. These are local directories used to hold distributed filesystem
-data on the master node and slave nodes respectively. Note
+data on the master node and worker nodes respectively. Note
 that dfs.data.dir may contain a space- or comma-separated list of directory names, so that data may be stored on multiple local devices.
 mapred.local.dir
- in conf/mapred-site.xml, the local directory where temporary
+ in conf/mapred-site.xml, the local directory where temporary
 MapReduce data is stored. It also may be a list of directories.
 mapred.map.tasks and mapred.reduce.tasks
+href="../mapred-default.html#mapred.reduce.tasks">mapred.reduce.tasks
 in conf/mapred-site.xml. As a rule of thumb, use 10x the
-number of slave processors for mapred.map.tasks, and 2x the
-number of slave processors for mapred.reduce.tasks.
+number of worker processors for mapred.map.tasks, and 2x the
+number of worker processors for mapred.reduce.tasks.

    Finally, list all slave hostnames or IP addresses in your -conf/slaves file, one per line. Then format your filesystem +

    Finally, list all worker hostnames or IP addresses in your +conf/workers file, one per line. Then format your filesystem and start your cluster on your master node, as above. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md b/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md index 7d5040c85e7..0d551b1c2e8 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md @@ -27,7 +27,7 @@ This document does not cover advanced topics such as [Security](./SecureMode.htm Prerequisites ------------- -* Install Java. See the [Hadoop Wiki](http://wiki.apache.org/hadoop/HadoopJavaVersions) for known good versions. +* Install Java. See the [Hadoop Wiki](http://wiki.apache.org/hadoop/HadoopJavaVersions) for known good versions. * Download a stable version of Hadoop from Apache mirrors. Installation @@ -37,7 +37,7 @@ Installing a Hadoop cluster typically involves unpacking the software on all the Typically one machine in the cluster is designated as the NameNode and another machine the as ResourceManager, exclusively. These are the masters. Other services (such as Web App Proxy Server and MapReduce Job History server) are usually run either on dedicated hardware or on shared infrastrucutre, depending upon the load. -The rest of the machines in the cluster act as both DataNode and NodeManager. These are the slaves. +The rest of the machines in the cluster act as both DataNode and NodeManager. These are the workers. Configuring Hadoop in Non-Secure Mode ------------------------------------- @@ -216,12 +216,12 @@ The health checker script is not supposed to give ERROR if only some of the loca Slaves File ----------- -List all slave hostnames or IP addresses in your `etc/hadoop/slaves` file, one per line. Helper scripts (described below) will use the `etc/hadoop/slaves` file to run commands on many hosts at once. It is not used for any of the Java-based Hadoop configuration. In order to use this functionality, ssh trusts (via either passphraseless ssh or some other means, such as Kerberos) must be established for the accounts used to run Hadoop. +List all worker hostnames or IP addresses in your `etc/hadoop/workers` file, one per line. Helper scripts (described below) will use the `etc/hadoop/workers` file to run commands on many hosts at once. It is not used for any of the Java-based Hadoop configuration. In order to use this functionality, ssh trusts (via either passphraseless ssh or some other means, such as Kerberos) must be established for the accounts used to run Hadoop. Hadoop Rack Awareness --------------------- -Many Hadoop components are rack-aware and take advantage of the network topology for performance and safety. Hadoop daemons obtain the rack information of the slaves in the cluster by invoking an administrator configured module. See the [Rack Awareness](./RackAwareness.html) documentation for more specific information. +Many Hadoop components are rack-aware and take advantage of the network topology for performance and safety. Hadoop daemons obtain the rack information of the workers in the cluster by invoking an administrator configured module. See the [Rack Awareness](./RackAwareness.html) documentation for more specific information. It is highly recommended configuring rack awareness prior to starting HDFS. 
@@ -253,7 +253,7 @@ Start a HDFS DataNode with the following command on each designated node as *hdf [hdfs]$ $HADOOP_HOME/bin/hdfs --daemon start datanode -If `etc/hadoop/slaves` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the HDFS processes can be started with a utility script. As *hdfs*: +If `etc/hadoop/workers` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the HDFS processes can be started with a utility script. As *hdfs*: [hdfs]$ $HADOOP_HOME/sbin/start-dfs.sh @@ -269,7 +269,7 @@ Start a standalone WebAppProxy server. Run on the WebAppProxy server as *yarn*. [yarn]$ $HADOOP_HOME/bin/yarn --daemon start proxyserver -If `etc/hadoop/slaves` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the YARN processes can be started with a utility script. As *yarn*: +If `etc/hadoop/workers` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the YARN processes can be started with a utility script. As *yarn*: [yarn]$ $HADOOP_HOME/sbin/start-yarn.sh @@ -287,7 +287,7 @@ Run a script to stop a DataNode as *hdfs*: [hdfs]$ $HADOOP_HOME/bin/hdfs --daemon stop datanode -If `etc/hadoop/slaves` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the HDFS processes may be stopped with a utility script. As *hdfs*: +If `etc/hadoop/workers` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the HDFS processes may be stopped with a utility script. As *hdfs*: [hdfs]$ $HADOOP_HOME/sbin/stop-dfs.sh @@ -295,11 +295,11 @@ Stop the ResourceManager with the following command, run on the designated Resou [yarn]$ $HADOOP_HOME/bin/yarn --daemon stop resourcemanager -Run a script to stop a NodeManager on a slave as *yarn*: +Run a script to stop a NodeManager on a worker as *yarn*: [yarn]$ $HADOOP_HOME/bin/yarn --daemon stop nodemanager -If `etc/hadoop/slaves` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the YARN processes can be stopped with a utility script. As *yarn*: +If `etc/hadoop/workers` and ssh trusted access is configured (see [Single Node Setup](./SingleCluster.html)), all of the YARN processes can be stopped with a utility script. As *yarn*: [yarn]$ $HADOOP_HOME/sbin/stop-yarn.sh diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md b/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md index 44ba5eaffe1..4d7d5044ad5 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/CommandsManual.md @@ -43,10 +43,10 @@ All of the shell commands will accept a common set of options. For some commands | `--daemon mode` | If the command supports daemonization (e.g., `hdfs namenode`), execute in the appropriate mode. Supported modes are `start` to start the process in daemon mode, `stop` to stop the process, and `status` to determine the active status of the process. `status` will return an [LSB-compliant](http://refspecs.linuxbase.org/LSB_3.0.0/LSB-generic/LSB-generic/iniscrptact.html) result code. If no option is provided, commands that support daemonization will run in the foreground. For commands that do not support daemonization, this option is ignored. | | `--debug` | Enables shell level configuration debugging information | | `--help` | Shell script usage information. 
| -| `--hostnames` | When `--slaves` is used, override the slaves file with a space delimited list of hostnames where to execute a multi-host subcommand. If `--slaves` is not used, this option is ignored. | -| `--hosts` | When `--slaves` is used, override the slaves file with another file that contains a list of hostnames where to execute a multi-host subcommand. If `--slaves` is not used, this option is ignored. | +| `--hostnames` | When `--workers` is used, override the workers file with a space delimited list of hostnames where to execute a multi-host subcommand. If `--workers` is not used, this option is ignored. | +| `--hosts` | When `--workers` is used, override the workers file with another file that contains a list of hostnames where to execute a multi-host subcommand. If `--workers` is not used, this option is ignored. | | `--loglevel loglevel` | Overrides the log level. Valid log levels are FATAL, ERROR, WARN, INFO, DEBUG, and TRACE. Default is INFO. | -| `--slaves` | If possible, execute this command on all hosts in the `slaves` file. | +| `--workers` | If possible, execute this command on all hosts in the `workers` file. | ### Generic Options diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/RackAwareness.md b/hadoop-common-project/hadoop-common/src/site/markdown/RackAwareness.md index f440686c806..6a52f511d6b 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/RackAwareness.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/RackAwareness.md @@ -22,7 +22,7 @@ will use rack awareness for fault tolerance by placing one block replica on a different rack. This provides data availability in the event of a network switch failure or partition within the cluster. -Hadoop master daemons obtain the rack id of the cluster slaves by +Hadoop master daemons obtain the rack id of the cluster workers by invoking either an external script or java class as specified by configuration files. Using either the java class or external script for topology, output must adhere to the java @@ -40,7 +40,7 @@ in the configuration file. An example, NetworkTopology.java, is included with the hadoop distribution and can be customized by the Hadoop administrator. Using a Java class instead of an external script has a performance benefit in that Hadoop doesn't need to fork an -external process when a new slave node registers itself. +external process when a new worker node registers itself. 
If implementing an external script, it will be specified with the **net.topology.script.file.name** parameter in the configuration diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_ssh.bats b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_ssh.bats index 2520a1ce0c4..50959f99a6a 100644 --- a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_ssh.bats +++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_ssh.bats @@ -30,22 +30,22 @@ load hadoop-functions_test_helper hadoop_connect_to_hosts_without_pdsh } -@test "hadoop_common_slave_mode_execute (--slaves 1)" { - run hadoop_common_slave_mode_execute --slaves command +@test "hadoop_common_worker_mode_execute (--workers 1)" { + run hadoop_common_worker_mode_execute --workers command [ "${output}" = "command" ] } -@test "hadoop_common_slave_mode_execute (--slaves 2)" { - run hadoop_common_slave_mode_execute --slaves command1 command2 +@test "hadoop_common_worker_mode_execute (--workers 2)" { + run hadoop_common_worker_mode_execute --workers command1 command2 [ "${output}" = "command1 command2" ] } -@test "hadoop_common_slave_mode_execute (--hosts)" { - run hadoop_common_slave_mode_execute --hosts filename command +@test "hadoop_common_worker_mode_execute (--hosts)" { + run hadoop_common_worker_mode_execute --hosts filename command [ "${output}" = "command" ] } -@test "hadoop_common_slave_mode_execute (--hostnames 2)" { - run hadoop_common_slave_mode_execute --hostnames "host1,host2" command1 command2 +@test "hadoop_common_worker_mode_execute (--hostnames 2)" { + run hadoop_common_worker_mode_execute --hostnames "host1,host2" command1 command2 [ "${output}" = "command1 command2" ] -} \ No newline at end of file +} diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_slaves.bats b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_workers.bats similarity index 70% rename from hadoop-common-project/hadoop-common/src/test/scripts/hadoop_slaves.bats rename to hadoop-common-project/hadoop-common/src/test/scripts/hadoop_workers.bats index cc33f0ecdf6..123bf04d5c8 100644 --- a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_slaves.bats +++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_workers.bats @@ -15,23 +15,23 @@ load hadoop-functions_test_helper -@test "hadoop_populate_slaves_file (specific file)" { +@test "hadoop_populate_workers_file (specific file)" { touch "${TMP}/file" - hadoop_populate_slaves_file "${TMP}/file" - [ "${HADOOP_SLAVES}" = "${TMP}/file" ] + hadoop_populate_workers_file "${TMP}/file" + [ "${HADOOP_WORKERS}" = "${TMP}/file" ] } -@test "hadoop_populate_slaves_file (specific conf dir file)" { +@test "hadoop_populate_workers_file (specific conf dir file)" { HADOOP_CONF_DIR=${TMP}/1 mkdir -p "${HADOOP_CONF_DIR}" touch "${HADOOP_CONF_DIR}/file" - hadoop_populate_slaves_file "file" - echo "${HADOOP_SLAVES}" - [ "${HADOOP_SLAVES}" = "${HADOOP_CONF_DIR}/file" ] + hadoop_populate_workers_file "file" + echo "${HADOOP_WORKERS}" + [ "${HADOOP_WORKERS}" = "${HADOOP_CONF_DIR}/file" ] } -@test "hadoop_populate_slaves_file (no file)" { +@test "hadoop_populate_workers_file (no file)" { HADOOP_CONF_DIR=${TMP} - run hadoop_populate_slaves_file "foo" + run hadoop_populate_workers_file "foo" [ "${status}" -eq 1 ] -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index 79525608580..50595286d4d 100755 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -26,10 +26,10 @@ function hadoop_usage { hadoop_add_option "--buildpaths" "attempt to add class files from build tree" hadoop_add_option "--daemon (start|status|stop)" "operate on a daemon" - hadoop_add_option "--hostnames list[,of,host,names]" "hosts to use in slave mode" + hadoop_add_option "--hostnames list[,of,host,names]" "hosts to use in worker mode" hadoop_add_option "--loglevel level" "set the log4j level for this command" - hadoop_add_option "--hosts filename" "list of hosts to use in slave mode" - hadoop_add_option "--slaves" "turn on slave mode" + hadoop_add_option "--hosts filename" "list of hosts to use in worker mode" + hadoop_add_option "--workers" "turn on worker mode" hadoop_add_subcommand "balancer" "run a cluster balancing utility" hadoop_add_subcommand "cacheadmin" "configure the HDFS cache" @@ -293,8 +293,8 @@ fi hadoop_verify_user "${HADOOP_SUBCMD}" -if [[ ${HADOOP_SLAVE_MODE} = true ]]; then - hadoop_common_slave_mode_execute "${HADOOP_HDFS_HOME}/bin/hdfs" "${HADOOP_USER_PARAMS[@]}" +if [[ ${HADOOP_WORKER_MODE} = true ]]; then + hadoop_common_worker_mode_execute "${HADOOP_HDFS_HOME}/bin/hdfs" "${HADOOP_USER_PARAMS[@]}" exit $? fi diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh index 1e35e7d8a67..fc46740d164 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh @@ -79,14 +79,14 @@ fi echo "Starting namenodes on [${NAMENODES}]" "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --slaves \ + --workers \ --config "${HADOOP_CONF_DIR}" \ --hostnames "${NAMENODES}" \ --daemon start \ namenode ${nameStartOpt} #--------------------------------------------------------- -# datanodes (using default slaves file) +# datanodes (using default workers file) if [[ -n "${HADOOP_SECURE_DN_USER}" ]] && [[ -z "${HADOOP_SECURE_COMMAND}" ]]; then @@ -98,7 +98,7 @@ else echo "Starting datanodes" "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --slaves \ + --workers \ --config "${HADOOP_CONF_DIR}" \ --daemon start \ datanode ${dataStartOpt} @@ -125,7 +125,7 @@ if [[ -n "${SECONDARY_NAMENODES}" ]]; then echo "Starting secondary namenodes [${SECONDARY_NAMENODES}]" "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --slaves \ + --workers \ --config "${HADOOP_CONF_DIR}" \ --hostnames "${SECONDARY_NAMENODES}" \ --daemon start \ @@ -144,7 +144,7 @@ case "${SHARED_EDITS_DIR}" in echo "Starting journal nodes [${JOURNAL_NODES}]" "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --slaves \ + --workers \ --config "${HADOOP_CONF_DIR}" \ --hostnames "${JOURNAL_NODES}" \ --daemon start \ @@ -159,7 +159,7 @@ if [[ "${AUTOHA_ENABLED}" = "true" ]]; then echo "Starting ZK Failover Controllers on NN hosts [${NAMENODES}]" "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --slaves \ + --workers \ --config "${HADOOP_CONF_DIR}" \ --hostnames "${NAMENODES}" \ --daemon start \ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh index 3fce34572de..7dcbba830e4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh @@ -45,7 +45,7 @@ fi if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then exec "${HADOOP_HDFS_HOME}/bin/hdfs" \ --config "${HADOOP_CONF_DIR}" \ - --slaves \ + --workers \ --daemon start \ datanode else diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh index e6933742113..797b95b6f75 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh @@ -56,14 +56,14 @@ fi echo "Stopping namenodes on [${NAMENODES}]" "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --slaves \ + --workers \ --config "${HADOOP_CONF_DIR}" \ --hostnames "${NAMENODES}" \ --daemon stop \ namenode #--------------------------------------------------------- -# datanodes (using default slaves file) +# datanodes (using default workers file) if [[ -n "${HADOOP_SECURE_DN_USER}" ]] && [[ -z "${HADOOP_SECURE_COMMAND}" ]]; then @@ -75,7 +75,7 @@ else echo "Stopping datanodes" "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --slaves \ + --workers \ --config "${HADOOP_CONF_DIR}" \ --daemon stop \ datanode @@ -94,7 +94,7 @@ if [[ -n "${SECONDARY_NAMENODES}" ]]; then echo "Stopping secondary namenodes [${SECONDARY_NAMENODES}]" "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --slaves \ + --workers \ --config "${HADOOP_CONF_DIR}" \ --hostnames "${SECONDARY_NAMENODES}" \ --daemon stop \ @@ -112,7 +112,7 @@ case "${SHARED_EDITS_DIR}" in echo "Stopping journal nodes [${JOURNAL_NODES}]" "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --slaves \ + --workers \ --config "${HADOOP_CONF_DIR}" \ --hostnames "${JOURNAL_NODES}" \ --daemon stop \ @@ -127,7 +127,7 @@ if [[ "${AUTOHA_ENABLED}" = "true" ]]; then echo "Stopping ZK Failover Controllers on NN hosts [${NAMENODES}]" "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --slaves \ + --workers \ --config "${HADOOP_CONF_DIR}" \ --hostnames "${NAMENODES}" \ --daemon stop \ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh index 2a973b1e839..be9683662ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh @@ -45,7 +45,7 @@ fi if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then exec "${HADOOP_HDFS_HOME}/bin/hdfs" \ --config "${HADOOP_CONF_DIR}" \ - --slaves \ + --workers \ --daemon stop \ datanode else diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java index 53da44c9479..b55a2c4ad0e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/block/BlockTokenSecretManager.java @@ -43,11 +43,12 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; /** - * BlockTokenSecretManager can be instantiated in 2 modes, master mode and slave - * mode. Master can generate new block keys and export block keys to slaves, - * while slaves can only import and use block keys received from master. Both - * master and slave can generate and verify block tokens. Typically, master mode - * is used by NN and slave mode is used by DN. + * BlockTokenSecretManager can be instantiated in 2 modes, master mode + * and worker mode. Master can generate new block keys and export block + * keys to workers, while workers can only import and use block keys + * received from master. Both master and worker can generate and verify + * block tokens. 
Typically, master mode is used by NN and worker mode + * is used by DN. */ @InterfaceAudience.Private public class BlockTokenSecretManager extends @@ -57,7 +58,7 @@ public class BlockTokenSecretManager extends public static final Token DUMMY_TOKEN = new Token(); private final boolean isMaster; - + /** * keyUpdateInterval is the interval that NN updates its block keys. It should * be set long enough so that all live DN's and Balancer should have sync'ed @@ -78,7 +79,7 @@ public class BlockTokenSecretManager extends private final SecureRandom nonceGenerator = new SecureRandom(); /** - * Constructor for slaves. + * Constructor for workers. * * @param keyUpdateInterval how often a new key will be generated * @param tokenLifetime how long an individual token is valid @@ -88,10 +89,10 @@ public class BlockTokenSecretManager extends this(false, keyUpdateInterval, tokenLifetime, blockPoolId, encryptionAlgorithm, 0, 1); } - + /** * Constructor for masters. - * + * * @param keyUpdateInterval how often a new key will be generated * @param tokenLifetime how long an individual token is valid * @param nnIndex namenode index of the namenode for which we are creating the manager @@ -108,7 +109,7 @@ public class BlockTokenSecretManager extends setSerialNo(new SecureRandom().nextInt()); generateKeys(); } - + private BlockTokenSecretManager(boolean isMaster, long keyUpdateInterval, long tokenLifetime, String blockPoolId, String encryptionAlgorithm, int nnIndex, int numNNs) { this.intRange = Integer.MAX_VALUE / numNNs; @@ -121,13 +122,13 @@ public class BlockTokenSecretManager extends this.encryptionAlgorithm = encryptionAlgorithm; generateKeys(); } - + @VisibleForTesting public synchronized void setSerialNo(int serialNo) { // we mod the serial number by the range and then add that times the index this.serialNo = (serialNo % intRange) + (nnRangeStart); } - + public void setBlockPoolId(String blockPoolId) { this.blockPoolId = blockPoolId; } @@ -180,7 +181,7 @@ public class BlockTokenSecretManager extends } /** - * Set block keys, only to be used in slave mode + * Set block keys, only to be used in worker mode */ public synchronized void addKeys(ExportedBlockKeys exportedKeys) throws IOException { @@ -324,7 +325,7 @@ public class BlockTokenSecretManager extends /** * Create an empty block token identifier - * + * * @return a newly created empty block token identifier */ @Override @@ -334,7 +335,7 @@ public class BlockTokenSecretManager extends /** * Create a new password/secret for the given block token identifier. - * + * * @param identifier * the block token identifier * @return token password/secret @@ -357,7 +358,7 @@ public class BlockTokenSecretManager extends /** * Look up the token password/secret for the given block token identifier. - * + * * @param identifier * the block token identifier to look up * @return token password/secret as byte[] @@ -381,11 +382,11 @@ public class BlockTokenSecretManager extends } return createPassword(identifier.getBytes(), key.getKey()); } - + /** * Generate a data encryption key for this block pool, using the current * BlockKey. - * + * * @return a data encryption key which may be used to encrypt traffic * over the DataTransferProtocol */ @@ -401,10 +402,10 @@ public class BlockTokenSecretManager extends encryptionKey, Time.now() + tokenLifetime, encryptionAlgorithm); } - + /** * Recreate an encryption key based on the given key id and nonce. - * + * * @param keyId identifier of the secret key used to generate the encryption key. 
* @param nonce random value used to create the encryption key * @return the encryption key which corresponds to this (keyId, blockPoolId, nonce) @@ -423,7 +424,7 @@ public class BlockTokenSecretManager extends } return createPassword(nonce, key.getKey()); } - + @VisibleForTesting public synchronized void setKeyUpdateIntervalForTesting(long millis) { this.keyUpdateInterval = millis; @@ -433,10 +434,10 @@ public class BlockTokenSecretManager extends public void clearAllKeysForTesting() { allKeys.clear(); } - + @VisibleForTesting public synchronized int getSerialNoForTesting() { return serialNo; } - + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html index 759c093aa59..e6636d74a91 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html @@ -23,33 +23,33 @@ Hadoop is a distributed computing platform. -

    Hadoop primarily consists of the Hadoop Distributed FileSystem -(HDFS) and an +

    Hadoop primarily consists of the Hadoop Distributed FileSystem +(HDFS) and an implementation of the Map-Reduce programming paradigm.

    -

    Hadoop is a software framework that lets one easily write and run applications +

    Hadoop is a software framework that lets one easily write and run applications that process vast amounts of data. Here's what makes Hadoop especially useful:

    +

    Requirements

    @@ -61,15 +61,15 @@ that process vast amounts of data. Here's what makes Hadoop especially useful:
 Windows is also a supported platform.

    Requisite Software

    1. - Java 1.6.x, preferably from - Sun. + Java 1.6.x, preferably from + Sun. Set JAVA_HOME to the root of your Java installation.
    2. @@ -141,8 +141,8 @@ host and port. This is specified with the configuration property href="../mapred-default.html#mapred.job.tracker">mapred.job.tracker.
-A slaves file that lists the names of all the hosts in
-the cluster. The default slaves file is conf/slaves.
+A workers file that lists the names of all the hosts in
+the cluster. The default workers file is conf/workers.
    @@ -242,31 +242,31 @@ as master.example.com:port in conf/mapred-site.xm
 Directories for dfs.name.dir and
-dfs.data.dir
+dfs.data.dir
 in conf/hdfs-site.xml. These are local directories used to hold distributed filesystem
-data on the master node and slave nodes respectively. Note
+data on the master node and worker nodes respectively. Note
 that dfs.data.dir may contain a space- or comma-separated list of directory names, so that data may be stored on multiple local devices.
 mapred.local.dir
- in conf/mapred-site.xml, the local directory where temporary
+ in conf/mapred-site.xml, the local directory where temporary
 MapReduce data is stored. It also may be a list of directories.
 mapred.map.tasks and mapred.reduce.tasks
+href="../mapred-default.html#mapred.reduce.tasks">mapred.reduce.tasks
 in conf/mapred-site.xml. As a rule of thumb, use 10x the
-number of slave processors for mapred.map.tasks, and 2x the
-number of slave processors for mapred.reduce.tasks.
+number of worker processors for mapred.map.tasks, and 2x the
+number of worker processors for mapred.reduce.tasks.

    Finally, list all slave hostnames or IP addresses in your -conf/slaves file, one per line. Then format your filesystem +

    Finally, list all worker hostnames or IP addresses in your +conf/workers file, one per line. Then format your filesystem and start your cluster on your master node, as above. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md index 99a41a29010..0ed5df8aae2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md @@ -202,7 +202,7 @@ To stop the cluster run the following command: [hdfs]$ $HADOOP_HOME/sbin/stop-dfs.sh -These commands can be run from any node where the HDFS configuration is available. The command uses the configuration to determine the Namenodes in the cluster and then starts the Namenode process on those nodes. The Datanodes are started on the nodes specified in the `slaves` file. The script can be used as a reference for building your own scripts to start and stop the cluster. +These commands can be run from any node where the HDFS configuration is available. The command uses the configuration to determine the Namenodes in the cluster and then starts the Namenode process on those nodes. The Datanodes are started on the nodes specified in the `workers` file. The script can be used as a reference for building your own scripts to start and stop the cluster. ### Balancer diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md index 45498246b87..df2db496d1e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md @@ -70,7 +70,7 @@ The HDFS High Availability feature addresses the above problems by providing the Architecture ------------ -In a typical HA cluster, two or more separate machines are configured as NameNodes. At any point in time, exactly one of the NameNodes is in an *Active* state, and the others are in a *Standby* state. The Active NameNode is responsible for all client operations in the cluster, while the Standbys are simply acting as slaves, maintaining enough state to provide a fast failover if necessary. +In a typical HA cluster, two or more separate machines are configured as NameNodes. At any point in time, exactly one of the NameNodes is in an *Active* state, and the others are in a *Standby* state. The Active NameNode is responsible for all client operations in the cluster, while the Standbys are simply acting as workers, maintaining enough state to provide a fast failover if necessary. In order for the Standby node to keep its state synchronized with the Active node, both nodes communicate with a group of separate daemons called "JournalNodes" (JNs). When any namespace modification is performed by the Active node, it durably logs a record of the modification to a majority of these JNs. The Standby node is capable of reading the edits from the JNs, and is constantly watching them for changes to the edit log. As the Standby Node sees the edits, it applies them to its own namespace. In the event of a failover, the Standby will ensure that it has read all of the edits from the JounalNodes before promoting itself to the Active state. This ensures that the namespace state is fully synchronized before a failover occurs. 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java index 0783eb50f84..65d43292f93 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java @@ -32,79 +32,79 @@ import org.apache.hadoop.mapreduce.MRJobConfig; /** * Distribute application-specific large, read-only files efficiently. - * + * *

    DistributedCache is a facility provided by the Map-Reduce * framework to cache files (text, archives, jars etc.) needed by applications. *

    - * - *

    Applications specify the files, via urls (hdfs:// or http://) to be cached + * + *

    Applications specify the files, via urls (hdfs:// or http://) to be cached * via the {@link org.apache.hadoop.mapred.JobConf}. The * DistributedCache assumes that the files specified via urls are * already present on the {@link FileSystem} at the path specified by the url * and are accessible by every machine in the cluster.

    - * - *

    The framework will copy the necessary files on to the slave node before - * any tasks for the job are executed on that node. Its efficiency stems from - * the fact that the files are only copied once per job and the ability to - * cache archives which are un-archived on the slaves.

    + * + *

    The framework will copy the necessary files on to the worker node before + * any tasks for the job are executed on that node. Its efficiency stems from + * the fact that the files are only copied once per job and the ability to + * cache archives which are un-archived on the workers.

    * *

    DistributedCache can be used to distribute simple, read-only - * data/text files and/or more complex types such as archives, jars etc. - * Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. - * Jars may be optionally added to the classpath of the tasks, a rudimentary + * data/text files and/or more complex types such as archives, jars etc. + * Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes. + * Jars may be optionally added to the classpath of the tasks, a rudimentary * software distribution mechanism. Files have execution permissions. * In older version of Hadoop Map/Reduce users could optionally ask for symlinks - * to be created in the working directory of the child task. In the current - * version symlinks are always created. If the URL does not have a fragment - * the name of the file or directory will be used. If multiple files or + * to be created in the working directory of the child task. In the current + * version symlinks are always created. If the URL does not have a fragment + * the name of the file or directory will be used. If multiple files or * directories map to the same link name, the last one added, will be used. All * others will not even be downloaded.

    - * - *

    DistributedCache tracks modification timestamps of the cache - * files. Clearly the cache files should not be modified by the application + * + *

    DistributedCache tracks modification timestamps of the cache + * files. Clearly the cache files should not be modified by the application * or externally while the job is executing.

    - * - *

    Here is an illustrative example on how to use the + * + *

    Here is an illustrative example on how to use the * DistributedCache:

    *

      *     // Setting up the cache for the application
    - *     
    + *
      *     1. Copy the requisite files to the FileSystem:
    - *     
    - *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    - *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    + *
    + *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
    + *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
      *     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
      *     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
      *     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
      *     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    - *     
    + *
      *     2. Setup the application's JobConf:
    - *     
    + *
      *     JobConf job = new JobConf();
    - *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    + *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
      *                                   job);
      *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip", job);
      *     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
      *     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar", job);
      *     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz", job);
      *     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz", job);
    - *     
    + *
      *     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
      *     or {@link org.apache.hadoop.mapred.Reducer}:
    - *     
    - *     public static class MapClass extends MapReduceBase  
    + *
    + *     public static class MapClass extends MapReduceBase
      *     implements Mapper<K, V, K, V> {
    - *     
    + *
      *       private Path[] localArchives;
      *       private Path[] localFiles;
    - *       
    + *
      *       public void configure(JobConf job) {
      *         // Get the cached archives/files
      *         File f = new File("./map.zip/some/file/in/zip.txt");
      *       }
    - *       
    - *       public void map(K key, V value, 
    - *                       OutputCollector<K, V> output, Reporter reporter) 
    + *
    + *       public void map(K key, V value,
    + *                       OutputCollector<K, V> output, Reporter reporter)
      *       throws IOException {
      *         // Use data from the cached archives/files here
      *         // ...
    @@ -112,7 +112,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
      *         output.collect(k, v);
      *       }
      *     }
    - *     
    + *
      * 
  *
  * It is also very common to use the DistributedCache by using
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
index d4d6c6e38f7..0c43633c1f9 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
@@ -34,79 +34,79 @@ import java.net.URI;
 /**
  * Distribute application-specific large, read-only files efficiently.
- *
+ *
  * DistributedCache is a facility provided by the Map-Reduce
  * framework to cache files (text, archives, jars etc.) needed by applications.
  *
- *
- * Applications specify the files, via urls (hdfs:// or http://) to be cached
+ *
+ * Applications specify the files, via urls (hdfs:// or http://) to be cached
  * via the {@link org.apache.hadoop.mapred.JobConf}. The
  * DistributedCache assumes that the files specified via urls are
  * already present on the {@link FileSystem} at the path specified by the url
  * and are accessible by every machine in the cluster.
- *
- * The framework will copy the necessary files on to the slave node before
- * any tasks for the job are executed on that node. Its efficiency stems from
- * the fact that the files are only copied once per job and the ability to
- * cache archives which are un-archived on the slaves.
+ *
+ * The framework will copy the necessary files on to the worker node before
+ * any tasks for the job are executed on that node. Its efficiency stems from
+ * the fact that the files are only copied once per job and the ability to
+ * cache archives which are un-archived on the workers.
  *
  * DistributedCache can be used to distribute simple, read-only
- * data/text files and/or more complex types such as archives, jars etc.
- * Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes.
- * Jars may be optionally added to the classpath of the tasks, a rudimentary
+ * data/text files and/or more complex types such as archives, jars etc.
+ * Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes.
+ * Jars may be optionally added to the classpath of the tasks, a rudimentary
  * software distribution mechanism. Files have execution permissions.
  * In older version of Hadoop Map/Reduce users could optionally ask for symlinks
- * to be created in the working directory of the child task. In the current
- * version symlinks are always created. If the URL does not have a fragment
- * the name of the file or directory will be used. If multiple files or
+ * to be created in the working directory of the child task. In the current
+ * version symlinks are always created. If the URL does not have a fragment
+ * the name of the file or directory will be used. If multiple files or
  * directories map to the same link name, the last one added, will be used. All
  * others will not even be downloaded.
- *
- * DistributedCache tracks modification timestamps of the cache
- * files. Clearly the cache files should not be modified by the application
+ *
+ * DistributedCache tracks modification timestamps of the cache
+ * files. Clearly the cache files should not be modified by the application
  * or externally while the job is executing.
- *
- * Here is an illustrative example on how to use the
+ *
+ * Here is an illustrative example on how to use the
  * DistributedCache:
  *
      *     // Setting up the cache for the application
    - *     
    + *
      *     1. Copy the requisite files to the FileSystem:
    - *     
    - *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
    - *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
    + *
    + *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
    + *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
      *     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
      *     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
      *     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
      *     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
    - *     
    + *
      *     2. Setup the application's JobConf:
    - *     
    + *
      *     JobConf job = new JobConf();
    - *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
    + *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
      *                                   job);
      *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip", job);
      *     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
      *     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar", job);
      *     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz", job);
      *     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz", job);
    - *     
    + *
      *     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
      *     or {@link org.apache.hadoop.mapred.Reducer}:
    - *     
    - *     public static class MapClass extends MapReduceBase  
    + *
    + *     public static class MapClass extends MapReduceBase
      *     implements Mapper<K, V, K, V> {
    - *     
    + *
      *       private Path[] localArchives;
      *       private Path[] localFiles;
    - *       
    + *
      *       public void configure(JobConf job) {
      *         // Get the cached archives/files
      *         File f = new File("./map.zip/some/file/in/zip.txt");
      *       }
    - *       
    - *       public void map(K key, V value, 
    - *                       OutputCollector<K, V> output, Reporter reporter) 
    + *
    + *       public void map(K key, V value,
    + *                       OutputCollector<K, V> output, Reporter reporter)
      *       throws IOException {
      *         // Use data from the cached archives/files here
      *         // ...
    @@ -114,7 +114,7 @@ import java.net.URI;
      *         output.collect(k, v);
      *       }
      *     }
    - *     
    + *
      * 
    * * It is also very common to use the DistributedCache by using @@ -281,7 +281,7 @@ public class DistributedCache { conf.set(MRJobConfig.CACHE_ARCHIVES, archives == null ? uri.toString() : archives + "," + uri.toString()); } - + /** * Add a file to be localized to the conf. The localized file will be * downloaded to the execution node(s), and a link will created to the @@ -370,7 +370,7 @@ public class DistributedCache { /** * Get the file entries in classpath as an array of Path. * Used by internal DistributedCache code. - * + * * @param conf Configuration that contains the classpath setting * @deprecated Use {@link JobContext#getFileClassPaths()} instead * @see JobContext#getFileClassPaths() @@ -379,8 +379,8 @@ public class DistributedCache { public static Path[] getFileClassPaths(Configuration conf) { ArrayList list = (ArrayList)conf.getStringCollection( MRJobConfig.CLASSPATH_FILES); - if (list.size() == 0) { - return null; + if (list.size() == 0) { + return null; } Path[] paths = new Path[list.size()]; for (int i = 0; i < list.size(); i++) { @@ -392,7 +392,7 @@ public class DistributedCache { /** * Add an archive path to the current set of classpath entries. It adds the * archive to cache as well. Intended to be used by user code. - * + * * @param archive Path of the archive to be added * @param conf Configuration that contains the classpath setting * @deprecated Use {@link Job#addArchiveToClassPath(Path)} instead @@ -426,7 +426,7 @@ public class DistributedCache { /** * Get the archive entries in classpath as an array of Path. * Used by internal DistributedCache code. - * + * * @param conf Configuration that contains the classpath setting * @deprecated Use {@link JobContext#getArchiveClassPaths()} instead * @see JobContext#getArchiveClassPaths() @@ -435,8 +435,8 @@ public class DistributedCache { public static Path[] getArchiveClassPaths(Configuration conf) { ArrayList list = (ArrayList)conf.getStringCollection( MRJobConfig.CLASSPATH_ARCHIVES); - if (list.size() == 0) { - return null; + if (list.size() == 0) { + return null; } Path[] paths = new Path[list.size()]; for (int i = 0; i < list.size(); i++) { @@ -449,13 +449,13 @@ public class DistributedCache { * Originally intended to enable symlinks, but currently symlinks cannot be * disabled. This is a NO-OP. * @param conf the jobconf - * @deprecated This is a NO-OP. + * @deprecated This is a NO-OP. */ @Deprecated public static void createSymlink(Configuration conf){ //NOOP } - + /** * Originally intended to check if symlinks should be used, but currently * symlinks cannot be disabled. @@ -480,29 +480,29 @@ public class DistributedCache { } /** - * Get the booleans on whether the files are public or not. Used by + * Get the booleans on whether the files are public or not. Used by * internal DistributedCache and MapReduce code. * @param conf The configuration which stored the timestamps - * @return a string array of booleans + * @return a string array of booleans */ public static boolean[] getFileVisibilities(Configuration conf) { return parseBooleans(conf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES)); } /** - * Get the booleans on whether the archives are public or not. Used by + * Get the booleans on whether the archives are public or not. Used by * internal DistributedCache and MapReduce code. 
* @param conf The configuration which stored the timestamps - * @return a string array of booleans + * @return a string array of booleans */ public static boolean[] getArchiveVisibilities(Configuration conf) { return parseBooleans(conf.getStrings(MRJobConfig.CACHE_ARCHIVES_VISIBILITIES)); } /** - * This method checks if there is a conflict in the fragment names - * of the uris. Also makes sure that each uri has a fragment. It - * is only to be called if you want to create symlinks for + * This method checks if there is a conflict in the fragment names + * of the uris. Also makes sure that each uri has a fragment. It + * is only to be called if you want to create symlinks for * the various archives and files. May be used by user code. * @param uriFiles The uri array of urifiles * @param uriArchives the uri array of uri archives @@ -514,7 +514,7 @@ public class DistributedCache { // check if fragment is null for any uri // also check if there are any conflicts in fragment names Set fragments = new HashSet(); - + // iterate over file uris if (uriFiles != null) { for (int i = 0; i < uriFiles.length; i++) { @@ -529,7 +529,7 @@ public class DistributedCache { fragments.add(lowerCaseFragment); } } - + // iterate over archive uris if (uriArchives != null) { for (int i = 0; i < uriArchives.length; i++) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md index 16f3afb9880..1d5b7f2b114 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md @@ -85,11 +85,11 @@ A MapReduce *job* usually splits the input data-set into independent chunks whic Typically the compute nodes and the storage nodes are the same, that is, the MapReduce framework and the Hadoop Distributed File System (see [HDFS Architecture Guide](../../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) are running on the same set of nodes. This configuration allows the framework to effectively schedule tasks on the nodes where data is already present, resulting in very high aggregate bandwidth across the cluster. -The MapReduce framework consists of a single master `ResourceManager`, one slave `NodeManager` per cluster-node, and `MRAppMaster` per application (see [YARN Architecture Guide](../../hadoop-yarn/hadoop-yarn-site/YARN.html)). +The MapReduce framework consists of a single master `ResourceManager`, one worker `NodeManager` per cluster-node, and `MRAppMaster` per application (see [YARN Architecture Guide](../../hadoop-yarn/hadoop-yarn-site/YARN.html)). Minimally, applications specify the input/output locations and supply *map* and *reduce* functions via implementations of appropriate interfaces and/or abstract-classes. These, and other job parameters, comprise the *job configuration*. -The Hadoop *job client* then submits the job (jar/executable etc.) and configuration to the `ResourceManager` which then assumes the responsibility of distributing the software/configuration to the slaves, scheduling tasks and monitoring them, providing status and diagnostic information to the job-client. +The Hadoop *job client* then submits the job (jar/executable etc.) 
and configuration to the `ResourceManager` which then assumes the responsibility of distributing the software/configuration to the workers, scheduling tasks and monitoring them, providing status and diagnostic information to the job-client. Although the Hadoop framework is implemented in Java™, MapReduce applications need not be written in Java. @@ -213,10 +213,10 @@ Sample text-files as input: $ bin/hadoop fs -ls /user/joe/wordcount/input/ /user/joe/wordcount/input/file01 /user/joe/wordcount/input/file02 - + $ bin/hadoop fs -cat /user/joe/wordcount/input/file01 Hello World Bye World - + $ bin/hadoop fs -cat /user/joe/wordcount/input/file02 Hello Hadoop Goodbye Hadoop @@ -787,11 +787,11 @@ or Counters.incrCounter(String, String, long) in the `map` and/or `reduce` metho Applications specify the files to be cached via urls (hdfs://) in the `Job`. The `DistributedCache` assumes that the files specified via hdfs:// urls are already present on the `FileSystem`. -The framework will copy the necessary files to the slave node before any tasks for the job are executed on that node. Its efficiency stems from the fact that the files are only copied once per job and the ability to cache archives which are un-archived on the slaves. +The framework will copy the necessary files to the worker node before any tasks for the job are executed on that node. Its efficiency stems from the fact that the files are only copied once per job and the ability to cache archives which are un-archived on the workers. `DistributedCache` tracks the modification timestamps of the cached files. Clearly the cache files should not be modified by the application or externally while the job is executing. -`DistributedCache` can be used to distribute simple, read-only data/text files and more complex types such as archives and jars. Archives (zip, tar, tgz and tar.gz files) are *un-archived* at the slave nodes. Files have *execution permissions* set. +`DistributedCache` can be used to distribute simple, read-only data/text files and more complex types such as archives and jars. Archives (zip, tar, tgz and tar.gz files) are *un-archived* at the worker nodes. Files have *execution permissions* set. The files/archives can be distributed by setting the property `mapreduce.job.cache.{files |archives}`. If more than one file/archive has to be distributed, they can be added as comma separated paths. The properties can also be set by APIs [Job.addCacheFile(URI)](../../api/org/apache/hadoop/mapreduce/Job.html)/ @@ -808,12 +808,12 @@ api can be used to cache files/jars and also add them to the *classpath* of chil ##### Private and Public DistributedCache Files -DistributedCache files can be private or public, that determines how they can be shared on the slave nodes. +DistributedCache files can be private or public, that determines how they can be shared on the worker nodes. * "Private" DistributedCache files are cached in a localdirectory private to the user whose jobs need these files. These files are shared by all tasks and jobs of the specific user only and cannot be accessed by jobs of - other users on the slaves. A DistributedCache file becomes private by + other users on the workers. A DistributedCache file becomes private by virtue of its permissions on the file system where the files are uploaded, typically HDFS. 
If the file has no world readable access, or if the directory path leading to the file has no world executable access for @@ -821,7 +821,7 @@ DistributedCache files can be private or public, that determines how they can be * "Public" DistributedCache files are cached in a global directory and the file access is setup such that they are publicly visible to all users. - These files can be shared by tasks and jobs of all users on the slaves. A + These files can be shared by tasks and jobs of all users on the workers. A DistributedCache file becomes public by virtue of its permissions on the file system where the files are uploaded, typically HDFS. If the file has world readable access, AND if the directory path leading to the file has @@ -1076,10 +1076,10 @@ Sample text-files as input: $ bin/hadoop fs -ls /user/joe/wordcount/input/ /user/joe/wordcount/input/file01 /user/joe/wordcount/input/file02 - + $ bin/hadoop fs -cat /user/joe/wordcount/input/file01 Hello World, Bye World! - + $ bin/hadoop fs -cat /user/joe/wordcount/input/file02 Hello Hadoop, Goodbye to hadoop. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/ReliabilityTest.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/ReliabilityTest.java index ecac83af5b3..983a4a7e67b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/ReliabilityTest.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/ReliabilityTest.java @@ -43,7 +43,7 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; /** - * This class tests reliability of the framework in the face of failures of + * This class tests reliability of the framework in the face of failures of * both tasks and tasktrackers. Steps: * 1) Get the cluster status * 2) Get the number of slots in the cluster @@ -59,12 +59,12 @@ import org.apache.hadoop.util.ToolRunner; * ./bin/hadoop --config jar * build/hadoop--test.jar MRReliabilityTest -libjars * build/hadoop--examples.jar [-scratchdir ]" - * - * The scratchdir is optional and by default the current directory on the client - * will be used as the scratch space. Note that password-less SSH must be set up - * between the client machine from where the test is submitted, and the cluster - * nodes where the test runs. - * + * + * The scratchdir is optional and by default the current directory on + * the client will be used as the scratch space. Note that password-less + * SSH must be set up between the client machine from where the test is + * submitted, and the cluster nodes where the test runs. + * * The test should be run on a free cluster where there is no other parallel * job submission going on. Submission of other jobs while the test runs can cause * the tests/jobs submitted to fail. 
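As a rough illustration of the DistributedCache usage described in the MapReduceTutorial.md changes above (this sketch is not part of the patch; the jar name, driver class and HDFS paths are placeholders, and it assumes the driver uses ToolRunner so the generic options are parsed), side files are first staged in HDFS and then attached to the job so the framework localizes them on each worker node:

    $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
    $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
    # myapp.jar and MyDriver are hypothetical; the '#name' fragment picks the
    # symlink created in each task's working directory on the worker node.
    $ bin/hadoop jar myapp.jar MyDriver \
        -files hdfs:///myapp/lookup.dat#lookup.dat \
        -archives hdfs:///myapp/map.zip#map \
        /user/joe/wordcount/input /user/joe/wordcount/output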
@@ -73,7 +73,7 @@ import org.apache.hadoop.util.ToolRunner; public class ReliabilityTest extends Configured implements Tool { private String dir; - private static final Log LOG = LogFactory.getLog(ReliabilityTest.class); + private static final Log LOG = LogFactory.getLog(ReliabilityTest.class); private void displayUsage() { LOG.info("This must be run in only the distributed mode " + @@ -88,13 +88,13 @@ public class ReliabilityTest extends Configured implements Tool { " any job submission while the tests are running can cause jobs/tests to fail"); System.exit(-1); } - + public int run(String[] args) throws Exception { Configuration conf = getConf(); if ("local".equals(conf.get(JTConfig.JT_IPC_ADDRESS, "local"))) { displayUsage(); } - String[] otherArgs = + String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length == 2) { if (otherArgs[0].equals("-scratchdir")) { @@ -108,7 +108,7 @@ public class ReliabilityTest extends Configured implements Tool { } else { displayUsage(); } - + //to protect against the case of jobs failing even when multiple attempts //fail, set some high values for the max attempts conf.setInt(JobContext.MAP_MAX_ATTEMPTS, 10); @@ -117,26 +117,26 @@ public class ReliabilityTest extends Configured implements Tool { runSortJobTests(new JobClient(new JobConf(conf)), conf); return 0; } - - private void runSleepJobTest(final JobClient jc, final Configuration conf) + + private void runSleepJobTest(final JobClient jc, final Configuration conf) throws Exception { ClusterStatus c = jc.getClusterStatus(); int maxMaps = c.getMaxMapTasks() * 2; int maxReduces = maxMaps; int mapSleepTime = (int)c.getTTExpiryInterval(); int reduceSleepTime = mapSleepTime; - String[] sleepJobArgs = new String[] { - "-m", Integer.toString(maxMaps), + String[] sleepJobArgs = new String[] { + "-m", Integer.toString(maxMaps), "-r", Integer.toString(maxReduces), "-mt", Integer.toString(mapSleepTime), "-rt", Integer.toString(reduceSleepTime)}; - runTest(jc, conf, "org.apache.hadoop.mapreduce.SleepJob", sleepJobArgs, + runTest(jc, conf, "org.apache.hadoop.mapreduce.SleepJob", sleepJobArgs, new KillTaskThread(jc, 2, 0.2f, false, 2), new KillTrackerThread(jc, 2, 0.4f, false, 1)); LOG.info("SleepJob done"); } - - private void runSortJobTests(final JobClient jc, final Configuration conf) + + private void runSortJobTests(final JobClient jc, final Configuration conf) throws Exception { String inputPath = "my_reliability_test_input"; String outputPath = "my_reliability_test_output"; @@ -147,36 +147,36 @@ public class ReliabilityTest extends Configured implements Tool { runSortTest(jc, conf, inputPath, outputPath); runSortValidatorTest(jc, conf, inputPath, outputPath); } - - private void runRandomWriterTest(final JobClient jc, - final Configuration conf, final String inputPath) + + private void runRandomWriterTest(final JobClient jc, + final Configuration conf, final String inputPath) throws Exception { - runTest(jc, conf, "org.apache.hadoop.examples.RandomWriter", - new String[]{inputPath}, + runTest(jc, conf, "org.apache.hadoop.examples.RandomWriter", + new String[]{inputPath}, null, new KillTrackerThread(jc, 0, 0.4f, false, 1)); LOG.info("RandomWriter job done"); } - + private void runSortTest(final JobClient jc, final Configuration conf, - final String inputPath, final String outputPath) + final String inputPath, final String outputPath) throws Exception { - runTest(jc, conf, "org.apache.hadoop.examples.Sort", + runTest(jc, conf, "org.apache.hadoop.examples.Sort", new 
String[]{inputPath, outputPath}, new KillTaskThread(jc, 2, 0.2f, false, 2), new KillTrackerThread(jc, 2, 0.8f, false, 1)); LOG.info("Sort job done"); } - - private void runSortValidatorTest(final JobClient jc, + + private void runSortValidatorTest(final JobClient jc, final Configuration conf, final String inputPath, final String outputPath) throws Exception { runTest(jc, conf, "org.apache.hadoop.mapred.SortValidator", new String[] { "-sortInput", inputPath, "-sortOutput", outputPath}, new KillTaskThread(jc, 2, 0.2f, false, 1), - new KillTrackerThread(jc, 2, 0.8f, false, 1)); - LOG.info("SortValidator job done"); + new KillTrackerThread(jc, 2, 0.8f, false, 1)); + LOG.info("SortValidator job done"); } - + private String normalizeCommandPath(String command) { final String hadoopHome; if ((hadoopHome = System.getenv("HADOOP_HOME")) != null) { @@ -184,7 +184,7 @@ public class ReliabilityTest extends Configured implements Tool { } return command; } - + private void checkJobExitStatus(int status, String jobName) { if (status != 0) { LOG.info(jobName + " job failed with status: " + status); @@ -203,7 +203,7 @@ public class ReliabilityTest extends Configured implements Tool { public void run() { try { Class jobClassObj = conf.getClassByName(jobClass); - int status = ToolRunner.run(conf, (Tool)(jobClassObj.newInstance()), + int status = ToolRunner.run(conf, (Tool)(jobClassObj.newInstance()), args); checkJobExitStatus(status, jobClass); } catch (Exception e) { @@ -223,7 +223,8 @@ public class ReliabilityTest extends Configured implements Tool { JobID jobId = jobs[jobs.length - 1].getJobID(); RunningJob rJob = jc.getJob(jobId); if(rJob.isComplete()) { - LOG.error("The last job returned by the querying JobTracker is complete :" + + LOG.error("The last job returned by the querying " + +"JobTracker is complete :" + rJob.getJobID() + " .Exiting the test"); System.exit(-1); } @@ -246,7 +247,7 @@ public class ReliabilityTest extends Configured implements Tool { } t.join(); } - + private class KillTrackerThread extends Thread { private volatile boolean killed = false; private JobClient jc; @@ -255,14 +256,14 @@ public class ReliabilityTest extends Configured implements Tool { private float threshold = 0.2f; private boolean onlyMapsProgress; private int numIterations; - final private String slavesFile = dir + "/_reliability_test_slaves_file_"; - final String shellCommand = normalizeCommandPath("bin/slaves.sh"); - final private String STOP_COMMAND = "ps uwwx | grep java | grep " + - "org.apache.hadoop.mapred.TaskTracker"+ " |" + - " grep -v grep | tr -s ' ' | cut -d ' ' -f2 | xargs kill -s STOP"; - final private String RESUME_COMMAND = "ps uwwx | grep java | grep " + - "org.apache.hadoop.mapred.TaskTracker"+ " |" + - " grep -v grep | tr -s ' ' | cut -d ' ' -f2 | xargs kill -s CONT"; + final private String workersFile = dir + "/_reliability_test_workers_file_"; + final private String shellCommand = normalizeCommandPath("bin/workers.sh"); + final private String stopCommand = "ps uwwx | grep java | grep " + + "org.apache.hadoop.mapred.TaskTracker"+ " |" + + " grep -v grep | tr -s ' ' | cut -d ' ' -f2 | xargs kill -s STOP"; + final private String resumeCommand = "ps uwwx | grep java | grep " + + "org.apache.hadoop.mapred.TaskTracker"+ " |" + + " grep -v grep | tr -s ' ' | cut -d ' ' -f2 | xargs kill -s CONT"; //Only one instance must be active at any point public KillTrackerThread(JobClient jc, int threshaldMultiplier, float threshold, boolean onlyMapsProgress, int numIterations) { @@ -293,8 +294,8 @@ public class 
ReliabilityTest extends Configured implements Tool { LOG.info("Will STOP/RESUME tasktrackers based on " + "Reduces' progress"); } - LOG.info("Initial progress threshold: " + threshold + - ". Threshold Multiplier: " + thresholdMultiplier + + LOG.info("Initial progress threshold: " + threshold + + ". Threshold Multiplier: " + thresholdMultiplier + ". Number of iterations: " + numIterations); float thresholdVal = threshold; int numIterationsDone = 0; @@ -336,7 +337,7 @@ public class ReliabilityTest extends Configured implements Tool { int count = 0; - FileOutputStream fos = new FileOutputStream(new File(slavesFile)); + FileOutputStream fos = new FileOutputStream(new File(workersFile)); LOG.info(new Date() + " Stopping a few trackers"); for (String tracker : trackerNamesList) { @@ -355,17 +356,17 @@ public class ReliabilityTest extends Configured implements Tool { private void startTaskTrackers() throws Exception { LOG.info(new Date() + " Resuming the stopped trackers"); runOperationOnTT("resume"); - new File(slavesFile).delete(); + new File(workersFile).delete(); } - + private void runOperationOnTT(String operation) throws IOException { Map hMap = new HashMap(); - hMap.put("HADOOP_SLAVES", slavesFile); + hMap.put("HADOOP_WORKERS", workersFile); StringTokenizer strToken; if (operation.equals("suspend")) { - strToken = new StringTokenizer(STOP_COMMAND, " "); + strToken = new StringTokenizer(stopCommand, " "); } else { - strToken = new StringTokenizer(RESUME_COMMAND, " "); + strToken = new StringTokenizer(resumeCommand, " "); } String commandArgs[] = new String[strToken.countTokens() + 1]; int i = 0; @@ -382,14 +383,14 @@ public class ReliabilityTest extends Configured implements Tool { private String convertTrackerNameToHostName(String trackerName) { // Convert the trackerName to it's host name int indexOfColon = trackerName.indexOf(":"); - String trackerHostName = (indexOfColon == -1) ? - trackerName : + String trackerHostName = (indexOfColon == -1) ? + trackerName : trackerName.substring(0, indexOfColon); return trackerHostName.substring("tracker_".length()); } } - + private class KillTaskThread extends Thread { private volatile boolean killed = false; @@ -399,7 +400,7 @@ public class ReliabilityTest extends Configured implements Tool { private float threshold = 0.2f; private boolean onlyMapsProgress; private int numIterations; - public KillTaskThread(JobClient jc, int thresholdMultiplier, + public KillTaskThread(JobClient jc, int thresholdMultiplier, float threshold, boolean onlyMapsProgress, int numIterations) { this.jc = jc; this.thresholdMultiplier = thresholdMultiplier; @@ -427,15 +428,15 @@ public class ReliabilityTest extends Configured implements Tool { } else { LOG.info("Will kill tasks based on Reduces' progress"); } - LOG.info("Initial progress threshold: " + threshold + - ". Threshold Multiplier: " + thresholdMultiplier + + LOG.info("Initial progress threshold: " + threshold + + ". Threshold Multiplier: " + thresholdMultiplier + ". 
Number of iterations: " + numIterations); float thresholdVal = threshold; int numIterationsDone = 0; while (!killed) { try { float progress; - if (jc.getJob(rJob.getID()).isComplete() || + if (jc.getJob(rJob.getID()).isComplete() || numIterationsDone == numIterations) { break; } @@ -499,7 +500,7 @@ public class ReliabilityTest extends Configured implements Tool { } } } - + public static void main(String args[]) throws Exception { int res = ToolRunner.run(new Configuration(), new ReliabilityTest(), args); System.exit(res); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestLazyOutput.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestLazyOutput.java index dde9310607f..04a5127aaf2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestLazyOutput.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestLazyOutput.java @@ -44,11 +44,11 @@ import static org.junit.Assert.assertTrue; * 0 byte files */ public class TestLazyOutput { - private static final int NUM_HADOOP_SLAVES = 3; + private static final int NUM_HADOOP_WORKERS = 3; private static final int NUM_MAPS_PER_NODE = 2; - private static final Path INPUT = new Path("/testlazy/input"); + private static final Path INPUTPATH = new Path("/testlazy/input"); - private static final List input = + private static final List INPUTLIST = Arrays.asList("All","Roads","Lead","To","Hadoop"); @@ -70,7 +70,7 @@ public class TestLazyOutput { } } - static class TestReducer extends MapReduceBase + static class TestReducer extends MapReduceBase implements Reducer { private String id; @@ -93,12 +93,12 @@ public class TestLazyOutput { } private static void runTestLazyOutput(JobConf job, Path output, - int numReducers, boolean createLazily) + int numReducers, boolean createLazily) throws Exception { job.setJobName("test-lazy-output"); - FileInputFormat.setInputPaths(job, INPUT); + FileInputFormat.setInputPaths(job, INPUTPATH); FileOutputFormat.setOutputPath(job, output); job.setInputFormat(TextInputFormat.class); job.setMapOutputKeyClass(LongWritable.class); @@ -106,7 +106,7 @@ public class TestLazyOutput { job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); - job.setMapperClass(TestMapper.class); + job.setMapperClass(TestMapper.class); job.setReducerClass(TestReducer.class); JobClient client = new JobClient(job); @@ -123,10 +123,10 @@ public class TestLazyOutput { public void createInput(FileSystem fs, int numMappers) throws Exception { for (int i =0; i < numMappers; i++) { - OutputStream os = fs.create(new Path(INPUT, + OutputStream os = fs.create(new Path(INPUTPATH, "text" + i + ".txt")); Writer wr = new OutputStreamWriter(os); - for(String inp : input) { + for(String inp : INPUTLIST) { wr.write(inp+"\n"); } wr.close(); @@ -142,22 +142,23 @@ public class TestLazyOutput { Configuration conf = new Configuration(); // Start the mini-MR and mini-DFS clusters - dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_HADOOP_SLAVES) + dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_HADOOP_WORKERS) .build(); fileSys = dfs.getFileSystem(); - mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1); + mr = new MiniMRCluster(NUM_HADOOP_WORKERS, + fileSys.getUri().toString(), 1); int 
numReducers = 2; - int numMappers = NUM_HADOOP_SLAVES * NUM_MAPS_PER_NODE; + int numMappers = NUM_HADOOP_WORKERS * NUM_MAPS_PER_NODE; createInput(fileSys, numMappers); Path output1 = new Path("/testlazy/output1"); - // Test 1. - runTestLazyOutput(mr.createJobConf(), output1, + // Test 1. + runTestLazyOutput(mr.createJobConf(), output1, numReducers, true); - Path[] fileList = + Path[] fileList = FileUtil.stat2Paths(fileSys.listStatus(output1, new Utils.OutputFileUtils.OutputFilesFilter())); for(int i=0; i < fileList.length; ++i) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/pipes/TestPipes.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/pipes/TestPipes.java index 34b1d75dfed..84b491a79ec 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/pipes/TestPipes.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/pipes/TestPipes.java @@ -53,18 +53,18 @@ import static org.junit.Assert.assertFalse; public class TestPipes { private static final Log LOG = LogFactory.getLog(TestPipes.class.getName()); - - private static Path cppExamples = + + private static Path cppExamples = new Path(System.getProperty("install.c++.examples")); - static Path wordCountSimple = + private static Path wordCountSimple = new Path(cppExamples, "bin/wordcount-simple"); - static Path wordCountPart = + private static Path wordCountPart = new Path(cppExamples, "bin/wordcount-part"); - static Path wordCountNoPipes = + private static Path wordCountNoPipes = new Path(cppExamples,"bin/wordcount-nopipe"); - + static Path nonPipedOutDir; - + static void cleanup(FileSystem fs, Path p) throws IOException { fs.delete(p, true); assertFalse("output not cleaned up", fs.exists(p)); @@ -80,15 +80,16 @@ public class TestPipes { Path inputPath = new Path("testing/in"); Path outputPath = new Path("testing/out"); try { - final int numSlaves = 2; + final int numWorkers = 2; Configuration conf = new Configuration(); - dfs = new MiniDFSCluster.Builder(conf).numDataNodes(numSlaves).build(); - mr = new MiniMRCluster(numSlaves, dfs.getFileSystem().getUri().toString(), 1); + dfs = new MiniDFSCluster.Builder(conf).numDataNodes(numWorkers).build(); + mr = new MiniMRCluster(numWorkers, + dfs.getFileSystem().getUri().toString(), 1); writeInputFile(dfs.getFileSystem(), inputPath); - runProgram(mr, dfs, wordCountSimple, + runProgram(mr, dfs, wordCountSimple, inputPath, outputPath, 3, 2, twoSplitOutput, null); cleanup(dfs.getFileSystem(), outputPath); - runProgram(mr, dfs, wordCountSimple, + runProgram(mr, dfs, wordCountSimple, inputPath, outputPath, 3, 0, noSortOutput, null); cleanup(dfs.getFileSystem(), outputPath); runProgram(mr, dfs, wordCountPart, @@ -104,41 +105,41 @@ public class TestPipes { final static String[] twoSplitOutput = new String[] { "`and\t1\na\t1\nand\t1\nbeginning\t1\nbook\t1\nbut\t1\nby\t1\n" + - "conversation?'\t1\ndo:\t1\nhad\t2\nhaving\t1\nher\t2\nin\t1\nit\t1\n"+ - "it,\t1\nno\t1\nnothing\t1\nof\t3\non\t1\nonce\t1\nor\t3\npeeped\t1\n"+ - "pictures\t2\nthe\t3\nthought\t1\nto\t2\nuse\t1\nwas\t2\n", + "conversation?'\t1\ndo:\t1\nhad\t2\nhaving\t1\nher\t2\nin\t1\nit\t1\n"+ + "it,\t1\nno\t1\nnothing\t1\nof\t3\non\t1\nonce\t1\nor\t3\npeeped\t1\n"+ + "pictures\t2\nthe\t3\nthought\t1\nto\t2\nuse\t1\nwas\t2\n", - 
"Alice\t2\n`without\t1\nbank,\t1\nbook,'\t1\nconversations\t1\nget\t1\n" + - "into\t1\nis\t1\nreading,\t1\nshe\t1\nsister\t2\nsitting\t1\ntired\t1\n" + - "twice\t1\nvery\t1\nwhat\t1\n" + "Alice\t2\n`without\t1\nbank,\t1\nbook,'\t1\nconversations\t1\nget\t1\n" + + "into\t1\nis\t1\nreading,\t1\nshe\t1\nsister\t2\nsitting\t1\ntired\t1\n" + + "twice\t1\nvery\t1\nwhat\t1\n" }; final static String[] noSortOutput = new String[] { "it,\t1\n`and\t1\nwhat\t1\nis\t1\nthe\t1\nuse\t1\nof\t1\na\t1\n" + - "book,'\t1\nthought\t1\nAlice\t1\n`without\t1\npictures\t1\nor\t1\n"+ - "conversation?'\t1\n", + "book,'\t1\nthought\t1\nAlice\t1\n`without\t1\npictures\t1\nor\t1\n"+ + "conversation?'\t1\n", - "Alice\t1\nwas\t1\nbeginning\t1\nto\t1\nget\t1\nvery\t1\ntired\t1\n"+ - "of\t1\nsitting\t1\nby\t1\nher\t1\nsister\t1\non\t1\nthe\t1\nbank,\t1\n"+ - "and\t1\nof\t1\nhaving\t1\nnothing\t1\nto\t1\ndo:\t1\nonce\t1\n", + "Alice\t1\nwas\t1\nbeginning\t1\nto\t1\nget\t1\nvery\t1\ntired\t1\n"+ + "of\t1\nsitting\t1\nby\t1\nher\t1\nsister\t1\non\t1\nthe\t1\nbank,\t1\n"+ + "and\t1\nof\t1\nhaving\t1\nnothing\t1\nto\t1\ndo:\t1\nonce\t1\n", - "or\t1\ntwice\t1\nshe\t1\nhad\t1\npeeped\t1\ninto\t1\nthe\t1\nbook\t1\n"+ - "her\t1\nsister\t1\nwas\t1\nreading,\t1\nbut\t1\nit\t1\nhad\t1\nno\t1\n"+ - "pictures\t1\nor\t1\nconversations\t1\nin\t1\n" + "or\t1\ntwice\t1\nshe\t1\nhad\t1\npeeped\t1\ninto\t1\nthe\t1\nbook\t1\n"+ + "her\t1\nsister\t1\nwas\t1\nreading,\t1\nbut\t1\nit\t1\nhad\t1\nno\t1\n"+ + "pictures\t1\nor\t1\nconversations\t1\nin\t1\n" }; - + final static String[] fixedPartitionOutput = new String[] { "Alice\t2\n`and\t1\n`without\t1\na\t1\nand\t1\nbank,\t1\nbeginning\t1\n" + - "book\t1\nbook,'\t1\nbut\t1\nby\t1\nconversation?'\t1\nconversations\t1\n"+ - "do:\t1\nget\t1\nhad\t2\nhaving\t1\nher\t2\nin\t1\ninto\t1\nis\t1\n" + - "it\t1\nit,\t1\nno\t1\nnothing\t1\nof\t3\non\t1\nonce\t1\nor\t3\n" + - "peeped\t1\npictures\t2\nreading,\t1\nshe\t1\nsister\t2\nsitting\t1\n" + - "the\t3\nthought\t1\ntired\t1\nto\t2\ntwice\t1\nuse\t1\n" + - "very\t1\nwas\t2\nwhat\t1\n", - - "" + "book\t1\nbook,'\t1\nbut\t1\nby\t1\nconversation?'\t1\nconversations\t1\n"+ + "do:\t1\nget\t1\nhad\t2\nhaving\t1\nher\t2\nin\t1\ninto\t1\nis\t1\n" + + "it\t1\nit,\t1\nno\t1\nnothing\t1\nof\t3\non\t1\nonce\t1\nor\t3\n" + + "peeped\t1\npictures\t2\nreading,\t1\nshe\t1\nsister\t2\nsitting\t1\n" + + "the\t3\nthought\t1\ntired\t1\nto\t2\ntwice\t1\nuse\t1\n" + + "very\t1\nwas\t2\nwhat\t1\n", + + "" }; - + static void writeInputFile(FileSystem fs, Path dir) throws IOException { DataOutputStream out = fs.create(new Path(dir, "part0")); out.writeBytes("Alice was beginning to get very tired of sitting by her\n"); @@ -150,7 +151,7 @@ public class TestPipes { out.close(); } - static void runProgram(MiniMRCluster mr, MiniDFSCluster dfs, + static void runProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program, Path inputPath, Path outputPath, int numMaps, int numReduces, String[] expectedResults, JobConf conf @@ -161,13 +162,13 @@ public class TestPipes { job = mr.createJobConf(); }else { job = new JobConf(conf); - } + } job.setNumMapTasks(numMaps); job.setNumReduceTasks(numReduces); { FileSystem fs = dfs.getFileSystem(); fs.delete(wordExec.getParent(), true); - fs.copyFromLocalFile(program, wordExec); + fs.copyFromLocalFile(program, wordExec); Submitter.setExecutable(job, fs.makeQualified(wordExec).toString()); Submitter.setIsJavaRecordReader(job, true); Submitter.setIsJavaRecordWriter(job, true); @@ -176,7 +177,7 @@ public class TestPipes { RunningJob rJob = null; if 
(numReduces == 0) { rJob = Submitter.jobSubmit(job); - + while (!rJob.isComplete()) { try { Thread.sleep(1000); @@ -188,7 +189,7 @@ public class TestPipes { rJob = Submitter.runJob(job); } assertTrue("pipes job failed", rJob.isSuccessful()); - + Counters counters = rJob.getCounters(); Counters.Group wordCountCounters = counters.getGroup("WORDCOUNT"); int numCounters = 0; @@ -205,14 +206,14 @@ public class TestPipes { .OutputFilesFilter()))) { results.add(MapReduceTestUtil.readOutput(p, job)); } - assertEquals("number of reduces is wrong", + assertEquals("number of reduces is wrong", expectedResults.length, results.size()); for(int i=0; i < results.size(); i++) { assertEquals("pipes program " + program + " output " + i + " wrong", expectedResults[i], results.get(i)); } } - + /** * Run a map/reduce word count that does all of the map input and reduce * output directly rather than sending it back up to Java. @@ -229,10 +230,10 @@ public class TestPipes { }else { job = new JobConf(conf); } - + job.setInputFormat(WordCountInputFormat.class); FileSystem local = FileSystem.getLocal(job); - Path testDir = new Path("file:" + System.getProperty("test.build.data"), + Path testDir = new Path("file:" + System.getProperty("test.build.data"), "pipes"); Path inDir = new Path(testDir, "input"); nonPipedOutDir = new Path(testDir, "output"); @@ -263,18 +264,18 @@ public class TestPipes { out = local.create(jobXml); job.writeXml(out); out.close(); - System.err.println("About to run: Submitter -conf " + jobXml + - " -input " + inDir + " -output " + nonPipedOutDir + - " -program " + + System.err.println("About to run: Submitter -conf " + jobXml + + " -input " + inDir + " -output " + nonPipedOutDir + + " -program " + dfs.getFileSystem().makeQualified(wordExec)); try { int ret = ToolRunner.run(new Submitter(), new String[]{"-conf", jobXml.toString(), - "-input", inDir.toString(), - "-output", nonPipedOutDir.toString(), - "-program", + "-input", inDir.toString(), + "-output", nonPipedOutDir.toString(), + "-program", dfs.getFileSystem().makeQualified(wordExec).toString(), - "-reduces", "2"}); + "-reduces", "2"}); assertEquals(0, ret); } catch (Exception e) { assertTrue("got exception: " + StringUtils.stringifyException(e), false); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestMapReduceLazyOutput.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestMapReduceLazyOutput.java index a69e06eacd9..7c010382eab 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestMapReduceLazyOutput.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestMapReduceLazyOutput.java @@ -50,14 +50,17 @@ import static org.junit.Assert.assertTrue; * 0 byte files */ public class TestMapReduceLazyOutput { - private static final int NUM_HADOOP_SLAVES = 3; + private static final int NUM_HADOOP_WORKERS = 3; private static final int NUM_MAPS_PER_NODE = 2; - private static final Path INPUT = new Path("/testlazy/input"); + private static final Path INPUTPATH = new Path("/testlazy/input"); - private static final List input = + private static final List INPUTLIST = Arrays.asList("All","Roads","Lead","To","Hadoop"); - public static class TestMapper + /** + * Test mapper. 
+ */ + public static class TestMapper extends Mapper{ public void map(LongWritable key, Text value, Context context @@ -70,11 +73,13 @@ public class TestMapReduceLazyOutput { } } - - public static class TestReducer + /** + * Test Reducer. + */ + public static class TestReducer extends Reducer { - - public void reduce(LongWritable key, Iterable values, + + public void reduce(LongWritable key, Iterable values, Context context) throws IOException, InterruptedException { String id = context.getTaskAttemptID().toString(); // Reducer 0 does not output anything @@ -85,13 +90,13 @@ public class TestMapReduceLazyOutput { } } } - + private static void runTestLazyOutput(Configuration conf, Path output, - int numReducers, boolean createLazily) + int numReducers, boolean createLazily) throws Exception { Job job = Job.getInstance(conf, "Test-Lazy-Output"); - FileInputFormat.setInputPaths(job, INPUT); + FileInputFormat.setInputPaths(job, INPUTPATH); FileOutputFormat.setOutputPath(job, output); job.setJarByClass(TestMapReduceLazyOutput.class); @@ -113,10 +118,10 @@ public class TestMapReduceLazyOutput { public void createInput(FileSystem fs, int numMappers) throws Exception { for (int i =0; i < numMappers; i++) { - OutputStream os = fs.create(new Path(INPUT, + OutputStream os = fs.create(new Path(INPUTPATH, "text" + i + ".txt")); Writer wr = new OutputStreamWriter(os); - for(String inp : input) { + for(String inp : INPUTLIST) { wr.write(inp+"\n"); } wr.close(); @@ -132,22 +137,23 @@ public class TestMapReduceLazyOutput { Configuration conf = new Configuration(); // Start the mini-MR and mini-DFS clusters - dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_HADOOP_SLAVES) + dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_HADOOP_WORKERS) .build(); fileSys = dfs.getFileSystem(); - mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1); + mr = new MiniMRCluster(NUM_HADOOP_WORKERS, + fileSys.getUri().toString(), 1); int numReducers = 2; - int numMappers = NUM_HADOOP_SLAVES * NUM_MAPS_PER_NODE; + int numMappers = NUM_HADOOP_WORKERS * NUM_MAPS_PER_NODE; createInput(fileSys, numMappers); Path output1 = new Path("/testlazy/output1"); - // Test 1. - runTestLazyOutput(mr.createJobConf(), output1, + // Test 1. 
+ runTestLazyOutput(mr.createJobConf(), output1, numReducers, true); - Path[] fileList = + Path[] fileList = FileUtil.stat2Paths(fileSys.listStatus(output1, new Utils.OutputFileUtils.OutputFilesFilter())); for(int i=0; i < fileList.length; ++i) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestBinaryTokenFile.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestBinaryTokenFile.java index 7a2c03b1b0b..f504f0c6dd5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestBinaryTokenFile.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestBinaryTokenFile.java @@ -56,7 +56,7 @@ public class TestBinaryTokenFile { private static final String KEY_SECURITY_TOKEN_FILE_NAME = "key-security-token-file"; private static final String DELEGATION_TOKEN_KEY = "Hdfs"; - + // my sleep class static class MySleepMapper extends SleepJob.SleepMapper { /** @@ -67,7 +67,7 @@ public class TestBinaryTokenFile { throws IOException, InterruptedException { // get context token storage: final Credentials contextCredentials = context.getCredentials(); - + final Collection> contextTokenCollection = contextCredentials.getAllTokens(); for (Token t : contextTokenCollection) { System.out.println("Context token: [" + t + "]"); @@ -77,17 +77,17 @@ public class TestBinaryTokenFile { throw new RuntimeException("Exactly 2 tokens are expected in the contextTokenCollection: " + "one job token and one delegation token, but was found " + contextTokenCollection.size() + " tokens."); } - + final Token dt = contextCredentials.getToken(new Text(DELEGATION_TOKEN_KEY)); if (dt == null) { throw new RuntimeException("Token for key ["+DELEGATION_TOKEN_KEY+"] not found in the job context."); } - + String tokenFile0 = context.getConfiguration().get(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY); if (tokenFile0 != null) { throw new RuntimeException("Token file key ["+MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY+"] found in the configuration. It should have been removed from the configuration."); } - + final String tokenFile = context.getConfiguration().get(KEY_SECURITY_TOKEN_FILE_NAME); if (tokenFile == null) { throw new RuntimeException("Token file key ["+KEY_SECURITY_TOKEN_FILE_NAME+"] not found in the job configuration."); @@ -99,7 +99,8 @@ public class TestBinaryTokenFile { if (binaryTokenCollection.size() != 1) { throw new RuntimeException("The token collection read from file ["+tokenFile+"] must have size = 1."); } - final Token binTok = binaryTokenCollection.iterator().next(); + final Token binTok = binaryTokenCollection + .iterator().next(); System.out.println("The token read from binary file: t = [" + binTok + "]"); // Verify that dt is same as the token in the file: if (!dt.equals(binTok)) { @@ -107,7 +108,7 @@ public class TestBinaryTokenFile { "Delegation token in job is not same as the token passed in file:" + " tokenInFile=[" + binTok + "], dt=[" + dt + "]."); } - + // Now test the user tokens. 
final UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); // Print all the UGI tokens for diagnostic purposes: @@ -115,7 +116,7 @@ public class TestBinaryTokenFile { for (Token t: ugiTokenCollection) { System.out.println("UGI token: [" + t + "]"); } - final Token ugiToken + final Token ugiToken = ugi.getCredentials().getToken(new Text(DELEGATION_TOKEN_KEY)); if (ugiToken == null) { throw new RuntimeException("Token for key ["+DELEGATION_TOKEN_KEY+"] not found among the UGI tokens."); @@ -125,27 +126,27 @@ public class TestBinaryTokenFile { "UGI token is not same as the token passed in binary file:" + " tokenInBinFile=[" + binTok + "], ugiTok=[" + ugiToken + "]."); } - + super.map(key, value, context); } } - + class MySleepJob extends SleepJob { @Override - public Job createJob(int numMapper, int numReducer, - long mapSleepTime, int mapSleepCount, - long reduceSleepTime, int reduceSleepCount) + public Job createJob(int numMapper, int numReducer, + long mapSleepTime, int mapSleepCount, + long reduceSleepTime, int reduceSleepCount) throws IOException { Job job = super.createJob(numMapper, numReducer, - mapSleepTime, mapSleepCount, + mapSleepTime, mapSleepCount, reduceSleepTime, reduceSleepCount); - + job.setMapperClass(MySleepMapper.class); //Populate tokens here because security is disabled. setupBinaryTokenFile(job); return job; } - + private void setupBinaryTokenFile(Job job) { // Credentials in the job will not have delegation tokens // because security is disabled. Fetch delegation tokens @@ -161,40 +162,41 @@ public class TestBinaryTokenFile { binaryTokenFileName.toString()); } } - + private static MiniMRYarnCluster mrCluster; private static MiniDFSCluster dfsCluster; - - private static final Path TEST_DIR = + + private static final Path TEST_DIR = new Path(System.getProperty("test.build.data","/tmp")); private static final Path binaryTokenFileName = new Path(TEST_DIR, "tokenFile.binary"); - - private static final int numSlaves = 1; // num of data nodes + + private static final int NUMWORKERS = 1; // num of data nodes private static final int noOfNMs = 1; - + private static Path p1; - + @BeforeClass public static void setUp() throws Exception { final Configuration conf = new Configuration(); - + conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME); conf.set(YarnConfiguration.RM_PRINCIPAL, "jt_id/" + SecurityUtil.HOSTNAME_PATTERN + "@APACHE.ORG"); - + final MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf); builder.checkExitOnShutdown(true); - builder.numDataNodes(numSlaves); + builder.numDataNodes(NUMWORKERS); builder.format(true); builder.racks(null); dfsCluster = builder.build(); - + mrCluster = new MiniMRYarnCluster(TestBinaryTokenFile.class.getName(), noOfNMs); mrCluster.init(conf); mrCluster.start(); - NameNodeAdapter.getDtSecretManager(dfsCluster.getNamesystem()).startThreads(); - - FileSystem fs = dfsCluster.getFileSystem(); + NameNodeAdapter.getDtSecretManager(dfsCluster.getNamesystem()) + .startThreads(); + + FileSystem fs = dfsCluster.getFileSystem(); p1 = new Path("file1"); p1 = fs.makeQualified(p1); } @@ -240,13 +242,13 @@ public class TestBinaryTokenFile { @Test public void testBinaryTokenFile() throws IOException { Configuration conf = mrCluster.getConfig(); - + // provide namenodes names for the job to get the delegation tokens for final String nnUri = dfsCluster.getURI(0).toString(); conf.set(MRJobConfig.JOB_NAMENODES, nnUri + "," + nnUri); - + // using argument to pass the file name - final String[] args = { + final String[] args 
= { "-m", "1", "-r", "1", "-mt", "1", "-rt", "1" }; int res = -1; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestMRCredentials.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestMRCredentials.java index 85d60f0ba10..0a9c32f3494 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestMRCredentials.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestMRCredentials.java @@ -51,7 +51,7 @@ public class TestMRCredentials { static final int NUM_OF_KEYS = 10; private static MiniMRClientCluster mrCluster; private static MiniDFSCluster dfsCluster; - private static int numSlaves = 1; + private static int numWorkers = 1; private static JobConf jConf; @SuppressWarnings("deprecation") @@ -59,7 +59,7 @@ public class TestMRCredentials { public static void setUp() throws Exception { System.setProperty("hadoop.log.dir", "logs"); Configuration conf = new Configuration(); - dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(numSlaves) + dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(numWorkers) .build(); jConf = new JobConf(conf); FileSystem.setDefaultUri(conf, dfsCluster.getFileSystem().getUri().toString()); @@ -80,7 +80,7 @@ public class TestMRCredentials { } - public static void createKeysAsJson (String fileName) + public static void createKeysAsJson(String fileName) throws FileNotFoundException, IOException{ StringBuilder jsonString = new StringBuilder(); jsonString.append("{"); diff --git a/hadoop-yarn-project/hadoop-yarn/bin/start-yarn.sh b/hadoop-yarn-project/hadoop-yarn/bin/start-yarn.sh index 3b41299630d..ecc0140bb35 100755 --- a/hadoop-yarn-project/hadoop-yarn/bin/start-yarn.sh +++ b/hadoop-yarn-project/hadoop-yarn/bin/start-yarn.sh @@ -62,7 +62,7 @@ else "${HADOOP_YARN_HOME}/bin/yarn" \ --config "${HADOOP_CONF_DIR}" \ --daemon start \ - --slaves \ + --workers \ --hostnames "${RMHOSTS}" \ resourcemanager fi @@ -71,7 +71,7 @@ fi echo "Starting nodemanagers" "${HADOOP_YARN_HOME}/bin/yarn" \ --config "${HADOOP_CONF_DIR}" \ - --slaves \ + --workers \ --daemon start \ nodemanager @@ -80,7 +80,7 @@ PROXYSERVER=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey yarn.web-proxy.ad if [[ -n ${PROXYSERVER} ]]; then "${HADOOP_YARN_HOME}/bin/yarn" \ --config "${HADOOP_CONF_DIR}" \ - --slaves \ + --workers \ --hostnames "${PROXYSERVER}" \ --daemon start \ proxyserver diff --git a/hadoop-yarn-project/hadoop-yarn/bin/stop-yarn.sh b/hadoop-yarn-project/hadoop-yarn/bin/stop-yarn.sh index 358f0c90118..1ed52dd18a5 100755 --- a/hadoop-yarn-project/hadoop-yarn/bin/stop-yarn.sh +++ b/hadoop-yarn-project/hadoop-yarn/bin/stop-yarn.sh @@ -62,7 +62,7 @@ else "${HADOOP_YARN_HOME}/bin/yarn" \ --config "${HADOOP_CONF_DIR}" \ --daemon stop \ - --slaves \ + --workers \ --hostnames "${RMHOSTS}" \ resourcemanager fi @@ -71,7 +71,7 @@ fi echo "Stopping nodemanagers" "${HADOOP_YARN_HOME}/bin/yarn" \ --config "${HADOOP_CONF_DIR}" \ - --slaves \ + --workers \ --daemon stop \ nodemanager @@ -81,7 +81,7 @@ if [[ -n ${PROXYSERVER} ]]; then echo "Stopping proxy server [${PROXYSERVER}]" "${HADOOP_YARN_HOME}/bin/yarn" \ --config "${HADOOP_CONF_DIR}" \ - --slaves \ + --workers \ --hostnames "${PROXYSERVER}" \ --daemon stop \ proxyserver diff 
--git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn index 2c19cd286d8..9a5086e1b82 100755 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn @@ -26,10 +26,10 @@ function hadoop_usage { hadoop_add_option "--buildpaths" "attempt to add class files from build tree" hadoop_add_option "--daemon (start|status|stop)" "operate on a daemon" - hadoop_add_option "--hostnames list[,of,host,names]" "hosts to use in slave mode" + hadoop_add_option "--hostnames list[,of,host,names]" "hosts to use in worker mode" hadoop_add_option "--loglevel level" "set the log4j level for this command" - hadoop_add_option "--hosts filename" "list of hosts to use in slave mode" - hadoop_add_option "--slaves" "turn on slave mode" + hadoop_add_option "--hosts filename" "list of hosts to use in worker mode" + hadoop_add_option "--workers" "turn on worker mode" hadoop_add_subcommand "application" "prints application(s) report/kill application" hadoop_add_subcommand "applicationattempt" "prints applicationattempt(s) report" @@ -41,7 +41,7 @@ function hadoop_usage hadoop_add_subcommand "jar " "run a jar file" hadoop_add_subcommand "logs" "dump container logs" hadoop_add_subcommand "node" "prints node report(s)" - hadoop_add_subcommand "nodemanager" "run a nodemanager on each slave" + hadoop_add_subcommand "nodemanager" "run a nodemanager on each worker" hadoop_add_subcommand "proxyserver" "run the web app proxy server" hadoop_add_subcommand "queue" "prints queue information" hadoop_add_subcommand "resourcemanager" "run the ResourceManager" @@ -266,8 +266,8 @@ fi hadoop_verify_user "${HADOOP_SUBCMD}" -if [[ ${HADOOP_SLAVE_MODE} = true ]]; then - hadoop_common_slave_mode_execute "${HADOOP_YARN_HOME}/bin/yarn" "${HADOOP_USER_PARAMS[@]}" +if [[ ${HADOOP_WORKER_MODE} = true ]]; then + hadoop_common_worker_mode_execute "${HADOOP_YARN_HOME}/bin/yarn" "${HADOOP_USER_PARAMS[@]}" exit $? fi diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.cmd b/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.cmd index 41c143424b2..f2ccc8f2521 100644 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.cmd +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.cmd @@ -64,7 +64,7 @@ if not defined YARN_CONF_DIR ( @rem if "%1" == "--hosts" ( - set YARN_SLAVES=%YARN_CONF_DIR%\%2 + set YARN_WORKERS=%YARN_CONF_DIR%\%2 shift shift ) diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh b/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh index d7fa4066f60..719a6ae43ed 100644 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh @@ -15,7 +15,7 @@ function hadoop_subproject_init { - + # at some point in time, someone thought it would be a good idea to # create separate vars for every subproject. *sigh* # let's perform some overrides and setup some defaults for bw compat @@ -23,7 +23,7 @@ function hadoop_subproject_init # used interchangeable from here on out # ... # this should get deprecated at some point. - + if [[ -z "${HADOOP_YARN_ENV_PROCESSED}" ]]; then if [[ -e "${YARN_CONF_DIR}/yarn-env.sh" ]]; then . 
"${YARN_CONF_DIR}/yarn-env.sh" @@ -32,29 +32,29 @@ function hadoop_subproject_init fi export HADOOP_YARN_ENV_PROCESSED=true fi - + hadoop_deprecate_envvar YARN_CONF_DIR HADOOP_CONF_DIR hadoop_deprecate_envvar YARN_LOG_DIR HADOOP_LOG_DIR hadoop_deprecate_envvar YARN_LOGFILE HADOOP_LOGFILE - + hadoop_deprecate_envvar YARN_NICENESS HADOOP_NICENESS - + hadoop_deprecate_envvar YARN_STOP_TIMEOUT HADOOP_STOP_TIMEOUT - + hadoop_deprecate_envvar YARN_PID_DIR HADOOP_PID_DIR - + hadoop_deprecate_envvar YARN_ROOT_LOGGER HADOOP_ROOT_LOGGER hadoop_deprecate_envvar YARN_IDENT_STRING HADOOP_IDENT_STRING hadoop_deprecate_envvar YARN_OPTS HADOOP_OPTS - hadoop_deprecate_envvar YARN_SLAVES HADOOP_SLAVES - + hadoop_deprecate_envvar YARN_SLAVES HADOOP_WORKERS + HADOOP_YARN_HOME="${HADOOP_YARN_HOME:-$HADOOP_HOME}" - + # YARN-1429 added the completely superfluous YARN_USER_CLASSPATH # env var. We're going to override HADOOP_USER_CLASSPATH to keep # consistency with the rest of the duplicate/useless env vars diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemons.sh b/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemons.sh index 958c8bd9754..2226422e58c 100644 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemons.sh +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemons.sh @@ -47,13 +47,13 @@ daemonmode=$1 shift hadoop_error "WARNING: Use of this script to ${daemonmode} YARN daemons is deprecated." -hadoop_error "WARNING: Attempting to execute replacement \"yarn --slaves --daemon ${daemonmode}\" instead." +hadoop_error "WARNING: Attempting to execute replacement \"yarn --workers --daemon ${daemonmode}\" instead." # # Original input was usually: # yarn-daemons.sh (shell options) (start|stop) nodemanager (daemon options) # we're going to turn this into -# yarn --slaves --daemon (start|stop) (rest of options) +# yarn --workers --daemon (start|stop) (rest of options) # for (( i = 0; i < ${#HADOOP_USER_PARAMS[@]}; i++ )) do @@ -64,5 +64,5 @@ do fi done -${yarnscript} --slaves --daemon "${daemonmode}" "${HADOOP_USER_PARAMS[@]}" +${yarnscript} --workers --daemon "${daemonmode}" "${HADOOP_USER_PARAMS[@]}" diff --git a/hadoop-yarn-project/hadoop-yarn/pom.xml b/hadoop-yarn-project/hadoop-yarn/pom.xml index 3e31ec047fa..eb63f804113 100644 --- a/hadoop-yarn-project/hadoop-yarn/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/pom.xml @@ -54,7 +54,7 @@ apache-rat-plugin - conf/slaves + conf/workers conf/container-executor.cfg dev-support/jdiff/**