HADOOP-12930. Dynamic subcommands for hadoop shell scripts (aw)

This commit contains the following JIRA issues:

    HADOOP-12931. bin/hadoop work for dynamic subcommands
    HADOOP-12932. bin/yarn work for dynamic subcommands
    HADOOP-12933. bin/hdfs work for dynamic subcommands
    HADOOP-12934. bin/mapred work for dynamic subcommands
    HADOOP-12935. API documentation for dynamic subcommands
    HADOOP-12936. modify hadoop-tools to take advantage of dynamic subcommands
    HADOOP-13086. enable daemonization of dynamic commands
    HADOOP-13087. env var doc update for dynamic commands
    HADOOP-13088. fix shellprofiles in hadoop-tools to allow replacement
    HADOOP-13089. hadoop distcp adds client opts twice when dynamic
    HADOOP-13094. hadoop-common unit tests for dynamic commands
    HADOOP-13095. hadoop-hdfs unit tests for dynamic commands
    HADOOP-13107. clean up how rumen is executed
    HADOOP-13108. dynamic subcommands need a way to manipulate arguments
    HADOOP-13110. add a streaming subcommand to mapred
    HADOOP-13111. convert hadoop gridmix to be dynamic
    HADOOP-13115. dynamic subcommand docs should talk about exit vs. continue program flow
    HADOOP-13117. clarify daemonization and security vars for dynamic commands
    HADOOP-13120. add a --debug message when dynamic commands have been used
    HADOOP-13121. rename sub-project shellprofiles to match the rest of Hadoop
    HADOOP-13129. fix typo in dynamic subcommand docs
    HADOOP-13151. Underscores should be escaped in dynamic subcommands document
    HADOOP-13153. fix typo in debug statement for dynamic subcommands
This commit is contained in:
Allen Wittenauer 2016-03-28 09:00:07 -07:00
parent 6a6e74acf5
commit 730bc746f9
27 changed files with 1583 additions and 664 deletions

View File

@ -23,6 +23,38 @@
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>../hadoop-archive-logs/src/main/shellprofile.d</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>/libexec/shellprofile.d</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../hadoop-archives/src/main/shellprofile.d</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>/libexec/shellprofile.d</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../hadoop-distcp/src/main/shellprofile.d</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>/libexec/shellprofile.d</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../hadoop-extras/src/main/shellprofile.d</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>/libexec/shellprofile.d</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../hadoop-pipes/src/main/native/pipes/api/hadoop</directory>
<includes>
@ -93,6 +125,14 @@
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-gridmix/src/main/shellprofile.d</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>/libexec/shellprofile.d</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../hadoop-rumen/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
@ -100,6 +140,14 @@
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-rumen/src/main/shellprofile.d</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>/libexec/shellprofile.d</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../hadoop-streaming/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
@ -107,6 +155,14 @@
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-streaming/src/main/shellprofile.d</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>/libexec/shellprofile.d</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../hadoop-sls/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>

View File

@ -16,7 +16,12 @@
# limitations under the License.
MYNAME="${BASH_SOURCE-$0}"
HADOOP_SHELL_EXECNAME="${MYNAME##*/}"
## @description build up the hadoop command's usage text.
## @audience public
## @stability stable
## @replaceable no
function hadoop_usage
{
hadoop_add_option "buildpaths" "attempt to add class files from build tree"
@ -25,25 +30,141 @@ function hadoop_usage
hadoop_add_option "hosts filename" "list of hosts to use in slave mode"
hadoop_add_option "slaves" "turn on slave mode"
hadoop_add_subcommand "archive" "create a Hadoop archive"
hadoop_add_subcommand "checknative" "check native Hadoop and compression libraries availability"
hadoop_add_subcommand "classpath" "prints the class path needed to get the Hadoop jar and the required libraries"
hadoop_add_subcommand "conftest" "validate configuration XML files"
hadoop_add_subcommand "credential" "interact with credential providers"
hadoop_add_subcommand "daemonlog" "get/set the log level for each daemon"
hadoop_add_subcommand "distch" "distributed metadata changer"
hadoop_add_subcommand "distcp" "copy file or directories recursively"
hadoop_add_subcommand "dtutil" "operations related to delegation tokens"
hadoop_add_subcommand "envvars" "display computed Hadoop environment variables"
hadoop_add_subcommand "fs" "run a generic filesystem user client"
hadoop_add_subcommand "gridmix" "submit a mix of synthetic job, modeling a profiled from production load"
hadoop_add_subcommand "jar <jar>" "run a jar file. NOTE: please use \"yarn jar\" to launch YARN applications, not this command."
hadoop_add_subcommand "jnipath" "prints the java.library.path"
hadoop_add_subcommand "kerbname" "show auth_to_local principal conversion"
hadoop_add_subcommand "key" "manage keys via the KeyProvider"
hadoop_add_subcommand "trace" "view and modify Hadoop tracing settings"
hadoop_add_subcommand "version" "print the version"
hadoop_generate_usage "${MYNAME}" true
hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" true
}
## @description  Hand a deprecated subcommand over to the script that
## @description  replaced it. Never returns: it either execs the
## @description  replacement script or exits with status 1.
## @audience     private
## @stability    evolving
## @replaceable  no
## @param        replacement script name (hdfs or mapred)
## @param        name of the variable holding the preferred home directory
## @param        subcommand as given on the command line
## @param        subcommand name to pass to the replacement script
## @param        [remaining CLI arguments]
function hadoopcmd_delegate
{
  local script=$1
  local homevar=$2
  local oldcmd=$3
  local newcmd=$4
  shift 4
  local dir

  hadoop_error "WARNING: Use of this script to execute ${oldcmd} is deprecated."
  hadoop_error "WARNING: Attempting to execute replacement \"${script} ${newcmd}\" instead."
  hadoop_error ""

  # try the project-specific home first, then fall back to HADOOP_HOME
  for dir in "${!homevar}" "${HADOOP_HOME}"; do
    if [[ -f "${dir}/bin/${script}" ]]; then
      exec "${dir}/bin/${script}" \
        --config "${HADOOP_CONF_DIR}" "${newcmd}" "$@"
    fi
  done

  hadoop_error "${homevar} not found!"
  exit 1
}

## @description  Default command handler for hadoop command.
## @description  Sets HADOOP_CLASSNAME for the built-in subcommands (or
## @description  treats the subcommand as a class name), delegates the
## @description  deprecated hdfs/mapred subcommands to their own scripts,
## @description  and appends HADOOP_CLIENT_OPTS to HADOOP_OPTS.
## @audience     public
## @stability    stable
## @replaceable  no
## @param        CLI arguments
function hadoopcmd_case
{
  # local, so that the handler does not clobber a caller's variable
  local subcmd=$1
  shift

  case ${subcmd} in
    balancer|datanode|dfs|dfsadmin|dfsgroups| \
    namenode|secondarynamenode|fsck|fetchdt|oiv| \
    portmap|nfs3)
      # hdfs calls the groups subcommand "groups", not "dfsgroups"
      hadoopcmd_delegate hdfs HADOOP_HDFS_HOME \
        "${subcmd}" "${subcmd/dfsgroups/groups}" "$@"
    ;;

    #mapred commands for backwards compatibility
    pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
      # mapred calls the groups subcommand "groups", not "mrgroups"
      hadoopcmd_delegate mapred HADOOP_MAPRED_HOME \
        "${subcmd}" "${subcmd/mrgroups/groups}" "$@"
    ;;
    checknative)
      HADOOP_CLASSNAME=org.apache.hadoop.util.NativeLibraryChecker
    ;;
    classpath)
      hadoop_do_classpath_subcommand HADOOP_CLASSNAME "$@"
    ;;
    conftest)
      HADOOP_CLASSNAME=org.apache.hadoop.util.ConfTest
    ;;
    credential)
      HADOOP_CLASSNAME=org.apache.hadoop.security.alias.CredentialShell
    ;;
    daemonlog)
      HADOOP_CLASSNAME=org.apache.hadoop.log.LogLevel
    ;;
    dtutil)
      HADOOP_CLASSNAME=org.apache.hadoop.security.token.DtUtilShell
    ;;
    envvars)
      echo "JAVA_HOME='${JAVA_HOME}'"
      echo "HADOOP_COMMON_HOME='${HADOOP_COMMON_HOME}'"
      echo "HADOOP_COMMON_DIR='${HADOOP_COMMON_DIR}'"
      echo "HADOOP_COMMON_LIB_JARS_DIR='${HADOOP_COMMON_LIB_JARS_DIR}'"
      echo "HADOOP_COMMON_LIB_NATIVE_DIR='${HADOOP_COMMON_LIB_NATIVE_DIR}'"
      echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'"
      echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'"
      echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'"
      echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'"
      exit 0
    ;;
    fs)
      HADOOP_CLASSNAME=org.apache.hadoop.fs.FsShell
    ;;
    jar)
      if [[ -n "${YARN_OPTS}" ]] || [[ -n "${YARN_CLIENT_OPTS}" ]]; then
        hadoop_error "WARNING: Use \"yarn jar\" to launch YARN applications."
      fi
      HADOOP_CLASSNAME=org.apache.hadoop.util.RunJar
    ;;
    jnipath)
      hadoop_finalize
      echo "${JAVA_LIBRARY_PATH}"
      exit 0
    ;;
    kerbname)
      HADOOP_CLASSNAME=org.apache.hadoop.security.HadoopKerberosName
    ;;
    key)
      HADOOP_CLASSNAME=org.apache.hadoop.crypto.key.KeyShell
    ;;
    trace)
      HADOOP_CLASSNAME=org.apache.hadoop.tracing.TraceAdmin
    ;;
    version)
      HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo
    ;;
    *)
      # anything else is treated as a class name to run
      HADOOP_CLASSNAME="${subcmd}"
      if ! hadoop_validate_classname "${HADOOP_CLASSNAME}"; then
        hadoop_exit_with_usage 1
      fi
    ;;
  esac

  # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
}
# This script runs the hadoop core commands.
@ -70,142 +191,76 @@ if [ $# = 0 ]; then
hadoop_exit_with_usage 1
fi
COMMAND=$1
HADOOP_SUBCMD=$1
shift
case ${COMMAND} in
balancer|datanode|dfs|dfsadmin|dfsgroups| \
namenode|secondarynamenode|fsck|fetchdt|oiv| \
portmap|nfs3)
hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
COMMAND=${COMMAND/dfsgroups/groups}
hadoop_error "WARNING: Attempting to execute replacement \"hdfs ${COMMAND}\" instead."
hadoop_error ""
#try to locate hdfs and if present, delegate to it.
if [[ -f "${HADOOP_HDFS_HOME}/bin/hdfs" ]]; then
# shellcheck disable=SC2086
exec "${HADOOP_HDFS_HOME}/bin/hdfs" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
elif [[ -f "${HADOOP_HOME}/bin/hdfs" ]]; then
# shellcheck disable=SC2086
exec "${HADOOP_HOME}/bin/hdfs" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
else
hadoop_error "HADOOP_HDFS_HOME not found!"
exit 1
fi
;;
HADOOP_SUBCMD_ARGS=("$@")
#mapred commands for backwards compatibility
pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
COMMAND=${COMMAND/mrgroups/groups}
hadoop_error "WARNING: Attempting to execute replacement \"mapred ${COMMAND}\" instead."
hadoop_error ""
#try to locate mapred and if present, delegate to it.
if [[ -f "${HADOOP_MAPRED_HOME}/bin/mapred" ]]; then
exec "${HADOOP_MAPRED_HOME}/bin/mapred" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
elif [[ -f "${HADOOP_HOME}/bin/mapred" ]]; then
exec "${HADOOP_HOME}/bin/mapred" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
else
hadoop_error "HADOOP_MAPRED_HOME not found!"
exit 1
fi
;;
archive)
CLASS=org.apache.hadoop.tools.HadoopArchives
hadoop_add_to_classpath_tools hadoop-archives
;;
checknative)
CLASS=org.apache.hadoop.util.NativeLibraryChecker
;;
classpath)
hadoop_do_classpath_subcommand CLASS "$@"
;;
conftest)
CLASS=org.apache.hadoop.util.ConfTest
;;
credential)
CLASS=org.apache.hadoop.security.alias.CredentialShell
;;
daemonlog)
CLASS=org.apache.hadoop.log.LogLevel
;;
distch)
CLASS=org.apache.hadoop.tools.DistCh
hadoop_add_to_classpath_tools hadoop-extras
;;
distcp)
CLASS=org.apache.hadoop.tools.DistCp
hadoop_add_to_classpath_tools hadoop-distcp
;;
dtutil)
CLASS=org.apache.hadoop.security.token.DtUtilShell
;;
envvars)
echo "JAVA_HOME='${JAVA_HOME}'"
echo "HADOOP_COMMON_HOME='${HADOOP_COMMON_HOME}'"
echo "HADOOP_COMMON_DIR='${HADOOP_COMMON_DIR}'"
echo "HADOOP_COMMON_LIB_JARS_DIR='${HADOOP_COMMON_LIB_JARS_DIR}'"
echo "HADOOP_COMMON_LIB_NATIVE_DIR='${HADOOP_COMMON_LIB_NATIVE_DIR}'"
echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'"
echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'"
echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'"
echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'"
exit 0
;;
fs)
CLASS=org.apache.hadoop.fs.FsShell
;;
gridmix)
CLASS=org.apache.hadoop.mapred.gridmix.Gridmix
hadoop_add_to_classpath_tools hadoop-rumen
hadoop_add_to_classpath_tools hadoop-gridmix
;;
jar)
if [[ -n "${YARN_OPTS}" ]] || [[ -n "${YARN_CLIENT_OPTS}" ]]; then
hadoop_error "WARNING: Use \"yarn jar\" to launch YARN applications."
fi
CLASS=org.apache.hadoop.util.RunJar
;;
jnipath)
hadoop_finalize
echo "${JAVA_LIBRARY_PATH}"
exit 0
;;
kerbname)
CLASS=org.apache.hadoop.security.HadoopKerberosName
;;
key)
CLASS=org.apache.hadoop.crypto.key.KeyShell
;;
trace)
CLASS=org.apache.hadoop.tracing.TraceAdmin
;;
version)
CLASS=org.apache.hadoop.util.VersionInfo
;;
*)
CLASS="${COMMAND}"
if ! hadoop_validate_classname "${CLASS}"; then
hadoop_exit_with_usage 1
fi
;;
esac
# Dispatch the subcommand: if a function named
# hadoop_subcommand_<subcommand> is defined, call it with the subcommand
# arguments; otherwise fall back to the built-in hadoopcmd_case handler.
if declare -f hadoop_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then
hadoop_debug "Calling dynamically: hadoop_subcommand_${HADOOP_SUBCMD} ${HADOOP_SUBCMD_ARGS[*]}"
"hadoop_subcommand_${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}"
else
hadoopcmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}"
fi
hadoop_verify_user "${COMMAND}"
hadoop_verify_user "${HADOOP_SUBCMD}"
if [[ ${HADOOP_SLAVE_MODE} = true ]]; then
hadoop_common_slave_mode_execute "${HADOOP_HDFS_HOME}/bin/hdfs" "${HADOOP_USER_PARAMS[@]}"
hadoop_common_slave_mode_execute "${HADOOP_COMMON_HOME}/bin/hadoop" "${HADOOP_USER_PARAMS[@]}"
exit $?
fi
# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
# Work out the pid and stdout/stderr file names that are handed to the
# daemon handlers further down. A secure service additionally gets
# "privileged-" files and has the secure user embedded in the daemon
# file names.
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}"
hadoop_verify_secure_prereq
hadoop_setup_secure_service
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
else
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
fi
# Any daemon mode other than "default" logs through the daemon root
# logger and writes to a per-subcommand log file.
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
  # shellcheck disable=SC2034
  HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
  # Compare against "true", the same test the pid/out file naming and the
  # daemon handler selection use. A bare -n test would treat
  # HADOOP_SUBCMD_SECURESERVICE=false as secure and put an unset
  # HADOOP_SECURE_USER into the log file name.
  if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
    # shellcheck disable=SC2034
    HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
  else
    # shellcheck disable=SC2034
    HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
  fi
fi
hadoop_finalize
hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
# Launch the subcommand. Subcommands that set
# HADOOP_SUBCMD_SUPPORTDAEMONIZATION=true go through the daemon handler
# (the secure variant when HADOOP_SUBCMD_SECURESERVICE=true) and the script
# exits with the handler's status; everything else is run directly with
# hadoop_java_exec.
if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${priv_pidfile}" \
"${priv_outfile}" \
"${priv_errfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
else
hadoop_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
fi
exit $?
else
# shellcheck disable=SC2086
hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}"
fi

View File

@ -85,11 +85,11 @@ Apache Hadoop allows for third parties to easily add new features through a vari
Core to this functionality is the concept of a shell profile. Shell profiles are shell snippets that can do things such as add jars to the classpath, configure Java system properties and more.
Shell profiles may be installed in either `${HADOOP_CONF_DIR}/shellprofile.d` or `${HADOOP_HOME}/libexec/shellprofile.d`. Shell profiles in the `libexec` directory are part of the base installation and cannot be overriden by the user. Shell profiles in the configuration directory may be ignored if the end user changes the configuration directory at runtime.
Shell profiles may be installed in either `${HADOOP_CONF_DIR}/shellprofile.d` or `${HADOOP_HOME}/libexec/shellprofile.d`. Shell profiles in the `libexec` directory are part of the base installation and cannot be overridden by the user. Shell profiles in the configuration directory may be ignored if the end user changes the configuration directory at runtime.
An example of a shell profile is in the libexec directory.
## Shell API
### Shell API
Apache Hadoop's shell code has a [function library](./UnixShellAPI.html) that is open for administrators and developers to use to assist in their configuration and advanced feature management. These APIs follow the standard [Apache Hadoop Interface Classification](./InterfaceClassification.html), with one addition: Replaceable.
@ -97,10 +97,8 @@ The shell code allows for core functions to be overridden. However, not all func
In order to replace a function, create a file called `hadoop-user-functions.sh` in the `${HADOOP_CONF_DIR}` directory. Simply define the new, replacement function in this file and the system will pick it up automatically. There may be as many replacement functions as needed in this file. Examples of function replacement are in the `hadoop-user-functions.sh.examples` file.
Functions that are marked Public and Stable are safe to use in shell profiles as-is. Other functions may change in a minor release.
### User-level API Access
In addition to `.hadoop-env`, which allows individual users to override `hadoop-env.sh`, users may also use `.hadooprc`. This is called after the Apache Hadoop shell environment has been configured and allows the full set of shell API function calls.
@ -112,3 +110,84 @@ hadoop_add_classpath /some/path/custom.jar
```
would go into `.hadooprc`
### Dynamic Subcommands
Utilizing the Shell API, it is possible for third parties to add their own subcommands to the primary Hadoop shell scripts (hadoop, hdfs, mapred, yarn).
Prior to executing a subcommand, the primary scripts will check for the existence of a (scriptname)\_subcommand\_(subcommand) function. This function gets executed with the parameters set to all remaining command line arguments. For example, if the following function is defined:
```bash
function yarn_subcommand_hello
{
echo "$@"
exit $?
}
```
then executing `yarn --debug hello world I see you` will activate script debugging and call the `yarn_subcommand_hello` function as:
```bash
yarn_subcommand_hello world I see you
```
which will result in the output of:
```bash
world I see you
```
It is also possible to add the new subcommands to the usage output. The `hadoop_add_subcommand` function adds text to the usage output. Utilizing the standard HADOOP_SHELL_EXECNAME variable, we can limit which command gets our new function.
```bash
if [[ "${HADOOP_SHELL_EXECNAME}" = "yarn" ]]; then
hadoop_add_subcommand "hello" "Print some text to the screen"
fi
```
This functionality may also be used to override the built-ins. For example, defining:
```bash
function hdfs_subcommand_fetchdt
{
...
}
```
... will replace the existing `hdfs fetchdt` subcommand with a custom one.
Some key environment variables related to Dynamic Subcommands:
* HADOOP\_CLASSNAME
This is the name of the Java class to use when program execution continues.
* HADOOP\_SHELL\_EXECNAME
This is the name of the script that is being executed. It will be one of hadoop, hdfs, mapred, or yarn.
* HADOOP\_SUBCMD
This is the subcommand that was passed on the command line.
* HADOOP\_SUBCMD\_ARGS
This array contains the argument list after the Apache Hadoop common argument processing has taken place and is the same list that is passed to the subcommand function as arguments. For example, if `hadoop --debug subcmd 1 2 3` has been executed on the command line, then `${HADOOP_SUBCMD_ARGS[0]}` will be 1 and `hadoop_subcommand_subcmd` will also have $1 equal to 1. This array list MAY be modified by subcommand functions to add or delete values from the argument list for further processing.
* HADOOP\_SUBCMD\_SECURESERVICE
If this command should/will be executed as a secure daemon, set this to true.
* HADOOP\_SUBCMD\_SECUREUSER
If this command should/will be executed as a secure daemon, set the user name to be used.
* HADOOP\_SUBCMD\_SUPPORTDAEMONIZATION
If this command can be executed as a daemon, set this to true.
* HADOOP\_USER\_PARAMS
This is the full content of the command line, prior to any parsing done. It will contain flags such as `--debug`. It MAY NOT be manipulated.
The Apache Hadoop runtime facilities require that functions exit if no further processing is required. For example, in the hello example above, Java and other facilities were not required so a simple `exit $?` was sufficient. However, if the function were to utilize `HADOOP_CLASSNAME`, then program execution must continue so that Java with the Apache Hadoop-specific parameters will be launched against the given Java class. Another example would be in the case of an unrecoverable error. It is the function's responsibility to print an appropriate message (preferably using the hadoop_error API call) and exit appropriately.

View File

@ -16,7 +16,7 @@
setup() {
TMP=../../../target/test-dir/bats.$$.${RANDOM}
TMP="${BATS_TEST_DIRNAME}/../../../target/test-dir/bats.$$.${RANDOM}"
mkdir -p ${TMP}
TMP=$(cd -P -- "${TMP}" >/dev/null && pwd -P)
export TMP
@ -38,7 +38,7 @@ setup() {
# shellcheck disable=SC2034
QATESTMODE=true
. ../../main/bin/hadoop-functions.sh
. "${BATS_TEST_DIRNAME}/../../main/bin/hadoop-functions.sh"
pushd "${TMP}" >/dev/null
}

View File

@ -0,0 +1,78 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load hadoop-functions_test_helper
# the loading of shell profiles is tested elsewhere
# this only tests the specific subcommand parts
# Build a throw-away libexec directory under ${TMP} whose hadoop-config.sh
# sources the real hadoop-functions.sh and then defines a few
# hadoop_subcommand_* functions. Each of those functions prints a marker
# and exits 0, so the tests can check its output after running bin/hadoop.
subcommandsetup () {
export HADOOP_LIBEXEC_DIR="${TMP}/libexec"
export HADOOP_CONF_DIR="${TMP}/conf"
mkdir -p "${HADOOP_LIBEXEC_DIR}"
# first line of the generated hadoop-config.sh: pull in the function library
echo ". \"${BATS_TEST_DIRNAME}/../../main/bin/hadoop-functions.sh\"" > "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
# the quoted delimiter keeps the here-document literal, so the variables
# below are expanded when the subcommand runs, not while writing the file
cat <<-'TOKEN' >> "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
hadoop_subcommand_sub () {
echo "unittest"
exit 0
}
hadoop_subcommand_conftest ()
{
echo conftest
exit 0
}
hadoop_subcommand_envcheck ()
{
echo ${HADOOP_SHELL_EXECNAME}
exit 0
}
hadoop_subcommand_multi ()
{
echo $2
exit 0
}
TOKEN
chmod a+rx "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
}
# "sub" is not a built-in hadoop subcommand: the dynamic function must run.
@test "hadoop_subcommand (addition)" {
subcommandsetup
run "${BATS_TEST_DIRNAME}/../../main/bin/hadoop" sub
echo ">${output}<"
[ "${output}" = unittest ]
}
# "conftest" is a built-in: the dynamic function must take precedence.
@test "hadoop_subcommand (substitute)" {
subcommandsetup
run "${BATS_TEST_DIRNAME}/../../main/bin/hadoop" conftest
echo ">${output}<"
[ "${output}" = conftest ]
}
# HADOOP_SHELL_EXECNAME must be the script name as seen by the function.
@test "hadoop_subcommand (envcheck)" {
subcommandsetup
run "${BATS_TEST_DIRNAME}/../../main/bin/hadoop" envcheck
[ "${output}" = hadoop ]
}
# the function receives the remaining CLI arguments; $2 must be "2".
@test "hadoop_subcommand (multiparams)" {
subcommandsetup
run "${BATS_TEST_DIRNAME}/../../main/bin/hadoop" multi 1 2
[ "${output}" = 2 ]
}

View File

@ -559,5 +559,41 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
</plugins>
</build>
</profile>
<!-- profile to test shell code -->
<profile>
<id>shelltest</id>
<activation>
<property>
<name>!skipTests</name>
</property>
</activation>
<build>
<plugins>
<plugin>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>hdfs-test-bats-driver</id>
<phase>test</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<exec dir="src/test/scripts"
executable="bash"
failonerror="true">
<arg value="./run-bats.sh" />
</exec>
</target>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@ -16,7 +16,12 @@
# limitations under the License.
MYNAME="${BASH_SOURCE-$0}"
HADOOP_SHELL_EXECNAME="${MYNAME##*/}"
## @description build up the hdfs command's usage text.
## @audience public
## @stability stable
## @replaceable no
function hadoop_usage
{
hadoop_add_option "--buildpaths" "attempt to add class files from build tree"
@ -56,7 +61,194 @@ function hadoop_usage
hadoop_add_subcommand "storagepolicies" "list/get/set block storage policies"
hadoop_add_subcommand "version" "print the version"
hadoop_add_subcommand "zkfc" "run the ZK Failover Controller daemon"
hadoop_generate_usage "${MYNAME}" false
hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" false
}
## @description  Default command handler for hdfs command.
## @description  Sets HADOOP_CLASSNAME for the built-in subcommands (or
## @description  treats the subcommand as a class name), flags the
## @description  subcommands that support daemonization or run as a secure
## @description  service, and appends the matching *_OPTS to HADOOP_OPTS.
## @audience     public
## @stability    stable
## @replaceable  no
## @param        CLI arguments
function hdfscmd_case
{
  # local, so that the handler does not clobber a caller's variable
  local subcmd=$1
  shift

  case ${subcmd} in
    balancer)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.server.balancer.Balancer
      hadoop_debug "Appending HADOOP_BALANCER_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_BALANCER_OPTS}"
    ;;
    cacheadmin)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.CacheAdmin
    ;;
    classpath)
      hadoop_do_classpath_subcommand HADOOP_CLASSNAME "$@"
    ;;
    crypto)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.CryptoAdmin
    ;;
    datanode)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      # Determine if we're starting a secure datanode, and
      # if so, redefine appropriate variables
      if [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
        HADOOP_SUBCMD_SECURESERVICE="true"
        HADOOP_SUBCMD_SECUREUSER="${HADOOP_SECURE_DN_USER}"

        # backward compatibility
        HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_DN_PID_DIR}"
        HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_DN_LOG_DIR}"

        hadoop_debug "Appending HADOOP_DATANODE_OPTS onto HADOOP_OPTS"
        hadoop_debug "Appending HADOOP_DN_SECURE_EXTRA_OPTS onto HADOOP_OPTS"
        HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS} ${HADOOP_DN_SECURE_EXTRA_OPTS}"
        HADOOP_CLASSNAME="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter"
      else
        hadoop_debug "Appending HADOOP_DATANODE_OPTS onto HADOOP_OPTS"
        HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS}"
        HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.datanode.DataNode'
      fi
    ;;
    debug)
      HADOOP_CLASSNAME='org.apache.hadoop.hdfs.tools.DebugAdmin'
    ;;
    dfs)
      HADOOP_CLASSNAME=org.apache.hadoop.fs.FsShell
      hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
    ;;
    dfsadmin)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DFSAdmin
      hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
    ;;
    envvars)
      echo "JAVA_HOME='${JAVA_HOME}'"
      echo "HADOOP_HDFS_HOME='${HADOOP_HDFS_HOME}'"
      echo "HDFS_DIR='${HDFS_DIR}'"
      echo "HDFS_LIB_JARS_DIR='${HDFS_LIB_JARS_DIR}'"
      echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'"
      echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'"
      echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'"
      echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'"
      exit 0
    ;;
    erasurecode)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.erasurecode.ECCli
      hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
    ;;
    fetchdt)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
    ;;
    fsck)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DFSck
      hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
    ;;
    getconf)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.GetConf
    ;;
    groups)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.GetGroups
    ;;
    haadmin)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DFSHAAdmin
      hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
    ;;
    journalnode)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
      hadoop_debug "Appending HADOOP_JOURNALNODE_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOURNALNODE_OPTS}"
    ;;
    jmxget)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.JMXGet
    ;;
    lsSnapshottableDir)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
    ;;
    mover)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.server.mover.Mover
      hadoop_debug "Appending HADOOP_MOVER_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_MOVER_OPTS}"
    ;;
    namenode)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.namenode.NameNode'
      hadoop_debug "Appending HADOOP_NAMENODE_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NAMENODE_OPTS}"
      hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}"
    ;;
    nfs3)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      # a privileged NFS user turns the gateway into a secure service
      if [[ -n "${HADOOP_PRIVILEGED_NFS_USER}" ]]; then
        HADOOP_SUBCMD_SECURESERVICE="true"
        HADOOP_SUBCMD_SECUREUSER="${HADOOP_PRIVILEGED_NFS_USER}"

        # backward compatibility
        HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_NFS3_PID_DIR}"
        HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_NFS3_LOG_DIR}"

        hadoop_debug "Appending HADOOP_NFS3_OPTS onto HADOOP_OPTS"
        hadoop_debug "Appending HADOOP_NFS3_SECURE_EXTRA_OPTS onto HADOOP_OPTS"
        HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS} ${HADOOP_NFS3_SECURE_EXTRA_OPTS}"
        HADOOP_CLASSNAME=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter
      else
        hadoop_debug "Appending HADOOP_NFS3_OPTS onto HADOOP_OPTS"
        HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS}"
        HADOOP_CLASSNAME=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
      fi
    ;;
    oev)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
    ;;
    oiv)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
    ;;
    oiv_legacy)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
    ;;
    portmap)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME=org.apache.hadoop.portmap.Portmap
      hadoop_debug "Appending HADOOP_PORTMAP_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_PORTMAP_OPTS}"
    ;;
    secondarynamenode)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
      hadoop_debug "Appending HADOOP_SECONDARYNAMENODE_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_SECONDARYNAMENODE_OPTS}"
      hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}"
    ;;
    snapshotDiff)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
    ;;
    storagepolicies)
      HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.StoragePolicyAdmin
    ;;
    version)
      HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo
    ;;
    zkfc)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
      hadoop_debug "Appending HADOOP_ZKFC_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_ZKFC_OPTS}"
    ;;
    *)
      # anything else is treated as a class name to run
      HADOOP_CLASSNAME="${subcmd}"
      if ! hadoop_validate_classname "${HADOOP_CLASSNAME}"; then
        hadoop_exit_with_usage 1
      fi
    ;;
  esac
}
# let's locate libexec...
@ -81,232 +273,76 @@ if [[ $# = 0 ]]; then
hadoop_exit_with_usage 1
fi
COMMAND=$1
HADOOP_SUBCMD=$1
shift
case ${COMMAND} in
balancer)
supportdaemonization="true"
CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
hadoop_debug "Appending HADOOP_BALANCER_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_BALANCER_OPTS}"
;;
cacheadmin)
CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin
;;
classpath)
hadoop_do_classpath_subcommand CLASS "$@"
;;
crypto)
CLASS=org.apache.hadoop.hdfs.tools.CryptoAdmin
;;
datanode)
supportdaemonization="true"
# Determine if we're starting a secure datanode, and
# if so, redefine appropriate variables
if [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
secure_service="true"
secure_user="${HADOOP_SECURE_DN_USER}"
HADOOP_SUBCMD_ARGS=("$@")
# backward compatiblity
HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_DN_PID_DIR}"
HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_DN_LOG_DIR}"
if declare -f hdfs_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then
hadoop_debug "Calling dynamically: hdfs_subcommand_${HADOOP_SUBCMD} ${HADOOP_SUBCMD_ARGS[*]}"
"hdfs_subcommand_${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}"
else
hdfscmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}"
fi
hadoop_debug "Appending HADOOP_DATANODE_OPTS onto HADOOP_OPTS"
hadoop_debug "Appending HADOOP_DN_SECURE_EXTRA_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS} ${HADOOP_DN_SECURE_EXTRA_OPTS}"
CLASS="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter"
else
hadoop_debug "Appending HADOOP_DATANODE_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS}"
CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
fi
;;
debug)
CLASS='org.apache.hadoop.hdfs.tools.DebugAdmin'
;;
dfs)
CLASS=org.apache.hadoop.fs.FsShell
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
dfsadmin)
CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
envvars)
echo "JAVA_HOME='${JAVA_HOME}'"
echo "HADOOP_HDFS_HOME='${HADOOP_HDFS_HOME}'"
echo "HDFS_DIR='${HDFS_DIR}'"
echo "HDFS_LIB_JARS_DIR='${HDFS_LIB_JARS_DIR}'"
echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'"
echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'"
echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'"
echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'"
exit 0
;;
erasurecode)
CLASS=org.apache.hadoop.hdfs.tools.erasurecode.ECCli
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
fetchdt)
CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
;;
fsck)
CLASS=org.apache.hadoop.hdfs.tools.DFSck
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
getconf)
CLASS=org.apache.hadoop.hdfs.tools.GetConf
;;
groups)
CLASS=org.apache.hadoop.hdfs.tools.GetGroups
;;
haadmin)
CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
journalnode)
supportdaemonization="true"
CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
hadoop_debug "Appending HADOOP_JOURNALNODE_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOURNALNODE_OPTS}"
;;
jmxget)
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
;;
lsSnapshottableDir)
CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
;;
mover)
supportdaemonization="true"
CLASS=org.apache.hadoop.hdfs.server.mover.Mover
hadoop_debug "Appending HADOOP_MOVER_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_MOVER_OPTS}"
;;
namenode)
supportdaemonization="true"
CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
hadoop_debug "Appending HADOOP_NAMENODE_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NAMENODE_OPTS}"
hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}"
;;
nfs3)
supportdaemonization="true"
if [[ -n "${HADOOP_PRIVILEGED_NFS_USER}" ]]; then
secure_service="true"
secure_user="${HADOOP_PRIVILEGED_NFS_USER}"
# backward compatiblity
HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_NFS3_PID_DIR}"
HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_NFS3_LOG_DIR}"
hadoop_debug "Appending HADOOP_NFS3_OPTS onto HADOOP_OPTS"
hadoop_debug "Appending HADOOP_NFS3_SECURE_EXTRA_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS} ${HADOOP_NFS3_SECURE_EXTRA_OPTS}"
CLASS=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter
else
hadoop_debug "Appending HADOOP_NFS3_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS}"
CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
fi
;;
oev)
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
;;
oiv)
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
;;
oiv_legacy)
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
;;
portmap)
supportdaemonization="true"
CLASS=org.apache.hadoop.portmap.Portmap
hadoop_debug "Appending HADOOP_PORTMAP_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_PORTMAP_OPTS}"
;;
secondarynamenode)
supportdaemonization="true"
CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
hadoop_debug "Appending HADOOP_SECONDARYNAMENODE_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_SECONDARYNAMENODE_OPTS}"
hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}"
;;
snapshotDiff)
CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
;;
storagepolicies)
CLASS=org.apache.hadoop.hdfs.tools.StoragePolicyAdmin
;;
version)
CLASS=org.apache.hadoop.util.VersionInfo
;;
zkfc)
supportdaemonization="true"
CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
hadoop_debug "Appending HADOOP_ZKFC_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_ZKFC_OPTS}"
;;
*)
CLASS="${COMMAND}"
if ! hadoop_validate_classname "${CLASS}"; then
hadoop_exit_with_usage 1
fi
;;
esac
hadoop_verify_user "${COMMAND}"
hadoop_verify_user "${HADOOP_SUBCMD}"
if [[ ${HADOOP_SLAVE_MODE} = true ]]; then
hadoop_common_slave_mode_execute "${HADOOP_HDFS_HOME}/bin/hdfs" "${HADOOP_USER_PARAMS[@]}"
exit $?
fi
if [[ -n "${secure_service}" ]]; then
HADOOP_SECURE_USER="${secure_user}"
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}"
hadoop_verify_secure_prereq
hadoop_setup_secure_service
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
else
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
fi
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
if [[ -n "${secure_service}" ]]; then
# secure mode is signalled by the literal value "true", the same test
# used by the other HADOOP_SUBCMD_SECURESERVICE checks in this script
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
else
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
fi
fi
hadoop_finalize
if [[ -n "${supportdaemonization}" ]]; then
if [[ -n "${secure_service}" ]]; then
if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\
"${daemon_pidfile}" "${daemon_outfile}" \
"${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${priv_pidfile}" \
"${priv_outfile}" \
"${priv_errfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
else
hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\
"${daemon_pidfile}" "${daemon_outfile}" "$@"
hadoop_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
fi
exit $?
else
# shellcheck disable=SC2086
hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}"
fi

View File

@ -1,4 +1,4 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.

View File

@ -0,0 +1,58 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# bats per-test fixture: builds a private scratch directory, resets the
# Hadoop environment, loads hadoop-functions.sh and enters the scratch dir.
setup() {
  local scratch="${BATS_TEST_DIRNAME}/../../../target/test-dir/bats.$$.${RANDOM}"

  # scratch area, exported as an absolute physical path
  mkdir -p "${scratch}"
  TMP=$(cd -P -- "${scratch}" >/dev/null && pwd -P)
  export TMP

  # directory holding the scripts under test, resolved the same way
  TESTBINDIR="${BATS_TEST_DIRNAME}"
  HADOOP_LIBEXEC_DIR=$(cd -P -- "${TESTBINDIR}/../../main/bin" >/dev/null && pwd -P)

  # shellcheck disable=SC2034
  HADOOP_SHELL_SCRIPT_DEBUG=true
  # shellcheck disable=SC2034
  QATESTMODE=true

  # HADOOP_HOME and HADOOP_PREFIX are both cleared for backward compatibility
  unset HADOOP_CONF_DIR HADOOP_HOME HADOOP_PREFIX

  echo "bindir: ${TESTBINDIR}" 2>&1

  mkdir -p "${TMP}"

  # shellcheck disable=SC1090
  . "${BATS_TEST_DIRNAME}/../../../../../hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh"

  pushd "${TMP}" >/dev/null
}
# bats per-test cleanup: undo the pushd done by setup(), then delete the
# scratch directory named by TMP.
teardown() {
  popd > /dev/null
  rm -fr "${TMP}"
}
# Literal substring test.
#   $1 - string to search in
#   $2 - string to look for; taken verbatim, glob characters such as
#        '*', '?' and '[' have no special meaning
# Prints "true" when $2 occurs in $1, otherwise "false".
# An empty $2 always yields "false".
strstr() {
  local haystack="$1"
  local needle="$2"

  # Quoting the needle inside the expansion forces a literal match;
  # left unquoted it would be interpreted as a glob pattern.
  if [[ "${haystack#*"${needle}"}" != "${haystack}" ]]; then
    echo true
  else
    echo false
  fi
}

View File

@ -0,0 +1,78 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load hdfs-functions_test_helper
# the loading of shell profiles is tested elsewhere
# this only tests the specific subcommand parts
# Points HADOOP_LIBEXEC_DIR/HADOOP_CONF_DIR at the scratch area and writes
# a stub hdfs-config.sh there: it sources the real hadoop-functions.sh and
# defines four hdfs_subcommand_* functions used by the tests below.
subcommandsetup () {
  local stubconfig

  export HADOOP_LIBEXEC_DIR="${TMP}/libexec"
  export HADOOP_CONF_DIR="${TMP}/conf"
  stubconfig="${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"

  mkdir -p "${HADOOP_LIBEXEC_DIR}"

  # line 1 sources the function library; the quoted heredoc then adds the
  # stub subcommands verbatim (nothing inside it is expanded here)
  {
    echo ". \"${BATS_TEST_DIRNAME}/../../../../../hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh\""
    cat <<-'TOKEN'
hdfs_subcommand_sub () {
echo "unittest"
exit 0
}
hdfs_subcommand_cacheadmin ()
{
echo cacheadmin
exit 0
}
hdfs_subcommand_envcheck ()
{
echo ${HADOOP_SHELL_EXECNAME}
exit 0
}
hdfs_subcommand_multi ()
{
echo $2
exit 0
}
TOKEN
  } > "${stubconfig}"

  chmod a+rx "${stubconfig}"
}
# "sub" exists only as hdfs_subcommand_sub in the stub hdfs-config.sh
# written by subcommandsetup; running "hdfs sub" must print that
# function's output ("unittest").
@test "hdfs_subcommand (addition)" {
subcommandsetup
run "${BATS_TEST_DIRNAME}/../../main/bin/hdfs" sub
# print the captured output between markers
echo ">${output}<"
[ "${output}" = unittest ]
}
# The stub defines hdfs_subcommand_cacheadmin, which prints "cacheadmin";
# running "hdfs cacheadmin" must produce the stub's output.
# NOTE(review): cacheadmin is presumably also a built-in hdfs subcommand,
# which is what makes this a "substitute" case -- confirm against bin/hdfs.
@test "hdfs_subcommand (substitute)" {
subcommandsetup
run "${BATS_TEST_DIRNAME}/../../main/bin/hdfs" cacheadmin
# print the captured output between markers
echo ">${output}<"
[ "${output}" = cacheadmin ]
}
# The stub's hdfs_subcommand_envcheck echoes HADOOP_SHELL_EXECNAME; when
# invoked through bin/hdfs that value is expected to be "hdfs".
@test "hdfs_subcommand (envcheck)" {
subcommandsetup
run "${BATS_TEST_DIRNAME}/../../main/bin/hdfs" envcheck
[ "${output}" = hdfs ]
}
# The stub's hdfs_subcommand_multi echoes its second positional parameter;
# "hdfs multi 1 2" must therefore print "2", i.e. the arguments after the
# subcommand name reach the function in order.
@test "hdfs_subcommand (multiparams)" {
subcommandsetup
run "${BATS_TEST_DIRNAME}/../../main/bin/hdfs" multi 1 2
[ "${output}" = 2 ]
}

View File

@ -0,0 +1,43 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Runs every *.bats file in the current directory and stores the TAP
# output of each under ${targetdir}/tap.  Exits 1 if any bats run
# reported failures, 0 otherwise.  A missing bats executable is reported
# but deliberately not treated as a failure (exit 0).

targetdir=../../../target
mkdir -p "${targetdir}/surefire-reports" "${targetdir}/tap"

# 'command -v' is the shell builtin lookup; the stderr redirect has to be
# inside the substitution to apply to the lookup itself
batsexe=$(command -v bats 2>/dev/null)
if [[ -z "${batsexe}" ]]; then
  echo "not ok - no bats executable found" > "${targetdir}/tap/shelltest.tap"
  echo ""
  echo ""
  echo "ERROR: bats not installed. Skipping bash tests."
  echo "ERROR: Please install bats as soon as possible."
  echo ""
  echo ""
  exit 0
fi

# start from zero rather than from whatever the environment provides
exitcode=0

for j in *.bats; do
  echo Running bats -t "${j}"
  bats -t "${j}" 2>&1 | tee "${targetdir}/tap/${j}.tap"
  # status of bats itself, not of tee
  result=${PIPESTATUS[0]}
  ((exitcode=exitcode+result))
done

if [[ ${exitcode} -gt 0 ]]; then
  exit 1
fi
exit 0

View File

@ -16,13 +16,15 @@
# limitations under the License.
MYNAME="${BASH_SOURCE-$0}"
HADOOP_SHELL_EXECNAME="${MYNAME##*/}"
## @description build up the mapred command's usage text.
## @audience public
## @stability stable
## @replaceable no
function hadoop_usage
{
hadoop_add_subcommand "archive" "create a hadoop archive"
hadoop_add_subcommand "archive-logs" "combine aggregated logs into hadoop archives"
hadoop_add_subcommand "classpath" "prints the class path needed for running mapreduce subcommands"
hadoop_add_subcommand "distcp" "copy file or directories recursively"
hadoop_add_subcommand "envvars" "display computed Hadoop environment variables"
hadoop_add_subcommand "historyserver" "run job history servers as a standalone daemon"
hadoop_add_subcommand "hsadmin" "job history server admin interface"
@ -31,7 +33,85 @@ function hadoop_usage
hadoop_add_subcommand "queue" "get information regarding JobQueues"
hadoop_add_subcommand "sampler" "sampler"
hadoop_add_subcommand "version" "print the version"
hadoop_generate_usage "${MYNAME}" true
hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" true
}
## @description Default command handler for the mapred command
## @audience public
## @stability stable
## @replaceable no
## @param CLI arguments
function mapredcmd_case
{
  # First argument selects the subcommand; the rest are its arguments.
  # Depending on the subcommand this sets HADOOP_CLASSNAME, may append to
  # HADOOP_OPTS, and may set HADOOP_SUBCMD_SUPPORTDAEMONIZATION.
  # Kept local so the dispatch key does not leak into the caller's scope.
  local subcmd=$1
  shift

  case ${subcmd} in
    mradmin|jobtracker|tasktracker|groups)
      hadoop_error "Sorry, the ${subcmd} command is no longer supported."
      hadoop_error "You may find similar functionality with the \"yarn\" shell command."
      hadoop_exit_with_usage 1
    ;;
    classpath)
      hadoop_do_classpath_subcommand HADOOP_CLASSNAME "$@"
    ;;
    envvars)
      # prints the computed environment and terminates the script
      echo "JAVA_HOME='${JAVA_HOME}'"
      echo "HADOOP_MAPRED_HOME='${HADOOP_MAPRED_HOME}'"
      echo "MAPRED_DIR='${MAPRED_DIR}'"
      echo "MAPRED_LIB_JARS_DIR='${MAPRED_LIB_JARS_DIR}'"
      echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'"
      echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'"
      echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'"
      echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'"
      exit 0
    ;;
    historyserver)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
      hadoop_debug "Appending HADOOP_JOB_HISTORYSERVER_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOB_HISTORYSERVER_OPTS}"
      # a history-server specific heap size overrides the generic maximum
      if [[ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]]; then
        # shellcheck disable=SC2034
        HADOOP_HEAPSIZE_MAX="${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}"
      fi
      # HADOOP_JHS_LOGGER, when set, replaces the daemon root logger
      HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_JHS_LOGGER:-$HADOOP_DAEMON_ROOT_LOGGER}
    ;;
    hsadmin)
      HADOOP_CLASSNAME=org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin
      hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
    ;;
    job)
      HADOOP_CLASSNAME=org.apache.hadoop.mapred.JobClient
      hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
    ;;
    pipes)
      HADOOP_CLASSNAME=org.apache.hadoop.mapred.pipes.Submitter
      hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
    ;;
    queue)
      HADOOP_CLASSNAME=org.apache.hadoop.mapred.JobQueueClient
    ;;
    sampler)
      HADOOP_CLASSNAME=org.apache.hadoop.mapred.lib.InputSampler
      hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
    ;;
    version)
      HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo
      hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
    ;;
    *)
      # anything else is treated as a class name to run
      HADOOP_CLASSNAME="${subcmd}"
      if ! hadoop_validate_classname "${HADOOP_CLASSNAME}"; then
        hadoop_exit_with_usage 1
      fi
    ;;
  esac
}
bin=$(cd -P -- "$(dirname -- "${MYNAME}")" >/dev/null && pwd -P)
@ -58,125 +138,71 @@ if [ $# = 0 ]; then
hadoop_exit_with_usage 1
fi
COMMAND=$1
HADOOP_SUBCMD=$1
shift
case ${COMMAND} in
mradmin|jobtracker|tasktracker|groups)
hadoop_error "Sorry, the ${COMMAND} command is no longer supported."
hadoop_error "You may find similar functionality with the \"yarn\" shell command."
hadoop_exit_with_usage 1
;;
archive)
CLASS=org.apache.hadoop.tools.HadoopArchives
hadoop_add_to_classpath_tools hadoop-archives
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
archive-logs)
CLASS=org.apache.hadoop.tools.HadoopArchiveLogs
hadoop_add_to_classpath_tools hadoop-archive-logs
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
classpath)
hadoop_do_classpath_subcommand CLASS "$@"
;;
distcp)
CLASS=org.apache.hadoop.tools.DistCp
hadoop_add_to_classpath_tools hadoop-distcp
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
envvars)
echo "JAVA_HOME='${JAVA_HOME}'"
echo "HADOOP_MAPRED_HOME='${HADOOP_MAPRED_HOME}'"
echo "MAPRED_DIR='${MAPRED_DIR}'"
echo "MAPRED_LIB_JARS_DIR='${MAPRED_LIB_JARS_DIR}'"
echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'"
echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'"
echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'"
echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'"
exit 0
;;
historyserver)
supportdaemonization="true"
CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
hadoop_debug "Appending HADOOP_JOB_HISTORYSERVER_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOB_HISTORYSERVER_OPTS}"
if [ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]; then
HADOOP_HEAPSIZE_MAX="${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}"
fi
HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_JHS_LOGGER:-$HADOOP_DAEMON_ROOT_LOGGER}
;;
hsadmin)
CLASS=org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
job)
CLASS=org.apache.hadoop.mapred.JobClient
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
pipes)
CLASS=org.apache.hadoop.mapred.pipes.Submitter
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
queue)
CLASS=org.apache.hadoop.mapred.JobQueueClient
;;
sampler)
CLASS=org.apache.hadoop.mapred.lib.InputSampler
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
version)
CLASS=org.apache.hadoop.util.VersionInfo
hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
*)
CLASS="${COMMAND}"
if ! hadoop_validate_classname "${CLASS}"; then
hadoop_exit_with_usage 1
fi
;;
esac
HADOOP_SUBCMD_ARGS=("$@")
hadoop_verify_user "${COMMAND}"
if declare -f mapred_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then
hadoop_debug "Calling dynamically: mapred_subcommand_${HADOOP_SUBCMD} ${HADOOP_SUBCMD_ARGS[*]}"
"mapred_subcommand_${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}"
else
mapredcmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}"
fi
hadoop_verify_user "${HADOOP_SUBCMD}"
if [[ ${HADOOP_SLAVE_MODE} = true ]]; then
hadoop_common_slave_mode_execute "${HADOOP_MAPRED_HOME}/bin/mapred" "${HADOOP_USER_PARAMS[@]}"
exit $?
fi
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}"
hadoop_verify_secure_prereq
hadoop_setup_secure_service
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
else
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
fi
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
hadoop_add_param HADOOP_OPTS mapred.jobsummary.logger "-Dmapred.jobsummary.logger=${HADOOP_ROOT_LOGGER}"
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
fi
hadoop_finalize
if [[ -n "${supportdaemonization}" ]]; then
if [[ -n "${secure_service}" ]]; then
hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}"\
"${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \
"${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${priv_pidfile}" \
"${priv_outfile}" \
"${priv_errfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
else
hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
"${daemon_pidfile}" "${daemon_outfile}" "$@"
hadoop_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
fi
exit $?
else
hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}"
fi

View File

@ -1,4 +1,4 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.

View File

@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Provide the archive-logs subcommand for mapred unless a function of the
# same name has already been defined.
if ! declare -f mapred_subcommand_archive-logs >/dev/null 2>&1; then

  # only list the subcommand in usage output when running as "mapred"
  if [[ "${HADOOP_SHELL_EXECNAME}" == "mapred" ]]; then
    hadoop_add_subcommand "archive-logs" "combine aggregated logs into hadoop archives"
  fi

# this can't be indented otherwise shelldocs won't get it

## @description archive-logs command for mapred
## @audience public
## @stability stable
## @replaceable yes
function mapred_subcommand_archive-logs
{
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME="org.apache.hadoop.tools.HadoopArchiveLogs"
  hadoop_add_to_classpath_tools "hadoop-archive-logs"
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
}

fi

View File

@ -0,0 +1,58 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Provide the archive subcommand for hadoop unless a function of the same
# name has already been defined.
if ! declare -f hadoop_subcommand_archive >/dev/null 2>&1; then

  # only list the subcommand in usage output when running as "hadoop"
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "archive" "create a Hadoop archive"
  fi

# this can't be indented otherwise shelldocs won't get it

## @description archive command for hadoop (and mapred)
## @audience public
## @stability stable
## @replaceable yes
function hadoop_subcommand_archive
{
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME="org.apache.hadoop.tools.HadoopArchives"
  hadoop_add_to_classpath_tools "hadoop-archives"
}

fi
# Provide the archive subcommand for mapred unless a function of the same
# name has already been defined.
if ! declare -f mapred_subcommand_archive >/dev/null 2>/dev/null; then

  # only list the subcommand in usage output when running as "mapred"
  if [[ "${HADOOP_SHELL_EXECNAME}" = mapred ]]; then
    hadoop_add_subcommand "archive" "create a Hadoop archive"
  fi

# this can't be indented otherwise shelldocs won't get it

## @description archive command for mapred
## @audience public
## @stability stable
## @replaceable yes
function mapred_subcommand_archive
{
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.tools.HadoopArchives
  hadoop_add_to_classpath_tools hadoop-archives
  # the static "archive" case this function replaces in bin/mapred
  # appended the client options, so keep doing that here
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
}

fi

View File

@ -0,0 +1,62 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Provide the distcp subcommand for hadoop unless a function of the same
# name has already been defined.
if ! declare -f hadoop_subcommand_distcp >/dev/null 2>&1; then

  # only list the subcommand in usage output when running as "hadoop"
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "distcp" "copy file or directories recursively"
  fi

# this can't be indented otherwise shelldocs won't get it

## @description distcp command for hadoop
## @audience public
## @stability stable
## @replaceable yes
function hadoop_subcommand_distcp
{
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME="org.apache.hadoop.tools.DistCp"
  hadoop_add_to_classpath_tools "hadoop-distcp"
  # NOTE(review): confirm bin/hadoop does not append HADOOP_CLIENT_OPTS
  # again after calling this function
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
}

fi
# Provide the distcp subcommand for mapred unless a function of the same
# name has already been defined.
if ! declare -f mapred_subcommand_distcp >/dev/null 2>&1; then

  # only list the subcommand in usage output when running as "mapred"
  if [[ "${HADOOP_SHELL_EXECNAME}" == "mapred" ]]; then
    hadoop_add_subcommand "distcp" "copy file or directories recursively"
  fi

# this can't be indented otherwise shelldocs won't get it

## @description distcp command for mapred
## @audience public
## @stability stable
## @replaceable yes
function mapred_subcommand_distcp
{
  # same setup as the hadoop variant, written out in full here
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME="org.apache.hadoop.tools.DistCp"
  hadoop_add_to_classpath_tools "hadoop-distcp"
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
}

fi

View File

@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Provide the distch subcommand for hadoop unless a function of the same
# name has already been defined.
if ! declare -f hadoop_subcommand_distch >/dev/null 2>&1; then

  # only list the subcommand in usage output when running as "hadoop"
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "distch" "distributed metadata changer"
  fi

# this can't be indented otherwise shelldocs won't get it

## @description distch command for hadoop
## @audience public
## @stability stable
## @replaceable yes
function hadoop_subcommand_distch
{
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME="org.apache.hadoop.tools.DistCh"
  # DistCh is loaded from the hadoop-extras tools module
  hadoop_add_to_classpath_tools "hadoop-extras"
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
}

fi

View File

@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Define the subcommand only when no function of the same name exists yet,
# so that an earlier definition takes precedence over this one.
if ! declare -f hadoop_subcommand_gridmix >/dev/null 2>/dev/null; then

  # Register the usage entry only when the running launcher is "hadoop".
  if [[ "${HADOOP_SHELL_EXECNAME}" = hadoop ]]; then
    hadoop_add_subcommand "gridmix" "submit a mix of synthetic jobs, modeling a profile mined from production loads"
  fi

# The shelldocs tags and the function definition below have to start in
# column 0; shelldocs does not pick them up when they are indented.
## @description gridmix command for hadoop
## @audience public
## @stability stable
## @replaceable yes
function hadoop_subcommand_gridmix
{
  # Class name left in HADOOP_CLASSNAME for the calling launcher script.
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.mapred.gridmix.Gridmix

  # Both tool names are handed to the classpath helper, rumen first.
  hadoop_add_to_classpath_tools hadoop-rumen
  hadoop_add_to_classpath_tools hadoop-gridmix
}

fi

View File

@ -0,0 +1,58 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Define the subcommand only when no function of the same name exists yet,
# so that an earlier definition takes precedence over this one.
if ! declare -f hadoop_subcommand_rumenfolder >/dev/null 2>&1; then

  # Register the usage entry only when the running launcher is "hadoop".
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "rumenfolder" "scale a rumen input trace"
  fi

## @description rumenfolder command for hadoop
## @audience public
## @stability stable
## @replaceable yes
function hadoop_subcommand_rumenfolder
{
  # Class name left in HADOOP_CLASSNAME for the calling launcher script.
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME="org.apache.hadoop.tools.rumen.Folder"

  # Hand the tool name to the classpath helper.
  hadoop_add_to_classpath_tools "hadoop-rumen"

  # Client options are appended after whatever HADOOP_OPTS already holds.
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
}

fi
# Define the subcommand only when no function of the same name exists yet,
# so that an earlier definition takes precedence over this one.
if ! declare -f hadoop_subcommand_rumentrace >/dev/null 2>&1; then

  # Register the usage entry only when the running launcher is "hadoop".
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "rumentrace" "convert logs into a rumen trace"
  fi

## @description rumentrace command for hadoop
## @audience public
## @stability stable
## @replaceable yes
function hadoop_subcommand_rumentrace
{
  # Class name left in HADOOP_CLASSNAME for the calling launcher script.
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME="org.apache.hadoop.tools.rumen.TraceBuilder"

  # Hand the tool name to the classpath helper.
  hadoop_add_to_classpath_tools "hadoop-rumen"

  # Client options are appended after whatever HADOOP_OPTS already holds.
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
}

fi

View File

@ -50,8 +50,8 @@ but a simulation of the scheduler elects to run that task on a remote
rack, the simulator requires a runtime its input cannot provide.
To fill in these gaps, Rumen performs a statistical analysis of the
digest to estimate the variables the trace doesn't supply. Rumen traces
drive both Gridmix (a benchmark of Hadoop MapReduce clusters) and Mumak
(a simulator for the JobTracker).
drive both Gridmix (a benchmark of Hadoop MapReduce clusters) and SLS
(a simulator for the resource manager scheduler).
$H3 Motivation
@ -126,16 +126,13 @@ can use the `Folder` utility to fold the current trace to the
desired length. The remaining part of this section explains these
utilities in detail.
Examples in this section assume that certain libraries are present
in the Java CLASSPATH. See [Dependencies](#Dependencies) for more details.
$H3 Trace Builder
$H4 Command
```
java org.apache.hadoop.tools.rumen.TraceBuilder [options] <jobtrace-output> <topology-output> <inputs>
hadoop rumentrace [options] <jobtrace-output> <topology-output> <inputs>
```
This command invokes the `TraceBuilder` utility of *Rumen*.
@ -205,12 +202,8 @@ $H4 Options
$H4 Example
*Rumen* expects certain library *JARs* to be present in the *CLASSPATH*.
One simple way to run Rumen is to use the
`$HADOOP_HOME/bin/hadoop jar` command, as in the example below.
```
java org.apache.hadoop.tools.rumen.TraceBuilder \
hadoop rumentrace \
file:///tmp/job-trace.json \
file:///tmp/job-topology.json \
hdfs:///tmp/hadoop-yarn/staging/history/done_intermediate/testuser
@ -229,7 +222,7 @@ $H3 Folder
$H4 Command
```
java org.apache.hadoop.tools.rumen.Folder [options] [input] [output]
hadoop rumenfolder [options] [input] [output]
```
This command invokes the `Folder` utility of
@ -350,7 +343,7 @@ $H4 Examples
$H5 Folding an input trace with 10 hours of total runtime to generate an output trace with 1 hour of total runtime
```
java org.apache.hadoop.tools.rumen.Folder \
hadoop rumenfolder \
-output-duration 1h \
-input-cycle 20m \
file:///tmp/job-trace.json \
@ -362,7 +355,7 @@ If the folded jobs are out of order then the command will bail out.
$H5 Folding an input trace with 10 hours of total runtime to generate an output trace with 1 hour of total runtime and tolerate some skewness
```
java org.apache.hadoop.tools.rumen.Folder \
hadoop rumenfolder \
-output-duration 1h \
-input-cycle 20m \
-allow-missorting \
@ -378,7 +371,7 @@ If the folded jobs are out of order, then atmost
$H5 Folding an input trace with 10 hours of total runtime to generate an output trace with 1 hour of total runtime in debug mode
```
java org.apache.hadoop.tools.rumen.Folder \
hadoop rumenfolder \
-output-duration 1h \
-input-cycle 20m \
-debug -temp-directory file:///tmp/debug \
@ -395,7 +388,7 @@ up.
$H5 Folding an input trace with 10 hours of total runtime to generate an output trace with 1 hour of total runtime with custom concentration.
```
java org.apache.hadoop.tools.rumen.Folder \
hadoop rumenfolder \
-output-duration 1h \
-input-cycle 20m \
-concentration 2 \
@ -421,18 +414,3 @@ Look at the MapReduce
<a href="https://issues.apache.org/jira/browse/MAPREDUCE/component/12313617">rumen-component</a>
for further details.
$H3 Dependencies
*Rumen* expects certain library *JARs* to be present in the *CLASSPATH*.
One simple way to run Rumen is to use the
`hadoop jar` command, as in the example below.
```
$HADOOP_HOME/bin/hadoop jar \
$HADOOP_HOME/share/hadoop/tools/lib/hadoop-rumen-2.5.1.jar \
org.apache.hadoop.tools.rumen.TraceBuilder \
file:///tmp/job-trace.json \
file:///tmp/job-topology.json \
hdfs:///tmp/hadoop-yarn/staging/history/done_intermediate/testuser
```

View File

@ -91,8 +91,7 @@ public class DumpTypedBytes implements Tool {
}
private void printUsage() {
System.out.println("Usage: $HADOOP_HOME/bin/hadoop jar hadoop-streaming.jar"
+ " dumptb <glob-pattern>");
System.out.println("Usage: mapred streaming dumptb <glob-pattern>");
System.out.println(" Dumps all files that match the given pattern to " +
"standard output as typed bytes.");
System.out.println(" The files can be text or sequence files");

View File

@ -56,8 +56,7 @@ public class HadoopStreaming {
}
private static void printUsage() {
System.out.println("Usage: $HADOOP_HOME/bin/hadoop jar hadoop-streaming.jar"
+ " [options]");
System.out.println("Usage: mapred streaming [options]");
System.out.println("Options:");
System.out.println(" dumptb <glob-pattern> Dumps all files that match the"
+ " given pattern to ");

View File

@ -89,8 +89,7 @@ public class LoadTypedBytes implements Tool {
}
private void printUsage() {
System.out.println("Usage: $HADOOP_HOME/bin/hadoop jar hadoop-streaming.jar"
+ " loadtb <path>");
System.out.println("Usage: mapred streaming loadtb <path>");
System.out.println(" Reads typed bytes from standard input" +
" and stores them in a sequence file in");
System.out.println(" the specified path");

View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Define the subcommand only when no function of the same name exists yet,
# so that an earlier definition takes precedence over this one.
if ! declare -f mapred_subcommand_streaming >/dev/null 2>/dev/null; then

  # Register the usage entry only when the running launcher is "mapred".
  if [[ "${HADOOP_SHELL_EXECNAME}" = mapred ]]; then
    hadoop_add_subcommand "streaming" "launch a mapreduce streaming job"
  fi

## @description streaming command for mapred
## @audience public
## @stability stable
## @replaceable yes
function mapred_subcommand_streaming
{
  declare jarname
  declare oldifs
  declare ifswasset=false

  # RunJar is the class to execute; the streaming jar located below is
  # prepended to HADOOP_SUBCMD_ARGS as its first argument.
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.util.RunJar

  hadoop_add_to_classpath_tools hadoop-streaming

  # locate the streaming jar so we have something to
  # give to RunJar
  #
  # An unset IFS (default word splitting) is not the same thing as an
  # empty IFS (no word splitting at all), so remember which state we
  # started in instead of blindly re-assigning the saved value.
  if [[ -n "${IFS+set}" ]]; then
    ifswasset=true
    oldifs=${IFS}
  fi

  IFS=:
  # CLASSPATH is deliberately left unquoted so it is split on ':'.
  for jarname in ${CLASSPATH}; do
    if [[ "${jarname}" =~ hadoop-streaming-[0-9] ]]; then
      HADOOP_SUBCMD_ARGS=("${jarname}" "${HADOOP_SUBCMD_ARGS[@]}")
      break
    fi
  done

  # put IFS back exactly the way the caller had it
  if [[ "${ifswasset}" = true ]]; then
    IFS=${oldifs}
  else
    unset IFS
  fi

  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
}

fi

View File

@ -62,7 +62,7 @@ Hadoop Streaming
Hadoop streaming is a utility that comes with the Hadoop distribution. The utility allows you to create and run Map/Reduce jobs with any executable or script as the mapper and/or the reducer. For example:
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-input myInputDirs \
-output myOutputDir \
-mapper /bin/cat \
@ -88,7 +88,7 @@ Streaming supports streaming command options as well as [generic command options
**Note:** Be sure to place the generic options before the streaming options, otherwise the command will fail. For an example, see [Making Archives Available to Tasks](#Making_Archives_Available_to_Tasks).
hadoop command [genericOptions] [streamingOptions]
mapred streaming [genericOptions] [streamingOptions]
The Hadoop streaming command options are listed here:
@ -115,7 +115,7 @@ $H3 Specifying a Java Class as the Mapper/Reducer
You can supply a Java class as the mapper and/or the reducer.
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-input myInputDirs \
-output myOutputDir \
-inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat \
@ -128,7 +128,7 @@ $H3 Packaging Files With Job Submissions
You can specify any executable as the mapper and/or the reducer. The executables do not need to pre-exist on the machines in the cluster; however, if they don't, you will need to use the "-file" option to tell the framework to pack your executable files as a part of job submission. For example:
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-input myInputDirs \
-output myOutputDir \
-mapper myPythonScript.py \
@ -139,7 +139,7 @@ The above example specifies a user defined Python executable as the mapper. The
In addition to executable files, you can also package other auxiliary files (such as dictionaries, configuration files, etc.) that may be used by the mapper and/or the reducer. For example:
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-input myInputDirs \
-output myOutputDir \
-mapper myPythonScript.py \
@ -216,7 +216,7 @@ $H4 Specifying the Number of Reducers
To specify the number of reducers, for example two, use:
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-D mapreduce.job.reduces=2 \
-input myInputDirs \
-output myOutputDir \
@ -229,7 +229,7 @@ As noted earlier, when the Map/Reduce framework reads a line from the stdout of
However, you can customize this default. You can specify a field separator other than the tab character (the default), and you can specify the nth (n \>= 1) character rather than the first character in a line (the default) as the separator between the key and value. For example:
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-D stream.map.output.field.separator=. \
-D stream.num.map.output.key.fields=4 \
-input myInputDirs \
@ -279,7 +279,7 @@ User can specify a different symlink name for -archives using \#.
In this example, the input.txt file has two lines specifying the names of the two files: cachedir.jar/cache.txt and cachedir.jar/cache2.txt. "cachedir.jar" is a symlink to the archived directory, which has the files "cache.txt" and "cache2.txt".
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-archives 'hdfs://hadoop-nn1.example.com/user/me/samples/cachefile/cachedir.jar' \
-D mapreduce.job.maps=1 \
-D mapreduce.job.reduces=1 \
@ -325,7 +325,7 @@ $H3 Hadoop Partitioner Class
Hadoop has a library class, [KeyFieldBasedPartitioner](../api/org/apache/hadoop/mapred/lib/KeyFieldBasedPartitioner.html), that is useful for many applications. This class allows the Map/Reduce framework to partition the map outputs based on certain key fields, not the whole keys. For example:
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-D stream.map.output.field.separator=. \
-D stream.num.map.output.key.fields=4 \
-D map.output.key.field.separator=. \
@ -375,7 +375,7 @@ $H3 Hadoop Comparator Class
Hadoop has a library class, [KeyFieldBasedComparator](../api/org/apache/hadoop/mapreduce/lib/partition/KeyFieldBasedComparator.html), that is useful for many applications. This class provides a subset of features provided by the Unix/GNU Sort. For example:
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-D mapreduce.job.output.key.comparator.class=org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator \
-D stream.map.output.field.separator=. \
-D stream.num.map.output.key.fields=4 \
@ -411,7 +411,7 @@ Hadoop has a library package called [Aggregate](../api/org/apache/hadoop/mapred/
To use Aggregate, simply specify "-reducer aggregate":
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-input myInputDirs \
-output myOutputDir \
-mapper myAggregatorForKeyCount.py \
@ -444,7 +444,7 @@ $H3 Hadoop Field Selection Class
Hadoop has a library class, [FieldSelectionMapReduce](../api/org/apache/hadoop/mapred/lib/FieldSelectionMapReduce.html), that effectively allows you to process text data like the unix "cut" utility. The map function defined in the class treats each input key/value pair as a list of fields. You can specify the field separator (the default is the tab character). You can select an arbitrary list of fields as the map output key, and an arbitrary list of fields as the map output value. Similarly, the reduce function defined in the class treats each input key/value pair as a list of fields. You can select an arbitrary list of fields as the reduce output key, and an arbitrary list of fields as the reduce output value. For example:
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-D mapreduce.map.output.key.field.separator=. \
-D mapreduce.partition.keypartitioner.options=-k1,2 \
-D mapreduce.fieldsel.data.field.separator=. \
@ -495,7 +495,7 @@ Using an alias will not work, but variable substitution is allowed as shown in t
charlie 80
dan 75
$ c2='cut -f2'; hadoop jar hadoop-streaming-${project.version}.jar \
$ c2='cut -f2'; mapred streaming \
-D mapreduce.job.name='Experiment' \
-input /user/me/samples/student_marks \
-output /user/me/samples/student_out \
@ -525,7 +525,7 @@ $H3 How do I specify multiple input directories?
You can specify multiple input directories with multiple '-input' options:
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-input '/user/foo/dir1' -input '/user/foo/dir2' \
(rest of the command)
@ -541,7 +541,7 @@ $H3 How do I parse XML documents using streaming?
You can use the record reader StreamXmlRecordReader to process XML documents.
hadoop jar hadoop-streaming-${project.version}.jar \
mapred streaming \
-inputreader "StreamXmlRecord,begin=BEGIN_STRING,end=END_STRING" \
(rest of the command)

View File

@ -16,7 +16,12 @@
# limitations under the License.
MYNAME="${BASH_SOURCE-$0}"
HADOOP_SHELL_EXECNAME="${MYNAME##*/}"
## @description build up the yarn command's usage text.
## @audience public
## @stability stable
## @replaceable no
function hadoop_usage
{
hadoop_add_option "--buildpaths" "attempt to add class files from build tree"
@ -46,54 +51,36 @@ function hadoop_usage
hadoop_add_subcommand "timelineserver" "run the timeline server"
hadoop_add_subcommand "top" "view cluster information"
hadoop_add_subcommand "version" "print the version"
hadoop_generate_usage "${MYNAME}" true
hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" true
}
## @description Default command handler for yarn command
## @audience public
## @stability stable
## @replaceable no
## @param CLI arguments
function yarncmd_case
{
subcmd=$1
shift
# let's locate libexec...
if [[ -n "${HADOOP_HOME}" ]]; then
HADOOP_DEFAULT_LIBEXEC_DIR="${HADOOP_HOME}/libexec"
else
bin=$(cd -P -- "$(dirname -- "${MYNAME}")" >/dev/null && pwd -P)
HADOOP_DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
# if no args specified, show usage
if [[ $# = 0 ]]; then
hadoop_exit_with_usage 1
fi
# get arguments
COMMAND=$1
shift
case "${COMMAND}" in
case ${subcmd} in
application|applicationattempt|container)
CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.ApplicationCLI
hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
set -- "${COMMAND}" "$@"
set -- "${subcmd}" "$@"
;;
classpath)
hadoop_do_classpath_subcommand CLASS "$@"
hadoop_do_classpath_subcommand HADOOP_CLASSNAME "$@"
;;
cluster)
CLASS=org.apache.hadoop.yarn.client.cli.ClusterCLI
HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.ClusterCLI
hadoop_debug "Append YARN_CLIENT_OPTS onto YARN_OPTS"
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
daemonlog)
CLASS=org.apache.hadoop.log.LogLevel
HADOOP_CLASSNAME=org.apache.hadoop.log.LogLevel
hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
;;
@ -109,30 +96,30 @@ case "${COMMAND}" in
exit 0
;;
jar)
CLASS=org.apache.hadoop.util.RunJar
HADOOP_CLASSNAME=org.apache.hadoop.util.RunJar
hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
;;
historyserver)
supportdaemonization="true"
HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
echo "Instead use the timelineserver command for it." 1>&2
echo "Starting the History Server anyway..." 1>&2
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
;;
logs)
CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI
HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.LogsCLI
hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
;;
node)
CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.NodeCLI
hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
;;
nodemanager)
supportdaemonization="true"
CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
hadoop_debug "Append YARN_NODEMANAGER_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_NODEMANAGER_OPTS}"
# Backwards compatibility
@ -141,58 +128,61 @@ case "${COMMAND}" in
fi
;;
proxyserver)
supportdaemonization="true"
CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
hadoop_debug "Append YARN_PROXYSERVER_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_PROXYSERVER_OPTS}"
# Backwards compatibility
if [[ -n "${YARN_PROXYSERVER_HEAPSIZE}" ]]; then
# shellcheck disable=SC2034
HADOOP_HEAPSIZE_MAX="${YARN_PROXYSERVER_HEAPSIZE}"
fi
;;
queue)
CLASS=org.apache.hadoop.yarn.client.cli.QueueCLI
HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.QueueCLI
hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
;;
resourcemanager)
supportdaemonization="true"
CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_RESOURCEMANAGER_OPTS}"
hadoop_debug "Append YARN_RESOURCEMANAGER_OPTS onto HADOOP_OPTS"
# Backwards compatibility
if [[ -n "${YARN_RESOURCEMANAGER_HEAPSIZE}" ]]; then
# shellcheck disable=SC2034
HADOOP_HEAPSIZE_MAX="${YARN_RESOURCEMANAGER_HEAPSIZE}"
fi
;;
rmadmin)
CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
HADOOP_CLASSNAME='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
;;
scmadmin)
CLASS='org.apache.hadoop.yarn.client.SCMAdmin'
HADOOP_CLASSNAME='org.apache.hadoop.yarn.client.SCMAdmin'
hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
;;
sharedcachemanager)
supportdaemonization="true"
CLASS='org.apache.hadoop.yarn.server.sharedcachemanager.SharedCacheManager'
HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.sharedcachemanager.SharedCacheManager'
hadoop_debug "Append YARN_SHAREDCACHEMANAGER_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_SHAREDCACHEMANAGER_OPTS}"
;;
timelineserver)
supportdaemonization="true"
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
hadoop_debug "Append YARN_TIMELINESERVER_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_TIMELINESERVER_OPTS}"
# Backwards compatibility
if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then
# shellcheck disable=SC2034
HADOOP_HEAPSIZE_MAX="${YARN_TIMELINESERVER_HEAPSIZE}"
fi
;;
version)
CLASS=org.apache.hadoop.util.VersionInfo
HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo
hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
;;
@ -223,46 +213,108 @@ case "${COMMAND}" in
set -- "${args[@]}"
fi
fi
CLASS=org.apache.hadoop.yarn.client.cli.TopCLI
HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.TopCLI
hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
;;
*)
CLASS="${COMMAND}"
if ! hadoop_validate_classname "${CLASS}"; then
HADOOP_CLASSNAME="${subcmd}"
if ! hadoop_validate_classname "${HADOOP_CLASSNAME}"; then
hadoop_exit_with_usage 1
fi
;;
esac
esac
}
hadoop_verify_user "${COMMAND}"
# let's locate libexec...
# Prefer ${HADOOP_HOME}/libexec; otherwise derive it from the physical
# directory that holds this script.
if [[ -n "${HADOOP_HOME}" ]]; then
  HADOOP_DEFAULT_LIBEXEC_DIR="${HADOOP_HOME}/libexec"
else
  bin=$(cd -P -- "$(dirname -- "${MYNAME}")" >/dev/null && pwd -P)
  HADOOP_DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi

# An already-set HADOOP_LIBEXEC_DIR wins over the computed default.
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
  . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
  # diagnostics belong on stderr so they do not pollute captured stdout
  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 1>&2
  exit 1
fi
# if no args specified, show usage
if (( $# == 0 )); then
  hadoop_exit_with_usage 1
fi

# get arguments: the first word is the subcommand, everything after it
# is kept as an array so each argument survives intact
HADOOP_SUBCMD=$1
shift
HADOOP_SUBCMD_ARGS=("$@")

# a function named yarn_subcommand_<name> takes precedence over the
# built-in yarncmd_case handler
if declare -f "yarn_subcommand_${HADOOP_SUBCMD}" >/dev/null 2>&1; then
  hadoop_debug "Calling dynamically: yarn_subcommand_${HADOOP_SUBCMD} ${HADOOP_SUBCMD_ARGS[*]}"
  "yarn_subcommand_${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}"
else
  yarncmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}"
fi
hadoop_verify_user "${HADOOP_SUBCMD}"
if [[ ${HADOOP_SLAVE_MODE} = true ]]; then
hadoop_common_slave_mode_execute "${HADOOP_YARN_HOME}/bin/yarn" "${HADOOP_USER_PARAMS[@]}"
exit $?
fi
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}"
hadoop_verify_secure_prereq
hadoop_setup_secure_service
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
else
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
fi
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
fi
hadoop_finalize
if [[ -n "${supportdaemonization}" ]]; then
if [[ -n "${secure_service}" ]]; then
hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" \
"${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \
"${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${priv_pidfile}" \
"${priv_outfile}" \
"${priv_errfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
else
hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
"${daemon_pidfile}" "${daemon_outfile}" "$@"
hadoop_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
fi
exit $?
else
hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
# shellcheck disable=SC2086
hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}"
fi

View File

@ -1,4 +1,4 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.