HADOOP-14202. fix jsvc/secure user var inconsistencies

Signed-off-by: John Zhuge <jzhuge@apache.org>
Allen Wittenauer 2017-03-29 09:56:25 -07:00
parent ad24464be8
commit 96cbb4fce2
17 changed files with 360 additions and 354 deletions

View File

@ -67,13 +67,11 @@ function hadoopcmd_case
hadoop_error ""
#try to locate hdfs and if present, delegate to it.
if [[ -f "${HADOOP_HDFS_HOME}/bin/hdfs" ]]; then
# shellcheck disable=SC2086
exec "${HADOOP_HDFS_HOME}/bin/hdfs" \
--config "${HADOOP_CONF_DIR}" "${subcmd}" "$@"
--config "${HADOOP_CONF_DIR}" "${subcmd}" "$@"
elif [[ -f "${HADOOP_HOME}/bin/hdfs" ]]; then
# shellcheck disable=SC2086
exec "${HADOOP_HOME}/bin/hdfs" \
--config "${HADOOP_CONF_DIR}" "${subcmd}" "$@"
--config "${HADOOP_CONF_DIR}" "${subcmd}" "$@"
else
hadoop_error "HADOOP_HDFS_HOME not found!"
exit 1
@ -174,9 +172,9 @@ else
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
# shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
@ -201,7 +199,7 @@ if hadoop_need_reexec hadoop "${HADOOP_SUBCMD}"; then
exit $?
fi
hadoop_verify_user "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
hadoop_verify_user_perm "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
HADOOP_SUBCMD_ARGS=("$@")
@ -221,60 +219,5 @@ fi
hadoop_subcommand_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}"
hadoop_subcommand_secure_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
hadoop_verify_secure_prereq
hadoop_setup_secure_service
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
else
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
fi
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
else
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
fi
fi
hadoop_finalize
if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${priv_pidfile}" \
"${priv_outfile}" \
"${priv_errfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
else
hadoop_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
fi
exit $?
else
# shellcheck disable=SC2086
hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}"
fi
# everything is in globals at this point, so call the generic handler
hadoop_generic_java_subcmd_handler

View File

@ -38,8 +38,10 @@
# settings that might be different between daemons & interactive
# you must be this high to ride the ride
if [[ -z "${BASH_VERSINFO}" ]] || [[ "${BASH_VERSINFO}" -lt 3 ]]; then
echo "Hadoop requires bash v3 or better. Sorry."
if [[ -z "${BASH_VERSINFO[0]}" ]] \
|| [[ "${BASH_VERSINFO[0]}" -lt 3 ]] \
|| [[ "${BASH_VERSINFO[0]}" -eq 3 && "${BASH_VERSINFO[1]}" -lt 2 ]]; then
echo "bash v3.2+ is required. Sorry."
exit 1
fi
@ -55,8 +57,10 @@ fi
# get our functions defined for usage later
if [[ -n "${HADOOP_COMMON_HOME}" ]] &&
[[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-functions.sh" ]]; then
# shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
. "${HADOOP_COMMON_HOME}/libexec/hadoop-functions.sh"
elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh" ]]; then
# shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
. "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh"
else
echo "ERROR: Unable to exec ${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh." 1>&2
@ -68,8 +72,10 @@ hadoop_deprecate_envvar HADOOP_PREFIX HADOOP_HOME
# allow overrides of the above and pre-defines of the below
if [[ -n "${HADOOP_COMMON_HOME}" ]] &&
[[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-layout.sh" ]]; then
# shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-layout.sh.example
. "${HADOOP_COMMON_HOME}/libexec/hadoop-layout.sh"
elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh" ]]; then
# shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-layout.sh.example
. "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh"
fi

View File

@ -115,9 +115,9 @@ function hadoop_verify_entry
[[ ${!1} =~ \ ${2}\ ]]
}
## @description Check if we are running with privilege
## @description Check if we are running with priv
## @description by default, this implementation looks for
## @description EUID=0. For OSes that have true privilege
## @description EUID=0. For OSes that have true priv
## @description separation, this should be something more complex
## @audience private
## @stability evolving
@ -144,16 +144,13 @@ function hadoop_su
{
declare user=$1
shift
declare idret
if hadoop_privilege_check; then
id -u "${user}" >/dev/null 2>&1
idret=$?
if [[ ${idret} != 0 ]]; then
if hadoop_verify_user_resolves user; then
su -l "${user}" -- "$@"
else
hadoop_error "ERROR: Refusing to run as root: ${user} account is not found. Aborting."
return 1
else
su -l "${user}" -- "$@"
fi
else
"$@"
@ -194,15 +191,23 @@ function hadoop_uservar_su
declare uprogram
declare ucommand
declare uvar
declare svar
if hadoop_privilege_check; then
uvar=$(hadoop_get_verify_uservar "${program}" "${command}")
uvar=$(hadoop_build_custom_subcmd_var "${program}" "${command}" USER)
svar=$(hadoop_build_custom_subcmd_var "${program}" "${command}" SECURE_USER)
if [[ -n "${!uvar}" ]]; then
hadoop_su "${!uvar}" "$@"
elif [[ -n "${!svar}" ]]; then
## if we are here, then SECURE_USER is defined but USER is not
## we are already privileged, so just run the command and hope
## for the best
"$@"
else
hadoop_error "ERROR: Attempting to launch ${program} ${command} as root"
hadoop_error "ERROR: but there is no ${uvar} defined. Aborting launch."
hadoop_error "ERROR: Attempting to operate on ${program} ${command} as root"
hadoop_error "ERROR: but there is no ${uvar} defined. Aborting operation."
return 1
fi
else
@ -477,9 +482,11 @@ function hadoop_bootstrap
# by default, we have not been self-re-execed
HADOOP_REEXECED_CMD=false
# shellcheck disable=SC2034
HADOOP_SUBCMD_SECURESERVICE=false
# This is the default we claim in hadoop-env.sh
JSVC_HOME=${JSVC_HOME:-"/usr/bin"}
# usage output set to zero
hadoop_reset_usage
@ -533,7 +540,7 @@ function hadoop_exec_hadoopenv
if [[ -z "${HADOOP_ENV_PROCESSED}" ]]; then
if [[ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]]; then
export HADOOP_ENV_PROCESSED=true
# shellcheck disable=SC1090
# shellcheck source=./hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
. "${HADOOP_CONF_DIR}/hadoop-env.sh"
fi
fi
@ -789,10 +796,8 @@ function hadoop_populate_workers_file
local workersfile=$1
shift
if [[ -f "${workersfile}" ]]; then
# shellcheck disable=2034
HADOOP_WORKERS="${workersfile}"
elif [[ -f "${HADOOP_CONF_DIR}/${workersfile}" ]]; then
# shellcheck disable=2034
HADOOP_WORKERS="${HADOOP_CONF_DIR}/${workersfile}"
else
hadoop_error "ERROR: Cannot find hosts file \"${workersfile}\""
@ -2128,16 +2133,47 @@ function hadoop_secure_daemon_handler
esac
}
## @description Get the environment variable used to validate users
## @description autodetect whether this is a priv subcmd
## @description by whether or not a priv user var exists
## @description and if HADOOP_SECURE_CLASSNAME is defined
## @audience public
## @stability stable
## @replaceable yes
## @param command
## @param subcommand
## @return string
function hadoop_get_verify_uservar
## @return 1 = not priv
## @return 0 = priv
function hadoop_detect_priv_subcmd
{
declare program=$1
declare command=$2
if [[ -z "${HADOOP_SECURE_CLASSNAME}" ]]; then
hadoop_debug "No secure classname defined."
return 1
fi
uvar=$(hadoop_build_custom_subcmd_var "${program}" "${command}" SECURE_USER)
if [[ -z "${!uvar}" ]]; then
hadoop_debug "No secure user defined."
return 1
fi
return 0
}
## @description Build custom subcommand var
## @audience public
## @stability stable
## @replaceable yes
## @param command
## @param subcommand
## @param customid
## @return string
function hadoop_build_custom_subcmd_var
{
declare program=$1
declare command=$2
declare custom=$3
declare uprogram
declare ucommand
@ -2150,7 +2186,25 @@ function hadoop_get_verify_uservar
ucommand=${command^^}
fi
echo "${uprogram}_${ucommand}_USER"
echo "${uprogram}_${ucommand}_${custom}"
}
## @description Verify that the username in a var resolves to a user id
## @audience public
## @stability stable
## @replaceable yes
## @param userstring
## @return 0 for success
## @return 1 for failure
function hadoop_verify_user_resolves
{
declare userstr=$1
if [[ -z ${userstr} || -z ${!userstr} ]] ; then
return 1
fi
id -u "${!userstr}" >/dev/null 2>&1
}
## @description Verify that ${USER} is allowed to execute the
@ -2162,13 +2216,13 @@ function hadoop_get_verify_uservar
## @param subcommand
## @return return 0 on success
## @return exit 1 on failure
function hadoop_verify_user
function hadoop_verify_user_perm
{
declare program=$1
declare command=$2
declare uvar
uvar=$(hadoop_get_verify_uservar "${program}" "${command}")
uvar=$(hadoop_build_custom_subcmd_var "${program}" "${command}" USER)
if [[ -n ${!uvar} ]]; then
if [[ ${!uvar} != "${USER}" ]]; then
@ -2204,7 +2258,7 @@ function hadoop_need_reexec
# otherwise no, don't re-exec and let the system deal with it.
if hadoop_privilege_check; then
uvar=$(hadoop_get_verify_uservar "${program}" "${command}")
uvar=$(hadoop_build_custom_subcmd_var "${program}" "${command}" USER)
if [[ -n ${!uvar} ]]; then
if [[ ${!uvar} != "${USER}" ]]; then
return 0
@ -2217,7 +2271,7 @@ function hadoop_need_reexec
## @description Add custom (program)_(command)_OPTS to HADOOP_OPTS.
## @description Also handles the deprecated cases from pre-3.x.
## @audience public
## @stability stable
## @stability evolving
## @replaceable yes
## @param program
## @param subcommand
@ -2239,6 +2293,10 @@ function hadoop_subcommand_opts
# case the contents of vars. This is faster than
# calling tr.
## We don't call hadoop_build_custom_subcmd_var here
## since we need to construct this for the deprecation
## cases. For Hadoop 4.x, this needs to get cleaned up.
if [[ -z "${BASH_VERSINFO[0]}" ]] \
|| [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then
uprogram=$(echo "${program}" | tr '[:lower:]' '[:upper:]')
@ -2288,23 +2346,10 @@ function hadoop_subcommand_secure_opts
return 1
fi
# bash 4 and up have built-in ways to upper and lower
# case the contents of vars. This is faster than
# calling tr.
if [[ -z "${BASH_VERSINFO[0]}" ]] \
|| [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then
uprogram=$(echo "${program}" | tr '[:lower:]' '[:upper:]')
ucommand=$(echo "${command}" | tr '[:lower:]' '[:upper:]')
else
uprogram=${program^^}
ucommand=${command^^}
fi
# HDFS_DATANODE_SECURE_EXTRA_OPTS
# HDFS_NFS3_SECURE_EXTRA_OPTS
# ...
uvar="${uprogram}_${ucommand}_SECURE_EXTRA_OPTS"
uvar=$(hadoop_build_custom_subcmd_var "${program}" "${command}" SECURE_EXTRA_OPTS)
if [[ -n ${!uvar} ]]; then
hadoop_debug "Appending ${uvar} onto HADOOP_OPTS"
@ -2353,7 +2398,6 @@ function hadoop_parse_args
hadoop_debug "hadoop_parse_args: processing $1"
case $1 in
--buildpaths)
# shellcheck disable=SC2034
HADOOP_ENABLE_BUILD_PATHS=true
shift
((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+1))
@ -2364,7 +2408,6 @@ function hadoop_parse_args
shift
((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2))
if [[ -d "${confdir}" ]]; then
# shellcheck disable=SC2034
HADOOP_CONF_DIR="${confdir}"
elif [[ -z "${confdir}" ]]; then
hadoop_error "ERROR: No parameter provided for --config "
@ -2387,7 +2430,6 @@ function hadoop_parse_args
;;
--debug)
shift
# shellcheck disable=SC2034
HADOOP_SHELL_SCRIPT_DEBUG=true
((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+1))
;;
@ -2396,7 +2438,6 @@ function hadoop_parse_args
;;
--hostnames)
shift
# shellcheck disable=SC2034
HADOOP_WORKER_NAMES="$1"
shift
((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2))
@ -2460,3 +2501,99 @@ function hadoop_sed_escape
{
sed -e 's/[\/&]/\\&/g' <<< "$1"
}
## @description Handle subcommands from main program entries
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_generic_java_subcmd_handler
{
declare priv_outfile
declare priv_errfile
declare priv_pidfile
declare daemon_outfile
declare daemon_pidfile
declare secureuser
# The default/expected way to determine if a daemon is going to run in secure
# mode is defined by hadoop_detect_priv_subcmd. If this returns true
# then set up the secure user var and tell the world we're in secure mode
if hadoop_detect_priv_subcmd "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"; then
HADOOP_SUBCMD_SECURESERVICE=true
secureuser=$(hadoop_build_custom_subcmd_var "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" SECURE_USER)
if ! hadoop_verify_user_resolves "${secureuser}"; then
hadoop_error "ERROR: User defined in ${secureuser} (${!secureuser}) does not exist. Aborting."
exit 1
fi
HADOOP_SECURE_USER="${!secureuser}"
fi
# check if we're running in secure mode.
# breaking this up from the above lets 3rd parties
# do things a bit differently
# secure services require some extra setup
# if yes, then we need to define all of the priv and daemon stuff
# if not, then we just need to define daemon stuff.
# note the daemon vars are purposefully different between the two
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
hadoop_subcommand_secure_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
hadoop_verify_secure_prereq
hadoop_setup_secure_service
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
else
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
fi
# are we actually in daemon mode?
# if yes, use the daemon logger and the appropriate log file.
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
else
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
fi
fi
# finish defining the environment: system properties, env vars, class paths, etc.
hadoop_finalize
# do the hard work of launching a daemon or just executing our interactive
# java class
if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_SECURE_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${priv_pidfile}" \
"${priv_outfile}" \
"${priv_errfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
else
hadoop_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
fi
exit $?
else
hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}"
fi
}

View File

@ -269,7 +269,7 @@ esac
#
# When running a secure daemon, the default value of HADOOP_IDENT_STRING
# ends up being a bit bogus. Therefore, by default, the code will
# replace HADOOP_IDENT_STRING with HADOOP_SECURE_xx_USER. If one wants
# replace HADOOP_IDENT_STRING with HADOOP_xx_SECURE_USER. If one wants
# to keep HADOOP_IDENT_STRING untouched, then uncomment this line.
# export HADOOP_SECURE_IDENT_PRESERVE="true"
@ -325,20 +325,13 @@ esac
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
# This will replace the hadoop.id.str Java property in secure mode.
# export HADOOP_SECURE_DN_USER=hdfs
# export HDFS_DATANODE_SECURE_USER=hdfs
# Supplemental options for secure datanodes
# By default, Hadoop uses jsvc which needs to know to launch a
# server jvm.
# export HDFS_DATANODE_SECURE_EXTRA_OPTS="-jvm server"
# Where datanode log files are stored in the secure data environment.
# This will replace the hadoop.log.dir Java property in secure mode.
# export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_SECURE_LOG_DIR}
# Where datanode pid files are stored in the secure data environment.
# export HADOOP_SECURE_DN_PID_DIR=${HADOOP_SECURE_PID_DIR}
###
# NFS3 Gateway specific parameters
###
@ -361,7 +354,7 @@ esac
# On privileged gateways, user to run the gateway as after dropping privileges
# This will replace the hadoop.id.str Java property in secure mode.
# export HADOOP_PRIVILEGED_NFS_USER=nfsserver
# export HDFS_NFS3_SECURE_USER=nfsserver
###
# ZKFailoverController specific parameters

View File

@ -168,9 +168,9 @@ Some products such as Apache Oozie which access the services of Hadoop on behalf
Because the DataNode data transfer protocol does not use the Hadoop RPC framework, DataNodes must authenticate themselves using privileged ports which are specified by `dfs.datanode.address` and `dfs.datanode.http.address`. This authentication is based on the assumption that the attacker won't be able to get root privileges on DataNode hosts.
When you execute the `hdfs datanode` command as root, the server process binds privileged ports at first, then drops privilege and runs as the user account specified by `HADOOP_SECURE_DN_USER`. This startup process uses [the jsvc program](https://commons.apache.org/proper/commons-daemon/jsvc.html "Link to Apache Commons Jsvc") installed to `JSVC_HOME`. You must specify `HADOOP_SECURE_DN_USER` and `JSVC_HOME` as environment variables on start up (in `hadoop-env.sh`).
When you execute the `hdfs datanode` command as root, the server process binds privileged ports at first, then drops privilege and runs as the user account specified by `HDFS_DATANODE_SECURE_USER`. This startup process uses [the jsvc program](https://commons.apache.org/proper/commons-daemon/jsvc.html "Link to Apache Commons Jsvc") installed to `JSVC_HOME`. You must specify `HDFS_DATANODE_SECURE_USER` and `JSVC_HOME` as environment variables on start up (in `hadoop-env.sh`).
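For example, a minimal `hadoop-env.sh` fragment for a jsvc-based secure DataNode might look like the following sketch (the account name and `JSVC_HOME` path are illustrative):

```bash
# Run `hdfs datanode` as root; jsvc binds the privileged ports and
# then drops to this unprivileged account.
export HDFS_DATANODE_SECURE_USER=hdfs

# Where the jsvc binary is installed.
export JSVC_HOME=/usr/bin

# jsvc needs to be told to launch a server jvm.
export HDFS_DATANODE_SECURE_EXTRA_OPTS="-jvm server"
```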
As of version 2.6.0, SASL can be used to authenticate the data transfer protocol. In this configuration, it is no longer required for secured clusters to start the DataNode as root using `jsvc` and bind to privileged ports. To enable SASL on data transfer protocol, set `dfs.data.transfer.protection` in hdfs-site.xml, set a non-privileged port for `dfs.datanode.address`, set `dfs.http.policy` to `HTTPS_ONLY` and make sure the `HADOOP_SECURE_DN_USER` environment variable is not defined. Note that it is not possible to use SASL on data transfer protocol if `dfs.datanode.address` is set to a privileged port. This is required for backwards-compatibility reasons.
As of version 2.6.0, SASL can be used to authenticate the data transfer protocol. In this configuration, it is no longer required for secured clusters to start the DataNode as root using `jsvc` and bind to privileged ports. To enable SASL on data transfer protocol, set `dfs.data.transfer.protection` in hdfs-site.xml, set a non-privileged port for `dfs.datanode.address`, set `dfs.http.policy` to `HTTPS_ONLY` and make sure the `HDFS_DATANODE_SECURE_USER` environment variable is not defined. Note that it is not possible to use SASL on data transfer protocol if `dfs.datanode.address` is set to a privileged port. This is required for backwards-compatibility reasons.
In order to migrate an existing cluster that used root authentication to start using SASL instead, first ensure that version 2.6.0 or later has been deployed to all cluster nodes as well as any external applications that need to connect to the cluster. Only versions 2.6.0 and later of the HDFS client can connect to a DataNode that uses SASL for authentication of data transfer protocol, so it is vital that all callers have the correct version before migrating. After version 2.6.0 or later has been deployed everywhere, update configuration of any external applications to enable SASL. If an HDFS client is enabled for SASL, then it can connect successfully to a DataNode running with either root authentication or SASL authentication. Changing configuration for all clients guarantees that subsequent configuration changes on DataNodes will not disrupt the applications. Finally, each individual DataNode can be migrated by changing its configuration and restarting. It is acceptable to have a mix of some DataNodes running with root authentication and some DataNodes running with SASL authentication temporarily during this migration period, because an HDFS client enabled for SASL can connect to both.
@ -293,7 +293,7 @@ The following settings allow configuring SSL access to the NameNode web UI (opti
| `dfs.encrypt.data.transfer.algorithm` | | optionally set to `3des` or `rc4` when using data encryption to control encryption algorithm |
| `dfs.encrypt.data.transfer.cipher.suites` | | optionally set to `AES/CTR/NoPadding` to activate AES encryption when using data encryption |
| `dfs.encrypt.data.transfer.cipher.key.bitlength` | | optionally set to `128`, `192` or `256` to control key bit length when using AES with data encryption |
| `dfs.data.transfer.protection` | | `authentication` : authentication only; `integrity` : integrity check in addition to authentication; `privacy` : data encryption in addition to integrity This property is unspecified by default. Setting this property enables SASL for authentication of data transfer protocol. If this is enabled, then `dfs.datanode.address` must use a non-privileged port, `dfs.http.policy` must be set to `HTTPS_ONLY` and the `HADOOP_SECURE_DN_USER` environment variable must be undefined when starting the DataNode process. |
| `dfs.data.transfer.protection` | | `authentication` : authentication only; `integrity` : integrity check in addition to authentication; `privacy` : data encryption in addition to integrity This property is unspecified by default. Setting this property enables SASL for authentication of data transfer protocol. If this is enabled, then `dfs.datanode.address` must use a non-privileged port, `dfs.http.policy` must be set to `HTTPS_ONLY` and the `HDFS_DATANODE_SECURE_USER` environment variable must be undefined when starting the DataNode process. |
### WebHDFS
@ -413,7 +413,7 @@ Set the environment variable `HADOOP_JAAS_DEBUG` to `true`.
export HADOOP_JAAS_DEBUG=true
```
Edit the `log4j.properties` file to log Hadoop's security package at `DEBUG` level.
```
log4j.logger.org.apache.hadoop.security=DEBUG
@ -434,19 +434,19 @@ It contains a series of probes for the JVM's configuration and the environment,
dumps out some system files (`/etc/krb5.conf`, `/etc/ntp.conf`), prints
out some system state and then attempts to log in to Kerberos as the current user,
or a specific principal in a named keytab.
The output of the command can be used for local diagnostics, or forwarded to
whoever supports the cluster.
The `KDiag` command has its own entry point; it is currently not hooked up
to the end-user CLI.
It is invoked simply by passing its full classname to one of the `bin/hadoop`,
`bin/hdfs` or `bin/yarn` commands. Accordingly, it will display the kerberos client
state of the command used to invoke it.
```
hadoop org.apache.hadoop.security.KDiag
hdfs org.apache.hadoop.security.KDiag
yarn org.apache.hadoop.security.KDiag
```
@ -557,8 +557,8 @@ hdfs org.apache.hadoop.security.KDiag --resource hbase-default.xml --resource hb
yarn org.apache.hadoop.security.KDiag --resource yarn-default.xml --resource yarn-site.xml
```
For extra logging during the operation, set the logging and `HADOOP_JAAS_DEBUG`
environment variable to the values listed in "Troubleshooting". The JVM
options are automatically set in KDiag.
#### `--secure`: Fail if the command is not executed on a secure cluster.
@ -589,7 +589,7 @@ hdfs org.apache.hadoop.security.KDiag \
--keylen 1024 \
--keytab zk.service.keytab --principal zookeeper/devix.example.org@REALM
```
This attempts to perform all diagnostics without failing early, load in
the HDFS and YARN XML resources, require a minimum key length of 1024 bytes,
and log in as the principal `zookeeper/devix.example.org@REALM`, whose key must be in

View File

@ -32,6 +32,8 @@ HADOOP_CLIENT_OPTS="-Xmx1g -Dhadoop.socks.server=localhost:4000" hadoop fs -ls /
will increase the memory and send this command via a SOCKS proxy server.
NOTE: If 'YARN_CLIENT_OPTS' is defined, it will replace 'HADOOP_CLIENT_OPTS' when commands are run with 'yarn'.
### `(command)_(subcommand)_OPTS`
It is also possible to set options on a per-subcommand basis. This allows one to create special options for particular cases. The first part of the pattern is the command being used, but all uppercase. The second part is the subcommand being used. Finally, it is followed by the string `_OPTS`.
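For example, the following hypothetical setting would add JVM options only when `mapred distcp` is run (the heap size is arbitrary):

```bash
# Applies to `mapred distcp` and to nothing else.
MAPRED_DISTCP_OPTS="-Xmx2g"
```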
@ -103,13 +105,15 @@ In addition, daemons that run in an extra security mode also support `(command)_
Apache Hadoop provides a way to do a user check per-subcommand. While this method is easily circumvented and should not be considered a security feature, it does provide a mechanism by which to prevent accidents. For example, setting `HDFS_NAMENODE_USER=hdfs` will make the `hdfs namenode` and `hdfs --daemon start namenode` commands verify that the user running the commands is the hdfs user by checking the `USER` environment variable. This also works for non-daemons. Setting `HADOOP_DISTCP_USER=jane` will verify that `USER` is set to `jane` before being allowed to execute the `hadoop distcp` command.
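A minimal `hadoop-env.sh` sketch of both checks, using the user names from the text:

```bash
# `hdfs namenode` and `hdfs --daemon start namenode` will verify that
# ${USER} is hdfs before proceeding.
HDFS_NAMENODE_USER=hdfs

# Non-daemons are covered too: `hadoop distcp` now requires USER=jane.
HADOOP_DISTCP_USER=jane
```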
If a \_USER environment variable exists and commands are run with a privilege (e.g., as root; see hadoop_privilege_check in the API documentation), execution will switch to the specified user. For commands that support user account switching for security and therefore have a SECURE\_USER variable, the base \_USER variable needs to be the user that is expected to be used to switch to the SECURE\_USER account. For example:
If a \_USER environment variable exists and commands are run with a privilege (e.g., as root; see hadoop_privilege_check in the API documentation), execution will switch to the specified user first. For commands that support user account switching for security reasons and therefore have a SECURE\_USER variable (see more below), the base \_USER variable needs to be the user that is expected to be used to switch to the SECURE\_USER account. For example:
```bash
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
```
will force 'hdfs --daemon start datanode' to run as root, but it will eventually switch to the hdfs user after the privileged work has been completed.
Be aware that if the \-\-workers flag is used, the user switch happens *after* ssh is invoked. The multi-daemon start and stop commands in sbin will, however, switch (if appropriate) beforehand and will therefore use the keys of the specified \_USER.
## Developer and Advanced Administrator Environment
@ -172,7 +176,7 @@ which will result in the output of:
world I see you
```
It is also possible to add the new subcommands to the usage output. The `hadoop_add_subcommand` function adds text to the usage output. Utilizing the standard HADOOP_SHELL_EXECNAME variable, we can limit which command gets our new function.
It is also possible to add the new subcommands to the usage output. The `hadoop_add_subcommand` function adds text to the usage output. Utilizing the standard HADOOP\_SHELL\_EXECNAME variable, we can limit which command gets our new function.
```bash
if [[ "${HADOOP_SHELL_EXECNAME}" = "yarn" ]]; then
@ -191,12 +195,16 @@ function hdfs_subcommand_fetchdt
... will replace the existing `hdfs fetchdt` subcommand with a custom one.
Some key environment variables related to Dynamic Subcommands:
Some key environment variables for Dynamic Subcommands:
* HADOOP\_CLASSNAME
This is the name of the Java class to use when program execution continues.
* HADOOP\_PRIV\_CLASSNAME
This is the name of the Java class to use when a daemon is expected to be run in a privileged mode. (See more below.)
* HADOOP\_SHELL\_EXECNAME
This is the name of the script that is being executed. It will be one of hadoop, hdfs, mapred, or yarn.
@ -209,13 +217,13 @@ This is the subcommand that was passed on the command line.
This array contains the argument list after the Apache Hadoop common argument processing has taken place and is the same list that is passed to the subcommand function as arguments. For example, if `hadoop --debug subcmd 1 2 3` has been executed on the command line, then `${HADOOP_SUBCMD_ARGS[0]}` will be 1 and `hadoop_subcommand_subcmd` will also have $1 equal to 1. This array MAY be modified by subcommand functions to add or delete values from the argument list for further processing, as shown in the sketch after this list.
* HADOOP\_SECURE\_CLASSNAME
If this subcommand runs a service that supports the secure mode, this variable should be set to the classname of the secure version.
* HADOOP\_SUBCMD\_SECURESERVICE
If this command should/will be executed as a secure daemon, set this to true.
* HADOOP\_SUBCMD\_SECUREUSER
If this command should/will be executed as a secure daemon, set the user name to be used.
Setting this to true will force the subcommand to run in secure mode regardless of hadoop\_detect\_priv\_subcmd. It is expected that HADOOP\_SECURE\_USER will be set to the user that will be executing the final process. See more about secure mode.
* HADOOP\_SUBCMD\_SUPPORTDAEMONIZATION
@ -226,3 +234,12 @@ If this command can be executed as a daemon, set this to true.
This is the full content of the command line, prior to any parsing done. It will contain flags such as `--debug`. It MAY NOT be manipulated.
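As a sketch of such manipulation (the `--verbose` flag is invented for illustration), a subcommand function could trim the argument list like so:

```bash
## Drop a leading, hypothetical --verbose flag so the remaining
## arguments are all that reach the subcommand proper.
function hadoop_subcommand_subcmd
{
  if [[ "${HADOOP_SUBCMD_ARGS[0]}" = "--verbose" ]]; then
    HADOOP_SUBCMD_ARGS=("${HADOOP_SUBCMD_ARGS[@]:1}")
  fi
}
```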
The Apache Hadoop runtime facilities require that functions exit if no further processing is required. For example, in the hello example above, Java and other facilities were not required, so a simple `exit $?` was sufficient. However, if the function were to utilize `HADOOP_CLASSNAME`, then program execution must continue so that Java with the Apache Hadoop-specific parameters will be launched against the given Java class. Another example would be the case of an unrecoverable error. It is the function's responsibility to print an appropriate message (preferably using the hadoop_error API call) and exit appropriately.
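To make the contrast concrete, here is a hedged sketch of both styles (the function bodies and the class name are invented):

```bash
## Pure-shell subcommand: nothing left for Java to do, so exit.
function yarn_subcommand_hello
{
  echo "hello, world"
  exit 0
}

## Java-backed subcommand: set the class and return so execution
## continues into the generic Java handler.
function yarn_subcommand_greeter
{
  HADOOP_CLASSNAME=org.example.Greeter
}
```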
### Running with Privilege (Secure Mode)
Some daemons, such as the DataNode and the NFS gateway, may be run in a privileged mode. This means that they are expected to be launched as root and (by default) switched to another userid via jsvc. This allows for these daemons to grab a low, privileged port and then drop superuser privileges during normal execution. Running with privilege is also possible for 3rd parties utilizing Dynamic Subcommands. If the following are true:
* (command)\_(subcommand)\_SECURE\_USER environment variable is defined and points to a valid username
* HADOOP\_SECURE\_CLASSNAME is defined and points to a valid Java class
then the shell scripts will attempt to run the class as a command with privilege as it would the built-ins. In general, users are expected to define the \_SECURE\_USER variable and developers are expected to define the \_CLASSNAME in their shell script bootstrap.
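A hedged sketch of a privileged Dynamic Subcommand, with invented names (`mydaemon`, both classes, and the account):

```bash
## Developer side (shell bootstrap): declare the normal and secure
## classnames for a hypothetical `hdfs mydaemon` subcommand.
function hdfs_subcommand_mydaemon
{
  HADOOP_SUBCMD_SUPPORTDAEMONIZATION=true
  HADOOP_CLASSNAME=org.example.MyDaemon
  HADOOP_SECURE_CLASSNAME=org.example.SecureMyDaemonStarter
}

## Administrator side (hadoop-env.sh): once this is defined,
## hadoop_detect_priv_subcmd sees both conditions and the subcommand
## runs with privilege.
# export HDFS_MYDAEMON_SECURE_USER=mydaemon
```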

View File

@ -15,7 +15,7 @@
load hadoop-functions_test_helper
@test "hadoop_get_verify_uservar" {
run hadoop_get_verify_uservar cool program
@test "hadoop_build_custom_subcmd_var" {
run hadoop_build_custom_subcmd_var cool program USER
[ "${output}" = "COOL_PROGRAM_USER" ]
}

View File

@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load hadoop-functions_test_helper
@test "hadoop_detect_priv_subcmd (no classname) " {
run hadoop_detect_priv_subcmd test app
[ "${status}" = "1" ]
}
@test "hadoop_detect_priv_subcmd (classname; no user) " {
export HADOOP_SECURE_CLASSNAME=fake
run hadoop_detect_priv_subcmd test app
[ "${status}" = "1" ]
}
@test "hadoop_detect_priv_subcmd (classname; user) " {
export HADOOP_SECURE_CLASSNAME=fake
export TEST_APP_SECURE_USER=test
run hadoop_detect_priv_subcmd test app
[ "${status}" = "0" ]
}

View File

@ -15,39 +15,39 @@
load hadoop-functions_test_helper
@test "hadoop_verify_user (hadoop: no setting)" {
run hadoop_verify_user hadoop test
@test "hadoop_verify_user_perm (hadoop: no setting)" {
run hadoop_verify_user_perm hadoop test
[ "${status}" = "0" ]
}
@test "hadoop_verify_user (yarn: no setting)" {
run hadoop_verify_user yarn test
@test "hadoop_verify_user_perm (yarn: no setting)" {
run hadoop_verify_user_perm yarn test
[ "${status}" = "0" ]
}
@test "hadoop_verify_user (hadoop: allow)" {
@test "hadoop_verify_user_perm (hadoop: allow)" {
HADOOP_TEST_USER=${USER}
run hadoop_verify_user hadoop test
run hadoop_verify_user_perm hadoop test
[ "${status}" = "0" ]
}
@test "hadoop_verify_user (yarn: allow)" {
@test "hadoop_verify_user_perm (yarn: allow)" {
YARN_TEST_USER=${USER}
run hadoop_verify_user yarn test
run hadoop_verify_user_perm yarn test
[ "${status}" = "0" ]
}
# colon isn't a valid username, so let's use it
# this should fail regardless of who the user is
# that is running the test code
@test "hadoop_verify_user (hadoop: disallow)" {
@test "hadoop_verify_user_perm (hadoop: disallow)" {
HADOOP_TEST_USER=:
run hadoop_verify_user hadoop test
run hadoop_verify_user_perm hadoop test
[ "${status}" = "1" ]
}
@test "hadoop_verify_user (yarn: disallow)" {
@test "hadoop_verify_user_perm (yarn: disallow)" {
YARN_TEST_USER=:
run hadoop_verify_user yarn test
run hadoop_verify_user_perm yarn test
[ "${status}" = "1" ]
}

View File

@ -0,0 +1,44 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load hadoop-functions_test_helper
@test "hadoop_verify_user_resolves (bad: null)" {
run hadoop_verify_user_resolves
[ "${status}" = "1" ]
}
@test "hadoop_verify_user_resolves (bad: var string)" {
run hadoop_verify_user_resolves PrObAbLyWiLlNoTeXiSt
[ "${status}" = "1" ]
}
@test "hadoop_verify_user_resolves (bad: number as var)" {
run hadoop_verify_user_resolves 501
[ "${status}" = "1" ]
}
@test "hadoop_verify_user_resolves (good: name)" {
myvar=$(id -u -n)
run hadoop_verify_user_resolves myvar
[ "${status}" = "0" ]
}
@test "hadoop_verify_user_resolves (skip: number)" {
skip "id on uids is not platform consistent"
myvar=1
run hadoop_verify_user_resolves myvar
[ "${status}" = "0" ]
}

View File

@ -91,20 +91,10 @@ function hdfscmd_case
;;
datanode)
HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
# Determine if we're starting a secure datanode, and
# if so, redefine appropriate variables
if [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
HADOOP_SUBCMD_SECURESERVICE="true"
HADOOP_SUBCMD_SECUREUSER="${HADOOP_SECURE_DN_USER}"
# backward compatibility
HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_DN_PID_DIR}"
HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_DN_LOG_DIR}"
HADOOP_CLASSNAME="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter"
else
HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.datanode.DataNode'
fi
HADOOP_SECURE_CLASSNAME="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter"
HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.datanode.DataNode'
hadoop_deprecate_envvar HADOOP_SECURE_DN_PID_DIR HADOOP_SECURE_PID_DIR
hadoop_deprecate_envvar HADOOP_SECURE_DN_LOG_DIR HADOOP_SECURE_LOG_DIR
;;
debug)
HADOOP_CLASSNAME='org.apache.hadoop.hdfs.tools.DebugAdmin'
@ -168,18 +158,10 @@ function hdfscmd_case
;;
nfs3)
HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
if [[ -n "${HADOOP_PRIVILEGED_NFS_USER}" ]]; then
HADOOP_SUBCMD_SECURESERVICE="true"
HADOOP_SUBCMD_SECUREUSER="${HADOOP_PRIVILEGED_NFS_USER}"
# backward compatibility
HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_NFS3_PID_DIR}"
HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_NFS3_LOG_DIR}"
HADOOP_CLASSNAME=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter
else
HADOOP_CLASSNAME=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
fi
HADOOP_SECURE_CLASSNAME=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter
HADOOP_CLASSNAME=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
hadoop_deprecate_envvar HADOOP_SECURE_NFS3_LOG_DIR HADOOP_SECURE_LOG_DIR
hadoop_deprecate_envvar HADOOP_SECURE_NFS3_PID_DIR HADOOP_SECURE_PID_DIR
;;
oev)
HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
@ -230,9 +212,9 @@ else
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
# shellcheck source=./hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
@ -257,7 +239,7 @@ if hadoop_need_reexec hdfs "${HADOOP_SUBCMD}"; then
exit $?
fi
hadoop_verify_user "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
hadoop_verify_user_perm "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
HADOOP_SUBCMD_ARGS=("$@")
@ -277,60 +259,5 @@ fi
hadoop_subcommand_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}"
hadoop_subcommand_secure_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
hadoop_verify_secure_prereq
hadoop_setup_secure_service
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
else
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
fi
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
else
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
fi
fi
hadoop_finalize
if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${priv_pidfile}" \
"${priv_outfile}" \
"${priv_errfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
else
hadoop_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
fi
exit $?
else
# shellcheck disable=SC2086
hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}"
fi
# everything is in globals at this point, so call the generic handler
hadoop_generic_java_subcmd_handler

View File

@ -53,6 +53,9 @@ function hadoop_subproject_init
hadoop_deprecate_envvar HADOOP_NFS3_SECURE_EXTRA_OPTS HDFS_NFS3_SECURE_EXTRA_OPTS
hadoop_deprecate_envvar HADOOP_SECURE_DN_USER HDFS_DATANODE_SECURE_USER
hadoop_deprecate_envvar HADOOP_PRIVILEGED_NFS_USER HDFS_NFS3_SECURE_USER
HADOOP_HDFS_HOME="${HADOOP_HDFS_HOME:-$HADOOP_HOME}"
@ -74,6 +77,8 @@ if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hd_this}")" >/dev/null && pwd -P)
fi
# shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
if [[ -n "${HADOOP_COMMON_HOME}" ]] &&
[[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"

View File

@ -89,10 +89,10 @@ The rest of the NFS gateway configurations are optional for both secure and non-
server can be assured to have been committed.
* HDFS super-user is the user with the same identity as NameNode process itself and
the super-user can do anything, in that permission checks never fail for the super-user.
If the following property is configured, the superuser on NFS client can access any file
on HDFS. By default, the super user is not configured in the gateway.
Note that, even if the superuser is configured, "nfs.exports.allowed.hosts" still takes effect.
For example, the superuser will not have write access to HDFS files through the gateway if
the NFS client host is not allowed to have write access in "nfs.exports.allowed.hosts".
@ -143,7 +143,7 @@ It's strongly recommended for the users to update a few configuration properties
For example: "192.168.0.0/22 rw ; \\\\w\*\\\\.example\\\\.com ; host1.test.org ro;". Only the NFS gateway needs to restart after
this property is updated. Note that the Java regular expression used here is different from the regular expression used in the
Linux NFS export table; for example, use "\\\\w\*\\\\.example\\\\.com" instead of "\*.example.com", and "192\\\\.168\\\\.0\\\\.(11|22)"
instead of "192.168.0.[11|22]", and so on.
<property>
<name>nfs.exports.allowed.hosts</name>
@ -151,10 +151,10 @@ It's strongly recommended for the users to update a few configuration properties
</property>
* HDFS super-user is the user with the same identity as NameNode process itself and
the super-user can do anything, in that permission checks never fail for the super-user.
If the following property is configured, the superuser on NFS client can access any file
on HDFS. By default, the super user is not configured in the gateway.
Note that, even if the superuser is configured, "nfs.exports.allowed.hosts" still takes effect.
For example, the superuser will not have write access to HDFS files through the gateway if
the NFS client host is not allowed to have write access in "nfs.exports.allowed.hosts".
@ -224,7 +224,7 @@ Three daemons are required to provide NFS service: rpcbind (or portmap), mountd
[hdfs]$ $HADOOP_HOME/bin/hdfs --daemon stop nfs3
[root]> $HADOOP_HOME/bin/hdfs --daemon stop portmap
Optionally, you can forgo running the Hadoop-provided portmap daemon and instead use the system portmap daemon on all operating systems if you start the NFS Gateway as root. This will allow the HDFS NFS Gateway to work around the aforementioned bug and still register using the system portmap daemon. To do so, just start the NFS gateway daemon as you normally would, but make sure to do so as the "root" user, and also set the "HADOOP\_PRIVILEGED\_NFS\_USER" environment variable to an unprivileged user. In this mode the NFS Gateway will start as root to perform its initial registration with the system portmap, and then will drop privileges back to the user specified by the HADOOP\_PRIVILEGED\_NFS\_USER afterward and for the rest of the duration of the lifetime of the NFS Gateway process. Note that if you choose this route, you should skip steps 1 and 2 above.
Optionally, you can forgo running the Hadoop-provided portmap daemon and instead use the system portmap daemon on all operating systems if you start the NFS Gateway as root. This will allow the HDFS NFS Gateway to work around the aforementioned bug and still register using the system portmap daemon. To do so, just start the NFS gateway daemon as you normally would, but make sure to do so as the "root" user, and also set the "HDFS\_NFS3\_SECURE\_USER" environment variable to an unprivileged user. In this mode the NFS Gateway will start as root to perform its initial registration with the system portmap, and then will drop privileges back to the user specified by the HDFS\_NFS3\_SECURE\_USER afterward and for the rest of the duration of the lifetime of the NFS Gateway process. Note that if you choose this route, you should skip steps 1 and 2 above.
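A sketch of that alternative startup, assuming `nfsserver` as the unprivileged account:

```bash
# hadoop-env.sh: the gateway starts as root only long enough to
# register with the system portmap, then drops to this account.
export HDFS_NFS3_SECURE_USER=nfsserver
```

Then start the gateway as root with `$HADOOP_HOME/bin/hdfs --daemon start nfs3`, skipping steps 1 and 2 above.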
Verify validity of NFS related services
---------------------------------------
@ -268,7 +268,7 @@ Verify validity of NFS related services
Mount the export "/"
--------------------
Currently NFS v3 only uses TCP as the transport protocol. NLM is not supported, so mount option "nolock" is needed.
Mount option "sync" is strongly recommended since it can minimize or avoid reordered writes, which results in more predictable throughput.
Not specifying the sync option may cause unreliable behavior when uploading large files.
It's recommended to use a hard mount. This is because, even after the client sends all data to the NFS gateway, it may take the NFS gateway some extra time to transfer data to HDFS when writes were reordered by the NFS client kernel.

View File

@ -69,11 +69,13 @@ function mapredcmd_case
historyserver)
HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
HADOOP_CLASSNAME=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
if [ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]; then
# shellcheck disable=SC2034
if [[ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]]; then
HADOOP_HEAPSIZE_MAX="${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}"
fi
HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_JHS_LOGGER:-$HADOOP_DAEMON_ROOT_LOGGER}
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
hadoop_add_param HADOOP_OPTS mapred.jobsummary.logger "-Dmapred.jobsummary.logger=${HADOOP_DAEMON_ROOT_LOGGER}"
fi
;;
hsadmin)
HADOOP_CLASSNAME=org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin
@ -112,9 +114,9 @@ else
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/mapred-config.sh" ]]; then
# shellcheck source=./hadoop-mapreduce-project/bin/mapred-config.sh
. "${HADOOP_LIBEXEC_DIR}/mapred-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/mapred-config.sh." 2>&1
@ -139,7 +141,7 @@ if hadoop_need_reexec mapred "${HADOOP_SUBCMD}"; then
exit $?
fi
hadoop_verify_user "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
hadoop_verify_user_perm "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
HADOOP_SUBCMD_ARGS=("$@")
@ -159,55 +161,5 @@ fi
hadoop_subcommand_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}"
hadoop_subcommand_secure_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
hadoop_verify_secure_prereq
hadoop_setup_secure_service
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
else
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
fi
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
hadoop_add_param HADOOP_OPTS mapred.jobsummary.logger "-Dmapred.jobsummary.logger=${HADOOP_ROOT_LOGGER}"
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
fi
hadoop_finalize
if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${priv_pidfile}" \
"${priv_outfile}" \
"${priv_errfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
else
hadoop_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
fi
exit $?
else
hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}"
fi
# everything is in globals at this point, so call the generic handler
hadoop_generic_java_subcmd_handler

View File

@ -59,6 +59,7 @@ if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_mc_this}")" >/dev/null && pwd -P)
fi
# shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
if [[ -n "${HADOOP_COMMON_HOME}" ]] &&
[[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"

View File

@ -120,7 +120,6 @@ function yarncmd_case
HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
# Backwards compatibility
if [[ -n "${YARN_PROXYSERVER_HEAPSIZE}" ]]; then
# shellcheck disable=SC2034
HADOOP_HEAPSIZE_MAX="${YARN_PROXYSERVER_HEAPSIZE}"
fi
;;
@ -132,7 +131,6 @@ function yarncmd_case
HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
# Backwards compatibility
if [[ -n "${YARN_RESOURCEMANAGER_HEAPSIZE}" ]]; then
# shellcheck disable=SC2034
HADOOP_HEAPSIZE_MAX="${YARN_RESOURCEMANAGER_HEAPSIZE}"
fi
;;
@ -155,7 +153,6 @@ function yarncmd_case
HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
# Backwards compatibility
if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then
# shellcheck disable=SC2034
HADOOP_HEAPSIZE_MAX="${YARN_TIMELINESERVER_HEAPSIZE}"
fi
;;
@ -210,9 +207,9 @@ else
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
# shellcheck source=./hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
@ -239,7 +236,7 @@ if hadoop_need_reexec yarn "${HADOOP_SUBCMD}"; then
exit $?
fi
hadoop_verify_user "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
hadoop_verify_user_perm "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
HADOOP_SUBCMD_ARGS=("$@")
@ -256,7 +253,6 @@ fi
# HADOOP_CLIENT_OPTS instead before we (potentially) add it
# to the command line
if [[ -n "${YARN_CLIENT_OPTS}" ]]; then
# shellcheck disable=SC2034
HADOOP_CLIENT_OPTS=${YARN_CLIENT_OPTS}
fi
@ -269,55 +265,5 @@ fi
hadoop_subcommand_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}"
hadoop_subcommand_secure_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}"
hadoop_verify_secure_prereq
hadoop_setup_secure_service
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
else
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid"
fi
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log"
fi
hadoop_finalize
if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then
if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then
hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${priv_pidfile}" \
"${priv_outfile}" \
"${priv_errfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
else
hadoop_daemon_handler \
"${HADOOP_DAEMON_MODE}" \
"${HADOOP_SUBCMD}" \
"${HADOOP_CLASSNAME}" \
"${daemon_pidfile}" \
"${daemon_outfile}" \
"${HADOOP_SUBCMD_ARGS[@]}"
fi
exit $?
else
# shellcheck disable=SC2086
hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}"
fi
# everything is in globals at this point, so call the generic handler
hadoop_generic_java_subcmd_handler

View File

@ -69,6 +69,7 @@ if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_yc_this}")" >/dev/null && pwd -P)
fi
# shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
if [[ -n "${HADOOP_COMMON_HOME}" ]] &&
[[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"