Merge from trunk to branch.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/fs-encryption@1619018 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Wang 2014-08-20 01:22:42 +00:00
commit d2a39b61aa
61 changed files with 3614 additions and 2034 deletions

View File

@ -29,6 +29,7 @@
<exclude>*-config.cmd</exclude>
<exclude>start-*.cmd</exclude>
<exclude>stop-*.cmd</exclude>
<exclude>hadoop-layout.sh.example</exclude>
</excludes>
<fileMode>0755</fileMode>
</fileSet>
@ -42,6 +43,8 @@
<includes>
<include>*-config.sh</include>
<include>*-config.cmd</include>
<include>*-functions.sh</include>
<include>hadoop-layout.sh.example</include>
</includes>
<fileMode>0755</fileMode>
</fileSet>
@ -57,6 +60,10 @@
<exclude>hadoop.cmd</exclude>
<exclude>hdfs.cmd</exclude>
<exclude>hadoop-config.cmd</exclude>
<exclude>hadoop-functions.sh</exclude>
<exclude>hadoop-layout.sh.example</exclude>
<exclude>hdfs-config.cmd</exclude>
<exclude>hdfs-config.sh</exclude>
</excludes>
<fileMode>0755</fileMode>
</fileSet>

View File

@ -9,6 +9,8 @@ Trunk (Unreleased)
HADOOP-10474 Move o.a.h.record to hadoop-streaming. (wheat9)
HADOOP-9902. Shell script rewrite (aw)
NEW FEATURES
HADOOP-10433. Key Management Server based on KeyProvider API. (tucu)
@ -615,6 +617,15 @@ Release 2.6.0 - UNRELEASED
HADOOP-10973. Native Libraries Guide contains format error. (Peter Klavins
via Arpit Agarwal)
HADOOP-10972. Native Libraries Guide contains mis-spelt build line (Peter
Klavins via aw)
HADOOP-10873. Fix dead link in Configuration javadoc (Akira AJISAKA
via aw)
HADOOP-10968. hadoop native build fails to detect java_libarch on
ppc64le (Dinar Valeev via Colin Patrick McCabe)
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -78,6 +78,12 @@ IF("${CMAKE_SYSTEM}" MATCHES "Linux")
SET(_java_libarch "amd64")
ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
SET(_java_libarch "arm")
ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64le")
IF(EXISTS "${_JAVA_HOME}/jre/lib/ppc64le")
SET(_java_libarch "ppc64le")
ELSE()
SET(_java_libarch "ppc64")
ENDIF()
ELSE()
SET(_java_libarch ${CMAKE_SYSTEM_PROCESSOR})
ENDIF()

View File

@ -15,130 +15,164 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# This script runs the hadoop core commands.
bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin" > /dev/null; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
function print_usage(){
function hadoop_usage()
{
echo "Usage: hadoop [--config confdir] COMMAND"
echo " where COMMAND is one of:"
echo " fs run a generic filesystem user client"
echo " version print the version"
echo " jar <jar> run a jar file"
echo " checknative [-a|-h] check native hadoop and compression libraries availability"
echo " distcp <srcurl> <desturl> copy file or directories recursively"
echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
echo " archive -archiveName NAME -p <parent path> <src>* <dest>"
echo " create a Hadoop archive"
echo " checknative [-a|-h] check native Hadoop and compression "
echo " libraries availability"
echo " classpath prints the class path needed to get the"
echo " Hadoop jar and the required libraries"
echo " credential interact with credential providers"
echo " Hadoop jar and the required libraries"
echo " daemonlog get/set the log level for each daemon"
echo " distch path:owner:group:permisson"
echo " distributed metadata changer"
echo " distcp <srcurl> <desturl> "
echo " copy file or directories recursively"
echo " fs run a generic filesystem user client"
echo " jar <jar> run a jar file"
echo " jnipath prints the java.library.path"
echo " key manage keys via the KeyProvider"
echo " version print the version"
echo " or"
echo " CLASSNAME run the class named CLASSNAME"
echo ""
echo "Most commands print help when invoked w/o parameters."
}
# This script runs the hadoop core commands.
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1
fi
if [ $# = 0 ]; then
print_usage
exit
hadoop_exit_with_usage 1
fi
COMMAND=$1
case $COMMAND in
# usage flags
--help|-help|-h)
print_usage
exit
;;
shift
#hdfs commands
namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3)
echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
echo "Instead use the hdfs command for it." 1>&2
echo "" 1>&2
#try to locate hdfs and if present, delegate to it.
shift
if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
case ${COMMAND} in
balancer|datanode|dfs|dfsadmin|dfsgroups| \
namenode|secondarynamenode|fsck|fetchdt|oiv| \
portmap|nfs3)
hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
COMMAND=${COMMAND/dfsgroups/groups}
hadoop_error "WARNING: Attempting to execute replacement \"hdfs ${COMMAND}\" instead."
hadoop_error ""
#try to locate hdfs and if present, delegate to it.
if [[ -f "${HADOOP_HDFS_HOME}/bin/hdfs" ]]; then
# shellcheck disable=SC2086
exec "${HADOOP_HDFS_HOME}/bin/hdfs" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
elif [[ -f "${HADOOP_PREFIX}/bin/hdfs" ]]; then
# shellcheck disable=SC2086
exec "${HADOOP_PREFIX}/bin/hdfs" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
else
echo "HADOOP_HDFS_HOME not found!"
hadoop_error "HADOOP_HDFS_HOME not found!"
exit 1
fi
;;
;;
#mapred commands for backwards compatibility
pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2
echo "Instead use the mapred command for it." 1>&2
echo "" 1>&2
hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
COMMAND=${COMMAND/mrgroups/groups}
hadoop_error "WARNING: Attempting to execute replacement \"mapred ${COMMAND}\" instead."
hadoop_error ""
#try to locate mapred and if present, delegate to it.
shift
if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
if [[ -f "${HADOOP_MAPRED_HOME}/bin/mapred" ]]; then
exec "${HADOOP_MAPRED_HOME}/bin/mapred" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
elif [[ -f "${HADOOP_PREFIX}/bin/mapred" ]]; then
exec "${HADOOP_PREFIX}/bin/mapred" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
else
echo "HADOOP_MAPRED_HOME not found!"
hadoop_error "HADOOP_MAPRED_HOME not found!"
exit 1
fi
;;
#core commands
*)
# the core commands
if [ "$COMMAND" = "fs" ] ; then
CLASS=org.apache.hadoop.fs.FsShell
elif [ "$COMMAND" = "version" ] ; then
CLASS=org.apache.hadoop.util.VersionInfo
elif [ "$COMMAND" = "jar" ] ; then
CLASS=org.apache.hadoop.util.RunJar
elif [ "$COMMAND" = "key" ] ; then
CLASS=org.apache.hadoop.crypto.key.KeyShell
elif [ "$COMMAND" = "checknative" ] ; then
CLASS=org.apache.hadoop.util.NativeLibraryChecker
elif [ "$COMMAND" = "distcp" ] ; then
CLASS=org.apache.hadoop.tools.DistCp
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
elif [ "$COMMAND" = "daemonlog" ] ; then
CLASS=org.apache.hadoop.log.LogLevel
elif [ "$COMMAND" = "archive" ] ; then
CLASS=org.apache.hadoop.tools.HadoopArchives
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
elif [ "$COMMAND" = "credential" ] ; then
CLASS=org.apache.hadoop.security.alias.CredentialShell
elif [ "$COMMAND" = "classpath" ] ; then
if [ "$#" -eq 1 ]; then
# No need to bother starting up a JVM for this simple case.
echo $CLASSPATH
exit
else
CLASS=org.apache.hadoop.util.Classpath
fi
elif [[ "$COMMAND" = -* ]] ; then
# class and package names cannot begin with a -
echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
exit 1
else
CLASS=$COMMAND
;;
archive)
CLASS=org.apache.hadoop.tools.HadoopArchives
hadoop_add_classpath "${TOOL_PATH}"
;;
checknative)
CLASS=org.apache.hadoop.util.NativeLibraryChecker
;;
classpath)
if [[ "$#" -eq 1 ]]; then
CLASS=org.apache.hadoop.util.Classpath
else
hadoop_finalize
echo "${CLASSPATH}"
exit 0
fi
shift
# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
#make sure security appender is turned off
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
export CLASSPATH=$CLASSPATH
exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
;;
;;
credential)
CLASS=org.apache.hadoop.security.alias.CredentialShell
;;
daemonlog)
CLASS=org.apache.hadoop.log.LogLevel
;;
distch)
CLASS=org.apache.hadoop.tools.DistCh
hadoop_add_classpath "${TOOL_PATH}"
;;
distcp)
CLASS=org.apache.hadoop.tools.DistCp
hadoop_add_classpath "${TOOL_PATH}"
;;
fs)
CLASS=org.apache.hadoop.fs.FsShell
;;
jar)
CLASS=org.apache.hadoop.util.RunJar
;;
jnipath)
hadoop_finalize
echo "${JAVA_LIBRARY_PATH}"
exit 0
;;
key)
CLASS=org.apache.hadoop.crypto.key.KeyShell
;;
version)
CLASS=org.apache.hadoop.util.VersionInfo
;;
-*|hdfs)
hadoop_exit_with_usage 1
;;
*)
CLASS="${COMMAND}"
;;
esac
# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
hadoop_finalize
export CLASSPATH
hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"

View File

@ -1,3 +1,5 @@
#
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
@ -13,280 +15,176 @@
# See the License for the specific language governing permissions and
# limitations under the License.
####
# IMPORTANT
####
## The hadoop-config.sh tends to get executed by non-Hadoop scripts.
## Those parts expect this script to parse/manipulate $@. In order
## to maintain backward compatibility, this means a surprising
## lack of functions for bits that would be much better off in
## a function.
##
## In other words, yes, there is some bad things happen here and
## unless we break the rest of the ecosystem, we can't change it. :(
# included in all the hadoop scripts with source command
# should not be executable directly
# also should not be passed any arguments, since we need original $*
# Resolve links ($0 may be a softlink) and convert a relative path
# to an absolute path. NB: The -P option requires bash built-ins
# or POSIX:2001 compliant cd and pwd.
# HADOOP_CLASSPATH Extra Java CLASSPATH entries.
#
# HADOOP_USER_CLASSPATH_FIRST When defined, the HADOOP_CLASSPATH is
# added in the beginning of the global
# classpath. Can be defined, for example,
# by doing
# export HADOOP_USER_CLASSPATH_FIRST=true
#
# after doing more config, caller should also exec finalize
# function to finish last minute/default configs for
# settings that might be different between daemons & interactive
this="${BASH_SOURCE-$0}"
common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
script="$(basename -- "$this")"
this="$common_bin/$script"
[ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh"
HADOOP_COMMON_DIR=${HADOOP_COMMON_DIR:-"share/hadoop/common"}
HADOOP_COMMON_LIB_JARS_DIR=${HADOOP_COMMON_LIB_JARS_DIR:-"share/hadoop/common/lib"}
HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_COMMON_LIB_NATIVE_DIR:-"lib/native"}
HDFS_DIR=${HDFS_DIR:-"share/hadoop/hdfs"}
HDFS_LIB_JARS_DIR=${HDFS_LIB_JARS_DIR:-"share/hadoop/hdfs/lib"}
YARN_DIR=${YARN_DIR:-"share/hadoop/yarn"}
YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/yarn/lib"}
MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"}
MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}
# the root of the Hadoop installation
# See HADOOP-6255 for directory structure layout
HADOOP_DEFAULT_PREFIX=$(cd -P -- "$common_bin"/.. && pwd -P)
HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
export HADOOP_PREFIX
#check to see if the conf dir is given as an optional argument
if [ $# -gt 1 ]
then
if [ "--config" = "$1" ]
then
shift
confdir=$1
if [ ! -d "$confdir" ]; then
echo "Error: Cannot find configuration directory: $confdir"
exit 1
fi
shift
HADOOP_CONF_DIR=$confdir
fi
# you must be this high to ride the ride
if [[ -z "${BASH_VERSINFO}" ]] || [[ "${BASH_VERSINFO}" -lt 3 ]]; then
echo "Hadoop requires bash v3 or better. Sorry."
exit 1
fi
# Allow alternate conf dir location.
if [ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]; then
DEFAULT_CONF_DIR="conf"
# In order to get partially bootstrapped, we need to figure out where
# we are located. Chances are good that our caller has already done
# this work for us, but just in case...
if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
_hadoop_common_this="${BASH_SOURCE-$0}"
HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hadoop_common_this}")" >/dev/null && pwd -P)
fi
# get our functions defined for usage later
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh"
else
DEFAULT_CONF_DIR="etc/hadoop"
fi
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/$DEFAULT_CONF_DIR}"
if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
. "${HADOOP_CONF_DIR}/hadoop-env.sh"
fi
# User can specify hostnames or a file where the hostnames are (not both)
if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
echo \
"Error: Please specify one variable HADOOP_SLAVES or " \
"HADOOP_SLAVE_NAME and not both."
echo "ERROR: Unable to exec ${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh." 1>&2
exit 1
fi
# Process command line options that specify hosts or file with host
# list
if [ $# -gt 1 ]
then
if [ "--hosts" = "$1" ]
then
shift
export HADOOP_SLAVES="${HADOOP_CONF_DIR}/$1"
shift
elif [ "--hostnames" = "$1" ]
then
shift
export HADOOP_SLAVE_NAMES=$1
shift
fi
# allow overrides of the above and pre-defines of the below
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh"
fi
# User can specify hostnames or a file where the hostnames are (not both)
# (same check as above but now we know it's command line options that cause
# the problem)
if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
echo \
"Error: Please specify one of --hosts or --hostnames options and not both."
exit 1
#
# IMPORTANT! We are not executing user provided code yet!
#
# Let's go! Base definitions so we can move forward
hadoop_bootstrap_init
# let's find our conf.
#
# first, check and process params passed to us
# we process this in-line so that we can directly modify $@
# if something downstream is processing that directly,
# we need to make sure our params have been ripped out
# note that we do many of them here for various utilities.
# this provides consistency and forces a more consistent
# user experience
# save these off in case our caller needs them
# shellcheck disable=SC2034
HADOOP_USER_PARAMS="$@"
HADOOP_DAEMON_MODE="default"
while [[ -z "${_hadoop_common_done}" ]]; do
case $1 in
--buildpaths)
# shellcheck disable=SC2034
HADOOP_ENABLE_BUILD_PATHS=true
shift
;;
--config)
shift
confdir=$1
shift
if [[ -d "${confdir}" ]]; then
# shellcheck disable=SC2034
YARN_CONF_DIR="${confdir}"
# shellcheck disable=SC2034
HADOOP_CONF_DIR="${confdir}"
elif [[ -z "${confdir}" ]]; then
hadoop_error "ERROR: No parameter provided for --config "
hadoop_exit_with_usage 1
else
hadoop_error "ERROR: Cannot find configuration directory \"${confdir}\""
hadoop_exit_with_usage 1
fi
;;
--daemon)
shift
HADOOP_DAEMON_MODE=$1
shift
if [[ -z "${HADOOP_DAEMON_MODE}" || \
! "${HADOOP_DAEMON_MODE}" =~ ^st(art|op|atus)$ ]]; then
hadoop_error "ERROR: --daemon must be followed by either \"start\", \"stop\", or \"status\"."
hadoop_exit_with_usage 1
fi
;;
--help|-help|-h|help|--h|--\?|-\?|\?)
hadoop_exit_with_usage 0
;;
--hostnames)
shift
# shellcheck disable=SC2034
HADOOP_SLAVE_NAMES="$1"
shift
;;
--hosts)
shift
hadoop_populate_slaves_file "$1"
shift
;;
*)
_hadoop_common_done=true
;;
esac
done
hadoop_find_confdir
hadoop_exec_hadoopenv
#
# IMPORTANT! User provided code is now available!
#
# do all the OS-specific startup bits here
# this allows us to get a decent JAVA_HOME,
# call crle for LD_LIBRARY_PATH, etc.
hadoop_os_tricks
hadoop_java_setup
hadoop_basic_init
# inject any sub-project overrides, defaults, etc.
if declare -F hadoop_subproject_init >/dev/null ; then
hadoop_subproject_init
fi
# check if net.ipv6.bindv6only is set to 1
bindv6only=$(/sbin/sysctl -n net.ipv6.bindv6only 2> /dev/null)
if [ -n "$bindv6only" ] && [ "$bindv6only" -eq "1" ] && [ "$HADOOP_ALLOW_IPV6" != "yes" ]
then
echo "Error: \"net.ipv6.bindv6only\" is set to 1 - Java networking could be broken"
echo "For more info: http://wiki.apache.org/hadoop/HadoopIPv6"
exit 1
fi
# Newer versions of glibc use an arena memory allocator that causes virtual
# memory usage to explode. This interacts badly with the many threads that
# we use in Hadoop. Tune the variable down to prevent vmem explosion.
export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
# Attempt to set JAVA_HOME if it is not set
if [[ -z $JAVA_HOME ]]; then
# On OSX use java_home (or /Library for older versions)
if [ "Darwin" == "$(uname -s)" ]; then
if [ -x /usr/libexec/java_home ]; then
export JAVA_HOME=($(/usr/libexec/java_home))
else
export JAVA_HOME=(/Library/Java/Home)
fi
fi
# Bail if we did not detect it
if [[ -z $JAVA_HOME ]]; then
echo "Error: JAVA_HOME is not set and could not be found." 1>&2
exit 1
fi
fi
JAVA=$JAVA_HOME/bin/java
# check envvars which might override default args
if [ "$HADOOP_HEAPSIZE" != "" ]; then
#echo "run with heapsize $HADOOP_HEAPSIZE"
JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
#echo $JAVA_HEAP_MAX
fi
# CLASSPATH initially contains $HADOOP_CONF_DIR
CLASSPATH="${HADOOP_CONF_DIR}"
# so that filenames w/ spaces are handled correctly in loops below
IFS=
if [ "$HADOOP_COMMON_HOME" = "" ]; then
if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_DIR" ]; then
export HADOOP_COMMON_HOME=$HADOOP_PREFIX
fi
fi
# for releases, add core hadoop jar & webapps to CLASSPATH
if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR
fi
if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR'/*'
fi
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR'/*'
# default log directory & file
if [ "$HADOOP_LOG_DIR" = "" ]; then
HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
fi
if [ "$HADOOP_LOGFILE" = "" ]; then
HADOOP_LOGFILE='hadoop.log'
fi
# default policy file for service-level authorization
if [ "$HADOOP_POLICYFILE" = "" ]; then
HADOOP_POLICYFILE="hadoop-policy.xml"
fi
# restore ordinary behaviour
unset IFS
# setup 'java.library.path' for native-hadoop code if necessary
if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
else
JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
fi
fi
fi
# setup a default TOOL_PATH
TOOL_PATH="${TOOL_PATH:-$HADOOP_PREFIX/share/hadoop/tools/lib/*}"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_PREFIX"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JAVA_LIBRARY_PATH
fi
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"
# Disable ipv6 as it can cause issues
HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# put hdfs in classpath if present
if [ "$HADOOP_HDFS_HOME" = "" ]; then
if [ -d "${HADOOP_PREFIX}/$HDFS_DIR" ]; then
export HADOOP_HDFS_HOME=$HADOOP_PREFIX
fi
fi
if [ -d "$HADOOP_HDFS_HOME/$HDFS_DIR/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR
fi
if [ -d "$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR'/*'
fi
CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR'/*'
# put yarn in classpath if present
if [ "$HADOOP_YARN_HOME" = "" ]; then
if [ -d "${HADOOP_PREFIX}/$YARN_DIR" ]; then
export HADOOP_YARN_HOME=$HADOOP_PREFIX
fi
fi
if [ -d "$HADOOP_YARN_HOME/$YARN_DIR/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_DIR
fi
if [ -d "$HADOOP_YARN_HOME/$YARN_LIB_JARS_DIR" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_LIB_JARS_DIR'/*'
fi
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_DIR'/*'
# put mapred in classpath if present AND different from YARN
if [ "$HADOOP_MAPRED_HOME" = "" ]; then
if [ -d "${HADOOP_PREFIX}/$MAPRED_DIR" ]; then
export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
fi
fi
if [ "$HADOOP_MAPRED_HOME/$MAPRED_DIR" != "$HADOOP_YARN_HOME/$YARN_DIR" ] ; then
if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_DIR/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR
fi
if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR'/*'
fi
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR'/*'
fi
# Add the user-specified CLASSPATH via HADOOP_CLASSPATH
# Add it first or last depending on if user has
# set env-var HADOOP_USER_CLASSPATH_FIRST
if [ "$HADOOP_CLASSPATH" != "" ]; then
# Prefix it if its to be preceded
if [ "$HADOOP_USER_CLASSPATH_FIRST" != "" ]; then
CLASSPATH=${HADOOP_CLASSPATH}:${CLASSPATH}
else
CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
fi
# get the native libs in there pretty quick
hadoop_add_javalibpath "${HADOOP_PREFIX}/build/native"
hadoop_add_javalibpath "${HADOOP_PREFIX}/${HADOOP_COMMON_LIB_NATIVE_DIR}"
# get the basic java class path for these subprojects
# in as quickly as possible since other stuff
# will definitely depend upon it.
#
# at some point, this will get replaced with something pluggable
# so that these functions can sit in their projects rather than
# common
#
for i in common hdfs yarn mapred
do
hadoop_add_to_classpath_$i
done
#
# backwards compatibility. new stuff should
# call this when they are ready
#
if [[ -z "${HADOOP_NEW_CONFIG}" ]]; then
hadoop_finalize
fi

View File

@ -15,200 +15,42 @@
# See the License for the specific language governing permissions and
# limitations under the License.
function hadoop_usage
{
echo "Usage: hadoop-daemon.sh [--config confdir] (start|stop|status) <hadoop-command> <args...>"
}
# Runs a Hadoop command as a daemon.
#
# Environment Variables
#
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
# HADOOP_LOG_DIR Where log files are stored. PWD by default.
# HADOOP_MASTER host:path where hadoop code should be rsync'd from
# HADOOP_PID_DIR The pid files are stored. /tmp by default.
# HADOOP_IDENT_STRING A string representing this instance of hadoop. $USER by default
# HADOOP_NICENESS The scheduling priority for daemons. Defaults to 0.
##
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
usage="Usage: hadoop-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] [--script script] (start|stop) <hadoop-command> <args...>"
# if no args specified, show usage
if [ $# -le 1 ]; then
echo $usage
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
# get arguments
#default value
hadoopScript="$HADOOP_PREFIX"/bin/hadoop
if [ "--script" = "$1" ]
then
shift
hadoopScript=$1
shift
if [[ $# = 0 ]]; then
hadoop_exit_with_usage 1
fi
startStop=$1
shift
command=$1
daemonmode=$1
shift
hadoop_rotate_log ()
{
log=$1;
num=5;
if [ -n "$2" ]; then
num=$2
fi
if [ -f "$log" ]; then # rotate logs
while [ $num -gt 1 ]; do
prev=`expr $num - 1`
[ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
num=$prev
done
mv "$log" "$log.$num";
fi
}
if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
. "${HADOOP_CONF_DIR}/hadoop-env.sh"
if [[ -z "${HADOOP_HDFS_HOME}" ]]; then
hdfsscript="${HADOOP_PREFIX}/bin/hdfs"
else
hdfsscript="${HADOOP_HDFS_HOME}/bin/hdfs"
fi
# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
export HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
export HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
export HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER
starting_secure_dn="true"
fi
#Determine if we're starting a privileged NFS, if so, redefine the appropriate variables
if [ "$command" == "nfs3" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_PRIVILEGED_NFS_USER" ]; then
export HADOOP_PID_DIR=$HADOOP_PRIVILEGED_NFS_PID_DIR
export HADOOP_LOG_DIR=$HADOOP_PRIVILEGED_NFS_LOG_DIR
export HADOOP_IDENT_STRING=$HADOOP_PRIVILEGED_NFS_USER
starting_privileged_nfs="true"
fi
if [ "$HADOOP_IDENT_STRING" = "" ]; then
export HADOOP_IDENT_STRING="$USER"
fi
# get log directory
if [ "$HADOOP_LOG_DIR" = "" ]; then
export HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
fi
if [ ! -w "$HADOOP_LOG_DIR" ] ; then
mkdir -p "$HADOOP_LOG_DIR"
chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR
fi
if [ "$HADOOP_PID_DIR" = "" ]; then
HADOOP_PID_DIR=/tmp
fi
# some variables
export HADOOP_LOGFILE=hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.log
export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-"INFO,RFA"}
export HADOOP_SECURITY_LOGGER=${HADOOP_SECURITY_LOGGER:-"INFO,RFAS"}
export HDFS_AUDIT_LOGGER=${HDFS_AUDIT_LOGGER:-"INFO,NullAppender"}
log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.out
pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid
HADOOP_STOP_TIMEOUT=${HADOOP_STOP_TIMEOUT:-5}
# Set default scheduling priority
if [ "$HADOOP_NICENESS" = "" ]; then
export HADOOP_NICENESS=0
fi
case $startStop in
(start)
[ -w "$HADOOP_PID_DIR" ] || mkdir -p "$HADOOP_PID_DIR"
if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo $command running as process `cat $pid`. Stop it first.
exit 1
fi
fi
if [ "$HADOOP_MASTER" != "" ]; then
echo rsync from $HADOOP_MASTER
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_PREFIX"
fi
hadoop_rotate_log $log
echo starting $command, logging to $log
cd "$HADOOP_PREFIX"
case $command in
namenode|secondarynamenode|datanode|journalnode|dfs|dfsadmin|fsck|balancer|zkfc)
if [ -z "$HADOOP_HDFS_HOME" ]; then
hdfsScript="$HADOOP_PREFIX"/bin/hdfs
else
hdfsScript="$HADOOP_HDFS_HOME"/bin/hdfs
fi
nohup nice -n $HADOOP_NICENESS $hdfsScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
;;
(*)
nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
;;
esac
echo $! > $pid
sleep 1
head "$log"
# capture the ulimit output
if [ "true" = "$starting_secure_dn" ]; then
echo "ulimit -a for secure datanode user $HADOOP_SECURE_DN_USER" >> $log
# capture the ulimit info for the appropriate user
su --shell=/bin/bash $HADOOP_SECURE_DN_USER -c 'ulimit -a' >> $log 2>&1
elif [ "true" = "$starting_privileged_nfs" ]; then
echo "ulimit -a for privileged nfs user $HADOOP_PRIVILEGED_NFS_USER" >> $log
su --shell=/bin/bash $HADOOP_PRIVILEGED_NFS_USER -c 'ulimit -a' >> $log 2>&1
else
echo "ulimit -a for user $USER" >> $log
ulimit -a >> $log 2>&1
fi
sleep 3;
if ! ps -p $! > /dev/null ; then
exit 1
fi
;;
(stop)
if [ -f $pid ]; then
TARGET_PID=`cat $pid`
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo stopping $command
kill $TARGET_PID
sleep $HADOOP_STOP_TIMEOUT
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo "$command did not stop gracefully after $HADOOP_STOP_TIMEOUT seconds: killing with kill -9"
kill -9 $TARGET_PID
fi
else
echo no $command to stop
fi
rm -f $pid
else
echo no $command to stop
fi
;;
(*)
echo $usage
exit 1
;;
esac
exec "$hdfsscript" --config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"

View File

@ -18,19 +18,34 @@
# Run a Hadoop command on all slave hosts.
usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."
function hadoop_usage
{
echo "Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] (start|stop|status) <hadoop-command> <args...>"
}
# if no args specified, show usage
if [ $# -le 1 ]; then
echo $usage
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1
fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
if [[ $# = 0 ]]; then
hadoop_exit_with_usage 1
fi
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"
hadoop_connect_to_hosts "${bin}/hadoop-daemon.sh" \
--config "${HADOOP_CONF_DIR}" "$@"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,93 @@
# Copyright 2014 The Apache Software Foundation
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
## VENDORS!
##
## This is where you can redefine the layout of Hadoop directories
## and expect to be reasonably compatible. Needless to say, this
## is expert level stuff and one needs to tread carefully.
##
## If you move HADOOP_LIBEXEC_DIR from some location that
## isn't bin/../libexec, you MUST define either HADOOP_LIBEXEC_DIR
## or have HADOOP_PREFIX/libexec/hadoop-config.sh and
## HADOOP_PREFIX/libexec/hadoop-layout.sh (this file) exist.
## NOTE:
##
## hadoop-functions.sh gets executed BEFORE this file. So you can
## redefine all of those functions here.
##
## *-env.sh get executed AFTER this file but generally too late to
## override the settings (but not the functions!) here. However, this
## also means you cannot use things like HADOOP_CONF_DIR for these
## definitions.
####
# Common disk layout
####
# Default location for the common/core Hadoop project
# export HADOOP_COMMON_HOME=$HADOOP_PREFIX
# Relative locations where components under HADOOP_COMMON_HOME are located
# export HADOOP_COMMON_DIR="share/hadoop/common"
# export HADOOP_COMMON_LIB_JARS_DIR="share/hadoop/common/lib"
# export HADOOP_COMMON_LIB_NATIVE_DIR="lib/native"
####
# HDFS disk layout
####
# Default location for the HDFS subproject
# export HADOOP_HDFS_HOME=$HADOOP_PREFIX
# Relative locations where components under HADOOP_HDFS_HOME are located
# export HDFS_DIR="share/hadoop/hdfs"
# export HDFS_LIB_JARS_DIR="share/hadoop/hdfs/lib"
####
# YARN disk layout
####
# Default location for the YARN subproject
# export HADOOP_YARN_HOME=$HADOOP_PREFIX
# Relative locations where components under HADOOP_YARN_HOME are located
# export YARN_DIR="share/hadoop/yarn"
# export YARN_LIB_JARS_DIR="share/hadoop/yarn/lib"
# Default location for the MapReduce subproject
# export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
####
# MapReduce disk layout
####
# Relative locations where components under HADOOP_MAPRED_HOME are located
# export MAPRED_DIR="share/hadoop/mapreduce"
# export MAPRED_LIB_JARS_DIR="share/hadoop/mapreduce/lib"
####
# Misc paths
####
# setup a default TOOL_PATH, where things like distcp lives
# note that this path only gets added for certain commands and not
# part of the general classpath
# export TOOL_PATH="$HADOOP_PREFIX/share/hadoop/tools/lib/*"

View File

@ -15,47 +15,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# The Hadoop record compiler
#
# Environment Variables
#
# JAVA_HOME The java implementation to use. Overrides JAVA_HOME.
#
# HADOOP_OPTS Extra Java runtime options.
#
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
#
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
# This script runs the hadoop core commands.
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "$this")" >/dev/null && pwd -P)
script="$(basename -- "$this")"
this="$bin/$script"
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
HADOOP_NEW_CONFIG=true
. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"
if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
. "${HADOOP_CONF_DIR}/hadoop-env.sh"
if [ $# = 0 ]; then
hadoop_exit_with_usage 1
fi
# some Java parameters
if [ "$JAVA_HOME" != "" ]; then
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m
# restore ordinary behaviour
unset IFS
CLASS='org.apache.hadoop.record.compiler.generated.Rcc'
# run it
exec "$JAVA" $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"
# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
hadoop_add_param HADOOP_OPTS Xmx "$JAVA_HEAP_MAX"
hadoop_finalize
export CLASSPATH
hadoop_java_exec rcc "${CLASS}" "$@"

View File

@ -27,38 +27,33 @@
# HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
##
usage="Usage: slaves.sh [--config confdir] command..."
function hadoop_usage {
echo "Usage: slaves.sh [--config confdir] command..."
}
# if no args specified, show usage
if [ $# -le 0 ]; then
echo $usage
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1
fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
# Where to start the script, see hadoop-config.sh
# (it set up the variables based on command line options)
if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then
SLAVE_NAMES=$HADOOP_SLAVE_NAMES
else
SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed 's/#.*$//;/^$/d')
# if no args specified, show usage
if [[ $# -le 0 ]]; then
hadoop_exit_with_usage 1
fi
# start the daemons
for slave in $SLAVE_NAMES ; do
ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" \
2>&1 | sed "s/^/$slave: /" &
if [ "$HADOOP_SLAVE_SLEEP" != "" ]; then
sleep $HADOOP_SLAVE_SLEEP
fi
done
hadoop_connect_to_hosts "$@"
wait

View File

@ -15,24 +15,38 @@
# See the License for the specific language governing permissions and
# limitations under the License.
echo "This script is deprecated. Use start-dfs.sh and start-yarn.sh instead."
exit 1
# Start all hadoop daemons. Run this on master node.
echo "This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh"
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1
fi
# start hdfs daemons if hdfs is present
if [ -f "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh ]; then
"${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh --config $HADOOP_CONF_DIR
if [[ -f "${HADOOP_HDFS_HOME}/sbin/start-dfs.sh" ]]; then
"${HADOOP_HDFS_HOME}/sbin/start-dfs.sh" --config "${HADOOP_CONF_DIR}"
fi
# start yarn daemons if yarn is present
if [ -f "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh ]; then
"${HADOOP_YARN_HOME}"/sbin/start-yarn.sh --config $HADOOP_CONF_DIR
if [[ -f "${HADOOP_YARN_HOME}/sbin/start-yarn.sh" ]]; then
"${HADOOP_YARN_HOME}/sbin/start-yarn.sh" --config "${HADOOP_CONF_DIR}"
fi

View File

@ -18,21 +18,35 @@
# Stop all hadoop daemons. Run this on master node.
echo "This script is Deprecated. Instead use stop-dfs.sh and stop-yarn.sh"
echo "This script is deprecated. Use stop-dfs.sh and stop-yarn.sh instead."
exit 1
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1
fi
# stop hdfs daemons if hdfs is present
if [ -f "${HADOOP_HDFS_HOME}"/sbin/stop-dfs.sh ]; then
"${HADOOP_HDFS_HOME}"/sbin/stop-dfs.sh --config $HADOOP_CONF_DIR
if [[ -f "${HADOOP_HDFS_HOME}/sbin/stop-dfs.sh" ]]; then
"${HADOOP_HDFS_HOME}/sbin/stop-dfs.sh" --config "${HADOOP_CONF_DIR}"
fi
# stop yarn daemons if yarn is present
if [ -f "${HADOOP_HDFS_HOME}"/sbin/stop-yarn.sh ]; then
"${HADOOP_HDFS_HOME}"/sbin/stop-yarn.sh --config $HADOOP_CONF_DIR
if [[ -f "${HADOOP_HDFS_HOME}/sbin/stop-yarn.sh" ]]; then
"${HADOOP_HDFS_HOME}/sbin/stop-yarn.sh" --config "${HADOOP_CONF_DIR}"
fi

View File

@ -1,3 +1,4 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@ -16,71 +17,393 @@
# Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
##
## THIS FILE ACTS AS THE MASTER FILE FOR ALL HADOOP PROJECTS.
## SETTINGS HERE WILL BE READ BY ALL HADOOP COMMANDS. THEREFORE,
## ONE CAN USE THIS FILE TO SET YARN, HDFS, AND MAPREDUCE
## CONFIGURATION OPTIONS INSTEAD OF xxx-env.sh.
##
## Precedence rules:
##
## {yarn-env.sh|hdfs-env.sh} > hadoop-env.sh > hard-coded defaults
##
## {YARN_xyz|HDFS_xyz} > HADOOP_xyz > hard-coded defaults
##
# Many of the options here are built from the perspective that users
# may want to provide OVERWRITING values on the command line.
# For example:
#
# JAVA_HOME=/usr/java/testing hdfs dfs -ls
#
# Therefore, the vast majority (BUT NOT ALL!) of these defaults
# are configured for substitution and not append. If you would
# like append, you'll # need to modify this file accordingly.
###
# Generic settings for HADOOP
###
# Technically, the only required environment variable is JAVA_HOME.
# All others are optional. However, our defaults are probably not
# your defaults. Many sites configure these options outside of Hadoop,
# such as in /etc/profile.d
# The java implementation to use.
export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=${JAVA_HOME:-"hadoop-env.sh is not configured"}
# The jsvc implementation to use. Jsvc is required to run secure datanodes.
#export JSVC_HOME=${JSVC_HOME}
# Location of Hadoop's configuration information. i.e., where this
# file is probably living. You will almost certainly want to set
# this in /etc/profile.d or equivalent.
# export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
# The maximum amount of heap to use, in MB. Default is 1024.
# export HADOOP_HEAPSIZE=1024
# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
# Extra Java runtime options for all Hadoop commands. We don't support
# IPv6 yet/still, so by default we set preference to IPv4.
# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"
# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
# Some parts of the shell code may do special things dependent upon
# the operating system. We have to set this here. See the next
# section as to why....
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
MAC_OSX=false
case "`uname`" in
Darwin*) MAC_OSX=true;;
# Under certain conditions, Java on OS X will throw SCDynamicStore errors
# in the system logs.
# See HADOOP-8719 for more information. If you need Kerberos
# support on OS X, you'll want to change/remove this extra bit.
case ${HADOOP_OS_TYPE} in
Darwin*)
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.realm= "
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.kdc= "
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.conf= "
;;
esac
if $MAC_OSX; then
export HADOOP_OPTS="$HADOOP_OPTS -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
fi
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
# Extra Java runtime options for Hadoop clients (i.e., hdfs dfs -blah)
# These get added to HADOOP_OPTS for such commands. In most cases,
# this should be left empty and let users supply it on the
# command line.
# extra HADOOP_CLIENT_OPTS=""
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
#
# A note about classpaths.
#
# The classpath is configured such that entries are stripped prior
# to handing to Java based either upon duplication or non-existence.
# Wildcards and/or directories are *NOT* expanded as the
# de-duplication is fairly simple. So if two directories are in
# the classpath that both contain awesome-methods-1.0.jar,
# awesome-methods-1.0.jar will still be seen by java. But if
# the classpath specifically has awesome-methods-1.0.jar from the
# same directory listed twice, the last one will be removed.
#
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
# An additional, custom CLASSPATH. This is really meant for
# end users, but as an administrator, one might want to push
# something extra in here too, such as the jar to the topology
# method. Just be sure to append to the existing HADOOP_USER_CLASSPATH
# so end users have a way to add stuff.
# export HADOOP_USER_CLASSPATH="/some/cool/path/on/your/machine"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
# Should HADOOP_USER_CLASSPATH be first in the official CLASSPATH?
# export HADOOP_USER_CLASSPATH_FIRST="yes"
# On secure datanodes, user to run the datanode as after dropping privileges
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
###
# Options for remote shell connectivity
###
# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
# There are some optional components of hadoop that allow for
# command and control of remote hosts. For example,
# start-dfs.sh will attempt to bring up all NNs, DNS, etc.
# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
# Options to pass to SSH when one of the "log into a host and
# start/stop daemons" scripts is executed
# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"
# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
# the user that will run the hadoop daemons. Otherwise there is the
# potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
# The built-in ssh handler will limit itself to 10 simultaneous connections.
# For pdsh users, this sets the fanout size ( -f )
# Change this to increase/decrease as necessary.
# export HADOOP_SSH_PARALLEL=10
# Filename which contains all of the hosts for any remote execution
# helper scripts # such as slaves.sh, start-dfs.sh, etc.
# export HADOOP_SLAVES="${HADOOP_CONF_DIR}/slaves"
###
# Options for all daemons
###
#
#
# You can define variables right here and then re-use them later on.
# For example, it is common to use the same garbage collection settings
# for all the daemons. So we could define:
#
# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
#
# .. and then use it as per the b option under the namenode.
# Where (primarily) daemon log files are stored.
# $HADOOP_PREFIX/logs by default.
# export HADOOP_LOG_DIR=${HADOOP_PREFIX}/logs
# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
# This is used in writing log and pid files, so keep that in mind!
# export HADOOP_IDENT_STRING=$USER
# How many seconds to pause after stopping a daemon
# export HADOOP_STOP_TIMEOUT=5
# Where pid files are stored. /tmp by default.
# export HADOOP_PID_DIR=/tmp
# Default log level and output location
# This sets the hadoop.root.logger property
# export HADOOP_ROOT_LOGGER=INFO,console
# Default log level for daemons spawned explicitly by hadoop-daemon.sh
# This sets the hadoop.root.logger property
# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA
# Default log level and output location for security-related messages.
# It sets -Dhadoop.security.logger on the command line.
# You will almost certainly want to change this on a per-daemon basis!
# export HADOOP_SECURITY_LOGGER=INFO,NullAppender
# Default log level for file system audit messages.
# It sets -Dhdfs.audit.logger on the command line.
# You will almost certainly want to change this on a per-daemon basis!
# export HADOOP_AUDIT_LOGGER=INFO,NullAppender
# Default process priority level
# Note that sub-processes will also run at this level!
# export HADOOP_NICENESS=0
# Default name for the service level authorization file
# export HADOOP_POLICYFILE="hadoop-policy.xml"
###
# Secure/privileged execution
###
#
# Out of the box, Hadoop uses jsvc from Apache Commons to launch daemons
# on privileged ports. This functionality can be replaced by providing
# custom functions. See hadoop-functions.sh for more information.
#
# The jsvc implementation to use. Jsvc is required to run secure datanodes.
# export JSVC_HOME=/usr/bin
#
# This directory contains pids for secure and privileged processes.
#export HADOOP_SECURE_PID_DIR=${HADOOP_PID_DIR}
#
# This directory contains the logs for secure and privileged processes.
# export HADOOP_SECURE_LOG=${HADOOP_LOG_DIR}
#
# When running a secure daemon, the default value of HADOOP_IDENT_STRING
# ends up being a bit bogus. Therefore, by default, the code will
# replace HADOOP_IDENT_STRING with HADOOP_SECURE_xx_USER. If you want
# to keep HADOOP_IDENT_STRING untouched, then uncomment this line.
# export HADOOP_SECURE_IDENT_PRESERVE="true"
###
# NameNode specific parameters
###
# Specify the JVM options to be used when starting the NameNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# a) Set JMX options
# export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
#
# b) Set garbage collection logs
# export HADOOP_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
#
# c) ... or set them directly
# export HADOOP_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
# this is the default:
# export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"
###
# SecondaryNameNode specific parameters
###
# Specify the JVM options to be used when starting the SecondaryNameNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# This is the default:
# export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"
###
# DataNode specific parameters
###
# Specify the JVM options to be used when starting the DataNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# This is the default:
# export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS"
# On secure datanodes, user to run the datanode as after dropping privileges
# This **MUST** be uncommented to enable secure HDFS!
# export HADOOP_SECURE_DN_USER=hdfs
# Supplemental options for secure datanodes
# By default, we use jsvc which needs to know to launch a
# server jvm.
# export HADOOP_DN_SECURE_EXTRA_OPTS="-jvm server"
# Where datanode log files are stored in the secure data environment.
# export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_SECURE_LOG_DIR}
# Where datanode pid files are stored in the secure data environment.
# export HADOOP_SECURE_DN_PID_DIR=${HADOOP_SECURE_PID_DIR}
###
# NFS3 Gateway specific parameters
###
# Specify the JVM options to be used when starting the NFS3 Gateway.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_NFS3_OPTS=""
# Specify the JVM options to be used when starting the Hadoop portmapper.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_PORTMAP_OPTS="-Xmx512m"
# Supplemental options for priviliged gateways
# By default, we use jsvc which needs to know to launch a
# server jvm.
# export HADOOP_NFS3_SECURE_EXTRA_OPTS="-jvm server"
# On privileged gateways, user to run the gateway as after dropping privileges
# export HADOOP_PRIVILEGED_NFS_USER=nfsserver
###
# ZKFailoverController specific parameters
###
# Specify the JVM options to be used when starting the ZKFailoverController.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_ZKFC_OPTS=""
###
# QuorumJournalNode specific parameters
###
# Specify the JVM options to be used when starting the QuorumJournalNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_JOURNALNODE_OPTS=""
###
# HDFS Balancer specific parameters
###
# Specify the JVM options to be used when starting the HDFS Balancer.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_BALANCER_OPTS=""
###
# Advanced Users Only!
###
#
# When building Hadoop, you can add the class paths to your commands
# via this special env var:
# HADOOP_ENABLE_BUILD_PATHS="true"
# You can do things like replace parts of the shell underbelly.
# Most of this code is in hadoop-functions.sh.
#
#
# For example, if you want to add compression to the rotation
# menthod for the .out files that daemons generate, you can do
# that by redefining the hadoop_rotate_log function by
# uncommenting this code block:
#function hadoop_rotate_log
#{
# #
# # log rotation (mainly used for .out files)
# # Users are likely to replace this one for something
# # that gzips or uses dates or who knows what.
# #
# # be aware that &1 and &2 might go through here
# # so don't do anything too crazy...
# #
# local log=$1;
# local num=${2:-5};
#
# if [[ -f "${log}" ]]; then # rotate logs
# while [[ ${num} -gt 1 ]]; do
# #shellcheck disable=SC2086
# let prev=${num}-1
# if [[ -f "${log}.${prev}" ]]; then
# mv "${log}.${prev}" "${log}.${num}"
# fi
# num=${prev}
# done
# mv "${log}" "${log}.${num}"
# gzip -9 "${log}.${num}"
# fi
#}
#
#
# Another example: finding java
#
# By default, Hadoop assumes that $JAVA_HOME is always defined
# outside of its configuration. Eons ago, Apple standardized
# on a helper program called java_home to find it for you.
#
#function hadoop_java_setup
#{
#
# if [[ -z "${JAVA_HOME}" ]]; then
# case $HADOOP_OS_TYPE in
# Darwin*)
# JAVA_HOME=$(/usr/libexec/java_home)
# ;;
# esac
# fi
#
# # Bail if we did not detect it
# if [[ -z "${JAVA_HOME}" ]]; then
# echo "ERROR: JAVA_HOME is not set and could not be found." 1>&2
# exit 1
# fi
#
# if [[ ! -d "${JAVA_HOME}" ]]; then
# echo "ERROR: JAVA_HOME (${JAVA_HOME}) does not exist." 1>&2
# exit 1
# fi
#
# JAVA="${JAVA_HOME}/bin/java"
#
# if [[ ! -x ${JAVA} ]]; then
# echo "ERROR: ${JAVA} is not executable." 1>&2
# exit 1
# fi
# JAVA_HEAP_MAX=-Xmx1g
# HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-128}
#
# # check envvars which might override default args
# if [[ -n "$HADOOP_HEAPSIZE" ]]; then
# JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE}m"
# fi
#}

View File

@ -110,8 +110,9 @@ import com.google.common.base.Preconditions;
*
* <p>Unless explicitly turned off, Hadoop by default specifies two
* resources, loaded in-order from the classpath: <ol>
* <li><tt><a href="{@docRoot}/../core-default.html">core-default.xml</a>
* </tt>: Read-only defaults for hadoop.</li>
* <li><tt>
* <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml">
* core-default.xml</a></tt>: Read-only defaults for hadoop.</li>
* <li><tt>core-site.xml</tt>: Site-specific configuration for a given hadoop
* installation.</li>
* </ol>

View File

@ -129,7 +129,7 @@ Native Libraries Guide
library:
----
$ mvn package -Pdist,native -Dskiptests -Dtar
$ mvn package -Pdist,native -DskipTests -Dtar
----
You should see the newly-built library in:

View File

@ -402,6 +402,9 @@ Release 2.6.0 - UNRELEASED
HDFS-6850. Move NFS out of order write unit tests into TestWrites class.
(Zhe Zhang via atm)
HDFS-6188. An ip whitelist based implementation of TrustedChannelResolver.
(Benoy Antony via Arpit Agarwal)
OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang)
@ -514,6 +517,9 @@ Release 2.6.0 - UNRELEASED
HDFS-6825. Edit log corruption due to delayed block removal.
(Yongjun Zhang via wang)
HDFS-6569. OOB message can't be sent to the client when DataNode shuts down for upgrade
(brandonli)
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -57,9 +57,9 @@ excludeFilenameRemote=$("$HADOOP_PREFIX/bin/hdfs" getconf -excludeFile)
if [ "$excludeFilenameRemote" = '' ] ; then
echo \
"Error: hdfs getconf -excludeFile returned empty string, " \
"please setup dfs.hosts.exclude in hdfs-site.xml in local cluster " \
"configuration and on all namenodes"
"Error: hdfs getconf -excludeFile returned empty string, " \
"please setup dfs.hosts.exclude in hdfs-site.xml in local cluster " \
"configuration and on all namenodes"
exit 1
fi

View File

@ -15,253 +15,241 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Environment Variables
#
# JSVC_HOME home directory of jsvc binary. Required for starting secure
# datanode.
#
# JSVC_OUTFILE path to jsvc output file. Defaults to
# $HADOOP_LOG_DIR/jsvc.out.
#
# JSVC_ERRFILE path to jsvc error file. Defaults to $HADOOP_LOG_DIR/jsvc.err.
bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin" > /dev/null; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
function print_usage(){
echo "Usage: hdfs [--config confdir] COMMAND"
function hadoop_usage
{
echo "Usage: hdfs [--config confdir] [--daemon (start|stop|status)] COMMAND"
echo " where COMMAND is one of:"
echo " dfs run a filesystem command on the file systems supported in Hadoop."
echo " namenode -format format the DFS filesystem"
echo " secondarynamenode run the DFS secondary namenode"
echo " namenode run the DFS namenode"
echo " journalnode run the DFS journalnode"
echo " zkfc run the ZK Failover Controller daemon"
echo " datanode run a DFS datanode"
echo " dfsadmin run a DFS admin client"
echo " haadmin run a DFS HA admin client"
echo " fsck run a DFS filesystem checking utility"
echo " balancer run a cluster balancing utility"
echo " jmxget get JMX exported values from NameNode or DataNode."
echo " oiv apply the offline fsimage viewer to an fsimage"
echo " oiv_legacy apply the offline fsimage viewer to an legacy fsimage"
echo " oev apply the offline edits viewer to an edits file"
echo " cacheadmin configure the HDFS cache"
echo " classpath prints the class path needed to get the"
echo " Hadoop jar and the required libraries"
echo " datanode run a DFS datanode"
echo " dfs run a filesystem command on the file system"
echo " dfsadmin run a DFS admin client"
echo " fetchdt fetch a delegation token from the NameNode"
echo " fsck run a DFS filesystem checking utility"
echo " getconf get config values from configuration"
echo " groups get the groups which users belong to"
echo " haadmin run a DFS HA admin client"
echo " jmxget get JMX exported values from NameNode or DataNode."
echo " journalnode run the DFS journalnode"
echo " lsSnapshottableDir list all snapshottable dirs owned by the current user"
echo " Use -help to see options"
echo " namenode run the DFS namenode"
echo " Use -format to initialize the DFS filesystem"
echo " nfs3 run an NFS version 3 gateway"
echo " oev apply the offline edits viewer to an edits file"
echo " oiv apply the offline fsimage viewer to an fsimage"
echo " oiv_legacy apply the offline fsimage viewer to a legacy fsimage"
echo " portmap run a portmap service"
echo " secondarynamenode run the DFS secondary namenode"
echo " snapshotDiff diff two snapshots of a directory or diff the"
echo " current directory contents with a snapshot"
echo " lsSnapshottableDir list all snapshottable dirs owned by the current user"
echo " Use -help to see options"
echo " portmap run a portmap service"
echo " nfs3 run an NFS version 3 gateway"
echo " cacheadmin configure the HDFS cache"
echo " zkfc run the ZK Failover Controller daemon"
echo " crypto configure HDFS encryption zones"
echo ""
echo "Most commands print help when invoked w/o parameters."
}
if [ $# = 0 ]; then
print_usage
exit
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
if [[ $# = 0 ]]; then
hadoop_exit_with_usage 1
fi
COMMAND=$1
shift
case $COMMAND in
# usage flags
--help|-help|-h)
print_usage
case ${COMMAND} in
balancer)
CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_BALANCER_OPTS}"
;;
cacheadmin)
CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin
;;
classpath)
hadoop_finalize
echo "${CLASSPATH}"
exit
;;
;;
crypto)
CLASS=org.apache.hadoop.hdfs.tools.CryptoAdmin
;;
datanode)
daemon="true"
# Determine if we're starting a secure datanode, and
# if so, redefine appropriate variables
if [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
secure_service="true"
secure_user="${HADOOP_SECURE_DN_USER}"
# backward compatiblity
HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_DN_PID_DIR}"
HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_DN_LOG_DIR}"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DN_SECURE_EXTRA_OPTS} ${HADOOP_DATANODE_OPTS}"
CLASS="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter"
else
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS}"
CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
fi
;;
dfs)
CLASS=org.apache.hadoop.fs.FsShell
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
dfsadmin)
CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
fetchdt)
CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
;;
fsck)
CLASS=org.apache.hadoop.hdfs.tools.DFSck
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
getconf)
CLASS=org.apache.hadoop.hdfs.tools.GetConf
;;
groups)
CLASS=org.apache.hadoop.hdfs.tools.GetGroups
;;
haadmin)
CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
CLASSPATH="${CLASSPATH}:${TOOL_PATH}"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
journalnode)
daemon="true"
CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOURNALNODE_OPTS}"
;;
jmxget)
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
;;
lsSnapshottableDir)
CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
;;
namenode)
daemon="true"
CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NAMENODE_OPTS}"
;;
nfs3)
daemon="true"
if [[ -n "${HADOOP_PRIVILEGED_NFS_USER}" ]]; then
secure_service="true"
secure_user="${HADOOP_PRIVILEGED_NFS_USER}"
# backward compatiblity
HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_NFS3_PID_DIR}"
HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_NFS3_LOG_DIR}"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_SECURE_EXTRA_OPTS} ${HADOOP_NFS3_OPTS}"
CLASS=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter
else
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS}"
CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
fi
;;
oev)
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
;;
oiv)
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
;;
oiv_legacy)
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
;;
portmap)
daemon="true"
CLASS=org.apache.hadoop.portmap.Portmap
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_PORTMAP_OPTS}"
;;
secondarynamenode)
daemon="true"
CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_SECONDARYNAMENODE_OPTS}"
;;
snapshotDiff)
CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
;;
zkfc)
daemon="true"
CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_ZKFC_OPTS}"
;;
-*)
hadoop_exit_with_usage 1
;;
*)
CLASS="${COMMAND}"
;;
esac
# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
if [ "$COMMAND" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
if [ -n "$JSVC_HOME" ]; then
if [ -n "$HADOOP_SECURE_DN_PID_DIR" ]; then
HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
fi
if [ -n "$HADOOP_SECURE_DN_LOG_DIR" ]; then
HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
fi
HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
starting_secure_dn="true"
else
echo "It looks like you're trying to start a secure DN, but \$JSVC_HOME"\
"isn't set. Falling back to starting insecure DN."
if [[ -n "${secure_service}" ]]; then
HADOOP_SECURE_USER="${secure_user}"
if hadoop_verify_secure_prereq; then
hadoop_setup_secure_service
priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND-$HOSTNAME}.out"
priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND-$HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND-$HOSTNAME}.pid"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
fi
fi
# Determine if we're starting a privileged NFS daemon, and if so, redefine appropriate variables
if [ "$COMMAND" == "nfs3" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_PRIVILEGED_NFS_USER" ]; then
if [ -n "$JSVC_HOME" ]; then
if [ -n "$HADOOP_PRIVILEGED_NFS_PID_DIR" ]; then
HADOOP_PID_DIR=$HADOOP_PRIVILEGED_NFS_PID_DIR
fi
if [ -n "$HADOOP_PRIVILEGED_NFS_LOG_DIR" ]; then
HADOOP_LOG_DIR=$HADOOP_PRIVILEGED_NFS_LOG_DIR
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
fi
HADOOP_IDENT_STRING=$HADOOP_PRIVILEGED_NFS_USER
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
starting_privileged_nfs="true"
else
echo "It looks like you're trying to start a privileged NFS server, but"\
"\$JSVC_HOME isn't set. Falling back to starting unprivileged NFS server."
fi
fi
if [ "$COMMAND" = "namenode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
elif [ "$COMMAND" = "zkfc" ] ; then
CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS"
elif [ "$COMMAND" = "secondarynamenode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
elif [ "$COMMAND" = "datanode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
if [ "$starting_secure_dn" = "true" ]; then
HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS"
else
HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS"
fi
elif [ "$COMMAND" = "journalnode" ] ; then
CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOURNALNODE_OPTS"
elif [ "$COMMAND" = "dfs" ] ; then
CLASS=org.apache.hadoop.fs.FsShell
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "dfsadmin" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "haadmin" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "fsck" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DFSck
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "balancer" ] ; then
CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
elif [ "$COMMAND" = "jmxget" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
elif [ "$COMMAND" = "oiv" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
elif [ "$COMMAND" = "oiv_legacy" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
elif [ "$COMMAND" = "oev" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
elif [ "$COMMAND" = "fetchdt" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
elif [ "$COMMAND" = "getconf" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.GetConf
elif [ "$COMMAND" = "groups" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.GetGroups
elif [ "$COMMAND" = "snapshotDiff" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
elif [ "$COMMAND" = "lsSnapshottableDir" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
elif [ "$COMMAND" = "portmap" ] ; then
CLASS=org.apache.hadoop.portmap.Portmap
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_PORTMAP_OPTS"
elif [ "$COMMAND" = "nfs3" ] ; then
CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NFS3_OPTS"
elif [ "$COMMAND" = "cacheadmin" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin
elif [ "$COMMAND" = "crypto" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.CryptoAdmin
else
CLASS="$COMMAND"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
fi
export CLASSPATH=$CLASSPATH
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
# Check to see if we should start a secure datanode
if [ "$starting_secure_dn" = "true" ]; then
if [ "$HADOOP_PID_DIR" = "" ]; then
HADOOP_SECURE_DN_PID="/tmp/hadoop_secure_dn.pid"
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
if [[ -n "${secure_service}" ]]; then
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
else
HADOOP_SECURE_DN_PID="$HADOOP_PID_DIR/hadoop_secure_dn.pid"
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
fi
fi
JSVC=$JSVC_HOME/jsvc
if [ ! -f $JSVC ]; then
echo "JSVC_HOME is not set correctly so jsvc cannot be found. jsvc is required to run secure datanodes. "
echo "Please download and install jsvc from http://archive.apache.org/dist/commons/daemon/binaries/ "\
"and set JSVC_HOME to the directory containing the jsvc binary."
exit
fi
hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
hadoop_finalize
if [[ ! $JSVC_OUTFILE ]]; then
JSVC_OUTFILE="$HADOOP_LOG_DIR/jsvc.out"
fi
export CLASSPATH
if [[ ! $JSVC_ERRFILE ]]; then
JSVC_ERRFILE="$HADOOP_LOG_DIR/jsvc.err"
fi
exec "$JSVC" \
-Dproc_$COMMAND -outfile "$JSVC_OUTFILE" \
-errfile "$JSVC_ERRFILE" \
-pidfile "$HADOOP_SECURE_DN_PID" \
-nodetach \
-user "$HADOOP_SECURE_DN_USER" \
-cp "$CLASSPATH" \
$JAVA_HEAP_MAX $HADOOP_OPTS \
org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter "$@"
elif [ "$starting_privileged_nfs" = "true" ] ; then
if [ "$HADOOP_PID_DIR" = "" ]; then
HADOOP_PRIVILEGED_NFS_PID="/tmp/hadoop_privileged_nfs3.pid"
if [[ -n "${daemon}" ]]; then
if [[ -n "${secure_service}" ]]; then
hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\
"${daemon_pidfile}" "${daemon_outfile}" \
"${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
else
HADOOP_PRIVILEGED_NFS_PID="$HADOOP_PID_DIR/hadoop_privileged_nfs3.pid"
hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\
"${daemon_pidfile}" "${daemon_outfile}" "$@"
fi
JSVC=$JSVC_HOME/jsvc
if [ ! -f $JSVC ]; then
echo "JSVC_HOME is not set correctly so jsvc cannot be found. jsvc is required to run privileged NFS gateways. "
echo "Please download and install jsvc from http://archive.apache.org/dist/commons/daemon/binaries/ "\
"and set JSVC_HOME to the directory containing the jsvc binary."
exit
fi
if [[ ! $JSVC_OUTFILE ]]; then
JSVC_OUTFILE="$HADOOP_LOG_DIR/nfs3_jsvc.out"
fi
if [[ ! $JSVC_ERRFILE ]]; then
JSVC_ERRFILE="$HADOOP_LOG_DIR/nfs3_jsvc.err"
fi
exec "$JSVC" \
-Dproc_$COMMAND -outfile "$JSVC_OUTFILE" \
-errfile "$JSVC_ERRFILE" \
-pidfile "$HADOOP_PRIVILEGED_NFS_PID" \
-nodetach \
-user "$HADOOP_PRIVILEGED_NFS_USER" \
-cp "$CLASSPATH" \
$JAVA_HEAP_MAX $HADOOP_OPTS \
org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter "$@"
exit $?
else
# run it
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
# shellcheck disable=SC2086
hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
fi

View File

@ -18,19 +18,67 @@
# included in all the hdfs scripts with source command
# should not be executed directly
bin=`which "$0"`
bin=`dirname "${bin}"`
bin=`cd "$bin"; pwd`
function hadoop_subproject_init
{
if [ -e "${HADOOP_CONF_DIR}/hdfs-env.sh" ]; then
. "${HADOOP_CONF_DIR}/hdfs-env.sh"
fi
# at some point in time, someone thought it would be a good idea to
# create separate vars for every subproject. *sigh*
# let's perform some overrides and setup some defaults for bw compat
# this way the common hadoop var's == subproject vars and can be
# used interchangeable from here on out
# ...
# this should get deprecated at some point.
HADOOP_LOG_DIR="${HADOOP_HDFS_LOG_DIR:-$HADOOP_LOG_DIR}"
HADOOP_HDFS_LOG_DIR="${HADOOP_LOG_DIR}"
HADOOP_LOGFILE="${HADOOP_HDFS_LOGFILE:-$HADOOP_LOGFILE}"
HADOOP_HDFS_LOGFILE="${HADOOP_LOGFILE}"
HADOOP_NICENESS=${HADOOP_HDFS_NICENESS:-$HADOOP_NICENESS}
HADOOP_HDFS_NICENESS="${HADOOP_NICENESS}"
HADOOP_STOP_TIMEOUT=${HADOOP_HDFS_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}
HADOOP_HDFS_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
HADOOP_PID_DIR="${HADOOP_HDFS_PID_DIR:-$HADOOP_PID_DIR}"
HADOOP_HDFS_PID_DIR="${HADOOP_PID_DIR}"
HADOOP_ROOT_LOGGER=${HADOOP_HDFS_ROOT_LOGGER:-$HADOOP_ROOT_LOGGER}
HADOOP_HDFS_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
HADOOP_HDFS_HOME="${HADOOP_HDFS_HOME:-$HADOOP_HOME_DIR}"
HADOOP_IDENT_STRING="${HADOOP_HDFS_IDENT_STRING:-$HADOOP_IDENT_STRING}"
HADOOP_HDFS_IDENT_STRING="${HADOOP_IDENT_STRING}"
# turn on the defaults
export HADOOP_NAMENODE_OPTS=${HADOOP_NAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"}
export HADOOP_SECONDARYNAMENODE_OPTS=${HADOOP_SECONDARYNAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"}
export HADOOP_DATANODE_OPTS=${HADOOP_DATANODE_OPTS:-"-Dhadoop.security.logger=ERROR,RFAS"}
export HADOOP_DN_SECURE_EXTRA_OPTS=${HADOOP_DN_SECURE_EXTRA_OPTS:-"-jvm server"}
export HADOOP_NFS3_SECURE_EXTRA_OPTS=${HADOOP_NFS3_SECURE_EXTRA_OPTS:-"-jvm server"}
export HADOOP_PORTMAP_OPTS=${HADOOP_PORTMAP_OPTS:-"-Xmx512m"}
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
. ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh
elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
. "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
. "$HADOOP_HOME"/libexec/hadoop-config.sh
else
echo "Hadoop common not found."
exit
if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
_hd_this="${BASH_SOURCE-$0}"
HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hd_this}")" >/dev/null && pwd -P)
fi
if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
. "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
. "${HADOOP_HOME}/libexec/hadoop-config.sh"
else
echo "ERROR: Hadoop common not found." 2>&1
exit 1
fi

View File

@ -20,24 +20,40 @@
# This script refreshes all namenodes, it's a simple wrapper
# for dfsadmin to support multiple namenodes.
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
namenodes=$("$HADOOP_PREFIX/bin/hdfs" getconf -nnRpcAddresses)
if [ "$?" != '0' ] ; then errorFlag='1' ;
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
for namenode in $namenodes ; do
echo "Refreshing namenode [$namenode]"
"$HADOOP_PREFIX/bin/hdfs" dfsadmin -fs hdfs://$namenode -refreshNodes
if [ "$?" != '0' ] ; then errorFlag='1' ; fi
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
namenodes=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -nnRpcAddresses)
if [[ "$?" != '0' ]] ; then
errorFlag='1' ;
else
for namenode in ${namenodes} ; do
echo "Refreshing namenode [${namenode}]"
"${HADOOP_HDFS_HOME}/bin/hdfs" dfsadmin \
-fs hdfs://${namenode} -refreshNodes
if [[ "$?" != '0' ]]; then
errorFlag='1'
fi
done
fi
if [ "$errorFlag" = '1' ] ; then
if [[ "${errorFlag}" = '1' ]] ; then
echo "Error: refresh of namenodes failed, see error messages above."
exit 1
else

View File

@ -15,13 +15,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
function usage
{
echo "Usage: start-balancer.sh [--config confdir] [-policy <policy>] [-threshold <threshold>]"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
# Start balancer daemon.
"$HADOOP_PREFIX"/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start balancer $@
exec "${bin}/hadoop-daemon.sh" --config "${HADOOP_CONF_DIR}" start balancer "$@"

View File

@ -20,98 +20,128 @@
# Optinally upgrade or rollback dfs state.
# Run this on master node.
usage="Usage: start-dfs.sh [-upgrade|-rollback] [other options such as -clusterId]"
function hadoop_usage
{
echo "Usage: start-dfs.sh [-upgrade|-rollback] [-clusterId]"
}
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
# get arguments
if [ $# -ge 1 ]; then
nameStartOpt="$1"
shift
case "$nameStartOpt" in
(-upgrade)
;;
(-rollback)
dataStartOpt="$nameStartOpt"
;;
(*)
echo $usage
exit 1
;;
esac
if [[ $# -ge 1 ]]; then
nameStartOpt="$1"
shift
case "$nameStartOpt" in
-upgrade)
;;
-rollback)
dataStartOpt="$nameStartOpt"
;;
*)
hadoop_exit_with_usage 1
;;
esac
fi
#Add other possible options
nameStartOpt="$nameStartOpt $@"
#---------------------------------------------------------
# namenodes
NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)
NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -namenodes 2>/dev/null)
if [[ -z "${NAMENODES}" ]]; then
NAMENODES=$(hostname)
fi
echo "Starting namenodes on [$NAMENODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$NAMENODES" \
--script "$bin/hdfs" start namenode $nameStartOpt
"${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
--hostnames "${NAMENODES}" \
start namenode ${nameStartOpt}
#---------------------------------------------------------
# datanodes (using default slaves file)
if [ -n "$HADOOP_SECURE_DN_USER" ]; then
echo \
"Attempting to start secure cluster, skipping datanodes. " \
"Run start-secure-dns.sh as root to complete startup."
if [[ -n "${HADOOP_SECURE_DN_USER}" ]] &&
[[ -z "${HADOOP_SECURE_COMMAND}" ]]; then
echo "ERROR: Attempting to start secure cluster, skipping datanodes. "
echo "Run start-secure-dns.sh as root or configure "
echo "\${HADOOP_SECURE_COMMAND} to complete startup."
else
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--script "$bin/hdfs" start datanode $dataStartOpt
echo "Starting datanodes"
"${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
start datanode ${dataStartOpt}
fi
#---------------------------------------------------------
# secondary namenodes (if any)
SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)
SECONDARY_NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -secondarynamenodes 2>/dev/null)
if [ -n "$SECONDARY_NAMENODES" ]; then
echo "Starting secondary namenodes [$SECONDARY_NAMENODES]"
if [[ "${SECONDARY_NAMENODES}" == "0.0.0.0" ]]; then
SECONDARY_NAMENODES=$(hostname)
fi
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$SECONDARY_NAMENODES" \
--script "$bin/hdfs" start secondarynamenode
if [[ -n "${SECONDARY_NAMENODES}" ]]; then
echo "Starting secondary namenodes [${SECONDARY_NAMENODES}]"
"${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
--hostnames "${SECONDARY_NAMENODES}" \
start secondarynamenode
fi
#---------------------------------------------------------
# quorumjournal nodes (if any)
SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
SHARED_EDITS_DIR=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
case "$SHARED_EDITS_DIR" in
qjournal://*)
JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
echo "Starting journal nodes [$JOURNAL_NODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$JOURNAL_NODES" \
--script "$bin/hdfs" start journalnode ;;
case "${SHARED_EDITS_DIR}" in
qjournal://*)
JOURNAL_NODES=$(echo "${SHARED_EDITS_DIR}" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
echo "Starting journal nodes [${JOURNAL_NODES}]"
"${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
--hostnames "${JOURNAL_NODES}" \
start journalnode
;;
esac
#---------------------------------------------------------
# ZK Failover controllers, if auto-HA is enabled
AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
echo "Starting ZK Failover Controllers on NN hosts [$NAMENODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$NAMENODES" \
--script "$bin/hdfs" start zkfc
AUTOHA_ENABLED=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.ha.automatic-failover.enabled | tr '[:upper:]' '[:lower:]')
if [[ "${AUTOHA_ENABLED}" = "true" ]]; then
echo "Starting ZK Failover Controllers on NN hosts [${NAMENODES}]"
"${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
--hostnames "${NAMENODES}" \
start zkfc
fi
# eof

View File

@ -17,17 +17,33 @@
# Run as root to start secure datanodes in a security-enabled cluster.
usage="Usage (run as root in order to start secure datanodes): start-secure-dns.sh"
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
function hadoop_usage {
echo "Usage: start-secure-dns.sh"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
if [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
"$HADOOP_PREFIX"/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start datanode $dataStartOpt
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
echo $usage
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
exec "${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" start datanode "${dataStartOpt}"
else
echo hadoop_usage_and_exit 1
fi

View File

@ -15,14 +15,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
function hadoop_usage
{
echo "Usage: stop-balancer.sh [--config confdir]"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
# Stop balancer daemon.
# Run this on the machine where the balancer is running
"$HADOOP_PREFIX"/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs stop balancer
"${bin}/hadoop-daemon.sh" --config "${HADOOP_CONF_DIR}" stop balancer

View File

@ -15,75 +15,100 @@
# See the License for the specific language governing permissions and
# limitations under the License.
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
function hadoop_usage
{
echo "Usage: start-balancer.sh [--config confdir] [-policy <policy>] [-threshold <threshold>]"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
#---------------------------------------------------------
# namenodes
NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)
NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -namenodes)
echo "Stopping namenodes on [$NAMENODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$NAMENODES" \
--script "$bin/hdfs" stop namenode
"${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
--hostnames "${NAMENODES}" \
stop namenode
#---------------------------------------------------------
# datanodes (using default slaves file)
if [ -n "$HADOOP_SECURE_DN_USER" ]; then
if [[ -n "${HADOOP_SECURE_DN_USER}" ]] &&
[[ -z "${HADOOP_SECURE_COMMAND}" ]]; then
echo \
"Attempting to stop secure cluster, skipping datanodes. " \
"Run stop-secure-dns.sh as root to complete shutdown."
"ERROR: Attempting to stop secure cluster, skipping datanodes. " \
"Run stop-secure-dns.sh as root to complete shutdown."
else
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--script "$bin/hdfs" stop datanode
echo "Stopping datanodes"
"${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" stop datanode
fi
#---------------------------------------------------------
# secondary namenodes (if any)
SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)
SECONDARY_NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -secondarynamenodes 2>/dev/null)
if [ -n "$SECONDARY_NAMENODES" ]; then
echo "Stopping secondary namenodes [$SECONDARY_NAMENODES]"
if [[ "${SECONDARY_NAMENODES}" == "0.0.0.0" ]]; then
SECONDARY_NAMENODES=$(hostname)
fi
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$SECONDARY_NAMENODES" \
--script "$bin/hdfs" stop secondarynamenode
if [[ -n "${SECONDARY_NAMENODES}" ]]; then
echo "Stopping secondary namenodes [${SECONDARY_NAMENODES}]"
"${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
--hostnames "${SECONDARY_NAMENODES}" \
stop secondarynamenode
fi
#---------------------------------------------------------
# quorumjournal nodes (if any)
SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
SHARED_EDITS_DIR=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
case "$SHARED_EDITS_DIR" in
qjournal://*)
JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
echo "Stopping journal nodes [$JOURNAL_NODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$JOURNAL_NODES" \
--script "$bin/hdfs" stop journalnode ;;
case "${SHARED_EDITS_DIR}" in
qjournal://*)
JOURNAL_NODES=$(echo "${SHARED_EDITS_DIR}" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
echo "Stopping journal nodes [${JOURNAL_NODES}]"
"${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
--hostnames "${JOURNAL_NODES}" \
stop journalnode
;;
esac
#---------------------------------------------------------
# ZK Failover controllers, if auto-HA is enabled
AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
echo "Stopping ZK Failover Controllers on NN hosts [$NAMENODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \
--hostnames "$NAMENODES" \
--script "$bin/hdfs" stop zkfc
AUTOHA_ENABLED=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.ha.automatic-failover.enabled | tr '[:upper:]' '[:lower:]')
if [[ "${AUTOHA_ENABLED}" = "true" ]]; then
echo "Stopping ZK Failover Controllers on NN hosts [${NAMENODES}]"
"${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
--hostnames "${NAMENODES}" \
stop zkfc
fi
# eof

View File

@ -17,17 +17,33 @@
# Run as root to start secure datanodes in a security-enabled cluster.
usage="Usage (run as root in order to stop secure datanodes): stop-secure-dns.sh"
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
function hadoop_usage {
echo "Usage (run as root in order to stop secure datanodes): stop-secure-dns.sh"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
if [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
"$HADOOP_PREFIX"/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs stop datanode
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
echo $usage
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
"${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" stop datanode
else
hadoop_exit_with_usage 1
fi

View File

@ -0,0 +1,119 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.protocol.datatransfer;
import java.net.InetAddress;
import java.net.UnknownHostException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.datatransfer.TrustedChannelResolver;
import org.apache.hadoop.util.CombinedIPWhiteList;
public class WhitelistBasedTrustedChannelResolver extends TrustedChannelResolver {
private CombinedIPWhiteList whiteListForServer;
private CombinedIPWhiteList whitelistForClient;
private static final String FIXEDWHITELIST_DEFAULT_LOCATION = "/etc/hadoop/fixedwhitelist";
private static final String VARIABLEWHITELIST_DEFAULT_LOCATION = "/etc/hadoop/whitelist";
/**
* Path to the file to containing subnets and ip addresses to form fixed whitelist.
*/
public static final String DFS_DATATRANSFER_SERVER_FIXEDWHITELIST_FILE =
"dfs.datatransfer.server.fixedwhitelist.file";
/**
* Enables/Disables variable whitelist
*/
public static final String DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_ENABLE =
"dfs.datatransfer.server.variablewhitelist.enable";
/**
* Path to the file to containing subnets and ip addresses to form variable whitelist.
*/
public static final String DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_FILE =
"dfs.datatransfer.server.variablewhitelist.file";
/**
* time in seconds by which the variable whitelist file is checked for updates
*/
public static final String DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_CACHE_SECS =
"dfs.datatransfer.server.variablewhitelist.cache.secs";
/**
* Path to the file to containing subnets and ip addresses to form fixed whitelist.
*/
public static final String DFS_DATATRANSFER_CLIENT_FIXEDWHITELIST_FILE =
"dfs.datatransfer.client.fixedwhitelist.file";
/**
* Enables/Disables variable whitelist
*/
public static final String DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_ENABLE =
"dfs.datatransfer.client.variablewhitelist.enable";
/**
* Path to the file to containing subnets and ip addresses to form variable whitelist.
*/
public static final String DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_FILE =
"dfs.datatransfer.client.variablewhitelist.file";
/**
* time in seconds by which the variable whitelist file is checked for updates
*/
public static final String DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_CACHE_SECS =
"dfs.datatransfer.client.variablewhitelist.cache.secs";
@Override
public void setConf(Configuration conf) {
super.setConf(conf);
String fixedFile = conf.get(DFS_DATATRANSFER_SERVER_FIXEDWHITELIST_FILE,
FIXEDWHITELIST_DEFAULT_LOCATION);
String variableFile = null;
long expiryTime = 0;
if (conf.getBoolean(DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_ENABLE, false)) {
variableFile = conf.get(DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_FILE,
VARIABLEWHITELIST_DEFAULT_LOCATION);
expiryTime =
conf.getLong(DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_CACHE_SECS,3600) * 1000;
}
whiteListForServer = new CombinedIPWhiteList(fixedFile,variableFile,expiryTime);
fixedFile = conf.get(DFS_DATATRANSFER_CLIENT_FIXEDWHITELIST_FILE, fixedFile);
expiryTime = 0;
if (conf.getBoolean(DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_ENABLE, false)) {
variableFile = conf.get(DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_FILE,variableFile);
expiryTime =
conf.getLong(DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_CACHE_SECS,3600) * 1000;
}
whitelistForClient = new CombinedIPWhiteList(fixedFile,variableFile,expiryTime);
}
public boolean isTrusted() {
try {
return whitelistForClient.isIn(InetAddress.getLocalHost().getHostAddress());
} catch (UnknownHostException e) {
return false;
}
}
public boolean isTrusted(InetAddress clientAddress) {
return whiteListForServer.isIn(clientAddress.getHostAddress());
}
}

View File

@ -738,7 +738,12 @@ class BlockReceiver implements Closeable {
LOG.warn("Error managing cache for writer of block " + block, t);
}
}
public void sendOOB() throws IOException, InterruptedException {
((PacketResponder) responder.getRunnable()).sendOOBResponse(PipelineAck
.getRestartOOBStatus());
}
void receiveBlock(
DataOutputStream mirrOut, // output to next datanode
DataInputStream mirrIn, // input from next datanode
@ -830,9 +835,7 @@ class BlockReceiver implements Closeable {
// The worst case is not recovering this RBW replica.
// Client will fall back to regular pipeline recovery.
}
try {
((PacketResponder) responder.getRunnable()).
sendOOBResponse(PipelineAck.getRestartOOBStatus());
try {
// Even if the connection is closed after the ack packet is
// flushed, the client can react to the connection closure
// first. Insert a delay to lower the chance of client
@ -840,8 +843,6 @@ class BlockReceiver implements Closeable {
Thread.sleep(1000);
} catch (InterruptedException ie) {
// It is already going down. Ignore this.
} catch (IOException ioe) {
LOG.info("Error sending OOB Ack.", ioe);
}
}
responder.interrupt();

View File

@ -270,6 +270,7 @@ public class DataNode extends Configured
public final static String EMPTY_DEL_HINT = "";
final AtomicInteger xmitsInProgress = new AtomicInteger();
Daemon dataXceiverServer = null;
DataXceiverServer xserver = null;
Daemon localDataXceiverServer = null;
ShortCircuitRegistry shortCircuitRegistry = null;
ThreadGroup threadGroup = null;
@ -649,8 +650,8 @@ public class DataNode extends Configured
streamingAddr = tcpPeerServer.getStreamingAddr();
LOG.info("Opened streaming server at " + streamingAddr);
this.threadGroup = new ThreadGroup("dataXceiverServer");
this.dataXceiverServer = new Daemon(threadGroup,
new DataXceiverServer(tcpPeerServer, conf, this));
xserver = new DataXceiverServer(tcpPeerServer, conf, this);
this.dataXceiverServer = new Daemon(threadGroup, xserver);
this.threadGroup.setDaemon(true); // auto destroy when empty
if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
@ -1137,6 +1138,11 @@ public class DataNode extends Configured
dataNodeInfoBeanName = MBeans.register("DataNode", "DataNodeInfo", this);
}
@VisibleForTesting
public DataXceiverServer getXferServer() {
return xserver;
}
@VisibleForTesting
public int getXferPort() {
return streamingAddr.getPort();
@ -1395,6 +1401,7 @@ public class DataNode extends Configured
// in order to avoid any further acceptance of requests, but the peers
// for block writes are not closed until the clients are notified.
if (dataXceiverServer != null) {
xserver.sendOOBToPeers();
((DataXceiverServer) this.dataXceiverServer.getRunnable()).kill();
this.dataXceiverServer.interrupt();
}

View File

@ -103,7 +103,8 @@ class DataXceiver extends Receiver implements Runnable {
private long opStartTime; //the start time of receiving an Op
private final InputStream socketIn;
private OutputStream socketOut;
private BlockReceiver blockReceiver = null;
/**
* Client Name used in previous operation. Not available on first request
* on the socket.
@ -159,6 +160,12 @@ class DataXceiver extends Receiver implements Runnable {
return socketOut;
}
public void sendOOB() throws IOException, InterruptedException {
LOG.info("Sending OOB to peer: " + peer);
if(blockReceiver!=null)
blockReceiver.sendOOB();
}
/**
* Read/write data from/to the DataXceiverServer.
*/
@ -168,7 +175,7 @@ class DataXceiver extends Receiver implements Runnable {
Op op = null;
try {
dataXceiverServer.addPeer(peer, Thread.currentThread());
dataXceiverServer.addPeer(peer, Thread.currentThread(), this);
peer.setWriteTimeout(datanode.getDnConf().socketWriteTimeout);
InputStream input = socketIn;
IOStreamPair saslStreams = datanode.saslServer.receive(peer, socketOut,
@ -584,7 +591,6 @@ class DataXceiver extends Receiver implements Runnable {
DataOutputStream mirrorOut = null; // stream to next target
DataInputStream mirrorIn = null; // reply from next target
Socket mirrorSock = null; // socket to next target
BlockReceiver blockReceiver = null; // responsible for data handling
String mirrorNode = null; // the name:port of next target
String firstBadLink = ""; // first datanode that failed in connection setup
Status mirrorInStatus = SUCCESS;
@ -747,6 +753,7 @@ class DataXceiver extends Receiver implements Runnable {
IOUtils.closeStream(replyOut);
IOUtils.closeSocket(mirrorSock);
IOUtils.closeStream(blockReceiver);
blockReceiver = null;
}
//update metrics

View File

@ -27,11 +27,11 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.net.PeerServer;
import org.apache.hadoop.hdfs.server.balancer.Balancer;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Daemon;
import com.google.common.annotations.VisibleForTesting;
/**
* Server used for receiving/sending a block of data.
@ -45,6 +45,7 @@ class DataXceiverServer implements Runnable {
private final PeerServer peerServer;
private final DataNode datanode;
private final HashMap<Peer, Thread> peers = new HashMap<Peer, Thread>();
private final HashMap<Peer, DataXceiver> peersXceiver = new HashMap<Peer, DataXceiver>();
private boolean closed = false;
/**
@ -217,18 +218,38 @@ class DataXceiverServer implements Runnable {
}
}
synchronized void addPeer(Peer peer, Thread t) throws IOException {
synchronized void addPeer(Peer peer, Thread t, DataXceiver xceiver)
throws IOException {
if (closed) {
throw new IOException("Server closed.");
}
peers.put(peer, t);
peersXceiver.put(peer, xceiver);
}
synchronized void closePeer(Peer peer) {
peers.remove(peer);
peersXceiver.remove(peer);
IOUtils.cleanup(null, peer);
}
// Sending OOB to all peers
public synchronized void sendOOBToPeers() {
if (!datanode.shutdownForUpgrade) {
return;
}
for (Peer p : peers.keySet()) {
try {
peersXceiver.get(p).sendOOB();
} catch (IOException e) {
LOG.warn("Got error when sending OOB message.", e);
} catch (InterruptedException e) {
LOG.warn("Interrupted when sending OOB message.");
}
}
}
// Notify all peers of the shutdown and restart.
// datanode.shouldRun should still be true and datanode.restarting should
// be set true before calling this method.
@ -247,6 +268,7 @@ class DataXceiverServer implements Runnable {
IOUtils.cleanup(LOG, p);
}
peers.clear();
peersXceiver.clear();
}
// Return the number of peers.
@ -254,7 +276,14 @@ class DataXceiverServer implements Runnable {
return peers.size();
}
// Return the number of peers and DataXceivers.
@VisibleForTesting
synchronized int getNumPeersXceiver() {
return peersXceiver.size();
}
synchronized void releasePeer(Peer peer) {
peers.remove(peer);
peersXceiver.remove(peer);
}
}

View File

@ -27,11 +27,14 @@ import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSOutputStream;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
@ -67,6 +70,7 @@ public class TestDataNodeRollingUpgrade {
private void startCluster() throws IOException {
conf = new HdfsConfiguration();
conf.setInt("dfs.blocksize", 1024*1024);
cluster = new Builder(conf).numDataNodes(REPL_FACTOR).build();
cluster.waitActive();
fs = cluster.getFileSystem();
@ -243,4 +247,48 @@ public class TestDataNodeRollingUpgrade {
shutdownCluster();
}
}
@Test (timeout=600000)
// Test DatanodeXceiver has correct peer-dataxceiver pairs for sending OOB message
public void testDatanodePeersXceiver() throws Exception {
try {
startCluster();
// Create files in DFS.
String testFile1 = "/TestDataNodeXceiver1.dat";
String testFile2 = "/TestDataNodeXceiver2.dat";
String testFile3 = "/TestDataNodeXceiver3.dat";
DFSClient client1 = new DFSClient(NameNode.getAddress(conf), conf);
DFSClient client2 = new DFSClient(NameNode.getAddress(conf), conf);
DFSClient client3 = new DFSClient(NameNode.getAddress(conf), conf);
DFSOutputStream s1 = (DFSOutputStream) client1.create(testFile1, true);
DFSOutputStream s2 = (DFSOutputStream) client2.create(testFile2, true);
DFSOutputStream s3 = (DFSOutputStream) client3.create(testFile3, true);
byte[] toWrite = new byte[1024*1024*8];
Random rb = new Random(1111);
rb.nextBytes(toWrite);
s1.write(toWrite, 0, 1024*1024*8);
s1.flush();
s2.write(toWrite, 0, 1024*1024*8);
s2.flush();
s3.write(toWrite, 0, 1024*1024*8);
s3.flush();
assertTrue(dn.getXferServer().getNumPeersXceiver() == dn.getXferServer()
.getNumPeersXceiver());
s1.close();
s2.close();
s3.close();
assertTrue(dn.getXferServer().getNumPeersXceiver() == dn.getXferServer()
.getNumPeersXceiver());
client1.close();
client2.close();
client3.close();
} finally {
shutdownCluster();
}
}
}

View File

@ -233,6 +233,9 @@ Release 2.6.0 - UNRELEASED
MAPREDUCE-6036. TestJobEndNotifier fails intermittently in branch-2 (chang
li via jlowe)
MAPREDUCE-6012. DBInputSplit creates invalid ranges on Oracle.
(Wei Yan via kasha)
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -15,138 +15,129 @@
# See the License for the specific language governing permissions and
# limitations under the License.
bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin" > /dev/null; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
if [ -e ${HADOOP_LIBEXEC_DIR}/mapred-config.sh ]; then
. ${HADOOP_LIBEXEC_DIR}/mapred-config.sh
else
. "$bin/mapred-config.sh"
fi
function print_usage(){
echo "Usage: mapred [--config confdir] COMMAND"
function hadoop_usage
{
echo "Usage: mapred [--config confdir] [--daemon (start|stop|status)] COMMAND"
echo " where COMMAND is one of:"
echo " pipes run a Pipes job"
echo " job manipulate MapReduce jobs"
echo " queue get information regarding JobQueues"
echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
echo " classpath prints the class path needed for running"
echo " mapreduce subcommands"
echo " historyserver run job history servers as a standalone daemon"
echo " distcp <srcurl> <desturl> copy file or directories recursively"
echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
echo " hsadmin job history server admin interface"
echo " job manipulate MapReduce jobs"
echo " historyserver run job history servers as a standalone daemon"
echo " pipes run a Pipes job"
echo " queue get information regarding JobQueues"
echo " sampler sampler"
echo ""
echo "Most commands print help when invoked w/o parameters."
}
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/mapred-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/mapred-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/mapred-config.sh." 2>&1
exit 1
fi
if [ $# = 0 ]; then
print_usage
exit
hadoop_exit_with_usage 1
fi
COMMAND=$1
shift
case $COMMAND in
# usage flags
--help|-help|-h)
print_usage
exit
;;
case ${COMMAND} in
mradmin|jobtracker|tasktracker|groups)
echo "Sorry, the ${COMMAND} command is no longer supported."
echo "You may find similar functionality with the \"yarn\" shell command."
hadoop_exit_with_usage 1
;;
archive)
CLASS=org.apache.hadoop.tools.HadoopArchives
hadoop_add_classpath "${TOOL_PATH}"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
classpath)
hadoop_finalize
echo "${CLASSPATH}"
exit 0
;;
distcp)
CLASS=org.apache.hadoop.tools.DistCp
hadoop_add_classpath "${TOOL_PATH}"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
historyserver)
daemon="true"
CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOB_HISTORYSERVER_OPTS}"
if [ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]; then
JAVA_HEAP_MAX="-Xmx${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}m"
fi
HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_JHS_LOGGER:-$HADOOP_DAEMON_ROOT_LOGGER}
;;
job)
CLASS=org.apache.hadoop.mapred.JobClient
;;
pipes)
CLASS=org.apache.hadoop.mapred.pipes.Submitter
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
queue)
CLASS=org.apache.hadoop.mapred.JobQueueClient
;;
sampler)
CLASS=org.apache.hadoop.mapred.lib.InputSampler
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
-*|*)
hadoop_exit_with_usage 1
;;
esac
if [ "$COMMAND" = "job" ] ; then
CLASS=org.apache.hadoop.mapred.JobClient
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "queue" ] ; then
CLASS=org.apache.hadoop.mapred.JobQueueClient
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "pipes" ] ; then
CLASS=org.apache.hadoop.mapred.pipes.Submitter
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "sampler" ] ; then
CLASS=org.apache.hadoop.mapred.lib.InputSampler
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "classpath" ] ; then
echo -n
elif [ "$COMMAND" = "historyserver" ] ; then
CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
HADOOP_OPTS="$HADOOP_OPTS -Dmapred.jobsummary.logger=${HADOOP_JHS_LOGGER:-INFO,console} $HADOOP_JOB_HISTORYSERVER_OPTS"
if [ "$HADOOP_JOB_HISTORYSERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$HADOOP_JOB_HISTORYSERVER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "mradmin" ] \
|| [ "$COMMAND" = "jobtracker" ] \
|| [ "$COMMAND" = "tasktracker" ] \
|| [ "$COMMAND" = "groups" ] ; then
echo "Sorry, the $COMMAND command is no longer supported."
echo "You may find similar functionality with the \"yarn\" shell command."
print_usage
exit 1
elif [ "$COMMAND" = "distcp" ] ; then
CLASS=org.apache.hadoop.tools.DistCp
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "archive" ] ; then
CLASS=org.apache.hadoop.tools.HadoopArchives
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "hsadmin" ] ; then
CLASS=org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
else
echo $COMMAND - invalid command
print_usage
exit 1
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
hadoop_add_param HADOOP_OPTS mapred.jobsummary.logger "-Dmapred.jobsummary.logger=${HADOOP_ROOT_LOGGER}"
# shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
fi
# for developers, add mapred classes to CLASSPATH
if [ -d "$HADOOP_MAPRED_HOME/build/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/classes
fi
if [ -d "$HADOOP_MAPRED_HOME/build/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build
fi
if [ -d "$HADOOP_MAPRED_HOME/build/test/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/test/classes
fi
if [ -d "$HADOOP_MAPRED_HOME/build/tools" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/tools
fi
# for releases, add core mapred jar & webapps to CLASSPATH
if [ -d "$HADOOP_PREFIX/${MAPRED_DIR}/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/${MAPRED_DIR}
fi
for f in $HADOOP_MAPRED_HOME/${MAPRED_DIR}/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# Need YARN jars also
for f in $HADOOP_YARN_HOME/${YARN_DIR}/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# add libs to CLASSPATH
for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# add modules to CLASSPATH
for f in $HADOOP_MAPRED_HOME/modules/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
if [ "$COMMAND" = "classpath" ] ; then
echo $CLASSPATH
exit
fi
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
hadoop_finalize
export CLASSPATH
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
if [[ -n "${daemon}" ]]; then
if [[ -n "${secure_service}" ]]; then
hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}"\
"${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \
"${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
else
hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
"${daemon_pidfile}" "${daemon_outfile}" "$@"
fi
exit $?
else
hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
fi

View File

@ -18,35 +18,55 @@
# included in all the mapred scripts with source command
# should not be executed directly
bin=`which "$0"`
bin=`dirname "${bin}"`
bin=`cd "$bin"; pwd`
function hadoop_subproject_init
{
if [ -e "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then
. "${HADOOP_CONF_DIR}/mapred-env.sh"
fi
# at some point in time, someone thought it would be a good idea to
# create separate vars for every subproject. *sigh*
# let's perform some overrides and setup some defaults for bw compat
# this way the common hadoop var's == subproject vars and can be
# used interchangeable from here on out
# ...
# this should get deprecated at some point.
HADOOP_LOG_DIR="${HADOOP_MAPRED_LOG_DIR:-$HADOOP_LOG_DIR}"
HADOOP_MAPRED_LOG_DIR="${HADOOP_LOG_DIR}"
HADOOP_LOGFILE="${HADOOP_MAPRED_LOGFILE:-$HADOOP_LOGFILE}"
HADOOP_MAPRED_LOGFILE="${HADOOP_LOGFILE}"
HADOOP_NICENESS="${HADOOP_MAPRED_NICENESS:-$HADOOP_NICENESS}"
HADOOP_MAPRED_NICENESS="${HADOOP_NICENESS}"
HADOOP_STOP_TIMEOUT="${HADOOP_MAPRED_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}"
HADOOP_MAPRED_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
HADOOP_PID_DIR="${HADOOP_MAPRED_PID_DIR:-$HADOOP_PID_DIR}"
HADOOP_MAPRED_PID_DIR="${HADOOP_PID_DIR}"
HADOOP_ROOT_LOGGER="${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console}"
HADOOP_MAPRED_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
HADOOP_MAPRED_HOME="${HADOOP_MAPRED_HOME:-$HADOOP_HOME_DIR}"
HADOOP_IDENT_STRING="${HADOOP_MAPRED_IDENT_STRING:-$HADOOP_IDENT_STRING}"
HADOOP_MAPRED_IDENT_STRING="${HADOOP_IDENT_STRING}"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
_mc_this="${BASH_SOURCE-$0}"
HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_mc_this}")" >/dev/null && pwd -P)
fi
if [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
. "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
elif [ -e "${HADOOP_COMMON_HOME}/bin/hadoop-config.sh" ]; then
. "$HADOOP_COMMON_HOME"/bin/hadoop-config.sh
elif [ -e "${HADOOP_HOME}/bin/hadoop-config.sh" ]; then
. "$HADOOP_HOME"/bin/hadoop-config.sh
elif [ -e "${HADOOP_MAPRED_HOME}/bin/hadoop-config.sh" ]; then
. "$HADOOP_MAPRED_HOME"/bin/hadoop-config.sh
elif [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
elif [[ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_HOME}/libexec/hadoop-config.sh"
else
echo "Hadoop common not found."
exit
fi
# Only set locally to use in HADOOP_OPTS. No need to export.
# The following defaults are useful when somebody directly invokes bin/mapred.
HADOOP_MAPRED_LOG_DIR=${HADOOP_MAPRED_LOG_DIR:-${HADOOP_MAPRED_HOME}/logs}
HADOOP_MAPRED_LOGFILE=${HADOOP_MAPRED_LOGFILE:-hadoop.log}
HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console}
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_MAPRED_LOG_DIR"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_MAPRED_LOGFILE"
export HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_MAPRED_ROOT_LOGGER}"

View File

@ -15,133 +15,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
function hadoop_usage
{
echo "Usage: mr-jobhistory-daemon.sh [--config confdir] (start|stop|status) <hadoop-command> <args...>"
}
#
# Environment Variables
#
# HADOOP_JHS_LOGGER Hadoop JobSummary logger.
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_MAPRED_HOME}/conf.
# HADOOP_MAPRED_PID_DIR The pid files are stored. /tmp by default.
# HADOOP_MAPRED_NICENESS The scheduling priority for daemons. Defaults to 0.
##
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
usage="Usage: mr-jobhistory-daemon.sh [--config <conf-dir>] (start|stop) <mapred-command> "
# if no args specified, show usage
if [ $# -le 1 ]; then
echo $usage
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
if [ -e ${HADOOP_LIBEXEC_DIR}/mapred-config.sh ]; then
. $HADOOP_LIBEXEC_DIR/mapred-config.sh
fi
# get arguments
startStop=$1
shift
command=$1
daemonmode=$1
shift
hadoop_rotate_log ()
{
log=$1;
num=5;
if [ -n "$2" ]; then
num=$2
fi
if [ -f "$log" ]; then # rotate logs
while [ $num -gt 1 ]; do
prev=`expr $num - 1`
[ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
num=$prev
done
mv "$log" "$log.$num";
fi
}
if [ "$HADOOP_MAPRED_IDENT_STRING" = "" ]; then
export HADOOP_MAPRED_IDENT_STRING="$USER"
fi
export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_PREFIX}}
export HADOOP_MAPRED_LOGFILE=mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.log
export HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,RFA}
export HADOOP_JHS_LOGGER=${HADOOP_JHS_LOGGER:-INFO,JSA}
if [ -f "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then
. "${HADOOP_CONF_DIR}/mapred-env.sh"
fi
mkdir -p "$HADOOP_MAPRED_LOG_DIR"
chown $HADOOP_MAPRED_IDENT_STRING $HADOOP_MAPRED_LOG_DIR
if [ "$HADOOP_MAPRED_PID_DIR" = "" ]; then
HADOOP_MAPRED_PID_DIR=/tmp
fi
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_MAPRED_IDENT_STRING"
log=$HADOOP_MAPRED_LOG_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.out
pid=$HADOOP_MAPRED_PID_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command.pid
HADOOP_MAPRED_STOP_TIMEOUT=${HADOOP_MAPRED_STOP_TIMEOUT:-5}
# Set default scheduling priority
if [ "$HADOOP_MAPRED_NICENESS" = "" ]; then
export HADOOP_MAPRED_NICENESS=0
fi
case $startStop in
(start)
mkdir -p "$HADOOP_MAPRED_PID_DIR"
if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo $command running as process `cat $pid`. Stop it first.
exit 1
fi
fi
hadoop_rotate_log $log
echo starting $command, logging to $log
cd "$HADOOP_MAPRED_HOME"
nohup nice -n $HADOOP_MAPRED_NICENESS "$HADOOP_MAPRED_HOME"/bin/mapred --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
echo $! > $pid
sleep 1; head "$log"
;;
(stop)
if [ -f $pid ]; then
TARGET_PID=`cat $pid`
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo stopping $command
kill $TARGET_PID
sleep $HADOOP_MAPRED_STOP_TIMEOUT
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo "$command did not stop gracefully after $HADOOP_MAPRED_STOP_TIMEOUT seconds: killing with kill -9"
kill -9 $TARGET_PID
fi
else
echo no $command to stop
fi
rm -f $pid
else
echo no $command to stop
fi
;;
(*)
echo $usage
exit 1
;;
esac
exec "${HADOOP_MAPRED_HOME}/bin/mapred" \
--config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"

View File

@ -13,15 +13,59 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
##
## THIS FILE ACTS AS AN OVERRIDE FOR hadoop-env.sh FOR ALL
## WORK DONE BY THE mapred AND RELATED COMMANDS.
##
## Precedence rules:
##
## mapred-env.sh > hadoop-env.sh > hard-coded defaults
##
## MAPRED_xyz > HADOOP_xyz > hard-coded defaults
##
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
###
# Generic settings for MapReduce
###
export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
#Override the log4j settings for all MR apps
# export MAPRED_ROOT_LOGGER="INFO,console"
# Override Hadoop's log directory & file
# export HADOOP_MAPRED_LOG_DIR=""
# Override Hadoop's pid directory
# export HADOOP_MAPRED_PID_DIR=
# Override Hadoop's identity string. $USER by default.
# This is used in writing log and pid files, so keep that in mind!
# export HADOOP_MAPRED_IDENT_STRING=$USER
# Override Hadoop's process priority
# Note that sub-processes will also run at this level!
# export HADOOP_MAPRED_NICENESS=0
###
# Job History Server specific parameters
###
# Specify the max heapsize for the Job History Server using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either
# MAPRED_OPTS, HADOOP_OPTS, and/or HADOOP_JOB_HISTORYSERVER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#
#export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
# Specify the JVM options to be used when starting the ResourceManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
#export HADOOP_JOB_HISTORYSERVER_OPTS=
#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default.
#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger.
#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default.
#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default
#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0.
# Specify the log4j settings for the JobHistoryServer
#export HADOOP_JHS_LOGGER=INFO,RFA

View File

@ -81,15 +81,14 @@ public class OracleDBRecordReader<T extends DBWritable> extends DBRecordReader<T
try {
DBInputFormat.DBInputSplit split = getSplit();
if (split.getLength() > 0 && split.getStart() > 0){
if (split.getLength() > 0){
String querystring = query.toString();
query = new StringBuilder();
query.append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( ");
query.append(querystring);
query.append(" ) a WHERE rownum <= ").append(split.getStart());
query.append(" + ").append(split.getLength());
query.append(" ) WHERE dbif_rno >= ").append(split.getStart());
query.append(" ) a WHERE rownum <= ").append(split.getEnd());
query.append(" ) WHERE dbif_rno > ").append(split.getStart());
}
} catch (IOException ex) {
// ignore, will not throw.

View File

@ -110,7 +110,7 @@ public class TestDbClasses {
splitter, NullDBWritable.class, configuration, connect,
dbConfiguration, "condition", fields, "table");
assertEquals(
"SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( SELECT f1, f2 FROM table WHERE condition ORDER BY Order ) a WHERE rownum <= 1 + 9 ) WHERE dbif_rno >= 1",
"SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( SELECT f1, f2 FROM table WHERE condition ORDER BY Order ) a WHERE rownum <= 10 ) WHERE dbif_rno > 1",
recorder.getSelectQuery());
}

View File

@ -36,8 +36,8 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
@ -61,6 +61,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
@ -933,4 +934,10 @@ final public class ResourceSchedulerWrapper
return new HashMap<ApplicationId,
SchedulerApplication<SchedulerApplicationAttempt>>();
}
@Override
protected void completedContainer(RMContainer rmContainer,
ContainerStatus containerStatus, RMContainerEventType event) {
// do nothing
}
}

View File

@ -211,6 +211,12 @@ Release 2.6.0 - UNRELEASED
YARN-2397. Avoided loading two authentication filters for RM and TS web
interfaces. (Varun Vasudev via zjshen)
YARN-2409. RM ActiveToStandBy transition missing stoping previous rmDispatcher.
(Rohith via jianhe)
YARN-2249. Avoided AM release requests being lost on work preserving RM
restart. (Jian He via zjshen)
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -1,70 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run a shell command on all slave hosts.
#
# Environment Variables
#
# YARN_SLAVES File naming remote hosts.
# Default is ${YARN_CONF_DIR}/slaves.
# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
# YARN_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
# YARN_SSH_OPTS Options passed to ssh when running remote commands.
##
usage="Usage: slaves.sh [--config confdir] command..."
# if no args specified, show usage
if [ $# -le 0 ]; then
echo $usage
exit 1
fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
# If the slaves file is specified in the command line,
# then it takes precedence over the definition in
# yarn-env.sh. Save it here.
HOSTLIST=$YARN_SLAVES
if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
. "${YARN_CONF_DIR}/yarn-env.sh"
fi
if [ "$HOSTLIST" = "" ]; then
if [ "$YARN_SLAVES" = "" ]; then
export HOSTLIST="${YARN_CONF_DIR}/slaves"
else
export HOSTLIST="${YARN_SLAVES}"
fi
fi
for slave in `cat "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
ssh $YARN_SSH_OPTS $slave $"${@// /\\ }" \
2>&1 | sed "s/^/$slave: /" &
if [ "$YARN_SLAVE_SLEEP" != "" ]; then
sleep $YARN_SLAVE_SLEEP
fi
done
wait

View File

@ -16,20 +16,34 @@
# limitations under the License.
# Start all yarn daemons. Run this on master node.
function hadoop_usage
{
echo "Usage: start-yarn.sh [--config confdir]"
}
echo "starting yarn daemons"
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
# start resourceManager
"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start resourcemanager
"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" start resourcemanager
# start nodeManager
"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR start nodemanager
"${bin}/yarn-daemons.sh" --config "${YARN_CONF_DIR}" start nodemanager
# start proxyserver
#"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start proxyserver
#"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" start proxyserver

View File

@ -18,18 +18,34 @@
# Stop all yarn daemons. Run this on master node.
echo "stopping yarn daemons"
function hadoop_usage
{
echo "Usage: stop-yarn.sh [--config confdir]"
}
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
# stop resourceManager
"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop resourcemanager
# stop nodeManager
"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR stop nodemanager
# stop proxy server
"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop proxyserver
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
# start resourceManager
"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" stop resourcemanager
# start nodeManager
"${bin}/yarn-daemons.sh" --config "${YARN_CONF_DIR}" stop nodemanager
# start proxyserver
#"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" stop proxyserver

View File

@ -15,266 +15,182 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# The Hadoop command script
#
# Environment Variables
#
# JAVA_HOME The java implementation to use. Overrides JAVA_HOME.
#
# YARN_USER_CLASSPATH Additional user CLASSPATH entries.
#
# YARN_USER_CLASSPATH_FIRST If set to non empty value then the user classpath
# specified in YARN_USER_CLASSPATH will be
# appended at the beginning of YARN's final
# classpath instead of at the end.
#
# YARN_HEAPSIZE The maximum amount of heap to use, in MB.
# Default is 1000.
#
# YARN_{COMMAND}_HEAPSIZE overrides YARN_HEAPSIZE for a given command
# eg YARN_NODEMANAGER_HEAPSIZE sets the heap
# size for the NodeManager. If you set the
# heap size in YARN_{COMMAND}_OPTS or YARN_OPTS
# they take precedence.
#
# YARN_OPTS Extra Java runtime options.
#
# YARN_CLIENT_OPTS when the respective command is run.
# YARN_{COMMAND}_OPTS etc YARN_NODEMANAGER_OPTS applies to NodeManager
# for e.g. YARN_CLIENT_OPTS applies to
# more than one command (fs, dfs, fsck,
# dfsadmin etc)
#
# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
#
# YARN_ROOT_LOGGER The root appender. Default is INFO,console
#
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin" > /dev/null; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
function print_usage(){
echo "Usage: yarn [--config confdir] COMMAND"
function hadoop_usage
{
echo "Usage: yarn [--config confdir] [--daemon (start|stop|status)] COMMAND"
echo "where COMMAND is one of:"
echo " resourcemanager -format-state-store deletes the RMStateStore"
echo " resourcemanager run the ResourceManager"
echo " nodemanager run a nodemanager on each slave"
echo " timelineserver run the timeline server"
echo " rmadmin admin tools"
echo " version print the version"
echo " jar <jar> run a jar file"
echo " application prints application(s)"
echo " report/kill application"
echo " applicationattempt prints applicationattempt(s)"
echo " report"
echo " application prints application(s) report/kill application"
echo " applicationattempt prints applicationattempt(s) report"
echo " classpath prints the class path needed to get the"
echo " Hadoop jar and the required libraries"
echo " container prints container(s) report"
echo " node prints node report(s)"
echo " daemonlog get/set the log level for each daemon"
echo " jar <jar> run a jar file"
echo " logs dump container logs"
echo " classpath prints the class path needed to"
echo " get the Hadoop jar and the"
echo " required libraries"
echo " daemonlog get/set the log level for each"
echo " daemon"
echo " node prints node report(s)"
echo " nodemanager run a nodemanager on each slave"
echo " proxyserver run the web app proxy server"
echo " resourcemanager run the ResourceManager"
echo " resourcemanager -format-state-store deletes the RMStateStore"
echo " rmadmin admin tools"
echo " timelineserver run the timeline server"
echo " version print the version"
echo " or"
echo " CLASSNAME run the class named CLASSNAME"
echo "Most commands print help when invoked w/o parameters."
}
# if no args specified, show usage
if [ $# = 0 ]; then
print_usage
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
# if no args specified, show usage
if [[ $# = 0 ]]; then
hadoop_exit_with_usage 1
fi
# get arguments
COMMAND=$1
shift
case $COMMAND in
# usage flags
--help|-help|-h)
print_usage
case "${COMMAND}" in
application|applicationattempt|container)
CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
classpath)
hadoop_finalize
echo "${CLASSPATH}"
exit
;;
;;
daemonlog)
CLASS=org.apache.hadoop.log.LogLevel
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
jar)
CLASS=org.apache.hadoop.util.RunJar
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
historyserver)
daemon="true"
echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
echo "Instead use the timelineserver command for it." 1>&2
echo "Starting the History Server anyway..." 1>&2
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
;;
logs)
CLASS=org.apache.hadoop.yarn.logaggregation.LogDumper
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
node)
CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
nodemanager)
daemon="true"
CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
YARN_OPTS="${YARN_OPTS} ${YARN_NODEMANAGER_OPTS}"
if [[ -n "${YARN_NODEMANAGER_HEAPSIZE}" ]]; then
JAVA_HEAP_MAX="-Xmx${YARN_NODEMANAGER_HEAPSIZE}m"
fi
;;
proxyserver)
daemon="true"
CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
YARN_OPTS="${YARN_OPTS} ${YARN_PROXYSERVER_OPTS}"
if [[ -n "${YARN_PROXYSERVER_HEAPSIZE}" ]]; then
JAVA_HEAP_MAX="-Xmx${YARN_PROXYSERVER_HEAPSIZE}m"
fi
;;
resourcemanager)
daemon="true"
CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
YARN_OPTS="${YARN_OPTS} ${YARN_RESOURCEMANAGER_OPTS}"
if [[ -n "${YARN_RESOURCEMANAGER_HEAPSIZE}" ]]; then
JAVA_HEAP_MAX="-Xmx${YARN_RESOURCEMANAGER_HEAPSIZE}m"
fi
;;
rmadmin)
CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
timelineserver)
daemon="true"
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
YARN_OPTS="${YARN_OPTS} ${YARN_TIMELINESERVER_OPTS}"
if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then
JAVA_HEAP_MAX="-Xmx${YARN_TIMELINESERVER_HEAPSIZE}m"
fi
;;
version)
CLASS=org.apache.hadoop.util.VersionInfo
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
-*)
hadoop_exit_with_usage 1
;;
*)
CLASS="${COMMAND}"
;;
esac
if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
. "${YARN_CONF_DIR}/yarn-env.sh"
# set HADOOP_OPTS to YARN_OPTS so that we can use
# finalize, etc, without doing anything funky
HADOOP_OPTS="${YARN_OPTS}"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
YARN_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
fi
# some Java parameters
if [ "$JAVA_HOME" != "" ]; then
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m
# Add YARN custom options to comamnd line in case someone actaully
# used these.
#
# Note that we are replacing ' ' with '\ ' so that when we exec
# stuff it works
#
hadoop_add_param HADOOP_OPTS yarn.log.dir "-Dyarn.log.dir=${HADOOP_LOG_DIR/ /\ }"
hadoop_add_param HADOOP_OPTS yarn.log.file "-Dyarn.log.file=${HADOOP_LOGFILE/ /\ }"
hadoop_add_param HADOOP_OPTS yarn.home.dir "-Dyarn.home.dir=${HADOOP_YARN_HOME/ /\ }"
hadoop_add_param HADOOP_OPTS yarn.root.logger "-Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
# check envvars which might override default args
if [ "$YARN_HEAPSIZE" != "" ]; then
#echo "run with heapsize $YARN_HEAPSIZE"
JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
#echo $JAVA_HEAP_MAX
fi
hadoop_finalize
# CLASSPATH initially contains $HADOOP_CONF_DIR & $YARN_CONF_DIR
if [ ! -d "$HADOOP_CONF_DIR" ]; then
echo No HADOOP_CONF_DIR set.
echo Please specify it either in yarn-env.sh or in the environment.
exit 1
fi
export CLASSPATH
CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}:${CLASSPATH}"
# for developers, add Hadoop classes to CLASSPATH
if [ -d "$HADOOP_YARN_HOME/yarn-api/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-api/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-common/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-common/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-mapreduce/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-mapreduce/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-master-worker/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-master-worker/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-nodemanager/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-nodemanager/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/build/test/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/target/test/classes
fi
if [ -d "$HADOOP_YARN_HOME/build/tools" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/build/tools
fi
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/${YARN_DIR}/*
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/${YARN_LIB_JARS_DIR}/*
# Add user defined YARN_USER_CLASSPATH to the class path (if defined)
if [ -n "$YARN_USER_CLASSPATH" ]; then
if [ -n "$YARN_USER_CLASSPATH_FIRST" ]; then
# User requested to add the custom entries at the beginning
CLASSPATH=${YARN_USER_CLASSPATH}:${CLASSPATH}
if [[ -n "${daemon}" ]]; then
if [[ -n "${secure_service}" ]]; then
hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" \
"${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \
"${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
else
# By default we will just append the extra entries at the end
CLASSPATH=${CLASSPATH}:${YARN_USER_CLASSPATH}
hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
"${daemon_pidfile}" "${daemon_outfile}" "$@"
fi
fi
# so that filenames w/ spaces are handled correctly in loops below
IFS=
# default log directory & file
if [ "$YARN_LOG_DIR" = "" ]; then
YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
fi
if [ "$YARN_LOGFILE" = "" ]; then
YARN_LOGFILE='yarn.log'
fi
# restore ordinary behaviour
unset IFS
# figure out which class to run
if [ "$COMMAND" = "classpath" ] ; then
echo $CLASSPATH
exit
elif [ "$COMMAND" = "rmadmin" ] ; then
CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "application" ] ||
[ "$COMMAND" = "applicationattempt" ] ||
[ "$COMMAND" = "container" ]; then
CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
set -- $COMMAND $@
elif [ "$COMMAND" = "node" ] ; then
CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "resourcemanager" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/rm-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
YARN_OPTS="$YARN_OPTS $YARN_RESOURCEMANAGER_OPTS"
if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_RESOURCEMANAGER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "historyserver" ] ; then
echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
echo "Instead use the timelineserver command for it." 1>&2
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/ahs-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
YARN_OPTS="$YARN_OPTS $YARN_HISTORYSERVER_OPTS"
if [ "$YARN_HISTORYSERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_HISTORYSERVER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "timelineserver" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/timelineserver-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
YARN_OPTS="$YARN_OPTS $YARN_TIMELINESERVER_OPTS"
if [ "$YARN_TIMELINESERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_TIMELINESERVER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "nodemanager" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/nm-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
YARN_OPTS="$YARN_OPTS -server $YARN_NODEMANAGER_OPTS"
if [ "$YARN_NODEMANAGER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_NODEMANAGER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "proxyserver" ] ; then
CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
YARN_OPTS="$YARN_OPTS $YARN_PROXYSERVER_OPTS"
if [ "$YARN_PROXYSERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_PROXYSERVER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "version" ] ; then
CLASS=org.apache.hadoop.util.VersionInfo
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "jar" ] ; then
CLASS=org.apache.hadoop.util.RunJar
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "logs" ] ; then
CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "daemonlog" ] ; then
CLASS=org.apache.hadoop.log.LogLevel
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
exit $?
else
CLASS=$COMMAND
hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
fi
YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$HADOOP_YARN_HOME"
YARN_OPTS="$YARN_OPTS -Dhadoop.home.dir=$HADOOP_YARN_HOME"
YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $YARN_OPTS -classpath "$CLASSPATH" $CLASS "$@"

View File

@ -13,53 +13,81 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# included in all the hadoop scripts with source command
# should not be executable directly
bin=`which "$0"`
bin=`dirname "${bin}"`
bin=`cd "$bin"; pwd`
function hadoop_subproject_init
{
# at some point in time, someone thought it would be a good idea to
# create separate vars for every subproject. *sigh*
# let's perform some overrides and setup some defaults for bw compat
# this way the common hadoop var's == subproject vars and can be
# used interchangeable from here on out
# ...
# this should get deprecated at some point.
if [[ -e "${YARN_CONF_DIR}/yarn-env.sh" ]]; then
. "${YARN_CONF_DIR}/yarn-env.sh"
elif [[ -e "${HADOOP_CONF_DIR}/yarn-env.sh" ]]; then
. "${HADOOP_CONF_DIR}/yarn-env.sh"
fi
if [[ -n "${YARN_CONF_DIR}" ]]; then
HADOOP_CONF_DIR="${YARN_CONF_DIR}"
fi
YARN_CONF_DIR="${HADOOP_CONF_DIR}"
# YARN_CONF_DIR needs precedence over HADOOP_CONF_DIR
# and the various jar dirs
hadoop_add_classpath "${YARN_CONF_DIR}" before
HADOOP_LOG_DIR="${YARN_LOG_DIR:-$HADOOP_LOG_DIR}"
YARN_LOG_DIR="${HADOOP_LOG_DIR}"
HADOOP_LOGFILE="${YARN_LOGFILE:-$HADOOP_LOGFILE}"
YARN_LOGFILE="${HADOOP_LOGFILE}"
HADOOP_NICENESS="${YARN_NICENESS:-$HADOOP_NICENESS}"
YARN_NICENESS="${HADOOP_NICENESS}"
HADOOP_STOP_TIMEOUT="${YARN_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}"
YARN_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
HADOOP_PID_DIR="${YARN_PID_DIR:-$HADOOP_PID_DIR}"
YARN_PID_DIR="${HADOOP_PID_DIR}"
HADOOP_ROOT_LOGGER="${YARN_ROOT_LOGGER:-INFO,console}"
YARN_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
HADOOP_YARN_HOME="${HADOOP_YARN_HOME:-$HADOOP_PREFIX}"
HADOOP_IDENT_STRING="${YARN_IDENT_STRING:-$HADOOP_IDENT_STRING}"
YARN_IDENT_STRING="${HADOOP_IDENT_STRING}"
YARN_OPTS="${YARN_OPTS:-$HADOOP_OPTS}"
# YARN-1429 added the completely superfluous YARN_USER_CLASSPATH
# env var. We're going to override HADOOP_USER_CLASSPATH to keep
# consistency with the rest of the duplicate/useless env vars
HADOOP_USER_CLASSPATH="${YARN_USER_CLASSPATH:-$HADOOP_USER_CLASSPATH}"
YARN_USER_CLASSPATH="${HADOOP_USER_CLASSPATH}"
HADOOP_USER_CLASSPATH_FIRST="${YARN_USER_CLASSPATH_FIRST:-$HADOOP_USER_CLASSPATH_FIRST}"
YARN_USER_CLASSPATH_FIRST="${HADOOP_USER_CLASSPATH_FIRST}"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
. ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh
elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
. "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
. "$HADOOP_HOME"/libexec/hadoop-config.sh
if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
_yc_this="${BASH_SOURCE-$0}"
HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_yc_this}")" >/dev/null && pwd -P)
fi
if [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
elif [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
elif [[ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_HOME}/libexec/hadoop-config.sh"
else
echo "Hadoop common not found."
exit
fi
# Same glibc bug that discovered in Hadoop.
# Without this you can see very large vmem settings on containers.
export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
#check to see if the conf dir is given as an optional argument
if [ $# -gt 1 ]
then
if [ "--config" = "$1" ]
then
shift
confdir=$1
shift
YARN_CONF_DIR=$confdir
fi
fi
# Allow alternate conf dir location.
export YARN_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
#check to see it is specified whether to use the slaves or the
# masters file
if [ $# -gt 1 ]
then
if [ "--hosts" = "$1" ]
then
shift
slavesfile=$1
shift
export YARN_SLAVES="${YARN_CONF_DIR}/$slavesfile"
fi
fi

View File

@ -15,147 +15,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
function hadoop_usage
{
echo "Usage: yarn-daemon.sh [--config confdir] (start|stop|status) <hadoop-command> <args...>"
}
# Runs a yarn command as a daemon.
#
# Environment Variables
#
# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
# YARN_LOG_DIR Where log files are stored. PWD by default.
# YARN_MASTER host:path where hadoop code should be rsync'd from
# YARN_PID_DIR The pid files are stored. /tmp by default.
# YARN_IDENT_STRING A string representing this instance of hadoop. $USER by default
# YARN_NICENESS The scheduling priority for daemons. Defaults to 0.
##
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
usage="Usage: yarn-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <yarn-command> "
# if no args specified, show usage
if [ $# -le 1 ]; then
echo $usage
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
# get arguments
startStop=$1
shift
command=$1
daemonmode=$1
shift
hadoop_rotate_log ()
{
log=$1;
num=5;
if [ -n "$2" ]; then
num=$2
fi
if [ -f "$log" ]; then # rotate logs
while [ $num -gt 1 ]; do
prev=`expr $num - 1`
[ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
num=$prev
done
mv "$log" "$log.$num";
fi
}
if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
. "${YARN_CONF_DIR}/yarn-env.sh"
fi
if [ "$YARN_IDENT_STRING" = "" ]; then
export YARN_IDENT_STRING="$USER"
fi
# get log directory
if [ "$YARN_LOG_DIR" = "" ]; then
export YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
fi
if [ ! -w "$YARN_LOG_DIR" ] ; then
mkdir -p "$YARN_LOG_DIR"
chown $YARN_IDENT_STRING $YARN_LOG_DIR
fi
if [ "$YARN_PID_DIR" = "" ]; then
YARN_PID_DIR=/tmp
fi
# some variables
export YARN_LOGFILE=yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.log
export YARN_ROOT_LOGGER=${YARN_ROOT_LOGGER:-INFO,RFA}
log=$YARN_LOG_DIR/yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.out
pid=$YARN_PID_DIR/yarn-$YARN_IDENT_STRING-$command.pid
YARN_STOP_TIMEOUT=${YARN_STOP_TIMEOUT:-5}
# Set default scheduling priority
if [ "$YARN_NICENESS" = "" ]; then
export YARN_NICENESS=0
fi
case $startStop in
(start)
[ -w "$YARN_PID_DIR" ] || mkdir -p "$YARN_PID_DIR"
if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo $command running as process `cat $pid`. Stop it first.
exit 1
fi
fi
if [ "$YARN_MASTER" != "" ]; then
echo rsync from $YARN_MASTER
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $YARN_MASTER/ "$HADOOP_YARN_HOME"
fi
hadoop_rotate_log $log
echo starting $command, logging to $log
cd "$HADOOP_YARN_HOME"
nohup nice -n $YARN_NICENESS "$HADOOP_YARN_HOME"/bin/yarn --config $YARN_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
echo $! > $pid
sleep 1
head "$log"
# capture the ulimit output
echo "ulimit -a" >> $log
ulimit -a >> $log 2>&1
;;
(stop)
if [ -f $pid ]; then
TARGET_PID=`cat $pid`
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo stopping $command
kill $TARGET_PID
sleep $YARN_STOP_TIMEOUT
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo "$command did not stop gracefully after $YARN_STOP_TIMEOUT seconds: killing with kill -9"
kill -9 $TARGET_PID
fi
else
echo no $command to stop
fi
rm -f $pid
else
echo no $command to stop
fi
;;
(*)
echo $usage
exit 1
;;
esac
exec "${HADOOP_YARN_HOME}/bin/yarn" \
--config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"

View File

@ -16,23 +16,31 @@
# limitations under the License.
# Run a Yarn command on all slave hosts.
function hadoop_usage
{
echo "Usage: yarn-daemons.sh [--config confdir] [--hosts hostlistfile] (start|stop|status) <yarn-command> <args...>"
}
usage="Usage: yarn-daemons.sh [--config confdir] [--hosts hostlistfile] [start
|stop] command args..."
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# if no args specified, show usage
if [ $# -le 1 ]; then
echo $usage
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
exec "$bin/slaves.sh" --config $YARN_CONF_DIR cd "$HADOOP_YARN_HOME" \; "$bin/yarn-daemon.sh" --config $YARN_CONF_DIR "$@"
hadoop_connect_to_hosts "${bin}/yarn-daemon.sh" \
--config "${HADOOP_CONF_DIR}" "$@"

View File

@ -13,118 +13,115 @@
# See the License for the specific language governing permissions and
# limitations under the License.
##
## THIS FILE ACTS AS AN OVERRIDE FOR hadoop-env.sh FOR ALL
## WORK DONE BY THE yarn AND RELATED COMMANDS.
##
## Precedence rules:
##
## yarn-env.sh > hadoop-env.sh > hard-coded defaults
##
## YARN_xyz > HADOOP_xyz > hard-coded defaults
##
###
# Generic settings for YARN
###
# User for YARN daemons
export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
# resolve links - $0 may be a softlink
export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
#
# By default, YARN will use HADOOP_CONF_DIR. Specify a custom
# YARN_CONF_DIR here
# export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
#
# some Java parameters
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
if [ "$JAVA_HOME" != "" ]; then
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
# Override Hadoop's log directory & file
# export YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
# export YARN_LOGFILE='yarn.log'
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m
# Need a custom-to-YARN service-level authorization policy file?
# export YARN_POLICYFILE="yarn-policy.xml"
# For setting YARN specific HEAP sizes please use this
# Parameter and set appropriately
# YARN_HEAPSIZE=1000
# check envvars which might override default args
if [ "$YARN_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
fi
#Override the log4j settings for all YARN apps
# export YARN_ROOT_LOGGER="INFO,console"
###
# Resource Manager specific parameters
###
# Specify the max Heapsize for the ResourceManager using a numerical value
# Specify the max heapsize for the ResourceManager using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_RESOURCEMANAGER_OPTS.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS,
# HADOOP_OPTS, and/or YARN_RESOURCEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#
#export YARN_RESOURCEMANAGER_HEAPSIZE=1000
# Specify the max Heapsize for the timeline server using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_TIMELINESERVER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_TIMELINESERVER_HEAPSIZE=1000
# Specify the JVM options to be used when starting the ResourceManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
#export YARN_RESOURCEMANAGER_OPTS=
#
# Examples for a Sun/Oracle JDK:
# a) override the appsummary log file:
# export YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log -Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY"
#
# b) Set JMX options
# export YARN_RESOURCEMANAGER_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
#
# c) Set garbage collection logs from hadoop-env.sh
# export YARN_RESOURCE_MANAGER_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
#
# d) ... or set them directly
# export YARN_RESOURCEMANAGER_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
#
#
# export YARN_RESOURCEMANAGER_OPTS=
###
# Node Manager specific parameters
###
# Specify the max Heapsize for the NodeManager using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_NODEMANAGER_OPTS.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS,
# HADOOP_OPTS, and/or YARN_NODEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#
#export YARN_NODEMANAGER_HEAPSIZE=1000
# Specify the JVM options to be used when starting the NodeManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
#
# See ResourceManager for some examples
#
#export YARN_NODEMANAGER_OPTS=
# so that filenames w/ spaces are handled correctly in loops below
IFS=
###
# TimeLineServer specifc parameters
###
# Specify the max Heapsize for the timeline server using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS,
# HADOOP_OPTS, and/or YARN_TIMELINESERVER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#
#export YARN_TIMELINESERVER_HEAPSIZE=1000
# default log directory & file
if [ "$YARN_LOG_DIR" = "" ]; then
YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
fi
if [ "$YARN_LOGFILE" = "" ]; then
YARN_LOGFILE='yarn.log'
fi
# default policy file for service-level authorization
if [ "$YARN_POLICYFILE" = "" ]; then
YARN_POLICYFILE="hadoop-policy.xml"
fi
# restore ordinary behaviour
unset IFS
MAC_OSX=false
case "`uname`" in
Darwin*) MAC_OSX=true;;
esac
if $MAC_OSX; then
YARN_OPTS="$YARN_OPTS -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
fi
YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"
# Specify the JVM options to be used when starting the TimeLineServer.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
#
# See ResourceManager for some examples
#
#export YARN_TIMELINESERVER_OPTS=

View File

@ -1161,6 +1161,9 @@ public class ResourceManager extends CompositeService implements Recoverable {
((Service)dispatcher).init(this.conf);
((Service)dispatcher).start();
removeService((Service)rmDispatcher);
// Need to stop previous rmDispatcher before assigning new dispatcher
// otherwise causes "AsyncDispatcher event handler" thread leak
((Service) rmDispatcher).stop();
rmDispatcher = dispatcher;
addIfService(rmDispatcher);
rmContext.setDispatcher(rmDispatcher);

View File

@ -23,10 +23,14 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
@ -34,18 +38,25 @@ import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMoveEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@ -54,6 +65,7 @@ import org.apache.hadoop.yarn.util.resource.Resources;
import com.google.common.util.concurrent.SettableFuture;
@SuppressWarnings("unchecked")
public abstract class AbstractYarnScheduler
<T extends SchedulerApplicationAttempt, N extends SchedulerNode>
@ -72,6 +84,7 @@ public abstract class AbstractYarnScheduler
protected RMContext rmContext;
protected Map<ApplicationId, SchedulerApplication<T>> applications;
protected int nmExpireInterval;
protected final static List<Container> EMPTY_CONTAINER_LIST =
new ArrayList<Container>();
@ -87,6 +100,15 @@ public abstract class AbstractYarnScheduler
super(name);
}
@Override
public void serviceInit(Configuration conf) throws Exception {
nmExpireInterval =
conf.getInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS,
YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS);
createReleaseCache();
super.serviceInit(conf);
}
public synchronized List<Container> getTransferredContainers(
ApplicationAttemptId currentAttempt) {
ApplicationId appId = currentAttempt.getApplicationId();
@ -281,6 +303,19 @@ public abstract class AbstractYarnScheduler
((RMContainerImpl)rmContainer).setAMContainer(true);
}
}
synchronized (schedulerAttempt) {
Set<ContainerId> releases = schedulerAttempt.getPendingRelease();
if (releases.contains(container.getContainerId())) {
// release the container
rmContainer.handle(new RMContainerFinishedEvent(container
.getContainerId(), SchedulerUtils.createAbnormalContainerStatus(
container.getContainerId(), SchedulerUtils.RELEASED_CONTAINER),
RMContainerEventType.RELEASED));
releases.remove(container.getContainerId());
LOG.info(container.getContainerId() + " is released by application.");
}
}
}
}
@ -320,6 +355,62 @@ public abstract class AbstractYarnScheduler
}
}
protected void createReleaseCache() {
// Cleanup the cache after nm expire interval.
new Timer().schedule(new TimerTask() {
@Override
public void run() {
for (SchedulerApplication<T> app : applications.values()) {
T attempt = app.getCurrentAppAttempt();
synchronized (attempt) {
for (ContainerId containerId : attempt.getPendingRelease()) {
RMAuditLogger.logFailure(
app.getUser(),
AuditConstants.RELEASE_CONTAINER,
"Unauthorized access or invalid container",
"Scheduler",
"Trying to release container not owned by app or with invalid id.",
attempt.getApplicationId(), containerId);
}
attempt.getPendingRelease().clear();
}
}
LOG.info("Release request cache is cleaned up");
}
}, nmExpireInterval);
}
// clean up a completed container
protected abstract void completedContainer(RMContainer rmContainer,
ContainerStatus containerStatus, RMContainerEventType event);
protected void releaseContainers(List<ContainerId> containers,
SchedulerApplicationAttempt attempt) {
for (ContainerId containerId : containers) {
RMContainer rmContainer = getRMContainer(containerId);
if (rmContainer == null) {
if (System.currentTimeMillis() - ResourceManager.getClusterTimeStamp()
< nmExpireInterval) {
LOG.info(containerId + " doesn't exist. Add the container"
+ " to the release request cache as it maybe on recovery.");
synchronized (attempt) {
attempt.getPendingRelease().add(containerId);
}
} else {
RMAuditLogger.logFailure(attempt.getUser(),
AuditConstants.RELEASE_CONTAINER,
"Unauthorized access or invalid container", "Scheduler",
"Trying to release container not owned by app or with invalid id.",
attempt.getApplicationId(), containerId);
}
}
completedContainer(rmContainer,
SchedulerUtils.createAbnormalContainerStatus(containerId,
SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED);
}
}
public SchedulerNode getSchedulerNode(NodeId nodeId) {
return nodes.get(nodeId);
}

View File

@ -17,13 +17,14 @@
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -41,7 +42,6 @@ import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@ -53,6 +53,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerStat
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
import org.apache.hadoop.yarn.util.resource.Resources;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
@ -87,6 +88,13 @@ public class SchedulerApplicationAttempt {
protected List<RMContainer> newlyAllocatedContainers =
new ArrayList<RMContainer>();
// This pendingRelease is used in work-preserving recovery scenario to keep
// track of the AM's outstanding release requests. RM on recovery could
// receive the release request form AM before it receives the container status
// from NM for recovery. In this case, the to-be-recovered containers reported
// by NM should not be recovered.
private Set<ContainerId> pendingRelease = null;
/**
* Count how many times the application has been given an opportunity
* to schedule a task at each priority. Each time the scheduler
@ -114,7 +122,7 @@ public class SchedulerApplicationAttempt {
new AppSchedulingInfo(applicationAttemptId, user, queue,
activeUsersManager, rmContext.getEpoch());
this.queue = queue;
this.pendingRelease = new HashSet<ContainerId>();
if (rmContext.getRMApps() != null &&
rmContext.getRMApps()
.containsKey(applicationAttemptId.getApplicationId())) {
@ -163,6 +171,10 @@ public class SchedulerApplicationAttempt {
return appSchedulingInfo.getResourceRequests(priority);
}
public Set<ContainerId> getPendingRelease() {
return this.pendingRelease;
}
public int getNewContainerId() {
return appSchedulingInfo.getNewContainerId();
}

View File

@ -54,8 +54,6 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.*;
@ -199,7 +197,7 @@ public class CapacityScheduler extends
private static final long DEFAULT_ASYNC_SCHEDULER_INTERVAL = 5;
private boolean overrideWithQueueMappings = false;
private List<QueueMapping> mappings = new ArrayList<QueueMapping>();
private List<QueueMapping> mappings = null;
private Groups groups;
@VisibleForTesting
@ -789,21 +787,7 @@ public class CapacityScheduler extends
getMinimumResourceCapability(), maximumAllocation);
// Release containers
for (ContainerId releasedContainerId : release) {
RMContainer rmContainer = getRMContainer(releasedContainerId);
if (rmContainer == null) {
RMAuditLogger.logFailure(application.getUser(),
AuditConstants.RELEASE_CONTAINER,
"Unauthorized access or invalid container", "CapacityScheduler",
"Trying to release container not owned by app or with invalid id",
application.getApplicationId(), releasedContainerId);
}
completedContainer(rmContainer,
SchedulerUtils.createAbnormalContainerStatus(
releasedContainerId,
SchedulerUtils.RELEASED_CONTAINER),
RMContainerEventType.RELEASED);
}
releaseContainers(release, application);
synchronized (application) {
@ -1098,7 +1082,8 @@ public class CapacityScheduler extends
}
@Lock(CapacityScheduler.class)
private synchronized void completedContainer(RMContainer rmContainer,
@Override
protected synchronized void completedContainer(RMContainer rmContainer,
ContainerStatus containerStatus, RMContainerEventType event) {
if (rmContainer == null) {
LOG.info("Null container completed...");

View File

@ -49,8 +49,6 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights;
@ -810,7 +808,8 @@ public class FairScheduler extends
/**
* Clean up a completed container.
*/
private synchronized void completedContainer(RMContainer rmContainer,
@Override
protected synchronized void completedContainer(RMContainer rmContainer,
ContainerStatus containerStatus, RMContainerEventType event) {
if (rmContainer == null) {
LOG.info("Null container completed...");
@ -913,21 +912,7 @@ public class FairScheduler extends
}
// Release containers
for (ContainerId releasedContainerId : release) {
RMContainer rmContainer = getRMContainer(releasedContainerId);
if (rmContainer == null) {
RMAuditLogger.logFailure(application.getUser(),
AuditConstants.RELEASE_CONTAINER,
"Unauthorized access or invalid container", "FairScheduler",
"Trying to release container not owned by app or with invalid id",
application.getApplicationId(), releasedContainerId);
}
completedContainer(rmContainer,
SchedulerUtils.createAbnormalContainerStatus(
releasedContainerId,
SchedulerUtils.RELEASED_CONTAINER),
RMContainerEventType.RELEASED);
}
releaseContainers(release, application);
synchronized (application) {
if (!ask.isEmpty()) {

View File

@ -52,8 +52,6 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@ -89,7 +87,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSc
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.server.utils.Lock;
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
@ -295,21 +292,7 @@ public class FifoScheduler extends
clusterResource, minimumAllocation, maximumAllocation);
// Release containers
for (ContainerId releasedContainer : release) {
RMContainer rmContainer = getRMContainer(releasedContainer);
if (rmContainer == null) {
RMAuditLogger.logFailure(application.getUser(),
AuditConstants.RELEASE_CONTAINER,
"Unauthorized access or invalid container", "FifoScheduler",
"Trying to release container not owned by app or with invalid id",
application.getApplicationId(), releasedContainer);
}
containerCompleted(rmContainer,
SchedulerUtils.createAbnormalContainerStatus(
releasedContainer,
SchedulerUtils.RELEASED_CONTAINER),
RMContainerEventType.RELEASED);
}
releaseContainers(release, application);
synchronized (application) {
@ -443,7 +426,7 @@ public class FifoScheduler extends
LOG.info("Skip killing " + container.getContainerId());
continue;
}
containerCompleted(container,
completedContainer(container,
SchedulerUtils.createAbnormalContainerStatus(
container.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION),
RMContainerEventType.KILL);
@ -717,7 +700,7 @@ public class FifoScheduler extends
for (ContainerStatus completedContainer : completedContainers) {
ContainerId containerId = completedContainer.getContainerId();
LOG.debug("Container FINISHED: " + containerId);
containerCompleted(getRMContainer(containerId),
completedContainer(getRMContainer(containerId),
completedContainer, RMContainerEventType.FINISHED);
}
@ -818,7 +801,7 @@ public class FifoScheduler extends
ContainerExpiredSchedulerEvent containerExpiredEvent =
(ContainerExpiredSchedulerEvent) event;
ContainerId containerid = containerExpiredEvent.getContainerId();
containerCompleted(getRMContainer(containerid),
completedContainer(getRMContainer(containerid),
SchedulerUtils.createAbnormalContainerStatus(
containerid,
SchedulerUtils.EXPIRED_CONTAINER),
@ -831,7 +814,8 @@ public class FifoScheduler extends
}
@Lock(FifoScheduler.class)
private synchronized void containerCompleted(RMContainer rmContainer,
@Override
protected synchronized void completedContainer(RMContainer rmContainer,
ContainerStatus containerStatus, RMContainerEventType event) {
if (rmContainer == null) {
LOG.info("Null container completed...");
@ -881,7 +865,7 @@ public class FifoScheduler extends
}
// Kill running containers
for(RMContainer container : node.getRunningContainers()) {
containerCompleted(container,
completedContainer(container,
SchedulerUtils.createAbnormalContainerStatus(
container.getContainerId(),
SchedulerUtils.LOST_CONTAINER),

View File

@ -49,7 +49,7 @@ public class MockAM {
private volatile int responseId = 0;
private final ApplicationAttemptId attemptId;
private final RMContext context;
private RMContext context;
private ApplicationMasterProtocol amRMProtocol;
private final List<ResourceRequest> requests = new ArrayList<ResourceRequest>();
@ -61,8 +61,10 @@ public class MockAM {
this.amRMProtocol = amRMProtocol;
this.attemptId = attemptId;
}
void setAMRMProtocol(ApplicationMasterProtocol amRMProtocol) {
public void setAMRMProtocol(ApplicationMasterProtocol amRMProtocol,
RMContext context) {
this.context = context;
this.amRMProtocol = amRMProtocol;
}

View File

@ -171,7 +171,6 @@ public class TestApplicationMasterService {
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
am1.setAMRMProtocol(rm.getApplicationMasterService());
AllocateRequestPBImpl allocateRequest = new AllocateRequestPBImpl();
List<ContainerId> release = new ArrayList<ContainerId>();

View File

@ -331,6 +331,10 @@ public class TestRMHA {
rm.adminService.transitionToStandby(requestInfo);
rm.adminService.transitionToActive(requestInfo);
rm.adminService.transitionToStandby(requestInfo);
MyCountingDispatcher dispatcher =
(MyCountingDispatcher) rm.getRMContext().getDispatcher();
assertTrue(!dispatcher.isStopped());
rm.adminService.transitionToActive(requestInfo);
assertEquals(errorMessageForEventHandler, expectedEventHandlerCount,
@ -339,6 +343,11 @@ public class TestRMHA {
assertEquals(errorMessageForService, expectedServiceCount,
rm.getServices().size());
// Keep the dispatcher reference before transitioning to standby
dispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
rm.adminService.transitionToStandby(requestInfo);
assertEquals(errorMessageForEventHandler, expectedEventHandlerCount,
((MyCountingDispatcher) rm.getRMContext().getDispatcher())
@ -346,6 +355,8 @@ public class TestRMHA {
assertEquals(errorMessageForService, expectedServiceCount,
rm.getServices().size());
assertTrue(dispatcher.isStopped());
rm.stop();
}
@ -492,6 +503,8 @@ public class TestRMHA {
private int eventHandlerCount;
private volatile boolean stopped = false;
public MyCountingDispatcher() {
super("MyCountingDispatcher");
this.eventHandlerCount = 0;
@ -510,5 +523,15 @@ public class TestRMHA {
public int getEventHandlerCount() {
return this.eventHandlerCount;
}
@Override
protected void serviceStop() throws Exception {
this.stopped = true;
super.serviceStop();
}
public boolean isStopped() {
return this.stopped;
}
}
}

View File

@ -289,7 +289,7 @@ public class TestRMRestart {
// verify old AM is not accepted
// change running AM to talk to new RM
am1.setAMRMProtocol(rm2.getApplicationMasterService());
am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
AllocateResponse allocResponse = am1.allocate(
new ArrayList<ResourceRequest>(),
new ArrayList<ContainerId>());
@ -1663,7 +1663,7 @@ public class TestRMRestart {
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
// recover app
RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
am1.setAMRMProtocol(rm2.getApplicationMasterService());
am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
nm1.nodeHeartbeat(true);
nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());

View File

@ -33,10 +33,13 @@ import java.util.Set;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
@ -72,6 +75,9 @@ import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import com.google.common.base.Supplier;
@SuppressWarnings({"rawtypes", "unchecked"})
@RunWith(value = Parameterized.class)
public class TestWorkPreservingRMRestart {
@ -572,8 +578,8 @@ public class TestWorkPreservingRMRestart {
rm2.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.LAUNCHED);
am0.setAMRMProtocol(rm2.getApplicationMasterService());
am0.registerAppAttempt(false);
am0.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
am0.registerAppAttempt(true);
rm2.waitForState(app0.getApplicationId(), RMAppState.RUNNING);
rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
@ -646,6 +652,69 @@ public class TestWorkPreservingRMRestart {
waitForNumContainersToRecover(2, rm2, am0.getApplicationAttemptId());
}
// Test if RM on recovery receives the container release request from AM
// before it receives the container status reported by NM for recovery. this
// container should not be recovered.
@Test (timeout = 30000)
public void testReleasedContainerNotRecovered() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
rm1 = new MockRM(conf, memStore);
MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
rm1.start();
RMApp app1 = rm1.submitApp(1024);
final MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// Re-start RM
conf.setInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS, 8000);
rm2 = new MockRM(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
am1.registerAppAttempt(true);
// try to release a container before the container is actually recovered.
final ContainerId runningContainer =
ContainerId.newInstance(am1.getApplicationAttemptId(), 2);
am1.allocate(null, Arrays.asList(runningContainer));
// send container statuses to recover the containers
List<NMContainerStatus> containerStatuses =
createNMContainerStatusForApp(am1);
nm1.registerNode(containerStatuses, null);
// only the am container should be recovered.
waitForNumContainersToRecover(1, rm2, am1.getApplicationAttemptId());
final AbstractYarnScheduler scheduler =
(AbstractYarnScheduler) rm2.getResourceScheduler();
// cached release request is cleaned.
// assertFalse(scheduler.getPendingRelease().contains(runningContainer));
AllocateResponse response = am1.allocate(null, null);
// AM gets notified of the completed container.
boolean receivedCompletedContainer = false;
for (ContainerStatus status : response.getCompletedContainersStatuses()) {
if (status.getContainerId().equals(runningContainer)) {
receivedCompletedContainer = true;
}
}
assertTrue(receivedCompletedContainer);
GenericTestUtils.waitFor(new Supplier<Boolean>() {
public Boolean get() {
// release cache is cleaned up and previous running container is not
// recovered
return scheduler.getApplicationAttempt(am1.getApplicationAttemptId())
.getPendingRelease().isEmpty()
&& scheduler.getRMContainer(runningContainer) == null;
}
}, 1000, 20000);
}
private void asserteMetrics(QueueMetrics qm, int appsSubmitted,
int appsPending, int appsRunning, int appsCompleted,
int allocatedContainers, int availableMB, int availableVirtualCores,
@ -661,7 +730,7 @@ public class TestWorkPreservingRMRestart {
assertEquals(allocatedVirtualCores, qm.getAllocatedVirtualCores());
}
private void waitForNumContainersToRecover(int num, MockRM rm,
public static void waitForNumContainersToRecover(int num, MockRM rm,
ApplicationAttemptId attemptId) throws Exception {
AbstractYarnScheduler scheduler =
(AbstractYarnScheduler) rm.getResourceScheduler();
@ -674,7 +743,9 @@ public class TestWorkPreservingRMRestart {
attempt = scheduler.getApplicationAttempt(attemptId);
}
while (attempt.getLiveContainers().size() < num) {
System.out.println("Wait for " + num + " containers to recover.");
System.out.println("Wait for " + num
+ " containers to recover. currently: "
+ attempt.getLiveContainers().size());
Thread.sleep(200);
}
}