HADOOP-7599. Script improvements to setup a secure Hadoop cluster. Contributed by Eric Yang.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1169986 13f79535-47bb-0310-9956-ffa450edef68
parent 6165875dc6
commit 468775a21d
@@ -78,6 +78,13 @@
       <include>*-site.xml</include>
     </includes>
   </fileSet>
+  <fileSet>
+    <directory>${basedir}/src/main/packages/templates/conf</directory>
+    <outputDirectory>/share/hadoop/${hadoop.component}/templates/conf</outputDirectory>
+    <includes>
+      <include>*</include>
+    </includes>
+  </fileSet>
   <fileSet>
     <directory>${project.build.directory}</directory>
     <outputDirectory>/share/hadoop/${hadoop.component}</outputDirectory>
@@ -380,6 +380,9 @@ Release 0.23.0 - Unreleased
     HADOOP-7612. Change test-patch to run tests for all nested modules.
     (tomwhite)
 
+    HADOOP-7599. Script improvements to setup a secure Hadoop cluster
+    (Eric Yang via ddas)
+
   OPTIMIZATIONS
 
     HADOOP-7333. Performance improvement in PureJavaCrc32. (Eric Caspole
@@ -39,6 +39,14 @@ fi
 
 . /lib/lsb/init-functions
 
+if [ -n "$HADOOP_SECURE_DN_USER" ]; then
+  DN_USER="root"
+  IDENT_USER=${HADOOP_SECURE_DN_USER}
+else
+  DN_USER="hdfs"
+  IDENT_USER=${DN_USER}
+fi
+
 # Are we running from init?
 run_by_init() {
     ([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ]
@@ -67,13 +75,14 @@ check_privsep_dir() {
 }
 
 export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
+export HADOOP_PREFIX="/usr"
 
 case "$1" in
   start)
 	check_privsep_dir
 	check_for_no_start
 	log_daemon_msg "Starting Apache Hadoop Data Node server" "hadoop-datanode"
-	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
+	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-${IDENT_USER}-datanode.pid -c ${DN_USER} -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
 	    log_end_msg 0
 	else
 	    log_end_msg 1
@@ -81,7 +90,7 @@ case "$1" in
 	;;
   stop)
 	log_daemon_msg "Stopping Apache Hadoop Data Node server" "hadoop-datanode"
-	if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid; then
+	if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-${IDENT_USER}-datanode.pid; then
 	    log_end_msg 0
 	else
 	    log_end_msg 1
@@ -91,9 +100,9 @@ case "$1" in
   restart)
 	check_privsep_dir
 	log_daemon_msg "Restarting Apache Hadoop Data Node server" "hadoop-datanode"
-	start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid
+	start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-${IDENT_USER}-datanode.pid
 	check_for_no_start log_end_msg
-	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
+	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-${IDENT_USER}-datanode.pid -c ${DN_USER} -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
 	    log_end_msg 0
 	else
 	    log_end_msg 1
@@ -104,14 +113,14 @@ case "$1" in
 	check_privsep_dir
 	log_daemon_msg "Restarting Apache Hadoop Data Node server" "hadoop-datanode"
 	set +e
-	start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid
+	start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-${IDENT_USER}-datanode.pid
 	RET="$?"
 	set -e
 	case $RET in
 	    0)
 		# old daemon stopped
 		check_for_no_start log_end_msg
-		if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
+		if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-${IDENT_USER}-datanode.pid -c ${DN_USER} -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
 		    log_end_msg 0
 		else
 		    log_end_msg 1
@@ -131,7 +140,7 @@ case "$1" in
 	;;
 
   status)
-	status_of_proc -p ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid ${JAVA_HOME}/bin/java hadoop-datanode && exit 0 || exit $?
+	status_of_proc -p ${HADOOP_PID_DIR}/hadoop-${IDENT_USER}-datanode.pid ${JAVA_HOME}/bin/java hadoop-datanode && exit 0 || exit $?
 	;;
 
   *)
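The effect of the new DN_USER/IDENT_USER logic above is easiest to see in isolation. A minimal sketch, assuming only that HADOOP_SECURE_DN_USER may be set in the environment (the echoed pidfile path and HADOOP_PID_DIR default are illustrative):

    #!/bin/sh
    # Sketch: resolve the run-as user and pidfile identity the same way the
    # updated init script does. In secure mode the datanode must start as root
    # (to bind privileged ports) but is tracked under the secure user's name.
    if [ -n "$HADOOP_SECURE_DN_USER" ]; then
      DN_USER="root"
      IDENT_USER=${HADOOP_SECURE_DN_USER}
    else
      DN_USER="hdfs"
      IDENT_USER=${DN_USER}
    fi
    echo "start as: ${DN_USER}"
    echo "pidfile : ${HADOOP_PID_DIR:-/var/run/hadoop}/hadoop-${IDENT_USER}-datanode.pid"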
@@ -67,6 +67,7 @@ check_privsep_dir() {
 }
 
 export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
+export HADOOP_PREFIX="/usr"
 
 case "$1" in
   start)
@@ -67,10 +67,11 @@ check_privsep_dir() {
 }
 
 format() {
-  su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} namenode -format' hdfs
+  sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} namenode -format
 }
 
 export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
+export HADOOP_PREFIX="/usr"
 
 case "$1" in
   start)
@@ -67,6 +67,7 @@ check_privsep_dir() {
 }
 
 export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
+export HADOOP_PREFIX="/usr"
 
 case "$1" in
   start)
@@ -14,9 +14,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
+script="$(basename -- "$this")"
+this="$bin/$script"
 
 if [ "$HADOOP_HOME" != "" ]; then
   echo "Warning: \$HADOOP_HOME is deprecated."
@@ -29,30 +30,86 @@ usage() {
   echo "
 usage: $0 <parameters>
   Require parameter:
-     -u <username>                                          Create user on HDFS
+     --config /etc/hadoop                                   Location of Hadoop configuration file
+     -u <username>                                          Create user on HDFS
   Optional parameters:
-     -h                                                     Display this message
+     -h                                                     Display this message
+     --kerberos-realm=KERBEROS.EXAMPLE.COM                  Set Kerberos realm
+     --super-user=hdfs                                      Set super user id
+     --super-user-keytab=/etc/security/keytabs/hdfs.keytab  Set super user keytab location
   "
   exit 1
 }
 
 # Parse script parameters
-if [ $# != 2 ] ; then
+OPTS=$(getopt \
+  -n $0 \
+  -o '' \
+  -l 'kerberos-realm:' \
+  -l 'super-user:' \
+  -l 'super-user-keytab:' \
+  -o 'h' \
+  -o 'u' \
+  -- "$@")
+
+if [ $? != 0 ] ; then
   usage
+  exit 1
 fi
 
-while getopts "hu:" OPTION
-do
-  case $OPTION in
-    u)
-      SETUP_USER=$2; shift 2
-      ;;
-    h)
+create_user() {
+  if [ "${SETUP_USER}" = "" ]; then
+    break
+  fi
+  HADOOP_HDFS_USER=${HADOOP_HDFS_USER:-hdfs}
+  export HADOOP_PREFIX
+  export HADOOP_CONF_DIR
+  export JAVA_HOME
+  export SETUP_USER=${SETUP_USER}
+  export SETUP_PATH=/user/${SETUP_USER}
+
+  if [ ! "${KERBEROS_REALM}" = "" ]; then
+    # locate kinit cmd
+    if [ -e /etc/lsb-release ]; then
+      KINIT_CMD="/usr/bin/kinit -kt ${HDFS_USER_KEYTAB} ${HADOOP_HDFS_USER}"
+    else
+      KINIT_CMD="/usr/kerberos/bin/kinit -kt ${HDFS_USER_KEYTAB} ${HADOOP_HDFS_USER}"
+    fi
+    su -c "${KINIT_CMD}" ${HADOOP_HDFS_USER}
+  fi
+
+  su -c "${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir ${SETUP_PATH}" ${HADOOP_HDFS_USER}
+  su -c "${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown ${SETUP_USER}:${SETUP_USER} ${SETUP_PATH}" ${HADOOP_HDFS_USER}
+  su -c "${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chmod 711 ${SETUP_PATH}" ${HADOOP_HDFS_USER}
+
+  if [ "$?" == "0" ]; then
+    echo "User directory has been setup: ${SETUP_PATH}"
+  fi
+}
+
+eval set -- "${OPTS}"
+while true; do
+  case "$1" in
+    -u)
+      shift
+      ;;
+    --kerberos-realm)
+      KERBEROS_REALM=$2; shift 2
+      ;;
+    --super-user)
+      HADOOP_HDFS_USER=$2; shift 2
+      ;;
+    --super-user-keytab)
+      HDFS_USER_KEYTAB=$2; shift 2
+      ;;
+    -h)
       usage
       ;;
     --)
       shift ; break
+      while shift; do
+        SETUP_USER=$1
+        create_user
+      done
+      break
       ;;
     *)
       echo "Unknown option: $1"
@@ -62,15 +119,3 @@ do
   esac
 done
 
-# Create user directory on HDFS
-export SETUP_USER
-export SETUP_PATH=/user/${SETUP_USER}
-export HADOOP_PREFIX
-export HADOOP_CONF_DIR
-
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir ${SETUP_PATH}' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown ${SETUP_USER}:${SETUP_USER} ${SETUP_PATH}' hdfs
-
-if [ "$?" == "0" ]; then
-  echo "User directory has been setup: ${SETUP_PATH}"
-fi
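Based on the usage text above, a secure-cluster invocation of the reworked user-creation script might look like this sketch (the realm, keytab path, username, and sbin location are placeholders, not taken from the commit):

    # Hypothetical example; all values are placeholders.
    /usr/sbin/hadoop-create-user.sh \
      --kerberos-realm=KERBEROS.EXAMPLE.COM \
      --super-user=hdfs \
      --super-user-keytab=/etc/security/keytabs/hdfs.keytab \
      -u alice

Each username left after option parsing is passed through create_user, which kinits as the super user when a realm is set, then creates /user/<name> owned by that user with 711 permissions.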
@@ -18,35 +18,60 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
 if [ "$HADOOP_HOME" != "" ]; then
   echo "Warning: \$HADOOP_HOME is deprecated."
   echo
 fi
 
-. "$bin"/../libexec/hadoop-config.sh
+this="${BASH_SOURCE-$0}"
+export HADOOP_PREFIX=`dirname "$this"`/..
 
 usage() {
   echo "
 usage: $0 <parameters>
 
   Optional parameters:
-     --auto                                                          Setup automatically
-     --default                                                       Generate default config
-     --conf-dir=/etc/hadoop                                          Set config directory
+     --auto                                                          Setup path and configuration automatically
+     --default                                                       Setup configuration as default
+     --conf-dir=/etc/hadoop                                          Set configuration directory
+     --datanode-dir=/var/lib/hadoop/hdfs/datanode                    Set datanode directory
+     --group=hadoop                                                  Set Hadoop group name
      -h                                                              Display this message
-     --jobtracker-url=hostname:9001                                  Set jobtracker url
+     --hdfs-user=hdfs                                                Set HDFS user
+     --jobtracker-host=hostname                                      Set jobtracker host
+     --namenode-host=hostname                                        Set namenode host
+     --secondarynamenode-host=hostname                               Set secondary namenode host
+     --kerberos-realm=KERBEROS.EXAMPLE.COM                           Set Kerberos realm
+     --kinit-location=/usr/kerberos/bin/kinit                        Set kinit location
+     --keytab-dir=/etc/security/keytabs                              Set keytab directory
      --log-dir=/var/log/hadoop                                       Set log directory
-     --hdfs-dir=/var/lib/hadoop/hdfs                                 Set hdfs directory
+     --pid-dir=/var/run/hadoop                                       Set pid directory
+     --hdfs-dir=/var/lib/hadoop/hdfs                                 Set HDFS directory
+     --hdfs-user-keytab=/home/hdfs/hdfs.keytab                       Set HDFS user key tab
      --mapred-dir=/var/lib/hadoop/mapred                             Set mapreduce directory
+     --mapreduce-user=mr                                             Set mapreduce user
+     --mapreduce-user-keytab=/home/mr/hdfs.keytab                    Set mapreduce user key tab
      --namenode-dir=/var/lib/hadoop/hdfs/namenode                    Set namenode directory
-     --namenode-url=hdfs://hostname:9000/                            Set namenode url
      --replication=3                                                 Set replication factor
      --taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler  Set task scheduler
+     --datanodes=hostname1,hostname2,...                             Set the datanodes
+     --tasktrackers=hostname1,hostname2,...                          Set the tasktrackers
   "
   exit 1
 }
 
+check_permission() {
+  TARGET=$1
+  OWNER="0"
+  RESULT=0
+  while [ "$TARGET" != "/" ]; do
+    PARENT=`dirname $TARGET`
+    NAME=`basename $TARGET`
+    OWNER=`ls -ln $PARENT | grep $NAME | awk '{print $3}'`
+    if [ "$OWNER" != "0" ]; then
+      RESULT=1
+      break
+    fi
+    TARGET=`dirname $TARGET`
+  done
+  return $RESULT
+}
+
+template_generator() {
+  REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
+  cat $1 |
@@ -64,17 +89,30 @@ OPTS=$(getopt \
   -n $0 \
   -o '' \
   -l 'auto' \
+  -l 'java-home:' \
   -l 'conf-dir:' \
   -l 'default' \
+  -l 'group:' \
   -l 'hdfs-dir:' \
   -l 'namenode-dir:' \
   -l 'datanode-dir:' \
   -l 'mapred-dir:' \
-  -l 'namenode-url:' \
-  -l 'jobtracker-url:' \
+  -l 'namenode-host:' \
+  -l 'secondarynamenode-host:' \
+  -l 'jobtracker-host:' \
   -l 'log-dir:' \
+  -l 'pid-dir:' \
   -l 'replication:' \
   -l 'taskscheduler:' \
+  -l 'hdfs-user:' \
+  -l 'hdfs-user-keytab:' \
+  -l 'mapreduce-user:' \
+  -l 'mapreduce-user-keytab:' \
+  -l 'keytab-dir:' \
+  -l 'kerberos-realm:' \
+  -l 'kinit-location:' \
+  -l 'datanodes:' \
+  -l 'tasktrackers:' \
  -o 'h' \
  -- "$@")
@@ -84,8 +122,7 @@ fi
 
 # Make sure the HADOOP_LOG_DIR is not picked up from user environment.
 unset HADOOP_LOG_DIR
 
-# Parse script parameters
 eval set -- "${OPTS}"
 while true ; do
   case "$1" in
@@ -94,6 +131,10 @@ while true ; do
       AUTOMATED=1
       shift
       ;;
+    --java-home)
+      JAVA_HOME=$2; shift 2
+      AUTOMATED=1
+      ;;
     --conf-dir)
      HADOOP_CONF_DIR=$2; shift 2
      AUTOMATED=1
@@ -101,6 +142,10 @@ while true ; do
     --default)
       AUTOMATED=1; shift
       ;;
+    --group)
+      HADOOP_GROUP=$2; shift 2
+      AUTOMATED=1
+      ;;
     -h)
       usage
       ;;
@@ -120,11 +165,15 @@ while true ; do
       HADOOP_MAPRED_DIR=$2; shift 2
       AUTOMATED=1
       ;;
-    --namenode-url)
+    --namenode-host)
       HADOOP_NN_HOST=$2; shift 2
       AUTOMATED=1
       ;;
-    --jobtracker-url)
+    --secondarynamenode-host)
+      HADOOP_SNN_HOST=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --jobtracker-host)
       HADOOP_JT_HOST=$2; shift 2
       AUTOMATED=1
       ;;
@@ -132,6 +181,10 @@ while true ; do
       HADOOP_LOG_DIR=$2; shift 2
       AUTOMATED=1
       ;;
+    --pid-dir)
+      HADOOP_PID_DIR=$2; shift 2
+      AUTOMATED=1
+      ;;
     --replication)
      HADOOP_REPLICATION=$2; shift 2
      AUTOMATED=1
@@ -139,7 +192,46 @@ while true ; do
     --taskscheduler)
       HADOOP_TASK_SCHEDULER=$2; shift 2
       AUTOMATED=1
       ;;
+    --hdfs-user)
+      HADOOP_HDFS_USER=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --mapreduce-user)
+      HADOOP_MR_USER=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --keytab-dir)
+      KEYTAB_DIR=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --hdfs-user-keytab)
+      HDFS_KEYTAB=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --mapreduce-user-keytab)
+      MR_KEYTAB=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --kerberos-realm)
+      KERBEROS_REALM=$2; shift 2
+      SECURITY_TYPE="kerberos"
+      AUTOMATED=1
+      ;;
+    --kinit-location)
+      KINIT=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --datanodes)
+      DATANODES=$2; shift 2
+      AUTOMATED=1
+      DATANODES=$(echo $DATANODES | tr ',' ' ')
+      ;;
+    --tasktrackers)
+      TASKTRACKERS=$2; shift 2
+      AUTOMATED=1
+      TASKTRACKERS=$(echo $TASKTRACKERS | tr ',' ' ')
+      ;;
     --)
       shift ; break
       ;;
@@ -151,21 +243,40 @@ while true ; do
   esac
 done
 
+# Fill in default values, if parameters have not been defined.
+AUTOSETUP=${AUTOSETUP:-1}
 JAVA_HOME=${JAVA_HOME:-/usr/java/default}
-HADOOP_NN_HOST=${HADOOP_NN_HOST:-hdfs://`hostname`:9000/}
+HADOOP_GROUP=${HADOOP_GROUP:-hadoop}
+HADOOP_NN_HOST=${HADOOP_NN_HOST:-`hostname`}
 HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
 HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
-HADOOP_JT_HOST=${HADOOP_JT_HOST:-`hostname`:9001}
+HADOOP_JT_HOST=${HADOOP_JT_HOST:-`hostname`}
 HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
 HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
 HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-/var/log/hadoop}
+HADOOP_PID_DIR=${HADOOP_PID_DIR:-/var/log/hadoop}
 HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
 HADOOP_REPLICATION=${HADOOP_REPLICATION:-3}
 HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
+HADOOP_HDFS_USER=${HADOOP_HDFS_USER:-hdfs}
+HADOOP_MR_USER=${HADOOP_MR_USER:-mr}
+KEYTAB_DIR=${KEYTAB_DIR:-/etc/security/keytabs}
+HDFS_KEYTAB=${HDFS_KEYTAB:-/home/hdfs/hdfs.keytab}
+MR_KEYTAB=${MR_KEYTAB:-/home/mr/mr.keytab}
+KERBEROS_REALM=${KERBEROS_REALM:-KERBEROS.EXAMPLE.COM}
+SECURITY_TYPE=${SECURITY_TYPE:-simple}
+KINIT=${KINIT:-/usr/kerberos/bin/kinit}
+if [ "${SECURITY_TYPE}" = "kerberos" ]; then
+  TASK_CONTROLLER="org.apache.hadoop.mapred.LinuxTaskController"
+  HADOOP_DN_ADDR="0.0.0.0:1019"
+  HADOOP_DN_HTTP_ADDR="0.0.0.0:1022"
+  SECURITY="true"
+else
+  TASK_CONTROLLER="org.apache.hadoop.mapred.DefaultTaskController"
+  HADOOP_DN_ADDR="0.0.0.0:50010"
+  HADOOP_DN_HTTP_ADDR="0.0.0.0:50075"
+  SECURITY="false"
+fi
 
 # Interactive setup wizard
 if [ "${AUTOMATED}" != "1" ]; then
   echo "Setup Hadoop Configuration"
   echo
@@ -173,18 +284,18 @@ if [ "${AUTOMATED}" != "1" ]; then
   read USER_HADOOP_CONF_DIR
   echo -n "Where would you like to put log directory? (${HADOOP_LOG_DIR}) "
   read USER_HADOOP_LOG_DIR
-  echo -n "What is the url of the namenode? (${HADOOP_NN_HOST}) "
+  echo -n "Where would you like to put pid directory? (${HADOOP_PID_DIR}) "
+  read USER_HADOOP_PID_DIR
+  echo -n "What is the host of the namenode? (${HADOOP_NN_HOST}) "
   read USER_HADOOP_NN_HOST
   echo -n "Where would you like to put namenode data directory? (${HADOOP_NN_DIR}) "
   read USER_HADOOP_NN_DIR
   echo -n "Where would you like to put datanode data directory? (${HADOOP_DN_DIR}) "
   read USER_HADOOP_DN_DIR
-  echo -n "What is the url of the jobtracker? (${HADOOP_JT_HOST}) "
+  echo -n "What is the host of the jobtracker? (${HADOOP_JT_HOST}) "
   read USER_HADOOP_JT_HOST
   echo -n "Where would you like to put jobtracker/tasktracker data directory? (${HADOOP_MAPRED_DIR}) "
   read USER_HADOOP_MAPRED_DIR
   echo -n "Which taskscheduler would you like? (${HADOOP_TASK_SCHEDULER}) "
   read USER_HADOOP_TASK_SCHEDULER
   echo -n "Where is JAVA_HOME directory? (${JAVA_HOME}) "
   read USER_JAVA_HOME
   echo -n "Would you like to create directories/copy conf files to localhost? (Y/n) "
@@ -199,16 +310,18 @@ if [ "${AUTOMATED}" != "1" ]; then
   HADOOP_MAPRED_DIR=${USER_HADOOP_MAPRED_DIR:-$HADOOP_MAPRED_DIR}
   HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
   HADOOP_LOG_DIR=${USER_HADOOP_LOG_DIR:-$HADOOP_LOG_DIR}
+  HADOOP_PID_DIR=${USER_HADOOP_PID_DIR:-$HADOOP_PID_DIR}
   HADOOP_CONF_DIR=${USER_HADOOP_CONF_DIR:-$HADOOP_CONF_DIR}
   AUTOSETUP=${USER_AUTOSETUP:-y}
   echo "Review your choices:"
   echo
   echo "Config directory            : ${HADOOP_CONF_DIR}"
   echo "Log directory               : ${HADOOP_LOG_DIR}"
-  echo "Namenode url                : ${HADOOP_NN_HOST}"
+  echo "PID directory               : ${HADOOP_PID_DIR}"
+  echo "Namenode host               : ${HADOOP_NN_HOST}"
   echo "Namenode directory          : ${HADOOP_NN_DIR}"
   echo "Datanode directory          : ${HADOOP_DN_DIR}"
-  echo "Jobtracker url              : ${HADOOP_JT_HOST}"
+  echo "Jobtracker host             : ${HADOOP_JT_HOST}"
   echo "Mapreduce directory         : ${HADOOP_MAPRED_DIR}"
   echo "Task scheduler              : ${HADOOP_TASK_SCHEDULER}"
   echo "JAVA_HOME directory         : ${JAVA_HOME}"
@@ -222,61 +335,180 @@ if [ "${AUTOMATED}" != "1" ]; then
   fi
 fi
 
-if [ "${AUTOSETUP}" == "1" ]; then
-  # If user wants to setup local system automatically,
-  # set config file generation location to HADOOP_CONF_DIR.
-  DEST=${HADOOP_CONF_DIR}
-else
-  # If user is only interested to generate config file locally,
-  # place config files in the current working directory.
-  DEST=`pwd`
-fi
-
-# remove existing config file, they are existed in current directory.
-rm -f ${DEST}/core-site.xml >/dev/null
-rm -f ${DEST}/hdfs-site.xml >/dev/null
-rm -f ${DEST}/mapred-site.xml >/dev/null
-rm -f ${DEST}/hadoop-env.sh >/dev/null
-
-# Generate config file with specified parameters.
-template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/core-site.xml ${DEST}/core-site.xml
-template_generator ${HADOOP_PREFIX}/share/hadoop/hdfs/templates/hdfs-site.xml ${DEST}/hdfs-site.xml
-template_generator ${HADOOP_PREFIX}/share/hadoop/mapreduce/templates/mapred-site.xml ${DEST}/mapred-site.xml
-template_generator ${HADOOP_CONF_DIR}/hadoop-env.sh.template ${DEST}/hadoop-env.sh
-
-chown root:hadoop ${DEST}/hadoop-env.sh
-chmod 755 ${DEST}/hadoop-env.sh
-
 # Setup directory path and copy config files, if AUTOSETUP is chosen.
 if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then
-  mkdir -p ${HADOOP_HDFS_DIR}
-  mkdir -p ${HADOOP_NN_DIR}
-  mkdir -p ${HADOOP_DN_DIR}
-  mkdir -p ${HADOOP_MAPRED_DIR}
+  if [ -d ${KEYTAB_DIR} ]; then
+    chmod 700 ${KEYTAB_DIR}/*
+    chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${KEYTAB_DIR}/[jt]t.service.keytab
+    chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${KEYTAB_DIR}/[dns]n.service.keytab
+  fi
+  chmod 755 -R ${HADOOP_PREFIX}/sbin/*hadoop*
+  chmod 755 -R ${HADOOP_PREFIX}/bin/hadoop
+  chmod 755 -R ${HADOOP_PREFIX}/libexec/hadoop-config.sh
+  mkdir -p /home/${HADOOP_MR_USER}
+  chown ${HADOOP_MR_USER}:${HADOOP_GROUP} /home/${HADOOP_MR_USER}
+  HDFS_DIR=`echo ${HADOOP_HDFS_DIR} | sed -e 's/,/ /g'`
+  mkdir -p ${HDFS_DIR}
+  if [ -e ${HADOOP_NN_DIR} ]; then
+    rm -rf ${HADOOP_NN_DIR}
+  fi
+  DATANODE_DIR=`echo ${HADOOP_DN_DIR} | sed -e 's/,/ /g'`
+  mkdir -p ${DATANODE_DIR}
+  MAPRED_DIR=`echo ${HADOOP_MAPRED_DIR} | sed -e 's/,/ /g'`
+  mkdir -p ${MAPRED_DIR}
   mkdir -p ${HADOOP_CONF_DIR}
+  check_permission ${HADOOP_CONF_DIR}
+  if [ $? == 1 ]; then
+    echo "Full path to ${HADOOP_CONF_DIR} should be owned by root."
+    exit 1
+  fi
+
   mkdir -p ${HADOOP_LOG_DIR}
-  mkdir -p ${HADOOP_LOG_DIR}/hdfs
-  mkdir -p ${HADOOP_LOG_DIR}/mapred
-  chown hdfs:hadoop ${HADOOP_HDFS_DIR}
-  chown hdfs:hadoop ${HADOOP_NN_DIR}
-  chown hdfs:hadoop ${HADOOP_DN_DIR}
-  chown mapred:hadoop ${HADOOP_MAPRED_DIR}
-  chown root:hadoop ${HADOOP_LOG_DIR}
+  #create the log sub dir for diff users
+  mkdir -p ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
+  mkdir -p ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}
+
+  mkdir -p ${HADOOP_PID_DIR}
+  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HDFS_DIR}
+  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${DATANODE_DIR}
+  chmod 700 -R ${DATANODE_DIR}
+  chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${MAPRED_DIR}
+  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}
   chmod 775 ${HADOOP_LOG_DIR}
-  chown hdfs:hadoop ${HADOOP_LOG_DIR}/hdfs
-  chown mapred:hadoop ${HADOOP_LOG_DIR}/mapred
+  chmod 775 ${HADOOP_PID_DIR}
+  chown root:${HADOOP_GROUP} ${HADOOP_PID_DIR}
+
+  #change the permission and the owner
+  chmod 755 ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
+  chown ${HADOOP_HDFS_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
+  chmod 755 ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}
+  chown ${HADOOP_MR_USER}:${HADOOP_GROUP} ${HADOOP_LOG_DIR}/${HADOOP_MR_USER}
+
+  if [ -e ${HADOOP_CONF_DIR}/core-site.xml ]; then
+    mv -f ${HADOOP_CONF_DIR}/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/hdfs-site.xml ]; then
+    mv -f ${HADOOP_CONF_DIR}/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/mapred-site.xml ]; then
+    mv -f ${HADOOP_CONF_DIR}/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/hadoop-env.sh ]; then
+    mv -f ${HADOOP_CONF_DIR}/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/hadoop-policy.xml ]; then
+    mv -f ${HADOOP_CONF_DIR}/hadoop-policy.xml ${HADOOP_CONF_DIR}/hadoop-policy.xml.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/mapred-queue-acls.xml ]; then
+    mv -f ${HADOOP_CONF_DIR}/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/commons-logging.properties ]; then
+    mv -f ${HADOOP_CONF_DIR}/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/taskcontroller.cfg ]; then
+    mv -f ${HADOOP_CONF_DIR}/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/slaves ]; then
+    mv -f ${HADOOP_CONF_DIR}/slaves ${HADOOP_CONF_DIR}/slaves.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/dfs.include ]; then
+    mv -f ${HADOOP_CONF_DIR}/dfs.include ${HADOOP_CONF_DIR}/dfs.include.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/dfs.exclude ]; then
+    mv -f ${HADOOP_CONF_DIR}/dfs.exclude ${HADOOP_CONF_DIR}/dfs.exclude.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/mapred.include ]; then
+    mv -f ${HADOOP_CONF_DIR}/mapred.include ${HADOOP_CONF_DIR}/mapred.include.bak
+  fi
+  if [ -e ${HADOOP_CONF_DIR}/mapred.exclude ]; then
+    mv -f ${HADOOP_CONF_DIR}/mapred.exclude ${HADOOP_CONF_DIR}/mapred.exclude.bak
+  fi
+
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-policy.xml ${HADOOP_CONF_DIR}/hadoop-policy.xml
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg
+
+  #set the owner of the hadoop dir to root
+  chown root ${HADOOP_PREFIX}
+  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
+  chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh
+
+  #set taskcontroller
+  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/taskcontroller.cfg
+  chmod 400 ${HADOOP_CONF_DIR}/taskcontroller.cfg
+  chown root:${HADOOP_GROUP} ${HADOOP_PREFIX}/bin/task-controller
+  chmod 6050 ${HADOOP_PREFIX}/bin/task-controller
+
+  #generate the slaves file and include and exclude files for hdfs and mapred
+  echo '' > ${HADOOP_CONF_DIR}/slaves
+  echo '' > ${HADOOP_CONF_DIR}/dfs.include
+  echo '' > ${HADOOP_CONF_DIR}/dfs.exclude
+  echo '' > ${HADOOP_CONF_DIR}/mapred.include
+  echo '' > ${HADOOP_CONF_DIR}/mapred.exclude
+  for dn in $DATANODES
+  do
+    echo $dn >> ${HADOOP_CONF_DIR}/slaves
+    echo $dn >> ${HADOOP_CONF_DIR}/dfs.include
+  done
+  for tt in $TASKTRACKERS
+  do
+    echo $tt >> ${HADOOP_CONF_DIR}/mapred.include
+  done
 
   echo "Configuration setup is completed."
+  if [[ "$HADOOP_NN_HOST" =~ "`hostname`" ]]; then
+    echo "Proceed to run hadoop-setup-hdfs.sh on namenode."
+  fi
 else
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-policy.xml ${HADOOP_CONF_DIR}/hadoop-policy.xml
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/commons-logging.properties ${HADOOP_CONF_DIR}/commons-logging.properties
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/mapred-queue-acls.xml ${HADOOP_CONF_DIR}/mapred-queue-acls.xml
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg
+  template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties
+  if [ ! -e ${HADOOP_CONF_DIR}/capacity-scheduler.xml ]; then
+    template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml
+  fi
+
+  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
+  chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh
+  #set taskcontroller
+  chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/taskcontroller.cfg
+  chmod 400 ${HADOOP_CONF_DIR}/taskcontroller.cfg
+  chown root:${HADOOP_GROUP} ${HADOOP_PREFIX}/bin/task-controller
+  chmod 6050 ${HADOOP_PREFIX}/bin/task-controller
+
+  #generate the slaves file and include and exclude files for hdfs and mapred
+  echo '' > ${HADOOP_CONF_DIR}/slaves
+  echo '' > ${HADOOP_CONF_DIR}/dfs.include
+  echo '' > ${HADOOP_CONF_DIR}/dfs.exclude
+  echo '' > ${HADOOP_CONF_DIR}/mapred.include
+  echo '' > ${HADOOP_CONF_DIR}/mapred.exclude
+  for dn in $DATANODES
+  do
+    echo $dn >> ${HADOOP_CONF_DIR}/slaves
+    echo $dn >> ${HADOOP_CONF_DIR}/dfs.include
+  done
+  for tt in $TASKTRACKERS
+  do
+    echo $tt >> ${HADOOP_CONF_DIR}/mapred.include
+  done
+
   echo
-  echo "Configuration file has been generated, please copy:"
+  echo "Configuration file has been generated in:"
   echo
-  echo "core-site.xml"
-  echo "hdfs-site.xml"
-  echo "mapred-site.xml"
-  echo "hadoop-env.sh"
+  echo "${HADOOP_CONF_DIR}/core-site.xml"
+  echo "${HADOOP_CONF_DIR}/hdfs-site.xml"
+  echo "${HADOOP_CONF_DIR}/mapred-site.xml"
+  echo "${HADOOP_CONF_DIR}/hadoop-env.sh"
  echo
  echo " to ${HADOOP_CONF_DIR} on all nodes, and proceed to run hadoop-setup-hdfs.sh on namenode."
 fi
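Putting the new options together, a multi-node secure setup could be driven non-interactively as in this sketch (all hostnames and the realm are placeholders, not from the commit):

    # Hypothetical example; hostnames and realm are placeholders.
    /usr/sbin/hadoop-setup-conf.sh --auto \
      --conf-dir=/etc/hadoop \
      --namenode-host=nn.example.com \
      --secondarynamenode-host=snn.example.com \
      --jobtracker-host=jt.example.com \
      --kerberos-realm=KERBEROS.EXAMPLE.COM \
      --datanodes=dn1.example.com,dn2.example.com \
      --tasktrackers=dn1.example.com,dn2.example.com

Passing --kerberos-realm flips SECURITY_TYPE to kerberos, which in turn selects LinuxTaskController and the privileged datanode ports (1019/1022) in the generated configuration.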
@@ -18,37 +18,65 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
 if [ "$HADOOP_HOME" != "" ]; then
   echo "Warning: \$HADOOP_HOME is deprecated."
   echo
 fi
 
 . "$bin"/../libexec/hadoop-config.sh
 
 usage() {
   echo "
 usage: $0 <parameters>
-  Require parameter:
-     -c <clusterid>                                Set cluster identifier for HDFS
 
   Optional parameters:
-     -h                                            Display this message
+     --format                                      Force namenode format
+     --group=hadoop                                Set Hadoop group
+     -h                                            Display this message
+     --hdfs-user=hdfs                              Set HDFS user
+     --kerberos-realm=KERBEROS.EXAMPLE.COM         Set Kerberos realm
+     --hdfs-user-keytab=/home/hdfs/hdfs.keytab     Set HDFS user key tab
+     --mapreduce-user=mr                           Set mapreduce user
   "
   exit 1
 }
 
-if [ $# != 2 ] ; then
+OPTS=$(getopt \
+  -n $0 \
+  -o '' \
+  -l 'format' \
+  -l 'hdfs-user:' \
+  -l 'hdfs-user-keytab:' \
+  -l 'mapreduce-user:' \
+  -l 'kerberos-realm:' \
+  -o 'h' \
+  -- "$@")
+
+if [ $? != 0 ] ; then
   usage
+  exit 1
 fi
 
-while getopts "hc:" OPTION
-do
-  case $OPTION in
-    c)
-      SETUP_CLUSTER=$2; shift 2
-      ;;
-    h)
-      usage
-      ;;
+eval set -- "${OPTS}"
+while true ; do
+  case "$1" in
+    --format)
+      FORMAT_NAMENODE=1; shift
+      AUTOMATED=1
+      ;;
+    --group)
+      HADOOP_GROUP=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --hdfs-user)
+      HADOOP_HDFS_USER=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --mapreduce-user)
+      HADOOP_MR_USER=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --hdfs-user-keytab)
+      HDFS_KEYTAB=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --kerberos-realm)
+      KERBEROS_REALM=$2; shift 2
+      AUTOMATED=1
+      ;;
     --)
       shift ; break
@@ -61,30 +89,56 @@ do
   esac
 done
 
 export HADOOP_PREFIX
 export HADOOP_CONF_DIR
-export SETUP_CLUSTER
+HADOOP_GROUP=${HADOOP_GROUP:-hadoop}
+HADOOP_HDFS_USER=${HADOOP_HDFS_USER:-hdfs}
+HADOOP_MAPREDUCE_USER=${HADOOP_MR_USER:-mapred}
+
+if [ "${KERBEROS_REALM}" != "" ]; then
+  # Determine kerberos location based on Linux distro.
+  if [ -e /etc/lsb-release ]; then
+    KERBEROS_BIN=/usr/bin
+  else
+    KERBEROS_BIN=/usr/kerberos/bin
+  fi
+  kinit_cmd="${KERBEROS_BIN}/kinit -k -t ${HDFS_KEYTAB} ${HADOOP_HDFS_USER}"
+  su -c "${kinit_cmd}" ${HADOOP_HDFS_USER}
+fi
 
-# Start namenode and initialize file system structure
 echo "Setup Hadoop Distributed File System"
 echo
-echo "Formatting namenode"
-echo
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} namenode -format -clusterid ${SETUP_CLUSTER}' hdfs
-echo
 
-/etc/init.d/hadoop-namenode start
+# Format namenode
+if [ "${FORMAT_NAMENODE}" == "1" ]; then
+  echo "Formatting namenode"
+  echo
+  su -c "echo Y | ${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} namenode -format" ${HADOOP_HDFS_USER}
+  echo
+fi
+
+# Start namenode process
+echo "Starting namenode process"
+echo
+if [ -e ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh ]; then
+  DAEMON_PATH=${HADOOP_PREFIX}/sbin
+else
+  DAEMON_PATH=${HADOOP_PREFIX}/bin
+fi
+su -c "${DAEMON_PATH}/hadoop-daemon.sh --config ${HADOOP_CONF_DIR} start namenode" ${HADOOP_HDFS_USER}
 echo
 echo "Initialize HDFS file system: "
 echo
 
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /jobtracker' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown mapred:mapred /jobtracker' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /user/mapred' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown mapred:mapred /user/mapred' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /tmp' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chmod 777 /tmp' hdfs
+#create the /user dir
+su -c "${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -mkdir /user" ${HADOOP_HDFS_USER}
+
+#create /tmp and give it 777
+su -c "${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -mkdir /tmp" ${HADOOP_HDFS_USER}
+su -c "${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -chmod 777 /tmp" ${HADOOP_HDFS_USER}
+
+#create /mapred
+su -c "${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -mkdir /mapred" ${HADOOP_HDFS_USER}
+su -c "${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -chmod 700 /mapred" ${HADOOP_HDFS_USER}
+su -c "${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -chown ${HADOOP_MAPREDUCE_USER}:system /mapred" ${HADOOP_HDFS_USER}
 
 if [ $? -eq 0 ]; then
   echo "Completed."
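After the generated configuration is in place on the namenode, the rewritten script can be driven non-interactively; a sketch (the keytab path and realm are placeholders):

    # Hypothetical example; the keytab path is a placeholder.
    /usr/sbin/hadoop-setup-hdfs.sh --format \
      --hdfs-user=hdfs \
      --mapreduce-user=mapred \
      --kerberos-realm=KERBEROS.EXAMPLE.COM \
      --hdfs-user-keytab=/etc/security/keytabs/hdfs.keytab

With a realm set, the script kinits as the HDFS super user before formatting, starts the namenode through hadoop-daemon.sh, and then lays out /user, /tmp, and /mapred with the ownership shown above.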
@@ -17,16 +17,16 @@
 
 # Script for setup HDFS file system for single node deployment
 
-bin=`which $0`
-bin=`dirname ${bin}`
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
 
-export HADOOP_PREFIX=${bin}/..
-
-if [ -e /etc/hadoop/hadoop-env.sh ]; then
-  . /etc/hadoop/hadoop-env.sh
+if [ "$HADOOP_HOME" != "" ]; then
+  echo "Warning: \$HADOOP_HOME is deprecated."
+  echo
 fi
 
+. "$bin"/../libexec/hadoop-config.sh
+
 usage() {
   echo "
 usage: $0 <parameters>
@@ -38,7 +38,19 @@ usage: $0 <parameters>
   exit 1
 }
 
 # Parse script parameters
+template_generator() {
+  REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
+  cat $1 |
+  while read line ; do
+    while [[ "$line" =~ $REGEX ]] ; do
+      LHS=${BASH_REMATCH[1]}
+      RHS="$(eval echo "\"$LHS\"")"
+      line=${line//$LHS/$RHS}
+    done
+    echo $line >> $2
+  done
+}
+
 OPTS=$(getopt \
   -n $0 \
   -o '' \
@@ -49,6 +61,10 @@ if [ $? != 0 ] ; then
   usage
 fi
 
+if [ -e /etc/hadoop/hadoop-env.sh ]; then
+  . /etc/hadoop/hadoop-env.sh
+fi
+
 eval set -- "${OPTS}"
 while true ; do
   case "$1" in
@@ -69,7 +85,6 @@ while true ; do
   esac
 done
 
-# Interactive setup wizard
 if [ "${AUTOMATED}" != "1" ]; then
   echo "Welcome to Hadoop single node setup wizard"
   echo
@@ -119,68 +134,59 @@ SET_REBOOT=${SET_REBOOT:-y}
 /etc/init.d/hadoop-jobtracker stop 2>/dev/null >/dev/null
 /etc/init.d/hadoop-tasktracker stop 2>/dev/null >/dev/null
 
-# Default settings
-JAVA_HOME=${JAVA_HOME:-/usr/java/default}
-HADOOP_NN_HOST=${HADOOP_NN_HOST:-hdfs://localhost:9000/}
-HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
-HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
-HADOOP_JT_HOST=${HADOOP_JT_HOST:-localhost:9001}
-HADOOP_HDFS_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/hdfs}
-HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
-HADOOP_LOG_DIR="/var/log/hadoop"
-HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
-HADOOP_REPLICATION=${HADOOP_RELICATION:-1}
-HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
-
 # Setup config files
 if [ "${SET_CONFIG}" == "y" ]; then
+  JAVA_HOME=${JAVA_HOME:-/usr/java/default}
+  HADOOP_NN_HOST=${HADOOP_NN_HOST:-localhost}
+  HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
+  HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
+  HADOOP_JT_HOST=${HADOOP_JT_HOST:-localhost}
+  HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
+  HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
+  HADOOP_PID_DIR=${HADOOP_PID_DIR:-/var/run/hadoop}
+  HADOOP_LOG_DIR="/var/log/hadoop"
+  HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
+  HADOOP_REPLICATION=${HADOOP_REPLICATION:-1}
   ${HADOOP_PREFIX}/sbin/hadoop-setup-conf.sh --auto \
+    --hdfs-user=hdfs \
+    --mapreduce-user=mapred \
     --conf-dir=${HADOOP_CONF_DIR} \
     --datanode-dir=${HADOOP_DN_DIR} \
     --hdfs-dir=${HADOOP_HDFS_DIR} \
-    --jobtracker-url=${HADOOP_JT_HOST} \
+    --jobtracker-host=${HADOOP_JT_HOST} \
     --log-dir=${HADOOP_LOG_DIR} \
+    --pid-dir=${HADOOP_PID_DIR} \
     --mapred-dir=${HADOOP_MAPRED_DIR} \
     --namenode-dir=${HADOOP_NN_DIR} \
-    --namenode-url=${HADOOP_NN_HOST} \
+    --namenode-host=${HADOOP_NN_HOST} \
     --replication=${HADOOP_REPLICATION}
 fi
 
+export HADOOP_CONF_DIR
+
 # Format namenode
 if [ ! -e ${HADOOP_NN_DIR} ]; then
-  rm -rf ${HADOOP_HDFS_DIR} 2>/dev/null >/dev/null
-  mkdir -p ${HADOOP_HDFS_DIR}
-  chmod 755 ${HADOOP_HDFS_DIR}
-  chown hdfs:hadoop ${HADOOP_HDFS_DIR}
-  su -c '${HADOOP_PREFIX}/bin/hdfs --config ${HADOOP_CONF_DIR} namenode -format -clusterid hadoop' hdfs
+  /etc/init.d/hadoop-namenode format
 elif [ "${SET_FORMAT}" == "y" ]; then
   rm -rf ${HADOOP_HDFS_DIR} 2>/dev/null >/dev/null
   mkdir -p ${HADOOP_HDFS_DIR}
   chmod 755 ${HADOOP_HDFS_DIR}
   chown hdfs:hadoop ${HADOOP_HDFS_DIR}
-  rm -rf /var/lib/hadoop/hdfs/namenode
-  su -c '${HADOOP_PREFIX}/bin/hdfs --config ${HADOOP_CONF_DIR} namenode -format -clusterid hadoop' hdfs
+  rm -rf ${HADOOP_NN_DIR}
+  /etc/init.d/hadoop-namenode format
 fi
 
 # Start hdfs service
 /etc/init.d/hadoop-namenode start
 /etc/init.d/hadoop-datanode start
 
 # Initialize file system structure
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /user/mapred' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown mapred:mapred /user/mapred' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /tmp' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chmod 777 /tmp' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /jobtracker' hdfs
-su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown mapred:mapred /jobtracker' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -mkdir /user/mapred' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -chown mapred:mapred /user/mapred' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -mkdir /tmp' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -chmod 777 /tmp' hdfs
 
 # Start mapreduce service
 /etc/init.d/hadoop-jobtracker start
 /etc/init.d/hadoop-tasktracker start
 
 # Toggle service startup on reboot
 if [ "${SET_REBOOT}" == "y" ]; then
   if [ -e /etc/debian_version ]; then
     ln -sf ../init.d/hadoop-namenode /etc/rc2.d/S90hadoop-namenode
@@ -203,7 +209,6 @@ if [ "${SET_REBOOT}" == "y" ]; then
   fi
 fi
 
-# Shutdown service, if user choose to stop services after setup
 if [ "${STARTUP}" != "y" ]; then
   /etc/init.d/hadoop-namenode stop
   /etc/init.d/hadoop-datanode stop
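The template_generator function added above substitutes ${VAR} references in a template file against the caller's environment. A self-contained sketch of its behavior, using the same function body on a throwaway template (file paths are illustrative):

    #!/bin/bash
    # Sketch: the same substitution loop as template_generator, exercised on a
    # one-line template. Requires bash for BASH_REMATCH.
    template_generator() {
      REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
      cat $1 |
      while read line ; do
        while [[ "$line" =~ $REGEX ]] ; do
          LHS=${BASH_REMATCH[1]}
          RHS="$(eval echo "\"$LHS\"")"
          line=${line//$LHS/$RHS}
        done
        echo $line >> $2
      done
    }

    export JAVA_HOME=/usr/java/default
    echo 'export JAVA_HOME=${JAVA_HOME}' > /tmp/env.template
    template_generator /tmp/env.template /tmp/env.sh
    cat /tmp/env.sh   # prints: export JAVA_HOME=/usr/java/default

Note the function appends to its output file, which is why the setup scripts move any pre-existing config aside (or remove it) before regenerating.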
@@ -27,10 +27,15 @@ source /etc/default/hadoop-env.sh
 RETVAL=0
 PIDFILE="${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid"
 desc="Hadoop datanode daemon"
+HADOOP_PREFIX="/usr"
 
 start() {
   echo -n $"Starting $desc (hadoop-datanode): "
-  daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start datanode
+  if [ -n "$HADOOP_SECURE_DN_USER" ]; then
+    daemon ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start datanode
+  else
+    daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start datanode
+  fi
   RETVAL=$?
   echo
   [ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-datanode
@@ -39,7 +44,11 @@ start() {
 
 stop() {
   echo -n $"Stopping $desc (hadoop-datanode): "
-  daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop datanode
+  if [ -n "$HADOOP_SECURE_DN_USER" ]; then
+    daemon ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop datanode
+  else
+    daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop datanode
+  fi
   RETVAL=$?
   sleep 5
   echo
@@ -27,6 +27,7 @@ source /etc/default/hadoop-env.sh
 RETVAL=0
 PIDFILE="${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid"
 desc="Hadoop jobtracker daemon"
+export HADOOP_PREFIX="/usr"
 
 start() {
   echo -n $"Starting $desc (hadoop-jobtracker): "
@@ -27,6 +27,7 @@ source /etc/default/hadoop-env.sh
 RETVAL=0
 PIDFILE="${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid"
 desc="Hadoop namenode daemon"
+export HADOOP_PREFIX="/usr"
 
 start() {
   echo -n $"Starting $desc (hadoop-namenode): "
@@ -27,6 +27,7 @@ source /etc/default/hadoop-env.sh
 RETVAL=0
 PIDFILE="${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid"
 desc="Hadoop tasktracker daemon"
+export HADOOP_PREFIX="/usr"
 
 start() {
   echo -n $"Starting $desc (hadoop-tasktracker): "
@@ -0,0 +1,178 @@
+<?xml version="1.0"?>
+
+<!-- This is the configuration file for the resource manager in Hadoop. -->
+<!-- You can configure various scheduling parameters related to queues. -->
+<!-- The properties for a queue follow a naming convention, such as, -->
+<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->
+
+<configuration>
+
+  <property>
+    <name>mapred.capacity-scheduler.maximum-system-jobs</name>
+    <value>3000</value>
+    <description>Maximum number of jobs in the system which can be initialized,
+      concurrently, by the CapacityScheduler.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.capacity</name>
+    <value>100</value>
+    <description>Percentage of the number of slots in the cluster that are
+      to be available for jobs in this queue.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name>
+    <value>-1</value>
+    <description>
+      maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster.
+      This provides a means to limit how much excess capacity a queue can use. By default, there is no limit.
+      The maximum-capacity of a queue can only be greater than or equal to its minimum capacity.
+      Default value of -1 implies a queue can use complete capacity of the cluster.
+
+      This property could be used to curtail certain jobs which are long running in nature from occupying more than a
+      certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of
+      other queues being affected.
+
+      One important thing to note is that maximum-capacity is a percentage, so based on the cluster's capacity
+      the max capacity would change. So if a large number of nodes or racks get added to the cluster, max capacity in
+      absolute terms would increase accordingly.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
+    <value>false</value>
+    <description>If true, priorities of jobs will be taken into
+      account in scheduling decisions.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name>
+    <value>100</value>
+    <description> Each queue enforces a limit on the percentage of resources
+      allocated to a user at any given time, if there is competition for them.
+      This user limit can vary between a minimum and maximum value. The former
+      depends on the number of users who have submitted jobs, and the latter is
+      set to this property value. For example, suppose the value of this
+      property is 25. If two users have submitted jobs to a queue, no single
+      user can use more than 50% of the queue resources. If a third user submits
+      a job, no single user can use more than 33% of the queue resources. With 4
+      or more users, no user can use more than 25% of the queue's resources. A
+      value of 100 implies no user limits are imposed.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.user-limit-factor</name>
+    <value>1</value>
+    <description>The multiple of the queue capacity which can be configured to
+      allow a single user to acquire more slots.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks</name>
+    <value>200000</value>
+    <description>The maximum number of tasks, across all jobs in the queue,
+      which can be initialized concurrently. Once the queue's jobs exceed this
+      limit they will be queued on disk.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks-per-user</name>
+    <value>100000</value>
+    <description>The maximum number of tasks per-user, across all the of the
+      user's jobs in the queue, which can be initialized concurrently. Once the
+      user's jobs exceed this limit they will be queued on disk.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.init-accept-jobs-factor</name>
+    <value>10</value>
+    <description>The multiple of (maximum-system-jobs * queue-capacity) used to
+      determine the number of jobs which are accepted by the scheduler.
+    </description>
+  </property>
+
+  <!-- The default configuration settings for the capacity task scheduler -->
+  <!-- The default values would be applied to all the queues which don't have -->
+  <!-- the appropriate property for the particular queue -->
+  <property>
+    <name>mapred.capacity-scheduler.default-supports-priority</name>
+    <value>false</value>
+    <description>If true, priorities of jobs will be taken into
+      account in scheduling decisions by default in a job queue.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>
+    <value>100</value>
+    <description>The percentage of the resources limited to a particular user
+      for the job queue at any given point of time by default.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-user-limit-factor</name>
+    <value>1</value>
+    <description>The default multiple of queue-capacity which is used to
+      determine the amount of slots a single user can consume concurrently.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-queue</name>
+    <value>200000</value>
+    <description>The default maximum number of tasks, across all jobs in the
+      queue, which can be initialized concurrently. Once the queue's jobs exceed
+      this limit they will be queued on disk.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-user</name>
+    <value>100000</value>
+    <description>The default maximum number of tasks per-user, across all the of
+      the user's jobs in the queue, which can be initialized concurrently. Once
+      the user's jobs exceed this limit they will be queued on disk.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-init-accept-jobs-factor</name>
+    <value>10</value>
+    <description>The default multiple of (maximum-system-jobs * queue-capacity)
+      used to determine the number of jobs which are accepted by the scheduler.
+    </description>
+  </property>
+
+  <!-- Capacity scheduler Job Initialization configuration parameters -->
+  <property>
+    <name>mapred.capacity-scheduler.init-poll-interval</name>
+    <value>5000</value>
+    <description>The amount of time in milliseconds which is used to poll
+      the job queues for jobs to initialize.
+    </description>
+  </property>
+  <property>
+    <name>mapred.capacity-scheduler.init-worker-threads</name>
+    <value>5</value>
+    <description>Number of worker threads which would be used by the
+      initialization poller to initialize jobs in a set of queues. If the
+      number mentioned in the property is equal to the number of job queues,
+      then a single thread would initialize jobs in a queue. If lesser, then
+      a thread would get a set of queues assigned. If the number is greater,
+      then the number of threads would be equal to the number of job queues.
+    </description>
+  </property>
+
+</configuration>
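The minimum-user-limit-percent description above amounts to a simple formula: each user's share is the larger of 100/active_users and the configured limit. A quick sketch of the arithmetic, reproducing the worked example from the description:

    # Sketch of the per-user limit arithmetic for minimum-user-limit-percent=25.
    limit=25
    for users in 1 2 3 4 5; do
      share=$((100 / users))
      [ "$share" -lt "$limit" ] && share=$limit
      echo "${users} user(s): each may use up to ${share}% of the queue"
    done
    # prints 100, 50, 33, 25, 25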
@@ -0,0 +1,7 @@
+#Logging Implementation
+
+#Log4J
+org.apache.commons.logging.Log=org.apache.commons.logging.impl.Log4JLogger
+
+#JDK Logger
+#org.apache.commons.logging.Log=org.apache.commons.logging.impl.Jdk14Logger
@ -1,27 +1,78 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

  <property>
    <name>local.realm</name>
    <value>${KERBEROS_REALM}</value>
  </property>

  <!-- file system properties -->

  <property>
    <name>fs.default.name</name>
    <value>${HADOOP_NN_HOST}</value>
    <value>hdfs://${HADOOP_NN_HOST}:8020</value>
    <description>The name of the default file system. Either the
      literal string "local" or a host:port for NDFS.
    </description>
    <final>true</final>
  </property>

  <property>
    <name>fs.trash.interval</name>
    <value>360</value>
    <description>Number of minutes between trash checkpoints.
      If zero, the trash feature is disabled.
    </description>
  </property>

  <property>
    <name>hadoop.security.auth_to_local</name>
    <value>
      RULE:[2:$1@$0]([jt]t@.*${KERBEROS_REALM})s/.*/${HADOOP_MR_USER}/
      RULE:[2:$1@$0]([nd]n@.*${KERBEROS_REALM})s/.*/${HADOOP_HDFS_USER}/
      RULE:[2:$1@$0](mapred@.*${KERBEROS_REALM})s/.*/${HADOOP_MR_USER}/
      RULE:[2:$1@$0](hdfs@.*${KERBEROS_REALM})s/.*/${HADOOP_HDFS_USER}/
      RULE:[2:$1@$0](mapredqa@.*${KERBEROS_REALM})s/.*/${HADOOP_MR_USER}/
      RULE:[2:$1@$0](hdfsqa@.*${KERBEROS_REALM})s/.*/${HADOOP_HDFS_USER}/
      DEFAULT
    </value>
    <description></description>
  </property>

  <property>
    <name>hadoop.security.authentication</name>
    <value>${SECURITY_TYPE}</value>
    <description>
      Set the authentication for the cluster. Valid values are: simple or
      kerberos.
    </description>
  </property>

  <property>
    <name>hadoop.security.authorization</name>
    <value>${SECURITY}</value>
    <description>
      Enable authorization for different protocols.
    </description>
  </property>

  <property>
    <name>hadoop.security.groups.cache.secs</name>
    <value>14400</value>
  </property>

  <property>
    <name>hadoop.kerberos.kinit.command</name>
    <value>${KINIT}</value>
  </property>

  <property>
    <name>hadoop.http.filter.initializers</name>
    <value>org.apache.hadoop.http.lib.StaticUserWebFilter</value>
  </property>

</configuration>
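Editor's note: the auth_to_local rules above can be sanity-checked from a shell once a rendered core-site.xml is on the classpath. This is a hedged sketch using the HadoopKerberosName helper class from hadoop-common; the realm, host name, and expected mapping are placeholders, not values from this commit:

# Print how a Kerberos principal maps to a local user under the rules above.
hadoop org.apache.hadoop.security.HadoopKerberosName nn/nn.example.com@EXAMPLE.COM
# Expected output, assuming HADOOP_HDFS_USER was substituted as "hdfs":
#   Name: nn/nn.example.com@EXAMPLE.COM to hdfs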
@ -0,0 +1,54 @@
# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME.  All others are
# optional.  When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.
export JAVA_HOME=${JAVA_HOME}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true ${HADOOP_OPTS}"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_NAMENODE_OPTS}"
HADOOP_JOBTRACKER_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dmapred.audit.logger=INFO,MRAUDIT -Dmapred.jobsummary.logger=INFO,JSA ${HADOOP_JOBTRACKER_OPTS}"
HADOOP_TASKTRACKER_OPTS="-Dsecurity.audit.logger=ERROR,console -Dmapred.audit.logger=ERROR,console ${HADOOP_TASKTRACKER_OPTS}"
HADOOP_DATANODE_OPTS="-Dsecurity.audit.logger=ERROR,DRFAS ${HADOOP_DATANODE_OPTS}"

export HADOOP_SECONDARYNAMENODE_OPTS="-Dsecurity.audit.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_SECONDARYNAMENODE_OPTS}"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx128m ${HADOOP_CLIENT_OPTS}"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData ${HADOOP_JAVA_PLATFORM_OPTS}"

# On secure datanodes, user to run the datanode as after dropping privileges
export HADOOP_SECURE_DN_USER=${HADOOP_HDFS_USER}

# Where log files are stored.  $HADOOP_HOME/logs by default.
export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

# The directory where pid files are stored. /tmp by default.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
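Editor's note: hadoop-env.sh above, like the *-site.xml files in this commit, is a template whose shell-style ${PLACEHOLDER} tokens are filled in at install time by the setup scripts. As a hedged sketch of that substitution step (envsubst is an illustrative stand-in here, not necessarily the scripts' actual mechanism):

# Export the placeholder values, then render a template.
export KERBEROS_REALM=EXAMPLE.COM
export HADOOP_NN_HOST=nn.example.com
export SECURITY_TYPE=kerberos SECURITY=true KINIT=/usr/kerberos/bin/kinit
envsubst < templates/conf/core-site.xml > /etc/hadoop/core-site.xml
# Note: Hadoop-internal references such as ${local.realm} are not valid
# shell identifiers, so envsubst leaves them untouched -- which is the
# desired behavior for a rendered site file.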
@ -0,0 +1,118 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>security.client.protocol.acl</name>
    <value>*</value>
    <description>ACL for ClientProtocol, which is used by user code
      via the DistributedFileSystem.
      The ACL is a comma-separated list of user and group names. The user and
      group list is separated by a blank, e.g. "alice,bob users,wheel".
      A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.client.datanode.protocol.acl</name>
    <value>*</value>
    <description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol
      for block recovery.
      The ACL is a comma-separated list of user and group names. The user and
      group list is separated by a blank, e.g. "alice,bob users,wheel".
      A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.datanode.protocol.acl</name>
    <value>*</value>
    <description>ACL for DatanodeProtocol, which is used by datanodes to
      communicate with the namenode.
      The ACL is a comma-separated list of user and group names. The user and
      group list is separated by a blank, e.g. "alice,bob users,wheel".
      A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.inter.datanode.protocol.acl</name>
    <value>*</value>
    <description>ACL for InterDatanodeProtocol, the inter-datanode protocol
      for updating generation timestamp.
      The ACL is a comma-separated list of user and group names. The user and
      group list is separated by a blank, e.g. "alice,bob users,wheel".
      A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.namenode.protocol.acl</name>
    <value>*</value>
    <description>ACL for NamenodeProtocol, the protocol used by the secondary
      namenode to communicate with the namenode.
      The ACL is a comma-separated list of user and group names. The user and
      group list is separated by a blank, e.g. "alice,bob users,wheel".
      A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.inter.tracker.protocol.acl</name>
    <value>*</value>
    <description>ACL for InterTrackerProtocol, used by the tasktrackers to
      communicate with the jobtracker.
      The ACL is a comma-separated list of user and group names. The user and
      group list is separated by a blank, e.g. "alice,bob users,wheel".
      A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.job.submission.protocol.acl</name>
    <value>*</value>
    <description>ACL for JobSubmissionProtocol, used by job clients to
      communicate with the jobtracker for job submission, querying job status etc.
      The ACL is a comma-separated list of user and group names. The user and
      group list is separated by a blank, e.g. "alice,bob users,wheel".
      A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.task.umbilical.protocol.acl</name>
    <value>*</value>
    <description>ACL for TaskUmbilicalProtocol, used by the map and reduce
      tasks to communicate with the parent tasktracker.
      The ACL is a comma-separated list of user and group names. The user and
      group list is separated by a blank, e.g. "alice,bob users,wheel".
      A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.admin.operations.protocol.acl</name>
    <value>${HADOOP_HDFS_USER}</value>
    <description>ACL for AdminOperationsProtocol. Used for admin commands.
      The ACL is a comma-separated list of user and group names. The user and
      group list is separated by a blank, e.g. "alice,bob users,wheel".
      A special value of "*" means all users are allowed.</description>
  </property>

  <property>
    <name>security.refresh.usertogroups.mappings.protocol.acl</name>
    <value>${HADOOP_HDFS_USER}</value>
    <description>ACL for RefreshUserMappingsProtocol. Used to refresh
      users mappings. The ACL is a comma-separated list of user and
      group names. The user and group list is separated by a blank,
      e.g. "alice,bob users,wheel". A special value of "*" means all
      users are allowed.</description>
  </property>

  <property>
    <name>security.refresh.policy.protocol.acl</name>
    <value>${HADOOP_HDFS_USER}</value>
    <description>ACL for RefreshAuthorizationPolicyProtocol, used by the
      dfsadmin and mradmin commands to refresh the security policy in-effect.
      The ACL is a comma-separated list of user and group names. The user and
      group list is separated by a blank, e.g. "alice,bob users,wheel".
      A special value of "*" means all users are allowed.</description>
  </property>

</configuration>
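Editor's note: since every ACL above uses the same "users groups" format, tightening one is just a matter of replacing the wildcard. For example (the user and group names here are purely illustrative):

<property>
  <name>security.job.submission.protocol.acl</name>
  <!-- alice and bob, plus anyone in the hadoop-ops group, may submit jobs -->
  <value>alice,bob hadoop-ops</value>
</property>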
@ -0,0 +1,225 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

  <!-- file system properties -->

  <property>
    <name>dfs.name.dir</name>
    <value>${HADOOP_NN_DIR}</value>
    <description>Determines where on the local filesystem the DFS name node
      should store the name table.  If this is a comma-delimited list
      of directories then the name table is replicated in all of the
      directories, for redundancy.</description>
    <final>true</final>
  </property>

  <property>
    <name>dfs.data.dir</name>
    <value>${HADOOP_DN_DIR}</value>
    <description>Determines where on the local filesystem a DFS data node
      should store its blocks.  If this is a comma-delimited
      list of directories, then data will be stored in all named
      directories, typically on different devices.
      Directories that do not exist are ignored.
    </description>
    <final>true</final>
  </property>

  <property>
    <name>dfs.safemode.threshold.pct</name>
    <value>1.0f</value>
    <description>
      Specifies the percentage of blocks that should satisfy
      the minimal replication requirement defined by dfs.replication.min.
      Values less than or equal to 0 mean not to start in safe mode.
      Values greater than 1 will make safe mode permanent.
    </description>
  </property>
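Editor's note: a quick worked example of the threshold above, with an illustrative block count. With dfs.replication.min=1 and 1,000,000 blocks known to the namenode, a threshold of 1.0f requires a reported replica for every block before safe mode can exit automatically; at 0.99f the requirement would be:

echo $((1000000 * 99 / 100))   # -> 990000 blocks; at 1.0f, all 1,000,000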

  <property>
    <name>dfs.datanode.address</name>
    <value>${HADOOP_DN_ADDR}</value>
  </property>

  <property>
    <name>dfs.datanode.http.address</name>
    <value>${HADOOP_DN_HTTP_ADDR}</value>
  </property>

  <property>
    <name>dfs.http.address</name>
    <value>${HADOOP_NN_HOST}:50070</value>
    <description>The address and the base port where the dfs namenode
      web ui will listen on.
    </description>
    <final>true</final>
  </property>

  <!-- Permissions configuration -->
  <property>
    <name>dfs.umaskmode</name>
    <value>077</value>
    <description>
      The octal umask used when creating files and directories.
    </description>
  </property>

  <property>
    <name>dfs.block.access.token.enable</name>
    <value>${SECURITY}</value>
    <description>
      If true, access tokens are used as capabilities for accessing datanodes.
    </description>
  </property>

  <property>
    <name>dfs.namenode.kerberos.principal</name>
    <value>nn/_HOST@${local.realm}</value>
    <description>
      Kerberos principal name for the NameNode.
    </description>
  </property>

  <property>
    <name>dfs.secondary.namenode.kerberos.principal</name>
    <value>nn/_HOST@${local.realm}</value>
    <description>
      Kerberos principal name for the secondary NameNode.
    </description>
  </property>

  <property>
    <name>dfs.namenode.kerberos.https.principal</name>
    <value>host/_HOST@${local.realm}</value>
    <description>
      The Kerberos principal for the host that the NameNode runs on.
    </description>
  </property>

  <property>
    <name>dfs.secondary.namenode.kerberos.https.principal</name>
    <value>host/_HOST@${local.realm}</value>
    <description>
      The Kerberos principal for the host that the secondary NameNode runs on.
    </description>
  </property>

  <property>
    <name>dfs.secondary.https.port</name>
    <value>50490</value>
    <description>The https port where the secondary namenode binds.</description>
  </property>

  <property>
    <name>dfs.datanode.kerberos.principal</name>
    <value>dn/_HOST@${local.realm}</value>
    <description>
      The Kerberos principal that the DataNode runs as. "_HOST" is replaced by
      the real host name.
    </description>
  </property>

  <property>
    <name>dfs.namenode.keytab.file</name>
    <value>/etc/security/keytabs/nn.service.keytab</value>
    <description>
      Combined keytab file containing the namenode service and host principals.
    </description>
  </property>

  <property>
    <name>dfs.secondary.namenode.keytab.file</name>
    <value>/etc/security/keytabs/nn.service.keytab</value>
    <description>
      Combined keytab file containing the namenode service and host principals.
    </description>
  </property>

  <property>
    <name>dfs.datanode.keytab.file</name>
    <value>/etc/security/keytabs/dn.service.keytab</value>
    <description>
      The filename of the keytab file for the DataNode.
    </description>
  </property>

  <property>
    <name>dfs.https.port</name>
    <value>50470</value>
    <description>The https port where the namenode binds.</description>
  </property>

  <property>
    <name>dfs.https.address</name>
    <value>${HADOOP_NN_HOST}:50470</value>
    <description>The https address where the namenode binds.</description>
  </property>

  <property>
    <name>dfs.datanode.data.dir.perm</name>
    <value>700</value>
    <description>The permissions that should be set on dfs.data.dir
      directories. The datanode will not come up if the permissions are
      different on existing dfs.data.dir directories. If the directories
      don't exist, they will be created with this permission.
    </description>
  </property>

  <property>
    <name>dfs.cluster.administrators</name>
    <value>${HADOOP_HDFS_USER}</value>
    <description>ACL for the users who can view the default servlets in HDFS.</description>
  </property>

  <property>
    <name>dfs.permissions.superusergroup</name>
    <value>${HADOOP_GROUP}</value>
    <description>The name of the group of super-users.</description>
  </property>

  <property>
    <name>dfs.namenode.http-address</name>
    <value>${HADOOP_NN_HOST}:50070</value>
    <description>
      The address and the base port where the dfs namenode web ui will listen on.
      If the port is 0 then the server will start on a free port.
    </description>
  </property>

  <property>
    <name>dfs.namenode.https-address</name>
    <value>${HADOOP_NN_HOST}:50470</value>
  </property>

  <property>
    <name>dfs.secondary.http.address</name>
    <value>${HADOOP_SNN_HOST}:50090</value>
    <description>
      The secondary namenode http server address and port.
      If the port is 0 then the server will start on a free port.
    </description>
  </property>

  <property>
    <name>dfs.hosts</name>
    <value>${HADOOP_CONF_DIR}/dfs.include</value>
    <description>Names a file that contains a list of hosts that are
      permitted to connect to the namenode. The full pathname of the file
      must be specified. If the value is empty, all hosts are
      permitted.</description>
  </property>

  <property>
    <name>dfs.hosts.exclude</name>
    <value>${HADOOP_CONF_DIR}/dfs.exclude</value>
    <description>Names a file that contains a list of hosts that are
      not permitted to connect to the namenode. The full pathname of the
      file must be specified. If the value is empty, no hosts are
      excluded.
    </description>
  </property>
</configuration>
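Editor's note: the dfs.hosts / dfs.hosts.exclude pair above is what drives datanode decommissioning. A hedged sketch of the usual workflow (the host name is illustrative):

# Add the datanode to the exclude file referenced by dfs.hosts.exclude...
echo dn7.example.com >> ${HADOOP_CONF_DIR}/dfs.exclude
# ...then ask the namenode to re-read its include/exclude files
# (run as the HDFS superuser):
hadoop dfsadmin -refreshNodes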
@ -0,0 +1,12 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>mapred.queue.default.acl-submit-job</name>
    <value>*</value>
  </property>
  <property>
    <name>mapred.queue.default.acl-administer-jobs</name>
    <value>*</value>
  </property>
</configuration>
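Editor's note: both queue ACLs above use the wildcard. To restrict the default queue, they take the same "users groups" form as the service-level ACLs in hadoop-policy.xml (names illustrative):

<property>
  <name>mapred.queue.default.acl-submit-job</name>
  <value>alice,bob hadoop-users</value>
</property>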
@ -0,0 +1,268 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>

  <property>
    <name>mapred.tasktracker.tasks.sleeptime-before-sigkill</name>
    <value>250</value>
    <description>Normally, this is the amount of time before killing
      processes, and the recommended default is 5 seconds, i.e. a value of
      5000 here. In this case, we are using it solely to blast tasks before
      killing them, and killing them very quickly (1/4 second) to guarantee
      that we do not leave VMs around for later jobs.
    </description>
  </property>

  <property>
    <name>mapred.system.dir</name>
    <value>/mapred/mapredsystem</value>
    <final>true</final>
  </property>

  <property>
    <name>mapred.job.tracker</name>
    <value>${HADOOP_JT_HOST}:9000</value>
    <final>true</final>
  </property>

  <property>
    <name>mapred.job.tracker.http.address</name>
    <value>${HADOOP_JT_HOST}:50030</value>
    <final>true</final>
  </property>

  <property>
    <name>mapred.local.dir</name>
    <value>${HADOOP_MAPRED_DIR}</value>
    <final>true</final>
  </property>

  <property>
    <name>mapreduce.cluster.administrators</name>
    <value>${HADOOP_MR_USER}</value>
  </property>

  <property>
    <name>mapred.map.tasks.speculative.execution</name>
    <value>false</value>
    <description>If true, then multiple instances of some map tasks
      may be executed in parallel.</description>
  </property>

  <property>
    <name>mapred.reduce.tasks.speculative.execution</name>
    <value>false</value>
    <description>If true, then multiple instances of some reduce tasks
      may be executed in parallel.</description>
  </property>

  <property>
    <name>mapred.output.compression.type</name>
    <value>BLOCK</value>
    <description>If the job outputs are to be compressed as SequenceFiles, how
      should they be compressed? Should be one of NONE, RECORD or BLOCK.
    </description>
  </property>

  <property>
    <name>jetty.connector</name>
    <value>org.mortbay.jetty.nio.SelectChannelConnector</value>
  </property>

  <property>
    <name>mapred.task.tracker.task-controller</name>
    <value>${TASK_CONTROLLER}</value>
  </property>

  <property>
    <name>mapred.child.root.logger</name>
    <value>INFO,TLA</value>
  </property>

  <property>
    <name>stream.tmpdir</name>
    <value>${mapred.temp.dir}</value>
  </property>

  <property>
    <name>mapred.child.java.opts</name>
    <value>-server -Xmx640m -Djava.net.preferIPv4Stack=true</value>
  </property>

  <property>
    <name>mapred.child.ulimit</name>
    <value>8388608</value>
  </property>

  <property>
    <name>mapred.job.tracker.persist.jobstatus.active</name>
    <value>true</value>
    <description>Indicates if persistency of job status information is
      active or not.
    </description>
  </property>

  <property>
    <name>mapred.job.tracker.persist.jobstatus.dir</name>
    <value>file:///${HADOOP_LOG_DIR}/${HADOOP_MR_USER}/jobstatus</value>
    <description>The directory where the job status information is persisted
      in a file system, to be available after it drops out of the memory queue
      and between jobtracker restarts.
    </description>
  </property>

  <property>
    <name>mapred.job.tracker.history.completed.location</name>
    <value>/mapred/history/done</value>
  </property>

  <property>
    <name>mapred.heartbeats.in.second</name>
    <value>200</value>
    <description>to enable HADOOP-5784</description>
  </property>

  <property>
    <name>mapreduce.tasktracker.outofband.heartbeat</name>
    <value>true</value>
    <description>to enable MAPREDUCE-270</description>
  </property>

  <property>
    <name>mapred.jobtracker.maxtasks.per.job</name>
    <value>200000</value>
    <final>true</final>
    <description>The maximum number of tasks for a single job.
      A value of -1 indicates that there is no maximum.
    </description>
  </property>

  <property>
    <name>mapreduce.jobtracker.kerberos.principal</name>
    <value>jt/_HOST@${local.realm}</value>
    <description>
      JT principal.
    </description>
  </property>

  <property>
    <name>mapreduce.tasktracker.kerberos.principal</name>
    <value>tt/_HOST@${local.realm}</value>
    <description>
      TT principal.
    </description>
  </property>

  <property>
    <name>hadoop.job.history.user.location</name>
    <value>none</value>
  </property>

  <property>
    <name>mapreduce.jobtracker.keytab.file</name>
    <value>/etc/security/keytabs/jt.service.keytab</value>
    <description>
      The keytab for the jobtracker principal.
    </description>
  </property>

  <property>
    <name>mapreduce.tasktracker.keytab.file</name>
    <value>/etc/security/keytabs/tt.service.keytab</value>
    <description>The filename of the keytab for the task tracker.</description>
  </property>

  <property>
    <name>mapreduce.jobtracker.staging.root.dir</name>
    <value>/user</value>
    <description>The path prefix for where the staging directories should be
      placed. The next level is always the user's
      name. It is a path in the default file system.
    </description>
  </property>

  <property>
    <name>mapreduce.job.acl-modify-job</name>
    <value></value>
  </property>

  <property>
    <name>mapreduce.job.acl-view-job</name>
    <value>Dr.Who</value>
  </property>

  <property>
    <name>mapreduce.tasktracker.group</name>
    <value>${HADOOP_GROUP}</value>
    <description>The group that the tasktracker uses for accessing the
      task controller binary. The mapred user must be a member and ordinary
      users should *not* be members.
    </description>
  </property>

  <property>
    <name>mapred.acls.enabled</name>
    <value>true</value>
  </property>

  <property>
    <name>mapred.jobtracker.taskScheduler</name>
    <value>org.apache.hadoop.mapred.CapacityTaskScheduler</value>
  </property>
  <property>
    <name>mapred.queue.names</name>
    <value>default</value>
  </property>

  <!-- settings for the history server -->
  <property>
    <name>mapreduce.history.server.embedded</name>
    <value>false</value>
  </property>
  <property>
    <name>mapreduce.history.server.http.address</name>
    <value>${HADOOP_JT_HOST}:51111</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.kerberos.principal</name>
    <value>jt/_HOST@${local.realm}</value>
    <description>history server principal</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.keytab.file</name>
    <value>/etc/security/keytabs/jt.service.keytab</value>
    <description>
      The keytab for the jobtracker principal.
    </description>
  </property>

  <property>
    <name>mapred.hosts</name>
    <value>${HADOOP_CONF_DIR}/mapred.include</value>
    <description>Names a file that contains the list of nodes that may
      connect to the jobtracker. If the value is empty, all hosts are
      permitted.</description>
  </property>

  <property>
    <name>mapred.hosts.exclude</name>
    <value>${HADOOP_CONF_DIR}/mapred.exclude</value>
    <description>Names a file that contains the list of hosts that
      should be excluded by the jobtracker. If the value is empty, no
      hosts are excluded.</description>
  </property>
  <property>
    <name>mapred.jobtracker.retirejob.check</name>
    <value>10000</value>
  </property>
  <property>
    <name>mapred.jobtracker.retirejob.interval</name>
    <value>0</value>
  </property>
</configuration>
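Editor's note: as with the HDFS include/exclude files, changes to mapred.hosts / mapred.hosts.exclude take effect after a refresh. A hedged sketch (the host name is illustrative):

# Exclude a tasktracker, then ask the jobtracker to re-read its node lists
# (run as the MapReduce admin user):
echo tt7.example.com >> ${HADOOP_CONF_DIR}/mapred.exclude
hadoop mradmin -refreshNodes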
@ -0,0 +1,3 @@
mapreduce.cluster.local.dir=${HADOOP_MAPRED_DIR}
mapreduce.tasktracker.group=${HADOOP_GROUP}
hadoop.log.dir=${HADOOP_LOG_DIR}/${HADOOP_MR_USER}
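Editor's note: once the placeholders are substituted, the rendered taskcontroller.cfg might look like the following (the paths and group name are illustrative, not values from this commit):

mapreduce.cluster.local.dir=/grid/0/mapred/local
mapreduce.tasktracker.group=hadoop
hadoop.log.dir=/var/log/hadoop/mapred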