HADOOP-6255. Create RPM and Debian packages for common. Changes deployment
layout to be consistent across the binary tgz, rpm, and deb. Adds setup
scripts for easy one node cluster configuration and user creation.
(Eric Yang via omalley)


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1128385 13f79535-47bb-0310-9956-ffa450edef68
Owen O'Malley 2011-05-27 16:35:02 +00:00
parent 7df9a15ea6
commit dec6fcdb00
54 changed files with 2593 additions and 157 deletions


@@ -2402,6 +2402,15 @@ Release 0.20.3 - Unreleased
HADOOP-7072. Remove java5 dependencies from build. (cos)
Release 0.20.204.0 - Unreleased
NEW FEATURES
HADOOP-6255. Create RPM and Debian packages for common. Changes deployment
layout to be consistent across the binary tgz, rpm, and deb. Adds setup
scripts for easy one node cluster configuration and user creation.
(Eric Yang via omalley)
Release 0.20.203.0 - 2011-5-11
BUG FIXES


@@ -17,10 +17,11 @@
# This script runs the hadoop core commands.
bin=`dirname "$0"`
bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-config.sh
. "$bin"/../libexec/hadoop-config.sh
function print_usage(){
echo "Usage: hadoop [--config confdir] COMMAND"
@@ -54,8 +55,8 @@ case $COMMAND in
#try to locate hdfs and if present, delegate to it.
if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
exec "${HADOOP_HDFS_HOME}"/bin/hdfs $*
elif [ -f "${HADOOP_HOME}"/bin/hdfs ]; then
exec "${HADOOP_HOME}"/bin/hdfs $*
elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
exec "${HADOOP_PREFIX}"/bin/hdfs $*
else
echo "HDFS not found."
exit
@@ -70,8 +71,8 @@ case $COMMAND in
#try to locate mapred and if present, delegate to it.
if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
exec "${HADOOP_MAPRED_HOME}"/bin/mapred $*
elif [ -f "${HADOOP_HOME}"/bin/mapred ]; then
exec "${HADOOP_HOME}"/bin/mapred $*
elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
exec "${HADOOP_PREFIX}"/bin/mapred $*
else
echo "MAPRED not found."
exit


@@ -26,11 +26,8 @@ script="$(basename -- "$this")"
this="$common_bin/$script"
# the root of the Hadoop installation
#TODO: change the env variable when dir structure is changed
export HADOOP_HOME=`dirname "$this"`/..
export HADOOP_COMMON_HOME="${HADOOP_HOME}"
#export HADOOP_HOME=`dirname "$this"`/../..
#export HADOOP_COMMON_HOME="${HADOOP_COMMON_HOME:-`dirname "$this"`/..}"
# See HADOOP-6255 for directory structure layout
export HADOOP_PREFIX=`dirname "$this"`/..
#check to see if the conf dir is given as an optional argument
if [ $# -gt 1 ]
@@ -45,7 +42,13 @@ then
fi
# Allow alternate conf dir location.
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_HOME/conf}"
if [ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]; then
DEFAULT_CONF_DIR="conf"
else
DEFAULT_CONF_DIR="etc/hadoop"
fi
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/$DEFAULT_CONF_DIR}"
# User can specify hostnames or a file where the hostnames are (not both)
if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
@@ -130,54 +133,61 @@ CLASSPATH="${HADOOP_CONF_DIR}"
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
# for developers, add Hadoop classes to CLASSPATH
if [ -d "$HADOOP_COMMON_HOME/build/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build/classes
if [ -d "$HADOOP_PREFIX/build/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build/classes
fi
if [ -d "$HADOOP_COMMON_HOME/build/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build
if [ -d "$HADOOP_PREFIX/build/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build
fi
if [ -d "$HADOOP_COMMON_HOME/build/test/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build/test/classes
if [ -d "$HADOOP_PREFIX/build/test/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build/test/classes
fi
if [ -d "$HADOOP_COMMON_HOME/build/test/core/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build/test/core/classes
if [ -d "$HADOOP_PREFIX/build/test/core/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build/test/core/classes
fi
# so that filenames w/ spaces are handled correctly in loops below
IFS=
# for releases, add core hadoop jar & webapps to CLASSPATH
if [ -d "$HADOOP_COMMON_HOME/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME
if [ -d "$HADOOP_PREFIX/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX
fi
for f in $HADOOP_COMMON_HOME/hadoop-*.jar; do
if [ -d "$HADOOP_PREFIX/share/hadoop/common/lib" ]; then
for f in $HADOOP_PREFIX/share/hadoop/common/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
fi
for f in $HADOOP_PREFIX/share/hadoop/common/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# add libs to CLASSPATH
for f in $HADOOP_COMMON_HOME/lib/*.jar; do
# for developers, add libs to CLASSPATH
for f in $HADOOP_PREFIX/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
if [ -d "$HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Common/common" ]; then
for f in $HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Common/common/*.jar; do
if [ -d "$HADOOP_PREFIX/build/ivy/lib/Hadoop-Common/common" ]; then
for f in $HADOOP_PREFIX/build/ivy/lib/Hadoop-Common/common/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
fi
if [ -d "$HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Hdfs/common" ]; then
for f in $HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Hdfs/common/*.jar; do
if [ -d "$HADOOP_PREFIX/build/ivy/lib/hadoop-hdfs/hdfs" ]; then
for f in $HADOOP_PREFIX/build/ivy/lib/hadoop-hdfs/hdfs/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
fi
if [ -d "$HADOOP_COMMON_HOME/build/ivy/lib/Hadoop/common" ]; then
for f in $HADOOP_COMMON_HOME/build/ivy/lib/Hadoop/common/*.jar; do
if [ -d "$HADOOP_PREFIX/build/ivy/lib/Hadoop/mapred" ]; then
for f in $HADOOP_PREFIX/build/ivy/lib/Hadoop/mapred/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
fi
for f in $HADOOP_COMMON_HOME/lib/jsp-2.1/*.jar; do
for f in $HADOOP_PREFIX/lib/jsp-2.1/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
@@ -188,7 +198,7 @@ fi
# default log directory & file
if [ "$HADOOP_LOG_DIR" = "" ]; then
HADOOP_LOG_DIR="$HADOOP_HOME/logs"
HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
fi
if [ "$HADOOP_LOGFILE" = "" ]; then
HADOOP_LOGFILE='hadoop.log'
@@ -204,33 +214,37 @@ unset IFS
# cygwin path translation
if $cygwin; then
HADOOP_COMMON_HOME=`cygpath -w "$HADOOP_COMMON_HOME"`
HADOOP_PREFIX=`cygpath -w "$HADOOP_PREFIX"`
HADOOP_LOG_DIR=`cygpath -w "$HADOOP_LOG_DIR"`
JAVA_LIBRARY_PATH=`cygpath -w "$JAVA_LIBRARY_PATH"`
fi
# setup 'java.library.path' for native-hadoop code if necessary
if [ -d "${HADOOP_COMMON_HOME}/build/native" -o -d "${HADOOP_COMMON_HOME}/lib/native" ]; then
if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/lib/native" ]; then
JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} -Xmx32m ${HADOOP_JAVA_PLATFORM_OPTS} org.apache.hadoop.util.PlatformName | sed -e "s/ /_/g"`
if [ -d "$HADOOP_COMMON_HOME/build/native" ]; then
if [ -d "$HADOOP_PREFIX/build/native" ]; then
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_COMMON_HOME}/build/native/${JAVA_PLATFORM}/lib
JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/build/native/${JAVA_PLATFORM}/lib
else
JAVA_LIBRARY_PATH=${HADOOP_COMMON_HOME}/build/native/${JAVA_PLATFORM}/lib
JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/build/native/${JAVA_PLATFORM}/lib
fi
fi
if [ -d "${HADOOP_COMMON_HOME}/lib/native" ]; then
if [ -d "${HADOOP_PREFIX}/lib/native" ]; then
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_COMMON_HOME}/lib/native/${JAVA_PLATFORM}
JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/lib/native/${JAVA_PLATFORM}
else
JAVA_LIBRARY_PATH=${HADOOP_COMMON_HOME}/lib/native/${JAVA_PLATFORM}
JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/lib/native/${JAVA_PLATFORM}
fi
fi
fi
if [ -e "${HADOOP_PREFIX}/lib/libhadoop.a" ]; then
JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/lib
fi
# cygwin path translation
if $cygwin; then
JAVA_LIBRARY_PATH=`cygpath -p "$JAVA_LIBRARY_PATH"`
@@ -238,7 +252,7 @@ fi
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_COMMON_HOME"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_PREFIX"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,console}"
@@ -252,8 +266,8 @@ HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# put hdfs in classpath if present
if [ "$HADOOP_HDFS_HOME" = "" ]; then
if [ -d "${HADOOP_HOME}/hdfs" ]; then
HADOOP_HDFS_HOME=$HADOOP_HOME/hdfs
if [ -d "${HADOOP_PREFIX}/share/hadoop/hdfs" ]; then
HADOOP_HDFS_HOME=$HADOOP_PREFIX/share/hadoop/hdfs
#echo Found HDFS installed at $HADOOP_HDFS_HOME
fi
fi
@@ -273,9 +287,11 @@ if [ -d "${HADOOP_HDFS_HOME}" ]; then
done
# add libs to CLASSPATH
for f in $HADOOP_HDFS_HOME/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
if [ -d "${HADOOP_HDFS_HOME}/lib" ]; then
for f in $HADOOP_HDFS_HOME/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
fi
if [ -d "$HADOOP_HDFS_HOME/build/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/build/classes
@@ -289,9 +305,8 @@ fi
# set mapred home if mapred is present
if [ "$HADOOP_MAPRED_HOME" = "" ]; then
if [ -d "${HADOOP_HOME}/mapred" ]; then
HADOOP_MAPRED_HOME=$HADOOP_HOME/mapred
#echo Found MAPRED installed at $HADOOP_MAPRED_HOME
if [ -d "${HADOOP_PREFIX}/share/hadoop/mapreduce" ]; then
HADOOP_MAPRED_HOME=$HADOOP_PREFIX/share/hadoop/mapreduce
fi
fi
@@ -305,13 +320,15 @@ if [ -d "${HADOOP_MAPRED_HOME}" ]; then
CLASSPATH=${CLASSPATH}:${HADOOP_MAPRED_HOME}/conf
fi
for f in $HADOOP_MAPRED_HOME/hadoop-mapred-*.jar; do
for f in $HADOOP_MAPRED_HOME/hadoop-mapreduce-*.jar; do
CLASSPATH=${CLASSPATH}:$f
done
for f in $HADOOP_MAPRED_HOME/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f
done
if [ -d "${HADOOP_MAPRED_HOME}/lib" ]; then
for f in $HADOOP_MAPRED_HOME/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f
done
fi
if [ -d "$HADOOP_MAPRED_HOME/build/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/classes
@@ -321,10 +338,10 @@ if [ -d "${HADOOP_MAPRED_HOME}" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/tools
fi
for f in $HADOOP_MAPRED_HOME/hadoop-mapred-tools-*.jar; do
for f in $HADOOP_MAPRED_HOME/hadoop-mapreduce-tools-*.jar; do
TOOL_PATH=${TOOL_PATH}:$f;
done
for f in $HADOOP_MAPRED_HOME/build/hadoop-mapred-tools-*.jar; do
for f in $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-tools-*.jar; do
TOOL_PATH=${TOOL_PATH}:$f;
done
fi


@@ -20,7 +20,7 @@
#
# Environment Variables
#
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_HOME}/conf.
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
# HADOOP_LOG_DIR Where log files are stored. PWD by default.
# HADOOP_MASTER host:path where hadoop code should be rsync'd from
# HADOOP_PID_DIR The pid files are stored. /tmp by default.
@@ -39,12 +39,12 @@ fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-config.sh
. "$bin"/../libexec/hadoop-config.sh
# get arguments
#default value
hadoopScript="$HADOOP_HOME"/bin/hadoop
hadoopScript="$HADOOP_PREFIX"/bin/hadoop
if [ "--script" = "$1" ]
then
shift
@@ -91,7 +91,7 @@ fi
# get log directory
if [ "$HADOOP_LOG_DIR" = "" ]; then
export HADOOP_LOG_DIR="$HADOOP_HOME/logs"
export HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
fi
mkdir -p "$HADOOP_LOG_DIR"
chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR
@@ -127,12 +127,12 @@ case $startStop in
if [ "$HADOOP_MASTER" != "" ]; then
echo rsync from $HADOOP_MASTER
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_HOME"
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_PREFIX"
fi
hadoop_rotate_log $log
echo starting $command, logging to $log
cd "$HADOOP_HOME"
cd "$HADOOP_PREFIX"
nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
echo $! > $pid
sleep 1; head "$log"


@@ -29,6 +29,6 @@ fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
. $bin/hadoop-config.sh
. $bin/../libexec/hadoop-config.sh
exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_HOME" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"
exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"

bin/rcc

@@ -24,13 +24,13 @@
#
# HADOOP_OPTS Extra Java runtime options.
#
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_HOME}/conf.
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
#
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-config.sh
. "$bin"/../libexec/hadoop-config.sh
if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
. "${HADOOP_CONF_DIR}/hadoop-env.sh"
@@ -55,33 +55,33 @@ CLASSPATH="${HADOOP_CONF_DIR}"
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
# for developers, add Hadoop classes to CLASSPATH
if [ -d "$HADOOP_HOME/build/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/classes
if [ -d "$HADOOP_PREFIX/build/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build/classes
fi
if [ -d "$HADOOP_HOME/build/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build
if [ -d "$HADOOP_PREFIX/build/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build
fi
if [ -d "$HADOOP_HOME/build/test/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/test/classes
if [ -d "$HADOOP_PREFIX/build/test/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build/test/classes
fi
# so that filenames w/ spaces are handled correctly in loops below
IFS=
# for releases, add core hadoop jar & webapps to CLASSPATH
if [ -d "$HADOOP_HOME/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HOME
if [ -d "$HADOOP_PREFIX/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX
fi
for f in $HADOOP_HOME/hadoop-*.jar; do
for f in $HADOOP_PREFIX/hadoop-*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# add libs to CLASSPATH
for f in $HADOOP_HOME/lib/*.jar; do
for f in $HADOOP_PREFIX/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
for f in $HADOOP_HOME/lib/jetty-ext/*.jar; do
for f in $HADOOP_PREFIX/lib/jetty-ext/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done


@@ -22,7 +22,7 @@
#
# HADOOP_SLAVES File naming remote hosts.
# Default is ${HADOOP_CONF_DIR}/slaves.
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_HOME}/conf.
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
# HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
# HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
##
@@ -38,7 +38,7 @@ fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-config.sh
. "$bin"/../libexec/hadoop-config.sh
if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
. "${HADOOP_CONF_DIR}/hadoop-env.sh"


@@ -23,7 +23,7 @@ echo "This script is Deprecated. Instead use start-dfs.sh and start-mapred.sh"
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-config.sh
. "$bin"/../libexec/hadoop-config.sh
# start hdfs daemons if hdfs is present
if [ -f "${HADOOP_HDFS_HOME}"/bin/start-dfs.sh ]; then


@@ -23,7 +23,7 @@ echo "This script is Deprecated. Instead use stop-dfs.sh and stop-mapred.sh"
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-config.sh
. "$bin"/../libexec/hadoop-config.sh
# stop hdfs daemons if hdfs is present
if [ -f "${HADOOP_HDFS_HOME}"/bin/stop-dfs.sh ]; then

build.xml

@@ -26,12 +26,15 @@
<property file="${user.home}/build.properties" />
<property file="${basedir}/build.properties" />
<property name="Name" value="Hadoop-common"/>
<property name="name" value="hadoop-common"/>
<property name="version" value="0.23.0-SNAPSHOT"/>
<property name="module" value="common"/>
<property name="Name" value="Hadoop-${module}"/>
<property name="name" value="hadoop-${module}"/>
<property name="_version" value="0.23.0"/>
<property name="version" value="${_version}-SNAPSHOT"/>
<property name="final.name" value="${name}-${version}"/>
<property name="test.final.name" value="${name}-test-${version}"/>
<property name="year" value="2009"/>
<property name="package.release" value="1"/>
<property name="src.dir" value="${basedir}/src"/>
<property name="java.src.dir" value="${src.dir}/java"/>
@@ -196,7 +199,18 @@
<equals arg1="${repo}" arg2="staging"/>
</condition>
<!-- the normal classpath -->
<!-- packaging properties -->
<property name="package.prefix" value="/usr"/>
<property name="package.conf.dir" value="/etc/hadoop"/>
<property name="package.log.dir" value="/var/log/hadoop"/>
<property name="package.pid.dir" value="/var/run/hadoop"/>
<property name="package.var.dir" value="/var/lib/hadoop"/>
<property name="package.share.dir" value="/share/hadoop/${module}"/>
<!-- Use fixed path to build rpm for avoiding rpmbuild conflict with dash path names -->
<property name="package.buildroot" value="/tmp/hadoop_package_build_${user.name}"/>
<property name="package.build.dir" value="/tmp/hadoop_package_build_${user.name}/BUILD"/>
<!-- the normal classpath -->
<path id="classpath">
<pathelement location="${build.classes}"/>
<pathelement location="${conf.dir}"/>
@@ -1073,6 +1087,7 @@
description="Build distribution">
<mkdir dir="${dist.dir}"/>
<mkdir dir="${dist.dir}/lib"/>
<mkdir dir="${dist.dir}/libexec"/>
<mkdir dir="${dist.dir}/bin"/>
<mkdir dir="${dist.dir}/docs"/>
<mkdir dir="${dist.dir}/docs/api"/>
@@ -1122,6 +1137,12 @@
<fileset dir="ivy"/>
</copy>
<copy todir="${dist.dir}/libexec">
<fileset dir="bin">
<include name="hadoop-config.sh"/>
</fileset>
</copy>
<copy todir="${dist.dir}">
<fileset dir=".">
<include name="*.txt" />
@@ -1160,17 +1181,24 @@
</macro_tar>
</target>
<target name="bin-package" depends="compile, jar, jar-test"
<target name="bin-package" depends="compile, jar, jar-test, javadoc"
description="assembles artifacts for binary target">
<mkdir dir="${dist.dir}"/>
<mkdir dir="${dist.dir}/lib"/>
<mkdir dir="${dist.dir}/${package.share.dir}/contrib"/>
<mkdir dir="${dist.dir}/${package.share.dir}/templates"/>
<mkdir dir="${dist.dir}/${package.share.dir}/webapps"/>
<mkdir dir="${dist.dir}/bin"/>
<mkdir dir="${dist.dir}/libexec"/>
<mkdir dir="${dist.dir}/sbin"/>
<mkdir dir="${dist.dir}/var/log"/>
<mkdir dir="${dist.dir}/var/run"/>
<copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
<copy todir="${dist.dir}/${package.share.dir}/lib" includeEmptyDirs="false" flatten="true">
<fileset dir="${common.ivy.lib.dir}"/>
</copy>
<copy todir="${dist.dir}/lib" includeEmptyDirs="false">
<copy todir="${dist.dir}/${package.share.dir}" includeEmptyDirs="false">
<fileset dir="lib">
<exclude name="**/native/**"/>
</fileset>
@@ -1179,38 +1207,57 @@
<exec dir="${dist.dir}" executable="sh" failonerror="true">
<env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
<env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
<env key="DIST_LIB_DIR" value="${dist.dir}/lib/native"/>
<env key="DIST_LIB_DIR" value="${dist.dir}/lib"/>
<arg line="${native.src.dir}/packageNativeHadoop.sh"/>
</exec>
<copy todir="${dist.dir}">
<fileset file="${build.dir}/${final.name}.jar"/>
<copy todir="${dist.dir}/${package.share.dir}">
<fileset file="${build.dir}/*.jar"/>
</copy>
<copy todir="${dist.dir}/bin">
<fileset dir="bin"/>
<fileset dir="bin">
<include name="hadoop"/>
</fileset>
</copy>
<copy todir="${dist.dir}/conf">
<copy todir="${dist.dir}/libexec">
<fileset dir="bin">
<include name="hadoop-config.sh"/>
</fileset>
</copy>
<copy todir="${dist.dir}/sbin">
<fileset dir="bin">
<include name="hadoop-daemon.sh"/>
<include name="hadoop-daemons.sh"/>
<include name="slaves.sh"/>
<include name="start-all.sh"/>
<include name="stop-all.sh"/>
</fileset>
<fileset dir="${basedir}/src/packages">
<include name="*.sh" />
</fileset>
</copy>
<copy todir="${dist.dir}/etc/hadoop">
<fileset dir="${conf.dir}" excludes="**/*.template"/>
<fileset dir="${conf.dir}" includes="hadoop-env.sh.template"/>
</copy>
<copy file="ivy.xml" tofile="${dist.dir}/ivy.xml"/>
<copy todir="${dist.dir}/ivy">
<fileset dir="ivy"/>
<copy todir="${dist.dir}/${package.share.dir}/templates">
<fileset dir="${basedir}/src/packages/templates/conf" includes="*"/>
</copy>
<copy todir="${dist.dir}">
<copy todir="${dist.dir}/share/doc/hadoop/${module}">
<fileset dir=".">
<include name="*.txt" />
</fileset>
</copy>
<copy todir="${dist.dir}/" file="build.xml"/>
<chmod perm="ugo+x" type="file" parallel="false">
<fileset dir="${dist.dir}/bin"/>
<fileset dir="${dist.dir}/sbin"/>
</chmod>
</target>
@@ -1219,16 +1266,123 @@
<param.listofitems>
<tarfileset dir="${build.dir}" mode="664">
<exclude name="${final.name}/bin/*" />
<exclude name="${final.name}/libexec/*" />
<exclude name="${final.name}/sbin/*" />
<exclude name="${final.name}/src/**" />
<exclude name="${final.name}/docs/**" />
<include name="${final.name}/**" />
</tarfileset>
<tarfileset dir="${build.dir}" mode="755">
<include name="${final.name}/bin/*" />
<include name="${final.name}/libexec/*" />
<include name="${final.name}/sbin/*" />
</tarfileset>
</param.listofitems>
</macro_tar>
</target>
<target name="rpm" depends="binary" description="Make rpm package">
<mkdir dir="${package.buildroot}/BUILD" />
<mkdir dir="${package.buildroot}/RPMS" />
<mkdir dir="${package.buildroot}/SRPMS" />
<mkdir dir="${package.buildroot}/SOURCES" />
<mkdir dir="${package.buildroot}/SPECS" />
<copy todir="${package.buildroot}/SOURCES">
<fileset dir="${build.dir}">
<include name="${final.name}-bin.tar.gz" />
</fileset>
</copy>
<copy file="${src.dir}/packages/rpm/spec/hadoop.spec" todir="${package.buildroot}/SPECS">
<filterchain>
<replacetokens>
<token key="final.name" value="${final.name}" />
<token key="version" value="${_version}" />
<token key="package.release" value="${package.release}" />
<token key="package.build.dir" value="${package.build.dir}" />
<token key="package.prefix" value="${package.prefix}" />
<token key="package.conf.dir" value="${package.conf.dir}" />
<token key="package.log.dir" value="${package.log.dir}" />
<token key="package.pid.dir" value="${package.pid.dir}" />
<token key="package.var.dir" value="${package.var.dir}" />
</replacetokens>
</filterchain>
</copy>
<rpm specFile="hadoop.spec" command="-bb --target ${os.arch}" topDir="${package.buildroot}" cleanBuildDir="true" failOnError="true"/>
<copy todir="${build.dir}/" flatten="true">
<fileset dir="${package.buildroot}/RPMS">
<include name="**/*.rpm" />
</fileset>
</copy>
<delete dir="${package.buildroot}" quiet="true" verbose="false"/>
</target>
<target name="deb" depends="ivy-retrieve-package, binary" description="Make deb package">
<taskdef name="deb"
classname="org.vafer.jdeb.ant.DebAntTask">
<classpath refid="ivy-package.classpath" />
</taskdef>
<mkdir dir="${package.build.dir}/hadoop.control" />
<mkdir dir="${package.buildroot}/${package.prefix}" />
<copy todir="${package.buildroot}/${package.prefix}">
<fileset dir="${build.dir}/${final.name}">
<include name="**" />
</fileset>
</copy>
<copy todir="${package.build.dir}/hadoop.control">
<fileset dir="${src.dir}/packages/deb/hadoop.control">
<exclude name="control" />
</fileset>
</copy>
<copy file="${src.dir}/packages/deb/hadoop.control/control" todir="${package.build.dir}/hadoop.control">
<filterchain>
<replacetokens>
<token key="final.name" value="${final.name}" />
<token key="version" value="${_version}" />
<token key="package.release" value="${package.release}" />
<token key="package.build.dir" value="${package.build.dir}" />
<token key="package.prefix" value="${package.prefix}" />
<token key="package.conf.dir" value="${package.conf.dir}" />
<token key="package.log.dir" value="${package.log.dir}" />
<token key="package.pid.dir" value="${package.pid.dir}" />
</replacetokens>
</filterchain>
</copy>
<deb destfile="${package.buildroot}/${name}_${_version}-${package.release}_${os.arch}.deb" control="${package.build.dir}/hadoop.control">
<tarfileset dir="${build.dir}/${final.name}" filemode="644" prefix="${package.prefix}">
<exclude name="bin" />
<exclude name="etc" />
<exclude name="libexec" />
<exclude name="etc/**" />
<exclude name="sbin" />
<include name="**" />
</tarfileset>
<tarfileset dir="${build.dir}/${final.name}/bin" filemode="755" prefix="${package.prefix}/bin">
<include name="*" />
</tarfileset>
<tarfileset dir="${build.dir}/${final.name}/libexec" filemode="755" prefix="${package.prefix}/libexec">
<include name="*" />
</tarfileset>
<tarfileset dir="${build.dir}/${final.name}/sbin" filemode="755" prefix="${package.prefix}/sbin">
<include name="*" />
</tarfileset>
<tarfileset dir="${src.dir}/packages" filemode="755" prefix="${package.prefix}/sbin">
<include name="*.sh" />
</tarfileset>
<tarfileset dir="${build.dir}/${final.name}/etc/hadoop" filemode="644" prefix="${package.conf.dir}">
<exclude name="core-site.xml" />
<exclude name="hdfs-site.xml" />
<exclude name="mapred-site.xml" />
<include name="**" />
</tarfileset>
</deb>
<copy todir="${build.dir}/" flatten="true">
<fileset dir="${package.buildroot}">
<include name="**/hadoop*.deb" />
</fileset>
</copy>
<delete dir="${package.buildroot}" quiet="true" verbose="false"/>
</target>
<target name="ant-task-download" description="To download mvn-ant-task" unless="offline">
<get src="${ant_task_repo_url}" dest="${ant_task.jar}" usetimestamp="true"/>
@@ -1420,6 +1574,7 @@
<!-- ================================================================== -->
<target name="clean" depends="clean-sign, clean-fi" description="Clean. Delete the build files, and their directories">
<delete dir="${build.dir}"/>
<delete dir="${package.buildroot}"/>
<delete file="${basedir}/ivy/hadoop-common.xml"/>
<delete file="${basedir}/ivy/hadoop-common-pom.xml"/>
<delete file="${basedir}/ivy/hadoop-common-test.xml"/>
@@ -1653,6 +1808,11 @@
log="${ivyresolvelog}"/>
</target>
<target name="ivy-resolve-package" depends="ivy-init">
<ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="package"
log="${ivyresolvelog}"/>
</target>
<target name="ivy-resolve-jdiff" depends="ivy-init">
<ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="jdiff"
log="${ivyresolvelog}"/>
@@ -1710,6 +1870,14 @@
<ivy:cachepath pathid="ivy-common.classpath" conf="common"/>
</target>
<target name="ivy-retrieve-package" depends="ivy-resolve-package"
description="Retrieve Ivy-managed artifacts for the package configurations">
<ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
log="${ivyretrievelog}"/>
<ivy:cachepath pathid="ivy-package.classpath" conf="package"/>
</target>
<target name="ivy-retrieve-releaseaudit" depends="ivy-resolve-releaseaudit"
description="Retrieve Ivy-managed artifacts for the compile configurations">
<ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"


@@ -6,7 +6,14 @@
# remote nodes.
# The java implementation to use. Required.
# export JAVA_HOME=/usr/lib/j2sdk1.6-sun
export JAVA_HOME=${JAVA_HOME}
# Hadoop Installation Prefix
HADOOP_PREFIX=${HADOOP_PREFIX}
# Hadoop Configuration Directory
HADOOP_CONF_DIR=${HADOOP_CONF_DIR}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-$HADOOP_PREFIX/conf}
# Extra Java CLASSPATH elements. Optional.
# export HADOOP_CLASSPATH="<extra_entries>:$HADOOP_CLASSPATH"
@@ -30,11 +37,8 @@ export HADOOP_TASKTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_TASKTRACK
# Extra ssh options. Empty by default.
# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
# Where log files are stored. $HADOOP_HOME/logs by default.
# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
# File naming remote slave hosts. $HADOOP_PREFIX/conf/slaves by default.
export HADOOP_SLAVES=${HADOOP_CONF_DIR}/slaves
# host:path where hadoop code should be rsync'd from. Unset by default.
# export HADOOP_MASTER=master:/home/$USER/src/hadoop
@@ -45,13 +49,18 @@ export HADOOP_TASKTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_TASKTRACK
# export HADOOP_SLAVE_SLEEP=0.1
# The directory where pid files are stored. /tmp by default.
# export HADOOP_PID_DIR=/var/hadoop/pids
HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_PID_DIR=${HADOOP_PID_DIR:-$HADOOP_PREFIX/var/run}
# A string representing this instance of hadoop. $USER by default.
# export HADOOP_IDENT_STRING=$USER
export HADOOP_IDENT_STRING=`whoami`
# The scheduling priority for daemon processes. See 'man nice'.
# export HADOOP_NICENESS=10
# Allow Hadoop to run with sysctl net.ipv6.bindv6only = 1
# export HADOOP_ALLOW_IPV6=yes
# Where log files are stored. $HADOOP_PREFIX/logs by default.
HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$HADOOP_IDENT_STRING
export HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-$HADOOP_PREFIX/var/log}

ivy.xml

@@ -55,6 +55,9 @@
<!--Testing pulls in everything-->
<conf name="test" extends="master" description="the classpath needed to run tests"/>
<!--Packaging pulls in everything-->
<conf name="package" extends="master" description="the classpath needed for packaging"/>
<!--Private configurations. -->
<conf name="javadoc" visibility="private" description="artiracts required while performing doc generation"
@@ -301,6 +304,13 @@
rev="${jsch.version}"
conf="common->default">
</dependency>
<!--Configuration: package -->
<!--artifacts needed for packaging -->
<dependency org="org.vafer"
name="jdeb"
rev="${jdeb.version}"
conf="package->master">
</dependency>
<dependency org="commons-configuration"
name="commons-configuration"
rev="${commons-configuration.version}"


@@ -48,6 +48,7 @@ hsqldb.version=1.8.0.10
ivy.version=2.1.0
jasper.version=5.5.12
jdeb.version=0.8
jsp.version=2.1
jsp-api.version=5.5.12
jets3t.version=0.7.1


@@ -55,7 +55,7 @@
<p>Typically one machine in the cluster is designated as the
<code>NameNode</code> and a different machine is designated as the <code>JobTracker</code>. These are the <em>masters</em>. The rest of the machines act both as a <code>DataNode</code> <em>and</em> as a <code>TaskTracker</code>. These are the <em>slaves</em>.</p>
<p>We use <code>HADOOP_HOME</code> to refer to the root of the installation. Usually all machines in the cluster share the same <code>HADOOP_HOME</code> path.</p>
<p>We use <code>HADOOP_PREFIX</code> to refer to the root of the installation. Usually all machines in the cluster share the same <code>HADOOP_PREFIX</code> path.</p>
</section>
<section>
@@ -335,7 +335,7 @@
</section>
</section>
<p>Once all the necessary configuration is complete, distribute the files to the <code>HADOOP_CONF_DIR</code> directory on all the machines, typically <code>${HADOOP_HOME}/conf</code>.</p>
<p>Once all the necessary configuration is complete, distribute the files to the <code>HADOOP_CONF_DIR</code> directory on all the machines, typically <code>${HADOOP_PREFIX}/conf</code>.</p>
</section>
<section>


@@ -39,7 +39,7 @@
<tr>
<td><code>--config confdir</code></td>
<td>Overwrites the default configuration directory. Default is ${HADOOP_HOME}/conf.</td>
<td>Overwrites the default configuration directory. Default is ${HADOOP_PREFIX}/conf.</td>
</tr>
<tr>
<td><code>GENERIC_OPTIONS</code></td>


@@ -128,7 +128,7 @@
<ul>
<li>${JAVA_HOME}: the Java installation location for Hadoop. Hadoop supports Sun JDK 1.5.x and above.</li>
<li>${CLUSTER_NAME}: the cluster name, given by the 'node property' mentioned in the resource manager configuration.</li>
<li>${HADOOP_HOME}: the Hadoop installation location on the compute and submit nodes.</li>
<li>${HADOOP_PREFIX}: the Hadoop installation location on the compute and submit nodes.</li>
<li>${RM_QUEUE}: the job submission queue configured in the resource manager configuration.</li>
<li>${RM_HOME}: the resource manager installation location on the compute and submit nodes.</li>
</ul>


@@ -511,11 +511,11 @@
<section>
<title>Usage</title>
<p>Assuming the environment variable <code>HADOOP_HOME</code> points to the installation root and <code>HADOOP_VERSION</code> to the installed Hadoop version, compile <code>WordCount.java</code> and create a jar as follows:</p>
<p>Assuming the environment variable <code>HADOOP_PREFIX</code> points to the installation root and <code>HADOOP_VERSION</code> to the installed Hadoop version, compile <code>WordCount.java</code> and create a jar as follows:</p>
<p>
<code>$ mkdir wordcount_classes</code><br/>
<code>
$ javac -classpath ${HADOOP_HOME}/hadoop-${HADOOP_VERSION}-core.jar
$ javac -classpath ${HADOOP_PREFIX}/hadoop-${HADOOP_VERSION}-core.jar
-d wordcount_classes WordCount.java
</code><br/>
<code>$ jar -cvf /usr/joe/wordcount.jar -C wordcount_classes/ .</code>


@@ -192,7 +192,7 @@
<p>The Hadoop daemon log output is written to the
<code>${HADOOP_LOG_DIR}</code> directory (defaults to
<code>${HADOOP_HOME}/logs</code>).</p>
<code>${HADOOP_PREFIX}/logs</code>).</p>
<p>Browse the web interface for the NameNode and the JobTracker; by default they are available at:</p>
<ul>


@@ -36,7 +36,7 @@ Hadoop streaming is a utility that comes with Hadoop.
In these special map/reduce jobs, executables or scripts act as the mapper or the reducer. For example:
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper /bin/cat \
@@ -68,7 +68,7 @@ When the reducer task runs, it splits its input into lines and feeds each line to the executable
Users can also supply Java classes as the mapper or the reducer. The example above is equivalent to:
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper org.apache.hadoop.mapred.lib.IdentityMapper \
@@ -88,7 +88,7 @@ $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
If it is not already available on the cluster machines, use the -file option to have the framework package the executable as part of the job submission. For example:
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper myPythonScript.py \
@@ -103,7 +103,7 @@ $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
Besides executables, any other auxiliary files the mapper or reducer needs (such as dictionaries or configuration files) can be packaged and shipped the same way. For example:
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper myPythonScript.py \
@@ -173,7 +173,7 @@ $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
Here is another example using the -cacheArchive option. The input.txt file has two lines naming two files: testlink/cache.txt and testlink/cache2.txt. "testlink" is a symbolic link to the archive directory (the directory the jar file is unpacked into), which contains the two files "cache.txt" and "cache2.txt".
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input "/user/me/samples/cachefile/input.txt" \
-mapper "xargs cat" \
-reducer "cat" \
@@ -220,7 +220,7 @@ This is just the second cache string
Users can add configuration variables with "-jobconf &lt;n&gt;=&lt;v&gt;". For example:
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper org.apache.hadoop.mapred.lib.IdentityMapper\
@@ -254,7 +254,7 @@ Other options for streaming jobs are listed in the table below:
<tr><td> -verbose </td><td> Optional </td><td> Verbose output </td></tr>
</table>
<p>
Use -cluster &lt;name&gt; to switch between the "local" Hadoop and one or more remote Hadoop clusters. By default hadoop-default.xml and hadoop-site.xml are used; when the -cluster &lt;name&gt; option is given, $HADOOP_HOME/conf/hadoop-&lt;name&gt;.xml is used instead.
Use -cluster &lt;name&gt; to switch between the "local" Hadoop and one or more remote Hadoop clusters. By default hadoop-default.xml and hadoop-site.xml are used; when the -cluster &lt;name&gt; option is given, $HADOOP_PREFIX/conf/hadoop-&lt;name&gt;.xml is used instead.
</p>
<p>
The following option changes the temp directory:
@@ -295,7 +295,7 @@
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper org.apache.hadoop.mapred.lib.IdentityMapper \
@@ -320,7 +320,7 @@ Hadoop has a utility class, org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner
The split is based on a prefix of the key rather than the whole key. For example:
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper org.apache.hadoop.mapred.lib.IdentityMapper \
@@ -390,7 +390,7 @@ the combiner/reducer aggregates these aggregatable items with the appropriate aggregator.
</p><p>
To use Aggregate, simply specify "-reducer aggregate":
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper myAggregatorForKeyCount.py \
@@ -434,7 +434,7 @@ Hadoop's utility class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce helps users
Likewise, the reduce function in this utility class treats the input key/value pairs as lists of fields; users can select any segment as the key or the value of the reduce output. For example:
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper org.apache.hadoop.mapred.lib.FieldSelectionMapReduce\
@@ -523,7 +523,7 @@ bruce 70
charlie 80
dan 75
$ c2='cut -f2'; $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
$ c2='cut -f2'; $HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
-input /user/me/samples/student_marks
-mapper \"$c2\" -reducer 'cat'
-output /user/me/samples/student_out


@@ -64,8 +64,8 @@
<code>TaskTracker</code>. These are the <em>slaves</em>.</p>
<p>The root of the distribution is referred to as
<code>HADOOP_HOME</code>. All machines in the cluster usually have the same
<code>HADOOP_HOME</code> path.</p>
<code>HADOOP_PREFIX</code>. All machines in the cluster usually have the same
<code>HADOOP_PREFIX</code> path.</p>
</section>
<section>
@@ -1084,7 +1084,7 @@
on the cluster nodes where a configuration file for the setuid
executable would be located. The executable would be built to
<em>build.dir/dist.dir/bin</em> and should be installed to
<em>$HADOOP_HOME/bin</em>.
<em>$HADOOP_PREFIX/bin</em>.
</p>
<p>
@@ -1274,7 +1274,7 @@
<p>Once all the necessary configuration is complete, distribute the files
to the <code>HADOOP_CONF_DIR</code> directory on all the machines,
typically <code>${HADOOP_HOME}/conf</code>.</p>
typically <code>${HADOOP_PREFIX}/conf</code>.</p>
</section>
<section>
<title>Cluster Restartability</title>


@@ -40,7 +40,7 @@
<tr>
<td><code>--config confdir</code></td>
<td>Overwrites the default Configuration directory. Default is ${HADOOP_HOME}/conf.</td>
<td>Overwrites the default Configuration directory. Default is ${HADOOP_PREFIX}/conf.</td>
</tr>
<tr>
<td><code>GENERIC_OPTIONS</code></td>


@@ -0,0 +1,147 @@
<?xml version="1.0"?>
<!--
Copyright 2002-2004 The Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>
Hadoop Deployment Layout
</title>
</header>
<body>
<section>
<title> Introduction </title>
<p>
This document describes the standard deployment layout for Hadoop. With the increased complexity of the evolving Hadoop ecosystem, a standard deployment layout ensures better integration between Hadoop sub-projects. By making the installation process easier, we can lower the barrier to entry and increase Hadoop adoption.
</p>
</section>
<section>
<title> Packages </title>
<p>
We need to divide Hadoop up into packages that can be independently upgraded. The list of packages should include:
</p>
<ul>
<li>Hadoop Common - Common including the native code and required jar files.</li>
<li>HDFS Client - HDFS jars, scripts, and shared libraries.</li>
<li>HDFS Server - jsvc executable</li>
<li>Yarn Client - Yarn client jars and scripts</li>
<li>Yarn Server - Yarn server jars and scripts</li>
<li>MapReduce - MapReduce jars, scripts, and shared libraries</li>
<li>LZO - LZO codec from github.com/omally/hadoop-gpl-compression</li>
<li>Metrics - Plugins for Chukwa and Ganglia</li>
</ul>
<p>Packages from other teams will include:</p>
<ul>
<li>Pig</li>
<li>Hive</li>
<li>Oozie client</li>
<li>Oozie server</li>
<li>Howl client</li>
<li>Howl server</li>
</ul>
<p>These packages should be deployable with RPM on RedHat. We also need a package that depends on a version of each of these packages. In general, we can generate tarballs in the new deployment layout.</p>
<p>Note that some packages, like Pig, which are user facing, will have 2 versions installed in a given deployment. This will be accomplished by modifying the package name and the associated binaries to include the version number.</p>
<p>All of the following paths are based on a prefix directory that is the root of the installation. Our packages must support having multiple Hadoop stack installations on a computer at the same time. For RPMs, this means that the packages must be relocatable and honor the --prefix option.</p>
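<p>For example, a relocatable RPM could be dropped under an alternate prefix like this (a minimal sketch; the bracketed placeholders follow the install phase example later in this document):</p>
<source>
rpm -i hadoop-[version]-[rev].[arch].rpm --prefix /usr/local/hadoop
</source>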
</section>
<section>
<title> Deployment </title>
<p>It is important to have a standard deployment that results from installing the packages regardless of the package manager. Here are the top level directories and a sample of what would be under each. Note that all of the packages are installed "flattened" into the prefix directory. For compatibility reasons, we should create "share/hadoop" that matches the old HADOOP_PREFIX and set the HADOOP_PREFIX variable to that.</p>
<source>
$PREFIX/ bin / hadoop
| | mapred
| | pig -> pig7
| | pig6
| + pig7
|
+ etc / hadoop / core-site.xml
| | hdfs-site.xml
| + mapred-site.xml
|
+ include / hadoop / Pipes.hh
| | + TemplateFactory.hh
| + hdfs.h
|
+ lib / jni / hadoop-common / libhadoop.so.0.20.0
| |
| | libhdfs.so -> libhdfs.so.0.20.0
| + libhdfs.so.0.20.0
|
+ libexec / task-controller
|
+ man / man1 / hadoop.1
| | mapred.1
| | pig6.1
| + pig7.1
|
+ share / hadoop-common
| | hadoop-hdfs
| | hadoop-mapreduce
| | pig6
| + pig7
|
+ sbin / hdfs-admin
| | mapred-admin
|
+ src / hadoop-common
| | hadoop-hdfs
| + hadoop-mapreduce
|
+ var / lib / data-node
| + task-tracker
|
| log / hadoop-datanode
| + hadoop-tasktracker
|
+ run / hadoop-datanode.pid
+ hadoop-tasktracker.pid
</source>
<p>Note that we must continue to honor HADOOP_CONF_DIR to override the configuration location, but it should default to $prefix/etc. User facing binaries and scripts go into bin. Configuration files go into etc, with tools that have multiple configuration files getting their own directory. JNI shared libraries go into lib/jni/$tool since Java does not allow specifying the version of the library to load. Libraries that aren't loaded via System.loadLibrary are placed directly under lib. 64 bit versions of the libraries for platforms that support them should be placed in lib64. All of the architecture-independent pieces, including the jars for each tool, will be placed in share/$tool. The default location for all the run time information will be in var. The storage will be in var/lib, the logs in var/log and the pid files in var/run.</p>
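<p>As a minimal sketch of the configuration rule above (the PREFIX variable is illustrative, not part of the layout), a script would resolve its configuration directory as:</p>
<source>
# honor HADOOP_CONF_DIR when the caller sets it; otherwise default to etc under the prefix
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$PREFIX/etc/hadoop}"
</source>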
</section>
<section>
<title> Path Configurations </title>
<p>Paths can be configured at compile time or at install time. RPM takes advantage of the --relocate directive to allow path reconfiguration at install time. For Debian packages, paths are configured at compile time.
</p>
<p>Build phase parameters (see the sketch after this list):</p>
<ul>
<li>package.prefix - Location of package prefix (Default /usr)</li>
<li>package.conf.dir - Location of configuration directory (Default /etc/hadoop)</li>
<li>package.log.dir - Location of log directory (Default /var/log/hadoop)</li>
<li>package.pid.dir - Location of pid directory (Default /var/run/hadoop)</li>
</ul>
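<p>These correspond to the Ant properties of the same names defined in build.xml, so a build with alternate paths could look like this sketch (values illustrative):</p>
<source>
ant rpm -Dpackage.prefix=/usr/local/hadoop \
        -Dpackage.conf.dir=/usr/local/etc/hadoop \
        -Dpackage.log.dir=/opt/logs/hadoop \
        -Dpackage.pid.dir=/opt/run/hadoop
</source>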
<p>Install phase parameters:</p>
<source>
rpm -i hadoop-[version]-[rev].[arch].rpm \
--relocate /usr=/usr/local/hadoop \
--relocate /etc/hadoop=/usr/local/etc/hadoop \
--relocate /var/log/hadoop=/opt/logs/hadoop \
--relocate /var/run/hadoop=/opt/run/hadoop
</source>
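<p>For Debian, the paths chosen at build time are used as-is; installing the resulting package is then just (a sketch, with the same bracketed placeholders):</p>
<source>
dpkg -i hadoop-common_[version]-[rev]_[arch].deb
</source>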
</section>
</body>
</document>


@@ -229,7 +229,7 @@
<p>The hadoop daemon log output is written to the
<code>${HADOOP_LOG_DIR}</code> directory (defaults to
<code>${HADOOP_HOME}/logs</code>).</p>
<code>${HADOOP_PREFIX}/logs</code>).</p>
<p>Browse the web interface for the NameNode and the JobTracker; by
default they are available at:</p>


@@ -39,6 +39,7 @@ See http://forrest.apache.org/docs/linking.html for more info.
</docs>
<docs label="Guides">
<deployment label="Deployment Layout" href="deployment_layout.html" />
<commands_manual label="Hadoop Commands" href="commands_manual.html" />
<fsshell label="File System Shell" href="file_system_shell.html" />
<SLA label="Service Level Authorization" href="service_level_auth.html"/>


@@ -18,7 +18,7 @@
#
# Notes:
# 1. This makefile is designed to do the actual builds in $(HADOOP_HOME)/build/native/${os.name}-${os-arch}.
# 1. This makefile is designed to do the actual builds in $(HADOOP_PREFIX)/build/native/${os.name}-${os-arch}.
# 2. This makefile depends on the following environment variables to function correctly:
# * HADOOP_NATIVE_SRCDIR
# * JAVA_HOME


@@ -22,7 +22,7 @@
#
# Notes:
# 1. This makefile is designed to do the actual builds in $(HADOOP_HOME)/build/native/${os.name}-${os.arch}/lib
# 1. This makefile is designed to do the actual builds in $(HADOOP_PREFIX)/build/native/${os.name}-${os.arch}/lib
# 2. This makefile depends on the following environment variables to function correctly:
# * HADOOP_NATIVE_SRCDIR
# * JAVA_HOME


@@ -35,14 +35,14 @@ if [ -d $BASE_NATIVE_LIB_DIR ]
then
for platform in `ls $BASE_NATIVE_LIB_DIR`
do
if [ ! -d $DIST_LIB_DIR/$platform ]
if [ ! -d $DIST_LIB_DIR ]
then
mkdir -p $DIST_LIB_DIR/$platform
echo "Created $DIST_LIB_DIR/$platform"
mkdir -p $DIST_LIB_DIR
echo "Created $DIST_LIB_DIR"
fi
echo "Copying libraries in $BASE_NATIVE_LIB_DIR/$platform to $DIST_LIB_DIR/$platform/"
cd $BASE_NATIVE_LIB_DIR/$platform/
$TAR . | (cd $DIST_LIB_DIR/$platform/; $UNTAR)
echo "Copying libraries in $BASE_NATIVE_LIB_DIR/$platform to $DIST_LIB_DIR/"
cd $BASE_NATIVE_LIB_DIR/
$TAR . | (cd $DIST_LIB_DIR/; $UNTAR)
done
fi
@@ -51,14 +51,14 @@ if [ -d $BUILD_NATIVE_DIR ]
then
for platform in `ls $BUILD_NATIVE_DIR`
do
if [ ! -d $DIST_LIB_DIR/$platform ]
if [ ! -d $DIST_LIB_DIR ]
then
mkdir -p $DIST_LIB_DIR/$platform
echo "Created $DIST_LIB_DIR/$platform"
mkdir -p $DIST_LIB_DIR
echo "Created $DIST_LIB_DIR"
fi
echo "Copying libraries in $BUILD_NATIVE_DIR/$platform/lib to $DIST_LIB_DIR/$platform/"
echo "Copying libraries in $BUILD_NATIVE_DIR/$platform/lib to $DIST_LIB_DIR/"
cd $BUILD_NATIVE_DIR/$platform/lib
$TAR . | (cd $DIST_LIB_DIR/$platform/; $UNTAR)
$TAR . | (cd $DIST_LIB_DIR/; $UNTAR)
done
fi


@@ -22,7 +22,7 @@
#
# Notes:
# 1. This makefile is designed to do the actual builds in $(HADOOP_HOME)/build/native/${os.name}-${os.arch}/$(subdir) .
# 1. This makefile is designed to do the actual builds in $(HADOOP_PREFIX)/build/native/${os.name}-${os.arch}/$(subdir) .
# 2. This makefile depends on the following environment variables to function correctly:
# * HADOOP_NATIVE_SRCDIR
# * JAVA_HOME
@@ -31,7 +31,7 @@
# * PLATFORM
# All these are setup by build.xml and/or the top-level makefile.
# 3. The creation of requisite jni headers/stubs are also done by build.xml and they are
# assumed to be in $(HADOOP_HOME)/build/native/src/org/apache/hadoop/io/compress/zlib.
# assumed to be in $(HADOOP_PREFIX)/build/native/src/org/apache/hadoop/io/compress/zlib.
#
# The 'vpath directive' to locate the actual source files


@@ -0,0 +1,15 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
/etc/hadoop


@@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Package: hadoop-common
Version: @version@
Section: misc
Priority: optional
Provides: hadoop-common
Architecture: all
Depends: openjdk-6-jre-headless
Maintainer: Apache Software Foundation <general@hadoop.apache.org>
Description: The Apache Hadoop project develops open-source software for reliable, scalable, distributed computing.
Distribution: development


@@ -0,0 +1,24 @@
#!/bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
bash /usr/sbin/update-hadoop-env.sh \
--prefix=/usr \
--bin-dir=/usr/bin \
--sbin-dir=/usr/sbin \
--conf-dir=/etc/hadoop \
--log-dir=/var/log/hadoop \
--pid-dir=/var/run/hadoop


@@ -0,0 +1,19 @@
#!/bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
/usr/sbin/groupdel hadoop 2>/dev/null >/dev/null
exit 0


@@ -0,0 +1,18 @@
#!/bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -r hadoop


@@ -0,0 +1,25 @@
#!/bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
bash /usr/sbin/update-hadoop-env.sh \
--prefix=/usr \
--bin-dir=/usr/bin \
--sbin-dir=/usr/sbin \
--conf-dir=/etc/hadoop \
--log-dir=/var/log/hadoop \
--pid-dir=/var/run/hadoop \
--uninstall


@@ -0,0 +1,142 @@
#! /bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
### BEGIN INIT INFO
# Provides: hadoop-datanode
# Required-Start: $remote_fs $syslog
# Required-Stop: $remote_fs $syslog
# Default-Start: 2 3 4 5
# Default-Stop:
# Short-Description: Apache Hadoop Name Node server
### END INIT INFO
set -e
# /etc/init.d/hadoop-datanode: start and stop the Apache Hadoop Data Node daemon
test -x /usr/bin/hadoop || exit 0
( /usr/bin/hadoop 2>&1 | grep -q hadoop ) 2>/dev/null || exit 0
umask 022
if test -f /etc/default/hadoop-env.sh; then
. /etc/default/hadoop-env.sh
fi
. /lib/lsb/init-functions
# Are we running from init?
run_by_init() {
([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ]
}
check_for_no_start() {
# forget it if we're trying to start, and /etc/hadoop/hadoop-datanode_not_to_be_run exists
if [ -e /etc/hadoop/hadoop-datanode_not_to_be_run ]; then
if [ "$1" = log_end_msg ]; then
log_end_msg 0
fi
if ! run_by_init; then
log_action_msg "Apache Hadoop Data Node server not in use (/etc/hadoop/hadoop-datanode_not_to_be_run)"
fi
exit 0
fi
}
check_privsep_dir() {
# Create the PrivSep empty dir if necessary
if [ ! -d ${HADOOP_PID_DIR} ]; then
mkdir -p ${HADOOP_PID_DIR}
chown root:hadoop ${HADOOP_PID_DIR}
chmod 0775 ${HADOOP_PID_DIR}
fi
}
export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
case "$1" in
start)
check_privsep_dir
check_for_no_start
log_daemon_msg "Starting Apache Hadoop Data Node server" "hadoop-datanode"
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
log_end_msg 0
else
log_end_msg 1
fi
;;
stop)
log_daemon_msg "Stopping Apache Hadoop Data Node server" "hadoop-datanode"
if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid; then
log_end_msg 0
else
log_end_msg 1
fi
;;
restart)
check_privsep_dir
log_daemon_msg "Restarting Apache Hadoop Data Node server" "hadoop-datanode"
start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid
check_for_no_start log_end_msg
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
log_end_msg 0
else
log_end_msg 1
fi
;;
try-restart)
check_privsep_dir
log_daemon_msg "Restarting Apache Hadoop Data Node server" "hadoop-datanode"
set +e
start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid
RET="$?"
set -e
case $RET in
0)
# old daemon stopped
check_for_no_start log_end_msg
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
log_end_msg 0
else
log_end_msg 1
fi
;;
1)
# daemon not running
log_progress_msg "(not running)"
log_end_msg 0
;;
*)
# failed to stop
log_progress_msg "(failed to stop)"
log_end_msg 1
;;
esac
;;
status)
status_of_proc -p ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid ${JAVA_HOME}/bin/java hadoop-datanode && exit 0 || exit $?
;;
*)
log_action_msg "Usage: /etc/init.d/hadoop-datanode {start|stop|restart|try-restart|status}"
exit 1
esac
exit 0


@@ -0,0 +1,142 @@
#! /bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
### BEGIN INIT INFO
# Provides: hadoop-jobtracker
# Required-Start: $remote_fs $syslog
# Required-Stop: $remote_fs $syslog
# Default-Start: 2 3 4 5
# Default-Stop:
# Short-Description: Apache Hadoop Job Tracker server
### END INIT INFO
set -e
# /etc/init.d/hadoop-jobtracker: start and stop the Apache Hadoop Job Tracker daemon
test -x /usr/bin/hadoop || exit 0
( /usr/bin/hadoop 2>&1 | grep -q hadoop ) 2>/dev/null || exit 0
umask 022
if test -f /etc/default/hadoop-env.sh; then
. /etc/default/hadoop-env.sh
fi
. /lib/lsb/init-functions
# Are we running from init?
run_by_init() {
([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ]
}
check_for_no_start() {
# forget it if we're trying to start, and /etc/hadoop/hadoop-jobtracker_not_to_be_run exists
if [ -e /etc/hadoop/hadoop-jobtracker_not_to_be_run ]; then
if [ "$1" = log_end_msg ]; then
log_end_msg 0
fi
if ! run_by_init; then
log_action_msg "Apache Hadoop Job Tracker server not in use (/etc/hadoop/hadoop-jobtracker_not_to_be_run)"
fi
exit 0
fi
}
check_privsep_dir() {
# Create the PrivSep empty dir if necessary
if [ ! -d ${HADOOP_PID_DIR} ]; then
mkdir -p ${HADOOP_PID_DIR}
chown root:hadoop ${HADOOP_PID_DIR}
chmod 0775 ${HADOOP_PID_DIR}
fi
}
export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
case "$1" in
start)
check_privsep_dir
check_for_no_start
log_daemon_msg "Starting Apache Hadoop Job Tracker server" "hadoop-jobtracker"
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start jobtracker; then
log_end_msg 0
else
log_end_msg 1
fi
;;
stop)
log_daemon_msg "Stopping Apache Hadoop Job Tracker server" "hadoop-jobtracker"
if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid; then
log_end_msg 0
else
log_end_msg 1
fi
;;
restart)
check_privsep_dir
log_daemon_msg "Restarting Apache Hadoop Job Tracker server" "hadoop-jobtracker"
start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid
check_for_no_start log_end_msg
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start jobtracker; then
log_end_msg 0
else
log_end_msg 1
fi
;;
try-restart)
check_privsep_dir
log_daemon_msg "Restarting Apache Hadoop Job Tracker server" "hadoop-jobtracker"
set +e
start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid
RET="$?"
set -e
case $RET in
0)
# old daemon stopped
check_for_no_start log_end_msg
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start jobtracker; then
log_end_msg 0
else
log_end_msg 1
fi
;;
1)
# daemon not running
log_progress_msg "(not running)"
log_end_msg 0
;;
*)
# failed to stop
log_progress_msg "(failed to stop)"
log_end_msg 1
;;
esac
;;
status)
status_of_proc -p ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid ${JAVA_HOME}/bin/java hadoop-jobtracker && exit 0 || exit $?
;;
*)
log_action_msg "Usage: /etc/init.d/hadoop-jobtracker {start|stop|restart|try-restart|status}"
exit 1
esac
exit 0

View File

@ -0,0 +1,154 @@
#! /bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
### BEGIN INIT INFO
# Provides: hadoop-namenode
# Required-Start: $remote_fs $syslog
# Required-Stop: $remote_fs $syslog
# Default-Start: 2 3 4 5
# Default-Stop:
# Short-Description: Apache Hadoop Name Node server
### END INIT INFO
set -e
# /etc/init.d/hadoop-namenode: start and stop the Apache Hadoop Name Node daemon
test -x /usr/bin/hadoop || exit 0
( /usr/bin/hadoop 2>&1 | grep -q hadoop ) 2>/dev/null || exit 0
umask 022
if test -f /etc/default/hadoop-env.sh; then
. /etc/default/hadoop-env.sh
fi
. /lib/lsb/init-functions
# Are we running from init?
run_by_init() {
([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ]
}
check_for_no_start() {
# forget it if we're trying to start, and /etc/hadoop/hadoop-namenode_not_to_be_run exists
if [ -e /etc/hadoop/hadoop-namenode_not_to_be_run ]; then
if [ "$1" = log_end_msg ]; then
log_end_msg 0
fi
if ! run_by_init; then
log_action_msg "Apache Hadoop Name Node server not in use (/etc/hadoop/hadoop-namenode_not_to_be_run)"
fi
exit 0
fi
}
check_privsep_dir() {
# Create the PrivSep empty dir if necessary
if [ ! -d ${HADOOP_PID_DIR} ]; then
mkdir -p ${HADOOP_PID_DIR}
chown root:hadoop ${HADOOP_PID_DIR}
chmod 0775 ${HADOOP_PID_DIR}
fi
}
format() {
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} namenode -format' hdfs
}
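# The single quotes defer expansion of HADOOP_PREFIX and HADOOP_CONF_DIR to
# the shell run as hdfs; both are expected to be exported by
# /etc/default/hadoop-env.sh sourced above.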
export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
case "$1" in
start)
check_privsep_dir
check_for_no_start
log_daemon_msg "Starting Apache Hadoop Name Node server" "hadoop-namenode"
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start namenode; then
log_end_msg 0
else
log_end_msg 1
fi
;;
stop)
log_daemon_msg "Stopping Apache Hadoop Name Node server" "hadoop-namenode"
if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid; then
log_end_msg 0
else
log_end_msg 1
fi
;;
format)
log_daemon_msg "Formatting Apache Hadoop Name Node" "hadoop-namenode"
format
if [ $? -eq 0 ]; then
log_end_msg 0
else
log_end_msg 1
fi
;;
restart)
check_privsep_dir
log_daemon_msg "Restarting Apache Hadoop Name Node server" "hadoop-namenode"
start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid
check_for_no_start log_end_msg
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start namenode; then
log_end_msg 0
else
log_end_msg 1
fi
;;
try-restart)
check_privsep_dir
log_daemon_msg "Restarting Apache Hadoop Name Node server" "hadoop-namenode"
set +e
start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid
RET="$?"
set -e
case $RET in
0)
# old daemon stopped
check_for_no_start log_end_msg
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start namenode; then
log_end_msg 0
else
log_end_msg 1
fi
;;
1)
# daemon not running
log_progress_msg "(not running)"
log_end_msg 0
;;
*)
# failed to stop
log_progress_msg "(failed to stop)"
log_end_msg 1
;;
esac
;;
status)
status_of_proc -p ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid ${JAVA_HOME}/bin/java hadoop-namenode && exit 0 || exit $?
;;
*)
log_action_msg "Usage: /etc/init.d/hadoop-namenode {start|stop|restart|try-restart|status}"
exit 1
esac
exit 0

View File

@ -0,0 +1,142 @@
#! /bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
### BEGIN INIT INFO
# Provides: hadoop-tasktracker
# Required-Start: $remote_fs $syslog
# Required-Stop: $remote_fs $syslog
# Default-Start: 2 3 4 5
# Default-Stop:
# Short-Description: Apache Hadoop Task Tracker server
### END INIT INFO
set -e
# /etc/init.d/hadoop-tasktracker: start and stop the Apache Hadoop Task Tracker daemon
test -x /usr/bin/hadoop || exit 0
( /usr/bin/hadoop 2>&1 | grep -q hadoop ) 2>/dev/null || exit 0
umask 022
if test -f /etc/default/hadoop-env.sh; then
. /etc/default/hadoop-env.sh
fi
. /lib/lsb/init-functions
# Are we running from init?
run_by_init() {
([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ]
}
check_for_no_start() {
# forget it if we're trying to start, and /etc/hadoop/hadoop-tasktracker_not_to_be_run exists
if [ -e /etc/hadoop/hadoop-tasktracker_not_to_be_run ]; then
if [ "$1" = log_end_msg ]; then
log_end_msg 0
fi
if ! run_by_init; then
log_action_msg "Apache Hadoop Task Tracker server not in use (/etc/hadoop/hadoop-tasktracker_not_to_be_run)"
fi
exit 0
fi
}
check_privsep_dir() {
# Create the PrivSep empty dir if necessary
if [ ! -d ${HADOOP_PID_DIR} ]; then
mkdir -p ${HADOOP_PID_DIR}
chown root:hadoop ${HADOOP_PID_DIR}
chmod 0775 ${HADOOP_PID_DIR}
fi
}
export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
case "$1" in
start)
check_privsep_dir
check_for_no_start
log_daemon_msg "Starting Apache Hadoop Task Tracker server" "hadoop-tasktracker"
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start tasktracker; then
log_end_msg 0
else
log_end_msg 1
fi
;;
stop)
log_daemon_msg "Stopping Apache Hadoop Task Tracker server" "hadoop-tasktracker"
if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid; then
log_end_msg 0
else
log_end_msg 1
fi
;;
restart)
check_privsep_dir
log_daemon_msg "Restarting Apache Hadoop Task Tracker server" "hadoop-tasktracker"
start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid
check_for_no_start log_end_msg
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start tasktracker; then
log_end_msg 0
else
log_end_msg 1
fi
;;
try-restart)
check_privsep_dir
log_daemon_msg "Restarting Apache Hadoop Task Tracker server" "hadoop-tasktracker"
set +e
start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid
RET="$?"
set -e
case $RET in
0)
# old daemon stopped
check_for_no_start log_end_msg
if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start tasktracker; then
log_end_msg 0
else
log_end_msg 1
fi
;;
1)
# daemon not running
log_progress_msg "(not running)"
log_end_msg 0
;;
*)
# failed to stop
log_progress_msg "(failed to stop)"
log_end_msg 1
;;
esac
;;
status)
status_of_proc -p ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid ${JAVA_HOME}/bin/java hadoop-tasktracker && exit 0 || exit $?
;;
*)
log_action_msg "Usage: /etc/init.d/hadoop-tasktracker {start|stop|restart|try-restart|status}"
exit 1
esac
exit 0

View File

@ -0,0 +1,76 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
if [ "$HADOOP_HOME" != "" ]; then
echo "Warning: \$HADOOP_HOME is deprecated."
echo
fi
. "$bin"/../libexec/hadoop-config.sh
usage() {
echo "
usage: $0 <parameters>
Required parameter:
-u <username> Create user on HDFS
Optional parameters:
-h Display this message
"
exit 1
}
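# Illustrative invocation (username is a placeholder):
#   hadoop-create-user.sh -u alice
# creates /user/alice on HDFS and makes alice its owner, as done below.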
# Parse script parameters
if [ $# != 2 ] ; then
usage
exit 1
fi
while getopts "hu:" OPTION
do
case $OPTION in
u)
SETUP_USER=$OPTARG
;;
h)
usage
;;
*)
echo "Unknown option: $1"
usage
exit 1
;;
esac
done
# Create user directory on HDFS
export SETUP_USER
export SETUP_PATH=/user/${SETUP_USER}
export HADOOP_PREFIX
export HADOOP_CONF_DIR
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir ${SETUP_PATH}' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown ${SETUP_USER}:${SETUP_USER} ${SETUP_PATH}' hdfs
if [ "$?" == "0" ]; then
echo "User directory has been setup: ${SETUP_PATH}"
fi

View File

@ -0,0 +1,282 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
if [ "$HADOOP_HOME" != "" ]; then
echo "Warning: \$HADOOP_HOME is deprecated."
echo
fi
. "$bin"/../libexec/hadoop-config.sh
usage() {
echo "
usage: $0 <parameters>
Optional parameters:
--auto Setup automatically
--default Generate default config
--conf-dir=/etc/hadoop Set config directory
--datanode-dir=/var/lib/hadoop/hdfs/datanode Set datanode directory
-h Display this message
--jobtracker-url=hostname:9001 Set jobtracker url
--log-dir=/var/log/hadoop Set log directory
--hdfs-dir=/var/lib/hadoop/hdfs Set hdfs directory
--mapred-dir=/var/lib/hadoop/mapred Set mapreduce directory
--namenode-dir=/var/lib/hadoop/hdfs/namenode Set namenode directory
--namenode-url=hdfs://hostname:9000/ Set namenode url
--replication=3 Set replication factor
--taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler Set task scheduler
"
exit 1
}
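# Illustrative invocation (hostnames are placeholders):
#   hadoop-setup-conf.sh --auto --conf-dir=/etc/hadoop \
#     --namenode-url=hdfs://nn.example.com:9000/ \
#     --jobtracker-url=jt.example.com:9001
# With --auto, the generated files are also copied into place without prompting.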
template_generator() {
REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
# Read raw lines so indentation and globbing characters survive intact.
while IFS='' read -r line ; do
while [[ "$line" =~ $REGEX ]] ; do
LHS=${BASH_REMATCH[1]}
RHS="$(eval echo "\"$LHS\"")"
line=${line//$LHS/$RHS}
done
echo "$line" >> $2
done < $1
}
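# Note on template_generator: each ${VAR} reference in the template is
# replaced with the value of the matching shell variable. For example, a
# template line such as
#   <value>${HADOOP_NN_HOST}</value>
# is emitted as
#   <value>hdfs://localhost:9000/</value>
# when HADOOP_NN_HOST holds that URL.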
OPTS=$(getopt \
-n $0 \
-o '' \
-l 'auto' \
-l 'conf-dir:' \
-l 'default' \
-l 'hdfs-dir:' \
-l 'namenode-dir:' \
-l 'datanode-dir:' \
-l 'mapred-dir:' \
-l 'namenode-url:' \
-l 'jobtracker-url:' \
-l 'log-dir:' \
-l 'replication:' \
-l 'taskscheduler:' \
-o 'h' \
-- "$@")
if [ $? != 0 ] ; then
usage
fi
# Make sure the HADOOP_LOG_DIR is not picked up from user environment.
unset HADOOP_LOG_DIR
# Parse script parameters
eval set -- "${OPTS}"
while true ; do
case "$1" in
--auto)
AUTOSETUP=1
AUTOMATED=1
shift
;;
--conf-dir)
HADOOP_CONF_DIR=$2; shift 2
AUTOMATED=1
;;
--default)
AUTOMATED=1; shift
;;
-h)
usage
;;
--hdfs-dir)
HADOOP_HDFS_DIR=$2; shift 2
AUTOMATED=1
;;
--namenode-dir)
HADOOP_NN_DIR=$2; shift 2
AUTOMATED=1
;;
--datanode-dir)
HADOOP_DN_DIR=$2; shift 2
AUTOMATED=1
;;
--mapred-dir)
HADOOP_MAPRED_DIR=$2; shift 2
AUTOMATED=1
;;
--namenode-url)
HADOOP_NN_HOST=$2; shift 2
AUTOMATED=1
;;
--jobtracker-url)
HADOOP_JT_HOST=$2; shift 2
AUTOMATED=1
;;
--log-dir)
HADOOP_LOG_DIR=$2; shift 2
AUTOMATED=1
;;
--replication)
HADOOP_REPLICATION=$2; shift 2
AUTOMATED=1
;;
--taskscheduler)
HADOOP_TASK_SCHEDULER=$2; shift 2
AUTOMATED=1
;;
--)
shift ; break
;;
*)
echo "Unknown option: $1"
usage
exit 1
;;
esac
done
# Fill in default values, if parameters have not been defined.
AUTOSETUP=${AUTOSETUP:-1}
JAVA_HOME=${JAVA_HOME:-/usr/java/default}
HADOOP_NN_HOST=${HADOOP_NN_HOST:-hdfs://`hostname`:9000/}
HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
HADOOP_JT_HOST=${HADOOP_JT_HOST:-`hostname`:9001}
HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-/var/log/hadoop}
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
HADOOP_REPLICATION=${HADOOP_REPLICATION:-3}
HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
# Interactive setup wizard
if [ "${AUTOMATED}" != "1" ]; then
echo "Setup Hadoop Configuration"
echo
echo -n "Where would you like to put config directory? (${HADOOP_CONF_DIR}) "
read USER_HADOOP_CONF_DIR
echo -n "Where would you like to put log directory? (${HADOOP_LOG_DIR}) "
read USER_HADOOP_LOG_DIR
echo -n "What is the url of the namenode? (${HADOOP_NN_HOST}) "
read USER_HADOOP_NN_HOST
echo -n "Where would you like to put namenode data directory? (${HADOOP_NN_DIR}) "
read USER_HADOOP_NN_DIR
echo -n "Where would you like to put datanode data directory? (${HADOOP_DN_DIR}) "
read USER_HADOOP_DN_DIR
echo -n "What is the url of the jobtracker? (${HADOOP_JT_HOST}) "
read USER_HADOOP_JT_HOST
echo -n "Where would you like to put jobtracker/tasktracker data directory? (${HADOOP_MAPRED_DIR}) "
read USER_HADOOP_MAPRED_DIR
echo -n "Which taskscheduler would you like? (${HADOOP_TASK_SCHEDULER}) "
read USER_HADOOP_TASK_SCHEDULER
echo -n "Where is JAVA_HOME directory? (${JAVA_HOME}) "
read USER_JAVA_HOME
echo -n "Would you like to create directories/copy conf files to localhost? (Y/n) "
read USER_AUTOSETUP
echo
JAVA_HOME=${USER_JAVA_HOME:-$JAVA_HOME}
HADOOP_NN_HOST=${USER_HADOOP_NN_HOST:-$HADOOP_NN_HOST}
HADOOP_NN_DIR=${USER_HADOOP_NN_DIR:-$HADOOP_NN_DIR}
HADOOP_DN_DIR=${USER_HADOOP_DN_DIR:-$HADOOP_DN_DIR}
HADOOP_JT_HOST=${USER_HADOOP_JT_HOST:-$HADOOP_JT_HOST}
HADOOP_HDFS_DIR=${USER_HADOOP_HDFS_DIR:-$HADOOP_HDFS_DIR}
HADOOP_MAPRED_DIR=${USER_HADOOP_MAPRED_DIR:-$HADOOP_MAPRED_DIR}
HADOOP_TASK_SCHEDULER=${USER_HADOOP_TASK_SCHEDULER:-$HADOOP_TASK_SCHEDULER}
HADOOP_LOG_DIR=${USER_HADOOP_LOG_DIR:-$HADOOP_LOG_DIR}
HADOOP_CONF_DIR=${USER_HADOOP_CONF_DIR:-$HADOOP_CONF_DIR}
AUTOSETUP=${USER_AUTOSETUP:-y}
echo "Review your choices:"
echo
echo "Config directory : ${HADOOP_CONF_DIR}"
echo "Log directory : ${HADOOP_LOG_DIR}"
echo "Namenode url : ${HADOOP_NN_HOST}"
echo "Namenode directory : ${HADOOP_NN_DIR}"
echo "Datanode directory : ${HADOOP_DN_DIR}"
echo "Jobtracker url : ${HADOOP_JT_HOST}"
echo "Mapreduce directory : ${HADOOP_MAPRED_DIR}"
echo "Task scheduler : ${HADOOP_TASK_SCHEDULER}"
echo "JAVA_HOME directory : ${JAVA_HOME}"
echo "Create dirs/copy conf files : ${AUTOSETUP}"
echo
echo -n "Proceed with generate configuration? (y/N) "
read CONFIRM
if [ "${CONFIRM}" != "y" ]; then
echo "User aborted setup, exiting..."
exit 1
fi
fi
if [ "${AUTOSETUP}" == "1" ]; then
# If user wants to setup local system automatically,
# set config file generation location to HADOOP_CONF_DIR.
DEST=${HADOOP_CONF_DIR}
else
# If user is only interested to generate config file locally,
# place config files in the current working directory.
DEST=`pwd`
fi
# Remove any previously generated config files from the destination directory.
rm -f ${DEST}/core-site.xml >/dev/null
rm -f ${DEST}/hdfs-site.xml >/dev/null
rm -f ${DEST}/mapred-site.xml >/dev/null
rm -f ${DEST}/hadoop-env.sh >/dev/null
# Generate config file with specified parameters.
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/core-site.xml ${DEST}/core-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/hdfs/templates/hdfs-site.xml ${DEST}/hdfs-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/mapreduce/templates/mapred-site.xml ${DEST}/mapred-site.xml
template_generator ${HADOOP_CONF_DIR}/hadoop-env.sh.template ${DEST}/hadoop-env.sh
chown root:hadoop ${DEST}/hadoop-env.sh
chmod 755 ${DEST}/hadoop-env.sh
# Setup directory path and copy config files, if AUTOSETUP is chosen.
if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then
mkdir -p ${HADOOP_HDFS_DIR}
mkdir -p ${HADOOP_NN_DIR}
mkdir -p ${HADOOP_DN_DIR}
mkdir -p ${HADOOP_MAPRED_DIR}
mkdir -p ${HADOOP_CONF_DIR}
mkdir -p ${HADOOP_LOG_DIR}
mkdir -p ${HADOOP_LOG_DIR}/hdfs
mkdir -p ${HADOOP_LOG_DIR}/mapred
chown hdfs:hadoop ${HADOOP_HDFS_DIR}
chown hdfs:hadoop ${HADOOP_NN_DIR}
chown hdfs:hadoop ${HADOOP_DN_DIR}
chown mapred:hadoop ${HADOOP_MAPRED_DIR}
chown root:hadoop ${HADOOP_LOG_DIR}
chmod 775 ${HADOOP_LOG_DIR}
chown hdfs:hadoop ${HADOOP_LOG_DIR}/hdfs
chown mapred:hadoop ${HADOOP_LOG_DIR}/mapred
echo "Configuration setup is completed."
if [[ "$HADOOP_NN_HOST" =~ "`hostname`" ]]; then
echo "Proceed to run hadoop-setup-hdfs.sh on namenode."
fi
else
echo
echo "Configuration file has been generated, please copy:"
echo
echo "core-site.xml"
echo "hdfs-site.xml"
echo "mapred-site.xml"
echo "hadoop-env.sh"
echo
echo " to ${HADOOP_CONF_DIR} on all nodes, and proceed to run hadoop-setup-hdfs.sh on namenode."
fi

View File

@ -0,0 +1,96 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
if [ "$HADOOP_HOME" != "" ]; then
echo "Warning: \$HADOOP_HOME is deprecated."
echo
fi
. "$bin"/../libexec/hadoop-config.sh
usage() {
echo "
usage: $0 <parameters>
Required parameter:
-c <clusterid> Set cluster identifier for HDFS
Optional parameters:
-h Display this message
"
exit 1
}
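# Illustrative invocation (cluster id is a placeholder):
#   hadoop-setup-hdfs.sh -c mycluster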
if [ $# != 2 ] ; then
usage
exit 1
fi
while getopts "hc:" OPTION
do
case $OPTION in
c)
SETUP_CLUSTER=$OPTARG
;;
h)
usage
;;
*)
echo "Unknown option: $1"
usage
exit 1
;;
esac
done
export HADOOP_PREFIX
export HADOOP_CONF_DIR
export SETUP_CLUSTER
# Start namenode and initialize file system structure
echo "Setup Hadoop Distributed File System"
echo
echo "Formatting namenode"
echo
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} namenode -format -clusterid ${SETUP_CLUSTER}' hdfs
echo
echo "Starting namenode process"
echo
/etc/init.d/hadoop-namenode start
echo
echo "Initialize HDFS file system: "
echo
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /jobtracker' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown mapred:mapred /jobtracker' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /user/mapred' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown mapred:mapred /user/mapred' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /tmp' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chmod 777 /tmp' hdfs
if [ $? -eq 0 ]; then
echo "Completed."
else
echo "Unknown error occurred, check hadoop logs for details."
fi
echo
echo "Please startup datanode processes: /etc/init.d/hadoop-datanode start"

View File

@ -0,0 +1,212 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Script to set up HDFS and MapReduce for a single node deployment
bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin"; pwd`
export HADOOP_PREFIX=${bin}/..
if [ -e /etc/hadoop/hadoop-env.sh ]; then
. /etc/hadoop/hadoop-env.sh
fi
usage() {
echo "
usage: $0 <parameters>
Optional parameters:
--default Setup system as default
-h Display this message
"
exit 1
}
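# Illustrative invocation:
#   hadoop-setup-single-node.sh --default
# answers every question below with its default, for unattended setup.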
# Parse script parameters
OPTS=$(getopt \
-n $0 \
-o '' \
-l 'default' \
-- "$@")
if [ $? != 0 ] ; then
usage
fi
eval set -- "${OPTS}"
while true ; do
case "$1" in
--default)
AUTOMATED=1; shift
;;
-h)
usage
;;
--)
shift ; break
;;
*)
echo "Unknown option: $1"
usage
exit 1
;;
esac
done
# Interactive setup wizard
if [ "${AUTOMATED}" != "1" ]; then
echo "Welcome to Hadoop single node setup wizard"
echo
echo -n "Would you like to use default single node configuration? (y/n) "
read SET_CONFIG
echo -n "Would you like to format name node? (y/n) "
read SET_FORMAT
echo -n "Would you like to setup default directory structure? (y/n) "
read SET_MKDIR
echo -n "Would you like to start up Hadoop? (y/n) "
read STARTUP
echo -n "Would you like to start up Hadoop on reboot? (y/n) "
read SET_REBOOT
echo
echo "Review your choices:"
echo
echo "Setup single node configuration : ${SET_CONFIG}"
echo "Format namenode : ${SET_FORMAT}"
echo "Setup default file system structure: ${SET_MKDIR}"
echo "Start up Hadoop : ${STARTUP}"
echo "Start up Hadoop on reboot : ${SET_REBOOT}"
echo
echo -n "Proceed with setup? (y/n) "
read CONFIRM
if [ "${CONFIRM}" != "y" ]; then
echo "User aborted setup, exiting..."
exit 1
fi
else
SET_CONFIG="y"
SET_FORMAT="y"
SET_MKDIR="y"
STARTUP="y"
SET_REBOOT="y"
fi
AUTOMATED=${AUTOMATED:-0}
SET_CONFIG=${SET_CONFIG:-y}
SET_FORMAT=${SET_FORMAT:-n}
SET_MKDIR=${SET_MKDIR:-y}
STARTUP=${STARTUP:-y}
SET_REBOOT=${SET_REBOOT:-y}
# Make sure system is not already started
/etc/init.d/hadoop-namenode stop 2>/dev/null >/dev/null
/etc/init.d/hadoop-datanode stop 2>/dev/null >/dev/null
/etc/init.d/hadoop-jobtracker stop 2>/dev/null >/dev/null
/etc/init.d/hadoop-tasktracker stop 2>/dev/null >/dev/null
# Default settings
JAVA_HOME=${JAVA_HOME:-/usr/java/default}
HADOOP_NN_HOST=${HADOOP_NN_HOST:-hdfs://localhost:9000/}
HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
HADOOP_JT_HOST=${HADOOP_JT_HOST:-localhost:9001}
HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
HADOOP_LOG_DIR="/var/log/hadoop"
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
HADOOP_REPLICATION=${HADOOP_REPLICATION:-1}
HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
# Setup config files
if [ "${SET_CONFIG}" == "y" ]; then
${HADOOP_PREFIX}/sbin/hadoop-setup-conf.sh --auto \
--conf-dir=${HADOOP_CONF_DIR} \
--datanode-dir=${HADOOP_DN_DIR} \
--hdfs-dir=${HADOOP_HDFS_DIR} \
--jobtracker-url=${HADOOP_JT_HOST} \
--log-dir=${HADOOP_LOG_DIR} \
--mapred-dir=${HADOOP_MAPRED_DIR} \
--namenode-dir=${HADOOP_NN_DIR} \
--namenode-url=${HADOOP_NN_HOST} \
--replication=${HADOOP_REPLICATION}
fi
export HADOOP_CONF_DIR
# Format namenode
if [ ! -e ${HADOOP_NN_DIR} ]; then
rm -rf ${HADOOP_HDFS_DIR} 2>/dev/null >/dev/null
mkdir -p ${HADOOP_HDFS_DIR}
chmod 755 ${HADOOP_HDFS_DIR}
chown hdfs:hadoop ${HADOOP_HDFS_DIR}
su -c '${HADOOP_PREFIX}/bin/hdfs --config ${HADOOP_CONF_DIR} namenode -format -clusterid hadoop' hdfs
elif [ "${SET_FORMAT}" == "y" ]; then
rm -rf ${HADOOP_HDFS_DIR} 2>/dev/null >/dev/null
mkdir -p ${HADOOP_HDFS_DIR}
chmod 755 ${HADOOP_HDFS_DIR}
chown hdfs:hadoop ${HADOOP_HDFS_DIR}
rm -rf /var/lib/hadoop/hdfs/namenode
su -c '${HADOOP_PREFIX}/bin/hdfs --config ${HADOOP_CONF_DIR} namenode -format -clusterid hadoop' hdfs
fi
# Start hdfs service
/etc/init.d/hadoop-namenode start
/etc/init.d/hadoop-datanode start
# Initialize file system structure
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /user/mapred' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown mapred:mapred /user/mapred' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /tmp' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chmod 777 /tmp' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir /jobtracker' hdfs
su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown mapred:mapred /jobtracker' hdfs
# Start mapreduce service
/etc/init.d/hadoop-jobtracker start
/etc/init.d/hadoop-tasktracker start
# Toggle service startup on reboot
if [ "${SET_REBOOT}" == "y" ]; then
if [ -e /etc/debian_version ]; then
ln -sf ../init.d/hadoop-namenode /etc/rc2.d/S90hadoop-namenode
ln -sf ../init.d/hadoop-datanode /etc/rc2.d/S91hadoop-datanode
ln -sf ../init.d/hadoop-jobtracker /etc/rc2.d/S92hadoop-jobtracker
ln -sf ../init.d/hadoop-tasktracker /etc/rc2.d/S93hadoop-tasktracker
ln -sf ../init.d/hadoop-namenode /etc/rc6.d/S10hadoop-namenode
ln -sf ../init.d/hadoop-datanode /etc/rc6.d/S11hadoop-datanode
ln -sf ../init.d/hadoop-jobtracker /etc/rc6.d/S12hadoop-jobtracker
ln -sf ../init.d/hadoop-tasktracker /etc/rc6.d/S13hadoop-tasktracker
elif [ -e /etc/redhat-release ]; then
/sbin/chkconfig hadoop-namenode --add
/sbin/chkconfig hadoop-datanode --add
/sbin/chkconfig hadoop-jobtracker --add
/sbin/chkconfig hadoop-tasktracker --add
/sbin/chkconfig hadoop-namenode on
/sbin/chkconfig hadoop-datanode on
/sbin/chkconfig hadoop-jobtracker on
/sbin/chkconfig hadoop-tasktracker on
fi
fi
# Shut down services if the user chose not to start them after setup
if [ "${STARTUP}" != "y" ]; then
/etc/init.d/hadoop-namenode stop
/etc/init.d/hadoop-datanode stop
/etc/init.d/hadoop-jobtracker stop
/etc/init.d/hadoop-tasktracker stop
fi

View File

@ -0,0 +1,84 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Starts a Hadoop datanode
#
# chkconfig: 2345 90 10
# description: Hadoop datanode
source /etc/rc.d/init.d/functions
source /etc/default/hadoop-env.sh
RETVAL=0
PIDFILE="${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid"
desc="Hadoop datanode daemon"
start() {
echo -n $"Starting $desc (hadoop-datanode): "
daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start datanode
RETVAL=$?
echo
[ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-datanode
return $RETVAL
}
stop() {
echo -n $"Stopping $desc (hadoop-datanode): "
daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop datanode
RETVAL=$?
sleep 5
echo
[ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/hadoop-datanode $PIDFILE
}
restart() {
stop
start
}
checkstatus(){
status -p $PIDFILE ${JAVA_HOME}/bin/java
RETVAL=$?
}
condrestart(){
[ -e /var/lock/subsys/hadoop-datanode ] && restart || :
}
case "$1" in
start)
start
;;
stop)
stop
;;
status)
checkstatus
;;
restart)
restart
;;
condrestart)
condrestart
;;
*)
echo $"Usage: $0 {start|stop|status|restart|condrestart}"
exit 1
esac
exit $RETVAL

View File

@ -0,0 +1,84 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Starts a Hadoop jobtracker
#
# chkconfig: 2345 90 10
# description: Hadoop jobtracker
source /etc/rc.d/init.d/functions
source /etc/default/hadoop-env.sh
RETVAL=0
PIDFILE="${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid"
desc="Hadoop jobtracker daemon"
start() {
echo -n $"Starting $desc (hadoop-jobtracker): "
daemon --user mapred ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start jobtracker
RETVAL=$?
echo
[ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-jobtracker
return $RETVAL
}
stop() {
echo -n $"Stopping $desc (hadoop-jobtracker): "
daemon --user mapred ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop jobtracker
RETVAL=$?
sleep 5
echo
[ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/hadoop-jobtracker $PIDFILE
}
restart() {
stop
start
}
checkstatus(){
status -p $PIDFILE ${JAVA_HOME}/bin/java
RETVAL=$?
}
condrestart(){
[ -e /var/lock/subsys/hadoop-jobtracker ] && restart || :
}
case "$1" in
start)
start
;;
stop)
stop
;;
status)
checkstatus
;;
restart)
restart
;;
condrestart)
condrestart
;;
*)
echo $"Usage: $0 {start|stop|status|restart|condrestart}"
exit 1
esac
exit $RETVAL

View File

@ -0,0 +1,98 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Starts a Hadoop namenode
#
# chkconfig: 2345 90 10
# description: Hadoop namenode
source /etc/rc.d/init.d/functions
source /etc/default/hadoop-env.sh
RETVAL=0
PIDFILE="${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid"
desc="Hadoop namenode daemon"
start() {
echo -n $"Starting $desc (hadoop-namenode): "
daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start namenode $1
RETVAL=$?
echo
[ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-namenode
return $RETVAL
}
upgrade() {
start -upgrade
}
stop() {
echo -n $"Stopping $desc (hadoop-namenode): "
daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop namenode
RETVAL=$?
sleep 5
echo
[ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/hadoop-namenode $PIDFILE
}
checkstatus(){
status -p $PIDFILE ${JAVA_HOME}/bin/java
RETVAL=$?
}
restart() {
stop
start
}
condrestart(){
[ -e /var/lock/subsys/hadoop-namenode ] && restart || :
}
format() {
daemon --user hdfs ${HADOOP_PREFIX}/bin/hadoop namenode -format
}
case "$1" in
start)
start
;;
upgrade)
upgrade
;;
format)
format
;;
stop)
stop
;;
status)
checkstatus
;;
restart)
restart
;;
condrestart|try-restart)
condrestart
;;
*)
echo $"Usage: $0 {start|stop|status|restart|try-restart|upgrade}"
exit 1
esac
exit $RETVAL

View File

@ -0,0 +1,84 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Starts a Hadoop tasktracker
#
# chkconfig: 2345 90 10
# description: Hadoop tasktracker
source /etc/rc.d/init.d/functions
source /etc/default/hadoop-env.sh
RETVAL=0
PIDFILE="${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid"
desc="Hadoop tasktracker daemon"
start() {
echo -n $"Starting $desc (hadoop-tasktracker): "
daemon --user mapred ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start tasktracker
RETVAL=$?
echo
[ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-tasktracker
return $RETVAL
}
stop() {
echo -n $"Stopping $desc (hadoop-tasktracker): "
daemon --user mapred ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop tasktracker
RETVAL=$?
sleep 5
echo
[ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/hadoop-tasktracker $PIDFILE
}
restart() {
stop
start
}
checkstatus(){
status -p $PIDFILE ${JAVA_HOME}/bin/java
RETVAL=$?
}
condrestart(){
[ -e /var/lock/subsys/hadoop-tasktracker ] && restart || :
}
case "$1" in
start)
start
;;
stop)
stop
;;
status)
checkstatus
;;
restart)
restart
;;
condrestart)
condrestart
;;
*)
echo $"Usage: $0 {start|stop|status|restart|condrestart}"
exit 1
esac
exit $RETVAL

View File

@ -0,0 +1,173 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# RPM Spec file for Hadoop version @version@
#
%define name hadoop-common
%define version @version@
%define release @package.release@
# Installation Locations
%define _prefix @package.prefix@
%define _bin_dir %{_prefix}/bin
%define _conf_dir @package.conf.dir@
%define _lib_dir %{_prefix}/lib
%define _lib64_dir %{_prefix}/lib64
%define _libexec_dir %{_prefix}/libexec
%define _log_dir @package.log.dir@
%define _pid_dir @package.pid.dir@
%define _sbin_dir %{_prefix}/sbin
%define _share_dir %{_prefix}/share
%define _var_dir @package.var.dir@
# Build time settings
%define _build_dir @package.build.dir@
%define _final_name @final.name@
%define debug_package %{nil}
# Disable brp-java-repack-jars for aspect J
%define __os_install_post \
/usr/lib/rpm/redhat/brp-compress \
%{!?__debug_package:/usr/lib/rpm/redhat/brp-strip %{__strip}} \
/usr/lib/rpm/redhat/brp-strip-static-archive %{__strip} \
/usr/lib/rpm/redhat/brp-strip-comment-note %{__strip} %{__objdump} \
/usr/lib/rpm/brp-python-bytecompile %{nil}
# RPM searches perl files for dependencies and this breaks for non-packaged
# perl libs like thrift, so disable this
%define _use_internal_dependency_generator 0
%ifarch i386
%global hadoop_arch Linux-i386-32
%endif
%ifarch amd64 x86_64
%global hadoop_arch Linux-amd64-64
%endif
%ifarch noarch
%global hadoop_arch ""
%endif
Summary: The Apache Hadoop project develops open-source software for reliable, scalable, distributed computing
License: Apache License, Version 2.0
URL: http://hadoop.apache.org/core/
Vendor: Apache Software Foundation
Group: Development/Libraries
Name: %{name}
Version: %{version}
Release: %{release}
Source0: %{_final_name}-bin.tar.gz
Prefix: %{_prefix}
Prefix: %{_conf_dir}
Prefix: %{_log_dir}
Prefix: %{_pid_dir}
Buildroot: %{_build_dir}
Requires: sh-utils, textutils, /usr/sbin/useradd, /usr/sbin/usermod, /sbin/chkconfig, /sbin/service, jdk >= 1.6
AutoReqProv: no
Provides: hadoop
%description
The Apache Hadoop project develops open-source software for reliable, scalable,
distributed computing. Hadoop includes these subprojects:
Hadoop Common: The common utilities that support the other Hadoop subprojects.
%prep
%setup -n %{_final_name}
%build
if [ -d ${RPM_BUILD_DIR}%{_prefix} ]; then
rm -rf ${RPM_BUILD_DIR}%{_prefix}
fi
if [ -d ${RPM_BUILD_DIR}%{_log_dir} ]; then
rm -rf ${RPM_BUILD_DIR}%{_log_dir}
fi
if [ -d ${RPM_BUILD_DIR}%{_conf_dir} ]; then
rm -rf ${RPM_BUILD_DIR}%{_conf_dir}
fi
if [ -d ${RPM_BUILD_DIR}%{_pid_dir} ]; then
rm -rf ${RPM_BUILD_DIR}%{_pid_dir}
fi
mkdir -p ${RPM_BUILD_DIR}%{_prefix}
mkdir -p ${RPM_BUILD_DIR}%{_bin_dir}
mkdir -p ${RPM_BUILD_DIR}%{_lib_dir}
%ifarch amd64 x86_64
mkdir -p ${RPM_BUILD_DIR}%{_lib64_dir}
%endif
mkdir -p ${RPM_BUILD_DIR}%{_libexec_dir}
mkdir -p ${RPM_BUILD_DIR}%{_log_dir}
mkdir -p ${RPM_BUILD_DIR}%{_conf_dir}
mkdir -p ${RPM_BUILD_DIR}%{_pid_dir}
mkdir -p ${RPM_BUILD_DIR}%{_sbin_dir}
mkdir -p ${RPM_BUILD_DIR}%{_share_dir}
mkdir -p ${RPM_BUILD_DIR}%{_var_dir}
#########################
#### INSTALL SECTION ####
#########################
%install
mv ${RPM_BUILD_DIR}/%{_final_name}/bin/* ${RPM_BUILD_DIR}%{_bin_dir}
mv ${RPM_BUILD_DIR}/%{_final_name}/etc/hadoop/* ${RPM_BUILD_DIR}%{_conf_dir}
mv ${RPM_BUILD_DIR}/%{_final_name}/lib/* ${RPM_BUILD_DIR}%{_lib_dir}
mv ${RPM_BUILD_DIR}/%{_final_name}/libexec/* ${RPM_BUILD_DIR}%{_libexec_dir}
mv ${RPM_BUILD_DIR}/%{_final_name}/sbin/* ${RPM_BUILD_DIR}%{_sbin_dir}
mv ${RPM_BUILD_DIR}/%{_final_name}/share/* ${RPM_BUILD_DIR}%{_share_dir}
rm -rf ${RPM_BUILD_DIR}/%{_final_name}/etc
%pre
getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -r hadoop
%post
bash ${RPM_INSTALL_PREFIX0}/sbin/update-hadoop-env.sh \
--prefix=${RPM_INSTALL_PREFIX0} \
--bin-dir=${RPM_INSTALL_PREFIX0}/bin \
--sbin-dir=${RPM_INSTALL_PREFIX0}/sbin \
--conf-dir=${RPM_INSTALL_PREFIX1} \
--log-dir=${RPM_INSTALL_PREFIX2} \
--pid-dir=${RPM_INSTALL_PREFIX3}
%preun
bash ${RPM_INSTALL_PREFIX0}/sbin/update-hadoop-env.sh \
--prefix=${RPM_INSTALL_PREFIX0} \
--bin-dir=${RPM_INSTALL_PREFIX0}/bin \
--sbin-dir=${RPM_INSTALL_PREFIX0}/sbin \
--conf-dir=${RPM_INSTALL_PREFIX1} \
--log-dir=${RPM_INSTALL_PREFIX2} \
--pid-dir=${RPM_INSTALL_PREFIX3} \
--uninstall
%files
%defattr(-,root,root)
%attr(0755,root,hadoop) %{_log_dir}
%attr(0775,root,hadoop) %{_pid_dir}
%config(noreplace) %{_conf_dir}/configuration.xsl
%config(noreplace) %{_conf_dir}/core-site.xml
%config(noreplace) %{_conf_dir}/hadoop-env.sh
%config(noreplace) %{_conf_dir}/hadoop-metrics.properties
%config(noreplace) %{_conf_dir}/hadoop-metrics2.properties
%config(noreplace) %{_conf_dir}/hadoop-policy.xml
%config(noreplace) %{_conf_dir}/log4j.properties
%config(noreplace) %{_conf_dir}/masters
%config(noreplace) %{_conf_dir}/slaves
%{_conf_dir}/hadoop-env.sh.template
%{_conf_dir}/ssl-client.xml.example
%{_conf_dir}/ssl-server.xml.example
%{_prefix}

View File

@ -0,0 +1,11 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>${HADOOP_NN_HOST}</value>
</property>
</configuration>
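<!-- The HADOOP_NN_HOST placeholder above is filled in by hadoop-setup-conf.sh,
     e.g. with hdfs://localhost:9000/ on a single node setup. -->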

View File

@ -0,0 +1,168 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script configures hadoop-env.sh and symlinks directories when the
# RPM installation is relocated.
usage() {
echo "
usage: $0 <parameters>
Required parameters:
--prefix=PREFIX path to install into
Optional parameters:
--arch=i386 OS Architecture
--bin-dir=PREFIX/bin Executable directory
--conf-dir=/etc/hadoop Configuration directory
--log-dir=/var/log/hadoop Log directory
--pid-dir=/var/run PID file location
--sbin-dir=PREFIX/sbin System executable directory
"
exit 1
}
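# Illustrative invocation, mirroring how the RPM %post scriptlet calls this:
#   update-hadoop-env.sh --prefix=/usr --bin-dir=/usr/bin --sbin-dir=/usr/sbin \
#     --conf-dir=/etc/hadoop --log-dir=/var/log/hadoop --pid-dir=/var/run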
template_generator() {
REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
# Read raw lines so indentation and globbing characters survive intact.
while IFS='' read -r line ; do
while [[ "$line" =~ $REGEX ]] ; do
LHS=${BASH_REMATCH[1]}
RHS="$(eval echo "\"$LHS\"")"
line=${line//$LHS/$RHS}
done
echo "$line" >> $2
done < $1
}
OPTS=$(getopt \
-n $0 \
-o '' \
-l 'arch:' \
-l 'prefix:' \
-l 'bin-dir:' \
-l 'conf-dir:' \
-l 'lib-dir:' \
-l 'log-dir:' \
-l 'pid-dir:' \
-l 'sbin-dir:' \
-l 'uninstall' \
-- "$@")
if [ $? != 0 ] ; then
usage
fi
eval set -- "${OPTS}"
while true ; do
case "$1" in
--arch)
ARCH=$2 ; shift 2
;;
--prefix)
PREFIX=$2 ; shift 2
;;
--bin-dir)
BIN_DIR=$2 ; shift 2
;;
--log-dir)
LOG_DIR=$2 ; shift 2
;;
--lib-dir)
LIB_DIR=$2 ; shift 2
;;
--conf-dir)
CONF_DIR=$2 ; shift 2
;;
--pid-dir)
PID_DIR=$2 ; shift 2
;;
--sbin-dir)
SBIN_DIR=$2 ; shift 2
;;
--uninstall)
UNINSTALL=1; shift
;;
--)
shift ; break
;;
*)
echo "Unknown option: $1"
usage
exit 1
;;
esac
done
for var in PREFIX; do
if [ -z "$(eval "echo \$$var")" ]; then
echo Missing param: $var
usage
fi
done
ARCH=${ARCH:-i386}
HADOOP_PREFIX=$PREFIX
HADOOP_BIN_DIR=${BIN_DIR:-$PREFIX/bin}
HADOOP_CONF_DIR=${CONF_DIR:-$PREFIX/etc/hadoop}
HADOOP_LIB_DIR=${LIB_DIR:-$PREFIX/lib}
HADOOP_LOG_DIR=${LOG_DIR:-$PREFIX/var/log}
HADOOP_PID_DIR=${PID_DIR:-$PREFIX/var/run}
HADOOP_SBIN_DIR=${SBIN_DIR:-$PREFIX/sbin}
UNINSTALL=${UNINSTALL:-0}
if [ "${ARCH}" != "i386" ]; then
HADOOP_LIB_DIR=${HADOOP_LIB_DIR}64
fi
if [ "${UNINSTALL}" -eq "1" ]; then
# Remove symlinks
if [ "${HADOOP_CONF_DIR}" != "${HADOOP_PREFIX}/etc/hadoop" ]; then
rm -rf ${HADOOP_PREFIX}/etc/hadoop
fi
rm -f /etc/default/hadoop-env.sh
rm -f /etc/profile.d/hadoop-env.sh
else
# Create symlinks
if [ "${HADOOP_CONF_DIR}" != "${HADOOP_PREFIX}/etc/hadoop" ]; then
mkdir -p ${HADOOP_PREFIX}/etc
ln -sf ${HADOOP_CONF_DIR} ${HADOOP_PREFIX}/etc/hadoop
fi
ln -sf ${HADOOP_CONF_DIR}/hadoop-env.sh /etc/default/hadoop-env.sh
ln -sf ${HADOOP_CONF_DIR}/hadoop-env.sh /etc/profile.d/hadoop-env.sh
mkdir -p ${HADOOP_LOG_DIR}
chown root:hadoop ${HADOOP_LOG_DIR}
chmod 775 ${HADOOP_LOG_DIR}
if [ ! -d ${HADOOP_PID_DIR} ]; then
mkdir -p ${HADOOP_PID_DIR}
chown root:hadoop ${HADOOP_PID_DIR}
chmod 775 ${HADOOP_PID_DIR}
fi
TFILE="/tmp/$(basename $0).$$.tmp"
if [ -z "${JAVA_HOME}" ]; then
if [ -e /etc/debian_version ]; then
JAVA_HOME=`update-alternatives --config java | grep java | cut -f2 -d':' | cut -f2 -d' ' | sed -e 's/\/bin\/java//'`
else
JAVA_HOME=/usr/java/default
fi
fi
template_generator ${HADOOP_CONF_DIR}/hadoop-env.sh.template $TFILE
cp ${TFILE} ${HADOOP_CONF_DIR}/hadoop-env.sh
rm -f ${TFILE}
fi

View File

@ -3349,11 +3349,11 @@ fi
done
#check for HADOOP_HOME
#check for HADOOP_PREFIX
if test "$with_home" != ""
then
cat >>confdefs.h <<_ACEOF
#define HADOOP_HOME "$with_home"
#define HADOOP_PREFIX "$with_home"
_ACEOF
fi

View File

@ -40,10 +40,10 @@ AC_PROG_CC
AC_HEADER_STDC
AC_CHECK_HEADERS([stdlib.h string.h unistd.h fcntl.h])
#check for HADOOP_HOME
#check for HADOOP_PREFIX
if test "$with_home" != ""
then
AC_DEFINE_UNQUOTED(HADOOP_HOME,"$with_home")
AC_DEFINE_UNQUOTED(HADOOP_PREFIX,"$with_home")
fi
# Checks for typedefs, structures, and compiler characteristics.

View File

@ -86,7 +86,7 @@ int process_cluster_command(char * user, char * node , char *command) {
}
len = STRLEN + strlen(command);
finalcommandstr = (char *) malloc((len + 1) * sizeof(char));
snprintf(finalcommandstr, len, SCRIPT_DIR_PATTERN, HADOOP_HOME,
snprintf(finalcommandstr, len, SCRIPT_DIR_PATTERN, HADOOP_PREFIX,
command);
finalcommandstr[len + 1] = '\0';
errorcode = switchuser(user);

View File

@ -39,13 +39,13 @@ enum errorcodes {
INVALID_COMMAND_PASSED, //6
};
#undef HADOOP_HOME
#undef HADOOP_PREFIX
#define SSH_COMMAND "ssh"
#define SCRIPT_DIR_PATTERN "%s/bin/hadoop-daemon.sh %s" //%s to be substituded
#define STRLEN strlen(SCRIPT_DIR_PATTERN) + strlen(HADOOP_HOME)
#define STRLEN strlen(SCRIPT_DIR_PATTERN) + strlen(HADOOP_PREFIX)
/*
* Function to get the user details populated given a user name.

View File

@ -59,7 +59,7 @@ public abstract class HadoopDaemonRemoteCluster
public static final String CONF_HADOOPNEWCONFDIR =
"test.system.hdrc.hadoopnewconfdir";
/**
* Key used to configure the HADOOP_HOME to be used by the
* Key used to configure the HADOOP_PREFIX to be used by the
* HadoopDaemonRemoteCluster.
*/
public final static String CONF_HADOOPHOME =
@ -188,7 +188,7 @@ public abstract class HadoopDaemonRemoteCluster
if (hadoopHome == null || hadoopConfDir == null || hadoopHome.isEmpty()
|| hadoopConfDir.isEmpty()) {
LOG.error("No configuration "
+ "for the HADOOP_HOME and HADOOP_CONF_DIR passed");
+ "for the HADOOP_PREFIX and HADOOP_CONF_DIR passed");
throw new IllegalArgumentException(
"No Configuration passed for hadoop home " +
"and hadoop conf directories");