#!/usr/bin/env bash # Script that is run on each EC2 instance on boot. It is passed in the EC2 user # data, so should not exceed 16K in size. MASTER_HOST="%MASTER_HOST%" ZOOKEEPER_QUORUM="%ZOOKEEPER_QUORUM%" NUM_SLAVES="%NUM_SLAVES%" EXTRA_PACKAGES="%EXTRA_PACKAGES%" SECURITY_GROUPS=`wget -q -O - http://169.254.169.254/latest/meta-data/security-groups` IS_MASTER=`echo $SECURITY_GROUPS | awk '{ a = match ($0, "-master$"); if (a) print "true"; else print "false"; }'` if [ "$IS_MASTER" = "true" ]; then MASTER_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/local-hostname` fi HADOOP_HOME=`ls -d /usr/local/hadoop-*` HADOOP_VERSION=`echo $HADOOP_HOME | cut -d '-' -f 2` HBASE_HOME=`ls -d /usr/local/hbase-*` HBASE_VERSION=`echo $HBASE_HOME | cut -d '-' -f 2` export USER="root" # up file-max sysctl -w fs.file-max=32768 # up ulimits echo "root soft nofile 32768" >> /etc/security/limits.conf echo "root hard nofile 32768" >> /etc/security/limits.conf # up epoll limits; ok if this fails, only valid for kernels 2.6.27+ sysctl -w fs.epoll.max_user_instances=32768 > /dev/null 2>&1 [ ! -f /etc/hosts ] && echo "127.0.0.1 localhost" > /etc/hosts # Extra packages if [ "$EXTRA_PACKAGES" != "" ] ; then # format should be ... pkg=( $EXTRA_PACKAGES ) wget -nv -O /etc/yum.repos.d/user.repo ${pkg[0]} yum -y update yum yum -y install ${pkg[@]:1} fi # Ganglia if [ "$IS_MASTER" = "true" ]; then sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \ -e "s|\( *bind *=.*\)|#\1|" \ -e "s|\( *mute *=.*\)| mute = yes|" \ -e "s|\( *location *=.*\)| location = \"master-node\"|" \ /etc/gmond.conf mkdir -p /mnt/ganglia/rrds chown -R ganglia:ganglia /mnt/ganglia/rrds rm -rf /var/lib/ganglia; cd /var/lib; ln -s /mnt/ganglia ganglia; cd service gmond start service gmetad start apachectl start else sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \ -e "s|\( *bind *=.*\)|#\1|" \ -e "s|\(udp_send_channel {\)|\1\n host=$MASTER_HOST|" \ /etc/gmond.conf service gmond start fi # Reformat sdb as xfs umount /mnt mkfs.xfs -f /dev/sdb mount -o noatime /dev/sdb /mnt # Probe for additional instance volumes # /dev/sdb as /mnt is always set up by base image DFS_NAME_DIR="/mnt/hadoop/dfs/name" DFS_DATA_DIR="/mnt/hadoop/dfs/data" i=2 for d in c d e f g h i j k l m n o p q r s t u v w x y z; do m="/mnt${i}" mkdir -p $m mkfs.xfs -f /dev/sd${d} if [ $? -eq 0 ] ; then mount -o noatime /dev/sd${d} $m > /dev/null 2>&1 if [ $i -lt 3 ] ; then # no more than two namedirs DFS_NAME_DIR="${DFS_NAME_DIR},${m}/hadoop/dfs/name" fi DFS_DATA_DIR="${DFS_DATA_DIR},${m}/hadoop/dfs/data" i=$(( i + 1 )) fi done # Hadoop configuration (cd /usr/local && ln -s $HADOOP_HOME hadoop) || true cat > $HADOOP_HOME/conf/core-site.xml < hadoop.tmp.dir /mnt/hadoop fs.default.name hdfs://$MASTER_HOST:8020 EOF cat > $HADOOP_HOME/conf/hdfs-site.xml < fs.default.name hdfs://$MASTER_HOST:8020 dfs.name.dir $DFS_NAME_DIR dfs.data.dir $DFS_DATA_DIR dfs.replication 3 dfs.datanode.handler.count 10 dfs.datanode.max.xcievers 4096 EOF cat > $HADOOP_HOME/conf/mapred-site.xml < mapred.job.tracker $MASTER_HOST:8021 io.compression.codecs org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec io.compression.codec.lzo.class com.hadoop.compression.lzo.LzoCodec mapred.map.tasks.speculative.execution false mapred.child.java.opts -Xmx512m -XX:+UseCompressedOops EOF # Add JVM options cat >> $HADOOP_HOME/conf/hadoop-env.sh <> $HADOOP_HOME/conf/hadoop-env.sh < $HADOOP_HOME/conf/hadoop-metrics.properties < $HBASE_HOME/conf/hbase-site.xml < hbase.rootdir hdfs://$MASTER_HOST:8020/hbase hbase.cluster.distributed true hbase.regions.server.count.min $NUM_SLAVES hbase.zookeeper.quorum $ZOOKEEPER_QUORUM hbase.regionserver.handler.count 100 hbase.hregion.memstore.block.multiplier 3 hbase.hstore.blockingStoreFiles 15 dfs.replication 3 dfs.client.block.write.retries 100 zookeeper.session.timeout 60000 hbase.tmp.dir /mnt/hbase EOF # Copy over mapred configuration for jobs started with 'hbase ...' cp $HADOOP_HOME/conf/mapred-site.xml $HBASE_HOME/conf/mapred-site.xml # Override JVM options cat >> $HBASE_HOME/conf/hbase-env.sh < $HBASE_HOME/conf/hadoop-metrics.properties <