diff --git a/CHANGES.txt b/CHANGES.txt index 2bcca600944..c0de674665a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -258,6 +258,9 @@ Release 0.22.0 - Unreleased HADOOP-6578. Configuration should trim whitespace around a lot of value types. (Michele Catasta via eli) + HADOOP-6811. Remove EC2 bash scripts. They are replaced by Apache Whirr + (incubating, http://incubator.apache.org/whirr). (tomwhite) + OPTIMIZATIONS HADOOP-6884. Add LOG.isDebugEnabled() guard for each LOG.debug(..). diff --git a/src/contrib/ec2/README.txt b/src/contrib/ec2/README.txt deleted file mode 100644 index 3ac8c99f47b..00000000000 --- a/src/contrib/ec2/README.txt +++ /dev/null @@ -1,15 +0,0 @@ -Hadoop EC2 - -NOTE: these scripts have been deprecated. See http://incubator.apache.org/whirr. - -This collection of scripts allows you to run Hadoop clusters on Amazon.com's Elastic Compute Cloud (EC2) service described at: - - http://aws.amazon.com/ec2 - -To get help, type the following in a shell: - - bin/hadoop-ec2 - -For full instructions, please visit the Hadoop wiki at: - - http://wiki.apache.org/hadoop/AmazonEC2#AutomatedScripts diff --git a/src/contrib/ec2/bin/cmd-hadoop-cluster b/src/contrib/ec2/bin/cmd-hadoop-cluster deleted file mode 100644 index 5678dee8f9a..00000000000 --- a/src/contrib/ec2/bin/cmd-hadoop-cluster +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Run commands on master or specified node of a running Hadoop EC2 cluster. - -set -o errexit - -# if no args specified, show usage -if [ $# = 0 ]; then - echo "Command required!" - exit 1 -fi - -# get arguments -COMMAND="$1" -shift -# get group -CLUSTER="$1" -shift - -if [ -z $CLUSTER ]; then - echo "Cluster name or instance id required!" - exit -1 -fi - -# Import variables -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` -. "$bin"/hadoop-ec2-env.sh - -if [[ $CLUSTER == i-* ]]; then - HOST=`ec2-describe-instances $CLUSTER | grep running | awk '{print $4}'` - [ -z $HOST ] && echo "Instance still pending or no longer running: $CLUSTER" && exit -1 -else - [ ! -f $MASTER_IP_PATH ] && echo "Wrong group name, or cluster not launched! $CLUSTER" && exit -1 - HOST=`cat $MASTER_IP_PATH` -fi - -if [ "$COMMAND" = "login" ] ; then - echo "Logging in to host $HOST." - ssh $SSH_OPTS "root@$HOST" -elif [ "$COMMAND" = "proxy" ] ; then - echo "Proxying to host $HOST via local port 6666" - echo "Gangia: http://$HOST/ganglia" - echo "JobTracker: http://$HOST:50030/" - echo "NameNode: http://$HOST:50070/" - ssh $SSH_OPTS -D 6666 -N "root@$HOST" -elif [ "$COMMAND" = "push" ] ; then - echo "Pushing $1 to host $HOST." - scp $SSH_OPTS -r $1 "root@$HOST:" -elif [ "$COMMAND" = "screen" ] ; then - echo "Logging in and attaching screen on host $HOST." - ssh $SSH_OPTS -t "root@$HOST" 'screen -D -R' -else - echo "Executing command on host $HOST." - ssh $SSH_OPTS -t "root@$HOST" "$COMMAND" -fi \ No newline at end of file diff --git a/src/contrib/ec2/bin/create-hadoop-image b/src/contrib/ec2/bin/create-hadoop-image deleted file mode 100755 index 98064c2d314..00000000000 --- a/src/contrib/ec2/bin/create-hadoop-image +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Create a Hadoop AMI. -# Inspired by Jonathan Siegel's EC2 script (http://blogsiegel.blogspot.com/2006/08/sandboxing-amazon-ec2.html) - -set -o errexit - -# Import variables -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` -. "$bin"/hadoop-ec2-env.sh - -AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'` - -[ ! -z $AMI_IMAGE ] && echo "AMI already registered, use: ec2-deregister $AMI_IMAGE" && exit -1 - -echo "Starting a AMI with ID $BASE_AMI_IMAGE." -OUTPUT=`ec2-run-instances $BASE_AMI_IMAGE -k $KEY_NAME -t $INSTANCE_TYPE` -BOOTING_INSTANCE=`echo $OUTPUT | awk '{print $6}'` - -echo "Instance is $BOOTING_INSTANCE." - -echo "Polling server status (ec2-describe-instances $BOOTING_INSTANCE)" -while true; do - printf "." - HOSTNAME=`ec2-describe-instances $BOOTING_INSTANCE | grep running | awk '{print $4}'` - if [ ! -z $HOSTNAME ]; then - break; - fi - sleep 1 -done - -echo "The server is available at $HOSTNAME." -while true; do - REPLY=`ssh $SSH_OPTS "root@$HOSTNAME" 'echo "hello"'` - if [ ! -z $REPLY ]; then - break; - fi - sleep 5 -done - -#read -p "Login first? [yes or no]: " answer - -if [ "$answer" == "yes" ]; then - ssh $SSH_OPTS "root@$HOSTNAME" -fi - -echo "Copying scripts." - -# Copy setup scripts -scp $SSH_OPTS "$bin"/hadoop-ec2-env.sh "root@$HOSTNAME:/mnt" -scp $SSH_OPTS "$bin"/image/create-hadoop-image-remote "root@$HOSTNAME:/mnt" -scp $SSH_OPTS "$bin"/image/ec2-run-user-data "root@$HOSTNAME:/etc/init.d" - -# Copy private key and certificate (for bundling image) -scp $SSH_OPTS $EC2_KEYDIR/pk*.pem "root@$HOSTNAME:/mnt" -scp $SSH_OPTS $EC2_KEYDIR/cert*.pem "root@$HOSTNAME:/mnt" - -# Connect to it -ssh $SSH_OPTS "root@$HOSTNAME" '/mnt/create-hadoop-image-remote' - -# Register image -ec2-register $S3_BUCKET/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml - -echo "Terminate with: ec2-terminate-instances $BOOTING_INSTANCE" diff --git a/src/contrib/ec2/bin/delete-hadoop-cluster b/src/contrib/ec2/bin/delete-hadoop-cluster deleted file mode 100644 index 96e8d5b4d74..00000000000 --- a/src/contrib/ec2/bin/delete-hadoop-cluster +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Delete the groups and local files associated with a cluster. - -set -o errexit - -if [ -z $1 ]; then - echo "Cluster name required!" - exit -1 -fi - -CLUSTER=$1 - -# Finding Hadoop clusters -CLUSTERS=`ec2-describe-instances | \ - awk '"RESERVATION" == $1 && $4 ~ /-master$/, "INSTANCE" == $1' | tr '\n' '\t' | \ - grep "$CLUSTER" | grep running | cut -f4 | rev | cut -d'-' -f2- | rev` - -if [ -n "$CLUSTERS" ]; then - echo "Cluster $CLUSTER has running instances. Please terminate them first." - exit 0 -fi - -# Import variables -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` -. "$bin"/hadoop-ec2-env.sh - -rm -f $MASTER_IP_PATH -rm -f $MASTER_PRIVATE_IP_PATH - -if ec2-describe-group $CLUSTER_MASTER > /dev/null 2>&1; then - if ec2-describe-group $CLUSTER > /dev/null 2>&1; then - echo "Revoking authorization between $CLUSTER_MASTER and $CLUSTER" - ec2-revoke $CLUSTER_MASTER -o $CLUSTER -u $AWS_ACCOUNT_ID || true - ec2-revoke $CLUSTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID || true - fi - echo "Deleting group $CLUSTER_MASTER" - ec2-delete-group $CLUSTER_MASTER -fi - -if ec2-describe-group $CLUSTER > /dev/null 2>&1; then - echo "Deleting group $CLUSTER" - ec2-delete-group $CLUSTER -fi diff --git a/src/contrib/ec2/bin/hadoop-ec2 b/src/contrib/ec2/bin/hadoop-ec2 deleted file mode 100644 index f98e4ccd008..00000000000 --- a/src/contrib/ec2/bin/hadoop-ec2 +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -o errexit - -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` - -echo "DEPRECATED. See http://incubator.apache.org/whirr." >&2 - -# if no args specified, show usage -if [ $# = 0 ]; then - echo "Usage: hadoop-ec2 COMMAND" - echo "where COMMAND is one of:" - echo " list list all running Hadoop EC2 clusters" - echo " launch-cluster launch a cluster of Hadoop EC2 instances - launch-master then launch-slaves" - echo " launch-master launch or find a cluster master" - echo " launch-slaves launch the cluster slaves" - echo " terminate-cluster terminate all Hadoop EC2 instances" - echo " delete-cluster delete the group information for a terminated cluster" - echo " login login to the master node of the Hadoop EC2 cluster" - echo " screen start or attach 'screen' on the master node of the Hadoop EC2 cluster" - echo " proxy start a socks proxy on localhost:6666 (use w/foxyproxy)" - echo " push scp a file to the master node of the Hadoop EC2 cluster" - echo " execute any command remotely on the master" - echo " create-image create a Hadoop AMI" - exit 1 -fi - -# get arguments -COMMAND="$1" -shift - -if [ "$COMMAND" = "create-image" ] ; then - . "$bin"/create-hadoop-image $* -elif [ "$COMMAND" = "launch-cluster" ] ; then - . "$bin"/launch-hadoop-cluster $* -elif [ "$COMMAND" = "launch-master" ] ; then - . "$bin"/launch-hadoop-master $* -elif [ "$COMMAND" = "launch-slaves" ] ; then - . "$bin"/launch-hadoop-slaves $* -elif [ "$COMMAND" = "delete-cluster" ] ; then - . "$bin"/delete-hadoop-cluster $* -elif [ "$COMMAND" = "terminate-cluster" ] ; then - . "$bin"/terminate-hadoop-cluster $* -elif [ "$COMMAND" = "list" ] ; then - . "$bin"/list-hadoop-clusters -else - . "$bin"/cmd-hadoop-cluster "$COMMAND" $* -fi - diff --git a/src/contrib/ec2/bin/hadoop-ec2-env.sh.template b/src/contrib/ec2/bin/hadoop-ec2-env.sh.template deleted file mode 100644 index 9d8655e4a86..00000000000 --- a/src/contrib/ec2/bin/hadoop-ec2-env.sh.template +++ /dev/null @@ -1,93 +0,0 @@ -# Set environment variables for running Hadoop on Amazon EC2 here. All are required. - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Your Amazon Account Number. -AWS_ACCOUNT_ID= - -# Your Amazon AWS access key. -AWS_ACCESS_KEY_ID= - -# Your Amazon AWS secret access key. -AWS_SECRET_ACCESS_KEY= - -# Location of EC2 keys. -# The default setting is probably OK if you set up EC2 following the Amazon Getting Started guide. -EC2_KEYDIR=`dirname "$EC2_PRIVATE_KEY"` - -# The EC2 key name used to launch instances. -# The default is the value used in the Amazon Getting Started guide. -KEY_NAME=gsg-keypair - -# Where your EC2 private key is stored (created when following the Amazon Getting Started guide). -# You need to change this if you don't store this with your other EC2 keys. -PRIVATE_KEY_PATH=`echo "$EC2_KEYDIR"/"id_rsa-$KEY_NAME"` - -# SSH options used when connecting to EC2 instances. -SSH_OPTS=`echo -i "$PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no -o ServerAliveInterval=30` - -# The version of Hadoop to use. -HADOOP_VERSION=0.19.0 - -# The Amazon S3 bucket where the Hadoop AMI is stored. -# The default value is for public images, so can be left if you are using running a public image. -# Change this value only if you are creating your own (private) AMI -# so you can store it in a bucket you own. -S3_BUCKET=hadoop-images - -# Enable public access to JobTracker and TaskTracker web interfaces -ENABLE_WEB_PORTS=true - -# The script to run on instance boot. -USER_DATA_FILE=hadoop-ec2-init-remote.sh - -# The EC2 instance type: m1.small, m1.large, m1.xlarge -INSTANCE_TYPE="m1.small" -#INSTANCE_TYPE="m1.large" -#INSTANCE_TYPE="m1.xlarge" -#INSTANCE_TYPE="c1.medium" -#INSTANCE_TYPE="c1.xlarge" - -# The EC2 group master name. CLUSTER is set by calling scripts -CLUSTER_MASTER=$CLUSTER-master - -# Cached values for a given cluster -MASTER_PRIVATE_IP_PATH=~/.hadooop-private-$CLUSTER_MASTER -MASTER_IP_PATH=~/.hadooop-$CLUSTER_MASTER -MASTER_ZONE_PATH=~/.hadooop-zone-$CLUSTER_MASTER - -# -# The following variables are only used when creating an AMI. -# - -# The version number of the installed JDK. -JAVA_VERSION=1.6.0_07 - -# SUPPORTED_ARCHITECTURES = ['i386', 'x86_64'] -# The download URL for the Sun JDK. Visit http://java.sun.com/javase/downloads/index.jsp and get the URL for the "Linux self-extracting file". -if [ "$INSTANCE_TYPE" == "m1.small" -o "$INSTANCE_TYPE" == "c1.medium" ]; then - ARCH='i386' - BASE_AMI_IMAGE="ami-2b5fba42" # ec2-public-images/fedora-8-i386-base-v1.07.manifest.xml - JAVA_BINARY_URL='' -else - ARCH='x86_64' - BASE_AMI_IMAGE="ami-2a5fba43" # ec2-public-images/fedora-8-x86_64-base-v1.07.manifest.xml - JAVA_BINARY_URL='' -fi - -if [ "$AMI_KERNEL" != "" ]; then - KERNEL_ARG="--kernel ${AMI_KERNEL}" -fi diff --git a/src/contrib/ec2/bin/hadoop-ec2-init-remote.sh b/src/contrib/ec2/bin/hadoop-ec2-init-remote.sh deleted file mode 100644 index 49ce37cf02c..00000000000 --- a/src/contrib/ec2/bin/hadoop-ec2-init-remote.sh +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env bash - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -################################################################################ -# Script that is run on each EC2 instance on boot. It is passed in the EC2 user -# data, so should not exceed 16K in size. -################################################################################ - -################################################################################ -# Initialize variables -################################################################################ - -# Slaves are started after the master, and are told its address by sending a -# modified copy of this file which sets the MASTER_HOST variable. -# A node knows if it is the master or not by inspecting the security group -# name. If it is the master then it retrieves its address using instance data. -MASTER_HOST=%MASTER_HOST% # Interpolated before being sent to EC2 node -SECURITY_GROUPS=`wget -q -O - http://169.254.169.254/latest/meta-data/security-groups` -IS_MASTER=`echo $SECURITY_GROUPS | awk '{ a = match ($0, "-master$"); if (a) print "true"; else print "false"; }'` -if [ "$IS_MASTER" == "true" ]; then - # use public hostnames for master. private hostnames can be used by substituting: - # MASTER_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/local-hostname` - MASTER_HOST=`wget -q -O - 'http://169.254.169.254/latest/meta-data/public-hostname'` -fi - -HADOOP_HOME=`ls -d /usr/local/hadoop-*` - -################################################################################ -# Hadoop configuration -# Modify this section to customize your Hadoop cluster. -################################################################################ - -cat > $HADOOP_HOME/conf/hadoop-site.xml < - - - - - - hadoop.tmp.dir - /mnt/hadoop - - - - fs.default.name - hdfs://$MASTER_HOST:50001 - - - - mapred.job.tracker - hdfs://$MASTER_HOST:50002 - - - - tasktracker.http.threads - 80 - - - - mapred.tasktracker.map.tasks.maximum - 3 - - - - mapred.tasktracker.reduce.tasks.maximum - 3 - - - - mapred.output.compress - true - - - - mapred.output.compression.type - BLOCK - - - - dfs.client.block.write.retries - 3 - - - - hadoop.rpc.socket.factory.class.default - org.apache.hadoop.net.StandardSocketFactory - true - - - -EOF - -# Configure Hadoop for Ganglia -# overwrite hadoop-metrics.properties -cat > $HADOOP_HOME/conf/hadoop-metrics.properties < /etc/hosts - -mkdir -p /mnt/hadoop/logs - -# not set on boot -export USER="root" - -if [ "$IS_MASTER" == "true" ]; then - # MASTER - # Prep Ganglia - sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \ - -e "s|\( *bind *=.*\)|#\1|" \ - -e "s|\( *mute *=.*\)| mute = yes|" \ - -e "s|\( *location *=.*\)| location = \"master-node\"|" \ - /etc/gmond.conf - mkdir -p /mnt/ganglia/rrds - chown -R ganglia:ganglia /mnt/ganglia/rrds - rm -rf /var/lib/ganglia; cd /var/lib; ln -s /mnt/ganglia ganglia; cd - service gmond start - service gmetad start - apachectl start - - # Hadoop - # only format on first boot - [ ! -e /mnt/hadoop/dfs ] && "$HADOOP_HOME"/bin/hadoop namenode -format - - "$HADOOP_HOME"/bin/hadoop-daemon.sh start namenode - "$HADOOP_HOME"/bin/hadoop-daemon.sh start jobtracker -else - # SLAVE - # Prep Ganglia - sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \ - -e "s|\( *bind *=.*\)|#\1|" \ - -e "s|\(udp_send_channel {\)|\1\n host=$MASTER_HOST|" \ - /etc/gmond.conf - service gmond start - - # Hadoop - "$HADOOP_HOME"/bin/hadoop-daemon.sh start datanode - "$HADOOP_HOME"/bin/hadoop-daemon.sh start tasktracker -fi - -# Run this script on next boot -rm -f /var/ec2/ec2-run-user-data.* diff --git a/src/contrib/ec2/bin/image/create-hadoop-image-remote b/src/contrib/ec2/bin/image/create-hadoop-image-remote deleted file mode 100755 index 4c67c02a094..00000000000 --- a/src/contrib/ec2/bin/image/create-hadoop-image-remote +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# Create a Hadoop AMI. Runs on the EC2 instance. - -# Import variables -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` -. "$bin"/hadoop-ec2-env.sh - -# Remove environment script since it contains sensitive information -rm -f "$bin"/hadoop-ec2-env.sh - -# Install Java -echo "Downloading and installing java binary." -cd /usr/local -wget -nv -O java.bin $JAVA_BINARY_URL -sh java.bin -rm -f java.bin - -# Install tools -echo "Installing rpms." -yum -y install rsync lynx screen ganglia-gmetad ganglia-gmond ganglia-web httpd php -yum -y clean all - -# Install Hadoop -echo "Installing Hadoop $HADOOP_VERSION." -cd /usr/local -wget -nv http://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz -[ ! -f hadoop-$HADOOP_VERSION.tar.gz ] && wget -nv http://www.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz -tar xzf hadoop-$HADOOP_VERSION.tar.gz -rm -f hadoop-$HADOOP_VERSION.tar.gz - -# Configure Hadoop -sed -i -e "s|# export JAVA_HOME=.*|export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}|" \ - -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/mnt/hadoop/logs|' \ - -e 's|# export HADOOP_SLAVE_SLEEP=.*|export HADOOP_SLAVE_SLEEP=1|' \ - -e 's|# export HADOOP_OPTS=.*|export HADOOP_OPTS=-server|' \ - /usr/local/hadoop-$HADOOP_VERSION/conf/hadoop-env.sh - -# Run user data as script on instance startup -chmod +x /etc/init.d/ec2-run-user-data -echo "/etc/init.d/ec2-run-user-data" >> /etc/rc.d/rc.local - -# Setup root user bash environment -echo "export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}" >> /root/.bash_profile -echo "export HADOOP_HOME=/usr/local/hadoop-${HADOOP_VERSION}" >> /root/.bash_profile -echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> /root/.bash_profile - -# Configure networking. -# Delete SSH authorized_keys since it includes the key it was launched with. (Note that it is re-populated when an instance starts.) -rm -f /root/.ssh/authorized_keys -# Ensure logging in to new hosts is seamless. -echo ' StrictHostKeyChecking no' >> /etc/ssh/ssh_config - -# Bundle and upload image -cd ~root -# Don't need to delete .bash_history since it isn't written until exit. -df -h -ec2-bundle-vol -d /mnt -k /mnt/pk*.pem -c /mnt/cert*.pem -u $AWS_ACCOUNT_ID -s 3072 -p hadoop-$HADOOP_VERSION-$ARCH -r $ARCH - -ec2-upload-bundle -b $S3_BUCKET -m /mnt/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml -a $AWS_ACCESS_KEY_ID -s $AWS_SECRET_ACCESS_KEY - -# End -echo Done diff --git a/src/contrib/ec2/bin/image/ec2-run-user-data b/src/contrib/ec2/bin/image/ec2-run-user-data deleted file mode 100644 index 5f2e9050154..00000000000 --- a/src/contrib/ec2/bin/image/ec2-run-user-data +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# ec2-run-user-data - Run instance user-data if it looks like a script. -# -# Only retrieves and runs the user-data script once per instance. If -# you want the user-data script to run again (e.g., on the next boot) -# then add this command in the user-data script: -# rm -f /var/ec2/ec2-run-user-data.* -# -# History: -# 2008-05-16 Eric Hammond -# - Initial version including code from Kim Scheibel, Jorge Oliveira -# 2008-08-06 Tom White -# - Updated to use mktemp on fedora -# - -prog=$(basename $0) -logger="logger -t $prog" -curl="curl --retry 3 --silent --show-error --fail" -instance_data_url=http://169.254.169.254/2008-02-01 - -# Wait until networking is up on the EC2 instance. -perl -MIO::Socket::INET -e ' - until(new IO::Socket::INET("169.254.169.254:80")){print"Waiting for network...\n";sleep 1} -' | $logger - -# Exit if we have already run on this instance (e.g., previous boot). -ami_id=$($curl $instance_data_url/meta-data/ami-id) -been_run_file=/var/ec2/$prog.$ami_id -mkdir -p $(dirname $been_run_file) -if [ -f $been_run_file ]; then - $logger < $been_run_file - exit -fi - -# Retrieve the instance user-data and run it if it looks like a script -user_data_file=`mktemp -t ec2-user-data.XXXXXXXXXX` -chmod 700 $user_data_file -$logger "Retrieving user-data" -$curl -o $user_data_file $instance_data_url/user-data 2>&1 | $logger -if [ ! -s $user_data_file ]; then - $logger "No user-data available" -elif head -1 $user_data_file | egrep -v '^#!'; then - $logger "Skipping user-data as it does not begin with #!" -else - $logger "Running user-data" - echo "user-data has already been run on this instance" > $been_run_file - $user_data_file 2>&1 | logger -t "user-data" - $logger "user-data exit code: $?" -fi -rm -f $user_data_file diff --git a/src/contrib/ec2/bin/launch-hadoop-cluster b/src/contrib/ec2/bin/launch-hadoop-cluster deleted file mode 100644 index f1aefc4e8c5..00000000000 --- a/src/contrib/ec2/bin/launch-hadoop-cluster +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Launch an EC2 cluster of Hadoop instances. - -set -o errexit - -# Import variables -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` - -if [ -z $1 ]; then - echo "Cluster name required!" - exit -1 -fi - -if [ -z $2 ]; then - echo "Must specify the number of slaves to start." - exit -1 -fi - -if ! "$bin"/launch-hadoop-master $1 ; then - exit $? -fi - -if ! "$bin"/launch-hadoop-slaves $*; then - exit $? -fi diff --git a/src/contrib/ec2/bin/launch-hadoop-master b/src/contrib/ec2/bin/launch-hadoop-master deleted file mode 100644 index 01b9724f6e0..00000000000 --- a/src/contrib/ec2/bin/launch-hadoop-master +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Launch an EC2 Hadoop master. - -set -o errexit - -if [ -z $1 ]; then - echo "Cluster name required!" - exit -1 -fi - -CLUSTER=$1 - -# Import variables -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` -. "$bin"/hadoop-ec2-env.sh - -if [ -z $AWS_ACCOUNT_ID ]; then - echo "Please set AWS_ACCOUNT_ID in $bin/hadoop-ec2-env.sh." - exit -1 -fi - -echo "Testing for existing master in group: $CLUSTER" -MASTER_EC2_HOST=`ec2-describe-instances | awk '"RESERVATION" == $1 && "'$CLUSTER_MASTER'" == $4, "RESERVATION" == $1 && "'$CLUSTER_MASTER'" != $4'` -MASTER_EC2_HOST=`echo "$MASTER_EC2_HOST" | awk '"INSTANCE" == $1 && "running" == $6 {print $4}'` - -if [ ! -z "$MASTER_EC2_HOST" ]; then - echo "Master already running on: $MASTER_EC2_HOST" - MASTER_HOST=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_EC2_HOST | awk '{print $5}'` - echo $MASTER_HOST > $MASTER_PRIVATE_IP_PATH - echo $MASTER_EC2_HOST > $MASTER_IP_PATH - exit 0 -fi - -if ! ec2-describe-group $CLUSTER_MASTER > /dev/null 2>&1; then - echo "Creating group $CLUSTER_MASTER" - ec2-add-group $CLUSTER_MASTER -d "Group for Hadoop Master." - ec2-authorize $CLUSTER_MASTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID - ec2-authorize $CLUSTER_MASTER -p 22 # ssh - - if [ $ENABLE_WEB_PORTS == "true" ]; then - ec2-authorize $CLUSTER_MASTER -p 50030 # JobTracker web interface - ec2-authorize $CLUSTER_MASTER -p 50060 # TaskTracker web interface - ec2-authorize $CLUSTER_MASTER -p 50070 # NameNode web interface - ec2-authorize $CLUSTER_MASTER -p 50075 # DataNode web interface - fi -fi - -if ! ec2-describe-group $CLUSTER > /dev/null 2>&1; then - echo "Creating group $CLUSTER" - ec2-add-group $CLUSTER -d "Group for Hadoop Slaves." - ec2-authorize $CLUSTER -o $CLUSTER -u $AWS_ACCOUNT_ID - ec2-authorize $CLUSTER -p 22 # ssh - - if [ $ENABLE_WEB_PORTS == "true" ]; then - ec2-authorize $CLUSTER -p 50030 # JobTracker web interface - ec2-authorize $CLUSTER -p 50060 # TaskTracker web interface - ec2-authorize $CLUSTER -p 50070 # NameNode web interface - ec2-authorize $CLUSTER -p 50075 # DataNode web interface - fi - - ec2-authorize $CLUSTER_MASTER -o $CLUSTER -u $AWS_ACCOUNT_ID - ec2-authorize $CLUSTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID -fi - -# Finding Hadoop image -AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'` - -# Start a master -echo "Starting master with AMI $AMI_IMAGE" -USER_DATA="MASTER_HOST=master,MAX_MAP_TASKS=$MAX_MAP_TASKS,MAX_REDUCE_TASKS=$MAX_REDUCE_TASKS,COMPRESS=$COMPRESS" -INSTANCE=`ec2-run-instances $AMI_IMAGE -n 1 -g $CLUSTER_MASTER -k $KEY_NAME -f "$bin"/$USER_DATA_FILE -t $INSTANCE_TYPE $KERNEL_ARG | grep INSTANCE | awk '{print $2}'` -echo "Waiting for instance $INSTANCE to start" -while true; do - printf "." - # get private dns - MASTER_HOST=`ec2-describe-instances $INSTANCE | grep running | awk '{print $5}'` - if [ ! -z $MASTER_HOST ]; then - echo "Started as $MASTER_HOST" - break; - fi - sleep 1 -done - -MASTER_EC2_HOST=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_HOST | awk '{print $4}'` -echo $MASTER_HOST > $MASTER_PRIVATE_IP_PATH -echo $MASTER_EC2_HOST > $MASTER_IP_PATH -MASTER_EC2_ZONE=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_HOST | awk '{print $11}'` -echo $MASTER_EC2_ZONE > $MASTER_ZONE_PATH - -while true; do - if ssh $SSH_OPTS "root@$MASTER_EC2_HOST" 'echo "hello"' > /dev/null 2>&1; then - break; - fi - sleep 5 -done - -echo "Copying private key to master" -scp $SSH_OPTS $PRIVATE_KEY_PATH "root@$MASTER_EC2_HOST:/root/.ssh/id_rsa" -ssh $SSH_OPTS "root@$MASTER_EC2_HOST" "chmod 600 /root/.ssh/id_rsa" - -MASTER_IP=`dig +short $MASTER_EC2_HOST` -echo "Master is $MASTER_EC2_HOST, ip is $MASTER_IP, zone is $MASTER_EC2_ZONE." diff --git a/src/contrib/ec2/bin/launch-hadoop-slaves b/src/contrib/ec2/bin/launch-hadoop-slaves deleted file mode 100644 index 503e57f94ef..00000000000 --- a/src/contrib/ec2/bin/launch-hadoop-slaves +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Launch an EC2 Hadoop slaves. - -set -o errexit - -if [ -z $1 ]; then - echo "Cluster name required!" - exit -1 -fi - -if [ -z $2 ]; then - echo "Must specify the number of slaves to start." - exit -1 -fi - -CLUSTER=$1 -NO_INSTANCES=$2 - -# Import variables -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` -. "$bin"/hadoop-ec2-env.sh - -if [ ! -f $MASTER_IP_PATH ]; then - echo "Must start Cluster Master first!" - exit -1 -fi - -# Finding Hadoop image -AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH |grep available | awk '{print $2}'` -# to use private master hostname, substitute below with: -# MASTER_HOST=`cat $MASTER_PRIVATE_IP_PATH` -MASTER_HOST=`cat $MASTER_IP_PATH` -MASTER_ZONE=`cat $MASTER_ZONE_PATH` - -# Substituting master hostname -sed -e "s|%MASTER_HOST%|$MASTER_HOST|" "$bin"/$USER_DATA_FILE > "$bin"/$USER_DATA_FILE.slave - -# Start slaves -echo "Adding $1 node(s) to cluster group $CLUSTER with AMI $AMI_IMAGE" -ec2-run-instances $AMI_IMAGE -n "$NO_INSTANCES" -g "$CLUSTER" -k "$KEY_NAME" -f "$bin"/$USER_DATA_FILE.slave -t "$INSTANCE_TYPE" -z "$MASTER_ZONE" $KERNEL_ARG | grep INSTANCE | awk '{print $2}' - -rm "$bin"/$USER_DATA_FILE.slave diff --git a/src/contrib/ec2/bin/list-hadoop-clusters b/src/contrib/ec2/bin/list-hadoop-clusters deleted file mode 100644 index 422e4d0c357..00000000000 --- a/src/contrib/ec2/bin/list-hadoop-clusters +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# List running clusters. - -set -o errexit - -# Import variables -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` -. "$bin"/hadoop-ec2-env.sh - -# Finding Hadoop clusters -CLUSTERS=`ec2-describe-instances | awk '"RESERVATION" == $1 && $4 ~ /-master$/, "INSTANCE" == $1' | tr '\n' '\t' | grep running | cut -f4 | rev | cut -d'-' -f2- | rev` - -[ -z "$CLUSTERS" ] && echo "No running clusters." && exit 0 - -echo "Running Hadoop clusters:" -echo "$CLUSTERS" diff --git a/src/contrib/ec2/bin/terminate-hadoop-cluster b/src/contrib/ec2/bin/terminate-hadoop-cluster deleted file mode 100755 index ff20cfec013..00000000000 --- a/src/contrib/ec2/bin/terminate-hadoop-cluster +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Terminate a cluster. - -set -o errexit - -if [ -z $1 ]; then - echo "Cluster name required!" - exit -1 -fi - -CLUSTER=$1 - -# Import variables -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` -. "$bin"/hadoop-ec2-env.sh - -# Finding Hadoop image -HADOOP_INSTANCES=`ec2-describe-instances | awk '"RESERVATION" == $1 && ("'$CLUSTER'" == $4 || "'$CLUSTER_MASTER'" == $4), "RESERVATION" == $1 && ("'$CLUSTER'" != $4 && "'$CLUSTER_MASTER'" != $4)'` -HADOOP_INSTANCES=`echo "$HADOOP_INSTANCES" | grep INSTANCE | grep running` - -[ -z "$HADOOP_INSTANCES" ] && echo "No running instances in cluster $CLUSTER." && exit 0 - -echo "Running Hadoop instances:" -echo "$HADOOP_INSTANCES" -read -p "Terminate all instances? [yes or no]: " answer - -if [ "$answer" != "yes" ]; then - exit 1 -fi - -ec2-terminate-instances `echo "$HADOOP_INSTANCES" | awk '{print $2}'`