HADOOP-6811. Remove EC2 bash scripts.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1057795 13f79535-47bb-0310-9956-ffa450edef68
Thomas White 2011-01-11 18:50:24 +00:00
parent 496b24d30b
commit f015768d3f
15 changed files with 3 additions and 999 deletions

View File

@@ -258,6 +258,9 @@ Release 0.22.0 - Unreleased
HADOOP-6578. Configuration should trim whitespace around a lot of value
types. (Michele Catasta via eli)
HADOOP-6811. Remove EC2 bash scripts. They are replaced by Apache Whirr
(incubating, http://incubator.apache.org/whirr). (tomwhite)
OPTIMIZATIONS
HADOOP-6884. Add LOG.isDebugEnabled() guard for each LOG.debug(..). HADOOP-6884. Add LOG.isDebugEnabled() guard for each LOG.debug(..).

View File

@@ -1,15 +0,0 @@
Hadoop EC2
NOTE: these scripts have been deprecated. See http://incubator.apache.org/whirr.
This collection of scripts allows you to run Hadoop clusters on Amazon.com's Elastic Compute Cloud (EC2) service described at:
http://aws.amazon.com/ec2
To get help, type the following in a shell:
bin/hadoop-ec2
For full instructions, please visit the Hadoop wiki at:
http://wiki.apache.org/hadoop/AmazonEC2#AutomatedScripts
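As a rough sketch of the Whirr equivalent (property names follow the Whirr getting-started documentation; the cluster name, roles, and sizes below are illustrative placeholders):

# hadoop.properties
whirr.cluster-name=myhadoopcluster
whirr.instance-templates=1 hadoop-namenode+hadoop-jobtracker,5 hadoop-datanode+hadoop-tasktracker
whirr.provider=aws-ec2
whirr.identity=${env:AWS_ACCESS_KEY_ID}
whirr.credential=${env:AWS_SECRET_ACCESS_KEY}

# Launch the cluster, and destroy it when finished:
bin/whirr launch-cluster --config hadoop.properties
bin/whirr destroy-cluster --config hadoop.properties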

View File

@@ -1,71 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run commands on master or specified node of a running Hadoop EC2 cluster.
set -o errexit
# if no args specified, show usage
if [ $# = 0 ]; then
echo "Command required!"
exit 1
fi
# get arguments
COMMAND="$1"
shift
# get group
CLUSTER="$1"
shift
if [ -z "$CLUSTER" ]; then
echo "Cluster name or instance id required!"
exit 1
fi
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh
if [[ $CLUSTER == i-* ]]; then
HOST=`ec2-describe-instances $CLUSTER | grep running | awk '{print $4}'`
[ -z "$HOST" ] && echo "Instance still pending or no longer running: $CLUSTER" && exit 1
else
[ ! -f "$MASTER_IP_PATH" ] && echo "Wrong group name, or cluster not launched! $CLUSTER" && exit 1
HOST=`cat $MASTER_IP_PATH`
fi
if [ "$COMMAND" = "login" ] ; then
echo "Logging in to host $HOST."
ssh $SSH_OPTS "root@$HOST"
elif [ "$COMMAND" = "proxy" ] ; then
echo "Proxying to host $HOST via local port 6666"
echo "Gangia: http://$HOST/ganglia"
echo "JobTracker: http://$HOST:50030/"
echo "NameNode: http://$HOST:50070/"
ssh $SSH_OPTS -D 6666 -N "root@$HOST"
elif [ "$COMMAND" = "push" ] ; then
echo "Pushing $1 to host $HOST."
scp $SSH_OPTS -r $1 "root@$HOST:"
elif [ "$COMMAND" = "screen" ] ; then
echo "Logging in and attaching screen on host $HOST."
ssh $SSH_OPTS -t "root@$HOST" 'screen -D -R'
else
echo "Executing command on host $HOST."
ssh $SSH_OPTS -t "root@$HOST" "$COMMAND"
fi
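
For example, assuming a cluster named "my-cluster" was launched with these scripts, typical invocations of the commands above would be:

bin/cmd-hadoop-cluster login my-cluster          # ssh to the master
bin/cmd-hadoop-cluster proxy my-cluster          # SOCKS proxy on localhost:6666
bin/cmd-hadoop-cluster push my-cluster conf.tar  # scp a file to the master
bin/cmd-hadoop-cluster "jps" my-cluster          # run an arbitrary command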

View File

@@ -1,80 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Create a Hadoop AMI.
# Inspired by Jonathan Siegel's EC2 script (http://blogsiegel.blogspot.com/2006/08/sandboxing-amazon-ec2.html)
set -o errexit
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh
AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'`
[ -n "$AMI_IMAGE" ] && echo "AMI already registered, use: ec2-deregister $AMI_IMAGE" && exit 1
echo "Starting a AMI with ID $BASE_AMI_IMAGE."
OUTPUT=`ec2-run-instances $BASE_AMI_IMAGE -k $KEY_NAME -t $INSTANCE_TYPE`
BOOTING_INSTANCE=`echo $OUTPUT | awk '{print $6}'`
echo "Instance is $BOOTING_INSTANCE."
echo "Polling server status (ec2-describe-instances $BOOTING_INSTANCE)"
while true; do
printf "."
HOSTNAME=`ec2-describe-instances $BOOTING_INSTANCE | grep running | awk '{print $4}'`
if [ -n "$HOSTNAME" ]; then
break;
fi
sleep 1
done
echo "The server is available at $HOSTNAME."
while true; do
REPLY=`ssh $SSH_OPTS "root@$HOSTNAME" 'echo "hello"'`
if [ -n "$REPLY" ]; then
break;
fi
sleep 5
done
read -p "Login first? [yes or no]: " answer
if [ "$answer" == "yes" ]; then
ssh $SSH_OPTS "root@$HOSTNAME"
fi
echo "Copying scripts."
# Copy setup scripts
scp $SSH_OPTS "$bin"/hadoop-ec2-env.sh "root@$HOSTNAME:/mnt"
scp $SSH_OPTS "$bin"/image/create-hadoop-image-remote "root@$HOSTNAME:/mnt"
scp $SSH_OPTS "$bin"/image/ec2-run-user-data "root@$HOSTNAME:/etc/init.d"
# Copy private key and certificate (for bundling image)
scp $SSH_OPTS $EC2_KEYDIR/pk*.pem "root@$HOSTNAME:/mnt"
scp $SSH_OPTS $EC2_KEYDIR/cert*.pem "root@$HOSTNAME:/mnt"
# Connect to it
ssh $SSH_OPTS "root@$HOSTNAME" '/mnt/create-hadoop-image-remote'
# Register image
ec2-register $S3_BUCKET/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml
echo "Terminate with: ec2-terminate-instances $BOOTING_INSTANCE"

View File

@@ -1,60 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Delete the groups and local files associated with a cluster.
set -o errexit
if [ -z "$1" ]; then
echo "Cluster name required!"
exit 1
fi
CLUSTER=$1
# Finding Hadoop clusters
CLUSTERS=`ec2-describe-instances | \
awk '"RESERVATION" == $1 && $4 ~ /-master$/, "INSTANCE" == $1' | tr '\n' '\t' | \
grep "$CLUSTER" | grep running | cut -f4 | rev | cut -d'-' -f2- | rev`
if [ -n "$CLUSTERS" ]; then
echo "Cluster $CLUSTER has running instances. Please terminate them first."
exit 0
fi
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh
rm -f $MASTER_IP_PATH
rm -f $MASTER_PRIVATE_IP_PATH
if ec2-describe-group $CLUSTER_MASTER > /dev/null 2>&1; then
if ec2-describe-group $CLUSTER > /dev/null 2>&1; then
echo "Revoking authorization between $CLUSTER_MASTER and $CLUSTER"
ec2-revoke $CLUSTER_MASTER -o $CLUSTER -u $AWS_ACCOUNT_ID || true
ec2-revoke $CLUSTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID || true
fi
echo "Deleting group $CLUSTER_MASTER"
ec2-delete-group $CLUSTER_MASTER
fi
if ec2-describe-group $CLUSTER > /dev/null 2>&1; then
echo "Deleting group $CLUSTER"
ec2-delete-group $CLUSTER
fi

View File

@@ -1,65 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
echo "DEPRECATED. See http://incubator.apache.org/whirr." >&2
# if no args specified, show usage
if [ $# = 0 ]; then
echo "Usage: hadoop-ec2 COMMAND"
echo "where COMMAND is one of:"
echo " list list all running Hadoop EC2 clusters"
echo " launch-cluster <group> <num slaves> launch a cluster of Hadoop EC2 instances - launch-master then launch-slaves"
echo " launch-master <group> launch or find a cluster master"
echo " launch-slaves <group> <num slaves> launch the cluster slaves"
echo " terminate-cluster <group> terminate all Hadoop EC2 instances"
echo " delete-cluster <group> delete the group information for a terminated cluster"
echo " login <group|instance id> login to the master node of the Hadoop EC2 cluster"
echo " screen <group|instance id> start or attach 'screen' on the master node of the Hadoop EC2 cluster"
echo " proxy <group|instance id> start a socks proxy on localhost:6666 (use w/foxyproxy)"
echo " push <group> <file> scp a file to the master node of the Hadoop EC2 cluster"
echo " <shell cmd> <group|instance id> execute any command remotely on the master"
echo " create-image create a Hadoop AMI"
exit 1
fi
# get arguments
COMMAND="$1"
shift
if [ "$COMMAND" = "create-image" ] ; then
. "$bin"/create-hadoop-image $*
elif [ "$COMMAND" = "launch-cluster" ] ; then
. "$bin"/launch-hadoop-cluster $*
elif [ "$COMMAND" = "launch-master" ] ; then
. "$bin"/launch-hadoop-master $*
elif [ "$COMMAND" = "launch-slaves" ] ; then
. "$bin"/launch-hadoop-slaves $*
elif [ "$COMMAND" = "delete-cluster" ] ; then
. "$bin"/delete-hadoop-cluster $*
elif [ "$COMMAND" = "terminate-cluster" ] ; then
. "$bin"/terminate-hadoop-cluster $*
elif [ "$COMMAND" = "list" ] ; then
. "$bin"/list-hadoop-clusters
else
. "$bin"/cmd-hadoop-cluster "$COMMAND" $*
fi
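
A typical end-to-end session with this wrapper (cluster name and size are placeholders) looks like:

bin/hadoop-ec2 launch-cluster my-cluster 5   # master plus 5 slaves
bin/hadoop-ec2 login my-cluster              # ssh to the master node
bin/hadoop-ec2 terminate-cluster my-cluster  # shut the instances down
bin/hadoop-ec2 delete-cluster my-cluster     # remove groups and cached IPs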

View File

@@ -1,93 +0,0 @@
# Set environment variables for running Hadoop on Amazon EC2 here. All are required.
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Your Amazon Account Number.
AWS_ACCOUNT_ID=
# Your Amazon AWS access key.
AWS_ACCESS_KEY_ID=
# Your Amazon AWS secret access key.
AWS_SECRET_ACCESS_KEY=
# Location of EC2 keys.
# The default setting is probably OK if you set up EC2 following the Amazon Getting Started guide.
EC2_KEYDIR=`dirname "$EC2_PRIVATE_KEY"`
# The EC2 key name used to launch instances.
# The default is the value used in the Amazon Getting Started guide.
KEY_NAME=gsg-keypair
# Where your EC2 private key is stored (created when following the Amazon Getting Started guide).
# You need to change this if you don't store this with your other EC2 keys.
PRIVATE_KEY_PATH=`echo "$EC2_KEYDIR"/"id_rsa-$KEY_NAME"`
# SSH options used when connecting to EC2 instances.
SSH_OPTS=`echo -i "$PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no -o ServerAliveInterval=30`
# The version of Hadoop to use.
HADOOP_VERSION=0.19.0
# The Amazon S3 bucket where the Hadoop AMI is stored.
# The default value is for public images, so it can be left as-is if you are running a public image.
# Change this value only if you are creating your own (private) AMI
# so you can store it in a bucket you own.
S3_BUCKET=hadoop-images
# Enable public access to JobTracker and TaskTracker web interfaces
ENABLE_WEB_PORTS=true
# The script to run on instance boot.
USER_DATA_FILE=hadoop-ec2-init-remote.sh
# The EC2 instance type: m1.small, m1.large, m1.xlarge
INSTANCE_TYPE="m1.small"
#INSTANCE_TYPE="m1.large"
#INSTANCE_TYPE="m1.xlarge"
#INSTANCE_TYPE="c1.medium"
#INSTANCE_TYPE="c1.xlarge"
# The EC2 group master name. CLUSTER is set by calling scripts
CLUSTER_MASTER=$CLUSTER-master
# Cached values for a given cluster
MASTER_PRIVATE_IP_PATH=~/.hadoop-private-$CLUSTER_MASTER
MASTER_IP_PATH=~/.hadoop-$CLUSTER_MASTER
MASTER_ZONE_PATH=~/.hadoop-zone-$CLUSTER_MASTER
#
# The following variables are only used when creating an AMI.
#
# The version number of the installed JDK.
JAVA_VERSION=1.6.0_07
# SUPPORTED_ARCHITECTURES = ['i386', 'x86_64']
# The download URL for the Sun JDK. Visit http://java.sun.com/javase/downloads/index.jsp and get the URL for the "Linux self-extracting file".
if [ "$INSTANCE_TYPE" == "m1.small" -o "$INSTANCE_TYPE" == "c1.medium" ]; then
ARCH='i386'
BASE_AMI_IMAGE="ami-2b5fba42" # ec2-public-images/fedora-8-i386-base-v1.07.manifest.xml
JAVA_BINARY_URL=''
else
ARCH='x86_64'
BASE_AMI_IMAGE="ami-2a5fba43" # ec2-public-images/fedora-8-x86_64-base-v1.07.manifest.xml
JAVA_BINARY_URL=''
fi
if [ "$AMI_KERNEL" != "" ]; then
KERNEL_ARG="--kernel ${AMI_KERNEL}"
fi
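
For a first launch against the public AMIs, only the credentials at the top of this file need values; a minimal example (using entirely fictitious account values):

AWS_ACCOUNT_ID=123456789012
AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY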

View File

@@ -1,171 +0,0 @@
#!/usr/bin/env bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
# Script that is run on each EC2 instance on boot. It is passed in the EC2 user
# data, so should not exceed 16K in size.
################################################################################
################################################################################
# Initialize variables
################################################################################
# Slaves are started after the master, and are told its address by sending a
# modified copy of this file which sets the MASTER_HOST variable.
# A node knows if it is the master or not by inspecting the security group
# name. If it is the master then it retrieves its address using instance data.
MASTER_HOST=%MASTER_HOST% # Interpolated before being sent to EC2 node
SECURITY_GROUPS=`wget -q -O - http://169.254.169.254/latest/meta-data/security-groups`
IS_MASTER=`echo $SECURITY_GROUPS | awk '{ a = match ($0, "-master$"); if (a) print "true"; else print "false"; }'`
if [ "$IS_MASTER" == "true" ]; then
# use public hostnames for master. private hostnames can be used by substituting:
# MASTER_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/local-hostname`
MASTER_HOST=`wget -q -O - 'http://169.254.169.254/latest/meta-data/public-hostname'`
fi
HADOOP_HOME=`ls -d /usr/local/hadoop-*`
################################################################################
# Hadoop configuration
# Modify this section to customize your Hadoop cluster.
################################################################################
cat > $HADOOP_HOME/conf/hadoop-site.xml <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/mnt/hadoop</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://$MASTER_HOST:50001</value>
</property>
<property>
<name>mapred.job.tracker</name>
<value>hdfs://$MASTER_HOST:50002</value>
</property>
<property>
<name>tasktracker.http.threads</name>
<value>80</value>
</property>
<property>
<name>mapred.tasktracker.map.tasks.maximum</name>
<value>3</value>
</property>
<property>
<name>mapred.tasktracker.reduce.tasks.maximum</name>
<value>3</value>
</property>
<property>
<name>mapred.output.compress</name>
<value>true</value>
</property>
<property>
<name>mapred.output.compression.type</name>
<value>BLOCK</value>
</property>
<property>
<name>dfs.client.block.write.retries</name>
<value>3</value>
</property>
<property>
<name>hadoop.rpc.socket.factory.class.default</name>
<value>org.apache.hadoop.net.StandardSocketFactory</value>
<final>true</final>
</property>
</configuration>
EOF
# Configure Hadoop for Ganglia
# overwrite hadoop-metrics.properties
cat > $HADOOP_HOME/conf/hadoop-metrics.properties <<EOF
# Ganglia
# we push to the master gmond so hostnames show up properly
dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
dfs.period=10
dfs.servers=$MASTER_HOST:8649
mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
mapred.period=10
mapred.servers=$MASTER_HOST:8649
jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
jvm.period=10
jvm.servers=$MASTER_HOST:8649
EOF
################################################################################
# Start services
################################################################################
[ ! -f /etc/hosts ] && echo "127.0.0.1 localhost" > /etc/hosts
mkdir -p /mnt/hadoop/logs
# not set on boot
export USER="root"
if [ "$IS_MASTER" == "true" ]; then
# MASTER
# Prep Ganglia
sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \
-e "s|\( *bind *=.*\)|#\1|" \
-e "s|\( *mute *=.*\)| mute = yes|" \
-e "s|\( *location *=.*\)| location = \"master-node\"|" \
/etc/gmond.conf
mkdir -p /mnt/ganglia/rrds
chown -R ganglia:ganglia /mnt/ganglia/rrds
rm -rf /var/lib/ganglia; cd /var/lib; ln -s /mnt/ganglia ganglia; cd
service gmond start
service gmetad start
apachectl start
# Hadoop
# only format on first boot
[ ! -e /mnt/hadoop/dfs ] && "$HADOOP_HOME"/bin/hadoop namenode -format
"$HADOOP_HOME"/bin/hadoop-daemon.sh start namenode
"$HADOOP_HOME"/bin/hadoop-daemon.sh start jobtracker
else
# SLAVE
# Prep Ganglia
sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \
-e "s|\( *bind *=.*\)|#\1|" \
-e "s|\(udp_send_channel {\)|\1\n host=$MASTER_HOST|" \
/etc/gmond.conf
service gmond start
# Hadoop
"$HADOOP_HOME"/bin/hadoop-daemon.sh start datanode
"$HADOOP_HOME"/bin/hadoop-daemon.sh start tasktracker
fi
# Run this script on next boot
rm -f /var/ec2/ec2-run-user-data.*
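
Note that the %MASTER_HOST% placeholder at the top of this file is filled in by launch-hadoop-slaves before the script is passed as user data, along the lines of (hostname is a placeholder):

sed -e "s|%MASTER_HOST%|ec2-203-0-113-10.compute-1.amazonaws.com|" \
  hadoop-ec2-init-remote.sh > hadoop-ec2-init-remote.sh.slave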

View File

@@ -1,80 +0,0 @@
#!/bin/sh
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Create a Hadoop AMI. Runs on the EC2 instance.
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh
# Remove environment script since it contains sensitive information
rm -f "$bin"/hadoop-ec2-env.sh
# Install Java
echo "Downloading and installing java binary."
cd /usr/local
wget -nv -O java.bin $JAVA_BINARY_URL
sh java.bin
rm -f java.bin
# Install tools
echo "Installing rpms."
yum -y install rsync lynx screen ganglia-gmetad ganglia-gmond ganglia-web httpd php
yum -y clean all
# Install Hadoop
echo "Installing Hadoop $HADOOP_VERSION."
cd /usr/local
wget -nv http://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
[ ! -f hadoop-$HADOOP_VERSION.tar.gz ] && wget -nv http://www.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
tar xzf hadoop-$HADOOP_VERSION.tar.gz
rm -f hadoop-$HADOOP_VERSION.tar.gz
# Configure Hadoop
sed -i -e "s|# export JAVA_HOME=.*|export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}|" \
-e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/mnt/hadoop/logs|' \
-e 's|# export HADOOP_SLAVE_SLEEP=.*|export HADOOP_SLAVE_SLEEP=1|' \
-e 's|# export HADOOP_OPTS=.*|export HADOOP_OPTS=-server|' \
/usr/local/hadoop-$HADOOP_VERSION/conf/hadoop-env.sh
# Run user data as script on instance startup
chmod +x /etc/init.d/ec2-run-user-data
echo "/etc/init.d/ec2-run-user-data" >> /etc/rc.d/rc.local
# Setup root user bash environment
echo "export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}" >> /root/.bash_profile
echo "export HADOOP_HOME=/usr/local/hadoop-${HADOOP_VERSION}" >> /root/.bash_profile
echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> /root/.bash_profile
# Configure networking.
# Delete SSH authorized_keys since it includes the key it was launched with. (Note that it is re-populated when an instance starts.)
rm -f /root/.ssh/authorized_keys
# Ensure logging in to new hosts is seamless.
echo ' StrictHostKeyChecking no' >> /etc/ssh/ssh_config
# Bundle and upload image
cd ~root
# Don't need to delete .bash_history since it isn't written until exit.
df -h
ec2-bundle-vol -d /mnt -k /mnt/pk*.pem -c /mnt/cert*.pem -u $AWS_ACCOUNT_ID -s 3072 -p hadoop-$HADOOP_VERSION-$ARCH -r $ARCH
ec2-upload-bundle -b $S3_BUCKET -m /mnt/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml -a $AWS_ACCESS_KEY_ID -s $AWS_SECRET_ACCESS_KEY
# End
echo Done

View File

@@ -1,63 +0,0 @@
#!/bin/bash
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ec2-run-user-data - Run instance user-data if it looks like a script.
#
# Only retrieves and runs the user-data script once per instance. If
# you want the user-data script to run again (e.g., on the next boot)
# then add this command in the user-data script:
# rm -f /var/ec2/ec2-run-user-data.*
#
# History:
# 2008-05-16 Eric Hammond <ehammond@thinksome.com>
# - Initial version including code from Kim Scheibel, Jorge Oliveira
# 2008-08-06 Tom White
# - Updated to use mktemp on fedora
#
prog=$(basename $0)
logger="logger -t $prog"
curl="curl --retry 3 --silent --show-error --fail"
instance_data_url=http://169.254.169.254/2008-02-01
# Wait until networking is up on the EC2 instance.
perl -MIO::Socket::INET -e '
until(new IO::Socket::INET("169.254.169.254:80")){print"Waiting for network...\n";sleep 1}
' | $logger
# Exit if we have already run on this instance (e.g., previous boot).
ami_id=$($curl $instance_data_url/meta-data/ami-id)
been_run_file=/var/ec2/$prog.$ami_id
mkdir -p $(dirname $been_run_file)
if [ -f $been_run_file ]; then
$logger < $been_run_file
exit
fi
# Retrieve the instance user-data and run it if it looks like a script
user_data_file=`mktemp -t ec2-user-data.XXXXXXXXXX`
chmod 700 $user_data_file
$logger "Retrieving user-data"
$curl -o $user_data_file $instance_data_url/user-data 2>&1 | $logger
if [ ! -s $user_data_file ]; then
$logger "No user-data available"
elif head -1 $user_data_file | egrep -v '^#!'; then
$logger "Skipping user-data as it does not begin with #!"
else
$logger "Running user-data"
echo "user-data has already been run on this instance" > $been_run_file
$user_data_file 2>&1 | logger -t "user-data"
$logger "user-data exit code: $?"
fi
rm -f $user_data_file
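
Any payload whose first line is a #! interpreter line will be executed; a minimal illustrative user-data script:

#!/usr/bin/env bash
# Runs once per instance; delete /var/ec2/ec2-run-user-data.* to re-run on
# the next boot.
echo "first boot: $(date)" >> /var/log/first-boot.log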

View File

@@ -1,42 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launch an EC2 cluster of Hadoop instances.
set -o errexit
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
if [ -z "$1" ]; then
echo "Cluster name required!"
exit 1
fi
if [ -z "$2" ]; then
echo "Must specify the number of slaves to start."
exit 1
fi
if ! "$bin"/launch-hadoop-master $1 ; then
exit $?
fi
if ! "$bin"/launch-hadoop-slaves $*; then
exit $?
fi

View File

@@ -1,119 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launch an EC2 Hadoop master.
set -o errexit
if [ -z "$1" ]; then
echo "Cluster name required!"
exit 1
fi
CLUSTER=$1
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh
if [ -z "$AWS_ACCOUNT_ID" ]; then
echo "Please set AWS_ACCOUNT_ID in $bin/hadoop-ec2-env.sh."
exit 1
fi
echo "Testing for existing master in group: $CLUSTER"
MASTER_EC2_HOST=`ec2-describe-instances | awk '"RESERVATION" == $1 && "'$CLUSTER_MASTER'" == $4, "RESERVATION" == $1 && "'$CLUSTER_MASTER'" != $4'`
MASTER_EC2_HOST=`echo "$MASTER_EC2_HOST" | awk '"INSTANCE" == $1 && "running" == $6 {print $4}'`
if [ ! -z "$MASTER_EC2_HOST" ]; then
echo "Master already running on: $MASTER_EC2_HOST"
MASTER_HOST=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_EC2_HOST | awk '{print $5}'`
echo $MASTER_HOST > $MASTER_PRIVATE_IP_PATH
echo $MASTER_EC2_HOST > $MASTER_IP_PATH
exit 0
fi
if ! ec2-describe-group $CLUSTER_MASTER > /dev/null 2>&1; then
echo "Creating group $CLUSTER_MASTER"
ec2-add-group $CLUSTER_MASTER -d "Group for Hadoop Master."
ec2-authorize $CLUSTER_MASTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID
ec2-authorize $CLUSTER_MASTER -p 22 # ssh
if [ $ENABLE_WEB_PORTS == "true" ]; then
ec2-authorize $CLUSTER_MASTER -p 50030 # JobTracker web interface
ec2-authorize $CLUSTER_MASTER -p 50060 # TaskTracker web interface
ec2-authorize $CLUSTER_MASTER -p 50070 # NameNode web interface
ec2-authorize $CLUSTER_MASTER -p 50075 # DataNode web interface
fi
fi
if ! ec2-describe-group $CLUSTER > /dev/null 2>&1; then
echo "Creating group $CLUSTER"
ec2-add-group $CLUSTER -d "Group for Hadoop Slaves."
ec2-authorize $CLUSTER -o $CLUSTER -u $AWS_ACCOUNT_ID
ec2-authorize $CLUSTER -p 22 # ssh
if [ $ENABLE_WEB_PORTS == "true" ]; then
ec2-authorize $CLUSTER -p 50030 # JobTracker web interface
ec2-authorize $CLUSTER -p 50060 # TaskTracker web interface
ec2-authorize $CLUSTER -p 50070 # NameNode web interface
ec2-authorize $CLUSTER -p 50075 # DataNode web interface
fi
ec2-authorize $CLUSTER_MASTER -o $CLUSTER -u $AWS_ACCOUNT_ID
ec2-authorize $CLUSTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID
fi
# Finding Hadoop image
AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'`
# Start a master
echo "Starting master with AMI $AMI_IMAGE"
USER_DATA="MASTER_HOST=master,MAX_MAP_TASKS=$MAX_MAP_TASKS,MAX_REDUCE_TASKS=$MAX_REDUCE_TASKS,COMPRESS=$COMPRESS"
INSTANCE=`ec2-run-instances $AMI_IMAGE -n 1 -g $CLUSTER_MASTER -k $KEY_NAME -f "$bin"/$USER_DATA_FILE -t $INSTANCE_TYPE $KERNEL_ARG | grep INSTANCE | awk '{print $2}'`
echo "Waiting for instance $INSTANCE to start"
while true; do
printf "."
# get private dns
MASTER_HOST=`ec2-describe-instances $INSTANCE | grep running | awk '{print $5}'`
if [ -n "$MASTER_HOST" ]; then
echo "Started as $MASTER_HOST"
break;
fi
sleep 1
done
MASTER_EC2_HOST=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_HOST | awk '{print $4}'`
echo $MASTER_HOST > $MASTER_PRIVATE_IP_PATH
echo $MASTER_EC2_HOST > $MASTER_IP_PATH
MASTER_EC2_ZONE=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_HOST | awk '{print $11}'`
echo $MASTER_EC2_ZONE > $MASTER_ZONE_PATH
while true; do
if ssh $SSH_OPTS "root@$MASTER_EC2_HOST" 'echo "hello"' > /dev/null 2>&1; then
break;
fi
sleep 5
done
echo "Copying private key to master"
scp $SSH_OPTS $PRIVATE_KEY_PATH "root@$MASTER_EC2_HOST:/root/.ssh/id_rsa"
ssh $SSH_OPTS "root@$MASTER_EC2_HOST" "chmod 600 /root/.ssh/id_rsa"
MASTER_IP=`dig +short $MASTER_EC2_HOST`
echo "Master is $MASTER_EC2_HOST, ip is $MASTER_IP, zone is $MASTER_EC2_ZONE."

View File

@@ -1,59 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Launch EC2 Hadoop slave instances.
set -o errexit
if [ -z "$1" ]; then
echo "Cluster name required!"
exit 1
fi
if [ -z "$2" ]; then
echo "Must specify the number of slaves to start."
exit 1
fi
CLUSTER=$1
NO_INSTANCES=$2
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh
if [ ! -f "$MASTER_IP_PATH" ]; then
echo "Must start Cluster Master first!"
exit 1
fi
# Finding Hadoop image
AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'`
# to use private master hostname, substitute below with:
# MASTER_HOST=`cat $MASTER_PRIVATE_IP_PATH`
MASTER_HOST=`cat $MASTER_IP_PATH`
MASTER_ZONE=`cat $MASTER_ZONE_PATH`
# Substituting master hostname
sed -e "s|%MASTER_HOST%|$MASTER_HOST|" "$bin"/$USER_DATA_FILE > "$bin"/$USER_DATA_FILE.slave
# Start slaves
echo "Adding $1 node(s) to cluster group $CLUSTER with AMI $AMI_IMAGE"
ec2-run-instances $AMI_IMAGE -n "$NO_INSTANCES" -g "$CLUSTER" -k "$KEY_NAME" -f "$bin"/$USER_DATA_FILE.slave -t "$INSTANCE_TYPE" -z "$MASTER_ZONE" $KERNEL_ARG | grep INSTANCE | awk '{print $2}'
rm "$bin"/$USER_DATA_FILE.slave

View File

@@ -1,33 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# List running clusters.
set -o errexit
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh
# Finding Hadoop clusters
CLUSTERS=`ec2-describe-instances | awk '"RESERVATION" == $1 && $4 ~ /-master$/, "INSTANCE" == $1' | tr '\n' '\t' | grep running | cut -f4 | rev | cut -d'-' -f2- | rev`
[ -z "$CLUSTERS" ] && echo "No running clusters." && exit 0
echo "Running Hadoop clusters:"
echo "$CLUSTERS"

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Terminate a cluster.
set -o errexit
if [ -z "$1" ]; then
echo "Cluster name required!"
exit 1
fi
CLUSTER=$1
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh
# Finding Hadoop image
HADOOP_INSTANCES=`ec2-describe-instances | awk '"RESERVATION" == $1 && ("'$CLUSTER'" == $4 || "'$CLUSTER_MASTER'" == $4), "RESERVATION" == $1 && ("'$CLUSTER'" != $4 && "'$CLUSTER_MASTER'" != $4)'`
HADOOP_INSTANCES=`echo "$HADOOP_INSTANCES" | grep INSTANCE | grep running`
[ -z "$HADOOP_INSTANCES" ] && echo "No running instances in cluster $CLUSTER." && exit 0
echo "Running Hadoop instances:"
echo "$HADOOP_INSTANCES"
read -p "Terminate all instances? [yes or no]: " answer
if [ "$answer" != "yes" ]; then
exit 1
fi
ec2-terminate-instances `echo "$HADOOP_INSTANCES" | awk '{print $2}'`