HBASE-2299 [EC2] mapreduce fixups for PE

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@920254 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Kyle Purtell 2010-03-08 09:54:03 +00:00
parent 3baabbdb84
commit 1ccce74312
5 changed files with 40 additions and 19 deletions

View File

@ -228,6 +228,7 @@ Release 0.21.0 - Unreleased
HBASE-2293 CME in RegionManager#isMetaServer
HBASE-2261 The javadoc in WhileMatchFilter and its tests in TestFilter
are not accurate/wrong
HBASE-2299 [EC2] mapreduce fixups for PE
IMPROVEMENTS
HBASE-1760 Cleanup TODOs in HTable

View File

@ -5,6 +5,7 @@
MASTER_HOST="%MASTER_HOST%"
ZOOKEEPER_QUORUM="%ZOOKEEPER_QUORUM%"
NUM_SLAVES="%NUM_SLAVES%"
EXTRA_PACKAGES="%EXTRA_PACKAGES%"
SECURITY_GROUPS=`wget -q -O - http://169.254.169.254/latest/meta-data/security-groups`
IS_MASTER=`echo $SECURITY_GROUPS | awk '{ a = match ($0, "-master$"); if (a) print "true"; else print "false"; }'`
@ -119,6 +120,10 @@ cat > $HADOOP_HOME/conf/hdfs-site.xml <<EOF
<name>dfs.data.dir</name>
<value>$DFS_DATA_DIR</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.datanode.handler.count</name>
<value>10</value>
@ -137,14 +142,6 @@ cat > $HADOOP_HOME/conf/mapred-site.xml <<EOF
<name>mapred.job.tracker</name>
<value>$MASTER_HOST:8021</value>
</property>
<property>
<name>mapred.output.compress</name>
<value>true</value>
</property>
<property>
<name>mapred.output.compression.type</name>
<value>BLOCK</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
@ -154,14 +151,22 @@ cat > $HADOOP_HOME/conf/mapred-site.xml <<EOF
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>mapred.map.output.compression.codec</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
<name>mapred.map.tasks.speculative.execution</name>
<value>false</value>
</property>
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx512m -XX:+UseCompressedOops</value>
</property>
</configuration>
EOF
# Add JVM options
cat >> $HADOOP_HOME/conf/hadoop-env.sh <<EOF
export HADOOP_OPTS="$HADOOP_OPTS -XX:+UseCompressedOops"
EOF
# Update classpath to include HBase jars and config
cat >> $HADOOP_HOME/conf/hadoop-env.sh <<EOF
HADOOP_CLASSPATH="$HBASE_HOME/hbase-${HBASE_VERSION}.jar:$HBASE_HOME/lib/AgileJSON-2009-03-30.jar:$HBASE_HOME/lib/json.jar:$HBASE_HOME/lib/zookeeper-3.2.1.jar:$HBASE_HOME/conf"
export HADOOP_CLASSPATH="$HBASE_HOME/hbase-${HBASE_VERSION}.jar:$HBASE_HOME/lib/AgileJSON-2009-03-30.jar:$HBASE_HOME/lib/json.jar:$HBASE_HOME/lib/zookeeper-3.2.2.jar:$HBASE_HOME/conf"
EOF
# Configure Hadoop for Ganglia
cat > $HADOOP_HOME/conf/hadoop-metrics.properties <<EOF
@ -190,6 +195,10 @@ cat > $HBASE_HOME/conf/hbase-site.xml <<EOF
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.regions.server.count.min</name>
<value>$NUM_SLAVES</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>$ZOOKEEPER_QUORUM</value>
@ -224,10 +233,12 @@ cat > $HBASE_HOME/conf/hbase-site.xml <<EOF
</property>
</configuration>
EOF
# Copy over mapred configuration for jobs started with 'hbase ...'
cp $HADOOP_HOME/conf/mapred-site.xml $HBASE_HOME/conf/mapred-site.xml
# Override JVM options
cat >> $HBASE_HOME/conf/hbase-env.sh <<EOF
export HBASE_MASTER_OPTS="-Xmx1000m -XX:+UseConcMarkSweepGC -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/mnt/hbase/logs/hbase-master-gc.log"
export HBASE_REGIONSERVER_OPTS="-Xmx2000m -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=88 -XX:NewSize=64m -XX:MaxNewSize=64m -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/mnt/hbase/logs/hbase-regionserver-gc.log"
export HBASE_MASTER_OPTS="-Xmx1000m -XX:+UseCompressedOops -XX:+UseConcMarkSweepGC -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/mnt/hbase/logs/hbase-master-gc.log"
export HBASE_REGIONSERVER_OPTS="-Xmx2000m -XX:+UseCompressedOops -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=88 -XX:NewSize=64m -XX:MaxNewSize=64m -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/mnt/hbase/logs/hbase-regionserver-gc.log"
EOF
# Configure HBase for Ganglia
cat > $HBASE_HOME/conf/hadoop-metrics.properties <<EOF

View File

@ -52,7 +52,7 @@ fi
# Launch the HBase master
if ! "$bin"/launch-hbase-master $CLUSTER ; then
if ! "$bin"/launch-hbase-master $CLUSTER $SLAVES ; then
exit $?
fi

View File

@ -24,6 +24,13 @@ fi
CLUSTER=$1
if [ -z $2 ]; then
echo "Must specify the number of slaves to start."
exit 1
fi
NUM_SLAVES=$2
# Import variables
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
@ -52,13 +59,14 @@ if [ ! -z "$MASTER_EC2_HOST" ]; then
fi
# Finding HBase image
[ -z "$AMI_IMAGE" ] && AMI_IMAGE=`ec2-describe-images $TOOL_OPTS -a | grep $S3_BUCKET | grep $HBASE_VERSION-$arch | grep available | awk '{print $2}'`
[ -z "$AMI_IMAGE" ] && AMI_IMAGE=`ec2-describe-images $TOOL_OPTS -a | grep $S3_BUCKET | grep hbase | grep $HBASE_VERSION-$arch | grep available | awk '{print $2}'`
# Start a master
echo "Starting master with AMI $AMI_IMAGE (arch $arch)"
# Substituting zookeeper quorum
ZOOKEEPER_QUORUM=`cat $ZOOKEEPER_QUORUM_PATH`
sed -e "s|%ZOOKEEPER_QUORUM%|$ZOOKEEPER_QUORUM|" \
-e "s|%NUM_SLAVES%|$NUM_SLAVES|" \
-e "s|%EXTRA_PACKAGES%|$EXTRA_PACKAGES|" \
"$bin"/$USER_DATA_FILE > "$bin"/$USER_DATA_FILE.master
INSTANCE=`ec2-run-instances $AMI_IMAGE $TOOL_OPTS -n 1 -g $CLUSTER_MASTER -k root -f "$bin"/$USER_DATA_FILE.master -t $type | grep INSTANCE | awk '{print $2}'`

View File

@ -29,7 +29,7 @@ if [ -z $2 ]; then
exit 1
fi
NO_INSTANCES=$2
NUM_SLAVES=$2
# Import variables
bin=`dirname "$0"`
@ -41,7 +41,7 @@ if [ ! -f $MASTER_IP_PATH ]; then
exit 1
fi
[ -z "$AMI_IMAGE" ] && AMI_IMAGE=`ec2-describe-images $TOOL_OPTS -a | grep $S3_BUCKET | grep $HBASE_VERSION-$SLAVE_ARCH |grep available | awk '{print $2}'`
[ -z "$AMI_IMAGE" ] && AMI_IMAGE=`ec2-describe-images $TOOL_OPTS -a | grep $S3_BUCKET | grep hbase | grep $HBASE_VERSION-$SLAVE_ARCH |grep available | awk '{print $2}'`
MASTER_HOST=`cat $MASTER_PRIVATE_IP_PATH`
MASTER_ZONE=`cat $MASTER_ZONE_PATH`
@ -49,12 +49,13 @@ ZOOKEEPER_QUORUM=`cat $ZOOKEEPER_QUORUM_PATH`
# Substituting master hostname and zookeeper quorum
sed -e "s|%MASTER_HOST%|$MASTER_HOST|" \
-e "s|%NUM_SLAVES%|$NUM_SLAVES|" \
-e "s|%ZOOKEEPER_QUORUM%|$ZOOKEEPER_QUORUM|" \
-e "s|%EXTRA_PACKAGES%|$EXTRA_PACKAGES|" \
"$bin"/$USER_DATA_FILE > "$bin"/$USER_DATA_FILE.slave
# Start slaves
echo "Starting $NO_INSTANCES AMI(s) with ID $AMI_IMAGE (arch $SLAVE_ARCH) in group $CLUSTER in zone $MASTER_ZONE"
ec2-run-instances $AMI_IMAGE $TOOL_OPTS -n "$NO_INSTANCES" -g "$CLUSTER" -k root -f "$bin"/$USER_DATA_FILE.slave -t "$SLAVE_INSTANCE_TYPE" -z "$MASTER_ZONE" | grep INSTANCE | awk '{print $2}'
echo "Starting $NUM_SLAVES AMI(s) with ID $AMI_IMAGE (arch $SLAVE_ARCH) in group $CLUSTER in zone $MASTER_ZONE"
ec2-run-instances $AMI_IMAGE $TOOL_OPTS -n "$NUM_SLAVES" -g "$CLUSTER" -k root -f "$bin"/$USER_DATA_FILE.slave -t "$SLAVE_INSTANCE_TYPE" -z "$MASTER_ZONE" | grep INSTANCE | awk '{print $2}'
rm "$bin"/$USER_DATA_FILE.slave