From 1ccce74312adc0eeadce8e05d9116cbd91c51cea Mon Sep 17 00:00:00 2001 From: Andrew Kyle Purtell Date: Mon, 8 Mar 2010 09:54:03 +0000 Subject: [PATCH] HBASE-2299 [EC2] mapreduce fixups for PE git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@920254 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + contrib/ec2/bin/hbase-ec2-init-remote.sh | 37 +++++++++++++++--------- contrib/ec2/bin/launch-hbase-cluster | 2 +- contrib/ec2/bin/launch-hbase-master | 10 ++++++- contrib/ec2/bin/launch-hbase-slaves | 9 +++--- 5 files changed, 40 insertions(+), 19 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index e05837d5a18..829badbbe36 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -228,6 +228,7 @@ Release 0.21.0 - Unreleased HBASE-2293 CME in RegionManager#isMetaServer HBASE-2261 The javadoc in WhileMatchFilter and it's tests in TestFilter are not accurate/wrong + HBASE-2299 [EC2] mapreduce fixups for PE IMPROVEMENTS HBASE-1760 Cleanup TODOs in HTable diff --git a/contrib/ec2/bin/hbase-ec2-init-remote.sh b/contrib/ec2/bin/hbase-ec2-init-remote.sh index 915dbfbd1be..c0782a4985d 100644 --- a/contrib/ec2/bin/hbase-ec2-init-remote.sh +++ b/contrib/ec2/bin/hbase-ec2-init-remote.sh @@ -5,6 +5,7 @@ MASTER_HOST="%MASTER_HOST%" ZOOKEEPER_QUORUM="%ZOOKEEPER_QUORUM%" +NUM_SLAVES="%NUM_SLAVES%" EXTRA_PACKAGES="%EXTRA_PACKAGES%" SECURITY_GROUPS=`wget -q -O - http://169.254.169.254/latest/meta-data/security-groups` IS_MASTER=`echo $SECURITY_GROUPS | awk '{ a = match ($0, "-master$"); if (a) print "true"; else print "false"; }'` @@ -119,6 +120,10 @@ cat > $HADOOP_HOME/conf/hdfs-site.xml <dfs.data.dir $DFS_DATA_DIR + + dfs.replication + 3 + dfs.datanode.handler.count 10 @@ -137,14 +142,6 @@ cat > $HADOOP_HOME/conf/mapred-site.xml <mapred.job.tracker $MASTER_HOST:8021 - - mapred.output.compress - true - - - mapred.output.compression.type - BLOCK - io.compression.codecs org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec @@ -154,14 +151,22 @@ cat > $HADOOP_HOME/conf/mapred-site.xml <com.hadoop.compression.lzo.LzoCodec - mapred.map.output.compression.codec - com.hadoop.compression.lzo.LzoCodec + mapred.map.tasks.speculative.execution + false + + + mapred.child.java.opts + -Xmx512m -XX:+UseCompressedOops EOF +# Add JVM options +cat >> $HADOOP_HOME/conf/hadoop-env.sh <> $HADOOP_HOME/conf/hadoop-env.sh < $HADOOP_HOME/conf/hadoop-metrics.properties < $HBASE_HOME/conf/hbase-site.xml <hbase.cluster.distributed true + + hbase.regions.server.count.min + $NUM_SLAVES + hbase.zookeeper.quorum $ZOOKEEPER_QUORUM @@ -224,10 +233,12 @@ cat > $HBASE_HOME/conf/hbase-site.xml < EOF +# Copy over mapred configuration for jobs started with 'hbase ...' +cp $HADOOP_HOME/conf/mapred-site.xml $HBASE_HOME/conf/mapred-site.xml # Override JVM options cat >> $HBASE_HOME/conf/hbase-env.sh < $HBASE_HOME/conf/hadoop-metrics.properties < "$bin"/$USER_DATA_FILE.master INSTANCE=`ec2-run-instances $AMI_IMAGE $TOOL_OPTS -n 1 -g $CLUSTER_MASTER -k root -f "$bin"/$USER_DATA_FILE.master -t $type | grep INSTANCE | awk '{print $2}'` diff --git a/contrib/ec2/bin/launch-hbase-slaves b/contrib/ec2/bin/launch-hbase-slaves index d36e300664b..4e70a8d5503 100755 --- a/contrib/ec2/bin/launch-hbase-slaves +++ b/contrib/ec2/bin/launch-hbase-slaves @@ -29,7 +29,7 @@ if [ -z $2 ]; then exit 1 fi -NO_INSTANCES=$2 +NUM_SLAVES=$2 # Import variables bin=`dirname "$0"` @@ -41,7 +41,7 @@ if [ ! -f $MASTER_IP_PATH ]; then exit 1 fi -[ -z "$AMI_IMAGE" ] && AMI_IMAGE=`ec2-describe-images $TOOL_OPTS -a | grep $S3_BUCKET | grep $HBASE_VERSION-$SLAVE_ARCH |grep available | awk '{print $2}'` +[ -z "$AMI_IMAGE" ] && AMI_IMAGE=`ec2-describe-images $TOOL_OPTS -a | grep $S3_BUCKET | grep hbase | grep $HBASE_VERSION-$SLAVE_ARCH |grep available | awk '{print $2}'` MASTER_HOST=`cat $MASTER_PRIVATE_IP_PATH` MASTER_ZONE=`cat $MASTER_ZONE_PATH` @@ -49,12 +49,13 @@ ZOOKEEPER_QUORUM=`cat $ZOOKEEPER_QUORUM_PATH` # Substituting master hostname and zookeeper quorum sed -e "s|%MASTER_HOST%|$MASTER_HOST|" \ + -e "s|%NUM_SLAVES%|$NUM_SLAVES|" \ -e "s|%ZOOKEEPER_QUORUM%|$ZOOKEEPER_QUORUM|" \ -e "s|%EXTRA_PACKAGES%|$EXTRA_PACKAGES|" \ "$bin"/$USER_DATA_FILE > "$bin"/$USER_DATA_FILE.slave # Start slaves -echo "Starting $NO_INSTANCES AMI(s) with ID $AMI_IMAGE (arch $SLAVE_ARCH) in group $CLUSTER in zone $MASTER_ZONE" -ec2-run-instances $AMI_IMAGE $TOOL_OPTS -n "$NO_INSTANCES" -g "$CLUSTER" -k root -f "$bin"/$USER_DATA_FILE.slave -t "$SLAVE_INSTANCE_TYPE" -z "$MASTER_ZONE" | grep INSTANCE | awk '{print $2}' +echo "Starting $NUM_SLAVES AMI(s) with ID $AMI_IMAGE (arch $SLAVE_ARCH) in group $CLUSTER in zone $MASTER_ZONE" +ec2-run-instances $AMI_IMAGE $TOOL_OPTS -n "$NUM_SLAVES" -g "$CLUSTER" -k root -f "$bin"/$USER_DATA_FILE.slave -t "$SLAVE_INSTANCE_TYPE" -z "$MASTER_ZONE" | grep INSTANCE | awk '{print $2}' rm "$bin"/$USER_DATA_FILE.slave