HBASE-2299 [EC2] mapreduce fixups for PE
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@920254 13f79535-47bb-0310-9956-ffa450edef68
commit 1ccce74312
parent 3baabbdb84
CHANGES.txt
@@ -228,6 +228,7 @@ Release 0.21.0 - Unreleased
    HBASE-2293  CME in RegionManager#isMetaServer
    HBASE-2261  The javadoc in WhileMatchFilter and it's tests in TestFilter
                are not accurate/wrong
+   HBASE-2299  [EC2] mapreduce fixups for PE
 
   IMPROVEMENTS
    HBASE-1760  Cleanup TODOs in HTable
hbase-ec2-init-remote.sh
@@ -5,6 +5,7 @@
 
 MASTER_HOST="%MASTER_HOST%"
 ZOOKEEPER_QUORUM="%ZOOKEEPER_QUORUM%"
+NUM_SLAVES="%NUM_SLAVES%"
 EXTRA_PACKAGES="%EXTRA_PACKAGES%"
 SECURITY_GROUPS=`wget -q -O - http://169.254.169.254/latest/meta-data/security-groups`
 IS_MASTER=`echo $SECURITY_GROUPS | awk '{ a = match ($0, "-master$"); if (a) print "true"; else print "false"; }'`
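For context: %NUM_SLAVES% follows the same template convention as the existing %MASTER_HOST% and %ZOOKEEPER_QUORUM% markers. The launch scripts further down in this commit fill the marker with sed before handing the file to ec2-run-instances as instance user data. A minimal sketch of the mechanism, with a hypothetical slave count and output path:

# Sketch only: substitute the marker the way the launch scripts do.
NUM_SLAVES=4                                   # hypothetical value
sed -e "s|%NUM_SLAVES%|$NUM_SLAVES|" \
    hbase-ec2-init-remote.sh > /tmp/user-data
# ec2-run-instances ... -f /tmp/user-data then boots the instance,
# which runs the filled-in script at first startup.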
@@ -119,6 +120,10 @@ cat > $HADOOP_HOME/conf/hdfs-site.xml <<EOF
   <name>dfs.data.dir</name>
   <value>$DFS_DATA_DIR</value>
 </property>
+<property>
+  <name>dfs.replication</name>
+  <value>3</value>
+</property>
 <property>
   <name>dfs.datanode.handler.count</name>
   <value>10</value>
@@ -137,14 +142,6 @@ cat > $HADOOP_HOME/conf/mapred-site.xml <<EOF
   <name>mapred.job.tracker</name>
   <value>$MASTER_HOST:8021</value>
 </property>
-<property>
-  <name>mapred.output.compress</name>
-  <value>true</value>
-</property>
-<property>
-  <name>mapred.output.compression.type</name>
-  <value>BLOCK</value>
-</property>
 <property>
   <name>io.compression.codecs</name>
   <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
@@ -154,14 +151,22 @@ cat > $HADOOP_HOME/conf/mapred-site.xml <<EOF
   <value>com.hadoop.compression.lzo.LzoCodec</value>
 </property>
 <property>
-  <name>mapred.map.output.compression.codec</name>
-  <value>com.hadoop.compression.lzo.LzoCodec</value>
+  <name>mapred.map.tasks.speculative.execution</name>
+  <value>false</value>
 </property>
+<property>
+  <name>mapred.child.java.opts</name>
+  <value>-Xmx512m -XX:+UseCompressedOops</value>
+</property>
 </configuration>
 EOF
+# Add JVM options
+cat >> $HADOOP_HOME/conf/hadoop-env.sh <<EOF
+export HADOOP_OPTS="$HADOOP_OPTS -XX:+UseCompressedOops"
+EOF
 # Update classpath to include HBase jars and config
 cat >> $HADOOP_HOME/conf/hadoop-env.sh <<EOF
-HADOOP_CLASSPATH="$HBASE_HOME/hbase-${HBASE_VERSION}.jar:$HBASE_HOME/lib/AgileJSON-2009-03-30.jar:$HBASE_HOME/lib/json.jar:$HBASE_HOME/lib/zookeeper-3.2.1.jar:$HBASE_HOME/conf"
+export HADOOP_CLASSPATH="$HBASE_HOME/hbase-${HBASE_VERSION}.jar:$HBASE_HOME/lib/AgileJSON-2009-03-30.jar:$HBASE_HOME/lib/json.jar:$HBASE_HOME/lib/zookeeper-3.2.2.jar:$HBASE_HOME/conf"
 EOF
 # Configure Hadoop for Ganglia
 cat > $HADOOP_HOME/conf/hadoop-metrics.properties <<EOF
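Two details in the hunk above are easy to miss: the ZooKeeper jar on the classpath is bumped from 3.2.1 to 3.2.2, and the HADOOP_CLASSPATH assignment gains an export. The export matters because, in a fresh shell, a plain assignment is visible only to the shell that sourced hadoop-env.sh, while the exported form is inherited by child processes. A quick way to see the difference (path hypothetical):

# Plain assignment: a child shell does not see the variable.
HADOOP_CLASSPATH=/opt/hbase/conf
sh -c 'echo "child sees: [$HADOOP_CLASSPATH]"'   # prints: child sees: []
# Exported: the child inherits it.
export HADOOP_CLASSPATH=/opt/hbase/conf
sh -c 'echo "child sees: [$HADOOP_CLASSPATH]"'   # prints: child sees: [/opt/hbase/conf]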
@@ -190,6 +195,10 @@ cat > $HBASE_HOME/conf/hbase-site.xml <<EOF
   <name>hbase.cluster.distributed</name>
   <value>true</value>
 </property>
+<property>
+  <name>hbase.regions.server.count.min</name>
+  <value>$NUM_SLAVES</value>
+</property>
 <property>
   <name>hbase.zookeeper.quorum</name>
   <value>$ZOOKEEPER_QUORUM</value>
@@ -224,10 +233,12 @@ cat > $HBASE_HOME/conf/hbase-site.xml <<EOF
 </property>
 </configuration>
 EOF
+# Copy over mapred configuration for jobs started with 'hbase ...'
+cp $HADOOP_HOME/conf/mapred-site.xml $HBASE_HOME/conf/mapred-site.xml
 # Override JVM options
 cat >> $HBASE_HOME/conf/hbase-env.sh <<EOF
-export HBASE_MASTER_OPTS="-Xmx1000m -XX:+UseConcMarkSweepGC -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/mnt/hbase/logs/hbase-master-gc.log"
-export HBASE_REGIONSERVER_OPTS="-Xmx2000m -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=88 -XX:NewSize=64m -XX:MaxNewSize=64m -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/mnt/hbase/logs/hbase-regionserver-gc.log"
+export HBASE_MASTER_OPTS="-Xmx1000m -XX:+UseCompressedOops -XX:+UseConcMarkSweepGC -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/mnt/hbase/logs/hbase-master-gc.log"
+export HBASE_REGIONSERVER_OPTS="-Xmx2000m -XX:+UseCompressedOops -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=88 -XX:NewSize=64m -XX:MaxNewSize=64m -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/mnt/hbase/logs/hbase-regionserver-gc.log"
 EOF
 # Configure HBase for Ganglia
 cat > $HBASE_HOME/conf/hadoop-metrics.properties <<EOF
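For orientation, the PE of the commit title is HBase's PerformanceEvaluation tool, which submits itself as a MapReduce job; copying mapred-site.xml into $HBASE_HOME/conf is what lets a job launched through the hbase wrapper find the JobTracker configured above. A typical run, with an illustrative client count:

# Write benchmark as a MapReduce job with 4 clients (argument values
# are illustrative; the class name is the real PE entry point).
$HBASE_HOME/bin/hbase org.apache.hadoop.hbase.PerformanceEvaluation sequentialWrite 4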
launch-hbase-cluster
@@ -52,7 +52,7 @@ fi
 
 # Launch the HBase master
 
-if ! "$bin"/launch-hbase-master $CLUSTER ; then
+if ! "$bin"/launch-hbase-master $CLUSTER $SLAVES ; then
   exit $?
 fi
 
launch-hbase-master
@@ -24,6 +24,13 @@ fi
 
 CLUSTER=$1
 
+if [ -z $2 ]; then
+  echo "Must specify the number of slaves to start."
+  exit 1
+fi
+
+NUM_SLAVES=$2
+
 # Import variables
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
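Note the changed calling convention: launch-hbase-master now requires a slave count as its second argument, which flows into %NUM_SLAVES% and, through the init script, into hbase.regions.server.count.min on the master. Assuming the usual two-argument form, an invocation looks like:

# Cluster name first, then the number of slaves (values hypothetical).
bin/launch-hbase-master testcluster 4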
@@ -52,13 +59,14 @@ if [ ! -z "$MASTER_EC2_HOST" ]; then
 fi
 
 # Finding HBase image
-[ -z "$AMI_IMAGE" ] && AMI_IMAGE=`ec2-describe-images $TOOL_OPTS -a | grep $S3_BUCKET | grep $HBASE_VERSION-$arch | grep available | awk '{print $2}'`
+[ -z "$AMI_IMAGE" ] && AMI_IMAGE=`ec2-describe-images $TOOL_OPTS -a | grep $S3_BUCKET | grep hbase | grep $HBASE_VERSION-$arch | grep available | awk '{print $2}'`
 
 # Start a master
 echo "Starting master with AMI $AMI_IMAGE (arch $arch)"
 # Substituting zookeeper quorum
 ZOOKEEPER_QUORUM=`cat $ZOOKEEPER_QUORUM_PATH`
 sed -e "s|%ZOOKEEPER_QUORUM%|$ZOOKEEPER_QUORUM|" \
+    -e "s|%NUM_SLAVES%|$NUM_SLAVES|" \
     -e "s|%EXTRA_PACKAGES%|$EXTRA_PACKAGES|" \
     "$bin"/$USER_DATA_FILE > "$bin"/$USER_DATA_FILE.master
 INSTANCE=`ec2-run-instances $AMI_IMAGE $TOOL_OPTS -n 1 -g $CLUSTER_MASTER -k root -f "$bin"/$USER_DATA_FILE.master -t $type | grep INSTANCE | awk '{print $2}'`
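The extra grep hbase in the AMI lookup above keeps a Hadoop image carrying the same version-arch suffix in the bucket from matching. A self-contained illustration with hypothetical image listings:

# Two fake ec2-describe-images lines; only the hbase one should match.
printf '%s\n' \
  'IMAGE ami-1111 bucket/hadoop-0.21.0-x86_64.manifest.xml available' \
  'IMAGE ami-2222 bucket/hbase-0.21.0-x86_64.manifest.xml available' \
  | grep hbase | grep 0.21.0-x86_64 | awk '{print $2}'
# prints: ami-2222 (without 'grep hbase', ami-1111 would match too)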
launch-hbase-slaves
@@ -29,7 +29,7 @@ if [ -z $2 ]; then
   exit 1
 fi
 
-NO_INSTANCES=$2
+NUM_SLAVES=$2
 
 # Import variables
 bin=`dirname "$0"`
@@ -41,7 +41,7 @@ if [ ! -f $MASTER_IP_PATH ]; then
   exit 1
 fi
 
-[ -z "$AMI_IMAGE" ] && AMI_IMAGE=`ec2-describe-images $TOOL_OPTS -a | grep $S3_BUCKET | grep $HBASE_VERSION-$SLAVE_ARCH |grep available | awk '{print $2}'`
+[ -z "$AMI_IMAGE" ] && AMI_IMAGE=`ec2-describe-images $TOOL_OPTS -a | grep $S3_BUCKET | grep hbase | grep $HBASE_VERSION-$SLAVE_ARCH |grep available | awk '{print $2}'`
 
 MASTER_HOST=`cat $MASTER_PRIVATE_IP_PATH`
 MASTER_ZONE=`cat $MASTER_ZONE_PATH`
@@ -49,12 +49,13 @@ ZOOKEEPER_QUORUM=`cat $ZOOKEEPER_QUORUM_PATH`
 
 # Substituting master hostname and zookeeper quorum
 sed -e "s|%MASTER_HOST%|$MASTER_HOST|" \
+    -e "s|%NUM_SLAVES%|$NUM_SLAVES|" \
     -e "s|%ZOOKEEPER_QUORUM%|$ZOOKEEPER_QUORUM|" \
     -e "s|%EXTRA_PACKAGES%|$EXTRA_PACKAGES|" \
     "$bin"/$USER_DATA_FILE > "$bin"/$USER_DATA_FILE.slave
 
 # Start slaves
-echo "Starting $NO_INSTANCES AMI(s) with ID $AMI_IMAGE (arch $SLAVE_ARCH) in group $CLUSTER in zone $MASTER_ZONE"
-ec2-run-instances $AMI_IMAGE $TOOL_OPTS -n "$NO_INSTANCES" -g "$CLUSTER" -k root -f "$bin"/$USER_DATA_FILE.slave -t "$SLAVE_INSTANCE_TYPE" -z "$MASTER_ZONE" | grep INSTANCE | awk '{print $2}'
+echo "Starting $NUM_SLAVES AMI(s) with ID $AMI_IMAGE (arch $SLAVE_ARCH) in group $CLUSTER in zone $MASTER_ZONE"
+ec2-run-instances $AMI_IMAGE $TOOL_OPTS -n "$NUM_SLAVES" -g "$CLUSTER" -k root -f "$bin"/$USER_DATA_FILE.slave -t "$SLAVE_INSTANCE_TYPE" -z "$MASTER_ZONE" | grep INSTANCE | awk '{print $2}'
 
 rm "$bin"/$USER_DATA_FILE.slave
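With NO_INSTANCES renamed to NUM_SLAVES, all three launch scripts now share one spelling, so judging by the hunks above the count threads through unchanged from the top-level entry point down to the user-data template. A hypothetical end-to-end bring-up:

# Cluster name, then slave count; launch-hbase-cluster forwards the
# count to launch-hbase-master and launch-hbase-slaves in turn.
bin/launch-hbase-cluster testcluster 4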