hbase/dev-support/hbase_nightly_pseudo-distri...

536 lines
20 KiB
Bash
Executable File

#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
set -e
function usage {
echo "Usage: ${0} [options] /path/to/component/bin-install /path/to/hadoop/executable /path/to/share/hadoop/yarn/timelineservice /path/to/hadoop/hadoop-yarn-server-tests-tests.jar /path/to/hadoop/hadoop-mapreduce-client-jobclient-tests.jar /path/to/mapred/executable"
echo ""
echo " --zookeeper-data /path/to/use Where the embedded zookeeper instance should write its data."
echo " defaults to 'zk-data' in the working-dir."
echo " --working-dir /path/to/use Path for writing configs and logs. must exist."
echo " defaults to making a directory via mktemp."
echo " --hadoop-client-classpath /path/to/some.jar:/path/to/another.jar classpath for hadoop jars."
echo " defaults to 'hadoop classpath'"
echo " --hbase-client-install /path/to/unpacked/client/tarball if given we'll look here for hbase client jars instead of the bin-install"
echo " --force-data-clean Delete all data in HDFS and ZK prior to starting up hbase"
echo " --single-process Run as single process instead of pseudo-distributed"
echo ""
exit 1
}
# if no args specified, show usage
if [ $# -lt 5 ]; then
usage
fi
# Get arguments
declare component_install
declare hadoop_exec
declare working_dir
declare zk_data_dir
declare clean
declare distributed="true"
declare hadoop_jars
declare hbase_client
while [ $# -gt 0 ]
do
case "$1" in
--working-dir) shift; working_dir=$1; shift;;
--force-data-clean) shift; clean="true";;
--zookeeper-data) shift; zk_data_dir=$1; shift;;
--single-process) shift; distributed="false";;
--hadoop-client-classpath) shift; hadoop_jars="$1"; shift;;
--hbase-client-install) shift; hbase_client="$1"; shift;;
--) shift; break;;
-*) usage ;;
*) break;; # terminate while loop
esac
done
# should still have where component checkout is.
if [ $# -lt 5 ]; then
usage
fi
component_install="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
hadoop_exec="$(cd "$(dirname "$2")"; pwd)/$(basename "$2")"
timeline_service_dir="$(cd "$(dirname "$3")"; pwd)/$(basename "$3")"
yarn_server_tests_test_jar="$(cd "$(dirname "$4")"; pwd)/$(basename "$4")"
mapred_jobclient_test_jar="$(cd "$(dirname "$5")"; pwd)/$(basename "$5")"
mapred_exec="$(cd "$(dirname "$6")"; pwd)/$(basename "$6")"
if [ ! -x "${hadoop_exec}" ]; then
echo "hadoop cli does not appear to be executable." >&2
exit 1
fi
if [ ! -x "${mapred_exec}" ]; then
echo "mapred cli does not appear to be executable." >&2
exit 1
fi
if [ ! -d "${component_install}" ]; then
echo "Path to HBase binary install should be a directory." >&2
exit 1
fi
if [ ! -f "${yarn_server_tests_test_jar}" ]; then
echo "Specified YARN server tests test jar is not a file." >&2
exit 1
fi
if [ ! -f "${mapred_jobclient_test_jar}" ]; then
echo "Specified MapReduce jobclient test jar is not a file." >&2
exit 1
fi
if [ -z "${working_dir}" ]; then
if ! working_dir="$(mktemp -d -t hbase-pseudo-dist-test)" ; then
echo "Failed to create temporary working directory. Please specify via --working-dir" >&2
exit 1
fi
else
# absolutes please
working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")"
if [ ! -d "${working_dir}" ]; then
echo "passed working directory '${working_dir}' must already exist." >&2
exit 1
fi
fi
if [ -z "${zk_data_dir}" ]; then
zk_data_dir="${working_dir}/zk-data"
mkdir "${zk_data_dir}"
else
# absolutes please
zk_data_dir="$(cd "$(dirname "${zk_data_dir}")"; pwd)/$(basename "${zk_data_dir}")"
if [ ! -d "${zk_data_dir}" ]; then
echo "passed directory for unpacking the source tarball '${zk_data_dir}' must already exist."
exit 1
fi
fi
if [ -z "${hbase_client}" ]; then
hbase_client="${component_install}"
else
echo "Using HBase client-side artifact"
# absolutes please
hbase_client="$(cd "$(dirname "${hbase_client}")"; pwd)/$(basename "${hbase_client}")"
if [ ! -d "${hbase_client}" ]; then
echo "If given hbase client install should be a directory with contents of the client tarball." >&2
exit 1
fi
fi
if [ -n "${hadoop_jars}" ]; then
declare -a tmp_jars
for entry in $(echo "${hadoop_jars}" | tr ':' '\n'); do
tmp_jars=("${tmp_jars[@]}" "$(cd "$(dirname "${entry}")"; pwd)/$(basename "${entry}")")
done
hadoop_jars="$(IFS=:; echo "${tmp_jars[*]}")"
fi
echo "You'll find logs and temp files in ${working_dir}"
function redirect_and_run {
log_base=$1
shift
echo "$*" >"${log_base}.err"
"$@" >"${log_base}.out" 2>>"${log_base}.err"
}
(cd "${working_dir}"
echo "Hadoop version information:"
"${hadoop_exec}" version
hadoop_version=$("${hadoop_exec}" version | head -n 1)
hadoop_version="${hadoop_version#Hadoop }"
if [ "${hadoop_version%.*.*}" -gt 2 ]; then
"${hadoop_exec}" envvars
else
echo "JAVA_HOME: ${JAVA_HOME}"
fi
# Ensure that if some other Hadoop install happens to be present in the environment we ignore it.
HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true"
export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP
if [ -n "${clean}" ]; then
echo "Cleaning out ZooKeeper..."
rm -rf "${zk_data_dir:?}/*"
fi
echo "HBase version information:"
"${component_install}/bin/hbase" version 2>/dev/null
hbase_version=$("${component_install}/bin/hbase" version | head -n 1 2>/dev/null)
hbase_version="${hbase_version#HBase }"
if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" ]; then
echo "HBase binary install doesn't appear to include a shaded mapreduce artifact." >&2
exit 1
fi
if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-${hbase_version}.jar" ]; then
echo "HBase binary install doesn't appear to include a shaded client artifact." >&2
exit 1
fi
if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar" ]; then
echo "HBase binary install doesn't appear to include a shaded client artifact." >&2
exit 1
fi
echo "Writing out configuration for HBase."
rm -rf "${working_dir}/hbase-conf"
mkdir "${working_dir}/hbase-conf"
if [ -f "${component_install}/conf/log4j.properties" ]; then
cp "${component_install}/conf/log4j.properties" "${working_dir}/hbase-conf/log4j.properties"
else
cat >"${working_dir}/hbase-conf/log4j.properties" <<EOF
# Define some default values that can be overridden by system properties
hbase.root.logger=INFO,console
# Define the root logger to the system property "hbase.root.logger".
log4j.rootLogger=${hbase.root.logger}
# Logging Threshold
log4j.threshold=ALL
# console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n
EOF
fi
cat >"${working_dir}/hbase-conf/hbase-site.xml" <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<configuration>
<property>
<name>hbase.rootdir</name>
<!-- We rely on the defaultFS being set in our hadoop confs -->
<value>/hbase</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>${zk_data_dir}</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>${distributed}</value>
</property>
</configuration>
EOF
if [ "true" = "${distributed}" ]; then
cat >"${working_dir}/hbase-conf/regionservers" <<EOF
localhost
EOF
fi
function cleanup {
echo "Shutting down HBase"
HBASE_CONF_DIR="${working_dir}/hbase-conf/" "${component_install}/bin/stop-hbase.sh"
if [ -f "${working_dir}/hadoop.pid" ]; then
echo "Shutdown: listing HDFS contents"
redirect_and_run "${working_dir}/hadoop_listing_at_end" \
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R /
echo "Shutting down Hadoop"
kill -6 "$(cat "${working_dir}/hadoop.pid")"
fi
}
trap cleanup EXIT SIGQUIT
echo "Starting up Hadoop"
if [ "${hadoop_version%.*.*}" -gt 2 ]; then
"${mapred_exec}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
else
HADOOP_CLASSPATH="${timeline_service_dir}/*:${timeline_service_dir}/lib/*:${yarn_server_tests_test_jar}" "${hadoop_exec}" jar "${mapred_jobclient_test_jar}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
fi
echo "$!" > "${working_dir}/hadoop.pid"
# 2 + 4 + 8 + .. + 256 ~= 8.5 minutes.
max_sleep_time=512
sleep_time=2
until [[ -s "${working_dir}/hbase-conf/core-site.xml" || "${sleep_time}" -ge "${max_sleep_time}" ]]; do
printf '\twaiting for Hadoop to finish starting up.\n'
sleep "${sleep_time}"
sleep_time="$((sleep_time*2))"
done
if [ "${sleep_time}" -ge "${max_sleep_time}" ] ; then
echo "time out waiting for Hadoop to startup" >&2
exit 1
fi
if [ "${hadoop_version%.*.*}" -gt 2 ]; then
echo "Verifying configs"
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" conftest
fi
if [ -n "${clean}" ]; then
echo "Cleaning out HDFS..."
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r /hbase
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example/
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example-region-listing.data
fi
echo "Listing HDFS contents"
redirect_and_run "${working_dir}/hadoop_cluster_smoke" \
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R /
echo "Starting up HBase"
HBASE_CONF_DIR="${working_dir}/hbase-conf/" "${component_install}/bin/start-hbase.sh"
sleep_time=2
until "${component_install}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/waiting_hbase_startup.log" 2>&1 <<EOF
count 'hbase:meta'
EOF
do
printf '\tretry waiting for hbase to come up.\n'
sleep "${sleep_time}"
sleep_time="$((sleep_time*2))"
done
echo "Setting up table 'test:example' with 1,000 regions"
"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/table_create.log" 2>&1 <<EOF
create_namespace 'test'
create 'test:example', 'family1', 'family2', {NUMREGIONS => 1000, SPLITALGO => 'UniformSplit'}
EOF
echo "writing out example TSV to example.tsv"
cat >"${working_dir}/example.tsv" <<EOF
row1 value8 value8
row3 value2
row2 value9
row10 value1
pow1 value8 value8
pow3 value2
pow2 value9
pow10 value1
paw1 value8 value8
paw3 value2
paw2 value9
paw10 value1
raw1 value8 value8
raw3 value2
raw2 value9
raw10 value1
aow1 value8 value8
aow3 value2
aow2 value9
aow10 value1
aaw1 value8 value8
aaw3 value2
aaw2 value9
aaw10 value1
how1 value8 value8
how3 value2
how2 value9
how10 value1
zow1 value8 value8
zow3 value2
zow2 value9
zow10 value1
zaw1 value8 value8
zaw3 value2
zaw2 value9
zaw10 value1
haw1 value8 value8
haw3 value2
haw2 value9
haw10 value1
low1 value8 value8
low3 value2
low2 value9
low10 value1
law1 value8 value8
law3 value2
law2 value9
law10 value1
EOF
echo "uploading example.tsv to HDFS"
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -mkdir example
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyFromLocal "${working_dir}/example.tsv" "example/"
echo "Importing TSV via shaded client artifact for HBase - MapReduce integration."
# hbase_thirdparty_jars=("${component_install}"/lib/htrace-core4*.jar \
# "${component_install}"/lib/slf4j-api-*.jar \
# "${component_install}"/lib/commons-logging-*.jar \
# "${component_install}"/lib/slf4j-log4j12-*.jar \
# "${component_install}"/lib/log4j-1.2.*.jar \
# "${working_dir}/hbase-conf/log4j.properties")
# hbase_dep_classpath=$(IFS=:; echo "${hbase_thirdparty_jars[*]}")
hbase_dep_classpath="$("${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" mapredcp)"
HADOOP_CLASSPATH="${hbase_dep_classpath}" redirect_and_run "${working_dir}/mr-importtsv" \
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" jar "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" importtsv -Dimporttsv.columns=HBASE_ROW_KEY,family1:column1,family1:column4,family1:column3 test:example example/ -libjars "${hbase_dep_classpath}"
"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_import.out" 2>"${working_dir}/scan_import.err" <<EOF
scan 'test:example'
EOF
echo "Verifying row count from import."
import_rowcount=$(echo 'count "test:example"' | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive 2>/dev/null | tail -n 1)
if [ ! "${import_rowcount}" -eq 48 ]; then
echo "ERROR: Instead of finding 48 rows, we found ${import_rowcount}."
exit 2
fi
if [ -z "${hadoop_jars}" ]; then
echo "Hadoop client jars not given; getting them from 'hadoop classpath' for the example."
hadoop_jars=$("${hadoop_exec}" --config "${working_dir}/hbase-conf/" classpath)
fi
echo "Building shaded client example."
cat >"${working_dir}/HBaseClientReadWriteExample.java" <<EOF
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellBuilder;
import org.apache.hadoop.hbase.CellBuilderFactory;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.RegionMetrics;
import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import java.util.LinkedList;
import java.util.List;
public class HBaseClientReadWriteExample {
private static final byte[] FAMILY_BYTES = Bytes.toBytes("family2");
public static void main(String[] args) throws Exception {
Configuration hbase = HBaseConfiguration.create();
Configuration hadoop = new Configuration();
try (Connection connection = ConnectionFactory.createConnection(hbase)) {
System.out.println("Generating list of regions");
final List<String> regions = new LinkedList<>();
try (Admin admin = connection.getAdmin()) {
final ClusterMetrics cluster = admin.getClusterMetrics();
System.out.println(String.format("\tCluster reports version %s, ave load %f, region count %d", cluster.getHBaseVersion(), cluster.getAverageLoad(), cluster.getRegionCount()));
for (ServerMetrics server : cluster.getLiveServerMetrics().values()) {
for (RegionMetrics region : server.getRegionMetrics().values()) {
regions.add(region.getNameAsString());
}
}
}
final Path listing = new Path("example-region-listing.data");
System.out.println("Writing list to HDFS");
try (FileSystem fs = FileSystem.newInstance(hadoop)) {
final Path path = fs.makeQualified(listing);
try (FSDataOutputStream out = fs.create(path)) {
out.writeInt(regions.size());
for (String region : regions) {
out.writeUTF(region);
}
out.hsync();
}
}
final List<Put> puts = new LinkedList<>();
final Put marker = new Put(new byte[] { (byte)0 });
System.out.println("Reading list from HDFS");
try (FileSystem fs = FileSystem.newInstance(hadoop)) {
final Path path = fs.makeQualified(listing);
final CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
try (FSDataInputStream in = fs.open(path)) {
final int count = in.readInt();
marker.addColumn(FAMILY_BYTES, Bytes.toBytes("count"), Bytes.toBytes(count));
for(int i = 0; i < count; i++) {
builder.clear();
final byte[] row = Bytes.toBytes(in.readUTF());
final Put put = new Put(row);
builder.setRow(row);
builder.setFamily(FAMILY_BYTES);
builder.setType(Cell.Type.Put);
put.add(builder.build());
puts.add(put);
}
}
}
System.out.println("Writing list into HBase table");
try (Table table = connection.getTable(TableName.valueOf("test:example"))) {
table.put(marker);
table.put(puts);
}
}
}
}
EOF
redirect_and_run "${working_dir}/hbase-shaded-client-compile" \
javac -cp "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hadoop_jars}" "${working_dir}/HBaseClientReadWriteExample.java"
echo "Running shaded client example. It'll fetch the set of regions, round-trip them to a file in HDFS, then write them one-per-row into the test table."
# The order of classpath entries here is important. if we're using non-shaded Hadoop 3 / 2.9.0 jars, we have to work around YARN-2190.
redirect_and_run "${working_dir}/hbase-shaded-client-example" \
java -cp "${working_dir}/hbase-conf/:${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hbase_dep_classpath}:${working_dir}:${hadoop_jars}" HBaseClientReadWriteExample
echo "Checking on results of example program."
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyToLocal "example-region-listing.data" "${working_dir}/example-region-listing.data"
"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_example.out" 2>"${working_dir}/scan_example.err" <<EOF
scan 'test:example'
EOF
echo "Verifying row count from example."
example_rowcount=$(echo 'count "test:example"' | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive 2>/dev/null | tail -n 1)
if [ "${example_rowcount}" -gt "1050" ]; then
echo "Found ${example_rowcount} rows, which is enough to cover 48 for import, 1000 example's use of user table regions, 2 for example's use of meta/namespace regions, and 1 for example's count record"
else
echo "ERROR: Only found ${example_rowcount} rows."
fi
)