#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

set -e
function usage {
  echo "Usage: ${0} [options] /path/to/component/bin-install /path/to/hadoop/executable /path/to/share/hadoop/yarn/timelineservice /path/to/hadoop/hadoop-yarn-server-tests-tests.jar /path/to/hadoop/hadoop-mapreduce-client-jobclient-tests.jar /path/to/mapred/executable"
  echo ""
  echo "    --zookeeper-data /path/to/use                                     Where the embedded zookeeper instance should write its data."
  echo "                                                                      Defaults to 'zk-data' in the working-dir."
  echo "    --working-dir /path/to/use                                        Path for writing configs and logs. Must exist."
  echo "                                                                      Defaults to making a directory via mktemp."
  echo "    --hadoop-client-classpath /path/to/some.jar:/path/to/another.jar  Classpath for hadoop jars."
  echo "                                                                      Defaults to 'hadoop classpath'."
  echo "    --hbase-client-install /path/to/unpacked/client/tarball           If given, we'll look here for hbase client jars instead of the bin-install."
  echo "    --force-data-clean                                                Delete all data in HDFS and ZK prior to starting up hbase."
  echo "    --single-process                                                  Run as single process instead of pseudo-distributed."
  echo ""
  exit 1
}

# if no args specified, show usage
if [ $# -lt 6 ]; then
  usage
fi

# Get arguments
declare component_install
declare hadoop_exec
declare working_dir
declare zk_data_dir
declare clean
declare distributed="true"
declare hadoop_jars
declare hbase_client
while [ $# -gt 0 ]
do
  case "$1" in
    --working-dir) shift; working_dir=$1; shift;;
    --force-data-clean) shift; clean="true";;
    --zookeeper-data) shift; zk_data_dir=$1; shift;;
    --single-process) shift; distributed="false";;
    --hadoop-client-classpath) shift; hadoop_jars="$1"; shift;;
    --hbase-client-install) shift; hbase_client="$1"; shift;;
    --) shift; break;;
    -*) usage ;;
    *)  break;;  # terminate while loop
  esac
done

# should still have the six required positional paths.
if [ $# -lt 6 ]; then
  usage
fi
component_install="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
hadoop_exec="$(cd "$(dirname "$2")"; pwd)/$(basename "$2")"
timeline_service_dir="$(cd "$(dirname "$3")"; pwd)/$(basename "$3")"
yarn_server_tests_test_jar="$(cd "$(dirname "$4")"; pwd)/$(basename "$4")"
mapred_jobclient_test_jar="$(cd "$(dirname "$5")"; pwd)/$(basename "$5")"
mapred_exec="$(cd "$(dirname "$6")"; pwd)/$(basename "$6")"

if [ ! -x "${hadoop_exec}" ]; then
  echo "hadoop cli does not appear to be executable." >&2
  exit 1
fi

if [ ! -x "${mapred_exec}" ]; then
  echo "mapred cli does not appear to be executable." >&2
  exit 1
fi

if [ ! -d "${component_install}" ]; then
  echo "Path to HBase binary install should be a directory." >&2
  exit 1
fi

if [ ! -f "${yarn_server_tests_test_jar}" ]; then
  echo "Specified YARN server tests test jar is not a file." >&2
  exit 1
fi

if [ ! -f "${mapred_jobclient_test_jar}" ]; then
  echo "Specified MapReduce jobclient test jar is not a file." >&2
  exit 1
fi
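# Example invocation (the script name and all paths are illustrative; adjust to
# your local HBase and Hadoop layout):
#   ./this-script.sh \
#       --single-process \
#       /path/to/component/bin-install \
#       /path/to/hadoop/executable \
#       /path/to/share/hadoop/yarn/timelineservice \
#       /path/to/hadoop/hadoop-yarn-server-tests-tests.jar \
#       /path/to/hadoop/hadoop-mapreduce-client-jobclient-tests.jar \
#       /path/to/mapred/executable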
if [ -z "${working_dir}" ]; then
  if ! working_dir="$(mktemp -d -t hbase-pseudo-dist-test)" ; then
    echo "Failed to create temporary working directory. Please specify via --working-dir" >&2
    exit 1
  fi
else
  # absolutes please
  working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")"
  if [ ! -d "${working_dir}" ]; then
    echo "passed working directory '${working_dir}' must already exist." >&2
    exit 1
  fi
fi

if [ -z "${zk_data_dir}" ]; then
  zk_data_dir="${working_dir}/zk-data"
  mkdir "${zk_data_dir}"
else
  # absolutes please
  zk_data_dir="$(cd "$(dirname "${zk_data_dir}")"; pwd)/$(basename "${zk_data_dir}")"
  if [ ! -d "${zk_data_dir}" ]; then
    echo "passed ZooKeeper data directory '${zk_data_dir}' must already exist." >&2
    exit 1
  fi
fi

if [ -z "${hbase_client}" ]; then
  hbase_client="${component_install}"
else
  echo "Using HBase client-side artifact"
  # absolutes please
  hbase_client="$(cd "$(dirname "${hbase_client}")"; pwd)/$(basename "${hbase_client}")"
  if [ ! -d "${hbase_client}" ]; then
    echo "If given, the hbase client install should be a directory with the contents of the client tarball." >&2
    exit 1
  fi
fi

if [ -n "${hadoop_jars}" ]; then
  declare -a tmp_jars
  for entry in $(echo "${hadoop_jars}" | tr ':' '\n'); do
    tmp_jars=("${tmp_jars[@]}" "$(cd "$(dirname "${entry}")"; pwd)/$(basename "${entry}")")
  done
  hadoop_jars="$(IFS=:; echo "${tmp_jars[*]}")"
fi

echo "You'll find logs and temp files in ${working_dir}"

function redirect_and_run {
  log_base=$1
  shift
  echo "$*" >"${log_base}.err"
  "$@" >"${log_base}.out" 2>>"${log_base}.err"
}

(cd "${working_dir}"

echo "Hadoop version information:"
"${hadoop_exec}" version
hadoop_version=$("${hadoop_exec}" version | head -n 1)
hadoop_version="${hadoop_version#Hadoop }"
if [ "${hadoop_version%.*.*}" -gt 2 ]; then
  "${hadoop_exec}" envvars
else
  echo "JAVA_HOME: ${JAVA_HOME}"
fi

# Ensure that if some other Hadoop install happens to be present in the environment we ignore it.
HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true"
export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP

if [ -n "${clean}" ]; then
  echo "Cleaning out ZooKeeper..."
  rm -rf "${zk_data_dir:?}/"*
fi

echo "HBase version information:"
"${component_install}/bin/hbase" version 2>/dev/null
hbase_version=$("${component_install}/bin/hbase" version 2>/dev/null | head -n 1)
hbase_version="${hbase_version#HBase }"

if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" ]; then
  echo "HBase binary install doesn't appear to include a shaded mapreduce artifact." >&2
  exit 1
fi

if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-${hbase_version}.jar" ]; then
  echo "HBase binary install doesn't appear to include a shaded client artifact." >&2
  exit 1
fi

if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar" ]; then
  echo "HBase binary install doesn't appear to include a shaded client (byo-hadoop) artifact." >&2
  exit 1
fi
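# The shaded artifacts verified above are exercised later in this script: the
# shaded-mapreduce jar drives the ImportTsv job and the shaded-client-byo-hadoop
# jar is what the standalone Java example is compiled and run against; the plain
# shaded client jar is only checked for presence here.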
echo "Writing out configuration for HBase."
rm -rf "${working_dir}/hbase-conf"
mkdir "${working_dir}/hbase-conf"

if [ -f "${component_install}/conf/log4j.properties" ]; then
  cp "${component_install}/conf/log4j.properties" "${working_dir}/hbase-conf/log4j.properties"
else
  cat >"${working_dir}/hbase-conf/log4j.properties" <<EOF
# Define some default values that can be overridden by system properties
hbase.root.logger=INFO,console

# Define the root logger to the system property "hbase.root.logger".
log4j.rootLogger=\${hbase.root.logger}

# Logging Threshold
log4j.threshold=ALL

# console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %.1000m%n
EOF
fi

cat >"${working_dir}/hbase-conf/hbase-site.xml" <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-->
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <!-- We rely on the defaultFS being set in our hadoop confs -->
    <value>/hbase</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>${zk_data_dir}</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>${distributed}</value>
  </property>
</configuration>
EOF

if [ "true" = "${distributed}" ]; then
  cat >"${working_dir}/hbase-conf/regionservers" <<EOF
localhost
EOF
fi

function cleanup {
  echo "Shutting down HBase"
  HBASE_CONF_DIR="${working_dir}/hbase-conf/" "${component_install}/bin/stop-hbase.sh"

  if [ -f "${working_dir}/hadoop.pid" ]; then
    echo "Shutdown: listing HDFS contents"
    redirect_and_run "${working_dir}/hadoop_listing_at_end" \
        "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R /

    echo "Shutting down Hadoop"
    kill -6 "$(cat "${working_dir}/hadoop.pid")"
  fi
}

trap cleanup EXIT SIGQUIT

echo "Starting up Hadoop"

if [ "${hadoop_version%.*.*}" -gt 2 ]; then
  "${mapred_exec}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
else
  HADOOP_CLASSPATH="${timeline_service_dir}/*:${timeline_service_dir}/lib/*:${yarn_server_tests_test_jar}" "${hadoop_exec}" jar "${mapred_jobclient_test_jar}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
fi

echo "$!" > "${working_dir}/hadoop.pid"

# 2 + 4 + 8 + ... + 256 ~= 8.5 minutes.
max_sleep_time=512
sleep_time=2
until [[ -s "${working_dir}/hbase-conf/core-site.xml" || "${sleep_time}" -ge "${max_sleep_time}" ]]; do
  printf '\twaiting for Hadoop to finish starting up.\n'
  sleep "${sleep_time}"
  sleep_time="$((sleep_time*2))"
done

if [ "${sleep_time}" -ge "${max_sleep_time}" ] ; then
  echo "time out waiting for Hadoop to startup" >&2
  exit 1
fi

if [ "${hadoop_version%.*.*}" -gt 2 ]; then
  echo "Verifying configs"
  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" conftest
fi
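# From here on, the hadoop and hbase commands pass --config (or HBASE_CONF_DIR)
# pointing at ${working_dir}/hbase-conf/ so that they talk to the minicluster
# started above rather than any other install present on the host.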
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r /hbase "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example/ "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example-region-listing.data fi echo "Listing HDFS contents" redirect_and_run "${working_dir}/hadoop_cluster_smoke" \ "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R / echo "Starting up HBase" HBASE_CONF_DIR="${working_dir}/hbase-conf/" "${component_install}/bin/start-hbase.sh" sleep_time=2 until "${component_install}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/waiting_hbase_startup.log" 2>&1 <<EOF count 'hbase:meta' EOF do printf '\tretry waiting for hbase to come up.\n' sleep "${sleep_time}" sleep_time="$((sleep_time*2))" done echo "Setting up table 'test:example' with 1,000 regions" "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/table_create.log" 2>&1 <<EOF create_namespace 'test' create 'test:example', 'family1', 'family2', {NUMREGIONS => 1000, SPLITALGO => 'UniformSplit'} EOF echo "writing out example TSV to example.tsv" cat >"${working_dir}/example.tsv" <<EOF row1 value8 value8 row3 value2 row2 value9 row10 value1 pow1 value8 value8 pow3 value2 pow2 value9 pow10 value1 paw1 value8 value8 paw3 value2 paw2 value9 paw10 value1 raw1 value8 value8 raw3 value2 raw2 value9 raw10 value1 aow1 value8 value8 aow3 value2 aow2 value9 aow10 value1 aaw1 value8 value8 aaw3 value2 aaw2 value9 aaw10 value1 how1 value8 value8 how3 value2 how2 value9 how10 value1 zow1 value8 value8 zow3 value2 zow2 value9 zow10 value1 zaw1 value8 value8 zaw3 value2 zaw2 value9 zaw10 value1 haw1 value8 value8 haw3 value2 haw2 value9 haw10 value1 low1 value8 value8 low3 value2 low2 value9 low10 value1 law1 value8 value8 law3 value2 law2 value9 law10 value1 EOF echo "uploading example.tsv to HDFS" "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -mkdir example "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyFromLocal "${working_dir}/example.tsv" "example/" echo "Importing TSV via shaded client artifact for HBase - MapReduce integration." # hbase_thirdparty_jars=("${component_install}"/lib/htrace-core4*.jar \ # "${component_install}"/lib/slf4j-api-*.jar \ # "${component_install}"/lib/commons-logging-*.jar \ # "${component_install}"/lib/slf4j-log4j12-*.jar \ # "${component_install}"/lib/log4j-1.2.*.jar \ # "${working_dir}/hbase-conf/log4j.properties") # hbase_dep_classpath=$(IFS=:; echo "${hbase_thirdparty_jars[*]}") hbase_dep_classpath="$("${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" mapredcp)" HADOOP_CLASSPATH="${hbase_dep_classpath}" redirect_and_run "${working_dir}/mr-importtsv" \ "${hadoop_exec}" --config "${working_dir}/hbase-conf/" jar "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" importtsv -Dimporttsv.columns=HBASE_ROW_KEY,family1:column1,family1:column4,family1:column3 test:example example/ -libjars "${hbase_dep_classpath}" "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_import.out" 2>"${working_dir}/scan_import.err" <<EOF scan 'test:example' EOF echo "Verifying row count from import." import_rowcount=$(echo 'count "test:example"' | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive 2>/dev/null | tail -n 1) if [ ! 
"${import_rowcount}" -eq 48 ]; then echo "ERROR: Instead of finding 48 rows, we found ${import_rowcount}." exit 2 fi if [ -z "${hadoop_jars}" ]; then echo "Hadoop client jars not given; getting them from 'hadoop classpath' for the example." hadoop_jars=$("${hadoop_exec}" --config "${working_dir}/hbase-conf/" classpath) fi echo "Building shaded client example." cat >"${working_dir}/HBaseClientReadWriteExample.java" <<EOF import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellBuilder; import org.apache.hadoop.hbase.CellBuilderFactory; import org.apache.hadoop.hbase.CellBuilderType; import org.apache.hadoop.hbase.ClusterMetrics; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.RegionMetrics; import org.apache.hadoop.hbase.ServerMetrics; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; import java.util.LinkedList; import java.util.List; public class HBaseClientReadWriteExample { private static final byte[] FAMILY_BYTES = Bytes.toBytes("family2"); public static void main(String[] args) throws Exception { Configuration hbase = HBaseConfiguration.create(); Configuration hadoop = new Configuration(); try (Connection connection = ConnectionFactory.createConnection(hbase)) { System.out.println("Generating list of regions"); final List<String> regions = new LinkedList<>(); try (Admin admin = connection.getAdmin()) { final ClusterMetrics cluster = admin.getClusterMetrics(); System.out.println(String.format("\tCluster reports version %s, ave load %f, region count %d", cluster.getHBaseVersion(), cluster.getAverageLoad(), cluster.getRegionCount())); for (ServerMetrics server : cluster.getLiveServerMetrics().values()) { for (RegionMetrics region : server.getRegionMetrics().values()) { regions.add(region.getNameAsString()); } } } final Path listing = new Path("example-region-listing.data"); System.out.println("Writing list to HDFS"); try (FileSystem fs = FileSystem.newInstance(hadoop)) { final Path path = fs.makeQualified(listing); try (FSDataOutputStream out = fs.create(path)) { out.writeInt(regions.size()); for (String region : regions) { out.writeUTF(region); } out.hsync(); } } final List<Put> puts = new LinkedList<>(); final Put marker = new Put(new byte[] { (byte)0 }); System.out.println("Reading list from HDFS"); try (FileSystem fs = FileSystem.newInstance(hadoop)) { final Path path = fs.makeQualified(listing); final CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); try (FSDataInputStream in = fs.open(path)) { final int count = in.readInt(); marker.addColumn(FAMILY_BYTES, Bytes.toBytes("count"), Bytes.toBytes(count)); for(int i = 0; i < count; i++) { builder.clear(); final byte[] row = Bytes.toBytes(in.readUTF()); final Put put = new Put(row); builder.setRow(row); builder.setFamily(FAMILY_BYTES); builder.setType(Cell.Type.Put); put.add(builder.build()); puts.add(put); } } } System.out.println("Writing list into HBase table"); try (Table table = connection.getTable(TableName.valueOf("test:example"))) { table.put(marker); table.put(puts); } } } 
redirect_and_run "${working_dir}/hbase-shaded-client-compile" \
    javac -cp "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hadoop_jars}" "${working_dir}/HBaseClientReadWriteExample.java"
echo "Running shaded client example. It'll fetch the set of regions, round-trip them to a file in HDFS, then write them one-per-row into the test table."
# The order of classpath entries here is important. If we're using non-shaded Hadoop 3 / 2.9.0 jars, we have to work around YARN-2190.
redirect_and_run "${working_dir}/hbase-shaded-client-example" \
    java -cp "${working_dir}/hbase-conf/:${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hbase_dep_classpath}:${working_dir}:${hadoop_jars}" HBaseClientReadWriteExample

echo "Checking on results of example program."
"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyToLocal "example-region-listing.data" "${working_dir}/example-region-listing.data"

"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_example.out" 2>"${working_dir}/scan_example.err" <<EOF
scan 'test:example'
EOF

echo "Verifying row count from example."
example_rowcount=$(echo 'count "test:example"' | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive 2>/dev/null | tail -n 1)
if [ "${example_rowcount}" -gt "1050" ]; then
  echo "Found ${example_rowcount} rows, which is enough to cover the 48 rows from the import, the example's 1,000 user table regions, the example's 2 meta/namespace regions, and the example's 1 count record."
else
  echo "ERROR: Only found ${example_rowcount} rows." >&2
fi
)