diff --git a/hbase-mapreduce/.flattened-pom.xml b/hbase-mapreduce/.flattened-pom.xml
new file mode 100644
index 00000000000..2c60e3aa8f6
--- /dev/null
+++ b/hbase-mapreduce/.flattened-pom.xml
@@ -0,0 +1,1179 @@
+[… 1,179 lines of generated flattened POM omitted: project coordinates for org.apache.hbase:hbase-mapreduce 3.0.0-beta-1-SNAPSHOT, the module description, the Apache License 2.0 declaration, developer and mailing-list rosters, SCM/issue-tracker/distribution metadata, and the full dependency list with exclusions (hbase-common, hbase-client, hbase-server, hbase-zookeeper, hadoop-common/hdfs/minicluster/minikdc 3.2.4, zookeeper 3.5.7, slf4j-api 1.7.30, opentelemetry-api 1.15.0, etc.) …]
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
index eaea8c78e3f..f40951e945d 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
@@ -1,5 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hbase.util.bulkdatagenerator;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
@@ -8,17 +30,13 @@ import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 
 public class BulkDataGeneratorInputFormat extends InputFormat<Text, NullWritable> {
 
-  public static final String MAPPER_TASK_COUNT_KEY = BulkDataGeneratorInputFormat.class.getName() + "mapper.task.count";
+  public static final String MAPPER_TASK_COUNT_KEY =
+    BulkDataGeneratorInputFormat.class.getName() + "mapper.task.count";
 
   @Override
   public List<InputSplit> getSplits(JobContext job) throws IOException {
@@ -35,9 +53,10 @@ public class BulkDataGeneratorInputFormat extends InputFormat<Text, NullWritable>
   @Override
-  public RecordReader<Text, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
-    throws IOException, InterruptedException {
-    BulkDataGeneratorRecordReader bulkDataGeneratorRecordReader = new BulkDataGeneratorRecordReader();
+  public RecordReader<Text, NullWritable> createRecordReader(InputSplit split,
+    TaskAttemptContext context) throws IOException, InterruptedException {
+    BulkDataGeneratorRecordReader bulkDataGeneratorRecordReader =
+      new BulkDataGeneratorRecordReader();
     bulkDataGeneratorRecordReader.initialize(split, context);
     return bulkDataGeneratorRecordReader;
   }
@@ -47,17 +66,21 @@ public class BulkDataGeneratorInputFormat extends InputFormat<Text, NullWritable>
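BulkDataGeneratorInputFormat exists only to control parallelism: its splits carry no input data, so whatever getSplits() returns determines exactly how many map tasks launch. The split class itself falls outside the hunks shown in this excerpt, so the sketch below uses illustrative names (FixedTaskCountInputFormat, EmptySplit, TASK_COUNT_KEY) for everything the diff does not show; only the config-driven split count and the delegation to BulkDataGeneratorRecordReader are taken from the code above.

package org.apache.hadoop.hbase.util.bulkdatagenerator;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class FixedTaskCountInputFormat extends InputFormat<Text, NullWritable> {

  // Same idea as MAPPER_TASK_COUNT_KEY above: the only "input" is a task count.
  public static final String TASK_COUNT_KEY = "fixed.task.count"; // illustrative key

  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {
    int taskCount = job.getConfiguration().getInt(TASK_COUNT_KEY, 1);
    List<InputSplit> splits = new ArrayList<>(taskCount);
    for (int i = 0; i < taskCount; i++) {
      splits.add(new EmptySplit()); // one split per desired mapper, no backing data
    }
    return splits;
  }

  @Override
  public RecordReader<Text, NullWritable> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
    // Delegates to the PR's reader, exactly as createRecordReader above does.
    BulkDataGeneratorRecordReader reader = new BulkDataGeneratorRecordReader();
    reader.initialize(split, context);
    return reader;
  }

  /** A split that serializes nothing; it exists only to launch a map task. */
  public static class EmptySplit extends InputSplit implements Writable {
    @Override
    public void write(DataOutput out) throws IOException {
      // nothing to serialize
    }

    @Override
    public void readFields(DataInput in) throws IOException {
      // nothing to deserialize
    }

    @Override
    public long getLength() {
      return 0;
    }

    @Override
    public String[] getLocations() {
      return new String[0]; // no locality preference
    }
  }
}

An empty Writable split is the usual way to drive map-only generator jobs: the framework insists on serializing splits to the tasks even when there is nothing to serialize.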
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorMapper.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorMapper.java
+public class BulkDataGeneratorMapper
+  extends Mapper<Text, NullWritable, ImmutableBytesWritable, KeyValue> {
 
   /** Counter enumeration to count number of rows generated. */
   public static enum Counters {
     ROWS_GENERATED
   }
 
-  public static final String SPLIT_COUNT_KEY = BulkDataGeneratorMapper.class.getName() + "split.count";
+  public static final String SPLIT_COUNT_KEY =
+    BulkDataGeneratorMapper.class.getName() + "split.count";
 
   private static final String ORG_ID = "00D000000000062";
   private static final int MAX_EVENT_ID = Integer.MAX_VALUE;
@@ -35,7 +53,8 @@ public class BulkDataGeneratorMapper extends
   private static final int NUM_LOCATIONS = 10;
   private static int splitCount = 1;
   private static final Random random = new Random(System.currentTimeMillis());
-  private static final Map<String, Pair<BigDecimal, BigDecimal>> LOCATIONS = Maps.newHashMapWithExpectedSize(NUM_LOCATIONS);
+  private static final Map<String, Pair<BigDecimal, BigDecimal>> LOCATIONS =
+    Maps.newHashMapWithExpectedSize(NUM_LOCATIONS);
   private static final List<String> LOCATION_KEYS = Lists.newArrayListWithCapacity(NUM_LOCATIONS);
   static {
     LOCATIONS.put("Belém", new Pair<>(BigDecimal.valueOf(-01.45), BigDecimal.valueOf(-48.48)));
@@ -43,9 +62,11 @@ public class BulkDataGeneratorMapper extends
     LOCATIONS.put("Campinas", new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-47.05)));
     LOCATIONS.put("Cuiaba", new Pair<>(BigDecimal.valueOf(-07.25), BigDecimal.valueOf(-58.42)));
     LOCATIONS.put("Manaus", new Pair<>(BigDecimal.valueOf(-03.10), BigDecimal.valueOf(-60.00)));
-    LOCATIONS.put("Porto Velho", new Pair<>(BigDecimal.valueOf(-08.75), BigDecimal.valueOf(-63.90)));
+    LOCATIONS.put("Porto Velho",
+      new Pair<>(BigDecimal.valueOf(-08.75), BigDecimal.valueOf(-63.90)));
     LOCATIONS.put("Recife", new Pair<>(BigDecimal.valueOf(-08.10), BigDecimal.valueOf(-34.88)));
-    LOCATIONS.put("Rio de Janeiro", new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-43.23)));
+    LOCATIONS.put("Rio de Janeiro",
+      new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-43.23)));
     LOCATIONS.put("Santarém", new Pair<>(BigDecimal.valueOf(-02.43), BigDecimal.valueOf(-54.68)));
     LOCATIONS.put("São Paulo", new Pair<>(BigDecimal.valueOf(-23.53), BigDecimal.valueOf(-46.62)));
     LOCATION_KEYS.addAll(LOCATIONS.keySet());
@@ -55,29 +76,34 @@ public class BulkDataGeneratorMapper extends
 
   /** {@inheritDoc} */
   @Override
-  protected void setup(Context context) throws IOException,
-    InterruptedException {
+  protected void setup(Context context) throws IOException, InterruptedException {
     Configuration c = context.getConfiguration();
     splitCount = c.getInt(SPLIT_COUNT_KEY, 1);
   }
 
   /**
-   * Generates a single record based on value set to the key by {@link BulkDataGeneratorRecordReader#getCurrentKey()}.
-   * {@link Utility.TableColumnNames#TOOL_EVENT_ID} is first part of row key. Keeping first {@link Utility#SPLIT_PREFIX_LENGTH} characters as index of the record to be generated ensures that records are equally distributed across all regions of the table since region boundaries are generated in similar fashion. Check {@link Utility#createTable(Admin, String, int, Map)} method for region split info.
-   * @param key - The key having index of next record to be generated
-   * @param value - Value associated with the key (not used)
+   * Generates a single record based on the value set as the key by
+   * {@link BulkDataGeneratorRecordReader#getCurrentKey()}.
+   * {@link Utility.TableColumnNames#TOOL_EVENT_ID} is the first part of the row key. Keeping its
+   * first {@link Utility#SPLIT_PREFIX_LENGTH} characters as the index of the record to be
+   * generated ensures that records are equally distributed across all regions of the table, since
+   * region boundaries are generated in a similar fashion. Check
+   * {@link Utility#createTable(Admin, String, int, Map)} for region split info.
+   * @param key     - The key holding the index of the next record to be generated
+   * @param value   - Value associated with the key (not used)
    * @param context - Context of the mapper container
-   * @throws IOException
-   * @throws InterruptedException
    */
   @Override
   protected void map(Text key, NullWritable value, Context context)
-    throws IOException, InterruptedException {
+    throws IOException, InterruptedException {
 
-    int recordIndex = Integer.parseInt(key.toString());
+    int recordIndex = Integer.parseInt(key.toString());
 
     // <6-characters-for-region-boundary-prefix>_<15-random-characters>_<record-index>
-    final String toolEventId = String.format("%0" + Utility.SPLIT_PREFIX_LENGTH + "d", recordIndex%(splitCount+1)) + "_" + EnvironmentEdgeManager.currentTime() + (1e14 + (random.nextFloat() * 9e13)) + "_" + recordIndex;
+    final String toolEventId =
+      String.format("%0" + Utility.SPLIT_PREFIX_LENGTH + "d", recordIndex % (splitCount + 1)) + "_"
+        + EnvironmentEdgeManager.currentTime() + (1e14 + (random.nextFloat() * 9e13)) + "_"
+        + recordIndex;
     final String eventId = String.valueOf(Math.abs(random.nextInt(MAX_EVENT_ID)));
     final String vechileId = String.valueOf(Math.abs(random.nextInt(MAX_VEHICLE_ID)));
     final String speed = String.valueOf(Math.abs(random.nextInt(MAX_SPEED_KPH)));
@@ -86,7 +112,8 @@
     final BigDecimal latitude = coordinates.getFirst();
     final BigDecimal longitude = coordinates.getSecond();
 
-    final ImmutableBytesWritable hKey = new ImmutableBytesWritable(String.format("%s:%s", toolEventId, ORG_ID).getBytes());
+    final ImmutableBytesWritable hKey =
+      new ImmutableBytesWritable(String.format("%s:%s", toolEventId, ORG_ID).getBytes());
     addKeyValue(context, hKey, Utility.TableColumnNames.ORG_ID, ORG_ID);
     addKeyValue(context, hKey, Utility.TableColumnNames.TOOL_EVENT_ID, toolEventId);
     addKeyValue(context, hKey, Utility.TableColumnNames.EVENT_ID, eventId);
@@ -95,14 +122,17 @@
     addKeyValue(context, hKey, Utility.TableColumnNames.LATITUDE, latitude.toString());
     addKeyValue(context, hKey, Utility.TableColumnNames.LONGITUDE, longitude.toString());
     addKeyValue(context, hKey, Utility.TableColumnNames.LOCATION, location);
-    addKeyValue(context, hKey, Utility.TableColumnNames.TIMESTAMP, String.valueOf(EnvironmentEdgeManager.currentTime()));
+    addKeyValue(context, hKey, Utility.TableColumnNames.TIMESTAMP,
+      String.valueOf(EnvironmentEdgeManager.currentTime()));
 
     context.getCounter(Counters.ROWS_GENERATED).increment(1);
   }
 
-  private void addKeyValue(final Context context, ImmutableBytesWritable key, final Utility.TableColumnNames columnName, final String value)
-    throws IOException, InterruptedException {
-    KeyValue kv = new KeyValue(key.get(), COLUMN_FAMILY_BYTES, columnName.getColumnName(), value.getBytes());
+  private void addKeyValue(final Context context, ImmutableBytesWritable key,
+    final Utility.TableColumnNames columnName, final String value)
+    throws IOException, InterruptedException {
+    KeyValue kv =
+      new KeyValue(key.get(), COLUMN_FAMILY_BYTES, columnName.getColumnName(), value.getBytes());
     context.write(key, kv);
   }
 }
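The javadoc above is easiest to verify with concrete numbers. The first Utility.SPLIT_PREFIX_LENGTH characters of toolEventId come from recordIndex % (splitCount + 1), zero-padded, which is the same scheme Utility.createTable() uses to build region boundary keys. A worked example, assuming SPLIT_PREFIX_LENGTH is 6 (consistent with the <6-characters-for-region-boundary-prefix> comment in map() above):

public class RowKeyPrefixExample {
  public static void main(String[] args) {
    final int splitPrefixLength = 6; // assumed value of Utility.SPLIT_PREFIX_LENGTH
    final int splitCount = 10;       // 10 split keys -> 11 regions
    final int recordIndex = 12345;   // index handed to map() as the Text key

    // Same expression as in map() above.
    String prefix = String.format("%0" + splitPrefixLength + "d", recordIndex % (splitCount + 1));

    // 12345 % 11 == 3, so prefix == "000003" and the row lands in the region
    // [000003, 000004). Consecutive indexes cycle through all 11 prefixes, which
    // is what spreads the generated rows evenly across regions.
    System.out.println(prefix); // prints 000003
  }
}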
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java
index 010e53a7512..f4ecc659e51 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java
@@ -1,13 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hbase.util.bulkdatagenerator;
 
+import java.io.IOException;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 
-import java.io.IOException;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 
 public class BulkDataGeneratorRecordReader extends RecordReader<Text, NullWritable> {
 
@@ -16,14 +33,16 @@ public class BulkDataGeneratorRecordReader extends RecordReader<Text, NullWritable>
-    Preconditions.checkArgument(numRecordsToCreate > 0, "Number of records to be created by per mapper should be greater than 0.");
+    Preconditions.checkArgument(numRecordsToCreate > 0,
+      "Number of records to be created per mapper should be greater than 0.");
   }
 
   @Override
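Only fragments of the record reader survive in the hunks above (its fields and initialize() are largely elided), but its contract is clear from the mapper: getCurrentKey() hands map() a Text holding the index of the next record, and the reader stops after the configured per-mapper record count. Below is a minimal sketch of such a counting reader; the config key and field names are illustrative, not the PR's own.

package org.apache.hadoop.hbase.util.bulkdatagenerator;

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class CountingRecordReader extends RecordReader<Text, NullWritable> {

  public static final String RECORDS_PER_TASK_KEY = "records.per.task"; // illustrative key

  private long numRecordsToCreate;
  private long createdRecords = 0;
  private final Text key = new Text();

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    numRecordsToCreate = context.getConfiguration().getLong(RECORDS_PER_TASK_KEY, 0);
  }

  @Override
  public boolean nextKeyValue() {
    if (createdRecords >= numRecordsToCreate) {
      return false; // the mapper exits after the configured number of records
    }
    createdRecords++;
    // map() above parses this Text back into the record index.
    key.set(String.valueOf(createdRecords));
    return true;
  }

  @Override
  public Text getCurrentKey() {
    return key;
  }

  @Override
  public NullWritable getCurrentValue() {
    return NullWritable.get();
  }

  @Override
  public float getProgress() {
    return numRecordsToCreate == 0 ? 1.0f : (float) createdRecords / numRecordsToCreate;
  }

  @Override
  public void close() {
    // no resources to release
  }
}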
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java
index 2a26f18ec20..322431cd90b 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java
@@ -1,15 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hbase.util.bulkdatagenerator;
 
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.Parser;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -25,16 +38,21 @@ import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.util.GenericOptionsParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Parser;
 
 /**
- * A command line utility to generate pre-splitted HBase Tables with large amount (TBs) of random data, equally distributed among all regions.
+ * A command line utility to generate pre-split HBase tables with large amounts (TBs) of random
+ * data, equally distributed among all regions.
  */
 public class BulkDataGeneratorTool {
 
@@ -43,7 +61,7 @@ public class BulkDataGeneratorTool {
   /**
    * Prefix for the generated HFiles directory
    */
-  private static final String OUTPUT_DIRECTORY_PREFIX = "/bulk_data_generator/" ;
+  private static final String OUTPUT_DIRECTORY_PREFIX = "/bulk_data_generator/";
 
   /**
    * Number of mapper container to be launched for generating of HFiles
    */
@@ -61,7 +79,8 @@ public class BulkDataGeneratorTool {
   private String table;
 
   /**
-   * Number of splits for the {@link #table}. Number of regions for the table will be ({@link #splitCount} + 1).
+   * Number of splits for the {@link #table}. Number of regions for the table will be
+   * ({@link #splitCount} + 1).
    */
   private int splitCount;
 
@@ -93,7 +112,7 @@ public class BulkDataGeneratorTool {
       return false;
     }
 
-    if(line.hasOption("-h")) {
+    if (line.hasOption("-h")) {
       printUsage();
       return true;
     }
@@ -101,12 +120,13 @@ public class BulkDataGeneratorTool {
     Path outputDirectory = generateOutputDirectory();
     logger.info("HFiles will be generated at " + outputDirectory.toString());
 
-    try(Connection connection = ConnectionFactory.createConnection(conf)) {
+    try (Connection connection = ConnectionFactory.createConnection(conf)) {
       final Admin admin = connection.getAdmin();
       final TableName tableName = TableName.valueOf(table);
-      if(admin.tableExists(tableName)) {
-        if(deleteTableIfExist) {
-          logger.info("Deleting the table since it already exist and delete-if-exist flag is set to true");
+      if (admin.tableExists(tableName)) {
+        if (deleteTableIfExist) {
+          logger.info(
+            "Deleting the table since it already exists and the delete-if-exist flag is set to true");
           Utility.deleteTable(admin, table);
         } else {
           logger.info("Table already exists, cannot generate HFiles for existing table.");
@@ -129,10 +149,11 @@ public class BulkDataGeneratorTool {
 
     boolean result = job.waitForCompletion(true);
 
-    if(result) {
+    if (result) {
       logger.info("HFiles generated successfully. Starting bulk load to " + table);
       LoadIncrementalHFiles loadIncrementalHFiles = new LoadIncrementalHFiles(conf);
-      int loadIncrementalResult = loadIncrementalHFiles.run(new String[] {outputDirectory.getFileSystem(conf).makeQualified(outputDirectory).toString(), table});
+      int loadIncrementalResult = loadIncrementalHFiles.run(new String[] {
+        outputDirectory.getFileSystem(conf).makeQualified(outputDirectory).toString(), table });
       return (loadIncrementalResult == 0);
     } else {
       logger.info("Failed to generate HFiles.");
@@ -146,8 +167,7 @@ public class BulkDataGeneratorTool {
     }
   }
 
-  protected Job createSubmittableJob(Configuration conf)
-    throws IOException {
+  protected Job createSubmittableJob(Configuration conf) throws IOException {
 
     conf.setInt(BulkDataGeneratorMapper.SPLIT_COUNT_KEY, splitCount);
     conf.setInt(BulkDataGeneratorInputFormat.MAPPER_TASK_COUNT_KEY, mapperCount);
@@ -167,30 +187,28 @@ public class BulkDataGeneratorTool {
     return job;
   }
 
-  /**
-   * Get the random output directory path where HFiles will be generated
-   * @return
-   */
+  /** Returns a random output directory path where HFiles will be generated */
   protected Path generateOutputDirectory() {
-    final String outputDirectory = OUTPUT_DIRECTORY_PREFIX + "/" + table + "-" + System.currentTimeMillis();
+    final String outputDirectory =
+      OUTPUT_DIRECTORY_PREFIX + "/" + table + "-" + System.currentTimeMillis();
     return new Path(outputDirectory);
   }
 
   /**
    * This method parses the command line parameters into instance variables
-   * @throws ParseException
    */
   protected void readCommandLineParameters(Configuration conf, CommandLine line)
-    throws ParseException, IOException {
+    throws ParseException, IOException {
    final List<String> genericParameters = new ArrayList<String>();
 
-    //Parse the generic options
+    // Parse the generic options
    for (Map.Entry<Object, Object> entry : line.getOptionProperties("D").entrySet()) {
      genericParameters.add("-D");
      genericParameters.add(entry.getKey() + "=" + entry.getValue());
    }
 
-    logger.info("Parsed generic parameters: " + Arrays.toString(genericParameters.toArray(new String[0])));
+    logger.info(
+      "Parsed generic parameters: " + Arrays.toString(genericParameters.toArray(new String[0])));
 
    new GenericOptionsParser(conf, genericParameters.toArray(new String[0]));
 
@@ -199,7 +217,8 @@ public class BulkDataGeneratorTool {
     mapperCount = Integer.parseInt(line.getOptionValue("mapper-count"));
     Preconditions.checkArgument(mapperCount > 0, "Mapper count must be greater than 0");
     splitCount = Integer.parseInt(line.getOptionValue("split-count"));
-    Preconditions.checkArgument((splitCount > 0) && (splitCount < Utility.MAX_SPLIT_COUNT), "Split count must be greater than 0 and less than " + Utility.MAX_SPLIT_COUNT);
+    Preconditions.checkArgument((splitCount > 0) && (splitCount < Utility.MAX_SPLIT_COUNT),
+      "Split count must be greater than 0 and less than " + Utility.MAX_SPLIT_COUNT);
     rowsPerMapper = Long.parseLong(line.getOptionValue("rows-per-mapper"));
     Preconditions.checkArgument(rowsPerMapper > 0, "Rows per mapper must be greater than 0");
     deleteTableIfExist = line.hasOption("delete-if-exist");
@@ -208,8 +227,8 @@ public class BulkDataGeneratorTool {
 
   private void parseTableOptions(final CommandLine line) {
     final String tableOptionsAsString = line.getOptionValue("table-options");
-    if(!StringUtils.isEmpty(tableOptionsAsString)) {
-      for(String tableOption : tableOptionsAsString.split(",")) {
+    if (!StringUtils.isEmpty(tableOptionsAsString)) {
+      for (String tableOption : tableOptionsAsString.split(",")) {
         final String[] keyValueSplit = tableOption.split("=");
         final String key = keyValueSplit[0];
         final String value = keyValueSplit[1];
@@ -218,37 +237,34 @@ public class BulkDataGeneratorTool {
     }
   }
 
-  /**
-   * @return the command line options required by the sor job.
-   */
+  /** Returns the command line options for {@link BulkDataGeneratorTool} */
   protected Options getOptions() {
     final Options options = new Options();
-    Option option = new Option("t", "table", true,
-      "The table name for which data need to be generated.");
+    Option option =
+      new Option("t", "table", true, "The table name for which data needs to be generated.");
     options.addOption(option);
 
     option = new Option("d", "delete-if-exist", false,
-      "If it's set, the table will be deleted if already exist.");
+      "If it's set, the table will be deleted if it already exists.");
     options.addOption(option);
 
-    option = new Option("mc", "mapper-count", true,
-      "The number of mapper containers to be launched.");
+    option =
+      new Option("mc", "mapper-count", true, "The number of mapper containers to be launched.");
     options.addOption(option);
 
     option = new Option("sc", "split-count", true,
-      "The number of regions/pre-splits to be created for the table.");
+      "The number of regions/pre-splits to be created for the table.");
     options.addOption(option);
 
-    option = new Option("r", "rows-per-mapper", true,
-      "The number of rows to be generated PER mapper.");
+    option =
+      new Option("r", "rows-per-mapper", true, "The number of rows to be generated PER mapper.");
     options.addOption(option);
 
-    option = new Option("o", "table-options", true,
-      "Table options to be set while creating the table.");
+    option =
+      new Option("o", "table-options", true, "Table options to be set while creating the table.");
     options.addOption(option);
 
-    option = new Option("h", "help", false,
-      "Show help message for the tool");
+    option = new Option("h", "help", false, "Show help message for the tool");
     options.addOption(option);
 
     return options;
@@ -259,10 +275,11 @@
     helpFormatter.setWidth(120);
     final String helpMessageCommand = "hbase " + BulkDataGeneratorTool.class.getName();
     final String commandSyntax = helpMessageCommand + " [-D]*";
-    final String helpMessageSuffix = "Examples:\n"
-      + helpMessageCommand + " -t TEST_TABLE -mc 10 -r 100 -sc 10\n"
-      + helpMessageCommand + " -t TEST_TABLE -mc 10 -r 100 -sc 10 -d -o \"DISABLE_BACKUP=true,NORMALIZATION_ENABLED=false\"\n"
-      + helpMessageCommand + " -t TEST_TABLE -mc 10 -r 100 -sc 10 -Dmapreduce.map.memory.mb=8192 -Dmapreduce.map.java.opts=-Xmx7782m\n";
+    final String helpMessageSuffix = "Examples:\n" + helpMessageCommand
+      + " -t TEST_TABLE -mc 10 -r 100 -sc 10\n" + helpMessageCommand
+      + " -t TEST_TABLE -mc 10 -r 100 -sc 10 -d -o \"DISABLE_BACKUP=true,NORMALIZATION_ENABLED=false\"\n"
+      + helpMessageCommand
+      + " -t TEST_TABLE -mc 10 -r 100 -sc 10 -Dmapreduce.map.memory.mb=8192 -Dmapreduce.map.java.opts=-Xmx7782m\n";
     helpFormatter.printHelp(commandSyntax, "", getOptions(), helpMessageSuffix);
   }
 }
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java
index 8fcc2936e61..cc5488224ca 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java
@@ -1,34 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hbase.util.bulkdatagenerator;
 
+import java.io.IOException;
+import java.util.Map;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 
-import java.io.IOException;
-import java.util.Map;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 
 public class Utility {
 
   /**
-   * Schema for HBase table to be generated by generated and populated by {@link BulkDataGeneratorTool}
+   * Schema for the HBase table generated and populated by
+   * {@link BulkDataGeneratorTool}
    */
   public enum TableColumnNames {
-    ORG_ID ("orgId".getBytes()),
-    TOOL_EVENT_ID ("toolEventId".getBytes()),
-    EVENT_ID ("eventId".getBytes()),
-    VEHICLE_ID ("vehicleId".getBytes()),
-    SPEED ("speed".getBytes()),
-    LATITUDE ("latitude".getBytes()),
-    LONGITUDE ("longitude".getBytes()),
-    LOCATION ("location".getBytes()),
-    TIMESTAMP ("timestamp".getBytes());
+    ORG_ID("orgId".getBytes()),
+    TOOL_EVENT_ID("toolEventId".getBytes()),
+    EVENT_ID("eventId".getBytes()),
+    VEHICLE_ID("vehicleId".getBytes()),
+    SPEED("speed".getBytes()),
+    LATITUDE("latitude".getBytes()),
+    LONGITUDE("longitude".getBytes()),
+    LOCATION("location".getBytes()),
+    TIMESTAMP("timestamp".getBytes());
 
     private final byte[] columnName;
 
-    TableColumnNames (byte[] column) {
+    TableColumnNames(byte[] column) {
       this.columnName = column;
     }
 
@@ -49,23 +67,28 @@ public class Utility {
   }
 
   /**
-   * Creates a pre-splitted HBase Table having single column family ({@link #COLUMN_FAMILY}) and sequential splits with {@link #SPLIT_PREFIX_LENGTH} length character prefix.
-   * Example: If a table (TEST_TABLE_1) need to be generated with splitCount as 10, table would be created with (10+1) regions with boundaries end-keys as (000000-000001, 000001-000002, 000002-000003, ...., 0000010-)
-   * @param admin - Admin object associated with HBase connection
-   * @param tableName - Name of table to be created
-   * @param splitCount - Number of splits for the table (Number of regions will be splitCount + 1)
+   * Creates a pre-split HBase table having a single column family ({@link #COLUMN_FAMILY}) and
+   * sequential splits with a {@link #SPLIT_PREFIX_LENGTH}-character prefix. Example: if a
+   * table (TEST_TABLE_1) needs to be generated with splitCount as 10, the table would be created
+   * with (10+1) regions with region boundary end-keys as (000000-000001, 000001-000002,
+   * 000002-000003, ...., 000010-)
+   * @param admin        - Admin object associated with HBase connection
+   * @param tableName    - Name of table to be created
+   * @param splitCount   - Number of splits for the table (number of regions will be splitCount + 1)
    * @param tableOptions - Additional HBase metadata properties to be set for the table
-   * @throws IOException
    */
-  public static void createTable(Admin admin, String tableName, int splitCount, Map<String, String> tableOptions) throws IOException {
+  public static void createTable(Admin admin, String tableName, int splitCount,
+    Map<String, String> tableOptions) throws IOException {
     Preconditions.checkArgument(splitCount > 0, "Split count must be greater than 0");
-    TableDescriptorBuilder tableDescriptorBuilder = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName));
+    TableDescriptorBuilder tableDescriptorBuilder =
+      TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName));
     tableOptions.forEach(tableDescriptorBuilder::setValue);
-    TableDescriptor tableDescriptor = tableDescriptorBuilder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(COLUMN_FAMILY)).build();
+    TableDescriptor tableDescriptor = tableDescriptorBuilder
+      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(COLUMN_FAMILY)).build();
     // Pre-splitting table based on splitCount
     byte[][] splitKeys = new byte[splitCount][];
-    for(int i = 0; i
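The excerpt cuts off inside createTable()'s split-key loop. Going by the javadoc example above (splitCount 10 yields end keys 000001 through 000010, hence 11 regions), the loop plausibly completes by filling splitKeys with zero-padded sequential prefixes; a sketch under that assumption:

// Assumed completion of the truncated loop, matching the javadoc example.
byte[][] splitKeys = new byte[splitCount][];
for (int i = 0; i < splitCount; i++) {
  // Zero-padded, SPLIT_PREFIX_LENGTH-wide sequential keys: "000001" .. "000010".
  splitKeys[i] = String.format("%0" + SPLIT_PREFIX_LENGTH + "d", i + 1).getBytes();
}
// Admin.createTable(TableDescriptor, byte[][]) pre-splits at these keys, yielding
// regions (-inf, 000001), [000001, 000002), ..., [000010, +inf): one region per
// row-key prefix value the mapper can produce.
admin.createTable(tableDescriptor, splitKeys);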