diff --git a/hbase-mapreduce/.flattened-pom.xml b/hbase-mapreduce/.flattened-pom.xml
new file mode 100644
index 00000000000..2c60e3aa8f6
--- /dev/null
+++ b/hbase-mapreduce/.flattened-pom.xml
@@ -0,0 +1,1179 @@
+ 4.0.0
+ org.apache.hbase
+ hbase-mapreduce
+ 3.0.0-beta-1-SNAPSHOT
+ Apache HBase - MapReduce
+ This module contains implementations of InputFormat, OutputFormat, Mapper, Reducer, etc which
+ are needed for running MR jobs on tables, WALs, HFiles and other HBase specific constructs.
+ It also contains a bunch of tools: RowCounter, ImportTsv, Import, Export, CompactionTool,
+ ExportSnapshot, WALPlayer, etc
+ https://hbase.apache.org/hbase-build-configuration/hbase-mapreduce
+ 2007
+ The Apache Software Foundation
+ https://www.apache.org/
+ Apache License, Version 2.0
+ https://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+ achouhan
+ Abhishek Singh Chouhan
+ achouhan@apache.org
+ +5
+ acube123
+ Amitanand S. Aiyer
+ acube123@apache.org
+ -8
+ allan163
+ Allan Yang
+ allan163@apache.org
+ +8
+ appy
+ Apekshit Sharma
+ appy@apache.org
+ -8
+ anastasia
+ Anastasia Braginsky
+ anastasia@apache.org
+ +2
+ apurtell
+ Andrew Purtell
+ apurtell@apache.org
+ -8
+ anoopsamjohn
+ Anoop Sam John
+ anoopsamjohn@apache.org
+ +5
+ antonov
+ Mikhail Antonov
+ antonov@apache.org
+ -8
+ ashishsinghi
+ Ashish Singhi
+ ashishsinghi@apache.org
+ +5
+ ashu
+ Ashu Pachauri
+ ashu@apache.org
+ +5
+ bharathv
+ Bharath Vissapragada
+ bharathv@apache.org
+ -8
+ binlijin
+ Lijin Bin
+ binlijin@apache.org
+ +8
+ brfrn169
+ Toshihiro Suzuki
+ brfrn169@apache.org
+ +9
+ busbey
+ Sean Busbey
+ busbey@apache.org
+ -6
+ chenglei
+ Cheng Lei
+ chenglei@apache.org
+ +8
+ chenheng
+ Heng Chen
+ chenheng@apache.org
+ +8
+ chia7712
+ Chia-Ping Tsai
+ chia7712@apache.org
+ +8
+ ddas
+ Devaraj Das
+ ddas@apache.org
+ -8
+ dimaspivak
+ Dima Spivak
+ dimaspivak@apache.org
+ -8
+ dmeil
+ Doug Meil
+ dmeil@apache.org
+ -5
+ eclark
+ Elliott Clark
+ eclark@apache.org
+ -8
+ elserj
+ Josh Elser
+ elserj@apache.org
+ -5
+ enis
+ Enis Soztutar
+ enis@apache.org
+ -8
+ eshcar
+ Eshcar Hillel
+ eshcar@apache.org
+ +2
+ fenghh
+ Honghua Feng
+ fenghh@apache.org
+ +8
+ garyh
+ Gary Helmling
+ garyh@apache.org
+ -8
+ gchanan
+ Gregory Chanan
+ gchanan@apache.org
+ -8
+ gjacoby
+ Geoffrey Jacoby
+ gjacoby@apache.org
+ -5
+ gxcheng
+ Guangxu Cheng
+ gxcheng@apache.org
+ +8
+ haxiaolin
+ Xiaolin Ha
+ haxiaolin@apache.org
+ +8
+ huaxiangsun
+ Huaxiang Sun
+ huaxiangsun@apache.org
+ -8
+ jdcryans
+ Jean-Daniel Cryans
+ jdcryans@apache.org
+ -8
+ jeffreyz
+ Jeffrey Zhong
+ jeffreyz@apache.org
+ -8
+ jerryjch
+ Jing Chen (Jerry) He
+ jerryjch@apache.org
+ -8
+ jyates
+ Jesse Yates
+ jyates@apache.org
+ -8
+ jgray
+ Jonathan Gray
+ jgray@fb.com
+ -8
+ jingchengdu
+ Jingcheng Du
+ jingchengdu@apache.org
+ +8
+ esteban
+ Esteban Gutierrez
+ esteban@apache.org
+ -8
+ janh
+ Jan Hentschel
+ janh@apache.org
+ +1
+ jmhsieh
+ Jonathan Hsieh
+ jmhsieh@apache.org
+ -8
+ jxiang
+ Jimmy Xiang
+ jxiang@apache.org
+ -8
+ kannan
+ Kannan Muthukkaruppan
+ kannan@fb.com
+ -8
+ karthik
+ Karthik Ranganathan
+ kranganathan@fb.com
+ -8
+ larsfrancke
+ Lars Francke
+ larsfrancke@apache.org
+ Europe/Berlin
+ larsgeorge
+ Lars George
+ larsgeorge@apache.org
+ +1
+ larsh
+ Lars Hofhansl
+ larsh@apache.org
+ -8
+ liangxie
+ Liang Xie
+ liangxie@apache.org
+ +8
+ liushaohui
+ Shaohui Liu
+ liushaohui@apache.org
+ +8
+ liyin
+ Liyin Tang
+ liyin.tang@fb.com
+ -8
+ liyu
+ Yu Li
+ liyu@apache.org
+ +8
+ mbautin
+ Mikhail Bautin
+ mbautin@apache.org
+ -8
+ mbertozzi
+ Matteo Bertozzi
+ mbertozzi@apache.org
+ 0
+ mdrob
+ Mike Drob
+ mdrob@apache.org
+ -5
+ meszibalu
+ Balazs Meszaros
+ meszibalu@apache.org
+ +1
+ misty
+ Misty Stanley-Jones
+ misty@apache.org
+ -8
+ ndimiduk
+ Nick Dimiduk
+ ndimiduk@apache.org
+ -8
+ nihaljain
+ Nihal Jain
+ nihaljain@apache.org
+ +5
+ niuyulin
+ Yulin Niu
+ niuyulin@apache.org
+ +8
+ nkeywal
+ Nicolas Liochon
+ nkeywal@apache.org
+ +1
+ nspiegelberg
+ Nicolas Spiegelberg
+ nspiegelberg@fb.com
+ -8
+ octo47
+ Andrey Stepachev
+ octo47@gmail.com
+ 0
+ openinx
+ Zheng Hu
+ openinx@apache.org
+ +8
+ pankajkumar
+ Pankaj Kumar
+ pankajkumar@apache.org
+ +5
+ psomogyi
+ Peter Somogyi
+ psomogyi@apache.org
+ +1
+ rajeshbabu
+ Rajeshbabu Chintaguntla
+ rajeshbabu@apache.org
+ +5
+ ramkrishna
+ Ramkrishna S Vasudevan
+ ramkrishna@apache.org
+ +5
+ rawson
+ Ryan Rawson
+ rawson@apache.org
+ -8
+ reidchan
+ Reid Chan
+ reidchan@apache.org
+ +8
+ shahrs87
+ Rushabh Shah
+ shahrs87@apache.org
+ -8
+ sakthi
+ Sakthi Vel
+ sakthi@apache.org
+ -8
+ sershe
+ Sergey Shelukhin
+ sershe@apache.org
+ -8
+ ssrungarapu
+ Srikanth Srungarapu
+ ssrungarapu@apache.org
+ -8
+ stack
+ Michael Stack
+ stack@apache.org
+ -8
+ syuanjiang
+ Stephen Yuan Jiang
+ syuanjiang@apache.org
+ -8
+ taklwu
+ Tak-Lon (Stephen) Wu
+ taklwu@apache.org
+ -8
+ tedyu
+ Ted Yu
+ yuzhihong@gmail.com
+ -8
+ tianhang
+ Tianhang Tang
+ tianhang@apache.org
+ +8
+ tianjy
+ tianjy@apache.org
+ +8
+ todd
+ Todd Lipcon
+ todd@apache.org
+ -8
+ toffer
+ Francis Liu
+ toffer@apache.org
+ -8
+ vikasv
+ Vikas Vishwakarma
+ vikasv@apache.org
+ +5
+ virag
+ Virag Kothari
+ virag@yahoo-inc.com
+ -8
+ vjasani
+ Viraj Jasani
+ vjasani@apache.org
+ +5
+ water
+ Xiang Li
+ xiangli@apache.org
+ +8
+ wchevreuil
+ Wellington Chevreuil
+ wchevreuil@apache.org
+ 0
+ weichiu
+ Wei-Chiu Chuang
+ weichiu@apache.org
+ -8
+ xucang
+ Xu Cang
+ xucang@apache.org
+ -8
+ yangzhe1991
+ Phil Yang
+ yangzhe1991@apache.org
+ +8
+ zghao
+ Guanghao Zhang
+ zghao@apache.org
+ +8
+ zhangduo
+ Duo Zhang
+ zhangduo@apache.org
+ +8
+ zhaobaiqiang
+ Baiqiang Zhao
+ zhaobaiqiang@apache.org
+ +8
+ zjushch
+ Chunhui Shen
+ zjushch@apache.org
+ +8
+ churro
+ Rahul Gidwani
+ churro@apache.org
+ -8
+ yiliang
+ Yi Liang
+ yiliang@apache.org
+ -8
+ zyork
+ Zach York
+ zyork@apache.org
+ -8
+ meiyi
+ Yi Mei
+ meiyi@apache.org
+ +8
+ wangzheng
+ Zheng (bsglz) Wang
+ wangzheng@apache.org
+ +8
+ sunxin
+ Xin Sun
+ sunxin@apache.org
+ +8
+ huangzhuoyue
+ Zhuoyue Huang
+ huangzhuoyue@apache.org
+ +8
+ xiaoyt
+ Yutong Xiao
+ xiaoyt@apache.org
+ +8
+ bbeaudreault
+ Bryan Beaudreault
+ bbeaudreault@apache.org
+ -5
+ heliangjun
+ Liangjun He
+ heliangjun@apache.org
+ +8
+ User List
+ user-subscribe@hbase.apache.org
+ user-unsubscribe@hbase.apache.org
+ user@hbase.apache.org
+ https://lists.apache.org/list.html?user@hbase.apache.org
+ https://dir.gmane.org/gmane.comp.java.hadoop.hbase.user
+ Developer List
+ dev-subscribe@hbase.apache.org
+ dev-unsubscribe@hbase.apache.org
+ dev@hbase.apache.org
+ https://lists.apache.org/list.html?dev@hbase.apache.org
+ https://dir.gmane.org/gmane.comp.java.hadoop.hbase.devel
+ Commits List
+ commits-subscribe@hbase.apache.org
+ commits-unsubscribe@hbase.apache.org
+ https://lists.apache.org/list.html?commits@hbase.apache.org
+ Issues List
+ issues-subscribe@hbase.apache.org
+ issues-unsubscribe@hbase.apache.org
+ https://lists.apache.org/list.html?issues@hbase.apache.org
+ Builds List
+ builds-subscribe@hbase.apache.org
+ builds-unsubscribe@hbase.apache.org
+ https://lists.apache.org/list.html?builds@hbase.apache.org
+ User (ZH) List
+ user-zh-subscribe@hbase.apache.org
+ user-zh-unsubscribe@hbase.apache.org
+ user-zh@hbase.apache.org
+ https://lists.apache.org/list.html?user-zh@hbase.apache.org
+ scm:git:git://gitbox.apache.org/repos/asf/hbase.git/hbase-build-configuration/hbase-mapreduce
+ scm:git:https://gitbox.apache.org/repos/asf/hbase.git/hbase-build-configuration/hbase-mapreduce
+ https://gitbox.apache.org/repos/asf?p=hbase.git/hbase-build-configuration/hbase-mapreduce
+ https://issues.apache.org/jira/browse/HBASE
+ apache.releases.https
+ Apache Release Distribution Repository
+ https://repository.apache.org/service/local/staging/deploy/maven2
+ apache.snapshots.https
+ Apache Development Snapshot Repository
+ https://repository.apache.org/content/repositories/snapshots
+ hbase.apache.org
+ HBase Website at hbase.apache.org
+ file:///tmp/hbase-build-configuration/hbase-mapreduce
+ org.apache.hbase.thirdparty
+ hbase-shaded-miscellaneous
+ 4.1.4
+ compile
+ org.apache.hbase.thirdparty
+ hbase-shaded-netty
+ 4.1.4
+ compile
+ org.apache.hbase.thirdparty
+ hbase-shaded-protobuf
+ 4.1.4
+ compile
+ org.apache.hbase
+ hbase-common
+ 3.0.0-beta-1-SNAPSHOT
+ compile
+ org.apache.hbase
+ hbase-zookeeper
+ 3.0.0-beta-1-SNAPSHOT
+ compile
+ com.google.code.findbugs
+ jsr305
+ com.github.spotbugs
+ spotbugs-annotations
+ org.apache.hbase
+ hbase-protocol-shaded
+ 3.0.0-beta-1-SNAPSHOT
+ compile
+ org.apache.hbase
+ hbase-metrics
+ 3.0.0-beta-1-SNAPSHOT
+ compile
+ org.apache.hbase
+ hbase-metrics-api
+ 3.0.0-beta-1-SNAPSHOT
+ compile
+ org.apache.hbase
+ hbase-asyncfs
+ 3.0.0-beta-1-SNAPSHOT
+ compile
+ io.dropwizard.metrics
+ metrics-core
+ 3.2.6
+ compile
+ org.slf4j
+ slf4j-api
+ 1.7.30
+ compile
+ io.opentelemetry
+ opentelemetry-api
+ 1.15.0
+ compile
+ org.apache.hbase
+ hbase-client
+ 3.0.0-beta-1-SNAPSHOT
+ compile
+ org.apache.hbase
+ hbase-hadoop-compat
+ 3.0.0-beta-1-SNAPSHOT
+ compile
+ org.apache.hbase
+ hbase-server
+ 3.0.0-beta-1-SNAPSHOT
+ compile
+ commons-logging
+ commons-logging
+ org.apache.hbase
+ hbase-replication
+ 3.0.0-beta-1-SNAPSHOT
+ compile
+ com.github.stephenc.findbugs
+ findbugs-annotations
+ 1.3.9-1
+ compile
+ true
+ commons-io
+ commons-io
+ 2.11.0
+ compile
+ org.apache.commons
+ commons-lang3
+ 3.9
+ compile
+ org.apache.zookeeper
+ zookeeper
+ 3.5.7
+ compile
+ com.google.code.findbugs
+ jsr305
+ com.github.spotbugs
+ spotbugs-annotations
+ jline
+ jline
+ com.sun.jmx
+ jmxri
+ com.sun.jdmk
+ jmxtools
+ javax.jms
+ jms
+ io.netty
+ netty
+ io.netty
+ netty-all
+ org.slf4j
+ slf4j-log4j12
+ log4j
+ log4j
+ org.apache.yetus
+ audience-annotations
+ 0.13.0
+ compile
+ org.apache.hadoop
+ hadoop-common
+ 3.2.4
+ compile
+ com.sun.jersey
+ jersey-core
+ com.sun.jersey
+ jersey-json
+ com.sun.jersey
+ jersey-servlet
+ com.sun.jersey
+ jersey-server
+ javax.servlet.jsp
+ jsp-api
+ javax.servlet
+ javax.servlet-api
+ stax
+ stax-api
+ io.netty
+ netty
+ com.google.code.findbugs
+ jsr305
+ junit
+ junit
+ org.codehaus.jackson
+ *
+ org.slf4j
+ slf4j-log4j12
+ log4j
+ log4j
+ ch.qos.reload4j
+ reload4j
+ org.slf4j
+ slf4j-reload4j
+ io.netty
+ netty
+ io.netty
+ netty-all
+ org.apache.hadoop
+ hadoop-hdfs
+ 3.2.4
+ compile
+ com.sun.jersey
+ jersey-core
+ com.sun.jersey
+ jersey-server
+ javax.servlet.jsp
+ jsp-api
+ javax.servlet
+ servlet-api
+ stax
+ stax-api
+ xerces
+ xercesImpl
+ org.codehaus.jackson
+ *
+ com.google.guava
+ guava
+ org.slf4j
+ slf4j-log4j12
+ log4j
+ log4j
+ ch.qos.reload4j
+ reload4j
+ org.slf4j
+ slf4j-reload4j
+ org.fusesource.leveldbjni
+ leveldbjni-all
+ org.openlabtesting.leveldbjni
+ leveldbjni-all
+ org.apache.hadoop
+ hadoop-hdfs
+ 3.2.4
+ test-jar
+ test
+ javax.servlet.jsp
+ jsp-api
+ javax.servlet
+ servlet-api
+ stax
+ stax-api
+ xerces
+ xercesImpl
+ org.codehaus.jackson
+ *
+ com.google.guava
+ guava
+ com.sun.jersey
+ jersey-core
+ org.slf4j
+ slf4j-log4j12
+ log4j
+ log4j
+ ch.qos.reload4j
+ reload4j
+ org.slf4j
+ slf4j-reload4j
+ javax.ws.rs
+ javax.ws.rs-api
+ 2.1.1
+ test
+ org.apache.hadoop
+ hadoop-minicluster
+ 3.2.4
+ test
+ javax.ws.rs
+ jsr311-api
+ org.apache.hadoop
+ hadoop-minikdc
+ 3.2.4
+ test
+ org.slf4j
+ slf4j-log4j12
+ ch.qos.reload4j
+ reload4j
+ org.slf4j
+ slf4j-reload4j
+ bouncycastle
+ bcprov-jdk15
+ org.apache.hadoop
+ hadoop-mapreduce-client-jobclient
+ 3.2.4
+ test-jar
+ test
+ org.codehaus.jackson
+ jackson-mapper-asl
+ org.codehaus.jackson
+ jackson-core-asl
+ javax.xml.bind
+ jaxb-api
+ javax.ws.rs
+ jsr311-api
+ false
+ apache.snapshots
+ Apache Snapshot Repository
+ https://repository.apache.org/snapshots
+ org.apache.felix
+ maven-bundle-plugin
+ 3.3.0
+ true
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
index eaea8c78e3f..f40951e945d 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
@@ -1,5 +1,27 @@
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.hadoop.hbase.util.bulkdatagenerator;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
@@ -8,17 +30,13 @@ import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
public class BulkDataGeneratorInputFormat extends InputFormat {
- public static final String MAPPER_TASK_COUNT_KEY = BulkDataGeneratorInputFormat.class.getName() + "mapper.task.count";
+ public static final String MAPPER_TASK_COUNT_KEY =
+ BulkDataGeneratorInputFormat.class.getName() + "mapper.task.count";
public List getSplits(JobContext job) throws IOException {
@@ -35,9 +53,10 @@ public class BulkDataGeneratorInputFormat extends InputFormat createRecordReader(InputSplit split, TaskAttemptContext context)
- throws IOException, InterruptedException {
- BulkDataGeneratorRecordReader bulkDataGeneratorRecordReader = new BulkDataGeneratorRecordReader();
+ public RecordReader createRecordReader(InputSplit split,
+ TaskAttemptContext context) throws IOException, InterruptedException {
+ BulkDataGeneratorRecordReader bulkDataGeneratorRecordReader =
+ new BulkDataGeneratorRecordReader();
bulkDataGeneratorRecordReader.initialize(split, context);
return bulkDataGeneratorRecordReader;
@@ -47,17 +66,21 @@ public class BulkDataGeneratorInputFormat extends InputFormat {
+public class BulkDataGeneratorMapper
+ extends Mapper {
/** Counter enumeration to count number of rows generated. */
public static enum Counters {
- public static final String SPLIT_COUNT_KEY = BulkDataGeneratorMapper.class.getName() + "split.count";
+ public static final String SPLIT_COUNT_KEY =
+ BulkDataGeneratorMapper.class.getName() + "split.count";
private static final String ORG_ID = "00D000000000062";
private static final int MAX_EVENT_ID = Integer.MAX_VALUE;
@@ -35,7 +53,8 @@ public class BulkDataGeneratorMapper extends
private static final int NUM_LOCATIONS = 10;
private static int splitCount = 1;
private static final Random random = new Random(System.currentTimeMillis());
- private static final Map> LOCATIONS = Maps.newHashMapWithExpectedSize(NUM_LOCATIONS);
+ private static final Map> LOCATIONS =
+ Maps.newHashMapWithExpectedSize(NUM_LOCATIONS);
private static final List LOCATION_KEYS = Lists.newArrayListWithCapacity(NUM_LOCATIONS);
static {
LOCATIONS.put("Belém", new Pair<>(BigDecimal.valueOf(-01.45), BigDecimal.valueOf(-48.48)));
@@ -43,9 +62,11 @@ public class BulkDataGeneratorMapper extends
LOCATIONS.put("Campinas", new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-47.05)));
LOCATIONS.put("Cuiaba", new Pair<>(BigDecimal.valueOf(-07.25), BigDecimal.valueOf(-58.42)));
LOCATIONS.put("Manaus", new Pair<>(BigDecimal.valueOf(-03.10), BigDecimal.valueOf(-60.00)));
- LOCATIONS.put("Porto Velho", new Pair<>(BigDecimal.valueOf(-08.75), BigDecimal.valueOf(-63.90)));
+ LOCATIONS.put("Porto Velho",
+ new Pair<>(BigDecimal.valueOf(-08.75), BigDecimal.valueOf(-63.90)));
LOCATIONS.put("Recife", new Pair<>(BigDecimal.valueOf(-08.10), BigDecimal.valueOf(-34.88)));
- LOCATIONS.put("Rio de Janeiro", new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-43.23)));
+ LOCATIONS.put("Rio de Janeiro",
+ new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-43.23)));
LOCATIONS.put("Santarém", new Pair<>(BigDecimal.valueOf(-02.43), BigDecimal.valueOf(-54.68)));
LOCATIONS.put("São Paulo", new Pair<>(BigDecimal.valueOf(-23.53), BigDecimal.valueOf(-46.62)));
@@ -55,29 +76,34 @@ public class BulkDataGeneratorMapper extends
/** {@inheritDoc} */
- protected void setup(Context context) throws IOException,
- InterruptedException {
+ protected void setup(Context context) throws IOException, InterruptedException {
Configuration c = context.getConfiguration();
splitCount = c.getInt(SPLIT_COUNT_KEY, 1);
- * Generates a single record based on value set to the key by {@link BulkDataGeneratorRecordReader#getCurrentKey()}.
- * {@link Utility.TableColumnNames#TOOL_EVENT_ID} is first part of row key. Keeping first {@link Utility#SPLIT_PREFIX_LENGTH} characters as index of the record to be generated ensures that records are equally distributed across all regions of the table since region boundaries are generated in similar fashion. Check {@link Utility#createTable(Admin, String, int, Map)} method for region split info.
- * @param key - The key having index of next record to be generated
- * @param value - Value associated with the key (not used)
+ * Generates a single record based on the value set to the key by
+ * {@link BulkDataGeneratorRecordReader#getCurrentKey()}.
+ * {@link Utility.TableColumnNames#TOOL_EVENT_ID} is the first part of the row key. Keeping the
+ * first {@link Utility#SPLIT_PREFIX_LENGTH} characters as the index of the record to be
+ * generated ensures that records are equally distributed across all regions of the table, since
+ * region boundaries are generated in a similar fashion. See
+ * {@link Utility#createTable(Admin, String, int, Map)} for region split info.
+ * @param key - The key having index of next record to be generated
+ * @param value - Value associated with the key (not used)
* @param context - Context of the mapper container
- * @throws IOException
- * @throws InterruptedException
protected void map(Text key, NullWritable value, Context context)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
- int recordIndex = Integer.parseInt(key.toString());
+ int recordIndex = Integer.parseInt(key.toString());
// <6-characters-for-region-boundary-prefix>_<15-random-characters>_
- final String toolEventId = String.format("%0" + Utility.SPLIT_PREFIX_LENGTH + "d", recordIndex%(splitCount+1)) + "_" + EnvironmentEdgeManager.currentTime() + (1e14 + (random.nextFloat() * 9e13)) + "_" + recordIndex;
+ final String toolEventId =
+ String.format("%0" + Utility.SPLIT_PREFIX_LENGTH + "d", recordIndex % (splitCount + 1)) + "_"
+ + EnvironmentEdgeManager.currentTime() + (1e14 + (random.nextFloat() * 9e13)) + "_"
+ + recordIndex;
final String eventId = String.valueOf(Math.abs(random.nextInt(MAX_EVENT_ID)));
final String vechileId = String.valueOf(Math.abs(random.nextInt(MAX_VEHICLE_ID)));
final String speed = String.valueOf(Math.abs(random.nextInt(MAX_SPEED_KPH)));
@@ -86,7 +112,8 @@ public class BulkDataGeneratorMapper extends
final BigDecimal latitude = coordinates.getFirst();
final BigDecimal longitude = coordinates.getSecond();
- final ImmutableBytesWritable hKey = new ImmutableBytesWritable(String.format("%s:%s", toolEventId, ORG_ID).getBytes());
+ final ImmutableBytesWritable hKey =
+ new ImmutableBytesWritable(String.format("%s:%s", toolEventId, ORG_ID).getBytes());
addKeyValue(context, hKey, Utility.TableColumnNames.ORG_ID, ORG_ID);
addKeyValue(context, hKey, Utility.TableColumnNames.TOOL_EVENT_ID, toolEventId);
addKeyValue(context, hKey, Utility.TableColumnNames.EVENT_ID, eventId);
@@ -95,14 +122,17 @@ public class BulkDataGeneratorMapper extends
addKeyValue(context, hKey, Utility.TableColumnNames.LATITUDE, latitude.toString());
addKeyValue(context, hKey, Utility.TableColumnNames.LONGITUDE, longitude.toString());
addKeyValue(context, hKey, Utility.TableColumnNames.LOCATION, location);
- addKeyValue(context, hKey, Utility.TableColumnNames.TIMESTAMP, String.valueOf(EnvironmentEdgeManager.currentTime()));
+ addKeyValue(context, hKey, Utility.TableColumnNames.TIMESTAMP,
+ String.valueOf(EnvironmentEdgeManager.currentTime()));
- private void addKeyValue(final Context context, ImmutableBytesWritable key, final Utility.TableColumnNames columnName, final String value)
- throws IOException, InterruptedException {
- KeyValue kv = new KeyValue(key.get(), COLUMN_FAMILY_BYTES, columnName.getColumnName(), value.getBytes());
+ private void addKeyValue(final Context context, ImmutableBytesWritable key,
+ final Utility.TableColumnNames columnName, final String value)
+ throws IOException, InterruptedException {
+ KeyValue kv =
+ new KeyValue(key.get(), COLUMN_FAMILY_BYTES, columnName.getColumnName(), value.getBytes());
context.write(key, kv);
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java
index 010e53a7512..f4ecc659e51 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java
@@ -1,13 +1,30 @@
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.hadoop.hbase.util.bulkdatagenerator;
+import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
-import java.io.IOException;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
public class BulkDataGeneratorRecordReader extends RecordReader {
@@ -16,14 +33,16 @@ public class BulkDataGeneratorRecordReader extends RecordReader 0, "Number of records to be created by per mapper should be greater than 0.");
+ Preconditions.checkArgument(numRecordsToCreate > 0,
+ "Number of records to be created by per mapper should be greater than 0.");
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java
index 2a26f18ec20..322431cd90b 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java
@@ -1,15 +1,28 @@
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.hadoop.hbase.util.bulkdatagenerator;
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.Parser;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -25,16 +38,21 @@ import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Parser;
- * A command line utility to generate pre-splitted HBase Tables with large amount (TBs) of random data, equally distributed among all regions.
+ * A command line utility to generate pre-split HBase tables with a large amount (TBs) of random
+ * data, equally distributed among all regions.
public class BulkDataGeneratorTool {
@@ -43,7 +61,7 @@ public class BulkDataGeneratorTool {
* Prefix for the generated HFiles directory
- private static final String OUTPUT_DIRECTORY_PREFIX = "/bulk_data_generator/" ;
+ private static final String OUTPUT_DIRECTORY_PREFIX = "/bulk_data_generator/";
* Number of mapper container to be launched for generating of HFiles
@@ -61,7 +79,8 @@ public class BulkDataGeneratorTool {
private String table;
- * Number of splits for the {@link #table}. Number of regions for the table will be ({@link #splitCount} + 1).
+ * Number of splits for the {@link #table}. Number of regions for the table will be
+ * ({@link #splitCount} + 1).
private int splitCount;
@@ -93,7 +112,7 @@ public class BulkDataGeneratorTool {
return false;
- if(line.hasOption("-h")) {
+ if (line.hasOption("-h")) {
return true;
@@ -101,12 +120,13 @@ public class BulkDataGeneratorTool {
Path outputDirectory = generateOutputDirectory();
logger.info("HFiles will be generated at " + outputDirectory.toString());
- try(Connection connection = ConnectionFactory.createConnection(conf)) {
+ try (Connection connection = ConnectionFactory.createConnection(conf)) {
final Admin admin = connection.getAdmin();
final TableName tableName = TableName.valueOf(table);
- if(admin.tableExists(tableName)) {
- if(deleteTableIfExist) {
- logger.info("Deleting the table since it already exist and delete-if-exist flag is set to true");
+ if (admin.tableExists(tableName)) {
+ if (deleteTableIfExist) {
+ logger.info(
+ "Deleting the table since it already exist and delete-if-exist flag is set to true");
Utility.deleteTable(admin, table);
} else {
logger.info("Table already exists, cannot generate HFiles for existing table.");
@@ -129,10 +149,11 @@ public class BulkDataGeneratorTool {
boolean result = job.waitForCompletion(true);
- if(result) {
+ if (result) {
logger.info("HFiles generated successfully. Starting bulk load to " + table);
LoadIncrementalHFiles loadIncrementalHFiles = new LoadIncrementalHFiles(conf);
- int loadIncrementalResult = loadIncrementalHFiles.run(new String[] {outputDirectory.getFileSystem(conf).makeQualified(outputDirectory).toString(), table});
+ int loadIncrementalResult = loadIncrementalHFiles.run(new String[] {
+ outputDirectory.getFileSystem(conf).makeQualified(outputDirectory).toString(), table });
return (loadIncrementalResult == 0);
} else {
logger.info("Failed to generate HFiles.");
@@ -146,8 +167,7 @@ public class BulkDataGeneratorTool {
- protected Job createSubmittableJob(Configuration conf)
- throws IOException {
+ protected Job createSubmittableJob(Configuration conf) throws IOException {
conf.setInt(BulkDataGeneratorMapper.SPLIT_COUNT_KEY, splitCount);
conf.setInt(BulkDataGeneratorInputFormat.MAPPER_TASK_COUNT_KEY, mapperCount);
@@ -167,30 +187,28 @@ public class BulkDataGeneratorTool {
return job;
- /**
- * Get the random output directory path where HFiles will be generated
- * @return
- */
+ /** Returns the timestamp-suffixed output directory path where HFiles will be generated. */
protected Path generateOutputDirectory() {
- final String outputDirectory = OUTPUT_DIRECTORY_PREFIX + "/" + table + "-" + System.currentTimeMillis();
+ final String outputDirectory =
+ OUTPUT_DIRECTORY_PREFIX + "/" + table + "-" + System.currentTimeMillis();
return new Path(outputDirectory);
* This method parses the command line parameters into instance variables
- * @throws ParseException
protected void readCommandLineParameters(Configuration conf, CommandLine line)
- throws ParseException, IOException {
+ throws ParseException, IOException {
final List genericParameters = new ArrayList();
- //Parse the generic options
+ // Parse the generic options
for (Map.Entry