diff --git a/CHANGES.txt b/CHANGES.txt
index 7f5d5868717..148e987d8ab 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1014,6 +1014,8 @@ Release 0.21.0 - Unreleased
HBASE-3097 Merge in hbase-1200 doc on bloomfilters into hbase book
HBASE-2700 Test of: Handle master failover for regions in transition
HBASE-3115 HBaseClient wastes 1 TCP packet per RPC
+ HBASE-3076 Allow to disable automatic shipping of dependency jars
+ for mapreduce jobs (Bruno Dumon)
NEW FEATURES
HBASE-1961 HBase EC2 scripts
diff --git a/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java b/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
index e8558a024a5..db07ed1e0aa 100644
--- a/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
+++ b/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
@@ -56,6 +56,26 @@ public class TableMapReduceUtil {
Class<? extends TableMap> mapper,
Class<? extends WritableComparable> outputKeyClass,
Class<? extends Writable> outputValueClass, JobConf job) {
+ initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job, true);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The table name to read from.
+ * @param columns The columns to scan.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job configuration to adjust.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ */
+ public static void initTableMapJob(String table, String columns,
+      Class<? extends TableMap> mapper,
+      Class<? extends WritableComparable> outputKeyClass,
+      Class<? extends Writable> outputValueClass, JobConf job, boolean addDependencyJars) {
job.setInputFormat(TableInputFormat.class);
job.setMapOutputValueClass(outputValueClass);
@@ -63,10 +83,12 @@ public class TableMapReduceUtil {
job.setMapperClass(mapper);
FileInputFormat.addInputPaths(job, table);
job.set(TableInputFormat.COLUMN_LIST, columns);
- try {
- addDependencyJars(job);
- } catch (IOException e) {
- e.printStackTrace();
+ if (addDependencyJars) {
+ try {
+ addDependencyJars(job);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
}
}
@@ -99,6 +121,25 @@ public class TableMapReduceUtil {
public static void initTableReduceJob(String table,
Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
throws IOException {
+ initTableReduceJob(table, reducer, job, partitioner, true);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job configuration to adjust.
+   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
+   * default partitioner.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReduceJob(String table,
+      Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
+ boolean addDependencyJars) throws IOException {
job.setOutputFormat(TableOutputFormat.class);
job.setReducerClass(reducer);
job.set(TableOutputFormat.OUTPUT_TABLE, table);
@@ -114,7 +155,9 @@ public class TableMapReduceUtil {
} else if (partitioner != null) {
job.setPartitionerClass(partitioner);
}
- addDependencyJars(job);
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
}
/**
diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
index fd4432605cd..4ca8053e044 100644
--- a/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
+++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
@@ -69,6 +69,31 @@ public class TableMapReduceUtil {
Class<? extends TableMapper> mapper,
Class<? extends WritableComparable> outputKeyClass,
Class<? extends Writable> outputValueClass, Job job)
+ throws IOException {
+ initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
+ job, true);
+ }
+
+ /**
+ * Use this before submitting a TableMap job. It will appropriately set up
+ * the job.
+ *
+ * @param table The table name to read from.
+ * @param scan The scan instance with the columns, time range etc.
+ * @param mapper The mapper class to use.
+ * @param outputKeyClass The class of the output key.
+ * @param outputValueClass The class of the output value.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When setting up the details fails.
+ */
+ public static void initTableMapperJob(String table, Scan scan,
+      Class<? extends TableMapper> mapper,
+      Class<? extends WritableComparable> outputKeyClass,
+      Class<? extends Writable> outputValueClass, Job job,
+ boolean addDependencyJars)
throws IOException {
job.setInputFormatClass(TableInputFormat.class);
if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
@@ -77,7 +102,9 @@ public class TableMapReduceUtil {
job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
job.getConfiguration().set(TableInputFormat.SCAN,
convertScanToString(scan));
- addDependencyJars(job);
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
}
/**
@@ -167,6 +194,38 @@ public class TableMapReduceUtil {
Class<? extends TableReducer> reducer, Job job,
Class partitioner, String quorumAddress, String serverClass,
String serverImpl) throws IOException {
+ initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
+ serverClass, serverImpl, true);
+ }
+
+ /**
+ * Use this before submitting a TableReduce job. It will
+ * appropriately set up the JobConf.
+ *
+ * @param table The output table.
+ * @param reducer The reducer class to use.
+ * @param job The current job to adjust. Make sure the passed job is
+ * carrying all necessary HBase configuration.
+   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
+   * default partitioner.
+   * @param quorumAddress Distant cluster to write to; default is null for
+   * output to the cluster that is designated in <code>hbase-site.xml</code>.
+   * Set this String to the zookeeper ensemble of an alternate remote cluster
+   * when you would have the reduce write a cluster that is other than the
+   * default; e.g. copying tables between clusters, the source would be
+   * designated by <code>hbase-site.xml</code> and this param would have the
+   * ensemble address of the remote cluster. The format to pass is particular.
+   * Pass <code>&lt;hbase.zookeeper.quorum&gt; ':' &lt;ZOOKEEPER_ZNODE_PARENT&gt;</code>.
+ * @param serverClass redefined hbase.regionserver.class
+ * @param serverImpl redefined hbase.regionserver.impl
+ * @param addDependencyJars upload HBase jars and jars for any of the configured
+ * job classes via the distributed cache (tmpjars).
+ * @throws IOException When determining the region count fails.
+ */
+ public static void initTableReducerJob(String table,
+      Class<? extends TableReducer> reducer, Job job,
+ Class partitioner, String quorumAddress, String serverClass,
+ String serverImpl, boolean addDependencyJars) throws IOException {
Configuration conf = job.getConfiguration();
job.setOutputFormatClass(TableOutputFormat.class);
@@ -198,7 +257,10 @@ public class TableMapReduceUtil {
} else if (partitioner != null) {
job.setPartitionerClass(partitioner);
}
- addDependencyJars(job);
+
+ if (addDependencyJars) {
+ addDependencyJars(job);
+ }
}
/**