diff --git a/CHANGES.txt b/CHANGES.txt
index 7f5d5868717..148e987d8ab 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1014,6 +1014,8 @@ Release 0.21.0 - Unreleased
    HBASE-3097  Merge in hbase-1200 doc on bloomfilters into hbase book
    HBASE-2700  Test of: Handle master failover for regions in transition
    HBASE-3115  HBaseClient wastes 1 TCP packet per RPC
+   HBASE-3076  Allow to disable automatic shipping of dependency jars
+               for mapreduce jobs (Bruno Dumon)
 
 NEW FEATURES
    HBASE-1961  HBase EC2 scripts
diff --git a/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java b/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
index e8558a024a5..db07ed1e0aa 100644
--- a/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
+++ b/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
@@ -56,6 +56,26 @@ public class TableMapReduceUtil {
     Class<? extends TableMap> mapper,
     Class<?> outputKeyClass,
     Class<?> outputValueClass, JobConf job) {
+    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job, true);
+  }
+
+  /**
+   * Use this before submitting a TableMap job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table The table name to read from.
+   * @param columns The columns to scan.
+   * @param mapper The mapper class to use.
+   * @param outputKeyClass The class of the output key.
+   * @param outputValueClass The class of the output value.
+   * @param job The current job configuration to adjust.
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *          job classes via the distributed cache (tmpjars).
+   */
+  public static void initTableMapJob(String table, String columns,
+    Class<? extends TableMap> mapper,
+    Class<?> outputKeyClass,
+    Class<?> outputValueClass, JobConf job, boolean addDependencyJars) {
 
     job.setInputFormat(TableInputFormat.class);
     job.setMapOutputValueClass(outputValueClass);
@@ -63,10 +83,12 @@ public class TableMapReduceUtil {
     job.setMapperClass(mapper);
     FileInputFormat.addInputPaths(job, table);
     job.set(TableInputFormat.COLUMN_LIST, columns);
-    try {
-      addDependencyJars(job);
-    } catch (IOException e) {
-      e.printStackTrace();
+    if (addDependencyJars) {
+      try {
+        addDependencyJars(job);
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
     }
   }
 
@@ -99,6 +121,25 @@ public class TableMapReduceUtil {
   public static void initTableReduceJob(String table,
     Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
   throws IOException {
+    initTableReduceJob(table, reducer, job, partitioner, true);
+  }
+
+  /**
+   * Use this before submitting a TableReduce job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table The output table.
+   * @param reducer The reducer class to use.
+   * @param job The current job configuration to adjust.
+   * @param partitioner Partitioner to use. Pass null to use
+   * default partitioner.
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *          job classes via the distributed cache (tmpjars).
+   * @throws IOException When determining the region count fails.
+   */
+  public static void initTableReduceJob(String table,
+    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
+    boolean addDependencyJars) throws IOException {
     job.setOutputFormat(TableOutputFormat.class);
     job.setReducerClass(reducer);
     job.set(TableOutputFormat.OUTPUT_TABLE, table);
@@ -114,7 +155,9 @@ public class TableMapReduceUtil {
     } else if (partitioner != null) {
       job.setPartitionerClass(partitioner);
     }
-    addDependencyJars(job);
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
   }
 
   /**
diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
index fd4432605cd..4ca8053e044 100644
--- a/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
+++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
@@ -69,6 +69,31 @@ public class TableMapReduceUtil {
       Class<? extends TableMapper> mapper,
       Class<? extends WritableComparable> outputKeyClass,
       Class<? extends Writable> outputValueClass, Job job)
+  throws IOException {
+    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
+        job, true);
+  }
+
+  /**
+   * Use this before submitting a TableMap job. It will appropriately set up
+   * the job.
+   *
+   * @param table The table name to read from.
+   * @param scan The scan instance with the columns, time range etc.
+   * @param mapper The mapper class to use.
+   * @param outputKeyClass The class of the output key.
+   * @param outputValueClass The class of the output value.
+   * @param job The current job to adjust. Make sure the passed job is
+   * carrying all necessary HBase configuration.
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *          job classes via the distributed cache (tmpjars).
+   * @throws IOException When setting up the details fails.
+   */
+  public static void initTableMapperJob(String table, Scan scan,
+      Class<? extends TableMapper> mapper,
+      Class<? extends WritableComparable> outputKeyClass,
+      Class<? extends Writable> outputValueClass, Job job,
+      boolean addDependencyJars)
   throws IOException {
     job.setInputFormatClass(TableInputFormat.class);
     if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
@@ -77,7 +102,9 @@ public class TableMapReduceUtil {
     job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
     job.getConfiguration().set(TableInputFormat.SCAN,
       convertScanToString(scan));
-    addDependencyJars(job);
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
   }
 
   /**
@@ -167,6 +194,38 @@ public class TableMapReduceUtil {
     Class<? extends TableReducer> reducer, Job job,
     Class partitioner, String quorumAddress, String serverClass,
     String serverImpl) throws IOException {
+    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
+        serverClass, serverImpl, true);
+  }
+
+  /**
+   * Use this before submitting a TableReduce job. It will
+   * appropriately set up the job.
+   *
+   * @param table The output table.
+   * @param reducer The reducer class to use.
+   * @param job The current job to adjust. Make sure the passed job is
+   * carrying all necessary HBase configuration.
+   * @param partitioner Partitioner to use. Pass null to use
+   * default partitioner.
+   * @param quorumAddress Distant cluster to write to; default is null for
+   * output to the cluster that is designated in hbase-site.xml.
+   * Set this String to the zookeeper ensemble of an alternate remote cluster
+   * when you would have the reduce write to a cluster that is other than the
+   * default; e.g. copying tables between clusters, the source would be
+   * designated by hbase-site.xml and this param would have the
+   * ensemble address of the remote cluster. The format to pass is particular.
+   * Pass <hbase.zookeeper.quorum> ':' <ZOOKEEPER_ZNODE_PARENT>.
+   * @param serverClass redefined hbase.regionserver.class
+   * @param serverImpl redefined hbase.regionserver.impl
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *          job classes via the distributed cache (tmpjars).
+   * @throws IOException When determining the region count fails.
+   */
+  public static void initTableReducerJob(String table,
+    Class<? extends TableReducer> reducer, Job job,
+    Class partitioner, String quorumAddress, String serverClass,
+    String serverImpl, boolean addDependencyJars) throws IOException {
     Configuration conf = job.getConfiguration();
 
     job.setOutputFormatClass(TableOutputFormat.class);
@@ -198,7 +257,10 @@ public class TableMapReduceUtil {
     } else if (partitioner != null) {
       job.setPartitionerClass(partitioner);
     }
-    addDependencyJars(job);
+
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
   }
 
   /**
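
Usage sketch (illustrative; not part of the diff above): the code below shows how a job submitter might use the new addDependencyJars flag to keep HBase from populating "tmpjars", for example when the HBase, ZooKeeper and job jars are already installed on the task-tracker classpath or are shipped explicitly with -libjars. The class names RowCountExample and RowCountMapper and the table name "mytable" are hypothetical; the seven-argument initTableMapperJob overload is the one added by this patch, and the zero-reduce/NullOutputFormat arrangement mirrors HBase's own RowCounter job.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class RowCountExample {

  /** Counts rows via a job counter; emits no map output. */
  static class RowCountMapper
      extends TableMapper<ImmutableBytesWritable, Result> {
    @Override
    protected void map(ImmutableBytesWritable row, Result values,
        Context context) {
      context.getCounter("example", "ROWS").increment(1);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = new Job(conf, "rowcount-example");
    job.setJarByClass(RowCountExample.class);

    Scan scan = new Scan();
    scan.setCaching(500); // fewer RPC round trips for a full-table scan

    // false => do not populate "tmpjars"; the submitter is then
    // responsible for getting the HBase, ZooKeeper and job classes onto
    // the task classpath (cluster install, HADOOP_CLASSPATH, -libjars).
    TableMapReduceUtil.initTableMapperJob("mytable", scan,
        RowCountMapper.class, ImmutableBytesWritable.class, Result.class,
        job, false);

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Passing true, or calling the original overloads (which now delegate with true), preserves the old behavior; the patch adds the same flag to the deprecated org.apache.hadoop.hbase.mapred variants.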