HBASE-3076 Allow to disable automatic shipping of dependency jars for mapreduce jobs

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1024317 13f79535-47bb-0310-9956-ffa450edef68
2010-10-19 16:44:06 +00:00 · 2010-10-19 16:44:06 +00:00 · b825581bc4
parent d4d7ceef68
commit b825581bc4
3 changed files with 114 additions and 7 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -1014,6 +1014,8 @@ Release 0.21.0 - Unreleased
   HBASE-3097  Merge in hbase-1200 doc on bloomfilters into hbase book
   HBASE-2700  Test of: Handle master failover for regions in transition
   HBASE-3115  HBaseClient wastes 1 TCP packet per RPC
+   HBASE-3076  Allow to disable automatic shipping of dependency jars
+               for mapreduce jobs (Bruno Dumon)

  NEW FEATURES
   HBASE-1961  HBase EC2 scripts
--- a/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
+++ b/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java
@ -56,6 +56,26 @@ public class TableMapReduceUtil {
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job) {
+    initTableMapJob(table, columns, mapper, outputKeyClass, outputValueClass, job, true);
+  }
+
+  /**
+   * Use this before submitting a TableMap job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table  The table name to read from.
+   * @param columns  The columns to scan.
+   * @param mapper  The mapper class to use.
+   * @param outputKeyClass  The class of the output key.
+   * @param outputValueClass  The class of the output value.
+   * @param job  The current job configuration to adjust.
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *           job classes via the distributed cache (tmpjars).
+   */
+  public static void initTableMapJob(String table, String columns,
+    Class<? extends TableMap> mapper,
+    Class<? extends WritableComparable> outputKeyClass,
+    Class<? extends Writable> outputValueClass, JobConf job, boolean addDependencyJars) {

    job.setInputFormat(TableInputFormat.class);
    job.setMapOutputValueClass(outputValueClass);
@ -63,10 +83,12 @@ public class TableMapReduceUtil {
    job.setMapperClass(mapper);
    FileInputFormat.addInputPaths(job, table);
    job.set(TableInputFormat.COLUMN_LIST, columns);
-    try {
-      addDependencyJars(job);
-    } catch (IOException e) {
-      e.printStackTrace();
+    if (addDependencyJars) {
+      try {
+        addDependencyJars(job);
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
    }
  }

@ -99,6 +121,25 @@ public class TableMapReduceUtil {
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
  throws IOException {
+    initTableReduceJob(table, reducer, job, partitioner, true);
+  }
+
+  /**
+   * Use this before submitting a TableReduce job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table  The output table.
+   * @param reducer  The reducer class to use.
+   * @param job  The current job configuration to adjust.
+   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
+   * default partitioner.
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *           job classes via the distributed cache (tmpjars).
+   * @throws IOException When determining the region count fails.
+   */
+  public static void initTableReduceJob(String table,
+    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
+    boolean addDependencyJars) throws IOException {
    job.setOutputFormat(TableOutputFormat.class);
    job.setReducerClass(reducer);
    job.set(TableOutputFormat.OUTPUT_TABLE, table);
@ -114,7 +155,9 @@ public class TableMapReduceUtil {
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
-    addDependencyJars(job);
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
  }

  /**
--- a/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
+++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/TableMapReduceUtil.java
@ -69,6 +69,31 @@ public class TableMapReduceUtil {
      Class<? extends TableMapper> mapper,
      Class<? extends WritableComparable> outputKeyClass,
      Class<? extends Writable> outputValueClass, Job job)
+  throws IOException {
+    initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass,
+        job, true);
+  }
+
+  /**
+   * Use this before submitting a TableMap job. It will appropriately set up
+   * the job.
+   *
+   * @param table  The table name to read from.
+   * @param scan  The scan instance with the columns, time range etc.
+   * @param mapper  The mapper class to use.
+   * @param outputKeyClass  The class of the output key.
+   * @param outputValueClass  The class of the output value.
+   * @param job  The current job to adjust.  Make sure the passed job is
+   * carrying all necessary HBase configuration.
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *           job classes via the distributed cache (tmpjars).
+   * @throws IOException When setting up the details fails.
+   */
+  public static void initTableMapperJob(String table, Scan scan,
+      Class<? extends TableMapper> mapper,
+      Class<? extends WritableComparable> outputKeyClass,
+      Class<? extends Writable> outputValueClass, Job job,
+      boolean addDependencyJars)
  throws IOException {
    job.setInputFormatClass(TableInputFormat.class);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
@ -77,7 +102,9 @@ public class TableMapReduceUtil {
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, table);
    job.getConfiguration().set(TableInputFormat.SCAN,
      convertScanToString(scan));
-    addDependencyJars(job);
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
  }

  /**
@ -167,6 +194,38 @@ public class TableMapReduceUtil {
    Class<? extends TableReducer> reducer, Job job,
    Class partitioner, String quorumAddress, String serverClass,
    String serverImpl) throws IOException {
+    initTableReducerJob(table, reducer, job, partitioner, quorumAddress,
+        serverClass, serverImpl, true);
+  }
+
+  /**
+   * Use this before submitting a TableReduce job. It will
+   * appropriately set up the JobConf.
+   *
+   * @param table  The output table.
+   * @param reducer  The reducer class to use.
+   * @param job  The current job to adjust.  Make sure the passed job is
+   * carrying all necessary HBase configuration.
+   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
+   * default partitioner.
+   * @param quorumAddress Distant cluster to write to; default is null for
+   * output to the cluster that is designated in <code>hbase-site.xml</code>.
+   * Set this String to the zookeeper ensemble of an alternate remote cluster
+   * when you would have the reduce write a cluster that is other than the
+   * default; e.g. copying tables between clusters, the source would be
+   * designated by <code>hbase-site.xml</code> and this param would have the
+   * ensemble address of the remote cluster.  The format to pass is particular.
+   * Pass <code> &lt;hbase.zookeeper.quorum> ':' &lt;ZOOKEEPER_ZNODE_PARENT></code>.
+   * @param serverClass redefined hbase.regionserver.class
+   * @param serverImpl redefined hbase.regionserver.impl
+   * @param addDependencyJars upload HBase jars and jars for any of the configured
+   *           job classes via the distributed cache (tmpjars).
+   * @throws IOException When determining the region count fails.
+   */
+  public static void initTableReducerJob(String table,
+    Class<? extends TableReducer> reducer, Job job,
+    Class partitioner, String quorumAddress, String serverClass,
+    String serverImpl, boolean addDependencyJars) throws IOException {

    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(TableOutputFormat.class);
@ -198,7 +257,10 @@ public class TableMapReduceUtil {
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
-    addDependencyJars(job);
+
+    if (addDependencyJars) {
+      addDependencyJars(job);
+    }
  }

  /**