HBASE-5663 HBASE-5636 MultithreadedTableMapper doesn't work (Takuya Ueshin)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1308353 13f79535-47bb-0310-9956-ffa450edef68
2012-04-02 13:50:03 +00:00 · 2012-04-02 13:50:03 +00:00 · 80722e7290
parent 86f8282bef
commit 80722e7290
4 changed files with 77 additions and 40 deletions
--- a/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
+++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/MultithreadedTableMapper.java
@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.mapreduce;

 import java.io.IOException;
 import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;

@ -31,11 +32,14 @@ import org.apache.hadoop.mapreduce.Counter;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.MapContext;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputCommitter;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.StatusReporter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.util.ReflectionUtils;


@ -239,15 +243,17 @@ public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
          context.getConfiguration());
      try {
        Constructor c = context.getClass().getConstructor(
+          Mapper.class,
          Configuration.class,
-          outer.getTaskAttemptID().getClass(),
-          SubMapRecordReader.class,
-          SubMapRecordWriter.class,
-          context.getOutputCommitter().getClass(),
-          SubMapStatusReporter.class,
-          outer.getInputSplit().getClass());
+          TaskAttemptID.class,
+          RecordReader.class,
+          RecordWriter.class,
+          OutputCommitter.class,
+          StatusReporter.class,
+          InputSplit.class);
        c.setAccessible(true);
        subcontext = (Context) c.newInstance(
+          mapper,
          outer.getConfiguration(), 
          outer.getTaskAttemptID(),
          new SubMapRecordReader(),
@ -256,8 +262,31 @@ public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
          new SubMapStatusReporter(),
          outer.getInputSplit());
      } catch (Exception e) {
-        // rethrow as IOE
-        throw new IOException(e);
+        try {
+          Constructor c = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl").getConstructor(
+            Configuration.class,
+            TaskAttemptID.class,
+            RecordReader.class,
+            RecordWriter.class,
+            OutputCommitter.class,
+            StatusReporter.class,
+            InputSplit.class);
+          c.setAccessible(true);
+          MapContext mc = (MapContext) c.newInstance(
+            outer.getConfiguration(), 
+            outer.getTaskAttemptID(),
+            new SubMapRecordReader(),
+            new SubMapRecordWriter(),
+            context.getOutputCommitter(),
+            new SubMapStatusReporter(),
+            outer.getInputSplit());
+          Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
+          Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
+          subcontext = (Context) getMapContext.invoke(wrappedMapperClass.newInstance(), mc);
+        } catch (Exception ee) {
+          // rethrow as IOE
+          throw new IOException(e);
+        }
      }
    }

@ -270,4 +299,4 @@ public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
      }
    }
  }
-}
+}
--- a/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
+++ b/src/test/java/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.mapred;

 import java.io.File;
 import java.io.IOException;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.NavigableMap;

@ -28,7 +29,6 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
@ -42,11 +42,15 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.RunningJob;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;

+import static org.junit.Assert.fail;
+import static org.junit.Assert.assertTrue;
+
 /**
 * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
 * on our tables is simple - take every row in the table, reverse the value of
@ -58,7 +62,7 @@ public class TestTableMapReduce {
    LogFactory.getLog(TestTableMapReduce.class.getName());
  private static final HBaseTestingUtility UTIL =
    new HBaseTestingUtility();
-  static final String MULTI_REGION_TABLE_NAME = "mrtest";
+  static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");

@ -69,12 +73,10 @@ public class TestTableMapReduce {

  @BeforeClass
  public static void beforeClass() throws Exception {
-    HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
-    desc.addFamily(new HColumnDescriptor(INPUT_FAMILY));
-    desc.addFamily(new HColumnDescriptor(OUTPUT_FAMILY));
    UTIL.startMiniCluster();
-    HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
-    admin.createTable(desc, HBaseTestingUtility.KEYS);
+    HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME, new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
+    UTIL.createMultiRegions(table, INPUT_FAMILY);
+    UTIL.loadTable(table, INPUT_FAMILY);
    UTIL.startMiniMapReduceCluster();
  }

@ -150,7 +152,8 @@ public class TestTableMapReduce {
        IdentityTableReduce.class, jobConf);

      LOG.info("Started " + Bytes.toString(table.getTableName()));
-      JobClient.runJob(jobConf);
+      RunningJob job = JobClient.runJob(jobConf);
+      assertTrue(job.isSuccessful());
      LOG.info("After map/reduce completion");

      // verify map-reduce results
@ -184,7 +187,7 @@ public class TestTableMapReduce {
        // continue
      }
    }
-    org.junit.Assert.assertTrue(verified);
+    assertTrue(verified);
  }

  /**
@ -199,7 +202,10 @@ public class TestTableMapReduce {
    TableInputFormat.addColumns(scan, columns);
    ResultScanner scanner = table.getScanner(scan);
    try {
-      for (Result r : scanner) {
+      Iterator<Result> itr = scanner.iterator();
+      assertTrue(itr.hasNext());
+      while(itr.hasNext()) {
+        Result r = itr.next();
        if (LOG.isDebugEnabled()) {
          if (r.size() > 2 ) {
            throw new IOException("Too many results, expected 2 got " +
@ -247,7 +253,7 @@ public class TestTableMapReduce {
                r.getRow() + ", first value=" + first + ", second value=" +
                second);
          }
-          org.junit.Assert.fail();
+          fail();
        }
      }
    } finally {
--- a/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
+++ b/src/test/java/org/apache/hadoop/hbase/mapreduce/TestMultithreadedTableMapper.java
@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.mapreduce;

 import java.io.File;
 import java.io.IOException;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.NavigableMap;

@ -28,7 +29,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
@ -52,23 +52,21 @@ import static org.junit.Assert.assertTrue;
 * a particular cell, and write it back to the table.
 */
@Category(LargeTests.class)
-public class TestMulitthreadedTableMapper {
-  private static final Log LOG = LogFactory.getLog(TestMulitthreadedTableMapper.class);
+public class TestMultithreadedTableMapper {
+  private static final Log LOG = LogFactory.getLog(TestMultithreadedTableMapper.class);
  private static final HBaseTestingUtility UTIL =
      new HBaseTestingUtility();
-  static final String MULTI_REGION_TABLE_NAME = "mrtest";
+  static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
  static final int    NUMBER_OF_THREADS = 10;

  @BeforeClass
  public static void beforeClass() throws Exception {
-    HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
-    desc.addFamily(new HColumnDescriptor(INPUT_FAMILY));
-    desc.addFamily(new HColumnDescriptor(OUTPUT_FAMILY));
    UTIL.startMiniCluster();
-    HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
-    admin.createTable(desc, HBaseTestingUtility.KEYS);
+    HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME, new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
+    UTIL.createMultiRegions(table, INPUT_FAMILY);
+    UTIL.loadTable(table, INPUT_FAMILY);
    UTIL.startMiniMapReduceCluster();
  }

@ -149,7 +147,7 @@ public class TestMulitthreadedTableMapper {
          IdentityTableReducer.class, job);
      FileOutputFormat.setOutputPath(job, new Path("test"));
      LOG.info("Started " + Bytes.toString(table.getTableName()));
-      job.waitForCompletion(true);
+      assertTrue(job.waitForCompletion(true));
      LOG.info("After map/reduce completion");
      // verify map-reduce results
      verify(Bytes.toString(table.getTableName()));
@ -203,7 +201,10 @@ public class TestMulitthreadedTableMapper {
    scan.addFamily(OUTPUT_FAMILY);
    ResultScanner scanner = table.getScanner(scan);
    try {
-      for (Result r : scanner) {
+      Iterator<Result> itr = scanner.iterator();
+      assertTrue(itr.hasNext());
+      while(itr.hasNext()) {
+        Result r = itr.next();
        if (LOG.isDebugEnabled()) {
          if (r.size() > 2 ) {
            throw new IOException("Too many results, expected 2 got " +
--- a/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
+++ b/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableMapReduce.java
@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.mapreduce;

 import java.io.File;
 import java.io.IOException;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.NavigableMap;

@ -30,7 +31,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
@ -59,18 +59,16 @@ public class TestTableMapReduce {
  private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
  private static final HBaseTestingUtility UTIL =
    new HBaseTestingUtility();
-  static final String MULTI_REGION_TABLE_NAME = "mrtest";
+  static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");

  @BeforeClass
  public static void beforeClass() throws Exception {
-    HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
-    desc.addFamily(new HColumnDescriptor(INPUT_FAMILY));
-    desc.addFamily(new HColumnDescriptor(OUTPUT_FAMILY));
    UTIL.startMiniCluster();
-    HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
-    admin.createTable(desc, HBaseTestingUtility.KEYS);
+    HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME, new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
+    UTIL.createMultiRegions(table, INPUT_FAMILY);
+    UTIL.loadTable(table, INPUT_FAMILY);
    UTIL.startMiniMapReduceCluster();
  }

@ -150,7 +148,7 @@ public class TestTableMapReduce {
        IdentityTableReducer.class, job);
      FileOutputFormat.setOutputPath(job, new Path("test"));
      LOG.info("Started " + Bytes.toString(table.getTableName()));
-      job.waitForCompletion(true);
+      assertTrue(job.waitForCompletion(true));
      LOG.info("After map/reduce completion");

      // verify map-reduce results
@ -204,7 +202,10 @@ public class TestTableMapReduce {
    scan.addFamily(OUTPUT_FAMILY);
    ResultScanner scanner = table.getScanner(scan);
    try {
-      for (Result r : scanner) {
+      Iterator<Result> itr = scanner.iterator();
+      assertTrue(itr.hasNext());
+      while(itr.hasNext()) {
+        Result r = itr.next();
        if (LOG.isDebugEnabled()) {
          if (r.size() > 2 ) {
            throw new IOException("Too many results, expected 2 got " +