HBASE-5663 HBASE-5636 MultithreadedTableMapper doesn't work (Takuya Ueshin)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1308353 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
86f8282bef
commit
80722e7290
|
@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.mapreduce;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
|
@ -31,11 +32,14 @@ import org.apache.hadoop.mapreduce.Counter;
|
|||
import org.apache.hadoop.mapreduce.InputSplit;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.JobContext;
|
||||
import org.apache.hadoop.mapreduce.MapContext;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.OutputCommitter;
|
||||
import org.apache.hadoop.mapreduce.RecordReader;
|
||||
import org.apache.hadoop.mapreduce.RecordWriter;
|
||||
import org.apache.hadoop.mapreduce.StatusReporter;
|
||||
import org.apache.hadoop.mapreduce.TaskAttemptContext;
|
||||
import org.apache.hadoop.mapreduce.TaskAttemptID;
|
||||
import org.apache.hadoop.util.ReflectionUtils;
|
||||
|
||||
|
||||
|
@ -239,15 +243,17 @@ public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
|
|||
context.getConfiguration());
|
||||
try {
|
||||
Constructor c = context.getClass().getConstructor(
|
||||
Mapper.class,
|
||||
Configuration.class,
|
||||
outer.getTaskAttemptID().getClass(),
|
||||
SubMapRecordReader.class,
|
||||
SubMapRecordWriter.class,
|
||||
context.getOutputCommitter().getClass(),
|
||||
SubMapStatusReporter.class,
|
||||
outer.getInputSplit().getClass());
|
||||
TaskAttemptID.class,
|
||||
RecordReader.class,
|
||||
RecordWriter.class,
|
||||
OutputCommitter.class,
|
||||
StatusReporter.class,
|
||||
InputSplit.class);
|
||||
c.setAccessible(true);
|
||||
subcontext = (Context) c.newInstance(
|
||||
mapper,
|
||||
outer.getConfiguration(),
|
||||
outer.getTaskAttemptID(),
|
||||
new SubMapRecordReader(),
|
||||
|
@ -256,8 +262,31 @@ public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
|
|||
new SubMapStatusReporter(),
|
||||
outer.getInputSplit());
|
||||
} catch (Exception e) {
|
||||
// rethrow as IOE
|
||||
throw new IOException(e);
|
||||
try {
|
||||
Constructor c = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl").getConstructor(
|
||||
Configuration.class,
|
||||
TaskAttemptID.class,
|
||||
RecordReader.class,
|
||||
RecordWriter.class,
|
||||
OutputCommitter.class,
|
||||
StatusReporter.class,
|
||||
InputSplit.class);
|
||||
c.setAccessible(true);
|
||||
MapContext mc = (MapContext) c.newInstance(
|
||||
outer.getConfiguration(),
|
||||
outer.getTaskAttemptID(),
|
||||
new SubMapRecordReader(),
|
||||
new SubMapRecordWriter(),
|
||||
context.getOutputCommitter(),
|
||||
new SubMapStatusReporter(),
|
||||
outer.getInputSplit());
|
||||
Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
|
||||
Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
|
||||
subcontext = (Context) getMapContext.invoke(wrappedMapperClass.newInstance(), mc);
|
||||
} catch (Exception ee) {
|
||||
// rethrow as IOE
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -270,4 +299,4 @@ public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.mapred;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.NavigableMap;
|
||||
|
||||
|
@ -28,7 +29,6 @@ import org.apache.commons.logging.Log;
|
|||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.hbase.*;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
|
@ -42,11 +42,15 @@ import org.apache.hadoop.mapred.JobConf;
|
|||
import org.apache.hadoop.mapred.MapReduceBase;
|
||||
import org.apache.hadoop.mapred.OutputCollector;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.mapred.RunningJob;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
* Test Map/Reduce job over HBase tables. The map/reduce process we're testing
|
||||
* on our tables is simple - take every row in the table, reverse the value of
|
||||
|
@ -58,7 +62,7 @@ public class TestTableMapReduce {
|
|||
LogFactory.getLog(TestTableMapReduce.class.getName());
|
||||
private static final HBaseTestingUtility UTIL =
|
||||
new HBaseTestingUtility();
|
||||
static final String MULTI_REGION_TABLE_NAME = "mrtest";
|
||||
static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
|
||||
static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
|
||||
static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
|
||||
|
||||
|
@ -69,12 +73,10 @@ public class TestTableMapReduce {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
|
||||
desc.addFamily(new HColumnDescriptor(INPUT_FAMILY));
|
||||
desc.addFamily(new HColumnDescriptor(OUTPUT_FAMILY));
|
||||
UTIL.startMiniCluster();
|
||||
HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
|
||||
admin.createTable(desc, HBaseTestingUtility.KEYS);
|
||||
HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME, new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
|
||||
UTIL.createMultiRegions(table, INPUT_FAMILY);
|
||||
UTIL.loadTable(table, INPUT_FAMILY);
|
||||
UTIL.startMiniMapReduceCluster();
|
||||
}
|
||||
|
||||
|
@ -150,7 +152,8 @@ public class TestTableMapReduce {
|
|||
IdentityTableReduce.class, jobConf);
|
||||
|
||||
LOG.info("Started " + Bytes.toString(table.getTableName()));
|
||||
JobClient.runJob(jobConf);
|
||||
RunningJob job = JobClient.runJob(jobConf);
|
||||
assertTrue(job.isSuccessful());
|
||||
LOG.info("After map/reduce completion");
|
||||
|
||||
// verify map-reduce results
|
||||
|
@ -184,7 +187,7 @@ public class TestTableMapReduce {
|
|||
// continue
|
||||
}
|
||||
}
|
||||
org.junit.Assert.assertTrue(verified);
|
||||
assertTrue(verified);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -199,7 +202,10 @@ public class TestTableMapReduce {
|
|||
TableInputFormat.addColumns(scan, columns);
|
||||
ResultScanner scanner = table.getScanner(scan);
|
||||
try {
|
||||
for (Result r : scanner) {
|
||||
Iterator<Result> itr = scanner.iterator();
|
||||
assertTrue(itr.hasNext());
|
||||
while(itr.hasNext()) {
|
||||
Result r = itr.next();
|
||||
if (LOG.isDebugEnabled()) {
|
||||
if (r.size() > 2 ) {
|
||||
throw new IOException("Too many results, expected 2 got " +
|
||||
|
@ -247,7 +253,7 @@ public class TestTableMapReduce {
|
|||
r.getRow() + ", first value=" + first + ", second value=" +
|
||||
second);
|
||||
}
|
||||
org.junit.Assert.fail();
|
||||
fail();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.mapreduce;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.NavigableMap;
|
||||
|
||||
|
@ -28,7 +29,6 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.*;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
|
@ -52,23 +52,21 @@ import static org.junit.Assert.assertTrue;
|
|||
* a particular cell, and write it back to the table.
|
||||
*/
|
||||
@Category(LargeTests.class)
|
||||
public class TestMulitthreadedTableMapper {
|
||||
private static final Log LOG = LogFactory.getLog(TestMulitthreadedTableMapper.class);
|
||||
public class TestMultithreadedTableMapper {
|
||||
private static final Log LOG = LogFactory.getLog(TestMultithreadedTableMapper.class);
|
||||
private static final HBaseTestingUtility UTIL =
|
||||
new HBaseTestingUtility();
|
||||
static final String MULTI_REGION_TABLE_NAME = "mrtest";
|
||||
static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
|
||||
static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
|
||||
static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
|
||||
static final int NUMBER_OF_THREADS = 10;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
|
||||
desc.addFamily(new HColumnDescriptor(INPUT_FAMILY));
|
||||
desc.addFamily(new HColumnDescriptor(OUTPUT_FAMILY));
|
||||
UTIL.startMiniCluster();
|
||||
HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
|
||||
admin.createTable(desc, HBaseTestingUtility.KEYS);
|
||||
HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME, new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
|
||||
UTIL.createMultiRegions(table, INPUT_FAMILY);
|
||||
UTIL.loadTable(table, INPUT_FAMILY);
|
||||
UTIL.startMiniMapReduceCluster();
|
||||
}
|
||||
|
||||
|
@ -149,7 +147,7 @@ public class TestMulitthreadedTableMapper {
|
|||
IdentityTableReducer.class, job);
|
||||
FileOutputFormat.setOutputPath(job, new Path("test"));
|
||||
LOG.info("Started " + Bytes.toString(table.getTableName()));
|
||||
job.waitForCompletion(true);
|
||||
assertTrue(job.waitForCompletion(true));
|
||||
LOG.info("After map/reduce completion");
|
||||
// verify map-reduce results
|
||||
verify(Bytes.toString(table.getTableName()));
|
||||
|
@ -203,7 +201,10 @@ public class TestMulitthreadedTableMapper {
|
|||
scan.addFamily(OUTPUT_FAMILY);
|
||||
ResultScanner scanner = table.getScanner(scan);
|
||||
try {
|
||||
for (Result r : scanner) {
|
||||
Iterator<Result> itr = scanner.iterator();
|
||||
assertTrue(itr.hasNext());
|
||||
while(itr.hasNext()) {
|
||||
Result r = itr.next();
|
||||
if (LOG.isDebugEnabled()) {
|
||||
if (r.size() > 2 ) {
|
||||
throw new IOException("Too many results, expected 2 got " +
|
|
@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.mapreduce;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.NavigableMap;
|
||||
|
||||
|
@ -30,7 +31,6 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.*;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
|
@ -59,18 +59,16 @@ public class TestTableMapReduce {
|
|||
private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
|
||||
private static final HBaseTestingUtility UTIL =
|
||||
new HBaseTestingUtility();
|
||||
static final String MULTI_REGION_TABLE_NAME = "mrtest";
|
||||
static final byte[] MULTI_REGION_TABLE_NAME = Bytes.toBytes("mrtest");
|
||||
static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
|
||||
static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
|
||||
desc.addFamily(new HColumnDescriptor(INPUT_FAMILY));
|
||||
desc.addFamily(new HColumnDescriptor(OUTPUT_FAMILY));
|
||||
UTIL.startMiniCluster();
|
||||
HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
|
||||
admin.createTable(desc, HBaseTestingUtility.KEYS);
|
||||
HTable table = UTIL.createTable(MULTI_REGION_TABLE_NAME, new byte[][] {INPUT_FAMILY, OUTPUT_FAMILY});
|
||||
UTIL.createMultiRegions(table, INPUT_FAMILY);
|
||||
UTIL.loadTable(table, INPUT_FAMILY);
|
||||
UTIL.startMiniMapReduceCluster();
|
||||
}
|
||||
|
||||
|
@ -150,7 +148,7 @@ public class TestTableMapReduce {
|
|||
IdentityTableReducer.class, job);
|
||||
FileOutputFormat.setOutputPath(job, new Path("test"));
|
||||
LOG.info("Started " + Bytes.toString(table.getTableName()));
|
||||
job.waitForCompletion(true);
|
||||
assertTrue(job.waitForCompletion(true));
|
||||
LOG.info("After map/reduce completion");
|
||||
|
||||
// verify map-reduce results
|
||||
|
@ -204,7 +202,10 @@ public class TestTableMapReduce {
|
|||
scan.addFamily(OUTPUT_FAMILY);
|
||||
ResultScanner scanner = table.getScanner(scan);
|
||||
try {
|
||||
for (Result r : scanner) {
|
||||
Iterator<Result> itr = scanner.iterator();
|
||||
assertTrue(itr.hasNext());
|
||||
while(itr.hasNext()) {
|
||||
Result r = itr.next();
|
||||
if (LOG.isDebugEnabled()) {
|
||||
if (r.size() > 2 ) {
|
||||
throw new IOException("Too many results, expected 2 got " +
|
||||
|
|
Loading…
Reference in New Issue