HBASE-5440 Allow Import to optionally use HFileOutputFormat
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1293101 13f79535-47bb-0310-9956-ffa450edef68
commit a54ec1b7cc (parent e531e2595c)
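The patch gives the Import tool an optional bulk-load output mode: when the new import.bulk.output property is set, the job maps the exported rows to KeyValues and writes HFiles through HFileOutputFormat, to be loaded into the table afterwards, instead of sending Puts and Deletes directly to the table. The column-family rename logic is also factored out into shared helpers so both mappers can use it. The full diff of Import.java follows.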
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Mutation;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
@@ -36,6 +37,7 @@ import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.util.GenericOptionsParser;

 /**
@@ -43,7 +45,42 @@ import org.apache.hadoop.util.GenericOptionsParser;
  */
 public class Import {
   final static String NAME = "import";
-  public static final String CF_RENAME_PROP = "HBASE_IMPORTER_RENAME_CFS";
+  final static String CF_RENAME_PROP = "HBASE_IMPORTER_RENAME_CFS";
+  final static String BULK_OUTPUT_CONF_KEY = "import.bulk.output";
+
+  /**
+   * A mapper that just writes out KeyValues.
+   */
+  static class KeyValueImporter
+  extends TableMapper<ImmutableBytesWritable, KeyValue> {
+    private Map<byte[], byte[]> cfRenameMap;
+
+    /**
+     * @param row  The current table row key.
+     * @param value  The columns.
+     * @param context  The current context.
+     * @throws IOException When something is broken with the data.
+     * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
+     *   org.apache.hadoop.mapreduce.Mapper.Context)
+     */
+    @Override
+    public void map(ImmutableBytesWritable row, Result value,
+      Context context)
+    throws IOException {
+      try {
+        for (KeyValue kv : value.raw()) {
+          context.write(row, convertKv(kv, cfRenameMap));
+        }
+      } catch (InterruptedException e) {
+        e.printStackTrace();
+      }
+    }
+
+    @Override
+    public void setup(Context context) {
+      cfRenameMap = createCfRenameMap(context.getConfiguration());
+    }
+  }
+
   /**
    * Write table content out to files in hdfs.
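For orientation, here is a minimal driver sketch (not part of the patch) showing how the new import.bulk.output key switches the job into HFile mode; the table name and paths are placeholders, and it assumes createSubmittableJob keeps the public signature the tool already exposes:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.mapreduce.Import;
    import org.apache.hadoop.mapreduce.Job;

    public class ImportBulkDriver {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // With this key set, createSubmittableJob wires up KeyValueImporter and HFileOutputFormat;
        // without it, the job keeps the old behaviour and writes Puts/Deletes directly to the table.
        conf.set("import.bulk.output", "/tmp/import-hfiles");   // placeholder output path
        Job job = Import.createSubmittableJob(conf,
            new String[] { "mytable", "/tmp/exported" });       // placeholder table and input dir
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }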
@@ -76,26 +113,7 @@ public class Import {
       Put put = null;
       Delete delete = null;
       for (KeyValue kv : result.raw()) {
-        if(cfRenameMap != null) {
-          // If there's a rename mapping for this CF, create a new KeyValue
-          byte[] newCfName = cfRenameMap.get(kv.getFamily());
-          if(newCfName != null) {
-            kv = new KeyValue(kv.getBuffer(), // row buffer
-                    kv.getRowOffset(),        // row offset
-                    kv.getRowLength(),        // row length
-                    newCfName,                // CF buffer
-                    0,                        // CF offset
-                    newCfName.length,         // CF length
-                    kv.getBuffer(),           // qualifier buffer
-                    kv.getQualifierOffset(),  // qualifier offset
-                    kv.getQualifierLength(),  // qualifier length
-                    kv.getTimestamp(),        // timestamp
-                    KeyValue.Type.codeToType(kv.getType()), // KV Type
-                    kv.getBuffer(),           // value buffer
-                    kv.getValueOffset(),      // value offset
-                    kv.getValueLength());     // value length
-          }
-        }
+        kv = convertKv(kv, cfRenameMap);
         // Deletes and Puts are gathered and written when finished
         if (kv.isDelete()) {
           if (delete == null) {
@@ -119,26 +137,56 @@ public class Import {

     @Override
     public void setup(Context context) {
-      // Make a map from sourceCfName to destCfName by parsing a config key
-      cfRenameMap = null;
-      String allMappingsPropVal = context.getConfiguration().get(CF_RENAME_PROP);
-      if(allMappingsPropVal != null) {
-        // The conf value format should be sourceCf1:destCf1,sourceCf2:destCf2,...
-        String[] allMappings = allMappingsPropVal.split(",");
-        for (String mapping: allMappings) {
-          if(cfRenameMap == null) {
-            cfRenameMap = new TreeMap<byte[],byte[]>(Bytes.BYTES_COMPARATOR);
-          }
-          String [] srcAndDest = mapping.split(":");
-          if(srcAndDest.length != 2) {
-            continue;
-          }
-          cfRenameMap.put(srcAndDest[0].getBytes(), srcAndDest[1].getBytes());
-        }
-      }
+      cfRenameMap = createCfRenameMap(context.getConfiguration());
     }
   }

+  // helper: create a new KeyValue based on CF rename map
+  private static KeyValue convertKv(KeyValue kv, Map<byte[], byte[]> cfRenameMap) {
+    if(cfRenameMap != null) {
+      // If there's a rename mapping for this CF, create a new KeyValue
+      byte[] newCfName = cfRenameMap.get(kv.getFamily());
+      if(newCfName != null) {
+        kv = new KeyValue(kv.getBuffer(), // row buffer
+                kv.getRowOffset(),        // row offset
+                kv.getRowLength(),        // row length
+                newCfName,                // CF buffer
+                0,                        // CF offset
+                newCfName.length,         // CF length
+                kv.getBuffer(),           // qualifier buffer
+                kv.getQualifierOffset(),  // qualifier offset
+                kv.getQualifierLength(),  // qualifier length
+                kv.getTimestamp(),        // timestamp
+                KeyValue.Type.codeToType(kv.getType()), // KV Type
+                kv.getBuffer(),           // value buffer
+                kv.getValueOffset(),      // value offset
+                kv.getValueLength());     // value length
+      }
+    }
+    return kv;
+  }
+
+  // helper: make a map from sourceCfName to destCfName by parsing a config key
+  private static Map<byte[], byte[]> createCfRenameMap(Configuration conf) {
+    Map<byte[], byte[]> cfRenameMap = null;
+    String allMappingsPropVal = conf.get(CF_RENAME_PROP);
+    if(allMappingsPropVal != null) {
+      // The conf value format should be sourceCf1:destCf1,sourceCf2:destCf2,...
+      String[] allMappings = allMappingsPropVal.split(",");
+      for (String mapping: allMappings) {
+        if(cfRenameMap == null) {
+          cfRenameMap = new TreeMap<byte[],byte[]>(Bytes.BYTES_COMPARATOR);
+        }
+        String [] srcAndDest = mapping.split(":");
+        if(srcAndDest.length != 2) {
+          continue;
+        }
+        cfRenameMap.put(srcAndDest[0].getBytes(), srcAndDest[1].getBytes());
+      }
+    }
+    return cfRenameMap;
+  }
+
   /**
    * <p>Sets a configuration property with key {@link #CF_RENAME_PROP} in conf that tells
    * the mapper how to rename column families.
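As a quick illustration of the property format those helpers parse, the standalone sketch below builds the same kind of map by hand for an example value of HBASE_IMPORTER_RENAME_CFS; the family names are made up for the example:

    import java.util.Map;
    import java.util.TreeMap;
    import org.apache.hadoop.hbase.util.Bytes;

    public class CfRenameMapExample {
      public static void main(String[] args) {
        // Same "sourceCf1:destCf1,sourceCf2:destCf2,..." format the importer reads from its config.
        String prop = "cf_old:cf_new,d:data";                      // example families only
        Map<byte[], byte[]> renameMap =
            new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);   // byte[] keys need this comparator
        for (String mapping : prop.split(",")) {
          String[] srcAndDest = mapping.split(":");
          if (srcAndDest.length != 2) continue;                    // malformed entries are skipped
          renameMap.put(srcAndDest[0].getBytes(), srcAndDest[1].getBytes());
        }
        // Looking up the old family yields the new one, here "cf_new".
        System.out.println(Bytes.toString(renameMap.get(Bytes.toBytes("cf_old"))));
      }
    }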
@@ -190,11 +238,25 @@ public class Import {
     job.setJarByClass(Importer.class);
     FileInputFormat.setInputPaths(job, inputDir);
     job.setInputFormatClass(SequenceFileInputFormat.class);
-    job.setMapperClass(Importer.class);
-    // No reducers. Just write straight to table. Call initTableReducerJob
-    // because it sets up the TableOutputFormat.
-    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
-    job.setNumReduceTasks(0);
+    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
+    if (hfileOutPath != null) {
+      job.setMapperClass(KeyValueImporter.class);
+      HTable table = new HTable(conf, tableName);
+      job.setReducerClass(KeyValueSortReducer.class);
+      Path outputDir = new Path(hfileOutPath);
+      FileOutputFormat.setOutputPath(job, outputDir);
+      job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+      job.setMapOutputValueClass(KeyValue.class);
+      HFileOutputFormat.configureIncrementalLoad(job, table);
+      TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
+          com.google.common.base.Preconditions.class);
+    } else {
+      // No reducers. Just write straight to table. Call initTableReducerJob
+      // because it sets up the TableOutputFormat.
+      job.setMapperClass(Importer.class);
+      TableMapReduceUtil.initTableReducerJob(tableName, null, job);
+      job.setNumReduceTasks(0);
+    }
     return job;
   }

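When the bulk output path is used, the HFiles written under import.bulk.output still have to be handed to the table afterwards. A minimal follow-up sketch using LoadIncrementalHFiles (table name and path are placeholders):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

    public class CompleteImportBulkLoad {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "mytable");              // placeholder table name
        // Moves the HFiles produced by the import job into the table's regions.
        new LoadIncrementalHFiles(conf).doBulkLoad(new Path("/tmp/import-hfiles"), table);
        table.close();
      }
    }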
@@ -205,7 +267,10 @@ public class Import {
     if (errorMsg != null && errorMsg.length() > 0) {
       System.err.println("ERROR: " + errorMsg);
     }
-    System.err.println("Usage: Import <tablename> <inputdir>");
+    System.err.println("Usage: Import [options] <tablename> <inputdir>");
+    System.err.println("By default Import will load data directly into HBase. To instead generate");
+    System.err.println("HFiles of data to prepare for a bulk data load, pass the option:");
+    System.err.println("  -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
   }

   /**