HBASE-8361 Bulk load and other utilities should not create tables for user (Ashish Singhi)

This commit is contained in:
stack 2014-10-12 21:52:01 -07:00
parent ab42b9ffe6
commit 8e9a8b002f
4 changed files with 56 additions and 9 deletions

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.HTable;
@ -94,6 +95,7 @@ public class ImportTsv extends Configured implements Tool {
final static String DEFAULT_ATTRIBUTES_SEPERATOR = "=>"; final static String DEFAULT_ATTRIBUTES_SEPERATOR = "=>";
final static String DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR = ","; final static String DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR = ",";
final static Class DEFAULT_MAPPER = TsvImporterMapper.class; final static Class DEFAULT_MAPPER = TsvImporterMapper.class;
public final static String CREATE_TABLE_CONF_KEY = "create.table";
public static class TsvParser { public static class TsvParser {
/** /**
@ -432,10 +434,16 @@ public class ImportTsv extends Configured implements Tool {
if (hfileOutPath != null) { if (hfileOutPath != null) {
if (!admin.tableExists(tableName)) { if (!admin.tableExists(tableName)) {
LOG.warn(format("Table '%s' does not exist.", tableName)); String errorMsg = format("Table '%s' does not exist.", tableName);
// TODO: this is backwards. Instead of depending on the existence of a table, if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
// create a sane splits file for HFileOutputFormat based on data sampling. LOG.warn(errorMsg);
createTable(admin, tableName, columns); // TODO: this is backwards. Instead of depending on the existence of a table,
// create a sane splits file for HFileOutputFormat based on data sampling.
createTable(admin, tableName, columns);
} else {
LOG.error(errorMsg);
throw new TableNotFoundException(errorMsg);
}
} }
HTable table = new HTable(conf, tableName); HTable table = new HTable(conf, tableName);
job.setReducerClass(PutSortReducer.class); job.setReducerClass(PutSortReducer.class);
@ -534,6 +542,9 @@ public class ImportTsv extends Configured implements Tool {
" -D" + MAPPER_CONF_KEY + "=my.Mapper - A user-defined Mapper to use instead of " + " -D" + MAPPER_CONF_KEY + "=my.Mapper - A user-defined Mapper to use instead of " +
DEFAULT_MAPPER.getName() + "\n" + DEFAULT_MAPPER.getName() + "\n" +
" -D" + JOB_NAME_CONF_KEY + "=jobName - use the specified mapreduce job name for the import\n" + " -D" + JOB_NAME_CONF_KEY + "=jobName - use the specified mapreduce job name for the import\n" +
" -D" + CREATE_TABLE_CONF_KEY + "=no - can be used to avoid creation of table by this tool\n" +
" Note: if you set this to 'no', then the target table must already exist in HBase\n" +
"\n" +
"For performance consider the following options:\n" + "For performance consider the following options:\n" +
" -Dmapreduce.map.speculative=false\n" + " -Dmapreduce.map.speculative=false\n" +
" -Dmapreduce.reduce.speculative=false"; " -Dmapreduce.reduce.speculative=false";

View File

@ -18,6 +18,8 @@
*/ */
package org.apache.hadoop.hbase.mapreduce; package org.apache.hadoop.hbase.mapreduce;
import static java.lang.String.format;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InterruptedIOException; import java.io.InterruptedIOException;
@ -114,6 +116,7 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
public static final String MAX_FILES_PER_REGION_PER_FAMILY public static final String MAX_FILES_PER_REGION_PER_FAMILY
= "hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily"; = "hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily";
private static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers"; private static final String ASSIGN_SEQ_IDS = "hbase.mapreduce.bulkload.assign.sequenceNumbers";
public final static String CREATE_TABLE_CONF_KEY = "create.table";
private int maxFilesPerRegionPerFamily; private int maxFilesPerRegionPerFamily;
private boolean assignSeqIds; private boolean assignSeqIds;
@ -148,9 +151,10 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
} }
private void usage() { private void usage() {
System.err.println("usage: " + NAME + System.err.println("usage: " + NAME + " /path/to/hfileoutputformat-output tablename" + "\n -D"
" /path/to/hfileoutputformat-output " + + CREATE_TABLE_CONF_KEY + "=no - can be used to avoid creation of table by this tool\n"
"tablename"); + " Note: if you set this to 'no', then the target table must already exist in HBase\n"
+ "\n");
} }
/** /**
@ -906,7 +910,15 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
TableName tableName = TableName.valueOf(args[1]); TableName tableName = TableName.valueOf(args[1]);
boolean tableExists = this.doesTableExist(tableName); boolean tableExists = this.doesTableExist(tableName);
if (!tableExists) this.createTable(tableName,dirPath); if (!tableExists) {
if ("yes".equalsIgnoreCase(getConf().get(CREATE_TABLE_CONF_KEY, "yes"))) {
this.createTable(tableName, dirPath);
} else {
String errorMsg = format("Table '%s' does not exist.", tableName);
LOG.error(errorMsg);
throw new TableNotFoundException(errorMsg);
}
}
Path hfofDir = new Path(dirPath); Path hfofDir = new Path(dirPath);
HTable table = new HTable(getConf(), tableName); HTable table = new HTable(getConf(), tableName);

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.ResultScanner;
@ -230,6 +231,19 @@ public class TestImportTsv implements Configurable {
doMROnTableTest(util, FAMILY, data, args, 4); doMROnTableTest(util, FAMILY, data, args, 4);
} }
/**
 * Bulk-output import against a table that does not exist, with
 * {@link ImportTsv#CREATE_TABLE_CONF_KEY} set to "no": job setup is expected to fail with
 * {@link TableNotFoundException} rather than create the table.
 */
@Test(expected = TableNotFoundException.class)
public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
  // Random suffix so the name does not match a table left behind by another test.
  final String missingTable = "test-" + UUID.randomUUID();

  // Private copy of the utility configuration; the settings below stay local to this test.
  final Configuration jobConf = new Configuration(util.getConfiguration());
  jobConf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A");
  // Setting a bulk output path selects the HFile-output branch that checks table existence.
  jobConf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/output");
  jobConf.set(ImportTsv.CREATE_TABLE_CONF_KEY, "no");

  ImportTsv.createSubmittableJob(jobConf, new String[] { missingTable, "/inputFile" });
}
protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, protected static Tool doMROnTableTest(HBaseTestingUtility util, String family,
String data, String[] args) throws Exception { String data, String[] args) throws Exception {
return doMROnTableTest(util, family, data, args, 1); return doMROnTableTest(util, family, data, args, 1);

View File

@ -32,6 +32,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MapReduceTests; import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.NamespaceDescriptor;
@ -422,5 +423,14 @@ public class TestLoadIncrementalHFiles {
+ MAX_FILES_PER_REGION_PER_FAMILY + " hfiles")); + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
} }
} }
/**
 * Runs the loader against a table that does not exist with
 * {@link LoadIncrementalHFiles#CREATE_TABLE_CONF_KEY} set to "no" and expects it to fail with
 * {@link TableNotFoundException} instead of creating the table.
 */
@Test(expected = TableNotFoundException.class)
public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
  // Work on a private copy. Setting the key directly on the object returned by
  // util.getConfiguration() would leave "create.table=no" in place after this test finishes;
  // presumably that object is shared with the other tests in this class, some of which may
  // rely on the loader creating the table for them.
  Configuration conf = new Configuration(util.getConfiguration());
  conf.set(LoadIncrementalHFiles.CREATE_TABLE_CONF_KEY, "no");
  LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
  // args[0] is the HFile directory, args[1] the (missing) target table name.
  String[] args = { "directory", "nonExistingTable" };
  loader.run(args);
}
} }