HBASE-4657 Improve the efficiency of our MR jobs with a few configurations
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1304110 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a379a41e3d
commit
929113dfab
|
@ -70,6 +70,7 @@ public class CopyTable {
|
|||
Job job = new Job(conf, NAME + "_" + tableName);
|
||||
job.setJarByClass(CopyTable.class);
|
||||
Scan scan = new Scan();
|
||||
scan.setCacheBlocks(false);
|
||||
if (startTime != 0) {
|
||||
scan.setTimeRange(startTime,
|
||||
endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
|
||||
|
@ -115,7 +116,7 @@ public class CopyTable {
|
|||
if (errorMsg != null && errorMsg.length() > 0) {
|
||||
System.err.println("ERROR: " + errorMsg);
|
||||
}
|
||||
System.err.println("Usage: CopyTable [--rs.class=CLASS] " +
|
||||
System.err.println("Usage: CopyTable [general options] [--rs.class=CLASS] " +
|
||||
"[--rs.impl=IMPL] [--starttime=X] [--endtime=Y] " +
|
||||
"[--new.name=NEW] [--peer.adr=ADR] <tablename>");
|
||||
System.err.println();
|
||||
|
@ -144,6 +145,9 @@ public class CopyTable {
|
|||
"org.apache.hadoop.hbase.mapreduce.CopyTable --rs.class=org.apache.hadoop.hbase.ipc.ReplicationRegionInterface " +
|
||||
"--rs.impl=org.apache.hadoop.hbase.regionserver.replication.ReplicationRegionServer --starttime=1265875194289 --endtime=1265878794289 " +
|
||||
"--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
|
||||
System.err.println("For performance consider the following general options:\n"
|
||||
+ "-Dhbase.client.scanner.caching=100\n"
|
||||
+ "-Dmapred.map.tasks.speculative.execution=false");
|
||||
}
|
||||
|
||||
private static boolean doCommandLine(final String[] args) {
|
||||
|
|
|
@ -170,6 +170,10 @@ public class Export {
|
|||
System.err.println(" to control/limit what is exported..");
|
||||
System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
|
||||
System.err.println(" -D " + RAW_SCAN + "=true");
|
||||
System.err.println("For performance consider the following properties:\n"
|
||||
+ " -Dhbase.client.scanner.caching=100\n"
|
||||
+ " -Dmapred.map.tasks.speculative.execution=false\n"
|
||||
+ " -Dmapred.reduce.tasks.speculative.execution=false");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -275,6 +275,9 @@ public class Import {
|
|||
System.err.println("By default Import will load data directly into HBase. To instead generate");
|
||||
System.err.println("HFiles of data to prepare for a bulk data load, pass the option:");
|
||||
System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
|
||||
System.err.println("For performance consider the following options:\n"
|
||||
+ " -Dmapred.map.tasks.speculative.execution=false\n"
|
||||
+ " -Dmapred.reduce.tasks.speculative.execution=false");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -270,7 +270,10 @@ public class ImportTsv {
|
|||
" -D" + SKIP_LINES_CONF_KEY + "=false - fail if encountering an invalid line\n" +
|
||||
" '-D" + SEPARATOR_CONF_KEY + "=|' - eg separate on pipes instead of tabs\n" +
|
||||
" -D" + TIMESTAMP_CONF_KEY + "=currentTimeAsLong - use the specified timestamp for the import\n" +
|
||||
" -D" + MAPPER_CONF_KEY + "=my.Mapper - A user-defined Mapper to use instead of " + DEFAULT_MAPPER.getName() + "\n";
|
||||
" -D" + MAPPER_CONF_KEY + "=my.Mapper - A user-defined Mapper to use instead of " + DEFAULT_MAPPER.getName() + "\n" +
|
||||
"For performance consider the following options:\n" +
|
||||
" -Dmapred.map.tasks.speculative.execution=false\n" +
|
||||
" -Dmapred.reduce.tasks.speculative.execution=false";
|
||||
|
||||
System.err.println(usage);
|
||||
}
|
||||
|
|
|
@ -113,6 +113,7 @@ public class RowCounter {
|
|||
Job job = new Job(conf, NAME + "_" + tableName);
|
||||
job.setJarByClass(RowCounter.class);
|
||||
Scan scan = new Scan();
|
||||
scan.setCacheBlocks(false);
|
||||
if (startKey != null && !startKey.equals("")) {
|
||||
scan.setStartRow(Bytes.toBytes(startKey));
|
||||
}
|
||||
|
@ -149,8 +150,11 @@ public class RowCounter {
|
|||
* Prints usage without error message
|
||||
*/
|
||||
private static void printUsage() {
|
||||
System.err.println("Usage: RowCounter <tablename> " +
|
||||
System.err.println("Usage: RowCounter [options] <tablename> " +
|
||||
"[--range=[startKey],[endKey]] [<column1> <column2>...]");
|
||||
System.err.println("For performance consider the following options:\n"
|
||||
+ "-Dhbase.client.scanner.caching=100\n"
|
||||
+ "-Dmapred.map.tasks.speculative.execution=false");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue