diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
index d52a31067f4..ed31c8422e7 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/Driver.java
@@ -1,4 +1,4 @@
-/**
+/*
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -34,6 +34,7 @@ import org.apache.yetus.audience.InterfaceStability;
 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
 @InterfaceStability.Stable
 public class Driver {
+  private Driver() {}
 
   public static void main(String[] args) throws Throwable {
     ProgramDriver pgd = new ProgramDriver();
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
index 1815412721f..7c4be83a73e 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java
@@ -154,14 +154,13 @@ public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
       WALSplit hsplit = (WALSplit)split;
       logFile = new Path(hsplit.getLogFileName());
       conf = context.getConfiguration();
-      LOG.info("Opening reader for "+split);
+      LOG.info("Opening {} for {}", logFile, split);
       openReader(logFile);
       this.startTime = hsplit.getStartTime();
       this.endTime = hsplit.getEndTime();
     }
 
-    private void openReader(Path path) throws IOException
-    {
+    private void openReader(Path path) throws IOException {
       closeReader();
       reader = AbstractFSWALProvider.openReader(path, conf);
       seek();
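For context on the reader this change touches: AbstractFSWALProvider.openReader() is the plain file-level WAL reader, and the record reader above merely wraps it with a time-range seek. A minimal sketch of the same calls outside MapReduce (the class name and path argument are hypothetical, not part of the patch):

----
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.wal.WAL;

// Hypothetical helper, not part of the patch: dump the entries of one WAL
// file using the same openReader() call WALInputFormat uses above.
public class WalFileDump {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Path logFile = new Path(args[0]); // path to a single WAL file
    try (WAL.Reader reader = AbstractFSWALProvider.openReader(logFile, conf)) {
      WAL.Entry entry;
      while ((entry = reader.next()) != null) {
        // A WAL entry pairs a WALKey (table, region, sequence id, write time)
        // with a WALEdit holding the batched cells.
        System.out.println(entry.getKey() + " cells=" + entry.getEdit().size());
      }
    }
  }
}
----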
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
index bbaa7549fa9..5b1aac65441 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALPlayer.java
@@ -58,6 +58,8 @@ import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+
+
 /**
  * A tool to replay WAL files as a M/R job.
  * The WAL can be replayed for a set of tables or all tables,
@@ -140,7 +142,22 @@ public class WALPlayer extends Configured implements Tool {
   }
 
   /**
-   * A mapper that writes out {@link Mutation} to be directly applied to a running HBase instance.
+   * Enum for map metrics. Keep it out here rather than inside in the Map
+   * inner-class so we can find associated properties.
+   */
+  protected static enum Counter {
+    /** Number of aggregated writes */
+    PUTS,
+    /** Number of aggregated deletes */
+    DELETES,
+    CELLS_READ,
+    CELLS_WRITTEN,
+    WALEDITS
+  }
+
+  /**
+   * A mapper that writes out {@link Mutation} to be directly applied to
+   * a running HBase instance.
    */
   protected static class WALMapper
       extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> {
@@ -148,6 +165,7 @@ public class WALPlayer extends Configured implements Tool {
 
     @Override
     public void map(WALKey key, WALEdit value, Context context) throws IOException {
+      context.getCounter(Counter.WALEDITS).increment(1);
       try {
         if (tables.isEmpty() || tables.containsKey(key.getTableName())) {
           TableName targetTable =
@@ -157,6 +175,7 @@ public class WALPlayer extends Configured implements Tool {
           Delete del = null;
           Cell lastCell = null;
           for (Cell cell : value.getCells()) {
+            context.getCounter(Counter.CELLS_READ).increment(1);
            // Filtering WAL meta marker entries.
            if (WALEdit.isMetaEditFamily(cell)) {
              continue;
            }
@@ -172,9 +191,11 @@ public class WALPlayer extends Configured implements Tool {
               // row or type changed, write out aggregate KVs.
               if (put != null) {
                 context.write(tableOut, put);
+                context.getCounter(Counter.PUTS).increment(1);
               }
               if (del != null) {
                 context.write(tableOut, del);
+                context.getCounter(Counter.DELETES).increment(1);
               }
               if (CellUtil.isDelete(cell)) {
                 del = new Delete(CellUtil.cloneRow(cell));
@@ -187,14 +208,17 @@ public class WALPlayer extends Configured implements Tool {
             } else {
               put.add(cell);
             }
+            context.getCounter(Counter.CELLS_WRITTEN).increment(1);
           }
           lastCell = cell;
         }
         // write residual KVs
         if (put != null) {
           context.write(tableOut, put);
+          context.getCounter(Counter.PUTS).increment(1);
         }
         if (del != null) {
+          context.getCounter(Counter.DELETES).increment(1);
           context.write(tableOut, del);
         }
       }
@@ -270,7 +294,7 @@ public class WALPlayer extends Configured implements Tool {
     setupTime(conf, WALInputFormat.START_TIME_KEY);
     setupTime(conf, WALInputFormat.END_TIME_KEY);
     String inputDirs = args[0];
-    String[] tables = args[1].split(",");
+    String[] tables = args.length == 1? new String [] {}: args[1].split(",");
     String[] tableMap;
     if (args.length > 2) {
       tableMap = args[2].split(",");
@@ -278,7 +302,7 @@ public class WALPlayer extends Configured implements Tool {
       if (tableMap.length != tables.length) {
         throw new IOException("The same number of tables and mapping must be provided.");
       }
     } else {
-      // if not mapping is specified map each table to itself
+      // if no mapping is specified, map each table to itself
       tableMap = tables;
     }
     conf.setStrings(TABLES_KEY, tables);
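The new counters make a replay auditable from the job summary. A minimal sketch of reading them back once the job completes (a hypothetical helper, assumed to live in the org.apache.hadoop.hbase.mapreduce package since Counter is protected):

----
import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical post-run check, not part of the patch. Counter is protected,
// so this assumes the same package as WALPlayer (or a subclass).
final class WALPlayerStats {
  static void report(Job completedJob) throws IOException {
    long edits = completedJob.getCounters().findCounter(WALPlayer.Counter.WALEDITS).getValue();
    long read = completedJob.getCounters().findCounter(WALPlayer.Counter.CELLS_READ).getValue();
    long written = completedJob.getCounters().findCounter(WALPlayer.Counter.CELLS_WRITTEN).getValue();
    long puts = completedJob.getCounters().findCounter(WALPlayer.Counter.PUTS).getValue();
    long deletes = completedJob.getCounters().findCounter(WALPlayer.Counter.DELETES).getValue();
    // read minus written is roughly the WAL meta-marker cells filtered out above.
    System.out.printf("waledits=%d cellsRead=%d cellsWritten=%d puts=%d deletes=%d%n",
        edits, read, written, puts, deletes);
  }
}
----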
@@ -349,27 +373,27 @@ public class WALPlayer extends Configured implements Tool {
     if (errorMsg != null && errorMsg.length() > 0) {
       System.err.println("ERROR: " + errorMsg);
     }
-    System.err.println("Usage: " + NAME + " [options] <wal inputdir> <tables> [<tableMappings>]");
-    System.err.println("Replay all WAL files into HBase.");
-    System.err.println("<tables> is a comma separated list of tables.");
-    System.err.println("If no tables (\"\") are specified, all tables are imported.");
-    System.err.println("(Be careful, hbase:meta entries will be imported in this case.)\n");
-    System.err.println("WAL entries can be mapped to new set of tables via <tableMappings>.");
-    System.err.println("<tableMappings> is a comma separated list of target tables.");
-    System.err.println("If specified, each table in <tables> must have a mapping.\n");
-    System.err.println("By default " + NAME + " will load data directly into HBase.");
-    System.err.println("To generate HFiles for a bulk data load instead, pass the following option:");
-    System.err.println("  -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
-    System.err.println("  (Only one table can be specified, and no mapping is allowed!)");
-    System.err.println("Time range options:");
-    System.err.println("  -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
-    System.err.println("  -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
-    System.err.println("  (The start and the end date of timerange. The dates can be expressed");
-    System.err.println("  in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format.");
-    System.err.println("  E.g. 1234567890120 or 2009-02-13T23:32:30.12)");
+    System.err.println("Usage: " + NAME + " [options] <WAL inputdir> [<tables> <tableMappings>]");
+    System.err.println(" <WAL inputdir>   directory of WALs to replay.");
+    System.err.println(" <tables>         comma separated list of tables. If no tables specified,");
+    System.err.println("                  all are imported (even hbase:meta if present).");
+    System.err.println(" <tableMappings>  WAL entries can be mapped to a new set of tables by passing");
+    System.err.println("                  <tableMappings>, a comma separated list of target tables.");
+    System.err.println("                  If specified, each table in <tables> must have a mapping.");
+    System.err.println("To generate HFiles to bulk load instead of loading HBase directly, pass:");
+    System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
+    System.err.println(" Only one table can be specified, and no mapping allowed!");
+    System.err.println("To specify a time range, pass:");
+    System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
+    System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
+    System.err.println(" The start and the end date of timerange. The dates can be expressed");
+    System.err.println(" in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format.");
+    System.err.println(" E.g. 1234567890120 or 2009-02-13T23:32:30.12");
     System.err.println("Other options:");
-    System.err.println("  -D" + JOB_NAME_CONF_KEY + "=jobName");
-    System.err.println("  Use the specified mapreduce job name for the wal player");
+    System.err.println(" -D" + JOB_NAME_CONF_KEY + "=jobName");
+    System.err.println(" Use the specified mapreduce job name for the wal player");
+    System.err.println(" -Dwal.input.separator=' '");
+    System.err.println(" Change WAL filename separator (WAL dir names use default ','.)");
     System.err.println("For performance also consider the following options:\n" +
         "  -Dmapreduce.map.speculative=false\n" +
         "  -Dmapreduce.reduce.speculative=false");
@@ -387,7 +411,7 @@ public class WALPlayer extends Configured implements Tool {
 
   @Override
   public int run(String[] args) throws Exception {
-    if (args.length < 2) {
+    if (args.length < 1) {
       usage("Wrong number of arguments: " + args.length);
       System.exit(-1);
     }
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
index 4880ab64e66..432aff1dd04 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestWALPlayer.java
@@ -223,8 +223,8 @@ public class TestWALPlayer {
     } catch (SecurityException e) {
       assertEquals(-1, newSecurityManager.getExitCode());
       assertTrue(data.toString().contains("ERROR: Wrong number of arguments:"));
-      assertTrue(data.toString().contains("Usage: WALPlayer [options] <wal inputdir>" +
-          " <tables> [<tableMappings>]"));
+      assertTrue(data.toString().contains("Usage: WALPlayer [options] <WAL inputdir>" +
+          " [<tables> <tableMappings>]"));
       assertTrue(data.toString().contains("-Dwal.bulk.output=/path/for/output"));
     }
 
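With the argument check relaxed from two arguments to one, the table list becomes optional: passing only the WAL input directory replays every table found in the WALs, hbase:meta included. A sketch of that single-argument form driven through ToolRunner (paths and the time window are hypothetical):

----
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical driver, not part of the patch: replay every table from one
// WAL directory, restricted to a time window via the generic -D options.
public class ReplayAllTables {
  public static void main(String[] unused) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    int exitCode = ToolRunner.run(conf, new WALPlayer(), new String[] {
      "-Dwal.start.time=1234567890120",        // ms since epoch...
      "-Dwal.end.time=2009-02-13T23:59:59.99", // ...or the date format; both parsed by setupTime()
      "/hbase/oldWALs"                         // hypothetical WAL input dir; no <tables> argument
    });
    System.exit(exitCode);
  }
}
----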
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index d1bc2cf8e0c..6ea23655d3c 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -914,7 +914,7 @@ see <<_wal_tools>>.
 
 Invoke via:
 ----
-$ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer [options] <wal inputdir> <tables> [<tableMappings>]>
+$ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer [options] <WAL inputdir> [<tables> <tableMappings>]>
 ----
 
 For example:
@@ -932,29 +932,27 @@ To NOT run WALPlayer as a mapreduce job on your cluster, force it to run all in
 Running `WALPlayer` with no arguments prints brief usage information:
 
 ----
-Usage: WALPlayer [options] <wal inputdir> <tables> [<tableMappings>]
-Replay all WAL files into HBase.
-<tables> is a comma separated list of tables.
-If no tables ("") are specified, all tables are imported.
-(Be careful, hbase:meta entries will be imported in this case.)
-
-WAL entries can be mapped to new set of tables via <tableMappings>.
-<tableMappings> is a comma separated list of target tables.
-If specified, each table in <tables> must have a mapping.
-
-By default WALPlayer will load data directly into HBase.
-To generate HFiles for a bulk data load instead, pass the following option:
-  -Dwal.bulk.output=/path/for/output
-  (Only one table can be specified, and no mapping is allowed!)
-Time range options:
-  -Dwal.start.time=[date|ms]
-  -Dwal.end.time=[date|ms]
-  (The start and the end date of timerange. The dates can be expressed
-  in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format.
-  E.g. 1234567890120 or 2009-02-13T23:32:30.12)
+Usage: WALPlayer [options] <WAL inputdir> [<tables> <tableMappings>]
+ <WAL inputdir>   directory of WALs to replay.
+ <tables>         comma separated list of tables. If no tables specified,
+                  all are imported (even hbase:meta if present).
+ <tableMappings>  WAL entries can be mapped to a new set of tables by passing
+                  <tableMappings>, a comma separated list of target tables.
+                  If specified, each table in <tables> must have a mapping.
+To generate HFiles to bulk load instead of loading HBase directly, pass:
+ -Dwal.bulk.output=/path/for/output
+ Only one table can be specified, and no mapping allowed!
+To specify a time range, pass:
+ -Dwal.start.time=[date|ms]
+ -Dwal.end.time=[date|ms]
+ The start and the end date of timerange. The dates can be expressed
+ in milliseconds since epoch or in yyyy-MM-dd'T'HH:mm:ss.SS format.
+ E.g. 1234567890120 or 2009-02-13T23:32:30.12
 Other options:
-  -Dmapreduce.job.name=jobName
-  Use the specified mapreduce job name for the wal player
+ -Dmapreduce.job.name=jobName
+ Use the specified mapreduce job name for the wal player
+ -Dwal.input.separator=' '
+ Change WAL filename separator (WAL dir names use default ','.)
 For performance also consider the following options:
   -Dmapreduce.map.speculative=false
   -Dmapreduce.reduce.speculative=false
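The bulk-output mode described in this usage text pairs with a follow-up bulk load of the generated HFiles. A sketch of that two-step flow (output path and table name are hypothetical):

----
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical two-step bulk flow, not part of the patch: write HFiles
// instead of live Puts/Deletes, then bulk load them afterwards.
public class BulkReplay {
  public static void main(String[] unused) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    int exitCode = ToolRunner.run(conf, new WALPlayer(), new String[] {
      "-Dwal.bulk.output=/tmp/walplayer-hfiles", // hypothetical HFile output dir
      "/hbase/oldWALs",                          // hypothetical WAL input dir
      "myTable"                                  // bulk mode: exactly one table, no mapping
    });
    // Then load the generated HFiles, e.g.:
    //   $ bin/hbase completebulkload /tmp/walplayer-hfiles myTable
    System.exit(exitCode);
  }
}
----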