diff --git a/CHANGES.txt b/CHANGES.txt index 8866ac10092..7baad34b22a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -198,6 +198,8 @@ Release 0.21.0 - Unreleased (Jeremiah Jacquet via Stack) HBASE-1961 HBase EC2 scripts HBASE-1971 Unit test the full WAL replay cycle + HBASE-1970 Export does one version only; make it configurable how many + it does NEW FEATURES HBASE-1901 "General" partitioner for "hbase-48" bulk (behind the api, write diff --git a/src/java/org/apache/hadoop/hbase/client/Scan.java b/src/java/org/apache/hadoop/hbase/client/Scan.java index a0b67378a51..f4f9286800d 100644 --- a/src/java/org/apache/hadoop/hbase/client/Scan.java +++ b/src/java/org/apache/hadoop/hbase/client/Scan.java @@ -76,7 +76,6 @@ import org.apache.hadoop.io.WritableFactories; */ public class Scan implements Writable { private static final byte SCAN_VERSION = (byte)1; - private byte [] startRow = HConstants.EMPTY_START_ROW; private byte [] stopRow = HConstants.EMPTY_END_ROW; private int maxVersions = 1; @@ -180,10 +179,14 @@ public class Scan implements Writable { /** * Get versions of columns only within the specified timestamp range, - * [minStamp, maxStamp). + * [minStamp, maxStamp). Note, default maximum versions to return is 1. If + * your time range spans more than one version and you want all versions + * returned, up the number of versions beyond the default. * @param minStamp minimum timestamp value, inclusive * @param maxStamp maximum timestamp value, exclusive * @throws IOException if invalid time range + * @see #setMaxVersions() + * @see #setMaxVersions(int) */ public Scan setTimeRange(long minStamp, long maxStamp) throws IOException { @@ -192,8 +195,13 @@ public class Scan implements Writable { } /** - * Get versions of columns with the specified timestamp. - * @param timestamp version timestamp + * Get versions of columns with the specified timestamp. Note, default maximum + * versions to return is 1.
If your time range spans more than one version + * and you want all versions returned, up the number of versions beyond the + * default. + * @param timestamp version timestamp + * @see #setMaxVersions() + * @see #setMaxVersions(int) */ public Scan setTimeStamp(long timestamp) { try { diff --git a/src/java/org/apache/hadoop/hbase/mapreduce/Export.java b/src/java/org/apache/hadoop/hbase/mapreduce/Export.java index 2998450771e..f1cbcba06ae 100644 --- a/src/java/org/apache/hadoop/hbase/mapreduce/Export.java +++ b/src/java/org/apache/hadoop/hbase/mapreduce/Export.java @@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; +import org.mortbay.log.Log; /** * Export an HBase table. @@ -80,8 +81,17 @@ public class Export { Job job = new Job(conf, NAME + "_" + tableName); job.setJarByClass(Exporter.class); // TODO: Allow passing filter and subset of rows/columns. - TableMapReduceUtil.initTableMapperJob(tableName, new Scan(), - Exporter.class, null, null, job); + Scan s = new Scan(); + // Optional arguments. + int versions = args.length > 2? Integer.parseInt(args[2]): 1; + s.setMaxVersions(versions); + long startTime = args.length > 3? Long.parseLong(args[3]): 0L; + long endTime = args.length > 4? Long.parseLong(args[4]): Long.MAX_VALUE; + s.setTimeRange(startTime, endTime); + Log.info("verisons=" + versions + ", starttime=" + startTime + + ", endtime=" + endTime); + TableMapReduceUtil.initTableMapperJob(tableName, s, Exporter.class, null, + null, job); // No reducers. Just write straight to output files.
job.setNumReduceTasks(0); job.setOutputFormatClass(SequenceFileOutputFormat.class); @@ -98,7 +108,8 @@ public class Export { if (errorMsg != null && errorMsg.length() > 0) { System.err.println("ERROR: " + errorMsg); } - System.err.println("Usage: Export "); + System.err.println("Usage: Export [ " + + "[ []]]"); } /** @@ -115,6 +126,6 @@ public class Export { System.exit(-1); } Job job = createSubmittableJob(conf, otherArgs); - System.exit(job.waitForCompletion(true) ? 0 : 1); + System.exit(job.waitForCompletion(true)? 0 : 1); } -} \ No newline at end of file +} diff --git a/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java index 6c5543aacb2..d8c78e6d924 100644 --- a/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java +++ b/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java @@ -102,7 +102,7 @@ extends InputFormat { public void restart(byte[] firstRow) throws IOException { Scan newScan = new Scan(scan); newScan.setStartRow(firstRow); - this.scanner = this.htable.getScanner(newScan); + this.scanner = this.htable.getScanner(newScan); } /** diff --git a/src/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java b/src/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java index 621ed9f2264..d373dd5bb60 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java @@ -1079,7 +1079,6 @@ public class HLog implements HConstants, Syncable { SequenceFile.Reader in = null; int count = 0; try { - long len = fs.getFileStatus(logfiles[i].getPath()).getLen(); in = HLog.getReader(fs, logfiles[i].getPath(), conf); try { HLogKey key = newKey(conf); diff --git a/src/test/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java b/src/test/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java index 74491f9fcc1..f1ec15b3b2d 100644 --- 
a/src/test/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java +++ b/src/test/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java @@ -34,8 +34,8 @@ import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.util.Bytes; public class TestStoreScanner extends TestCase { - - final byte [] CF = Bytes.toBytes("cf"); + private final String CF_STR = "cf"; + final byte [] CF = Bytes.toBytes(CF_STR); /** * Test utility for building a NavigableSet for scanners. @@ -50,6 +50,60 @@ public class TestStoreScanner extends TestCase { } return cols; } + + public void testScanTimeRange() throws IOException { + String r1 = "R1"; + // Five versions of the same cell, with timestamps 1 through 5 + KeyValue [] kvs = new KeyValue[] { + KeyValueTestUtil.create(r1, CF_STR, "a", 1, KeyValue.Type.Put, "dont-care"), + KeyValueTestUtil.create(r1, CF_STR, "a", 2, KeyValue.Type.Put, "dont-care"), + KeyValueTestUtil.create(r1, CF_STR, "a", 3, KeyValue.Type.Put, "dont-care"), + KeyValueTestUtil.create(r1, CF_STR, "a", 4, KeyValue.Type.Put, "dont-care"), + KeyValueTestUtil.create(r1, CF_STR, "a", 5, KeyValue.Type.Put, "dont-care"), + }; + KeyValueScanner [] scanners = new KeyValueScanner[] { + new KeyValueScanFixture(KeyValue.COMPARATOR, kvs) + }; + Scan scanSpec = new Scan(Bytes.toBytes(r1)); + scanSpec.setTimeRange(0, 6); + scanSpec.setMaxVersions(); + StoreScanner scan = + new StoreScanner(scanSpec, CF, Long.MAX_VALUE, + KeyValue.COMPARATOR, getCols("a"), scanners); + List results = new ArrayList(); + assertEquals(true, scan.next(results)); + assertEquals(5, results.size()); + assertEquals(kvs[kvs.length - 1], results.get(0)); + // Scan limited TimeRange + scanSpec = new Scan(Bytes.toBytes(r1)); + scanSpec.setTimeRange(1, 3); + scanSpec.setMaxVersions(); + scan = new StoreScanner(scanSpec, CF, Long.MAX_VALUE, + KeyValue.COMPARATOR, getCols("a"), scanners); + results = new ArrayList(); + assertEquals(true, scan.next(results)); + assertEquals(2, results.size()); + // Another
range. + scanSpec = new Scan(Bytes.toBytes(r1)); + scanSpec.setTimeRange(5, 10); + scanSpec.setMaxVersions(); + scan = new StoreScanner(scanSpec, CF, Long.MAX_VALUE, + KeyValue.COMPARATOR, getCols("a"), scanners); + results = new ArrayList(); + assertEquals(true, scan.next(results)); + assertEquals(1, results.size()); + // See how TimeRange and Versions interact. + // Another range. + scanSpec = new Scan(Bytes.toBytes(r1)); + scanSpec.setTimeRange(0, 10); + scanSpec.setMaxVersions(3); + scan = new StoreScanner(scanSpec, CF, Long.MAX_VALUE, + KeyValue.COMPARATOR, getCols("a"), scanners); + results = new ArrayList(); + assertEquals(true, scan.next(results)); + assertEquals(3, results.size()); + + } public void testScanSameTimestamp() throws IOException { // returns only 1 of these 2 even though same timestamp @@ -58,8 +112,7 @@ public class TestStoreScanner extends TestCase { KeyValueTestUtil.create("R1", "cf", "a", 1, KeyValue.Type.Put, "dont-care"), }; KeyValueScanner [] scanners = new KeyValueScanner[] { - new KeyValueScanFixture(KeyValue.COMPARATOR, - kvs) + new KeyValueScanFixture(KeyValue.COMPARATOR, kvs) }; Scan scanSpec = new Scan(Bytes.toBytes("R1"));