From f6b74b3083fc05b938845900b050eef515cc1105 Mon Sep 17 00:00:00 2001 From: Niels Basjes Date: Wed, 2 Mar 2022 13:19:13 +0100 Subject: [PATCH] HBASE-26762 Un-Deprecate and improve documentation for Scan#setRowPrefixFilter (#4119) Signed-off-by: Andrew Purtell Signed-off-by: Duo Zhang --- .../hadoop/hbase/client/ImmutableScan.java | 4 +- .../org/apache/hadoop/hbase/client/Scan.java | 47 +++++++++--- .../hadoop/hbase/quotas/QuotaTableUtil.java | 9 +-- .../hbase/client/TestImmutableScan.java | 7 +- .../apache/hadoop/hbase/client/TestScan.java | 2 +- .../hbase/filter/TestScanRowPrefix.java | 71 +++++-------------- hbase-shell/src/main/ruby/hbase/table.rb | 4 +- src/main/asciidoc/_chapters/datamodel.adoc | 2 +- 8 files changed, 68 insertions(+), 78 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ImmutableScan.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ImmutableScan.java index 196d0c800f1..01ec316c798 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ImmutableScan.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ImmutableScan.java @@ -95,9 +95,9 @@ public final class ImmutableScan extends Scan { } @Override - public Scan setRowPrefixFilter(byte[] rowPrefix) { + public Scan setStartStopRowForPrefixScan(byte[] rowPrefix) { throw new UnsupportedOperationException( - "ImmutableScan does not allow access to setRowPrefixFilter"); + "ImmutableScan does not allow access to setStartStopRowForPrefixScan"); } @Override diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java index 3ddf8ba7286..dbbc4e6d70c 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java @@ -339,6 +339,11 @@ public class Scan extends Query { *

* If the specified row does not exist, the Scanner will start from the next closest row after the * specified row. + *

+ * Note: Do NOT use this in combination with + * {@link #setRowPrefixFilter(byte[])} or {@link #setStartStopRowForPrefixScan(byte[])}. + * Doing so will make the scan result unexpected or even undefined. + *

* @param startRow row to start scanner at or after * @return this * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length @@ -354,7 +359,9 @@ public class Scan extends Query { * If the specified row does not exist, or the {@code inclusive} is {@code false}, the Scanner * will start from the next closest row after the specified row. *

- * Note: When use {@link #setRowPrefixFilter(byte[])}, the result might be unexpected. + * Note: Do NOT use this in combination with + * {@link #setRowPrefixFilter(byte[])} or {@link #setStartStopRowForPrefixScan(byte[])}. + * Doing so will make the scan result unexpected or even undefined. *

* @param startRow row to start scanner at or after * @param inclusive whether we should include the start row when scan @@ -377,8 +384,9 @@ public class Scan extends Query { *

* The scan will include rows that are lexicographically less than the provided stopRow. *

- * Note: When doing a filter for a rowKey Prefix use - * {@link #setRowPrefixFilter(byte[])}. The 'trailing 0' will not yield the desired result. + * Note: Do NOT use this in combination with + * {@link #setRowPrefixFilter(byte[])} or {@link #setStartStopRowForPrefixScan(byte[])}. + * Doing so will make the scan result unexpected or even undefined. *

* @param stopRow row to end at (exclusive) * @return this @@ -394,6 +402,11 @@ public class Scan extends Query { *

* The scan will include rows that are lexicographically less than (or equal to if * {@code inclusive} is {@code true}) the provided stopRow. + *

+ * Note: Do NOT use this in combination with + * {@link #setRowPrefixFilter(byte[])} or {@link #setStartStopRowForPrefixScan(byte[])}. + * Doing so will make the scan result unexpected or even undefined. + *

* @param stopRow row to end at * @param inclusive whether we should include the stop row when scan * @return this @@ -416,18 +429,32 @@ public class Scan extends Query { *

This is a utility method that converts the desired rowPrefix into the appropriate values * for the startRow and stopRow to achieve the desired result.

*

This can safely be used in combination with setFilter.

- *

NOTE: Doing a {@link #withStartRow(byte[])} and/or {@link #withStopRow(byte[])} - * after this method will yield undefined results.

+ *

This CANNOT be used in combination with withStartRow and/or withStopRow. + * Such a combination will yield unexpected and even undefined results.

* @param rowPrefix the prefix all rows must start with. (Set null to remove the filter.) * @return this - * @deprecated since 3.0.0. The scan result might be unexpected in some cases. - * e.g. startRow : "112" and rowPrefixFilter : "11" - * The Result of this scan might contains : "111" - * This method implements the filter by setting startRow and stopRow, - * but does not take care of the scenario where startRow has been set. + * @deprecated since 2.5.0, will be removed in 4.0.0. + * The name of this method is considered to be confusing as it does not + * use a {@link Filter} but uses setting the startRow and stopRow instead. + * Use {@link #setStartStopRowForPrefixScan(byte[])} instead. */ @Deprecated public Scan setRowPrefixFilter(byte[] rowPrefix) { + return setStartStopRowForPrefixScan(rowPrefix); + } + + /** + *

Set a filter (using stopRow and startRow) so the result set only contains rows where the + * rowKey starts with the specified prefix.

+ *

This is a utility method that converts the desired rowPrefix into the appropriate values + * for the startRow and stopRow to achieve the desired result.

+ *

This can safely be used in combination with setFilter.

+ *

This CANNOT be used in combination with withStartRow and/or withStopRow. + * Such a combination will yield unexpected and even undefined results.

+ * @param rowPrefix the prefix all rows must start with. (Set null to remove the filter.) + * @return this + */ + public Scan setStartStopRowForPrefixScan(byte[] rowPrefix) { if (rowPrefix == null) { withStartRow(HConstants.EMPTY_START_ROW); withStopRow(HConstants.EMPTY_END_ROW); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/QuotaTableUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/QuotaTableUtil.java index 624b4751b3d..94b87c4683e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/QuotaTableUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/quotas/QuotaTableUtil.java @@ -288,7 +288,7 @@ public class QuotaTableUtil { // Limit to "u:v" column s.addColumn(QUOTA_FAMILY_USAGE, QUOTA_QUALIFIER_POLICY); if (null == tn) { - s.setRowPrefixFilter(QUOTA_TABLE_ROW_KEY_PREFIX); + s.setStartStopRowForPrefixScan(QUOTA_TABLE_ROW_KEY_PREFIX); } else { byte[] row = getTableRowKey(tn); // Limit rowspace to the "t:" prefix @@ -637,7 +637,8 @@ public class QuotaTableUtil { throws IOException { Scan s = new Scan(); //Get rows for all tables in namespace - s.setRowPrefixFilter(Bytes.add(QUOTA_TABLE_ROW_KEY_PREFIX, Bytes.toBytes(namespace + TableName.NAMESPACE_DELIM))); + s.setStartStopRowForPrefixScan( + Bytes.add(QUOTA_TABLE_ROW_KEY_PREFIX, Bytes.toBytes(namespace + TableName.NAMESPACE_DELIM))); //Scan for table usage column (u:p) in quota table s.addColumn(QUOTA_FAMILY_USAGE,QUOTA_QUALIFIER_POLICY); //Scan for table quota column (q:s) if table has a space quota defined @@ -706,7 +707,7 @@ public class QuotaTableUtil { Scan s = new Scan(); if (namespace == null || namespace.isEmpty()) { // Read all namespaces, just look at the row prefix - s.setRowPrefixFilter(QUOTA_NAMESPACE_ROW_KEY_PREFIX); + s.setStartStopRowForPrefixScan(QUOTA_NAMESPACE_ROW_KEY_PREFIX); } else { // Fetch the exact row for the table byte[] rowkey = getNamespaceRowKey(namespace); @@ -727,7 +728,7 @@ public class QuotaTableUtil { Scan s = new Scan(); if (null == table) { // Read all tables, just look at the row prefix - s.setRowPrefixFilter(QUOTA_TABLE_ROW_KEY_PREFIX); + s.setStartStopRowForPrefixScan(QUOTA_TABLE_ROW_KEY_PREFIX); } else { // Fetch the exact row for the table byte[] rowkey = getTableRowKey(table); diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestImmutableScan.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestImmutableScan.java index cc8cc29b18a..7a36696d154 100644 --- a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestImmutableScan.java +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestImmutableScan.java @@ -84,7 +84,7 @@ public class TestImmutableScan { .setReplicaId(3) .setReversed(true) .setRowOffsetPerColumnFamily(5) - .setRowPrefixFilter(Bytes.toBytes("row_")) + .setStartStopRowForPrefixScan(Bytes.toBytes("row_")) .setScanMetricsEnabled(true) .setReadType(Scan.ReadType.STREAM) .withStartRow(Bytes.toBytes("row_1")) @@ -181,10 +181,11 @@ public class TestImmutableScan { assertEquals("ImmutableScan does not allow access to withStopRow", e.getMessage()); } try { - scanCopy.setRowPrefixFilter(new byte[] { 1, 2 }); + scanCopy.setStartStopRowForPrefixScan(new byte[] { 1, 2 }); throw new RuntimeException("Should not reach here"); } catch (UnsupportedOperationException e) { - assertEquals("ImmutableScan does not allow access to setRowPrefixFilter", e.getMessage()); + assertEquals("ImmutableScan does not allow access to setStartStopRowForPrefixScan", + e.getMessage()); } try { scanCopy.readAllVersions(); diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestScan.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestScan.java index fea6333b66d..0fbf4bb0796 100644 --- a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestScan.java +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestScan.java @@ -246,7 +246,7 @@ public class TestScan { .setReplicaId(3) .setReversed(true) .setRowOffsetPerColumnFamily(5) - .setRowPrefixFilter(Bytes.toBytes("row_")) + .setStartStopRowForPrefixScan(Bytes.toBytes("row_")) .setScanMetricsEnabled(true) .setReadType(ReadType.STREAM) .withStartRow(Bytes.toBytes("row_1")) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestScanRowPrefix.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestScanRowPrefix.java index dcf7b0082ce..da5586fb75d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestScanRowPrefix.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestScanRowPrefix.java @@ -42,7 +42,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * Test if Scan.setRowPrefixFilter works as intended. + * Test if Scan.setStartStopRowForPrefixScan works as intended. */ @Category({FilterTests.class, MediumTests.class}) public class TestScanRowPrefix extends FilterTestingCluster { @@ -64,8 +64,8 @@ public class TestScanRowPrefix extends FilterTestingCluster { Table table = openTable(tableName); /** - * Note that about half of these tests were relevant for an different implementation approach - * of setRowPrefixFilter. These test cases have been retained to ensure that also the + * Note that about half of these tests were relevant for a different implementation approach + * of setStartStopRowForPrefixScan. These test cases have been retained to ensure that also the * edge cases found there are still covered. */ @@ -119,16 +119,16 @@ public class TestScanRowPrefix extends FilterTestingCluster { // ======== // PREFIX 0 Scan scan = new Scan(); - scan.setRowPrefixFilter(prefix0); + scan.setStartStopRowForPrefixScan(prefix0); verifyScanResult(table, scan, expected0, "Scan empty prefix failed"); // ======== // PREFIX 1 scan = new Scan(); - scan.setRowPrefixFilter(prefix1); + scan.setStartStopRowForPrefixScan(prefix1); verifyScanResult(table, scan, expected1, "Scan normal prefix failed"); - scan.setRowPrefixFilter(null); + scan.setStartStopRowForPrefixScan(null); verifyScanResult(table, scan, expected0, "Scan after prefix reset failed"); scan = new Scan(); @@ -138,10 +138,10 @@ public class TestScanRowPrefix extends FilterTestingCluster { // ======== // PREFIX 2 scan = new Scan(); - scan.setRowPrefixFilter(prefix2); + scan.setStartStopRowForPrefixScan(prefix2); verifyScanResult(table, scan, expected2, "Scan edge 0xFF prefix failed"); - scan.setRowPrefixFilter(null); + scan.setStartStopRowForPrefixScan(null); verifyScanResult(table, scan, expected0, "Scan after prefix reset failed"); scan = new Scan(); @@ -151,10 +151,10 @@ public class TestScanRowPrefix extends FilterTestingCluster { // ======== // PREFIX 3 scan = new Scan(); - scan.setRowPrefixFilter(prefix3); + scan.setStartStopRowForPrefixScan(prefix3); verifyScanResult(table, scan, expected3, "Scan normal with 0x00 ends failed"); - scan.setRowPrefixFilter(null); + scan.setStartStopRowForPrefixScan(null); verifyScanResult(table, scan, expected0, "Scan after prefix reset failed"); scan = new Scan(); @@ -164,10 +164,10 @@ public class TestScanRowPrefix extends FilterTestingCluster { // ======== // PREFIX 4 scan = new Scan(); - scan.setRowPrefixFilter(prefix4); + scan.setStartStopRowForPrefixScan(prefix4); verifyScanResult(table, scan, expected4, "Scan end prefix failed"); - scan.setRowPrefixFilter(null); + scan.setStartStopRowForPrefixScan(null); verifyScanResult(table, scan, expected0, "Scan after prefix reset failed"); scan = new Scan(); @@ -178,13 +178,13 @@ public class TestScanRowPrefix extends FilterTestingCluster { // COMBINED // Prefix + Filter scan = new Scan(); - scan.setRowPrefixFilter(prefix1); + scan.setStartStopRowForPrefixScan(prefix1); verifyScanResult(table, scan, expected1, "Prefix filter failed"); scan.setFilter(new ColumnPrefixFilter(prefix2)); verifyScanResult(table, scan, expected2, "Combined Prefix + Filter failed"); - scan.setRowPrefixFilter(null); + scan.setStartStopRowForPrefixScan(null); verifyScanResult(table, scan, expected2, "Combined Prefix + Filter; removing Prefix failed"); scan.setFilter(null); @@ -196,55 +196,16 @@ public class TestScanRowPrefix extends FilterTestingCluster { scan.setFilter(new ColumnPrefixFilter(prefix2)); verifyScanResult(table, scan, expected2, "Test filter failed"); - scan.setRowPrefixFilter(prefix1); + scan.setStartStopRowForPrefixScan(prefix1); verifyScanResult(table, scan, expected2, "Combined Filter + Prefix failed"); scan.setFilter(null); verifyScanResult(table, scan, expected1, "Combined Filter + Prefix ; removing Filter failed"); - scan.setRowPrefixFilter(null); + scan.setStartStopRowForPrefixScan(null); verifyScanResult(table, scan, expected0, "Scan after prefix reset failed"); } - @Test - public void testRowPrefixFilterAndStartRow() throws IOException { - final TableName tableName = TableName.valueOf(name.getMethodName()); - createTable(tableName,"F"); - Table table = openTable(tableName); - - final byte[][] rowkeys = {Bytes.toBytes("111"), Bytes.toBytes("112")}; - final byte[] prefixFilter = Bytes.toBytes("11"); - for (byte[] rowkey: rowkeys) { - Put p = new Put(rowkey); - p.addColumn(Bytes.toBytes("F"), Bytes.toBytes("f"), Bytes.toBytes("test value")); - table.put(p); - } - - List expected0 = new ArrayList<>(); - expected0.add(rowkeys[0]); - expected0.add(rowkeys[1]); - - List expected1 = new ArrayList<>(); - expected1.add(rowkeys[1]); - - // ======== - // First scan - // Set startRow before setRowPrefixFilter - Scan scan = new Scan(); - scan.withStartRow(rowkeys[1]); - scan.setRowPrefixFilter(prefixFilter); - verifyScanResult(table, scan, expected0, "Set startRow before setRowPrefixFilter unexpected"); - - // ======== - // Second scan - // Set startRow after setRowPrefixFilter - // The result is different from first scan - scan = new Scan(); - scan.setRowPrefixFilter(prefixFilter); - scan.withStartRow(rowkeys[1]); - verifyScanResult(table, scan, expected1, "Set startRow after setRowPrefixFilter unexpected"); - } - private void verifyScanResult(Table table, Scan scan, List expectedKeys, String message) { List actualKeys = new ArrayList<>(); try { diff --git a/hbase-shell/src/main/ruby/hbase/table.rb b/hbase-shell/src/main/ruby/hbase/table.rb index 0cd917efcc1..68d7349c00a 100644 --- a/hbase-shell/src/main/ruby/hbase/table.rb +++ b/hbase-shell/src/main/ruby/hbase/table.rb @@ -208,7 +208,7 @@ EOF # create scan to get table names using prefix scan = org.apache.hadoop.hbase.client.Scan.new - scan.setRowPrefixFilter(prefix.to_java_bytes) + scan.setStartStopRowForPrefixScan(prefix.to_java_bytes) # Run the scanner to get all rowkeys scanner = @table.getScanner(scan) # Create a list to store all deletes @@ -536,7 +536,7 @@ EOF end # This will overwrite any startrow/stoprow settings - scan.setRowPrefixFilter(rowprefixfilter.to_java_bytes) if rowprefixfilter + scan.setStartStopRowForPrefixScan(rowprefixfilter.to_java_bytes) if rowprefixfilter # Clear converters from last scan. @converters.clear diff --git a/src/main/asciidoc/_chapters/datamodel.adoc b/src/main/asciidoc/_chapters/datamodel.adoc index dd54b1cc04c..c8164e4696a 100644 --- a/src/main/asciidoc/_chapters/datamodel.adoc +++ b/src/main/asciidoc/_chapters/datamodel.adoc @@ -300,7 +300,7 @@ Table table = ... // instantiate a Table instance Scan scan = new Scan(); scan.addColumn(CF, ATTR); -scan.setRowPrefixFilter(Bytes.toBytes("row")); +scan.setStartStopRowForPrefixScan(Bytes.toBytes("row")); ResultScanner rs = table.getScanner(scan); try { for (Result r = rs.next(); r != null; r = rs.next()) {