From c10ee4d23be40a26070448d48e0608c7be95d4e1 Mon Sep 17 00:00:00 2001 From: Josh Elser Date: Mon, 1 Apr 2019 18:21:12 -0400 Subject: [PATCH] HBASE-22215 HBASE-22144 Correct MultiRowRangeFilter to work with reverse scans Signed-off-by: Toshihiro Suzuki --- .../hbase/filter/MultiRowRangeFilter.java | 345 ++++++++++++++---- .../hbase/filter/TestMultiRowRangeFilter.java | 136 ++++++- 2 files changed, 408 insertions(+), 73 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java index f5715b9abb5..c0026060248 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java @@ -54,14 +54,14 @@ import com.google.protobuf.InvalidProtocolBufferException; @InterfaceStability.Evolving public class MultiRowRangeFilter extends FilterBase { - private List rangeList; - private static final int ROW_BEFORE_FIRST_RANGE = -1; - private boolean EXCLUSIVE = false; + + private final List rangeList; + private final RangeIteration ranges; + private boolean done = false; - private boolean initialized = false; private int index; - private RowRange range; + private BasicRowRange range; private ReturnCode currentReturnCode; /** @@ -71,7 +71,15 @@ public class MultiRowRangeFilter extends FilterBase { * RowRange is invalid */ public MultiRowRangeFilter(List list) throws IOException { - this.rangeList = sortAndMerge(list); + // We don't use rangeList anywhere else, but keeping it lets us pay a little + // memory to avoid touching the serialization logic. + this.rangeList = Collections.unmodifiableList(sortAndMerge(list)); + this.ranges = new RangeIteration(rangeList); + } + + public List getRowRanges() { + // Used by hbase-rest + return this.rangeList; } @Override @@ -79,42 +87,45 @@ public class MultiRowRangeFilter extends FilterBase { return done; } - public List getRowRanges() { - return this.rangeList; - } - @Override public boolean filterRowKey(byte[] buffer, int offset, int length) { + // N.b. We can only do this after we're iterating over records. If we try to do + // it before, the Scan (and this filter) may not yet be fully initialized. This is a + // wart on Filter and something that'd be nice to clean up (like CP's in HBase2.0) + if (!ranges.isInitialized()) { + ranges.initialize(isReversed()); + } + // If it is the first time of running, calculate the current range index for // the row key. If index is out of bound which happens when the start row // user sets is after the largest stop row of the ranges, stop the scan. // If row key is after the current range, find the next range and update index. - if (!initialized || !range.contains(buffer, offset, length)) { + if (!ranges.hasFoundFirstRange() || !range.contains(buffer, offset, length)) { byte[] rowkey = new byte[length]; System.arraycopy(buffer, offset, rowkey, 0, length); - index = getNextRangeIndex(rowkey); - if (index >= rangeList.size()) { + index = ranges.getNextRangeIndex(rowkey); + if (ranges.isIterationComplete(index)) { done = true; currentReturnCode = ReturnCode.NEXT_ROW; return false; } if(index != ROW_BEFORE_FIRST_RANGE) { - range = rangeList.get(index); + range = ranges.get(index); } else { - range = rangeList.get(0); + range = ranges.get(0); } - if (EXCLUSIVE) { - EXCLUSIVE = false; + if (ranges.isExclusive()) { + ranges.resetExclusive(); currentReturnCode = ReturnCode.NEXT_ROW; return false; } - if (!initialized) { + if (!ranges.hasFoundFirstRange()) { if(index != ROW_BEFORE_FIRST_RANGE) { currentReturnCode = ReturnCode.INCLUDE; } else { currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT; } - initialized = true; + ranges.setFoundFirstRange(); } else { if (range.contains(buffer, offset, length)) { currentReturnCode = ReturnCode.INCLUDE; @@ -136,7 +147,9 @@ public class MultiRowRangeFilter extends FilterBase { @Override public Cell getNextCellHint(Cell currentKV) { // skip to the next range's start row - return KeyValueUtil.createFirstOnRow(range.startRow); + // #getComparisonData lets us avoid the `if (reversed)` branch + byte[] comparisonData = range.getComparisonData(); + return KeyValueUtil.createFirstOnRow(comparisonData); } /** @@ -218,37 +231,6 @@ public class MultiRowRangeFilter extends FilterBase { return true; } - /** - * calculate the position where the row key in the ranges list. - * - * @param rowKey the row key to calculate - * @return index the position of the row key - */ - private int getNextRangeIndex(byte[] rowKey) { - RowRange temp = new RowRange(rowKey, true, null, true); - int index = Collections.binarySearch(rangeList, temp); - if (index < 0) { - int insertionPosition = -index - 1; - // check if the row key in the range before the insertion position - if (insertionPosition != 0 && rangeList.get(insertionPosition - 1).contains(rowKey)) { - return insertionPosition - 1; - } - // check if the row key is before the first range - if (insertionPosition == 0 && !rangeList.get(insertionPosition).contains(rowKey)) { - return ROW_BEFORE_FIRST_RANGE; - } - if (!initialized) { - initialized = true; - } - return insertionPosition; - } - // the row key equals one of the start keys, and the the range exclude the start key - if(rangeList.get(index).startRowInclusive == false) { - EXCLUSIVE = true; - } - return index; - } - /** * sort the ranges and if the ranges with overlap, then merge them. * @@ -419,22 +401,20 @@ public class MultiRowRangeFilter extends FilterBase { throw new IllegalArgumentException(sb.toString()); } - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static class RowRange implements Comparable { - private byte[] startRow; - private boolean startRowInclusive = true; - private byte[] stopRow; - private boolean stopRowInclusive = false; + private static abstract class BasicRowRange implements Comparable { + protected byte[] startRow; + protected boolean startRowInclusive = true; + protected byte[] stopRow; + protected boolean stopRowInclusive = false; - public RowRange() { + public BasicRowRange() { } /** * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the * start row of the table. If the stopRow is empty or null, set it to * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table. */ - public RowRange(String startRow, boolean startRowInclusive, String stopRow, + public BasicRowRange(String startRow, boolean startRowInclusive, String stopRow, boolean stopRowInclusive) { this((startRow == null || startRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY : Bytes.toBytes(startRow), startRowInclusive, @@ -442,7 +422,7 @@ public class MultiRowRangeFilter extends FilterBase { Bytes.toBytes(stopRow), stopRowInclusive); } - public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, + public BasicRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, boolean stopRowInclusive) { this.startRow = (startRow == null) ? HConstants.EMPTY_BYTE_ARRAY : startRow; this.startRowInclusive = startRowInclusive; @@ -500,13 +480,6 @@ public class MultiRowRangeFilter extends FilterBase { } } - @Override - @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS", - justification="This compareTo is not of this Object, but of referenced RowRange") - public int compareTo(RowRange other) { - return Bytes.compareTo(this.startRow, other.startRow); - } - public boolean isValid() { return Bytes.equals(startRow, HConstants.EMPTY_BYTE_ARRAY) || Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) @@ -516,13 +489,13 @@ public class MultiRowRangeFilter extends FilterBase { @Override public boolean equals(Object obj){ - if (!(obj instanceof RowRange)) { + if (!(obj instanceof BasicRowRange)) { return false; } if (this == obj) { return true; } - RowRange rr = (RowRange) obj; + BasicRowRange rr = (BasicRowRange) obj; return Bytes.equals(this.stopRow, rr.getStopRow()) && Bytes.equals(this.startRow, this.getStartRow()) && this.startRowInclusive == rr.isStartRowInclusive() && @@ -536,6 +509,236 @@ public class MultiRowRangeFilter extends FilterBase { this.startRowInclusive, this.stopRowInclusive); } + + /** + * Returns the data to be used to compare {@code this} to another object. + */ + public abstract byte[] getComparisonData(); + + /** + * Returns whether the bounding row used for binary-search is inclusive or not. + * + * For forward scans, we would check the starRow, but we would check the stopRow for + * the reverse scan case. + */ + public abstract boolean isSearchRowInclusive(); + + @Override + public int compareTo(BasicRowRange other) { + byte[] left; + byte[] right; + if (isAscendingOrder()) { + left = this.getComparisonData(); + right = other.getComparisonData(); + } else { + left = other.getComparisonData(); + right = this.getComparisonData(); + } + return Bytes.compareTo(left, right); + } + + public abstract boolean isAscendingOrder(); + } + + /** + * Internal RowRange that reverses the sort-order to handle reverse scans. + */ + @InterfaceAudience.Private + private static class ReversedRowRange extends BasicRowRange { + public ReversedRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, + boolean stopRowInclusive) { + super(startRow, startRowInclusive, stopRow, stopRowInclusive); + } + + @Override + public byte[] getComparisonData() { + return this.stopRow; + } + + @Override + public boolean isSearchRowInclusive() { + return this.stopRowInclusive; + } + + @Override + public boolean isAscendingOrder() { + return false; + } + } + + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static class RowRange extends BasicRowRange { + public RowRange() { + } + /** + * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the + * start row of the table. If the stopRow is empty or null, set it to + * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table. + */ + public RowRange(String startRow, boolean startRowInclusive, String stopRow, + boolean stopRowInclusive) { + super(startRow, startRowInclusive, stopRow, stopRowInclusive); + } + + public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, + boolean stopRowInclusive) { + super(startRow, startRowInclusive, stopRow, stopRowInclusive); + } + + @Override + public byte[] getComparisonData() { + return startRow; + } + + @Override + public boolean isSearchRowInclusive() { + return startRowInclusive; + } + + @Override + public boolean isAscendingOrder() { + return true; + } + } + + /** + * Abstraction over the ranges of rows to return from this filter, regardless of forward or + * reverse scans being used. This Filter can use this class, agnostic of iteration direction, + * as the same algorithm can be applied in both cases. + */ + @InterfaceAudience.Private + private static class RangeIteration { + private boolean exclusive = false; + private boolean initialized = false; + private boolean foundFirstRange = false; + private boolean reversed = false; + private final List sortedAndMergedRanges; + private List ranges; + + public RangeIteration(List sortedAndMergedRanges) { + this.sortedAndMergedRanges = sortedAndMergedRanges; + } + + void initialize(boolean reversed) { + // Avoid double initialization + assert !this.initialized; + this.reversed = reversed; + if (reversed) { + // If we are doing a reverse scan, we can reverse the ranges (both the elements in + // the list as well as their start/stop key), and use the same algorithm. + this.ranges = flipAndReverseRanges(sortedAndMergedRanges); + } else { + this.ranges = sortedAndMergedRanges; + } + this.initialized = true; + } + + /** + * Rebuilds the sorted ranges (by startKey) into an equivalent sorted list of ranges, only by + * stopKey instead. Descending order and the ReversedRowRange compareTo implementation make + * sure that we can use Collections.binarySearch(). + */ + static List flipAndReverseRanges(List ranges) { + List flippedRanges = new ArrayList<>(ranges.size()); + for (int i = ranges.size() - 1; i >= 0; i--) { + RowRange origRange = ranges.get(i); + ReversedRowRange newRowRange = new ReversedRowRange( + origRange.startRow, origRange.startRowInclusive, origRange.stopRow, + origRange.isStopRowInclusive()); + flippedRanges.add(newRowRange); + } + return flippedRanges; + } + + /** + * Calculates the position where the given rowkey fits in the ranges list. + * + * @param rowKey the row key to calculate + * @return index the position of the row key + */ + public int getNextRangeIndex(byte[] rowKey) { + BasicRowRange temp; + if (reversed) { + temp = new ReversedRowRange(null, true, rowKey, true); + } else { + temp = new RowRange(rowKey, true, null, true); + } + // Because we make sure that `ranges` has the correct natural ordering (given it containing + // RowRange or ReverseRowRange objects). This keeps us from having to have two different + // implementations below. + final int index = Collections.binarySearch(ranges, temp); + if (index < 0) { + int insertionPosition = -index - 1; + // check if the row key in the range before the insertion position + if (insertionPosition != 0 && ranges.get(insertionPosition - 1).contains(rowKey)) { + return insertionPosition - 1; + } + // check if the row key is before the first range + if (insertionPosition == 0 && !ranges.get(insertionPosition).contains(rowKey)) { + return ROW_BEFORE_FIRST_RANGE; + } + if (!foundFirstRange) { + foundFirstRange = true; + } + return insertionPosition; + } + // the row key equals one of the start keys, and the the range exclude the start key + if(ranges.get(index).isSearchRowInclusive() == false) { + exclusive = true; + } + return index; + } + + /** + * Sets {@link #foundFirstRange} to {@code true}, indicating that we found a matching row range. + */ + public void setFoundFirstRange() { + this.foundFirstRange = true; + } + + /** + * Gets the RowRange at the given offset. + */ + @SuppressWarnings("unchecked") + public T get(int i) { + return (T) ranges.get(i); + } + + /** + * Returns true if the first matching row range was found. + */ + public boolean hasFoundFirstRange() { + return foundFirstRange; + } + + /** + * Returns true if the current range's key is exclusive + */ + public boolean isExclusive() { + return exclusive; + } + + /** + * Resets the exclusive flag. + */ + public void resetExclusive() { + exclusive = false; + } + + /** + * Returns true if this class has been initialized by calling {@link #initialize(boolean)}. + */ + public boolean isInitialized() { + return initialized; + } + + /** + * Returns true if we exhausted searching all row ranges. + */ + public boolean isIterationComplete(int index) { + return index >= ranges.size(); + } } @Override @@ -545,6 +748,6 @@ public class MultiRowRangeFilter extends FilterBase { @Override public int hashCode() { - return Objects.hash(this.rangeList); + return Objects.hash(this.ranges); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java index 9ba5ec3e86d..b5252713366 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java @@ -486,6 +486,134 @@ public class TestMultiRowRangeFilter { ht.close(); } + @Test + public void testReverseMultiRowRangeFilterWithinTable() throws IOException { + tableName = Bytes.toBytes("testReverseMultiRowRangeFilterWithinTable"); + HTable ht = TEST_UTIL.createTable(tableName, family); + generateRows(numRows, ht, family, qf, value); + + Scan scan = new Scan(); + scan.setReversed(true); + List ranges = Arrays.asList( + new RowRange(Bytes.toBytes(20), true, Bytes.toBytes(30), true), + new RowRange(Bytes.toBytes(50), true, Bytes.toBytes(60), true) + ); + MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); + scan.setFilter(filter); + + List expectedResults = new ArrayList<>(); + for (int i = 60; i >= 50; i--) { + expectedResults.add(i); + } + for (int i = 30; i >= 20; i--) { + expectedResults.add(i); + } + + List results = getResults(ht, scan); + List actualResults = new ArrayList<>(); + StringBuilder sb = new StringBuilder(); + for (Cell result : results) { + int observedValue = Bytes.toInt( + result.getRowArray(), result.getRowOffset(), result.getRowLength()); + actualResults.add(observedValue); + if (sb.length() > 0) { + sb.append(", "); + } + sb.append(observedValue); + } + assertEquals("Saw results: " + sb.toString(), 22, results.size()); + } + + @Test + public void testReverseMultiRowRangeFilterIncludingMaxRow() throws IOException { + tableName = Bytes.toBytes("testReverseMultiRowRangeFilterIncludingMaxRow"); + HTable ht = TEST_UTIL.createTable(tableName, family); + for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) { + byte[] row = Bytes.toBytes(rowkey); + Put p = new Put(row); + p.addColumn(family, qf, value); + ht.put(p); + } + TEST_UTIL.flush(); + + Scan scan = new Scan(); + scan.setReversed(true); + List ranges = Arrays.asList( + new RowRange(Bytes.toBytes("b"), true, Bytes.toBytes("c"), true), + new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true) + ); + MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); + scan.setFilter(filter); + + List expected = Arrays.asList("h", "g", "f", "c", "b"); + List actual = new ArrayList<>(); + for (Cell cell : getResults(ht, scan)) { + actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())); + } + + assertEquals(expected, actual); + } + + @Test + public void testReverseMultiRowRangeFilterIncludingMinRow() throws IOException { + tableName = Bytes.toBytes("testReverseMultiRowRangeFilterIncludingMinRow"); + HTable ht = TEST_UTIL.createTable(tableName, family); + for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) { + byte[] row = Bytes.toBytes(rowkey); + Put p = new Put(row); + p.addColumn(family, qf, value); + ht.put(p); + } + TEST_UTIL.flush(); + + Scan scan = new Scan(); + scan.setReversed(true); + List ranges = Arrays.asList( + new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true), + new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("g"), true) + ); + MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); + scan.setFilter(filter); + + List expected = Arrays.asList("g", "f", "c", "b", "a"); + List actual = new ArrayList<>(); + for (Cell cell : getResults(ht, scan)) { + actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())); + } + + assertEquals(expected, actual); + } + + @Test + public void testReverseMultiRowRangeFilterIncludingMinAndMaxRow() throws IOException { + tableName = Bytes.toBytes("testReverseMultiRowRangeFilterIncludingMinAndMaxRow"); + HTable ht = TEST_UTIL.createTable(tableName, family); + for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) { + byte[] row = Bytes.toBytes(rowkey); + Put p = new Put(row); + p.addColumn(family, qf, value); + ht.put(p); + } + TEST_UTIL.flush(); + + Scan scan = new Scan(); + scan.setReversed(true); + List ranges = Arrays.asList( + new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true), + new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true) + ); + MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); + scan.setFilter(filter); + + List expected = Arrays.asList("h", "g", "f", "c", "b", "a"); + List actual = new ArrayList<>(); + for (Cell cell : getResults(ht, scan)) { + actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())); + } + + assertEquals(expected, actual); + } + private void generateRows(int numberOfRows, HTable ht, byte[] family, byte[] qf, byte[] value) throws IOException { for (int i = 0; i < numberOfRows; i++) { @@ -518,7 +646,7 @@ public class TestMultiRowRangeFilter { return kvList; } - private int getResultsSize(HTable ht, Scan scan) throws IOException { + private List getResults(HTable ht, Scan scan) throws IOException { ResultScanner scanner = ht.getScanner(scan); List results = new ArrayList(); Result r; @@ -528,6 +656,10 @@ public class TestMultiRowRangeFilter { } } scanner.close(); - return results.size(); + return results; + } + + private int getResultsSize(HTable ht, Scan scan) throws IOException { + return getResults(ht, scan).size(); } }