diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java index 7692be0d761..32e22d8a6c6 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java @@ -51,21 +51,29 @@ import org.apache.hadoop.hbase.util.Bytes; @InterfaceAudience.Public public class MultiRowRangeFilter extends FilterBase { - private List rangeList; - private static final int ROW_BEFORE_FIRST_RANGE = -1; - private boolean EXCLUSIVE = false; + + private final List rangeList; + private final RangeIteration ranges; + private boolean done = false; - private boolean initialized = false; private int index; - private RowRange range; + private BasicRowRange range; private ReturnCode currentReturnCode; /** * @param list A list of RowRange */ public MultiRowRangeFilter(List list) { - this.rangeList = sortAndMerge(list); + // We don't use rangeList anywhere else, but keeping it lets us pay a little + // memory to avoid touching the serialization logic. + this.rangeList = Collections.unmodifiableList(sortAndMerge(list)); + this.ranges = new RangeIteration(rangeList); + } + + public List getRowRanges() { + // Used by hbase-rest + return this.rangeList; } @Override @@ -73,13 +81,17 @@ public class MultiRowRangeFilter extends FilterBase { return done; } - public List getRowRanges() { - return this.rangeList; - } - @Override public boolean filterRowKey(Cell firstRowCell) { if (filterAllRemaining()) return true; + + // N.b. We can only do this after we're iterating over records. If we try to do + // it before, the Scan (and this filter) may not yet be fully initialized. This is a + // wart on Filter and something that'd be nice to clean up (like CP's in HBase2.0) + if (!ranges.isInitialized()) { + ranges.initialize(isReversed()); + } + // If it is the first time of running, calculate the current range index for // the row key. If index is out of bound which happens when the start row // user sets is after the largest stop row of the ranges, stop the scan. @@ -87,32 +99,31 @@ public class MultiRowRangeFilter extends FilterBase { byte[] rowArr = firstRowCell.getRowArray(); int length = firstRowCell.getRowLength(); int offset = firstRowCell.getRowOffset(); - if (!initialized - || !range.contains(rowArr, offset, length)) { + if (!ranges.hasFoundFirstRange() || !range.contains(rowArr, offset, length)) { byte[] rowkey = CellUtil.cloneRow(firstRowCell); - index = getNextRangeIndex(rowkey); - if (index >= rangeList.size()) { + index = ranges.getNextRangeIndex(rowkey); + if (ranges.isIterationComplete(index)) { done = true; currentReturnCode = ReturnCode.NEXT_ROW; return false; } if(index != ROW_BEFORE_FIRST_RANGE) { - range = rangeList.get(index); + range = ranges.get(index); } else { - range = rangeList.get(0); + range = ranges.get(0); } - if (EXCLUSIVE) { - EXCLUSIVE = false; + if (ranges.isExclusive()) { + ranges.resetExclusive(); currentReturnCode = ReturnCode.NEXT_ROW; return false; } - if (!initialized) { + if (!ranges.hasFoundFirstRange()) { if(index != ROW_BEFORE_FIRST_RANGE) { currentReturnCode = ReturnCode.INCLUDE; } else { currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT; } - initialized = true; + ranges.setFoundFirstRange(); } else { if (range.contains(rowArr, offset, length)) { currentReturnCode = ReturnCode.INCLUDE; @@ -140,8 +151,9 @@ public class MultiRowRangeFilter extends FilterBase { @Override public Cell getNextCellHint(Cell currentKV) { // skip to the next range's start row - return PrivateCellUtil.createFirstOnRow(range.startRow, 0, - (short) range.startRow.length); + // #getComparisonData lets us avoid the `if (reversed)` branch + byte[] comparisonData = range.getComparisonData(); + return PrivateCellUtil.createFirstOnRow(comparisonData, 0, (short) comparisonData.length); } /** @@ -219,37 +231,6 @@ public class MultiRowRangeFilter extends FilterBase { return true; } - /** - * calculate the position where the row key in the ranges list. - * - * @param rowKey the row key to calculate - * @return index the position of the row key - */ - private int getNextRangeIndex(byte[] rowKey) { - RowRange temp = new RowRange(rowKey, true, null, true); - int index = Collections.binarySearch(rangeList, temp); - if (index < 0) { - int insertionPosition = -index - 1; - // check if the row key in the range before the insertion position - if (insertionPosition != 0 && rangeList.get(insertionPosition - 1).contains(rowKey)) { - return insertionPosition - 1; - } - // check if the row key is before the first range - if (insertionPosition == 0 && !rangeList.get(insertionPosition).contains(rowKey)) { - return ROW_BEFORE_FIRST_RANGE; - } - if (!initialized) { - initialized = true; - } - return insertionPosition; - } - // the row key equals one of the start keys, and the the range exclude the start key - if(rangeList.get(index).startRowInclusive == false) { - EXCLUSIVE = true; - } - return index; - } - /** * sort the ranges and if the ranges with overlap, then merge them. * @@ -420,21 +401,20 @@ public class MultiRowRangeFilter extends FilterBase { throw new IllegalArgumentException(sb.toString()); } - @InterfaceAudience.Public - public static class RowRange implements Comparable { - private byte[] startRow; - private boolean startRowInclusive = true; - private byte[] stopRow; - private boolean stopRowInclusive = false; + private static abstract class BasicRowRange implements Comparable { + protected byte[] startRow; + protected boolean startRowInclusive = true; + protected byte[] stopRow; + protected boolean stopRowInclusive = false; - public RowRange() { + public BasicRowRange() { } /** * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the * start row of the table. If the stopRow is empty or null, set it to * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table. */ - public RowRange(String startRow, boolean startRowInclusive, String stopRow, + public BasicRowRange(String startRow, boolean startRowInclusive, String stopRow, boolean stopRowInclusive) { this((startRow == null || startRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY : Bytes.toBytes(startRow), startRowInclusive, @@ -442,7 +422,7 @@ public class MultiRowRangeFilter extends FilterBase { Bytes.toBytes(stopRow), stopRowInclusive); } - public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, + public BasicRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, boolean stopRowInclusive) { this.startRow = (startRow == null) ? HConstants.EMPTY_BYTE_ARRAY : startRow; this.startRowInclusive = startRowInclusive; @@ -500,13 +480,6 @@ public class MultiRowRangeFilter extends FilterBase { } } - @Override - @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS", - justification="This compareTo is not of this Object, but of referenced RowRange") - public int compareTo(RowRange other) { - return Bytes.compareTo(this.startRow, other.startRow); - } - public boolean isValid() { return Bytes.equals(startRow, HConstants.EMPTY_BYTE_ARRAY) || Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) @@ -516,13 +489,13 @@ public class MultiRowRangeFilter extends FilterBase { @Override public boolean equals(Object obj){ - if (!(obj instanceof RowRange)) { + if (!(obj instanceof BasicRowRange)) { return false; } if (this == obj) { return true; } - RowRange rr = (RowRange) obj; + BasicRowRange rr = (BasicRowRange) obj; return Bytes.equals(this.stopRow, rr.getStopRow()) && Bytes.equals(this.startRow, this.getStartRow()) && this.startRowInclusive == rr.isStartRowInclusive() && @@ -536,6 +509,235 @@ public class MultiRowRangeFilter extends FilterBase { this.startRowInclusive, this.stopRowInclusive); } + + /** + * Returns the data to be used to compare {@code this} to another object. + */ + public abstract byte[] getComparisonData(); + + /** + * Returns whether the bounding row used for binary-search is inclusive or not. + * + * For forward scans, we would check the starRow, but we would check the stopRow for + * the reverse scan case. + */ + public abstract boolean isSearchRowInclusive(); + + @Override + public int compareTo(BasicRowRange other) { + byte[] left; + byte[] right; + if (isAscendingOrder()) { + left = this.getComparisonData(); + right = other.getComparisonData(); + } else { + left = other.getComparisonData(); + right = this.getComparisonData(); + } + return Bytes.compareTo(left, right); + } + + public abstract boolean isAscendingOrder(); + } + + /** + * Internal RowRange that reverses the sort-order to handle reverse scans. + */ + @InterfaceAudience.Private + private static class ReversedRowRange extends BasicRowRange { + public ReversedRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, + boolean stopRowInclusive) { + super(startRow, startRowInclusive, stopRow, stopRowInclusive); + } + + @Override + public byte[] getComparisonData() { + return this.stopRow; + } + + @Override + public boolean isSearchRowInclusive() { + return this.stopRowInclusive; + } + + @Override + public boolean isAscendingOrder() { + return false; + } + } + + @InterfaceAudience.Public + public static class RowRange extends BasicRowRange { + public RowRange() { + } + /** + * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the + * start row of the table. If the stopRow is empty or null, set it to + * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table. + */ + public RowRange(String startRow, boolean startRowInclusive, String stopRow, + boolean stopRowInclusive) { + super(startRow, startRowInclusive, stopRow, stopRowInclusive); + } + + public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, + boolean stopRowInclusive) { + super(startRow, startRowInclusive, stopRow, stopRowInclusive); + } + + @Override + public byte[] getComparisonData() { + return startRow; + } + + @Override + public boolean isSearchRowInclusive() { + return startRowInclusive; + } + + @Override + public boolean isAscendingOrder() { + return true; + } + } + + /** + * Abstraction over the ranges of rows to return from this filter, regardless of forward or + * reverse scans being used. This Filter can use this class, agnostic of iteration direction, + * as the same algorithm can be applied in both cases. + */ + @InterfaceAudience.Private + private static class RangeIteration { + private boolean exclusive = false; + private boolean initialized = false; + private boolean foundFirstRange = false; + private boolean reversed = false; + private final List sortedAndMergedRanges; + private List ranges; + + public RangeIteration(List sortedAndMergedRanges) { + this.sortedAndMergedRanges = sortedAndMergedRanges; + } + + void initialize(boolean reversed) { + // Avoid double initialization + assert !this.initialized; + this.reversed = reversed; + if (reversed) { + // If we are doing a reverse scan, we can reverse the ranges (both the elements in + // the list as well as their start/stop key), and use the same algorithm. + this.ranges = flipAndReverseRanges(sortedAndMergedRanges); + } else { + this.ranges = sortedAndMergedRanges; + } + this.initialized = true; + } + + /** + * Rebuilds the sorted ranges (by startKey) into an equivalent sorted list of ranges, only by + * stopKey instead. Descending order and the ReversedRowRange compareTo implementation make + * sure that we can use Collections.binarySearch(). + */ + static List flipAndReverseRanges(List ranges) { + List flippedRanges = new ArrayList<>(ranges.size()); + for (int i = ranges.size() - 1; i >= 0; i--) { + RowRange origRange = ranges.get(i); + ReversedRowRange newRowRange = new ReversedRowRange( + origRange.startRow, origRange.startRowInclusive, origRange.stopRow, + origRange.isStopRowInclusive()); + flippedRanges.add(newRowRange); + } + return flippedRanges; + } + + /** + * Calculates the position where the given rowkey fits in the ranges list. + * + * @param rowKey the row key to calculate + * @return index the position of the row key + */ + public int getNextRangeIndex(byte[] rowKey) { + BasicRowRange temp; + if (reversed) { + temp = new ReversedRowRange(null, true, rowKey, true); + } else { + temp = new RowRange(rowKey, true, null, true); + } + // Because we make sure that `ranges` has the correct natural ordering (given it containing + // RowRange or ReverseRowRange objects). This keeps us from having to have two different + // implementations below. + final int index = Collections.binarySearch(ranges, temp); + if (index < 0) { + int insertionPosition = -index - 1; + // check if the row key in the range before the insertion position + if (insertionPosition != 0 && ranges.get(insertionPosition - 1).contains(rowKey)) { + return insertionPosition - 1; + } + // check if the row key is before the first range + if (insertionPosition == 0 && !ranges.get(insertionPosition).contains(rowKey)) { + return ROW_BEFORE_FIRST_RANGE; + } + if (!foundFirstRange) { + foundFirstRange = true; + } + return insertionPosition; + } + // the row key equals one of the start keys, and the the range exclude the start key + if(ranges.get(index).isSearchRowInclusive() == false) { + exclusive = true; + } + return index; + } + + /** + * Sets {@link #foundFirstRange} to {@code true}, indicating that we found a matching row range. + */ + public void setFoundFirstRange() { + this.foundFirstRange = true; + } + + /** + * Gets the RowRange at the given offset. + */ + @SuppressWarnings("unchecked") + public T get(int i) { + return (T) ranges.get(i); + } + + /** + * Returns true if the first matching row range was found. + */ + public boolean hasFoundFirstRange() { + return foundFirstRange; + } + + /** + * Returns true if the current range's key is exclusive + */ + public boolean isExclusive() { + return exclusive; + } + + /** + * Resets the exclusive flag. + */ + public void resetExclusive() { + exclusive = false; + } + + /** + * Returns true if this class has been initialized by calling {@link #initialize(boolean)}. + */ + public boolean isInitialized() { + return initialized; + } + + /** + * Returns true if we exhausted searching all row ranges. + */ + public boolean isIterationComplete(int index) { + return index >= ranges.size(); + } } @Override @@ -545,6 +747,6 @@ public class MultiRowRangeFilter extends FilterBase { @Override public int hashCode() { - return Objects.hash(this.rangeList); + return Objects.hash(this.ranges); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java index 695b9f5c394..4cfc02c5fe8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java @@ -507,6 +507,134 @@ public class TestMultiRowRangeFilter { ht.close(); } + @Test + public void testReverseMultiRowRangeFilterWithinTable() throws IOException { + tableName = TableName.valueOf(name.getMethodName()); + Table ht = TEST_UTIL.createTable(tableName, family); + generateRows(numRows, ht, family, qf, value); + + Scan scan = new Scan(); + scan.setReversed(true); + List ranges = Arrays.asList( + new RowRange(Bytes.toBytes(20), true, Bytes.toBytes(30), true), + new RowRange(Bytes.toBytes(50), true, Bytes.toBytes(60), true) + ); + MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); + scan.setFilter(filter); + + List expectedResults = new ArrayList<>(); + for (int i = 60; i >= 50; i--) { + expectedResults.add(i); + } + for (int i = 30; i >= 20; i--) { + expectedResults.add(i); + } + + List results = getResults(ht, scan); + List actualResults = new ArrayList<>(); + StringBuilder sb = new StringBuilder(); + for (Cell result : results) { + int observedValue = Bytes.toInt( + result.getRowArray(), result.getRowOffset(), result.getRowLength()); + actualResults.add(observedValue); + if (sb.length() > 0) { + sb.append(", "); + } + sb.append(observedValue); + } + assertEquals("Saw results: " + sb.toString(), 22, results.size()); + } + + @Test + public void testReverseMultiRowRangeFilterIncludingMaxRow() throws IOException { + tableName = TableName.valueOf(name.getMethodName()); + Table ht = TEST_UTIL.createTable(tableName, family); + for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) { + byte[] row = Bytes.toBytes(rowkey); + Put p = new Put(row); + p.addColumn(family, qf, value); + ht.put(p); + } + TEST_UTIL.flush(); + + Scan scan = new Scan(); + scan.setReversed(true); + List ranges = Arrays.asList( + new RowRange(Bytes.toBytes("b"), true, Bytes.toBytes("c"), true), + new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true) + ); + MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); + scan.setFilter(filter); + + List expected = Arrays.asList("h", "g", "f", "c", "b"); + List actual = new ArrayList<>(); + for (Cell cell : getResults(ht, scan)) { + actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())); + } + + assertEquals(expected, actual); + } + + @Test + public void testReverseMultiRowRangeFilterIncludingMinRow() throws IOException { + tableName = TableName.valueOf(name.getMethodName()); + Table ht = TEST_UTIL.createTable(tableName, family); + for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) { + byte[] row = Bytes.toBytes(rowkey); + Put p = new Put(row); + p.addColumn(family, qf, value); + ht.put(p); + } + TEST_UTIL.flush(); + + Scan scan = new Scan(); + scan.setReversed(true); + List ranges = Arrays.asList( + new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true), + new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("g"), true) + ); + MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); + scan.setFilter(filter); + + List expected = Arrays.asList("g", "f", "c", "b", "a"); + List actual = new ArrayList<>(); + for (Cell cell : getResults(ht, scan)) { + actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())); + } + + assertEquals(expected, actual); + } + + @Test + public void testReverseMultiRowRangeFilterIncludingMinAndMaxRow() throws IOException { + tableName = TableName.valueOf(name.getMethodName()); + Table ht = TEST_UTIL.createTable(tableName, family); + for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) { + byte[] row = Bytes.toBytes(rowkey); + Put p = new Put(row); + p.addColumn(family, qf, value); + ht.put(p); + } + TEST_UTIL.flush(); + + Scan scan = new Scan(); + scan.setReversed(true); + List ranges = Arrays.asList( + new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true), + new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true) + ); + MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); + scan.setFilter(filter); + + List expected = Arrays.asList("h", "g", "f", "c", "b", "a"); + List actual = new ArrayList<>(); + for (Cell cell : getResults(ht, scan)) { + actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())); + } + + assertEquals(expected, actual); + } + private void generateRows(int numberOfRows, Table ht, byte[] family, byte[] qf, byte[] value) throws IOException { for (int i = 0; i < numberOfRows; i++) { @@ -539,7 +667,7 @@ public class TestMultiRowRangeFilter { return kvList; } - private int getResultsSize(Table ht, Scan scan) throws IOException { + private List getResults(Table ht, Scan scan) throws IOException { ResultScanner scanner = ht.getScanner(scan); List results = new ArrayList<>(); Result r; @@ -549,6 +677,10 @@ public class TestMultiRowRangeFilter { } } scanner.close(); - return results.size(); + return results; + } + + private int getResultsSize(Table ht, Scan scan) throws IOException { + return getResults(ht, scan).size(); } }