diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java index c0026060248..f5715b9abb5 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java @@ -54,14 +54,14 @@ import com.google.protobuf.InvalidProtocolBufferException; @InterfaceStability.Evolving public class MultiRowRangeFilter extends FilterBase { + private List rangeList; + private static final int ROW_BEFORE_FIRST_RANGE = -1; - - private final List rangeList; - private final RangeIteration ranges; - + private boolean EXCLUSIVE = false; private boolean done = false; + private boolean initialized = false; private int index; - private BasicRowRange range; + private RowRange range; private ReturnCode currentReturnCode; /** @@ -71,15 +71,7 @@ public class MultiRowRangeFilter extends FilterBase { * RowRange is invalid */ public MultiRowRangeFilter(List list) throws IOException { - // We don't use rangeList anywhere else, but keeping it lets us pay a little - // memory to avoid touching the serialization logic. - this.rangeList = Collections.unmodifiableList(sortAndMerge(list)); - this.ranges = new RangeIteration(rangeList); - } - - public List getRowRanges() { - // Used by hbase-rest - return this.rangeList; + this.rangeList = sortAndMerge(list); } @Override @@ -87,45 +79,42 @@ public class MultiRowRangeFilter extends FilterBase { return done; } + public List getRowRanges() { + return this.rangeList; + } + @Override public boolean filterRowKey(byte[] buffer, int offset, int length) { - // N.b. We can only do this after we're iterating over records. If we try to do - // it before, the Scan (and this filter) may not yet be fully initialized. This is a - // wart on Filter and something that'd be nice to clean up (like CP's in HBase2.0) - if (!ranges.isInitialized()) { - ranges.initialize(isReversed()); - } - // If it is the first time of running, calculate the current range index for // the row key. If index is out of bound which happens when the start row // user sets is after the largest stop row of the ranges, stop the scan. // If row key is after the current range, find the next range and update index. - if (!ranges.hasFoundFirstRange() || !range.contains(buffer, offset, length)) { + if (!initialized || !range.contains(buffer, offset, length)) { byte[] rowkey = new byte[length]; System.arraycopy(buffer, offset, rowkey, 0, length); - index = ranges.getNextRangeIndex(rowkey); - if (ranges.isIterationComplete(index)) { + index = getNextRangeIndex(rowkey); + if (index >= rangeList.size()) { done = true; currentReturnCode = ReturnCode.NEXT_ROW; return false; } if(index != ROW_BEFORE_FIRST_RANGE) { - range = ranges.get(index); + range = rangeList.get(index); } else { - range = ranges.get(0); + range = rangeList.get(0); } - if (ranges.isExclusive()) { - ranges.resetExclusive(); + if (EXCLUSIVE) { + EXCLUSIVE = false; currentReturnCode = ReturnCode.NEXT_ROW; return false; } - if (!ranges.hasFoundFirstRange()) { + if (!initialized) { if(index != ROW_BEFORE_FIRST_RANGE) { currentReturnCode = ReturnCode.INCLUDE; } else { currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT; } - ranges.setFoundFirstRange(); + initialized = true; } else { if (range.contains(buffer, offset, length)) { currentReturnCode = ReturnCode.INCLUDE; @@ -147,9 +136,7 @@ public class MultiRowRangeFilter extends FilterBase { @Override public Cell getNextCellHint(Cell currentKV) { // skip to the next range's start row - // #getComparisonData lets us avoid the `if (reversed)` branch - byte[] comparisonData = range.getComparisonData(); - return KeyValueUtil.createFirstOnRow(comparisonData); + return KeyValueUtil.createFirstOnRow(range.startRow); } /** @@ -231,6 +218,37 @@ public class MultiRowRangeFilter extends FilterBase { return true; } + /** + * calculate the position where the row key in the ranges list. + * + * @param rowKey the row key to calculate + * @return index the position of the row key + */ + private int getNextRangeIndex(byte[] rowKey) { + RowRange temp = new RowRange(rowKey, true, null, true); + int index = Collections.binarySearch(rangeList, temp); + if (index < 0) { + int insertionPosition = -index - 1; + // check if the row key in the range before the insertion position + if (insertionPosition != 0 && rangeList.get(insertionPosition - 1).contains(rowKey)) { + return insertionPosition - 1; + } + // check if the row key is before the first range + if (insertionPosition == 0 && !rangeList.get(insertionPosition).contains(rowKey)) { + return ROW_BEFORE_FIRST_RANGE; + } + if (!initialized) { + initialized = true; + } + return insertionPosition; + } + // the row key equals one of the start keys, and the the range exclude the start key + if(rangeList.get(index).startRowInclusive == false) { + EXCLUSIVE = true; + } + return index; + } + /** * sort the ranges and if the ranges with overlap, then merge them. * @@ -401,20 +419,22 @@ public class MultiRowRangeFilter extends FilterBase { throw new IllegalArgumentException(sb.toString()); } - private static abstract class BasicRowRange implements Comparable { - protected byte[] startRow; - protected boolean startRowInclusive = true; - protected byte[] stopRow; - protected boolean stopRowInclusive = false; + @InterfaceAudience.Public + @InterfaceStability.Evolving + public static class RowRange implements Comparable { + private byte[] startRow; + private boolean startRowInclusive = true; + private byte[] stopRow; + private boolean stopRowInclusive = false; - public BasicRowRange() { + public RowRange() { } /** * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the * start row of the table. If the stopRow is empty or null, set it to * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table. */ - public BasicRowRange(String startRow, boolean startRowInclusive, String stopRow, + public RowRange(String startRow, boolean startRowInclusive, String stopRow, boolean stopRowInclusive) { this((startRow == null || startRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY : Bytes.toBytes(startRow), startRowInclusive, @@ -422,7 +442,7 @@ public class MultiRowRangeFilter extends FilterBase { Bytes.toBytes(stopRow), stopRowInclusive); } - public BasicRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, + public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, boolean stopRowInclusive) { this.startRow = (startRow == null) ? HConstants.EMPTY_BYTE_ARRAY : startRow; this.startRowInclusive = startRowInclusive; @@ -480,6 +500,13 @@ public class MultiRowRangeFilter extends FilterBase { } } + @Override + @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS", + justification="This compareTo is not of this Object, but of referenced RowRange") + public int compareTo(RowRange other) { + return Bytes.compareTo(this.startRow, other.startRow); + } + public boolean isValid() { return Bytes.equals(startRow, HConstants.EMPTY_BYTE_ARRAY) || Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) @@ -489,13 +516,13 @@ public class MultiRowRangeFilter extends FilterBase { @Override public boolean equals(Object obj){ - if (!(obj instanceof BasicRowRange)) { + if (!(obj instanceof RowRange)) { return false; } if (this == obj) { return true; } - BasicRowRange rr = (BasicRowRange) obj; + RowRange rr = (RowRange) obj; return Bytes.equals(this.stopRow, rr.getStopRow()) && Bytes.equals(this.startRow, this.getStartRow()) && this.startRowInclusive == rr.isStartRowInclusive() && @@ -509,236 +536,6 @@ public class MultiRowRangeFilter extends FilterBase { this.startRowInclusive, this.stopRowInclusive); } - - /** - * Returns the data to be used to compare {@code this} to another object. - */ - public abstract byte[] getComparisonData(); - - /** - * Returns whether the bounding row used for binary-search is inclusive or not. - * - * For forward scans, we would check the starRow, but we would check the stopRow for - * the reverse scan case. - */ - public abstract boolean isSearchRowInclusive(); - - @Override - public int compareTo(BasicRowRange other) { - byte[] left; - byte[] right; - if (isAscendingOrder()) { - left = this.getComparisonData(); - right = other.getComparisonData(); - } else { - left = other.getComparisonData(); - right = this.getComparisonData(); - } - return Bytes.compareTo(left, right); - } - - public abstract boolean isAscendingOrder(); - } - - /** - * Internal RowRange that reverses the sort-order to handle reverse scans. - */ - @InterfaceAudience.Private - private static class ReversedRowRange extends BasicRowRange { - public ReversedRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, - boolean stopRowInclusive) { - super(startRow, startRowInclusive, stopRow, stopRowInclusive); - } - - @Override - public byte[] getComparisonData() { - return this.stopRow; - } - - @Override - public boolean isSearchRowInclusive() { - return this.stopRowInclusive; - } - - @Override - public boolean isAscendingOrder() { - return false; - } - } - - @InterfaceAudience.Public - @InterfaceStability.Evolving - public static class RowRange extends BasicRowRange { - public RowRange() { - } - /** - * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the - * start row of the table. If the stopRow is empty or null, set it to - * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table. - */ - public RowRange(String startRow, boolean startRowInclusive, String stopRow, - boolean stopRowInclusive) { - super(startRow, startRowInclusive, stopRow, stopRowInclusive); - } - - public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, - boolean stopRowInclusive) { - super(startRow, startRowInclusive, stopRow, stopRowInclusive); - } - - @Override - public byte[] getComparisonData() { - return startRow; - } - - @Override - public boolean isSearchRowInclusive() { - return startRowInclusive; - } - - @Override - public boolean isAscendingOrder() { - return true; - } - } - - /** - * Abstraction over the ranges of rows to return from this filter, regardless of forward or - * reverse scans being used. This Filter can use this class, agnostic of iteration direction, - * as the same algorithm can be applied in both cases. - */ - @InterfaceAudience.Private - private static class RangeIteration { - private boolean exclusive = false; - private boolean initialized = false; - private boolean foundFirstRange = false; - private boolean reversed = false; - private final List sortedAndMergedRanges; - private List ranges; - - public RangeIteration(List sortedAndMergedRanges) { - this.sortedAndMergedRanges = sortedAndMergedRanges; - } - - void initialize(boolean reversed) { - // Avoid double initialization - assert !this.initialized; - this.reversed = reversed; - if (reversed) { - // If we are doing a reverse scan, we can reverse the ranges (both the elements in - // the list as well as their start/stop key), and use the same algorithm. - this.ranges = flipAndReverseRanges(sortedAndMergedRanges); - } else { - this.ranges = sortedAndMergedRanges; - } - this.initialized = true; - } - - /** - * Rebuilds the sorted ranges (by startKey) into an equivalent sorted list of ranges, only by - * stopKey instead. Descending order and the ReversedRowRange compareTo implementation make - * sure that we can use Collections.binarySearch(). - */ - static List flipAndReverseRanges(List ranges) { - List flippedRanges = new ArrayList<>(ranges.size()); - for (int i = ranges.size() - 1; i >= 0; i--) { - RowRange origRange = ranges.get(i); - ReversedRowRange newRowRange = new ReversedRowRange( - origRange.startRow, origRange.startRowInclusive, origRange.stopRow, - origRange.isStopRowInclusive()); - flippedRanges.add(newRowRange); - } - return flippedRanges; - } - - /** - * Calculates the position where the given rowkey fits in the ranges list. - * - * @param rowKey the row key to calculate - * @return index the position of the row key - */ - public int getNextRangeIndex(byte[] rowKey) { - BasicRowRange temp; - if (reversed) { - temp = new ReversedRowRange(null, true, rowKey, true); - } else { - temp = new RowRange(rowKey, true, null, true); - } - // Because we make sure that `ranges` has the correct natural ordering (given it containing - // RowRange or ReverseRowRange objects). This keeps us from having to have two different - // implementations below. - final int index = Collections.binarySearch(ranges, temp); - if (index < 0) { - int insertionPosition = -index - 1; - // check if the row key in the range before the insertion position - if (insertionPosition != 0 && ranges.get(insertionPosition - 1).contains(rowKey)) { - return insertionPosition - 1; - } - // check if the row key is before the first range - if (insertionPosition == 0 && !ranges.get(insertionPosition).contains(rowKey)) { - return ROW_BEFORE_FIRST_RANGE; - } - if (!foundFirstRange) { - foundFirstRange = true; - } - return insertionPosition; - } - // the row key equals one of the start keys, and the the range exclude the start key - if(ranges.get(index).isSearchRowInclusive() == false) { - exclusive = true; - } - return index; - } - - /** - * Sets {@link #foundFirstRange} to {@code true}, indicating that we found a matching row range. - */ - public void setFoundFirstRange() { - this.foundFirstRange = true; - } - - /** - * Gets the RowRange at the given offset. - */ - @SuppressWarnings("unchecked") - public T get(int i) { - return (T) ranges.get(i); - } - - /** - * Returns true if the first matching row range was found. - */ - public boolean hasFoundFirstRange() { - return foundFirstRange; - } - - /** - * Returns true if the current range's key is exclusive - */ - public boolean isExclusive() { - return exclusive; - } - - /** - * Resets the exclusive flag. - */ - public void resetExclusive() { - exclusive = false; - } - - /** - * Returns true if this class has been initialized by calling {@link #initialize(boolean)}. - */ - public boolean isInitialized() { - return initialized; - } - - /** - * Returns true if we exhausted searching all row ranges. - */ - public boolean isIterationComplete(int index) { - return index >= ranges.size(); - } } @Override @@ -748,6 +545,6 @@ public class MultiRowRangeFilter extends FilterBase { @Override public int hashCode() { - return Objects.hash(this.ranges); + return Objects.hash(this.rangeList); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java index b5252713366..9ba5ec3e86d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java @@ -486,134 +486,6 @@ public class TestMultiRowRangeFilter { ht.close(); } - @Test - public void testReverseMultiRowRangeFilterWithinTable() throws IOException { - tableName = Bytes.toBytes("testReverseMultiRowRangeFilterWithinTable"); - HTable ht = TEST_UTIL.createTable(tableName, family); - generateRows(numRows, ht, family, qf, value); - - Scan scan = new Scan(); - scan.setReversed(true); - List ranges = Arrays.asList( - new RowRange(Bytes.toBytes(20), true, Bytes.toBytes(30), true), - new RowRange(Bytes.toBytes(50), true, Bytes.toBytes(60), true) - ); - MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); - scan.setFilter(filter); - - List expectedResults = new ArrayList<>(); - for (int i = 60; i >= 50; i--) { - expectedResults.add(i); - } - for (int i = 30; i >= 20; i--) { - expectedResults.add(i); - } - - List results = getResults(ht, scan); - List actualResults = new ArrayList<>(); - StringBuilder sb = new StringBuilder(); - for (Cell result : results) { - int observedValue = Bytes.toInt( - result.getRowArray(), result.getRowOffset(), result.getRowLength()); - actualResults.add(observedValue); - if (sb.length() > 0) { - sb.append(", "); - } - sb.append(observedValue); - } - assertEquals("Saw results: " + sb.toString(), 22, results.size()); - } - - @Test - public void testReverseMultiRowRangeFilterIncludingMaxRow() throws IOException { - tableName = Bytes.toBytes("testReverseMultiRowRangeFilterIncludingMaxRow"); - HTable ht = TEST_UTIL.createTable(tableName, family); - for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) { - byte[] row = Bytes.toBytes(rowkey); - Put p = new Put(row); - p.addColumn(family, qf, value); - ht.put(p); - } - TEST_UTIL.flush(); - - Scan scan = new Scan(); - scan.setReversed(true); - List ranges = Arrays.asList( - new RowRange(Bytes.toBytes("b"), true, Bytes.toBytes("c"), true), - new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true) - ); - MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); - scan.setFilter(filter); - - List expected = Arrays.asList("h", "g", "f", "c", "b"); - List actual = new ArrayList<>(); - for (Cell cell : getResults(ht, scan)) { - actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())); - } - - assertEquals(expected, actual); - } - - @Test - public void testReverseMultiRowRangeFilterIncludingMinRow() throws IOException { - tableName = Bytes.toBytes("testReverseMultiRowRangeFilterIncludingMinRow"); - HTable ht = TEST_UTIL.createTable(tableName, family); - for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) { - byte[] row = Bytes.toBytes(rowkey); - Put p = new Put(row); - p.addColumn(family, qf, value); - ht.put(p); - } - TEST_UTIL.flush(); - - Scan scan = new Scan(); - scan.setReversed(true); - List ranges = Arrays.asList( - new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true), - new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("g"), true) - ); - MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); - scan.setFilter(filter); - - List expected = Arrays.asList("g", "f", "c", "b", "a"); - List actual = new ArrayList<>(); - for (Cell cell : getResults(ht, scan)) { - actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())); - } - - assertEquals(expected, actual); - } - - @Test - public void testReverseMultiRowRangeFilterIncludingMinAndMaxRow() throws IOException { - tableName = Bytes.toBytes("testReverseMultiRowRangeFilterIncludingMinAndMaxRow"); - HTable ht = TEST_UTIL.createTable(tableName, family); - for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) { - byte[] row = Bytes.toBytes(rowkey); - Put p = new Put(row); - p.addColumn(family, qf, value); - ht.put(p); - } - TEST_UTIL.flush(); - - Scan scan = new Scan(); - scan.setReversed(true); - List ranges = Arrays.asList( - new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true), - new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true) - ); - MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges); - scan.setFilter(filter); - - List expected = Arrays.asList("h", "g", "f", "c", "b", "a"); - List actual = new ArrayList<>(); - for (Cell cell : getResults(ht, scan)) { - actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())); - } - - assertEquals(expected, actual); - } - private void generateRows(int numberOfRows, HTable ht, byte[] family, byte[] qf, byte[] value) throws IOException { for (int i = 0; i < numberOfRows; i++) { @@ -646,7 +518,7 @@ public class TestMultiRowRangeFilter { return kvList; } - private List getResults(HTable ht, Scan scan) throws IOException { + private int getResultsSize(HTable ht, Scan scan) throws IOException { ResultScanner scanner = ht.getScanner(scan); List results = new ArrayList(); Result r; @@ -656,10 +528,6 @@ public class TestMultiRowRangeFilter { } } scanner.close(); - return results; - } - - private int getResultsSize(HTable ht, Scan scan) throws IOException { - return getResults(ht, scan).size(); + return results.size(); } }