HBASE-22144 Correct MultiRowRangeFilter to work with reverse scans

Signed-off-by: Toshihiro Suzuki <brfrn169@gmail.com>
This commit is contained in:
Josh Elser 2019-04-01 18:21:12 -04:00
parent 61396aecd2
commit cb2ef7a606
2 changed files with 408 additions and 74 deletions

View File

@ -51,21 +51,29 @@ import org.apache.hadoop.hbase.util.Bytes;
@InterfaceAudience.Public
public class MultiRowRangeFilter extends FilterBase {
private List<RowRange> rangeList;
private static final int ROW_BEFORE_FIRST_RANGE = -1;
private boolean EXCLUSIVE = false;
private final List<RowRange> rangeList;
private final RangeIteration ranges;
private boolean done = false;
private boolean initialized = false;
private int index;
private RowRange range;
private BasicRowRange range;
private ReturnCode currentReturnCode;
/**
* @param list A list of <code>RowRange</code>
*/
public MultiRowRangeFilter(List<RowRange> list) {
this.rangeList = sortAndMerge(list);
// We don't use rangeList anywhere else, but keeping it lets us pay a little
// memory to avoid touching the serialization logic.
this.rangeList = Collections.unmodifiableList(sortAndMerge(list));
this.ranges = new RangeIteration(rangeList);
}
public List<RowRange> getRowRanges() {
// Used by hbase-rest
return this.rangeList;
}
@Override
@ -73,13 +81,17 @@ public class MultiRowRangeFilter extends FilterBase {
return done;
}
public List<RowRange> getRowRanges() {
return this.rangeList;
}
@Override
public boolean filterRowKey(Cell firstRowCell) {
if (filterAllRemaining()) return true;
// N.b. We can only do this after we're iterating over records. If we try to do
// it before, the Scan (and this filter) may not yet be fully initialized. This is a
// wart on Filter and something that'd be nice to clean up (like CP's in HBase2.0)
if (!ranges.isInitialized()) {
ranges.initialize(isReversed());
}
// If it is the first time of running, calculate the current range index for
// the row key. If index is out of bound which happens when the start row
// user sets is after the largest stop row of the ranges, stop the scan.
@ -87,32 +99,31 @@ public class MultiRowRangeFilter extends FilterBase {
byte[] rowArr = firstRowCell.getRowArray();
int length = firstRowCell.getRowLength();
int offset = firstRowCell.getRowOffset();
if (!initialized
|| !range.contains(rowArr, offset, length)) {
if (!ranges.hasFoundFirstRange() || !range.contains(rowArr, offset, length)) {
byte[] rowkey = CellUtil.cloneRow(firstRowCell);
index = getNextRangeIndex(rowkey);
if (index >= rangeList.size()) {
index = ranges.getNextRangeIndex(rowkey);
if (ranges.isIterationComplete(index)) {
done = true;
currentReturnCode = ReturnCode.NEXT_ROW;
return false;
}
if(index != ROW_BEFORE_FIRST_RANGE) {
range = rangeList.get(index);
range = ranges.get(index);
} else {
range = rangeList.get(0);
range = ranges.get(0);
}
if (EXCLUSIVE) {
EXCLUSIVE = false;
if (ranges.isExclusive()) {
ranges.resetExclusive();
currentReturnCode = ReturnCode.NEXT_ROW;
return false;
}
if (!initialized) {
if (!ranges.hasFoundFirstRange()) {
if(index != ROW_BEFORE_FIRST_RANGE) {
currentReturnCode = ReturnCode.INCLUDE;
} else {
currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT;
}
initialized = true;
ranges.setFoundFirstRange();
} else {
if (range.contains(rowArr, offset, length)) {
currentReturnCode = ReturnCode.INCLUDE;
@ -140,8 +151,9 @@ public class MultiRowRangeFilter extends FilterBase {
@Override
public Cell getNextCellHint(Cell currentKV) {
// skip to the next range's start row
return PrivateCellUtil.createFirstOnRow(range.startRow, 0,
(short) range.startRow.length);
// #getComparisonData lets us avoid the `if (reversed)` branch
byte[] comparisonData = range.getComparisonData();
return PrivateCellUtil.createFirstOnRow(comparisonData, 0, (short) comparisonData.length);
}
/**
@ -219,37 +231,6 @@ public class MultiRowRangeFilter extends FilterBase {
return true;
}
/**
* calculate the position where the row key in the ranges list.
*
* @param rowKey the row key to calculate
* @return index the position of the row key
*/
private int getNextRangeIndex(byte[] rowKey) {
RowRange temp = new RowRange(rowKey, true, null, true);
int index = Collections.binarySearch(rangeList, temp);
if (index < 0) {
int insertionPosition = -index - 1;
// check if the row key in the range before the insertion position
if (insertionPosition != 0 && rangeList.get(insertionPosition - 1).contains(rowKey)) {
return insertionPosition - 1;
}
// check if the row key is before the first range
if (insertionPosition == 0 && !rangeList.get(insertionPosition).contains(rowKey)) {
return ROW_BEFORE_FIRST_RANGE;
}
if (!initialized) {
initialized = true;
}
return insertionPosition;
}
// the row key equals one of the start keys, and the the range exclude the start key
if(rangeList.get(index).startRowInclusive == false) {
EXCLUSIVE = true;
}
return index;
}
/**
* sort the ranges and if the ranges with overlap, then merge them.
*
@ -420,21 +401,20 @@ public class MultiRowRangeFilter extends FilterBase {
throw new IllegalArgumentException(sb.toString());
}
@InterfaceAudience.Public
public static class RowRange implements Comparable<RowRange> {
private byte[] startRow;
private boolean startRowInclusive = true;
private byte[] stopRow;
private boolean stopRowInclusive = false;
private static abstract class BasicRowRange implements Comparable<BasicRowRange> {
protected byte[] startRow;
protected boolean startRowInclusive = true;
protected byte[] stopRow;
protected boolean stopRowInclusive = false;
public RowRange() {
public BasicRowRange() {
}
/**
* If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the
* start row of the table. If the stopRow is empty or null, set it to
* HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table.
*/
public RowRange(String startRow, boolean startRowInclusive, String stopRow,
public BasicRowRange(String startRow, boolean startRowInclusive, String stopRow,
boolean stopRowInclusive) {
this((startRow == null || startRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY :
Bytes.toBytes(startRow), startRowInclusive,
@ -442,7 +422,7 @@ public class MultiRowRangeFilter extends FilterBase {
Bytes.toBytes(stopRow), stopRowInclusive);
}
public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
public BasicRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
boolean stopRowInclusive) {
this.startRow = (startRow == null) ? HConstants.EMPTY_BYTE_ARRAY : startRow;
this.startRowInclusive = startRowInclusive;
@ -500,13 +480,6 @@ public class MultiRowRangeFilter extends FilterBase {
}
}
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
justification="This compareTo is not of this Object, but of referenced RowRange")
public int compareTo(RowRange other) {
return Bytes.compareTo(this.startRow, other.startRow);
}
public boolean isValid() {
return Bytes.equals(startRow, HConstants.EMPTY_BYTE_ARRAY)
|| Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY)
@ -516,13 +489,13 @@ public class MultiRowRangeFilter extends FilterBase {
@Override
public boolean equals(Object obj){
if (!(obj instanceof RowRange)) {
if (!(obj instanceof BasicRowRange)) {
return false;
}
if (this == obj) {
return true;
}
RowRange rr = (RowRange) obj;
BasicRowRange rr = (BasicRowRange) obj;
return Bytes.equals(this.stopRow, rr.getStopRow()) &&
Bytes.equals(this.startRow, this.getStartRow()) &&
this.startRowInclusive == rr.isStartRowInclusive() &&
@ -536,6 +509,235 @@ public class MultiRowRangeFilter extends FilterBase {
this.startRowInclusive,
this.stopRowInclusive);
}
/**
* Returns the data to be used to compare {@code this} to another object.
*/
public abstract byte[] getComparisonData();
/**
* Returns whether the bounding row used for binary-search is inclusive or not.
*
* For forward scans, we would check the starRow, but we would check the stopRow for
* the reverse scan case.
*/
public abstract boolean isSearchRowInclusive();
@Override
public int compareTo(BasicRowRange other) {
byte[] left;
byte[] right;
if (isAscendingOrder()) {
left = this.getComparisonData();
right = other.getComparisonData();
} else {
left = other.getComparisonData();
right = this.getComparisonData();
}
return Bytes.compareTo(left, right);
}
public abstract boolean isAscendingOrder();
}
/**
* Internal RowRange that reverses the sort-order to handle reverse scans.
*/
@InterfaceAudience.Private
private static class ReversedRowRange extends BasicRowRange {
public ReversedRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
boolean stopRowInclusive) {
super(startRow, startRowInclusive, stopRow, stopRowInclusive);
}
@Override
public byte[] getComparisonData() {
return this.stopRow;
}
@Override
public boolean isSearchRowInclusive() {
return this.stopRowInclusive;
}
@Override
public boolean isAscendingOrder() {
return false;
}
}
@InterfaceAudience.Public
public static class RowRange extends BasicRowRange {
public RowRange() {
}
/**
* If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the
* start row of the table. If the stopRow is empty or null, set it to
* HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table.
*/
public RowRange(String startRow, boolean startRowInclusive, String stopRow,
boolean stopRowInclusive) {
super(startRow, startRowInclusive, stopRow, stopRowInclusive);
}
public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
boolean stopRowInclusive) {
super(startRow, startRowInclusive, stopRow, stopRowInclusive);
}
@Override
public byte[] getComparisonData() {
return startRow;
}
@Override
public boolean isSearchRowInclusive() {
return startRowInclusive;
}
@Override
public boolean isAscendingOrder() {
return true;
}
}
/**
* Abstraction over the ranges of rows to return from this filter, regardless of forward or
* reverse scans being used. This Filter can use this class, agnostic of iteration direction,
* as the same algorithm can be applied in both cases.
*/
@InterfaceAudience.Private
private static class RangeIteration {
private boolean exclusive = false;
private boolean initialized = false;
private boolean foundFirstRange = false;
private boolean reversed = false;
private final List<RowRange> sortedAndMergedRanges;
private List<? extends BasicRowRange> ranges;
public RangeIteration(List<RowRange> sortedAndMergedRanges) {
this.sortedAndMergedRanges = sortedAndMergedRanges;
}
void initialize(boolean reversed) {
// Avoid double initialization
assert !this.initialized;
this.reversed = reversed;
if (reversed) {
// If we are doing a reverse scan, we can reverse the ranges (both the elements in
// the list as well as their start/stop key), and use the same algorithm.
this.ranges = flipAndReverseRanges(sortedAndMergedRanges);
} else {
this.ranges = sortedAndMergedRanges;
}
this.initialized = true;
}
/**
* Rebuilds the sorted ranges (by startKey) into an equivalent sorted list of ranges, only by
* stopKey instead. Descending order and the ReversedRowRange compareTo implementation make
* sure that we can use Collections.binarySearch().
*/
static List<ReversedRowRange> flipAndReverseRanges(List<RowRange> ranges) {
List<ReversedRowRange> flippedRanges = new ArrayList<>(ranges.size());
for (int i = ranges.size() - 1; i >= 0; i--) {
RowRange origRange = ranges.get(i);
ReversedRowRange newRowRange = new ReversedRowRange(
origRange.startRow, origRange.startRowInclusive, origRange.stopRow,
origRange.isStopRowInclusive());
flippedRanges.add(newRowRange);
}
return flippedRanges;
}
/**
* Calculates the position where the given rowkey fits in the ranges list.
*
* @param rowKey the row key to calculate
* @return index the position of the row key
*/
public int getNextRangeIndex(byte[] rowKey) {
BasicRowRange temp;
if (reversed) {
temp = new ReversedRowRange(null, true, rowKey, true);
} else {
temp = new RowRange(rowKey, true, null, true);
}
// Because we make sure that `ranges` has the correct natural ordering (given it containing
// RowRange or ReverseRowRange objects). This keeps us from having to have two different
// implementations below.
final int index = Collections.binarySearch(ranges, temp);
if (index < 0) {
int insertionPosition = -index - 1;
// check if the row key in the range before the insertion position
if (insertionPosition != 0 && ranges.get(insertionPosition - 1).contains(rowKey)) {
return insertionPosition - 1;
}
// check if the row key is before the first range
if (insertionPosition == 0 && !ranges.get(insertionPosition).contains(rowKey)) {
return ROW_BEFORE_FIRST_RANGE;
}
if (!foundFirstRange) {
foundFirstRange = true;
}
return insertionPosition;
}
// the row key equals one of the start keys, and the the range exclude the start key
if(ranges.get(index).isSearchRowInclusive() == false) {
exclusive = true;
}
return index;
}
/**
* Sets {@link #foundFirstRange} to {@code true}, indicating that we found a matching row range.
*/
public void setFoundFirstRange() {
this.foundFirstRange = true;
}
/**
* Gets the RowRange at the given offset.
*/
@SuppressWarnings("unchecked")
public <T extends BasicRowRange> T get(int i) {
return (T) ranges.get(i);
}
/**
* Returns true if the first matching row range was found.
*/
public boolean hasFoundFirstRange() {
return foundFirstRange;
}
/**
* Returns true if the current range's key is exclusive
*/
public boolean isExclusive() {
return exclusive;
}
/**
* Resets the exclusive flag.
*/
public void resetExclusive() {
exclusive = false;
}
/**
* Returns true if this class has been initialized by calling {@link #initialize(boolean)}.
*/
public boolean isInitialized() {
return initialized;
}
/**
* Returns true if we exhausted searching all row ranges.
*/
public boolean isIterationComplete(int index) {
return index >= ranges.size();
}
}
@Override
@ -545,6 +747,6 @@ public class MultiRowRangeFilter extends FilterBase {
@Override
public int hashCode() {
return Objects.hash(this.rangeList);
return Objects.hash(this.ranges);
}
}

View File

@ -507,6 +507,134 @@ public class TestMultiRowRangeFilter {
ht.close();
}
@Test
public void testReverseMultiRowRangeFilterWithinTable() throws IOException {
tableName = TableName.valueOf(name.getMethodName());
Table ht = TEST_UTIL.createTable(tableName, family);
generateRows(numRows, ht, family, qf, value);
Scan scan = new Scan();
scan.setReversed(true);
List<RowRange> ranges = Arrays.asList(
new RowRange(Bytes.toBytes(20), true, Bytes.toBytes(30), true),
new RowRange(Bytes.toBytes(50), true, Bytes.toBytes(60), true)
);
MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
scan.setFilter(filter);
List<Integer> expectedResults = new ArrayList<>();
for (int i = 60; i >= 50; i--) {
expectedResults.add(i);
}
for (int i = 30; i >= 20; i--) {
expectedResults.add(i);
}
List<Cell> results = getResults(ht, scan);
List<Integer> actualResults = new ArrayList<>();
StringBuilder sb = new StringBuilder();
for (Cell result : results) {
int observedValue = Bytes.toInt(
result.getRowArray(), result.getRowOffset(), result.getRowLength());
actualResults.add(observedValue);
if (sb.length() > 0) {
sb.append(", ");
}
sb.append(observedValue);
}
assertEquals("Saw results: " + sb.toString(), 22, results.size());
}
@Test
public void testReverseMultiRowRangeFilterIncludingMaxRow() throws IOException {
tableName = TableName.valueOf(name.getMethodName());
Table ht = TEST_UTIL.createTable(tableName, family);
for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) {
byte[] row = Bytes.toBytes(rowkey);
Put p = new Put(row);
p.addColumn(family, qf, value);
ht.put(p);
}
TEST_UTIL.flush();
Scan scan = new Scan();
scan.setReversed(true);
List<RowRange> ranges = Arrays.asList(
new RowRange(Bytes.toBytes("b"), true, Bytes.toBytes("c"), true),
new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true)
);
MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
scan.setFilter(filter);
List<String> expected = Arrays.asList("h", "g", "f", "c", "b");
List<String> actual = new ArrayList<>();
for (Cell cell : getResults(ht, scan)) {
actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
}
assertEquals(expected, actual);
}
@Test
public void testReverseMultiRowRangeFilterIncludingMinRow() throws IOException {
tableName = TableName.valueOf(name.getMethodName());
Table ht = TEST_UTIL.createTable(tableName, family);
for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) {
byte[] row = Bytes.toBytes(rowkey);
Put p = new Put(row);
p.addColumn(family, qf, value);
ht.put(p);
}
TEST_UTIL.flush();
Scan scan = new Scan();
scan.setReversed(true);
List<RowRange> ranges = Arrays.asList(
new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true),
new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("g"), true)
);
MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
scan.setFilter(filter);
List<String> expected = Arrays.asList("g", "f", "c", "b", "a");
List<String> actual = new ArrayList<>();
for (Cell cell : getResults(ht, scan)) {
actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
}
assertEquals(expected, actual);
}
@Test
public void testReverseMultiRowRangeFilterIncludingMinAndMaxRow() throws IOException {
tableName = TableName.valueOf(name.getMethodName());
Table ht = TEST_UTIL.createTable(tableName, family);
for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) {
byte[] row = Bytes.toBytes(rowkey);
Put p = new Put(row);
p.addColumn(family, qf, value);
ht.put(p);
}
TEST_UTIL.flush();
Scan scan = new Scan();
scan.setReversed(true);
List<RowRange> ranges = Arrays.asList(
new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true),
new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true)
);
MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
scan.setFilter(filter);
List<String> expected = Arrays.asList("h", "g", "f", "c", "b", "a");
List<String> actual = new ArrayList<>();
for (Cell cell : getResults(ht, scan)) {
actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
}
assertEquals(expected, actual);
}
private void generateRows(int numberOfRows, Table ht, byte[] family, byte[] qf, byte[] value)
throws IOException {
for (int i = 0; i < numberOfRows; i++) {
@ -539,7 +667,7 @@ public class TestMultiRowRangeFilter {
return kvList;
}
private int getResultsSize(Table ht, Scan scan) throws IOException {
private List<Cell> getResults(Table ht, Scan scan) throws IOException {
ResultScanner scanner = ht.getScanner(scan);
List<Cell> results = new ArrayList<>();
Result r;
@ -549,6 +677,10 @@ public class TestMultiRowRangeFilter {
}
}
scanner.close();
return results.size();
return results;
}
private int getResultsSize(Table ht, Scan scan) throws IOException {
return getResults(ht, scan).size();
}
}