Revert "HBASE-22215 HBASE-22144 Correct MultiRowRangeFilter to work with reverse scans"

This reverts commit c10ee4d23b.

Reverted due to binary compatibility concerns related to a Public/Evolving interface.
See comments on HBASE-22215 for detail.
This commit is contained in:
Andrew Purtell 2019-04-29 15:32:14 -07:00
parent 8d18a85e30
commit d61fcf5848
No known key found for this signature in database
GPG Key ID: 8597754DD5365CCD
2 changed files with 73 additions and 408 deletions

View File

@ -54,14 +54,14 @@ import com.google.protobuf.InvalidProtocolBufferException;
@InterfaceStability.Evolving @InterfaceStability.Evolving
public class MultiRowRangeFilter extends FilterBase { public class MultiRowRangeFilter extends FilterBase {
private List<RowRange> rangeList;
private static final int ROW_BEFORE_FIRST_RANGE = -1; private static final int ROW_BEFORE_FIRST_RANGE = -1;
private boolean EXCLUSIVE = false;
private final List<RowRange> rangeList;
private final RangeIteration ranges;
private boolean done = false; private boolean done = false;
private boolean initialized = false;
private int index; private int index;
private BasicRowRange range; private RowRange range;
private ReturnCode currentReturnCode; private ReturnCode currentReturnCode;
/** /**
@ -71,15 +71,7 @@ public class MultiRowRangeFilter extends FilterBase {
* <code>RowRange</code> is invalid * <code>RowRange</code> is invalid
*/ */
public MultiRowRangeFilter(List<RowRange> list) throws IOException { public MultiRowRangeFilter(List<RowRange> list) throws IOException {
// We don't use rangeList anywhere else, but keeping it lets us pay a little this.rangeList = sortAndMerge(list);
// memory to avoid touching the serialization logic.
this.rangeList = Collections.unmodifiableList(sortAndMerge(list));
this.ranges = new RangeIteration(rangeList);
}
public List<RowRange> getRowRanges() {
// Used by hbase-rest
return this.rangeList;
} }
@Override @Override
@ -87,45 +79,42 @@ public class MultiRowRangeFilter extends FilterBase {
return done; return done;
} }
public List<RowRange> getRowRanges() {
return this.rangeList;
}
@Override @Override
public boolean filterRowKey(byte[] buffer, int offset, int length) { public boolean filterRowKey(byte[] buffer, int offset, int length) {
// N.b. We can only do this after we're iterating over records. If we try to do
// it before, the Scan (and this filter) may not yet be fully initialized. This is a
// wart on Filter and something that'd be nice to clean up (like CP's in HBase2.0)
if (!ranges.isInitialized()) {
ranges.initialize(isReversed());
}
// If it is the first time of running, calculate the current range index for // If it is the first time of running, calculate the current range index for
// the row key. If index is out of bound which happens when the start row // the row key. If index is out of bound which happens when the start row
// user sets is after the largest stop row of the ranges, stop the scan. // user sets is after the largest stop row of the ranges, stop the scan.
// If row key is after the current range, find the next range and update index. // If row key is after the current range, find the next range and update index.
if (!ranges.hasFoundFirstRange() || !range.contains(buffer, offset, length)) { if (!initialized || !range.contains(buffer, offset, length)) {
byte[] rowkey = new byte[length]; byte[] rowkey = new byte[length];
System.arraycopy(buffer, offset, rowkey, 0, length); System.arraycopy(buffer, offset, rowkey, 0, length);
index = ranges.getNextRangeIndex(rowkey); index = getNextRangeIndex(rowkey);
if (ranges.isIterationComplete(index)) { if (index >= rangeList.size()) {
done = true; done = true;
currentReturnCode = ReturnCode.NEXT_ROW; currentReturnCode = ReturnCode.NEXT_ROW;
return false; return false;
} }
if(index != ROW_BEFORE_FIRST_RANGE) { if(index != ROW_BEFORE_FIRST_RANGE) {
range = ranges.get(index); range = rangeList.get(index);
} else { } else {
range = ranges.get(0); range = rangeList.get(0);
} }
if (ranges.isExclusive()) { if (EXCLUSIVE) {
ranges.resetExclusive(); EXCLUSIVE = false;
currentReturnCode = ReturnCode.NEXT_ROW; currentReturnCode = ReturnCode.NEXT_ROW;
return false; return false;
} }
if (!ranges.hasFoundFirstRange()) { if (!initialized) {
if(index != ROW_BEFORE_FIRST_RANGE) { if(index != ROW_BEFORE_FIRST_RANGE) {
currentReturnCode = ReturnCode.INCLUDE; currentReturnCode = ReturnCode.INCLUDE;
} else { } else {
currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT; currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT;
} }
ranges.setFoundFirstRange(); initialized = true;
} else { } else {
if (range.contains(buffer, offset, length)) { if (range.contains(buffer, offset, length)) {
currentReturnCode = ReturnCode.INCLUDE; currentReturnCode = ReturnCode.INCLUDE;
@ -147,9 +136,7 @@ public class MultiRowRangeFilter extends FilterBase {
@Override @Override
public Cell getNextCellHint(Cell currentKV) { public Cell getNextCellHint(Cell currentKV) {
// skip to the next range's start row // skip to the next range's start row
// #getComparisonData lets us avoid the `if (reversed)` branch return KeyValueUtil.createFirstOnRow(range.startRow);
byte[] comparisonData = range.getComparisonData();
return KeyValueUtil.createFirstOnRow(comparisonData);
} }
/** /**
@ -231,6 +218,37 @@ public class MultiRowRangeFilter extends FilterBase {
return true; return true;
} }
/**
* calculate the position where the row key in the ranges list.
*
* @param rowKey the row key to calculate
* @return index the position of the row key
*/
private int getNextRangeIndex(byte[] rowKey) {
RowRange temp = new RowRange(rowKey, true, null, true);
int index = Collections.binarySearch(rangeList, temp);
if (index < 0) {
int insertionPosition = -index - 1;
// check if the row key in the range before the insertion position
if (insertionPosition != 0 && rangeList.get(insertionPosition - 1).contains(rowKey)) {
return insertionPosition - 1;
}
// check if the row key is before the first range
if (insertionPosition == 0 && !rangeList.get(insertionPosition).contains(rowKey)) {
return ROW_BEFORE_FIRST_RANGE;
}
if (!initialized) {
initialized = true;
}
return insertionPosition;
}
// the row key equals one of the start keys, and the the range exclude the start key
if(rangeList.get(index).startRowInclusive == false) {
EXCLUSIVE = true;
}
return index;
}
/** /**
* sort the ranges and if the ranges with overlap, then merge them. * sort the ranges and if the ranges with overlap, then merge them.
* *
@ -401,20 +419,22 @@ public class MultiRowRangeFilter extends FilterBase {
throw new IllegalArgumentException(sb.toString()); throw new IllegalArgumentException(sb.toString());
} }
private static abstract class BasicRowRange implements Comparable<BasicRowRange> { @InterfaceAudience.Public
protected byte[] startRow; @InterfaceStability.Evolving
protected boolean startRowInclusive = true; public static class RowRange implements Comparable<RowRange> {
protected byte[] stopRow; private byte[] startRow;
protected boolean stopRowInclusive = false; private boolean startRowInclusive = true;
private byte[] stopRow;
private boolean stopRowInclusive = false;
public BasicRowRange() { public RowRange() {
} }
/** /**
* If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the
* start row of the table. If the stopRow is empty or null, set it to * start row of the table. If the stopRow is empty or null, set it to
* HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table. * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table.
*/ */
public BasicRowRange(String startRow, boolean startRowInclusive, String stopRow, public RowRange(String startRow, boolean startRowInclusive, String stopRow,
boolean stopRowInclusive) { boolean stopRowInclusive) {
this((startRow == null || startRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY : this((startRow == null || startRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY :
Bytes.toBytes(startRow), startRowInclusive, Bytes.toBytes(startRow), startRowInclusive,
@ -422,7 +442,7 @@ public class MultiRowRangeFilter extends FilterBase {
Bytes.toBytes(stopRow), stopRowInclusive); Bytes.toBytes(stopRow), stopRowInclusive);
} }
public BasicRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow, public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
boolean stopRowInclusive) { boolean stopRowInclusive) {
this.startRow = (startRow == null) ? HConstants.EMPTY_BYTE_ARRAY : startRow; this.startRow = (startRow == null) ? HConstants.EMPTY_BYTE_ARRAY : startRow;
this.startRowInclusive = startRowInclusive; this.startRowInclusive = startRowInclusive;
@ -480,6 +500,13 @@ public class MultiRowRangeFilter extends FilterBase {
} }
} }
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
justification="This compareTo is not of this Object, but of referenced RowRange")
public int compareTo(RowRange other) {
return Bytes.compareTo(this.startRow, other.startRow);
}
public boolean isValid() { public boolean isValid() {
return Bytes.equals(startRow, HConstants.EMPTY_BYTE_ARRAY) return Bytes.equals(startRow, HConstants.EMPTY_BYTE_ARRAY)
|| Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) || Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY)
@ -489,13 +516,13 @@ public class MultiRowRangeFilter extends FilterBase {
@Override @Override
public boolean equals(Object obj){ public boolean equals(Object obj){
if (!(obj instanceof BasicRowRange)) { if (!(obj instanceof RowRange)) {
return false; return false;
} }
if (this == obj) { if (this == obj) {
return true; return true;
} }
BasicRowRange rr = (BasicRowRange) obj; RowRange rr = (RowRange) obj;
return Bytes.equals(this.stopRow, rr.getStopRow()) && return Bytes.equals(this.stopRow, rr.getStopRow()) &&
Bytes.equals(this.startRow, this.getStartRow()) && Bytes.equals(this.startRow, this.getStartRow()) &&
this.startRowInclusive == rr.isStartRowInclusive() && this.startRowInclusive == rr.isStartRowInclusive() &&
@ -509,236 +536,6 @@ public class MultiRowRangeFilter extends FilterBase {
this.startRowInclusive, this.startRowInclusive,
this.stopRowInclusive); this.stopRowInclusive);
} }
/**
* Returns the data to be used to compare {@code this} to another object.
*/
public abstract byte[] getComparisonData();
/**
* Returns whether the bounding row used for binary-search is inclusive or not.
*
* For forward scans, we would check the starRow, but we would check the stopRow for
* the reverse scan case.
*/
public abstract boolean isSearchRowInclusive();
@Override
public int compareTo(BasicRowRange other) {
byte[] left;
byte[] right;
if (isAscendingOrder()) {
left = this.getComparisonData();
right = other.getComparisonData();
} else {
left = other.getComparisonData();
right = this.getComparisonData();
}
return Bytes.compareTo(left, right);
}
public abstract boolean isAscendingOrder();
}
/**
* Internal RowRange that reverses the sort-order to handle reverse scans.
*/
@InterfaceAudience.Private
private static class ReversedRowRange extends BasicRowRange {
public ReversedRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
boolean stopRowInclusive) {
super(startRow, startRowInclusive, stopRow, stopRowInclusive);
}
@Override
public byte[] getComparisonData() {
return this.stopRow;
}
@Override
public boolean isSearchRowInclusive() {
return this.stopRowInclusive;
}
@Override
public boolean isAscendingOrder() {
return false;
}
}
@InterfaceAudience.Public
@InterfaceStability.Evolving
public static class RowRange extends BasicRowRange {
public RowRange() {
}
/**
* If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the
* start row of the table. If the stopRow is empty or null, set it to
* HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table.
*/
public RowRange(String startRow, boolean startRowInclusive, String stopRow,
boolean stopRowInclusive) {
super(startRow, startRowInclusive, stopRow, stopRowInclusive);
}
public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
boolean stopRowInclusive) {
super(startRow, startRowInclusive, stopRow, stopRowInclusive);
}
@Override
public byte[] getComparisonData() {
return startRow;
}
@Override
public boolean isSearchRowInclusive() {
return startRowInclusive;
}
@Override
public boolean isAscendingOrder() {
return true;
}
}
/**
* Abstraction over the ranges of rows to return from this filter, regardless of forward or
* reverse scans being used. This Filter can use this class, agnostic of iteration direction,
* as the same algorithm can be applied in both cases.
*/
@InterfaceAudience.Private
private static class RangeIteration {
private boolean exclusive = false;
private boolean initialized = false;
private boolean foundFirstRange = false;
private boolean reversed = false;
private final List<RowRange> sortedAndMergedRanges;
private List<? extends BasicRowRange> ranges;
public RangeIteration(List<RowRange> sortedAndMergedRanges) {
this.sortedAndMergedRanges = sortedAndMergedRanges;
}
void initialize(boolean reversed) {
// Avoid double initialization
assert !this.initialized;
this.reversed = reversed;
if (reversed) {
// If we are doing a reverse scan, we can reverse the ranges (both the elements in
// the list as well as their start/stop key), and use the same algorithm.
this.ranges = flipAndReverseRanges(sortedAndMergedRanges);
} else {
this.ranges = sortedAndMergedRanges;
}
this.initialized = true;
}
/**
* Rebuilds the sorted ranges (by startKey) into an equivalent sorted list of ranges, only by
* stopKey instead. Descending order and the ReversedRowRange compareTo implementation make
* sure that we can use Collections.binarySearch().
*/
static List<ReversedRowRange> flipAndReverseRanges(List<RowRange> ranges) {
List<ReversedRowRange> flippedRanges = new ArrayList<>(ranges.size());
for (int i = ranges.size() - 1; i >= 0; i--) {
RowRange origRange = ranges.get(i);
ReversedRowRange newRowRange = new ReversedRowRange(
origRange.startRow, origRange.startRowInclusive, origRange.stopRow,
origRange.isStopRowInclusive());
flippedRanges.add(newRowRange);
}
return flippedRanges;
}
/**
* Calculates the position where the given rowkey fits in the ranges list.
*
* @param rowKey the row key to calculate
* @return index the position of the row key
*/
public int getNextRangeIndex(byte[] rowKey) {
BasicRowRange temp;
if (reversed) {
temp = new ReversedRowRange(null, true, rowKey, true);
} else {
temp = new RowRange(rowKey, true, null, true);
}
// Because we make sure that `ranges` has the correct natural ordering (given it containing
// RowRange or ReverseRowRange objects). This keeps us from having to have two different
// implementations below.
final int index = Collections.binarySearch(ranges, temp);
if (index < 0) {
int insertionPosition = -index - 1;
// check if the row key in the range before the insertion position
if (insertionPosition != 0 && ranges.get(insertionPosition - 1).contains(rowKey)) {
return insertionPosition - 1;
}
// check if the row key is before the first range
if (insertionPosition == 0 && !ranges.get(insertionPosition).contains(rowKey)) {
return ROW_BEFORE_FIRST_RANGE;
}
if (!foundFirstRange) {
foundFirstRange = true;
}
return insertionPosition;
}
// the row key equals one of the start keys, and the the range exclude the start key
if(ranges.get(index).isSearchRowInclusive() == false) {
exclusive = true;
}
return index;
}
/**
* Sets {@link #foundFirstRange} to {@code true}, indicating that we found a matching row range.
*/
public void setFoundFirstRange() {
this.foundFirstRange = true;
}
/**
* Gets the RowRange at the given offset.
*/
@SuppressWarnings("unchecked")
public <T extends BasicRowRange> T get(int i) {
return (T) ranges.get(i);
}
/**
* Returns true if the first matching row range was found.
*/
public boolean hasFoundFirstRange() {
return foundFirstRange;
}
/**
* Returns true if the current range's key is exclusive
*/
public boolean isExclusive() {
return exclusive;
}
/**
* Resets the exclusive flag.
*/
public void resetExclusive() {
exclusive = false;
}
/**
* Returns true if this class has been initialized by calling {@link #initialize(boolean)}.
*/
public boolean isInitialized() {
return initialized;
}
/**
* Returns true if we exhausted searching all row ranges.
*/
public boolean isIterationComplete(int index) {
return index >= ranges.size();
}
} }
@Override @Override
@ -748,6 +545,6 @@ public class MultiRowRangeFilter extends FilterBase {
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(this.ranges); return Objects.hash(this.rangeList);
} }
} }

View File

@ -486,134 +486,6 @@ public class TestMultiRowRangeFilter {
ht.close(); ht.close();
} }
@Test
public void testReverseMultiRowRangeFilterWithinTable() throws IOException {
tableName = Bytes.toBytes("testReverseMultiRowRangeFilterWithinTable");
HTable ht = TEST_UTIL.createTable(tableName, family);
generateRows(numRows, ht, family, qf, value);
Scan scan = new Scan();
scan.setReversed(true);
List<RowRange> ranges = Arrays.asList(
new RowRange(Bytes.toBytes(20), true, Bytes.toBytes(30), true),
new RowRange(Bytes.toBytes(50), true, Bytes.toBytes(60), true)
);
MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
scan.setFilter(filter);
List<Integer> expectedResults = new ArrayList<>();
for (int i = 60; i >= 50; i--) {
expectedResults.add(i);
}
for (int i = 30; i >= 20; i--) {
expectedResults.add(i);
}
List<Cell> results = getResults(ht, scan);
List<Integer> actualResults = new ArrayList<>();
StringBuilder sb = new StringBuilder();
for (Cell result : results) {
int observedValue = Bytes.toInt(
result.getRowArray(), result.getRowOffset(), result.getRowLength());
actualResults.add(observedValue);
if (sb.length() > 0) {
sb.append(", ");
}
sb.append(observedValue);
}
assertEquals("Saw results: " + sb.toString(), 22, results.size());
}
@Test
public void testReverseMultiRowRangeFilterIncludingMaxRow() throws IOException {
tableName = Bytes.toBytes("testReverseMultiRowRangeFilterIncludingMaxRow");
HTable ht = TEST_UTIL.createTable(tableName, family);
for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) {
byte[] row = Bytes.toBytes(rowkey);
Put p = new Put(row);
p.addColumn(family, qf, value);
ht.put(p);
}
TEST_UTIL.flush();
Scan scan = new Scan();
scan.setReversed(true);
List<RowRange> ranges = Arrays.asList(
new RowRange(Bytes.toBytes("b"), true, Bytes.toBytes("c"), true),
new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true)
);
MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
scan.setFilter(filter);
List<String> expected = Arrays.asList("h", "g", "f", "c", "b");
List<String> actual = new ArrayList<>();
for (Cell cell : getResults(ht, scan)) {
actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
}
assertEquals(expected, actual);
}
@Test
public void testReverseMultiRowRangeFilterIncludingMinRow() throws IOException {
tableName = Bytes.toBytes("testReverseMultiRowRangeFilterIncludingMinRow");
HTable ht = TEST_UTIL.createTable(tableName, family);
for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) {
byte[] row = Bytes.toBytes(rowkey);
Put p = new Put(row);
p.addColumn(family, qf, value);
ht.put(p);
}
TEST_UTIL.flush();
Scan scan = new Scan();
scan.setReversed(true);
List<RowRange> ranges = Arrays.asList(
new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true),
new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("g"), true)
);
MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
scan.setFilter(filter);
List<String> expected = Arrays.asList("g", "f", "c", "b", "a");
List<String> actual = new ArrayList<>();
for (Cell cell : getResults(ht, scan)) {
actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
}
assertEquals(expected, actual);
}
@Test
public void testReverseMultiRowRangeFilterIncludingMinAndMaxRow() throws IOException {
tableName = Bytes.toBytes("testReverseMultiRowRangeFilterIncludingMinAndMaxRow");
HTable ht = TEST_UTIL.createTable(tableName, family);
for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) {
byte[] row = Bytes.toBytes(rowkey);
Put p = new Put(row);
p.addColumn(family, qf, value);
ht.put(p);
}
TEST_UTIL.flush();
Scan scan = new Scan();
scan.setReversed(true);
List<RowRange> ranges = Arrays.asList(
new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true),
new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true)
);
MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
scan.setFilter(filter);
List<String> expected = Arrays.asList("h", "g", "f", "c", "b", "a");
List<String> actual = new ArrayList<>();
for (Cell cell : getResults(ht, scan)) {
actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
}
assertEquals(expected, actual);
}
private void generateRows(int numberOfRows, HTable ht, byte[] family, byte[] qf, byte[] value) private void generateRows(int numberOfRows, HTable ht, byte[] family, byte[] qf, byte[] value)
throws IOException { throws IOException {
for (int i = 0; i < numberOfRows; i++) { for (int i = 0; i < numberOfRows; i++) {
@ -646,7 +518,7 @@ public class TestMultiRowRangeFilter {
return kvList; return kvList;
} }
private List<Cell> getResults(HTable ht, Scan scan) throws IOException { private int getResultsSize(HTable ht, Scan scan) throws IOException {
ResultScanner scanner = ht.getScanner(scan); ResultScanner scanner = ht.getScanner(scan);
List<Cell> results = new ArrayList<Cell>(); List<Cell> results = new ArrayList<Cell>();
Result r; Result r;
@ -656,10 +528,6 @@ public class TestMultiRowRangeFilter {
} }
} }
scanner.close(); scanner.close();
return results; return results.size();
}
private int getResultsSize(HTable ht, Scan scan) throws IOException {
return getResults(ht, scan).size();
} }
} }