HBASE-21620 Problem in scan query when using more than one column prefix filter in some cases
Signed-off-by: Guanghao Zhang <zghao@apache.org> Signed-off-by: Michael Stack <stack@apache.org> Signed-off-by: Allan Yang <allan163@apache.org>
This commit is contained in:
parent
8f39e7633e
commit
c5810f19a6
|
@ -82,30 +82,40 @@ public class FilterListWithOR extends FilterListBase {
|
||||||
* next family for RegionScanner, INCLUDE_AND_NEXT_ROW is the same. so we should pass current cell
|
* next family for RegionScanner, INCLUDE_AND_NEXT_ROW is the same. so we should pass current cell
|
||||||
* to the filter, if row mismatch or row match but column family mismatch. (HBASE-18368)
|
* to the filter, if row mismatch or row match but column family mismatch. (HBASE-18368)
|
||||||
* @see org.apache.hadoop.hbase.filter.Filter.ReturnCode
|
* @see org.apache.hadoop.hbase.filter.Filter.ReturnCode
|
||||||
|
* @param subFilter which sub-filter to calculate the return code by using previous cell and
|
||||||
|
* previous return code.
|
||||||
|
* @param prevCell the previous cell passed to given sub-filter.
|
||||||
|
* @param currentCell the current cell which will pass to given sub-filter.
|
||||||
|
* @param prevCode the previous return code for given sub-filter.
|
||||||
|
* @return return code calculated by using previous cell and previous return code. null means can
|
||||||
|
* not decide which return code should return, so we will pass the currentCell to
|
||||||
|
* subFilter for getting currentCell's return code, and it won't impact the sub-filter's
|
||||||
|
* internal states.
|
||||||
*/
|
*/
|
||||||
private boolean shouldPassCurrentCellToFilter(Cell prevCell, Cell currentCell,
|
private ReturnCode calculateReturnCodeByPrevCellAndRC(Filter subFilter, Cell currentCell,
|
||||||
ReturnCode prevCode) throws IOException {
|
Cell prevCell, ReturnCode prevCode) throws IOException {
|
||||||
if (prevCell == null || prevCode == null) {
|
if (prevCell == null || prevCode == null) {
|
||||||
return true;
|
return null;
|
||||||
}
|
}
|
||||||
switch (prevCode) {
|
switch (prevCode) {
|
||||||
case INCLUDE:
|
case INCLUDE:
|
||||||
case SKIP:
|
case SKIP:
|
||||||
return true;
|
return null;
|
||||||
case SEEK_NEXT_USING_HINT:
|
case SEEK_NEXT_USING_HINT:
|
||||||
Cell nextHintCell = getNextCellHint(prevCell);
|
Cell nextHintCell = subFilter.getNextCellHint(prevCell);
|
||||||
return nextHintCell == null || this.compareCell(currentCell, nextHintCell) >= 0;
|
return nextHintCell != null && compareCell(currentCell, nextHintCell) < 0
|
||||||
|
? ReturnCode.SEEK_NEXT_USING_HINT : null;
|
||||||
case NEXT_COL:
|
case NEXT_COL:
|
||||||
case INCLUDE_AND_NEXT_COL:
|
case INCLUDE_AND_NEXT_COL:
|
||||||
// Once row changed, reset() will clear prevCells, so we need not to compare their rows
|
// Once row changed, reset() will clear prevCells, so we need not to compare their rows
|
||||||
// because rows are the same here.
|
// because rows are the same here.
|
||||||
return !CellUtil.matchingColumn(prevCell, currentCell);
|
return CellUtil.matchingColumn(prevCell, currentCell) ? ReturnCode.NEXT_COL : null;
|
||||||
case NEXT_ROW:
|
case NEXT_ROW:
|
||||||
case INCLUDE_AND_SEEK_NEXT_ROW:
|
case INCLUDE_AND_SEEK_NEXT_ROW:
|
||||||
// As described above, rows are definitely the same, so we only compare the family.
|
// As described above, rows are definitely the same, so we only compare the family.
|
||||||
return !CellUtil.matchingFamily(prevCell, currentCell);
|
return CellUtil.matchingFamily(prevCell, currentCell) ? ReturnCode.NEXT_ROW : null;
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException("Received code is not valid.");
|
throw new IllegalStateException("Received code is not valid.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -239,7 +249,7 @@ public class FilterListWithOR extends FilterListBase {
|
||||||
private void updatePrevCellList(int index, Cell currentCell, ReturnCode currentRC) {
|
private void updatePrevCellList(int index, Cell currentCell, ReturnCode currentRC) {
|
||||||
if (currentCell == null || currentRC == ReturnCode.INCLUDE || currentRC == ReturnCode.SKIP) {
|
if (currentCell == null || currentRC == ReturnCode.INCLUDE || currentRC == ReturnCode.SKIP) {
|
||||||
// If previous return code is INCLUDE or SKIP, we should always pass the next cell to the
|
// If previous return code is INCLUDE or SKIP, we should always pass the next cell to the
|
||||||
// corresponding sub-filter(need not test shouldPassCurrentCellToFilter() method), So we
|
// corresponding sub-filter(need not test calculateReturnCodeByPrevCellAndRC() method), So we
|
||||||
// need not save current cell to prevCellList for saving heap memory.
|
// need not save current cell to prevCellList for saving heap memory.
|
||||||
prevCellList.set(index, null);
|
prevCellList.set(index, null);
|
||||||
} else {
|
} else {
|
||||||
|
@ -253,28 +263,27 @@ public class FilterListWithOR extends FilterListBase {
|
||||||
return ReturnCode.INCLUDE;
|
return ReturnCode.INCLUDE;
|
||||||
}
|
}
|
||||||
ReturnCode rc = null;
|
ReturnCode rc = null;
|
||||||
boolean everyFilterReturnHint = true;
|
|
||||||
for (int i = 0, n = filters.size(); i < n; i++) {
|
for (int i = 0, n = filters.size(); i < n; i++) {
|
||||||
Filter filter = filters.get(i);
|
Filter filter = filters.get(i);
|
||||||
subFiltersIncludedCell.set(i, false);
|
subFiltersIncludedCell.set(i, false);
|
||||||
|
|
||||||
Cell prevCell = this.prevCellList.get(i);
|
Cell prevCell = this.prevCellList.get(i);
|
||||||
ReturnCode prevCode = this.prevFilterRCList.get(i);
|
ReturnCode prevCode = this.prevFilterRCList.get(i);
|
||||||
if (filter.filterAllRemaining() || !shouldPassCurrentCellToFilter(prevCell, c, prevCode)) {
|
if (filter.filterAllRemaining()) {
|
||||||
everyFilterReturnHint = false;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
ReturnCode localRC = calculateReturnCodeByPrevCellAndRC(filter, c, prevCell, prevCode);
|
||||||
ReturnCode localRC = filter.filterCell(c);
|
if (localRC == null) {
|
||||||
|
// Can not get return code based on previous cell and previous return code. In other words,
|
||||||
|
// we should pass the current cell to this sub-filter to get the return code, and it won't
|
||||||
|
// impact the sub-filter's internal state.
|
||||||
|
localRC = filter.filterCell(c);
|
||||||
|
}
|
||||||
|
|
||||||
// Update previous return code and previous cell for filter[i].
|
// Update previous return code and previous cell for filter[i].
|
||||||
updatePrevFilterRCList(i, localRC);
|
updatePrevFilterRCList(i, localRC);
|
||||||
updatePrevCellList(i, c, localRC);
|
updatePrevCellList(i, c, localRC);
|
||||||
|
|
||||||
if (localRC != ReturnCode.SEEK_NEXT_USING_HINT) {
|
|
||||||
everyFilterReturnHint = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
rc = mergeReturnCode(rc, localRC);
|
rc = mergeReturnCode(rc, localRC);
|
||||||
|
|
||||||
// For INCLUDE* case, we need to update the transformed cell.
|
// For INCLUDE* case, we need to update the transformed cell.
|
||||||
|
@ -283,15 +292,9 @@ public class FilterListWithOR extends FilterListBase {
|
||||||
subFiltersIncludedCell.set(i, true);
|
subFiltersIncludedCell.set(i, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Each sub-filter in filter list got true for filterAllRemaining(), if rc is null, so we should
|
||||||
if (everyFilterReturnHint) {
|
// return SKIP.
|
||||||
return ReturnCode.SEEK_NEXT_USING_HINT;
|
return rc == null ? ReturnCode.SKIP : rc;
|
||||||
} else if (rc == null) {
|
|
||||||
// Each sub-filter in filter list got true for filterAllRemaining().
|
|
||||||
return ReturnCode.SKIP;
|
|
||||||
} else {
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -690,7 +690,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
|
||||||
|
|
||||||
case SEEK_NEXT_USING_HINT:
|
case SEEK_NEXT_USING_HINT:
|
||||||
Cell nextKV = matcher.getNextKeyHint(cell);
|
Cell nextKV = matcher.getNextKeyHint(cell);
|
||||||
if (nextKV != null) {
|
if (nextKV != null && comparator.compare(nextKV, cell) > 0) {
|
||||||
seekAsDirection(nextKV);
|
seekAsDirection(nextKV);
|
||||||
NextState stateAfterSeekByHint = needToReturn(outResult);
|
NextState stateAfterSeekByHint = needToReturn(outResult);
|
||||||
if (stateAfterSeekByHint != null) {
|
if (stateAfterSeekByHint != null) {
|
||||||
|
|
|
@ -43,6 +43,7 @@ import org.junit.Assert;
|
||||||
import org.junit.ClassRule;
|
import org.junit.ClassRule;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.experimental.categories.Category;
|
import org.junit.experimental.categories.Category;
|
||||||
|
import org.mockito.Mockito;
|
||||||
|
|
||||||
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
|
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
|
||||||
|
|
||||||
|
@ -602,15 +603,15 @@ public class TestFilterList {
|
||||||
3, Bytes.toBytes("value"));
|
3, Bytes.toBytes("value"));
|
||||||
|
|
||||||
assertEquals(ReturnCode.INCLUDE_AND_NEXT_COL, filterList01.filterCell(kv1));
|
assertEquals(ReturnCode.INCLUDE_AND_NEXT_COL, filterList01.filterCell(kv1));
|
||||||
assertEquals(ReturnCode.SKIP, filterList01.filterCell(kv2));
|
assertEquals(ReturnCode.NEXT_COL, filterList01.filterCell(kv2));
|
||||||
assertEquals(ReturnCode.SKIP, filterList01.filterCell(kv3));
|
assertEquals(ReturnCode.NEXT_COL, filterList01.filterCell(kv3));
|
||||||
|
|
||||||
FilterList filterList11 =
|
FilterList filterList11 =
|
||||||
new FilterList(Operator.MUST_PASS_ONE, new ColumnPaginationFilter(1, 1));
|
new FilterList(Operator.MUST_PASS_ONE, new ColumnPaginationFilter(1, 1));
|
||||||
|
|
||||||
assertEquals(ReturnCode.NEXT_COL, filterList11.filterCell(kv1));
|
assertEquals(ReturnCode.NEXT_COL, filterList11.filterCell(kv1));
|
||||||
assertEquals(ReturnCode.SKIP, filterList11.filterCell(kv2));
|
assertEquals(ReturnCode.NEXT_COL, filterList11.filterCell(kv2));
|
||||||
assertEquals(ReturnCode.SKIP, filterList11.filterCell(kv3));
|
assertEquals(ReturnCode.NEXT_COL, filterList11.filterCell(kv3));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -629,9 +630,9 @@ public class TestFilterList {
|
||||||
Bytes.toBytes("value"));
|
Bytes.toBytes("value"));
|
||||||
|
|
||||||
assertEquals(ReturnCode.SEEK_NEXT_USING_HINT, filterList.filterCell(kv1));
|
assertEquals(ReturnCode.SEEK_NEXT_USING_HINT, filterList.filterCell(kv1));
|
||||||
assertEquals(ReturnCode.SKIP, filterList.filterCell(kv2));
|
assertEquals(ReturnCode.SEEK_NEXT_USING_HINT, filterList.filterCell(kv2));
|
||||||
assertEquals(ReturnCode.INCLUDE_AND_NEXT_COL, filterList.filterCell(kv3));
|
assertEquals(ReturnCode.INCLUDE_AND_NEXT_COL, filterList.filterCell(kv3));
|
||||||
assertEquals(ReturnCode.SKIP, filterList.filterCell(kv4));
|
assertEquals(ReturnCode.NEXT_COL, filterList.filterCell(kv4));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class MockFilter extends FilterBase {
|
private static class MockFilter extends FilterBase {
|
||||||
|
@ -1019,5 +1020,54 @@ public class TestFilterList {
|
||||||
Assert.assertEquals(true, filter2.getTransformed());
|
Assert.assertEquals(true, filter2.getTransformed());
|
||||||
Assert.assertEquals(true, filter3.getTransformed());
|
Assert.assertEquals(true, filter3.getTransformed());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFilterListWithORWhenPassingCellMismatchPreviousRC() throws IOException {
|
||||||
|
// Mainly test FilterListWithOR#calculateReturnCodeByPrevCellAndRC method with two sub-filters.
|
||||||
|
KeyValue kv1 = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fam"), Bytes.toBytes("a"),
|
||||||
|
100, Bytes.toBytes("value"));
|
||||||
|
KeyValue kv2 = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fam"), Bytes.toBytes("a"), 99,
|
||||||
|
Bytes.toBytes("value"));
|
||||||
|
KeyValue kv3 = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fam"), Bytes.toBytes("b"), 1,
|
||||||
|
Bytes.toBytes("value"));
|
||||||
|
KeyValue kv4 = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fan"), Bytes.toBytes("a"), 1,
|
||||||
|
Bytes.toBytes("value"));
|
||||||
|
Filter subFilter1 = Mockito.mock(FilterBase.class);
|
||||||
|
Mockito.when(subFilter1.filterCell(kv1)).thenReturn(ReturnCode.INCLUDE_AND_NEXT_COL);
|
||||||
|
Mockito.when(subFilter1.filterCell(kv2)).thenReturn(ReturnCode.NEXT_COL);
|
||||||
|
Mockito.when(subFilter1.filterCell(kv3)).thenReturn(ReturnCode.INCLUDE_AND_NEXT_COL);
|
||||||
|
Mockito.when(subFilter1.filterCell(kv4)).thenReturn(ReturnCode.INCLUDE_AND_NEXT_COL);
|
||||||
|
|
||||||
|
Filter subFilter2 = Mockito.mock(FilterBase.class);
|
||||||
|
Mockito.when(subFilter2.filterCell(kv1)).thenReturn(ReturnCode.SKIP);
|
||||||
|
Mockito.when(subFilter2.filterCell(kv2)).thenReturn(ReturnCode.NEXT_ROW);
|
||||||
|
Mockito.when(subFilter2.filterCell(kv3)).thenReturn(ReturnCode.NEXT_ROW);
|
||||||
|
Mockito.when(subFilter2.filterCell(kv4)).thenReturn(ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW);
|
||||||
|
|
||||||
|
Filter filterList = new FilterList(Operator.MUST_PASS_ONE, subFilter1, subFilter2);
|
||||||
|
Assert.assertEquals(ReturnCode.INCLUDE, filterList.filterCell(kv1));
|
||||||
|
Assert.assertEquals(ReturnCode.NEXT_COL, filterList.filterCell(kv2));
|
||||||
|
Assert.assertEquals(ReturnCode.INCLUDE_AND_NEXT_COL, filterList.filterCell(kv3));
|
||||||
|
Assert.assertEquals(ReturnCode.INCLUDE_AND_NEXT_COL, filterList.filterCell(kv4));
|
||||||
|
|
||||||
|
// One sub-filter will filterAllRemaining but other sub-filter will return SEEK_HINT
|
||||||
|
subFilter1 = Mockito.mock(FilterBase.class);
|
||||||
|
Mockito.when(subFilter1.filterAllRemaining()).thenReturn(true);
|
||||||
|
Mockito.when(subFilter1.filterCell(kv1)).thenReturn(ReturnCode.NEXT_ROW);
|
||||||
|
|
||||||
|
subFilter2 = Mockito.mock(FilterBase.class);
|
||||||
|
Mockito.when(subFilter2.filterCell(kv1)).thenReturn(ReturnCode.SEEK_NEXT_USING_HINT);
|
||||||
|
filterList = new FilterList(Operator.MUST_PASS_ONE, subFilter1, subFilter2);
|
||||||
|
Assert.assertEquals(ReturnCode.SEEK_NEXT_USING_HINT, filterList.filterCell(kv1));
|
||||||
|
|
||||||
|
// Two sub-filter returns SEEK_NEXT_USING_HINT, then we should return SEEK_NEXT_USING_HINT.
|
||||||
|
subFilter1 = Mockito.mock(FilterBase.class);
|
||||||
|
Mockito.when(subFilter1.filterCell(kv1)).thenReturn(ReturnCode.SEEK_NEXT_USING_HINT);
|
||||||
|
|
||||||
|
subFilter2 = Mockito.mock(FilterBase.class);
|
||||||
|
Mockito.when(subFilter2.filterCell(kv1)).thenReturn(ReturnCode.SEEK_NEXT_USING_HINT);
|
||||||
|
filterList = new FilterList(Operator.MUST_PASS_ONE, subFilter1, subFilter2);
|
||||||
|
Assert.assertEquals(ReturnCode.SEEK_NEXT_USING_HINT, filterList.filterCell(kv1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.client.*;
|
import org.apache.hadoop.hbase.client.*;
|
||||||
|
import org.apache.hadoop.hbase.filter.FilterList.Operator;
|
||||||
import org.apache.hadoop.hbase.testclassification.FilterTests;
|
import org.apache.hadoop.hbase.testclassification.FilterTests;
|
||||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
@ -28,7 +29,6 @@ import org.junit.AfterClass;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.ClassRule;
|
import org.junit.ClassRule;
|
||||||
import org.junit.Ignore;
|
|
||||||
import org.junit.Rule;
|
import org.junit.Rule;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.experimental.categories.Category;
|
import org.junit.experimental.categories.Category;
|
||||||
|
@ -90,4 +90,52 @@ public class TestFilterListOnMini {
|
||||||
Assert.assertEquals(2, rr.size());
|
Assert.assertEquals(2, rr.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test case for HBASE-21620
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testColumnPrefixFilterConcatWithOR() throws Exception {
|
||||||
|
TableName tn = TableName.valueOf(name.getMethodName());
|
||||||
|
byte[] cf1 = Bytes.toBytes("f1");
|
||||||
|
byte[] row = Bytes.toBytes("row");
|
||||||
|
byte[] value = Bytes.toBytes("value");
|
||||||
|
String[] columns = new String[]{
|
||||||
|
"1544768273917010001_lt",
|
||||||
|
"1544768273917010001_w_1",
|
||||||
|
"1544768723910010001_ca_1",
|
||||||
|
"1544768723910010001_lt",
|
||||||
|
"1544768723910010001_ut_1",
|
||||||
|
"1544768723910010001_w_5",
|
||||||
|
"1544769779710010001_lt",
|
||||||
|
"1544769779710010001_w_5",
|
||||||
|
"1544769883529010001_lt",
|
||||||
|
"1544769883529010001_w_5",
|
||||||
|
"1544769915805010001_lt",
|
||||||
|
"1544769915805010001_w_5",
|
||||||
|
"1544779883529010001_lt",
|
||||||
|
"1544770422942010001_lt",
|
||||||
|
"1544770422942010001_w_5"
|
||||||
|
};
|
||||||
|
Table table = TEST_UTIL.createTable(tn, cf1);
|
||||||
|
for (int i = 0; i < columns.length; i++) {
|
||||||
|
Put put = new Put(row).addColumn(cf1, Bytes.toBytes(columns[i]), value);
|
||||||
|
table.put(put);
|
||||||
|
}
|
||||||
|
Scan scan = new Scan();
|
||||||
|
scan.withStartRow(row).withStopRow(row, true)
|
||||||
|
.setFilter(new FilterList(Operator.MUST_PASS_ONE,
|
||||||
|
new ColumnPrefixFilter(Bytes.toBytes("1544770422942010001_")),
|
||||||
|
new ColumnPrefixFilter(Bytes.toBytes("1544769883529010001_"))));
|
||||||
|
ResultScanner scanner = table.getScanner(scan);
|
||||||
|
Result result;
|
||||||
|
int resultCount = 0;
|
||||||
|
int cellCount = 0;
|
||||||
|
while ((result = scanner.next()) != null) {
|
||||||
|
cellCount += result.listCells().size();
|
||||||
|
resultCount++;
|
||||||
|
}
|
||||||
|
Assert.assertEquals(resultCount, 1);
|
||||||
|
Assert.assertEquals(cellCount, 4);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue