HBASE-19252 Move the transform logic of FilterList into transformCell() method to avoid extra ref to question cell

This commit is contained in:
huzheng 2017-11-14 15:04:48 +08:00 committed by openinx
parent 5b13b624bb
commit d726492838
5 changed files with 89 additions and 79 deletions

View File

@ -165,25 +165,6 @@ final public class FilterList extends FilterBase {
return filterListBase.transformCell(c); return filterListBase.transformCell(c);
} }
/**
* Internal implementation of {@link #filterCell(Cell)}. Compared to the
* {@link #filterCell(Cell)} method, this method accepts an additional parameter named
* transformedCell. This parameter indicates the initial value of transformed cell before this
* filter operation. <br/>
* For FilterList, we can consider a filter list as a node in a tree. sub-filters of the filter
* list are children of the relative node. The logic of transforming cell of a filter list, well,
* we can consider it as the process of post-order tree traverse. For a node , Before we traverse
* the current child, we should set the traverse result (transformed cell) of previous node(s) as
* the initial value. so the additional currentTransformedCell parameter is needed (HBASE-18879).
* @param c The cell in question.
* @param transformedCell The transformed cell of previous filter(s)
* @return ReturnCode of this filter operation.
* @throws IOException
*/
ReturnCode internalFilterCell(Cell c, Cell transformedCell) throws IOException {
return this.filterListBase.internalFilterCell(c, transformedCell);
}
@Override @Override
@Deprecated @Deprecated
public ReturnCode filterKeyValue(final Cell c) throws IOException { public ReturnCode filterKeyValue(final Cell c) throws IOException {

View File

@ -26,8 +26,6 @@ import java.util.List;
import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
/** /**
@ -38,17 +36,12 @@ import org.apache.yetus.audience.InterfaceAudience;
public abstract class FilterListBase extends FilterBase { public abstract class FilterListBase extends FilterBase {
private static final int MAX_LOG_FILTERS = 5; private static final int MAX_LOG_FILTERS = 5;
protected final ArrayList<Filter> filters; protected final ArrayList<Filter> filters;
/** Reference Cell used by {@link #transformCell(Cell)} for validation purpose. */
protected Cell referenceCell = null;
/** /**
* When filtering a given Cell in {@link #filterCell(Cell)}, this stores the transformed Cell * For each sub-filter in filter list, we save a boolean flag to indicate that whether the return
* to be returned by {@link #transformCell(Cell)}. Individual filters transformation are applied * code of filterCell(c) for sub-filter is INCLUDE* (INCLUDE, INCLUDE_AND_NEXT_COL,
* only when the filter includes the Cell. Transformations are composed in the order specified by * INCLUDE_AND_SEEK_NEXT_ROW) case. if true, we need to transform cell for the sub-filter.
* {@link #filters}.
*/ */
protected Cell transformedCell = null; protected ArrayList<Boolean> subFiltersIncludedCell;
public FilterListBase(List<Filter> filters) { public FilterListBase(List<Filter> filters) {
reversed = checkAndGetReversed(filters, reversed); reversed = checkAndGetReversed(filters, reversed);
@ -90,43 +83,35 @@ public abstract class FilterListBase extends FilterBase {
return reversed ? -1 * cmp : cmp; return reversed ? -1 * cmp : cmp;
} }
/**
* For FilterList, we can consider a filter list as a node in a tree. sub-filters of the filter
* list are children of the relative node. The logic of transforming cell of a filter list, well,
* we can consider it as the process of post-order tree traverse. For a node , before we traverse
* the current child, we should set the traverse result (transformed cell) of previous node(s) as
* the initial value. (HBASE-18879).
* @param c The cell in question.
* @return the transformed cell.
* @throws IOException
*/
@Override @Override
public Cell transformCell(Cell c) throws IOException { public Cell transformCell(Cell c) throws IOException {
if (isEmpty()) { if (isEmpty()) {
return super.transformCell(c); return super.transformCell(c);
} }
if (!CellUtil.equals(c, referenceCell)) { Cell transformed = c;
throw new IllegalStateException( for (int i = 0, n = filters.size(); i < n; i++) {
"Reference Cell: " + this.referenceCell + " does not match: " + c); if (subFiltersIncludedCell.get(i)) {
transformed = filters.get(i).transformCell(transformed);
}
} }
// Copy transformedCell into a new cell and reset transformedCell & referenceCell to null for return transformed;
// Java GC optimization
Cell cell = KeyValueUtil.copyToNewKeyValue(this.transformedCell);
this.transformedCell = null;
this.referenceCell = null;
return cell;
} }
/**
* Internal implementation of {@link #filterCell(Cell)}
* @param c The cell in question.
* @param transformedCell The transformed cell of previous filter(s)
* @return ReturnCode of this filter operation.
* @throws IOException
* @see org.apache.hadoop.hbase.filter.FilterList#internalFilterCell(Cell, Cell)
*/
abstract ReturnCode internalFilterCell(Cell c, Cell transformedCell) throws IOException;
@Override @Override
public ReturnCode filterKeyValue(final Cell c) throws IOException { public ReturnCode filterKeyValue(final Cell c) throws IOException {
return filterCell(c); return filterCell(c);
} }
@Override
public ReturnCode filterCell(final Cell c) throws IOException {
return internalFilterCell(c, c);
}
/** /**
* Filters that never filter by modifying the returned List of Cells can inherit this * Filters that never filter by modifying the returned List of Cells can inherit this
* implementation that does nothing. {@inheritDoc} * implementation that does nothing. {@inheritDoc}

View File

@ -24,6 +24,7 @@ import org.apache.yetus.audience.InterfaceAudience;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
/** /**
@ -37,6 +38,10 @@ public class FilterListWithAND extends FilterListBase {
public FilterListWithAND(List<Filter> filters) { public FilterListWithAND(List<Filter> filters) {
super(filters); super(filters);
// For FilterList with AND, when call FL's transformCell(), we should transform cell for all
// sub-filters (because all sub-filters return INCLUDE*). So here, fill this array with true. we
// keep this in FilterListWithAND for abstracting the transformCell() in FilterListBase.
subFiltersIncludedCell = new ArrayList<>(Collections.nCopies(filters.size(), true));
} }
@Override @Override
@ -45,6 +50,7 @@ public class FilterListWithAND extends FilterListBase {
throw new IllegalArgumentException("Filters in the list must have the same reversed flag"); throw new IllegalArgumentException("Filters in the list must have the same reversed flag");
} }
this.filters.addAll(filters); this.filters.addAll(filters);
this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), true));
} }
@Override @Override
@ -149,13 +155,11 @@ public class FilterListWithAND extends FilterListBase {
} }
@Override @Override
ReturnCode internalFilterCell(Cell c, Cell transformedCell) throws IOException { public ReturnCode filterCell(Cell c) throws IOException {
if (isEmpty()) { if (isEmpty()) {
return ReturnCode.INCLUDE; return ReturnCode.INCLUDE;
} }
ReturnCode rc = ReturnCode.INCLUDE; ReturnCode rc = ReturnCode.INCLUDE;
Cell transformed = transformedCell;
this.referenceCell = c;
this.seekHintFilters.clear(); this.seekHintFilters.clear();
for (int i = 0, n = filters.size(); i < n; i++) { for (int i = 0, n = filters.size(); i < n; i++) {
Filter filter = filters.get(i); Filter filter = filters.get(i);
@ -163,23 +167,13 @@ public class FilterListWithAND extends FilterListBase {
return ReturnCode.NEXT_ROW; return ReturnCode.NEXT_ROW;
} }
ReturnCode localRC; ReturnCode localRC;
if (filter instanceof FilterList) { localRC = filter.filterCell(c);
localRC = ((FilterList) filter).internalFilterCell(c, transformed);
} else {
localRC = filter.filterCell(c);
}
rc = mergeReturnCode(rc, localRC); rc = mergeReturnCode(rc, localRC);
// For INCLUDE* case, we need to update the transformed cell.
if (isInReturnCodes(localRC, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
transformed = filter.transformCell(transformed);
}
if (localRC == ReturnCode.SEEK_NEXT_USING_HINT) { if (localRC == ReturnCode.SEEK_NEXT_USING_HINT) {
seekHintFilters.add(filter); seekHintFilters.add(filter);
} }
} }
this.transformedCell = transformed;
if (!seekHintFilters.isEmpty()) { if (!seekHintFilters.isEmpty()) {
return ReturnCode.SEEK_NEXT_USING_HINT; return ReturnCode.SEEK_NEXT_USING_HINT;
} }

View File

@ -48,6 +48,7 @@ public class FilterListWithOR extends FilterListBase {
super(filters); super(filters);
prevFilterRCList = new ArrayList<>(Collections.nCopies(filters.size(), null)); prevFilterRCList = new ArrayList<>(Collections.nCopies(filters.size(), null));
prevCellList = new ArrayList<>(Collections.nCopies(filters.size(), null)); prevCellList = new ArrayList<>(Collections.nCopies(filters.size(), null));
subFiltersIncludedCell = new ArrayList<>(Collections.nCopies(filters.size(), false));
} }
@Override @Override
@ -56,6 +57,7 @@ public class FilterListWithOR extends FilterListBase {
throw new IllegalArgumentException("Filters in the list must have the same reversed flag"); throw new IllegalArgumentException("Filters in the list must have the same reversed flag");
} }
this.filters.addAll(filters); this.filters.addAll(filters);
this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), false));
this.prevFilterRCList.addAll(Collections.nCopies(filters.size(), null)); this.prevFilterRCList.addAll(Collections.nCopies(filters.size(), null));
this.prevCellList.addAll(Collections.nCopies(filters.size(), null)); this.prevCellList.addAll(Collections.nCopies(filters.size(), null));
} }
@ -246,16 +248,15 @@ public class FilterListWithOR extends FilterListBase {
} }
@Override @Override
ReturnCode internalFilterCell(Cell c, Cell transformCell) throws IOException { public ReturnCode filterCell(Cell c) throws IOException {
if (isEmpty()) { if (isEmpty()) {
return ReturnCode.INCLUDE; return ReturnCode.INCLUDE;
} }
ReturnCode rc = null; ReturnCode rc = null;
boolean everyFilterReturnHint = true; boolean everyFilterReturnHint = true;
Cell transformed = transformCell;
this.referenceCell = c;
for (int i = 0, n = filters.size(); i < n; i++) { for (int i = 0, n = filters.size(); i < n; i++) {
Filter filter = filters.get(i); Filter filter = filters.get(i);
subFiltersIncludedCell.set(i, false);
Cell prevCell = this.prevCellList.get(i); Cell prevCell = this.prevCellList.get(i);
ReturnCode prevCode = this.prevFilterRCList.get(i); ReturnCode prevCode = this.prevFilterRCList.get(i);
@ -264,12 +265,7 @@ public class FilterListWithOR extends FilterListBase {
continue; continue;
} }
ReturnCode localRC; ReturnCode localRC = filter.filterCell(c);
if (filter instanceof FilterList) {
localRC = ((FilterList) filter).internalFilterCell(c, transformed);
} else {
localRC = filter.filterCell(c);
}
// Update previous return code and previous cell for filter[i]. // Update previous return code and previous cell for filter[i].
updatePrevFilterRCList(i, localRC); updatePrevFilterRCList(i, localRC);
@ -284,11 +280,10 @@ public class FilterListWithOR extends FilterListBase {
// For INCLUDE* case, we need to update the transformed cell. // For INCLUDE* case, we need to update the transformed cell.
if (isInReturnCodes(localRC, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL, if (isInReturnCodes(localRC, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) { ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
transformed = filter.transformCell(transformed); subFiltersIncludedCell.set(i, true);
} }
} }
this.transformedCell = transformed;
if (everyFilterReturnHint) { if (everyFilterReturnHint) {
return ReturnCode.SEEK_NEXT_USING_HINT; return ReturnCode.SEEK_NEXT_USING_HINT;
} else if (rc == null) { } else if (rc == null) {
@ -303,6 +298,7 @@ public class FilterListWithOR extends FilterListBase {
public void reset() throws IOException { public void reset() throws IOException {
for (int i = 0, n = filters.size(); i < n; i++) { for (int i = 0, n = filters.size(); i < n; i++) {
filters.get(i).reset(); filters.get(i).reset();
subFiltersIncludedCell.set(i, false);
prevFilterRCList.set(i, null); prevFilterRCList.set(i, null);
prevCellList.set(i, null); prevCellList.set(i, null);
} }

View File

@ -960,5 +960,59 @@ public class TestFilterList {
filter.filterCell(kv2); filter.filterCell(kv2);
assertEquals(2, mockNextRowFilter.getHitCount()); assertEquals(2, mockNextRowFilter.getHitCount());
} }
private static class TransformFilter extends FilterBase {
private ReturnCode targetRetCode;
private boolean transformed = false;
public TransformFilter(ReturnCode targetRetCode) {
this.targetRetCode = targetRetCode;
}
@Override
public ReturnCode filterCell(final Cell v) throws IOException {
return targetRetCode;
}
@Override
public Cell transformCell(Cell c) throws IOException {
transformed = true;
return super.transformCell(c);
}
public boolean getTransformed() {
return this.transformed;
}
}
@Test
public void testTransformCell() throws IOException {
KeyValue kv =
new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("cf"), Bytes.toBytes("column1"), 1,
Bytes.toBytes("value"));
// case MUST_PASS_ONE
TransformFilter filter1 = new TransformFilter(ReturnCode.INCLUDE);
TransformFilter filter2 = new TransformFilter(ReturnCode.NEXT_ROW);
TransformFilter filter3 = new TransformFilter(ReturnCode.SEEK_NEXT_USING_HINT);
FilterList filterList = new FilterList(Operator.MUST_PASS_ONE, filter1, filter2, filter3);
Assert.assertEquals(ReturnCode.INCLUDE, filterList.filterCell(kv));
Assert.assertEquals(kv, filterList.transformCell(kv));
Assert.assertEquals(true, filter1.getTransformed());
Assert.assertEquals(false, filter2.getTransformed());
Assert.assertEquals(false, filter3.getTransformed());
// case MUST_PASS_ALL
filter1 = new TransformFilter(ReturnCode.INCLUDE);
filter2 = new TransformFilter(ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW);
filter3 = new TransformFilter(ReturnCode.INCLUDE_AND_NEXT_COL);
filterList = new FilterList(Operator.MUST_PASS_ALL, filter1, filter2, filter3);
Assert.assertEquals(ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, filterList.filterCell(kv));
Assert.assertEquals(kv, filterList.transformCell(kv));
Assert.assertEquals(true, filter1.getTransformed());
Assert.assertEquals(true, filter2.getTransformed());
Assert.assertEquals(true, filter3.getTransformed());
}
} }