HBASE-8847 Filter.transform() always applies unconditionally, even when combined in a FilterList (Christophe Taton)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1499851 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
acc6dee508
commit
6ba235d1cd
|
@ -29,17 +29,19 @@ import org.apache.hadoop.hbase.exceptions.DeserializationException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interface for row and column filters directly applied within the regionserver.
|
* Interface for row and column filters directly applied within the regionserver.
|
||||||
|
*
|
||||||
* A filter can expect the following call sequence:
|
* A filter can expect the following call sequence:
|
||||||
*<ul>
|
* <ul>
|
||||||
* <li>{@link #reset()}</li>
|
* <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
|
||||||
* <li>{@link #filterAllRemaining()} -> true indicates scan is over, false, keep going on.</li>
|
* <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
|
||||||
* <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row,
|
* <li> {@link #filterRowKey(byte[],int,int)}: true means drop this row; false means include.</li>
|
||||||
* if false, we will also call</li>
|
* <li> {@link #filterKeyValue(KeyValue)}: decides whether to include or exclude this KeyValue.
|
||||||
* <li>{@link #filterKeyValue(KeyValue)} -> true to drop this key/value</li>
|
* See {@link ReturnCode}. </li>
|
||||||
* <li>{@link #filterRow(List)} -> allows directmodification of the final list to be submitted
|
* <li> {@link #transform(KeyValue)}: if the KeyValue is included, let the filter transform the
|
||||||
* <li>{@link #filterRow()} -> last chance to drop entire row based on the sequence of
|
* KeyValue. </li>
|
||||||
* filterValue() calls. Eg: filter a row if it doesn't contain a specified column.
|
* <li> {@link #filterRow(List)}: allows direct modification of the final list to be submitted
|
||||||
* </li>
|
* <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
|
||||||
|
* filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* Filter instances are created one per region/scan. This abstract class replaces
|
* Filter instances are created one per region/scan. This abstract class replaces
|
||||||
|
|
|
@ -35,9 +35,18 @@ import com.google.protobuf.InvalidProtocolBufferException;
|
||||||
/**
|
/**
|
||||||
* Implementation of {@link Filter} that represents an ordered List of Filters
|
* Implementation of {@link Filter} that represents an ordered List of Filters
|
||||||
* which will be evaluated with a specified boolean operator {@link Operator#MUST_PASS_ALL}
|
* which will be evaluated with a specified boolean operator {@link Operator#MUST_PASS_ALL}
|
||||||
* (<code>!AND</code>) or {@link Operator#MUST_PASS_ONE} (<code>!OR</code>).
|
* (<code>AND</code>) or {@link Operator#MUST_PASS_ONE} (<code>OR</code>).
|
||||||
* Since you can use Filter Lists as children of Filter Lists, you can create a
|
* Since you can use Filter Lists as children of Filter Lists, you can create a
|
||||||
* hierarchy of filters to be evaluated.
|
* hierarchy of filters to be evaluated.
|
||||||
|
*
|
||||||
|
* <br/>
|
||||||
|
* {@link Operator#MUST_PASS_ALL} evaluates lazily: evaluation stops as soon as one filter does
|
||||||
|
* not include the KeyValue.
|
||||||
|
*
|
||||||
|
* <br/>
|
||||||
|
* {@link Operator#MUST_PASS_ONE} evaluates non-lazily: all filters are always evaluated.
|
||||||
|
*
|
||||||
|
* <br/>
|
||||||
* Defaults to {@link Operator#MUST_PASS_ALL}.
|
* Defaults to {@link Operator#MUST_PASS_ALL}.
|
||||||
* <p>TODO: Fix creation of Configuration on serialization and deserialization.
|
* <p>TODO: Fix creation of Configuration on serialization and deserialization.
|
||||||
*/
|
*/
|
||||||
|
@ -56,6 +65,18 @@ public class FilterList extends Filter {
|
||||||
private Operator operator = Operator.MUST_PASS_ALL;
|
private Operator operator = Operator.MUST_PASS_ALL;
|
||||||
private List<Filter> filters = new ArrayList<Filter>();
|
private List<Filter> filters = new ArrayList<Filter>();
|
||||||
|
|
||||||
|
/** Reference KeyValue used by {@link #transform(KeyValue)} for validation purposes. */
|
||||||
|
private KeyValue referenceKV = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When filtering a given KeyValue in {@link #filterKeyValue(KeyValue)},
|
||||||
|
* this stores the transformed KeyValue to be returned by {@link #transform(KeyValue)}.
|
||||||
|
*
|
||||||
|
* Each individual filter's transformation is applied only when that filter includes the KeyValue.
|
||||||
|
* Transformations are composed in the order specified by {@link #filters}.
|
||||||
|
*/
|
||||||
|
private KeyValue transformedKV = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor that takes a set of {@link Filter}s. The default operator
|
* Constructor that takes a set of {@link Filter}s. The default operator
|
||||||
* MUST_PASS_ALL is assumed.
|
* MUST_PASS_ALL is assumed.
|
||||||
|
@ -181,15 +202,21 @@ public class FilterList extends Filter {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public KeyValue transform(KeyValue v) throws IOException {
|
public KeyValue transform(KeyValue v) throws IOException {
|
||||||
KeyValue current = v;
|
// transform() is expected to follow an inclusive filterKeyValue() immediately:
|
||||||
for (Filter filter : filters) {
|
if (!v.equals(this.referenceKV)) {
|
||||||
current = filter.transform(current);
|
throw new IllegalStateException(
|
||||||
|
"Reference KeyValue: " + this.referenceKV + " does not match: " + v);
|
||||||
}
|
}
|
||||||
return current;
|
return this.transformedKV;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ReturnCode filterKeyValue(KeyValue v) throws IOException {
|
public ReturnCode filterKeyValue(KeyValue v) throws IOException {
|
||||||
|
this.referenceKV = v;
|
||||||
|
|
||||||
|
// Accumulates the successive transformations of every filter that includes the KeyValue:
|
||||||
|
KeyValue transformed = v;
|
||||||
|
|
||||||
ReturnCode rc = operator == Operator.MUST_PASS_ONE?
|
ReturnCode rc = operator == Operator.MUST_PASS_ONE?
|
||||||
ReturnCode.SKIP: ReturnCode.INCLUDE;
|
ReturnCode.SKIP: ReturnCode.INCLUDE;
|
||||||
for (Filter filter : filters) {
|
for (Filter filter : filters) {
|
||||||
|
@ -203,6 +230,7 @@ public class FilterList extends Filter {
|
||||||
case INCLUDE_AND_NEXT_COL:
|
case INCLUDE_AND_NEXT_COL:
|
||||||
rc = ReturnCode.INCLUDE_AND_NEXT_COL;
|
rc = ReturnCode.INCLUDE_AND_NEXT_COL;
|
||||||
case INCLUDE:
|
case INCLUDE:
|
||||||
|
transformed = filter.transform(transformed);
|
||||||
continue;
|
continue;
|
||||||
default:
|
default:
|
||||||
return code;
|
return code;
|
||||||
|
@ -217,15 +245,16 @@ public class FilterList extends Filter {
|
||||||
if (rc != ReturnCode.INCLUDE_AND_NEXT_COL) {
|
if (rc != ReturnCode.INCLUDE_AND_NEXT_COL) {
|
||||||
rc = ReturnCode.INCLUDE;
|
rc = ReturnCode.INCLUDE;
|
||||||
}
|
}
|
||||||
|
transformed = filter.transform(transformed);
|
||||||
break;
|
break;
|
||||||
case INCLUDE_AND_NEXT_COL:
|
case INCLUDE_AND_NEXT_COL:
|
||||||
rc = ReturnCode.INCLUDE_AND_NEXT_COL;
|
rc = ReturnCode.INCLUDE_AND_NEXT_COL;
|
||||||
|
transformed = filter.transform(transformed);
|
||||||
// must continue here to evaluate all filters
|
// must continue here to evaluate all filters
|
||||||
break;
|
break;
|
||||||
case NEXT_ROW:
|
case NEXT_ROW:
|
||||||
break;
|
break;
|
||||||
case SKIP:
|
case SKIP:
|
||||||
// continue;
|
|
||||||
break;
|
break;
|
||||||
case NEXT_COL:
|
case NEXT_COL:
|
||||||
break;
|
break;
|
||||||
|
@ -236,6 +265,10 @@ public class FilterList extends Filter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Save the transformed KeyValue for transform():
|
||||||
|
this.transformedKV = transformed;
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,8 @@ import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.experimental.categories.Category;
|
import org.junit.experimental.categories.Category;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests filter sets
|
* Tests filter sets
|
||||||
*
|
*
|
||||||
|
@ -421,5 +423,46 @@ public class TestFilterList {
|
||||||
minKeyValue));
|
minKeyValue));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the behavior of transform() in a hierarchical filter.
|
||||||
|
*
|
||||||
|
* transform() only applies after a filterKeyValue() whose return-code includes the KeyValue.
|
||||||
|
* Also exercises the lazy evaluation of MUST_PASS_ALL (AND) lists nested in a MUST_PASS_ONE (OR) list.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTransformMPO() throws Exception {
|
||||||
|
// Apply the following filter:
|
||||||
|
// (family=fam AND qualifier=qual1 AND KeyOnlyFilter)
|
||||||
|
// OR (family=fam AND qualifier=qual2)
|
||||||
|
final FilterList flist = new FilterList(Operator.MUST_PASS_ONE, Lists.<Filter>newArrayList(
|
||||||
|
new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
|
||||||
|
new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
|
||||||
|
new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual1"))),
|
||||||
|
new KeyOnlyFilter())),
|
||||||
|
new FilterList(Operator.MUST_PASS_ALL, Lists.<Filter>newArrayList(
|
||||||
|
new FamilyFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("fam"))),
|
||||||
|
new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("qual2")))))));
|
||||||
|
|
||||||
|
final KeyValue kvQual1 = new KeyValue(
|
||||||
|
Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual1"), Bytes.toBytes("value"));
|
||||||
|
final KeyValue kvQual2 = new KeyValue(
|
||||||
|
Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual2"), Bytes.toBytes("value"));
|
||||||
|
final KeyValue kvQual3 = new KeyValue(
|
||||||
|
Bytes.toBytes("row"), Bytes.toBytes("fam"), Bytes.toBytes("qual3"), Bytes.toBytes("value"));
|
||||||
|
|
||||||
|
// Value for fam:qual1 should be stripped:
|
||||||
|
assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual1));
|
||||||
|
final KeyValue transformedQual1 = flist.transform(kvQual1);
|
||||||
|
assertEquals(0, transformedQual1.getValue().length);
|
||||||
|
|
||||||
|
// Value for fam:qual2 should not be stripped:
|
||||||
|
assertEquals(Filter.ReturnCode.INCLUDE, flist.filterKeyValue(kvQual2));
|
||||||
|
final KeyValue transformedQual2 = flist.transform(kvQual2);
|
||||||
|
assertEquals("value", Bytes.toString(transformedQual2.getValue()));
|
||||||
|
|
||||||
|
// Other keys should be skipped:
|
||||||
|
assertEquals(Filter.ReturnCode.SKIP, flist.filterKeyValue(kvQual3));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue