Add Filters.matchPredicate helper, use it where appropriate. (#2851)

This approach simplifies code and is generally faster, due to skipping
unnecessary dictionary lookups (see #2850).
This commit is contained in:
Gian Merlino 2016-04-19 15:54:32 -07:00 committed by Fangjin Yang
parent b2745befb7
commit 59460b17cc
10 changed files with 158 additions and 242 deletions

View File

@ -22,16 +22,10 @@ package io.druid.query.filter;
import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.metamx.common.StringUtils; import com.metamx.common.StringUtils;
import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.ExtractionFn;
import io.druid.query.lookup.LookupExtractionFn;
import io.druid.query.lookup.LookupExtractor;
import io.druid.segment.filter.SelectorFilter;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.List;
import java.util.Objects;
/** /**
* This class is deprecated, use SelectorDimFilter instead: {@link io.druid.query.filter.SelectorDimFilter} * This class is deprecated, use SelectorDimFilter instead: {@link io.druid.query.filter.SelectorDimFilter}
@ -106,7 +100,7 @@ public class ExtractionDimFilter implements DimFilter
@Override @Override
public Filter toFilter() public Filter toFilter()
{ {
return new SelectorFilter(dimension, value, extractionFn); return new SelectorDimFilter(dimension, value, extractionFn).toFilter();
} }
@Override @Override

View File

@ -21,19 +21,18 @@ package io.druid.query.filter;
import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Function;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Strings; import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.metamx.common.StringUtils; import com.metamx.common.StringUtils;
import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.ExtractionFn;
import io.druid.query.lookup.LookupExtractionFn; import io.druid.query.lookup.LookupExtractionFn;
import io.druid.query.lookup.LookupExtractor; import io.druid.query.lookup.LookupExtractor;
import io.druid.segment.filter.DimensionPredicateFilter;
import io.druid.segment.filter.SelectorFilter; import io.druid.segment.filter.SelectorFilter;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
@ -119,7 +118,20 @@ public class SelectorDimFilter implements DimFilter
@Override @Override
public Filter toFilter() public Filter toFilter()
{ {
return new SelectorFilter(dimension, value, extractionFn); if (extractionFn == null) {
return new SelectorFilter(dimension, value);
} else {
final String valueOrNull = Strings.emptyToNull(value);
final Predicate<String> predicate = new Predicate<String>()
{
@Override
public boolean apply(String input)
{
return Objects.equals(valueOrNull, input);
}
};
return new DimensionPredicateFilter(dimension, predicate, extractionFn);
}
} }
@JsonProperty @JsonProperty

View File

@ -29,7 +29,6 @@ import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory; import io.druid.query.filter.ValueMatcherFactory;
import io.druid.query.ordering.StringComparators; import io.druid.query.ordering.StringComparators;
import io.druid.segment.column.BitmapIndex; import io.druid.segment.column.BitmapIndex;
import io.druid.segment.data.Indexed;
import java.util.Comparator; import java.util.Comparator;
import java.util.Iterator; import java.util.Iterator;
@ -52,9 +51,23 @@ public class BoundFilter implements Filter
@Override @Override
public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector)
{ {
if (boundDimFilter.isAlphaNumeric() || extractionFn != null) {
return Filters.matchPredicate(
boundDimFilter.getDimension(),
selector,
new Predicate<String>()
{
@Override
public boolean apply(String input)
{
return doesMatch(input);
}
}
);
} else {
final BitmapIndex bitmapIndex = selector.getBitmapIndex(boundDimFilter.getDimension()); final BitmapIndex bitmapIndex = selector.getBitmapIndex(boundDimFilter.getDimension());
if (bitmapIndex == null) { if (bitmapIndex == null || bitmapIndex.getCardinality() == 0) {
if (doesMatch(null)) { if (doesMatch(null)) {
return selector.getBitmapFactory() return selector.getBitmapFactory()
.complement(selector.getBitmapFactory().makeEmptyImmutableBitmap(), selector.getNumRows()); .complement(selector.getBitmapFactory().makeEmptyImmutableBitmap(), selector.getNumRows());
@ -63,52 +76,6 @@ public class BoundFilter implements Filter
} }
} }
if (boundDimFilter.isAlphaNumeric() || extractionFn != null) {
// inspect all values
// will be non-null because bitmapIndex was non-null
final Indexed<String> dimValues = selector.getDimensionValues(boundDimFilter.getDimension());
return selector.getBitmapFactory().union(
new Iterable<ImmutableBitmap>()
{
@Override
public Iterator<ImmutableBitmap> iterator()
{
return new Iterator<ImmutableBitmap>()
{
int currIndex = 0;
@Override
public boolean hasNext()
{
return currIndex < bitmapIndex.getCardinality();
}
@Override
public ImmutableBitmap next()
{
while (currIndex < bitmapIndex.getCardinality() && !doesMatch(dimValues.get(currIndex))) {
currIndex++;
}
if (currIndex == bitmapIndex.getCardinality()) {
return bitmapIndex.getBitmapFactory().makeEmptyImmutableBitmap();
}
return bitmapIndex.getBitmap(currIndex++);
}
@Override
public void remove()
{
throw new UnsupportedOperationException();
}
};
}
}
);
} else {
// search for start, end indexes in the bitmaps; then include all bitmaps between those points // search for start, end indexes in the bitmaps; then include all bitmaps between those points
final int startIndex; // inclusive final int startIndex; // inclusive

View File

@ -19,6 +19,7 @@
package io.druid.segment.filter; package io.druid.segment.filter;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate; import com.google.common.base.Predicate;
import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.ExtractionFn;
@ -26,10 +27,6 @@ import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter; import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory; import io.druid.query.filter.ValueMatcherFactory;
import io.druid.segment.column.BitmapIndex;
import io.druid.segment.data.Indexed;
import java.util.Iterator;
/** /**
*/ */
@ -44,12 +41,13 @@ public class DimensionPredicateFilter implements Filter
final ExtractionFn extractionFn final ExtractionFn extractionFn
) )
{ {
this.dimension = dimension; Preconditions.checkNotNull(predicate, "predicate");
this.dimension = Preconditions.checkNotNull(dimension, "dimension");
if (predicate == null) { if (extractionFn == null) {
this.predicate = null; this.predicate = predicate;
} else { } else {
this.predicate = extractionFn == null ? predicate : new Predicate<String>() this.predicate = new Predicate<String>()
{ {
@Override @Override
public boolean apply(String input) public boolean apply(String input)
@ -63,63 +61,7 @@ public class DimensionPredicateFilter implements Filter
@Override @Override
public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector)
{ {
if (predicate == null) { return Filters.matchPredicate(dimension, selector, predicate);
return selector.getBitmapFactory().makeEmptyImmutableBitmap();
}
final Indexed<String> dimValues = selector.getDimensionValues(dimension);
if (dimValues == null || dimValues.size() == 0) {
if (predicate.apply(null)) {
return selector.getBitmapFactory().complement(
selector.getBitmapFactory().makeEmptyImmutableBitmap(),
selector.getNumRows()
);
} else {
return selector.getBitmapFactory().makeEmptyImmutableBitmap();
}
}
final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension);
return selector.getBitmapFactory().union(
new Iterable<ImmutableBitmap>()
{
@Override
public Iterator<ImmutableBitmap> iterator()
{
return new Iterator<ImmutableBitmap>()
{
int currIndex = 0;
@Override
public boolean hasNext()
{
return currIndex < bitmapIndex.getCardinality();
}
@Override
public ImmutableBitmap next()
{
while (currIndex < bitmapIndex.getCardinality() && !predicate.apply(dimValues.get(currIndex))) {
currIndex++;
}
if (currIndex == bitmapIndex.getCardinality()) {
return bitmapIndex.getBitmapFactory().makeEmptyImmutableBitmap();
}
return bitmapIndex.getBitmap(currIndex++);
}
@Override
public void remove()
{
throw new UnsupportedOperationException();
}
};
}
}
);
} }
@Override @Override

View File

@ -20,11 +20,18 @@
package io.druid.segment.filter; package io.druid.segment.filter;
import com.google.common.base.Function; import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.common.guava.FunctionalIterable; import com.metamx.common.guava.FunctionalIterable;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.DimFilter; import io.druid.query.filter.DimFilter;
import io.druid.query.filter.Filter; import io.druid.query.filter.Filter;
import io.druid.segment.column.BitmapIndex;
import io.druid.segment.data.Indexed;
import java.util.Iterator;
import java.util.List; import java.util.List;
/** /**
@ -67,4 +74,79 @@ public class Filters
{ {
return dimFilter == null ? null : dimFilter.toFilter(); return dimFilter == null ? null : dimFilter.toFilter();
} }
/**
* Return the union of bitmaps for all values of a dimension that match a particular predicate.
*
* If the selector reports no values for the dimension (the value list is null or empty), the predicate is
* applied to null instead: when it matches, the result covers all rows (the complement of an empty bitmap
* over selector.getNumRows()); when it does not, the result is an empty bitmap.
*
* @param dimension dimension to look at; must not be null
* @param selector bitmap selector; must not be null
* @param predicate predicate to use; must not be null, and is itself called with null for a missing dimension
*
* @return bitmap of matching rows
*
* @throws NullPointerException if dimension, selector, or predicate is null
*/
public static ImmutableBitmap matchPredicate(
final String dimension,
final BitmapIndexSelector selector,
final Predicate<String> predicate
)
{
Preconditions.checkNotNull(dimension, "dimension");
Preconditions.checkNotNull(selector, "selector");
Preconditions.checkNotNull(predicate, "predicate");
// Missing dimension -> match all rows if the predicate matches null; match no rows otherwise
final Indexed<String> dimValues = selector.getDimensionValues(dimension);
if (dimValues == null || dimValues.size() == 0) {
if (predicate.apply(null)) {
return selector.getBitmapFactory().complement(
selector.getBitmapFactory().makeEmptyImmutableBitmap(),
selector.getNumRows()
);
} else {
return selector.getBitmapFactory().makeEmptyImmutableBitmap();
}
}
// Apply predicate to all dimension values and union the matching bitmaps.
// The iterable below evaluates the predicate as it is iterated, handing union() one bitmap per matching value.
final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension);
return selector.getBitmapFactory().union(
new Iterable<ImmutableBitmap>()
{
@Override
public Iterator<ImmutableBitmap> iterator()
{
return new Iterator<ImmutableBitmap>()
{
// Position of the next value to test; used to index both dimValues and bitmapIndex.
int currIndex = 0;
@Override
public boolean hasNext()
{
// True while untested values remain, even if none of them will match; next() handles that case.
return currIndex < bitmapIndex.getCardinality();
}
@Override
public ImmutableBitmap next()
{
// Skip over values the predicate rejects.
while (currIndex < bitmapIndex.getCardinality() && !predicate.apply(dimValues.get(currIndex))) {
currIndex++;
}
// Ran off the end without finding another match: return an empty bitmap rather than throwing,
// since hasNext() may have answered true for a tail of non-matching values.
if (currIndex == bitmapIndex.getCardinality()) {
return bitmapIndex.getBitmapFactory().makeEmptyImmutableBitmap();
}
// NOTE(review): relies on bitmapIndex.getBitmap(i) being the bitmap for dimValues.get(i) -- confirm.
return bitmapIndex.getBitmap(currIndex++);
}
@Override
public void remove()
{
throw new UnsupportedOperationException();
}
};
}
}
);
}
} }

View File

@ -23,18 +23,13 @@ import com.google.common.base.Function;
import com.google.common.base.Predicate; import com.google.common.base.Predicate;
import com.google.common.base.Strings; import com.google.common.base.Strings;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.MutableBitmap;
import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.ExtractionFn;
import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter; import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory; import io.druid.query.filter.ValueMatcherFactory;
import javax.annotation.Nullable;
import java.util.List;
import java.util.Set; import java.util.Set;
/** /**
@ -69,18 +64,19 @@ public class InFilter implements Filter
) )
); );
} else { } else {
Iterable<String> allDimVals = selector.getDimensionValues(dimension); return Filters.matchPredicate(
if (allDimVals == null) { dimension,
allDimVals = Lists.newArrayList((String) null); selector,
} new Predicate<String>()
{
List<ImmutableBitmap> bitmaps = Lists.newArrayList(); @Override
for (String dimVal : allDimVals) { public boolean apply(String input)
if (values.contains(Strings.nullToEmpty(extractionFn.apply(dimVal)))) { {
bitmaps.add(selector.getBitmapIndex(dimension, dimVal)); // InDimFilter converts all null "values" to empty.
return values.contains(Strings.nullToEmpty(extractionFn.apply(input)));
} }
} }
return selector.getBitmapFactory().union(bitmaps); );
} }
} }

View File

@ -22,19 +22,15 @@ package io.druid.segment.filter;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.base.Predicate; import com.google.common.base.Predicate;
import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.common.guava.FunctionalIterable;
import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.ExtractionFn;
import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter; import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory; import io.druid.query.filter.ValueMatcherFactory;
import io.druid.segment.data.Indexed;
import org.mozilla.javascript.Context; import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function; import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject; import org.mozilla.javascript.ScriptableObject;
import javax.annotation.Nullable;
public class JavaScriptFilter implements Filter public class JavaScriptFilter implements Filter
{ {
private final JavaScriptPredicate predicate; private final JavaScriptPredicate predicate;
@ -53,39 +49,16 @@ public class JavaScriptFilter implements Filter
{ {
final Context cx = Context.enter(); final Context cx = Context.enter();
try { try {
final Indexed<String> dimValues = selector.getDimensionValues(dimension); final Predicate<String> contextualPredicate = new Predicate<String>()
ImmutableBitmap bitmap;
if (dimValues == null || dimValues.size() == 0) {
bitmap = selector.getBitmapFactory().makeEmptyImmutableBitmap();
if (predicate.applyInContext(cx, null)) {
bitmap = selector.getBitmapFactory().complement(bitmap, selector.getNumRows());
}
} else {
bitmap = selector.getBitmapFactory().union(
FunctionalIterable.create(dimValues)
.filter(
new Predicate<String>()
{ {
@Override @Override
public boolean apply(@Nullable String input) public boolean apply(String input)
{ {
return predicate.applyInContext(cx, input); return predicate.applyInContext(cx, input);
} }
} };
)
.transform( return Filters.matchPredicate(dimension, selector, contextualPredicate);
new com.google.common.base.Function<String, ImmutableBitmap>()
{
@Override
public ImmutableBitmap apply(@Nullable String input)
{
return selector.getBitmapIndex(dimension, input);
}
}
)
);
}
return bitmap;
} }
finally { finally {
Context.exit(); Context.exit();

View File

@ -19,20 +19,11 @@
package io.druid.segment.filter; package io.druid.segment.filter;
import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.collections.bitmap.ImmutableBitmap;
import io.druid.query.extraction.ExtractionFn;
import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter; import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory; import io.druid.query.filter.ValueMatcherFactory;
import io.druid.segment.data.Indexed;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/** /**
*/ */
@ -40,68 +31,25 @@ public class SelectorFilter implements Filter
{ {
private final String dimension; private final String dimension;
private final String value; private final String value;
private final ExtractionFn extractionFn;
public SelectorFilter( public SelectorFilter(
String dimension, String dimension,
String value, String value
ExtractionFn extractionFn
) )
{ {
this.dimension = dimension; this.dimension = dimension;
this.value = Strings.nullToEmpty(value); this.value = value;
this.extractionFn = extractionFn;
} }
@Override @Override
public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector)
{ {
if (extractionFn == null) {
return selector.getBitmapIndex(dimension, value); return selector.getBitmapIndex(dimension, value);
} else {
final List<Filter> filters = makeFiltersUsingExtractionFn(selector);
if (filters.isEmpty()) {
return selector.getBitmapFactory().makeEmptyImmutableBitmap();
}
return new OrFilter(filters).getBitmapIndex(selector);
}
} }
@Override @Override
public ValueMatcher makeMatcher(ValueMatcherFactory factory) public ValueMatcher makeMatcher(ValueMatcherFactory factory)
{ {
if (extractionFn == null) {
return factory.makeValueMatcher(dimension, value); return factory.makeValueMatcher(dimension, value);
} else {
return factory.makeValueMatcher(
dimension, new Predicate<String>()
{
@Override
public boolean apply(String input)
{
// Assuming that a null/absent/empty dimension are equivalent from the druid perspective
return value.equals(Strings.nullToEmpty(extractionFn.apply(input)));
}
}
);
}
}
private List<Filter> makeFiltersUsingExtractionFn(BitmapIndexSelector selector)
{
final List<Filter> filters = Lists.newArrayList();
Iterable<String> allDimVals = selector.getDimensionValues(dimension);
if (allDimVals == null) {
allDimVals = Lists.newArrayList((String) null);
}
for (String dimVal : allDimVals) {
if (value.equals(Strings.nullToEmpty(extractionFn.apply(dimVal)))) {
filters.add(new SelectorFilter(dimension, dimVal, null));
}
}
return filters;
} }
} }

View File

@ -32,6 +32,8 @@ import io.druid.query.extraction.ExtractionFn;
import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.DimFilters; import io.druid.query.filter.DimFilters;
import io.druid.query.filter.ExtractionDimFilter; import io.druid.query.filter.ExtractionDimFilter;
import io.druid.query.filter.Filter;
import io.druid.query.filter.SelectorDimFilter;
import io.druid.segment.column.BitmapIndex; import io.druid.segment.column.BitmapIndex;
import io.druid.segment.data.ArrayIndexed; import io.druid.segment.data.ArrayIndexed;
import io.druid.segment.data.BitmapSerdeFactory; import io.druid.segment.data.BitmapSerdeFactory;
@ -160,9 +162,9 @@ public class ExtractionDimFilterTest
@Test @Test
public void testEmpty() public void testEmpty()
{ {
SelectorFilter extractionFilter = new SelectorFilter( Filter extractionFilter = new SelectorDimFilter(
"foo", "NFDJUKFNDSJFNS", DIM_EXTRACTION_FN "foo", "NFDJUKFNDSJFNS", DIM_EXTRACTION_FN
); ).toFilter();
ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR); ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR);
Assert.assertEquals(0, immutableBitmap.size()); Assert.assertEquals(0, immutableBitmap.size());
} }
@ -170,9 +172,9 @@ public class ExtractionDimFilterTest
@Test @Test
public void testNull() public void testNull()
{ {
SelectorFilter extractionFilter = new SelectorFilter( Filter extractionFilter = new SelectorDimFilter(
"FDHJSFFHDS", "extractDimVal", DIM_EXTRACTION_FN "FDHJSFFHDS", "extractDimVal", DIM_EXTRACTION_FN
); ).toFilter();
ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR); ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR);
Assert.assertEquals(0, immutableBitmap.size()); Assert.assertEquals(0, immutableBitmap.size());
} }
@ -180,9 +182,9 @@ public class ExtractionDimFilterTest
@Test @Test
public void testNormal() public void testNormal()
{ {
SelectorFilter extractionFilter = new SelectorFilter( Filter extractionFilter = new SelectorDimFilter(
"foo", "extractDimVal", DIM_EXTRACTION_FN "foo", "extractDimVal", DIM_EXTRACTION_FN
); ).toFilter();
ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR); ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR);
Assert.assertEquals(1, immutableBitmap.size()); Assert.assertEquals(1, immutableBitmap.size());
} }

View File

@ -259,7 +259,7 @@ public class IncrementalIndexStorageAdapterTest
for (boolean descending : Arrays.asList(false, true)) { for (boolean descending : Arrays.asList(false, true)) {
Sequence<Cursor> cursorSequence = adapter.makeCursors( Sequence<Cursor> cursorSequence = adapter.makeCursors(
new SelectorFilter("sally", "bo", null), new SelectorFilter("sally", "bo"),
interval, interval,
QueryGranularity.NONE, QueryGranularity.NONE,
descending descending