From 59460b17cc704d9d8ea61693c34a6203f971d4bf Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 19 Apr 2016 15:54:32 -0700 Subject: [PATCH] Add Filters.matchPredicate helper, use it where appropriate. (#2851) This approach simplifies code and is generally faster, due to skipping unnecessary dictionary lookups (see #2850). --- .../query/filter/ExtractionDimFilter.java | 8 +- .../druid/query/filter/SelectorDimFilter.java | 20 ++++- .../io/druid/segment/filter/BoundFilter.java | 67 ++++----------- .../filter/DimensionPredicateFilter.java | 72 ++-------------- .../java/io/druid/segment/filter/Filters.java | 82 +++++++++++++++++++ .../io/druid/segment/filter/InFilter.java | 30 +++---- .../segment/filter/JavaScriptFilter.java | 45 ++-------- .../druid/segment/filter/SelectorFilter.java | 60 +------------- .../filter/ExtractionDimFilterTest.java | 14 ++-- .../IncrementalIndexStorageAdapterTest.java | 2 +- 10 files changed, 158 insertions(+), 242 deletions(-) diff --git a/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java b/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java index 0048b8c5bd0..ad0f165966a 100644 --- a/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java @@ -22,16 +22,10 @@ package io.druid.query.filter; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; -import com.google.common.base.Strings; import com.metamx.common.StringUtils; import io.druid.query.extraction.ExtractionFn; -import io.druid.query.lookup.LookupExtractionFn; -import io.druid.query.lookup.LookupExtractor; -import io.druid.segment.filter.SelectorFilter; import java.nio.ByteBuffer; -import java.util.List; -import java.util.Objects; /** * This class is deprecated, use SelectorDimFilter instead: {@link io.druid.query.filter.SelectorDimFilter} @@ -106,7 +100,7 @@ public class ExtractionDimFilter implements DimFilter @Override public Filter toFilter() { - return new SelectorFilter(dimension, value, extractionFn); + return new SelectorDimFilter(dimension, value, extractionFn).toFilter(); } @Override diff --git a/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java b/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java index 6355ee60df9..9c697da33fb 100644 --- a/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java @@ -21,19 +21,18 @@ package io.druid.query.filter; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Function; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; import com.google.common.base.Strings; -import com.google.common.collect.Lists; import com.metamx.common.StringUtils; import io.druid.query.extraction.ExtractionFn; import io.druid.query.lookup.LookupExtractionFn; import io.druid.query.lookup.LookupExtractor; +import io.druid.segment.filter.DimensionPredicateFilter; import io.druid.segment.filter.SelectorFilter; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Objects; @@ -119,7 +118,20 @@ public class SelectorDimFilter implements DimFilter @Override public Filter toFilter() { - return new SelectorFilter(dimension, value, extractionFn); + if (extractionFn == null) { + return new SelectorFilter(dimension, value); + } else { + final String valueOrNull = Strings.emptyToNull(value); + final Predicate predicate = new Predicate() + { + @Override + public boolean apply(String input) + { + return Objects.equals(valueOrNull, input); + } + }; + return new DimensionPredicateFilter(dimension, predicate, extractionFn); + } } @JsonProperty diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index b70093b037c..24cb647a54d 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -29,7 +29,6 @@ import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; import io.druid.query.ordering.StringComparators; import io.druid.segment.column.BitmapIndex; -import io.druid.segment.data.Indexed; import java.util.Comparator; import java.util.Iterator; @@ -52,63 +51,31 @@ public class BoundFilter implements Filter @Override public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { - final BitmapIndex bitmapIndex = selector.getBitmapIndex(boundDimFilter.getDimension()); - - if (bitmapIndex == null) { - if (doesMatch(null)) { - return selector.getBitmapFactory() - .complement(selector.getBitmapFactory().makeEmptyImmutableBitmap(), selector.getNumRows()); - } else { - return selector.getBitmapFactory().makeEmptyImmutableBitmap(); - } - } - if (boundDimFilter.isAlphaNumeric() || extractionFn != null) { - // inspect all values - - // will be non-null because bitmapIndex was non-null - final Indexed dimValues = selector.getDimensionValues(boundDimFilter.getDimension()); - - return selector.getBitmapFactory().union( - new Iterable() + return Filters.matchPredicate( + boundDimFilter.getDimension(), + selector, + new Predicate() { @Override - public Iterator iterator() + public boolean apply(String input) { - return new Iterator() - { - int currIndex = 0; - - @Override - public boolean hasNext() - { - return currIndex < bitmapIndex.getCardinality(); - } - - @Override - public ImmutableBitmap next() - { - while (currIndex < bitmapIndex.getCardinality() && !doesMatch(dimValues.get(currIndex))) { - currIndex++; - } - - if (currIndex == bitmapIndex.getCardinality()) { - return bitmapIndex.getBitmapFactory().makeEmptyImmutableBitmap(); - } - - return bitmapIndex.getBitmap(currIndex++); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; + return doesMatch(input); } } ); } else { + final BitmapIndex bitmapIndex = selector.getBitmapIndex(boundDimFilter.getDimension()); + + if (bitmapIndex == null || bitmapIndex.getCardinality() == 0) { + if (doesMatch(null)) { + return selector.getBitmapFactory() + .complement(selector.getBitmapFactory().makeEmptyImmutableBitmap(), selector.getNumRows()); + } else { + return selector.getBitmapFactory().makeEmptyImmutableBitmap(); + } + } + // search for start, end indexes in the bitmaps; then include all bitmaps between those points final int startIndex; // inclusive diff --git a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java index debdd2a50d2..a4c9811df42 100644 --- a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java @@ -19,6 +19,7 @@ package io.druid.segment.filter; +import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.metamx.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; @@ -26,10 +27,6 @@ import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; -import io.druid.segment.column.BitmapIndex; -import io.druid.segment.data.Indexed; - -import java.util.Iterator; /** */ @@ -44,12 +41,13 @@ public class DimensionPredicateFilter implements Filter final ExtractionFn extractionFn ) { - this.dimension = dimension; + Preconditions.checkNotNull(predicate, "predicate"); + this.dimension = Preconditions.checkNotNull(dimension, "dimension"); - if (predicate == null) { - this.predicate = null; + if (extractionFn == null) { + this.predicate = predicate; } else { - this.predicate = extractionFn == null ? predicate : new Predicate() + this.predicate = new Predicate() { @Override public boolean apply(String input) @@ -63,63 +61,7 @@ public class DimensionPredicateFilter implements Filter @Override public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { - if (predicate == null) { - return selector.getBitmapFactory().makeEmptyImmutableBitmap(); - } - final Indexed dimValues = selector.getDimensionValues(dimension); - if (dimValues == null || dimValues.size() == 0) { - if (predicate.apply(null)) { - return selector.getBitmapFactory().complement( - selector.getBitmapFactory().makeEmptyImmutableBitmap(), - selector.getNumRows() - ); - } else { - return selector.getBitmapFactory().makeEmptyImmutableBitmap(); - } - - } - - final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); - - return selector.getBitmapFactory().union( - new Iterable() - { - @Override - public Iterator iterator() - { - return new Iterator() - { - int currIndex = 0; - - @Override - public boolean hasNext() - { - return currIndex < bitmapIndex.getCardinality(); - } - - @Override - public ImmutableBitmap next() - { - while (currIndex < bitmapIndex.getCardinality() && !predicate.apply(dimValues.get(currIndex))) { - currIndex++; - } - - if (currIndex == bitmapIndex.getCardinality()) { - return bitmapIndex.getBitmapFactory().makeEmptyImmutableBitmap(); - } - - return bitmapIndex.getBitmap(currIndex++); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - } - ); + return Filters.matchPredicate(dimension, selector, predicate); } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index 67f2838d20e..afdd2b43207 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -20,11 +20,18 @@ package io.druid.segment.filter; import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; import com.google.common.collect.ImmutableList; +import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.common.guava.FunctionalIterable; +import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.DimFilter; import io.druid.query.filter.Filter; +import io.druid.segment.column.BitmapIndex; +import io.druid.segment.data.Indexed; +import java.util.Iterator; import java.util.List; /** @@ -67,4 +74,79 @@ public class Filters { return dimFilter == null ? null : dimFilter.toFilter(); } + + /** + * Return the union of bitmaps for all values matching a particular predicate. + * + * @param dimension dimension to look at + * @param selector bitmap selector + * @param predicate predicate to use + * + * @return bitmap of matching rows + */ + public static ImmutableBitmap matchPredicate( + final String dimension, + final BitmapIndexSelector selector, + final Predicate predicate + ) + { + Preconditions.checkNotNull(dimension, "dimension"); + Preconditions.checkNotNull(selector, "selector"); + Preconditions.checkNotNull(predicate, "predicate"); + + // Missing dimension -> match all rows if the predicate matches null; match no rows otherwise + final Indexed dimValues = selector.getDimensionValues(dimension); + if (dimValues == null || dimValues.size() == 0) { + if (predicate.apply(null)) { + return selector.getBitmapFactory().complement( + selector.getBitmapFactory().makeEmptyImmutableBitmap(), + selector.getNumRows() + ); + } else { + return selector.getBitmapFactory().makeEmptyImmutableBitmap(); + } + } + + // Apply predicate to all dimension values and union the matching bitmaps + final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); + return selector.getBitmapFactory().union( + new Iterable() + { + @Override + public Iterator iterator() + { + return new Iterator() + { + int currIndex = 0; + + @Override + public boolean hasNext() + { + return currIndex < bitmapIndex.getCardinality(); + } + + @Override + public ImmutableBitmap next() + { + while (currIndex < bitmapIndex.getCardinality() && !predicate.apply(dimValues.get(currIndex))) { + currIndex++; + } + + if (currIndex == bitmapIndex.getCardinality()) { + return bitmapIndex.getBitmapFactory().makeEmptyImmutableBitmap(); + } + + return bitmapIndex.getBitmap(currIndex++); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + } + ); + } } diff --git a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index 7af0bdee58b..5dd31a05326 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -23,18 +23,13 @@ import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.metamx.collections.bitmap.BitmapFactory; import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.MutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; -import javax.annotation.Nullable; -import java.util.List; import java.util.Set; /** @@ -69,18 +64,19 @@ public class InFilter implements Filter ) ); } else { - Iterable allDimVals = selector.getDimensionValues(dimension); - if (allDimVals == null) { - allDimVals = Lists.newArrayList((String) null); - } - - List bitmaps = Lists.newArrayList(); - for (String dimVal : allDimVals) { - if (values.contains(Strings.nullToEmpty(extractionFn.apply(dimVal)))) { - bitmaps.add(selector.getBitmapIndex(dimension, dimVal)); - } - } - return selector.getBitmapFactory().union(bitmaps); + return Filters.matchPredicate( + dimension, + selector, + new Predicate() + { + @Override + public boolean apply(String input) + { + // InDimFilter converts all null "values" to empty. + return values.contains(Strings.nullToEmpty(extractionFn.apply(input))); + } + } + ); } } diff --git a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java index f4d16b829f6..ab0944323c7 100644 --- a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java @@ -22,19 +22,15 @@ package io.druid.segment.filter; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.common.guava.FunctionalIterable; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; -import io.druid.segment.data.Indexed; import org.mozilla.javascript.Context; import org.mozilla.javascript.Function; import org.mozilla.javascript.ScriptableObject; -import javax.annotation.Nullable; - public class JavaScriptFilter implements Filter { private final JavaScriptPredicate predicate; @@ -53,39 +49,16 @@ public class JavaScriptFilter implements Filter { final Context cx = Context.enter(); try { - final Indexed dimValues = selector.getDimensionValues(dimension); - ImmutableBitmap bitmap; - if (dimValues == null || dimValues.size() == 0) { - bitmap = selector.getBitmapFactory().makeEmptyImmutableBitmap(); - if (predicate.applyInContext(cx, null)) { - bitmap = selector.getBitmapFactory().complement(bitmap, selector.getNumRows()); + final Predicate contextualPredicate = new Predicate() + { + @Override + public boolean apply(String input) + { + return predicate.applyInContext(cx, input); } - } else { - bitmap = selector.getBitmapFactory().union( - FunctionalIterable.create(dimValues) - .filter( - new Predicate() - { - @Override - public boolean apply(@Nullable String input) - { - return predicate.applyInContext(cx, input); - } - } - ) - .transform( - new com.google.common.base.Function() - { - @Override - public ImmutableBitmap apply(@Nullable String input) - { - return selector.getBitmapIndex(dimension, input); - } - } - ) - ); - } - return bitmap; + }; + + return Filters.matchPredicate(dimension, selector, contextualPredicate); } finally { Context.exit(); diff --git a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java index 444f6bbb496..604dce9a45d 100644 --- a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java @@ -19,20 +19,11 @@ package io.druid.segment.filter; -import com.google.common.base.Predicate; -import com.google.common.base.Strings; -import com.google.common.collect.Lists; import com.metamx.collections.bitmap.ImmutableBitmap; -import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; -import io.druid.segment.data.Indexed; - -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; /** */ @@ -40,68 +31,25 @@ public class SelectorFilter implements Filter { private final String dimension; private final String value; - private final ExtractionFn extractionFn; public SelectorFilter( String dimension, - String value, - ExtractionFn extractionFn + String value ) { this.dimension = dimension; - this.value = Strings.nullToEmpty(value); - this.extractionFn = extractionFn; + this.value = value; } @Override public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) { - if (extractionFn == null) { - return selector.getBitmapIndex(dimension, value); - } else { - final List filters = makeFiltersUsingExtractionFn(selector); - if (filters.isEmpty()) { - return selector.getBitmapFactory().makeEmptyImmutableBitmap(); - } - return new OrFilter(filters).getBitmapIndex(selector); - } + return selector.getBitmapIndex(dimension, value); } @Override public ValueMatcher makeMatcher(ValueMatcherFactory factory) { - if (extractionFn == null) { - return factory.makeValueMatcher(dimension, value); - } else { - return factory.makeValueMatcher( - dimension, new Predicate() - { - @Override - public boolean apply(String input) - { - // Assuming that a null/absent/empty dimension are equivalent from the druid perspective - return value.equals(Strings.nullToEmpty(extractionFn.apply(input))); - } - } - ); - } - } - - private List makeFiltersUsingExtractionFn(BitmapIndexSelector selector) - { - final List filters = Lists.newArrayList(); - - Iterable allDimVals = selector.getDimensionValues(dimension); - if (allDimVals == null) { - allDimVals = Lists.newArrayList((String) null); - } - - for (String dimVal : allDimVals) { - if (value.equals(Strings.nullToEmpty(extractionFn.apply(dimVal)))) { - filters.add(new SelectorFilter(dimension, dimVal, null)); - } - } - - return filters; + return factory.makeValueMatcher(dimension, value); } } diff --git a/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java b/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java index 660099b988a..0e3e61ef541 100644 --- a/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java @@ -32,6 +32,8 @@ import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.DimFilters; import io.druid.query.filter.ExtractionDimFilter; +import io.druid.query.filter.Filter; +import io.druid.query.filter.SelectorDimFilter; import io.druid.segment.column.BitmapIndex; import io.druid.segment.data.ArrayIndexed; import io.druid.segment.data.BitmapSerdeFactory; @@ -160,9 +162,9 @@ public class ExtractionDimFilterTest @Test public void testEmpty() { - SelectorFilter extractionFilter = new SelectorFilter( + Filter extractionFilter = new SelectorDimFilter( "foo", "NFDJUKFNDSJFNS", DIM_EXTRACTION_FN - ); + ).toFilter(); ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR); Assert.assertEquals(0, immutableBitmap.size()); } @@ -170,9 +172,9 @@ public class ExtractionDimFilterTest @Test public void testNull() { - SelectorFilter extractionFilter = new SelectorFilter( + Filter extractionFilter = new SelectorDimFilter( "FDHJSFFHDS", "extractDimVal", DIM_EXTRACTION_FN - ); + ).toFilter(); ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR); Assert.assertEquals(0, immutableBitmap.size()); } @@ -180,9 +182,9 @@ public class ExtractionDimFilterTest @Test public void testNormal() { - SelectorFilter extractionFilter = new SelectorFilter( + Filter extractionFilter = new SelectorDimFilter( "foo", "extractDimVal", DIM_EXTRACTION_FN - ); + ).toFilter(); ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR); Assert.assertEquals(1, immutableBitmap.size()); } diff --git a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java index 4ad6c90a95c..4eb5b510177 100644 --- a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java +++ b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java @@ -259,7 +259,7 @@ public class IncrementalIndexStorageAdapterTest for (boolean descending : Arrays.asList(false, true)) { Sequence cursorSequence = adapter.makeCursors( - new SelectorFilter("sally", "bo", null), + new SelectorFilter("sally", "bo"), interval, QueryGranularity.NONE, descending