From 170031744e8a2ed5fc4c06062df014bcfcfe2e1f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 6 Aug 2020 18:34:21 -0700 Subject: [PATCH] Combine InDimFilter, InFilter. (#10119) * Combine InDimFilter, InFilter. There are two motivations: 1. Ensure that when HashJoinSegmentStorageAdapter compares its Filter to the original one, and it is an "in" type, the comparison is by reference and does not need to check deep equality. This is useful when the "in" filter is very large. 2. Simplify things. (There isn't a great reason for the DimFilter and Filter logic to be separate, and combining them reduces some duplication.) * Fix test. --- .../druid/query/filter/InDimFilter.java | 491 +++++++++++++----- .../apache/druid/segment/filter/Filters.java | 4 +- .../apache/druid/segment/filter/InFilter.java | 336 ------------ .../join/HashJoinSegmentStorageAdapter.java | 3 +- .../druid/segment/filter/InFilterTest.java | 22 +- 5 files changed, 375 insertions(+), 481 deletions(-) delete mode 100644 processing/src/main/java/org/apache/druid/segment/filter/InFilter.java diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 25688ba87f8..8e13cf1cf50 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -26,6 +26,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; import com.google.common.base.Supplier; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableSet; @@ -37,17 +38,29 @@ import com.google.common.collect.TreeRangeSet; import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntIterable; +import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; import org.apache.druid.query.lookup.LookupExtractionFn; import org.apache.druid.query.lookup.LookupExtractor; +import org.apache.druid.segment.ColumnSelector; +import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.DimensionHandlerUtils; -import org.apache.druid.segment.filter.InFilter; +import org.apache.druid.segment.IntIteratorUtils; +import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.nio.charset.StandardCharsets; @@ -56,12 +69,14 @@ import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.HashSet; +import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; -public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilter +public class InDimFilter extends AbstractOptimizableDimFilter implements Filter { // determined through benchmark that binary search on long[] is faster than HashSet until ~16 elements // Hashing threshold is not applied to String for now, String still uses ImmutableSortedSet @@ -74,12 +89,10 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt private final ExtractionFn extractionFn; @Nullable private final FilterTuning filterTuning; - private final Supplier longPredicateSupplier; - private final Supplier floatPredicateSupplier; - private final Supplier doublePredicateSupplier; + private final DruidPredicateFactory predicateFactory; @JsonIgnore - private byte[] cacheKey; + private final Supplier cacheKeySupplier; @JsonCreator public InDimFilter( @@ -91,8 +104,42 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning ) { - Preconditions.checkNotNull(dimension, "dimension can not be null"); - Preconditions.checkArgument(values != null, "values can not be null"); + this( + dimension, + values, + extractionFn, + filterTuning, + null + ); + } + + /** + * This constructor should be called only in unit tests since accepting a Collection makes copying more likely. + */ + @VisibleForTesting + public InDimFilter(String dimension, Collection values, @Nullable ExtractionFn extractionFn) + { + this( + dimension, + values instanceof Set ? (Set) values : new HashSet<>(values), + extractionFn, + null, + null + ); + } + + /** + * Internal constructor. + */ + private InDimFilter( + final String dimension, + final Set values, + @Nullable final ExtractionFn extractionFn, + @Nullable final FilterTuning filterTuning, + @Nullable final DruidPredicateFactory predicateFactory + ) + { + Preconditions.checkNotNull(values, "values cannot be null"); // The values set can be huge. Try to avoid copying the set if possible. // Note that we may still need to copy values to a list for caching. See getCacheKey(). @@ -101,21 +148,18 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt } else { this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet()); } - this.dimension = dimension; + + this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null"); this.extractionFn = extractionFn; this.filterTuning = filterTuning; - this.longPredicateSupplier = getLongPredicateSupplier(); - this.floatPredicateSupplier = getFloatPredicateSupplier(); - this.doublePredicateSupplier = getDoublePredicateSupplier(); - } - /** - * This constructor should be called only in unit tests since it creates a new hash set wrapping the given values. - */ - @VisibleForTesting - public InDimFilter(String dimension, Collection values, @Nullable ExtractionFn extractionFn) - { - this(dimension, new HashSet<>(values), extractionFn, null); + if (predicateFactory != null) { + this.predicateFactory = predicateFactory; + } else { + this.predicateFactory = new InFilterDruidPredicateFactory(extractionFn, this.values); + } + + this.cacheKeySupplier = Suppliers.memoize(this::computeCacheKey); } @JsonProperty @@ -149,26 +193,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt @Override public byte[] getCacheKey() { - if (cacheKey == null) { - final List sortedValues = new ArrayList<>(values); - sortedValues.sort(Comparator.nullsFirst(Ordering.natural())); - final Hasher hasher = Hashing.sha256().newHasher(); - for (String v : sortedValues) { - if (v == null) { - hasher.putInt(0); - } else { - hasher.putString(v, StandardCharsets.UTF_8); - } - } - cacheKey = new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID) - .appendString(dimension) - .appendByte(DimFilterUtils.STRING_SEPARATOR) - .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) - .appendByte(DimFilterUtils.STRING_SEPARATOR) - .appendByteArray(hasher.hash().asBytes()) - .build(); - } - return cacheKey; + return cacheKeySupplier.get(); } @Override @@ -190,6 +215,197 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt return inFilter; } + @Override + public Filter toFilter() + { + return this; + } + + @Nullable + @Override + public RangeSet getDimensionRangeSet(String dimension) + { + if (!Objects.equals(getDimension(), dimension) || getExtractionFn() != null) { + return null; + } + RangeSet retSet = TreeRangeSet.create(); + for (String value : values) { + String valueEquivalent = NullHandling.nullToEmptyIfNeeded(value); + if (valueEquivalent == null) { + // Case when SQL compatible null handling is enabled + // Range.singleton(null) is invalid, so use the fact that + // only null values are less than empty string. + retSet.add(Range.lessThan("")); + } else { + retSet.add(Range.singleton(valueEquivalent)); + } + } + return retSet; + } + + @Override + public Set getRequiredColumns() + { + return ImmutableSet.of(dimension); + } + + @Override + public T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory bitmapResultFactory) + { + if (extractionFn == null) { + final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); + return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(values, bitmapIndex)); + } else { + return Filters.matchPredicate( + dimension, + selector, + bitmapResultFactory, + predicateFactory.makeStringPredicate() + ); + } + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + if (extractionFn == null) { + final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); + return Filters.estimateSelectivity( + bitmapIndex, + IntIteratorUtils.toIntList(getBitmapIndexIterable(values, bitmapIndex).iterator()), + indexSelector.getNumRows() + ); + } else { + return Filters.estimateSelectivity( + dimension, + indexSelector, + predicateFactory.makeStringPredicate() + ); + } + } + + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + { + return Filters.makeValueMatcher(factory, dimension, predicateFactory); + } + + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + dimension, + VectorValueMatcherColumnProcessorFactory.instance(), + factory + ).makeMatcher(predicateFactory); + } + + @Override + public boolean canVectorizeMatcher() + { + return true; + } + + @Override + public boolean supportsRequiredColumnRewrite() + { + return true; + } + + @Override + public Filter rewriteRequiredColumns(Map columnRewrites) + { + String rewriteDimensionTo = columnRewrites.get(dimension); + if (rewriteDimensionTo == null) { + throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, dimension); + } + + if (rewriteDimensionTo.equals(dimension)) { + return this; + } else { + return new InDimFilter( + rewriteDimensionTo, + values, + extractionFn, + filterTuning, + predicateFactory + ); + } + } + + @Override + public boolean supportsBitmapIndex(BitmapIndexSelector selector) + { + return selector.getBitmapIndex(dimension) != null; + } + + @Override + public boolean shouldUseBitmapIndex(BitmapIndexSelector selector) + { + return Filters.shouldUseBitmapIndex(this, selector, filterTuning); + } + + @Override + public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + { + return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); + } + + @Override + public String toString() + { + final DimFilterToStringBuilder builder = new DimFilterToStringBuilder(); + return builder.appendDimension(dimension, extractionFn) + .append(" IN (") + .append(Joiner.on(", ").join(Iterables.transform(values, StringUtils::nullToEmptyNonDruidDataString))) + .append(")") + .appendFilterTuning(filterTuning) + .build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + InDimFilter that = (InDimFilter) o; + return values.equals(that.values) && + dimension.equals(that.dimension) && + Objects.equals(extractionFn, that.extractionFn) && + Objects.equals(filterTuning, that.filterTuning); + } + + @Override + public int hashCode() + { + return Objects.hash(values, dimension, extractionFn, filterTuning); + } + + private byte[] computeCacheKey() + { + final List sortedValues = new ArrayList<>(values); + sortedValues.sort(Comparator.nullsFirst(Ordering.natural())); + final Hasher hasher = Hashing.sha256().newHasher(); + for (String v : sortedValues) { + if (v == null) { + hasher.putInt(0); + } else { + hasher.putString(v, StandardCharsets.UTF_8); + } + } + return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID) + .appendString(dimension) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(hasher.hash().asBytes()) + .build(); + } + private InDimFilter optimizeLookup() { if (extractionFn instanceof LookupExtractionFn @@ -227,83 +443,32 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt return this; } - @Override - public Filter toFilter() + private static Iterable getBitmapIterable(final Set values, final BitmapIndex bitmapIndex) { - return new InFilter( - dimension, - values, - longPredicateSupplier, - floatPredicateSupplier, - doublePredicateSupplier, - extractionFn, - filterTuning - ); + return Filters.bitmapsFromIndexes(getBitmapIndexIterable(values, bitmapIndex), bitmapIndex); } - @Nullable - @Override - public RangeSet getDimensionRangeSet(String dimension) + private static IntIterable getBitmapIndexIterable(final Set values, final BitmapIndex bitmapIndex) { - if (!Objects.equals(getDimension(), dimension) || getExtractionFn() != null) { - return null; - } - RangeSet retSet = TreeRangeSet.create(); - for (String value : values) { - String valueEquivalent = NullHandling.nullToEmptyIfNeeded(value); - if (valueEquivalent == null) { - // Case when SQL compatible null handling is enabled - // Range.singleton(null) is invalid, so use the fact that - // only null values are less than empty string. - retSet.add(Range.lessThan("")); - } else { - retSet.add(Range.singleton(valueEquivalent)); + return () -> new IntIterator() + { + final Iterator iterator = values.iterator(); + + @Override + public boolean hasNext() + { + return iterator.hasNext(); } - } - return retSet; + + @Override + public int nextInt() + { + return bitmapIndex.getIndex(iterator.next()); + } + }; } - @Override - public Set getRequiredColumns() - { - return ImmutableSet.of(dimension); - } - - @Override - public String toString() - { - final DimFilterToStringBuilder builder = new DimFilterToStringBuilder(); - return builder.appendDimension(dimension, extractionFn) - .append(" IN (") - .append(Joiner.on(", ").join(Iterables.transform(values, StringUtils::nullToEmptyNonDruidDataString))) - .append(")") - .appendFilterTuning(filterTuning) - .build(); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - InDimFilter that = (InDimFilter) o; - return values.equals(that.values) && - dimension.equals(that.dimension) && - Objects.equals(extractionFn, that.extractionFn) && - Objects.equals(filterTuning, that.filterTuning); - } - - @Override - public int hashCode() - { - return Objects.hash(values, dimension, extractionFn, filterTuning); - } - - private DruidLongPredicate createLongPredicate() + private static DruidLongPredicate createLongPredicate(final Set values) { LongArrayList longs = new LongArrayList(values.size()); for (String value : values) { @@ -323,17 +488,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt } } - // As the set of filtered values can be large, parsing them as longs should be done only if needed, and only once. - // Pass in a common long predicate supplier to all filters created by .toFilter(), so that - // we only compute the long hashset/array once per query. - // This supplier must be thread-safe, since this DimFilter will be accessed in the query runners. - private Supplier getLongPredicateSupplier() - { - Supplier longPredicate = this::createLongPredicate; - return Suppliers.memoize(longPredicate); - } - - private DruidFloatPredicate createFloatPredicate() + private static DruidFloatPredicate createFloatPredicate(final Set values) { IntArrayList floatBits = new IntArrayList(values.size()); for (String value : values) { @@ -355,13 +510,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt } } - private Supplier getFloatPredicateSupplier() - { - Supplier floatPredicate = this::createFloatPredicate; - return Suppliers.memoize(floatPredicate); - } - - private DruidDoublePredicate createDoublePredicate() + private static DruidDoublePredicate createDoublePredicate(final Set values) { LongArrayList doubleBits = new LongArrayList(values.size()); for (String value : values) { @@ -383,9 +532,89 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt } } - private Supplier getDoublePredicateSupplier() + @VisibleForTesting + public static class InFilterDruidPredicateFactory implements DruidPredicateFactory { - Supplier doublePredicate = this::createDoublePredicate; - return Suppliers.memoize(doublePredicate); + private final ExtractionFn extractionFn; + private final Set values; + private final Supplier longPredicateSupplier; + private final Supplier floatPredicateSupplier; + private final Supplier doublePredicateSupplier; + + InFilterDruidPredicateFactory( + final ExtractionFn extractionFn, + final Set values + ) + { + this.extractionFn = extractionFn; + this.values = values; + + // As the set of filtered values can be large, parsing them as numbers should be done only if needed, and + // only once. Pass in a common long predicate supplier to all filters created by .toFilter(), so that we only + // compute the long hashset/array once per query. This supplier must be thread-safe, since this DimFilter will be + // accessed in the query runners. + this.longPredicateSupplier = Suppliers.memoize(() -> createLongPredicate(values)); + this.floatPredicateSupplier = Suppliers.memoize(() -> createFloatPredicate(values)); + this.doublePredicateSupplier = Suppliers.memoize(() -> createDoublePredicate(values)); + } + + @Override + public Predicate makeStringPredicate() + { + if (extractionFn != null) { + return input -> values.contains(extractionFn.apply(input)); + } else { + return values::contains; + } + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + if (extractionFn != null) { + return input -> values.contains(extractionFn.apply(input)); + } else { + return longPredicateSupplier.get(); + } + } + + @Override + public DruidFloatPredicate makeFloatPredicate() + { + if (extractionFn != null) { + return input -> values.contains(extractionFn.apply(input)); + } else { + return floatPredicateSupplier.get(); + } + } + + @Override + public DruidDoublePredicate makeDoublePredicate() + { + if (extractionFn != null) { + return input -> values.contains(extractionFn.apply(input)); + } + return input -> doublePredicateSupplier.get().applyDouble(input); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o; + return Objects.equals(extractionFn, that.extractionFn) && + Objects.equals(values, that.values); + } + + @Override + public int hashCode() + { + return Objects.hash(extractionFn, values); + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 399adb9eaaf..9cbecba671f 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -160,7 +160,7 @@ public class Filters * * @return an iterable of bitmaps */ - static Iterable bitmapsFromIndexes(final IntIterable indexes, final BitmapIndex bitmapIndex) + public static Iterable bitmapsFromIndexes(final IntIterable indexes, final BitmapIndex bitmapIndex) { // Do not use Iterables.transform() to avoid boxing/unboxing integers. return new Iterable() @@ -404,7 +404,7 @@ public class Filters }; } - static boolean supportsSelectivityEstimation( + public static boolean supportsSelectivityEstimation( Filter filter, String dimension, ColumnSelector columnSelector, diff --git a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java deleted file mode 100644 index dfc4d711d4b..00000000000 --- a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.filter; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Predicate; -import com.google.common.base.Supplier; -import com.google.common.collect.ImmutableSet; -import it.unimi.dsi.fastutil.ints.IntIterable; -import it.unimi.dsi.fastutil.ints.IntIterator; -import org.apache.druid.collections.bitmap.ImmutableBitmap; -import org.apache.druid.java.util.common.IAE; -import org.apache.druid.query.BitmapResultFactory; -import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.query.filter.BitmapIndexSelector; -import org.apache.druid.query.filter.DruidDoublePredicate; -import org.apache.druid.query.filter.DruidFloatPredicate; -import org.apache.druid.query.filter.DruidLongPredicate; -import org.apache.druid.query.filter.DruidPredicateFactory; -import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.FilterTuning; -import org.apache.druid.query.filter.ValueMatcher; -import org.apache.druid.query.filter.vector.VectorValueMatcher; -import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; -import org.apache.druid.segment.ColumnSelector; -import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.DimensionHandlerUtils; -import org.apache.druid.segment.IntIteratorUtils; -import org.apache.druid.segment.column.BitmapIndex; -import org.apache.druid.segment.vector.VectorColumnSelectorFactory; - -import java.util.Iterator; -import java.util.Map; -import java.util.Objects; -import java.util.Set; - -/** - * The IN filter. - * For single-valued dimension, this filter returns true if the dimension value matches to one of the - * given {@link #values}. - * For multi-valued dimension, this filter returns true if one of the dimension values matches to one of the - * given {@link #values}. - * - * In SQL-compatible null handling mode, this filter is equivalent to {@code (dimension IN [values])} or - * {@code (dimension IN [non-null values] OR dimension IS NULL)} when {@link #values} contains nulls. - * In default null handling mode, this filter is equivalent to {@code (dimension IN [values])} or - * {@code (dimension IN [non-null values, ''])} when {@link #values} contains nulls. - */ -public class InFilter implements Filter -{ - private final String dimension; - private final Set values; - private final ExtractionFn extractionFn; - private final FilterTuning filterTuning; - private final Supplier longPredicateSupplier; - private final Supplier floatPredicateSupplier; - private final Supplier doublePredicateSupplier; - - public InFilter( - String dimension, - Set values, - Supplier longPredicateSupplier, - Supplier floatPredicateSupplier, - Supplier doublePredicateSupplier, - ExtractionFn extractionFn, - FilterTuning filterTuning - ) - { - this.dimension = dimension; - this.values = values; - this.extractionFn = extractionFn; - this.filterTuning = filterTuning; - this.longPredicateSupplier = longPredicateSupplier; - this.floatPredicateSupplier = floatPredicateSupplier; - this.doublePredicateSupplier = doublePredicateSupplier; - } - - @Override - public T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory bitmapResultFactory) - { - if (extractionFn == null) { - final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); - return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(bitmapIndex)); - } else { - return Filters.matchPredicate( - dimension, - selector, - bitmapResultFactory, - getPredicateFactory().makeStringPredicate() - ); - } - } - - @Override - public double estimateSelectivity(BitmapIndexSelector indexSelector) - { - if (extractionFn == null) { - final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); - return Filters.estimateSelectivity( - bitmapIndex, - IntIteratorUtils.toIntList(getBitmapIndexIterable(bitmapIndex).iterator()), - indexSelector.getNumRows() - ); - } else { - return Filters.estimateSelectivity( - dimension, - indexSelector, - getPredicateFactory().makeStringPredicate() - ); - } - } - - private Iterable getBitmapIterable(final BitmapIndex bitmapIndex) - { - return Filters.bitmapsFromIndexes(getBitmapIndexIterable(bitmapIndex), bitmapIndex); - } - - private IntIterable getBitmapIndexIterable(final BitmapIndex bitmapIndex) - { - return () -> new IntIterator() - { - final Iterator iterator = values.iterator(); - - @Override - public boolean hasNext() - { - return iterator.hasNext(); - } - - @Override - public int nextInt() - { - return bitmapIndex.getIndex(iterator.next()); - } - }; - } - - @Override - public ValueMatcher makeMatcher(ColumnSelectorFactory factory) - { - return Filters.makeValueMatcher(factory, dimension, getPredicateFactory()); - } - - @Override - public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) - { - return DimensionHandlerUtils.makeVectorProcessor( - dimension, - VectorValueMatcherColumnProcessorFactory.instance(), - factory - ).makeMatcher(getPredicateFactory()); - } - - @Override - public boolean canVectorizeMatcher() - { - return true; - } - - @Override - public Set getRequiredColumns() - { - return ImmutableSet.of(dimension); - } - - @Override - public boolean supportsRequiredColumnRewrite() - { - return true; - } - - @Override - public Filter rewriteRequiredColumns(Map columnRewrites) - { - String rewriteDimensionTo = columnRewrites.get(dimension); - if (rewriteDimensionTo == null) { - throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, dimension); - } - - return new InFilter( - rewriteDimensionTo, - values, - longPredicateSupplier, - floatPredicateSupplier, - doublePredicateSupplier, - extractionFn, - filterTuning - ); - } - - @Override - public boolean supportsBitmapIndex(BitmapIndexSelector selector) - { - return selector.getBitmapIndex(dimension) != null; - } - - @Override - public boolean shouldUseBitmapIndex(BitmapIndexSelector selector) - { - return Filters.shouldUseBitmapIndex(this, selector, filterTuning); - } - - @Override - public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) - { - return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); - } - - private DruidPredicateFactory getPredicateFactory() - { - return new InFilterDruidPredicateFactory(extractionFn, values, longPredicateSupplier, floatPredicateSupplier, doublePredicateSupplier); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - InFilter inFilter = (InFilter) o; - return Objects.equals(dimension, inFilter.dimension) && - Objects.equals(values, inFilter.values) && - Objects.equals(extractionFn, inFilter.extractionFn) && - Objects.equals(filterTuning, inFilter.filterTuning); - } - - @Override - public int hashCode() - { - return Objects.hash(dimension, values, extractionFn, filterTuning); - } - - @VisibleForTesting - static class InFilterDruidPredicateFactory implements DruidPredicateFactory - { - private final ExtractionFn extractionFn; - private final Set values; - private final Supplier longPredicateSupplier; - private final Supplier floatPredicateSupplier; - private final Supplier doublePredicateSupplier; - - InFilterDruidPredicateFactory( - ExtractionFn extractionFn, - Set values, - Supplier longPredicateSupplier, - Supplier floatPredicateSupplier, - Supplier doublePredicateSupplier - ) - { - this.extractionFn = extractionFn; - this.values = values; - this.longPredicateSupplier = longPredicateSupplier; - this.floatPredicateSupplier = floatPredicateSupplier; - this.doublePredicateSupplier = doublePredicateSupplier; - } - - @Override - public Predicate makeStringPredicate() - { - if (extractionFn != null) { - return input -> values.contains(extractionFn.apply(input)); - } else { - return input -> values.contains(input); - } - } - - @Override - public DruidLongPredicate makeLongPredicate() - { - if (extractionFn != null) { - return input -> values.contains(extractionFn.apply(input)); - } else { - return longPredicateSupplier.get(); - } - } - - @Override - public DruidFloatPredicate makeFloatPredicate() - { - if (extractionFn != null) { - return input -> values.contains(extractionFn.apply(input)); - } else { - return floatPredicateSupplier.get(); - } - } - - @Override - public DruidDoublePredicate makeDoublePredicate() - { - if (extractionFn != null) { - return input -> values.contains(extractionFn.apply(input)); - } - return input -> doublePredicateSupplier.get().applyDouble(input); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o; - return Objects.equals(extractionFn, that.extractionFn) && - Objects.equals(values, that.values); - } - - @Override - public int hashCode() - { - return Objects.hash(extractionFn, values); - } - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java index e23a2d4df2b..b5bef656773 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java @@ -224,8 +224,7 @@ public class HashJoinSegmentStorageAdapter implements StorageAdapter final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey(); if (!keyIn.equals(keyCached)) { - // It is a bug if this happens. We expect the comparison to be quick, because in the sane case, identical objects - // will be used and therefore deep equality checks will be unnecessary. + // It is a bug if this happens. The implied key and the cached key should always match. throw new ISE("Pre-analysis mismatch, cannot execute query"); } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java index 6a0db8463fe..aac0e6339b5 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java @@ -337,11 +337,13 @@ public class InFilterTest extends BaseFilterTest assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.of()); assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of()); assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d")); - assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"), - ImmutableList.of("a", "b", "c", "d", "e", "f")); + assertFilterMatches( + toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); final Map stringMap2 = ImmutableMap.of( - "a", "e" + "a", "e" ); LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false); LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true); @@ -350,8 +352,8 @@ public class InFilterTest extends BaseFilterTest assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, "a"), ImmutableList.of()); final Map stringMap3 = ImmutableMap.of( - "c", "500", - "100", "e" + "c", "500", + "100", "e" ); LookupExtractor mapExtractor3 = new MapLookupExtractor(stringMap3, false); LookupExtractionFn lookupFn3 = new LookupExtractionFn(mapExtractor3, false, null, false, true); @@ -364,8 +366,8 @@ public class InFilterTest extends BaseFilterTest @Test public void testRequiredColumnRewrite() { - InFilter filter = (InFilter) toInFilter("dim0", "a", "c").toFilter(); - InFilter filter2 = (InFilter) toInFilter("dim1", "a", "c").toFilter(); + InDimFilter filter = (InDimFilter) toInFilter("dim0", "a", "c").toFilter(); + InDimFilter filter2 = (InDimFilter) toInFilter("dim1", "a", "c").toFilter(); Assert.assertTrue(filter.supportsRequiredColumnRewrite()); Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); @@ -381,17 +383,17 @@ public class InFilterTest extends BaseFilterTest @Test public void test_equals() { - EqualsVerifier.forClass(InFilter.class) + EqualsVerifier.forClass(InDimFilter.class) .usingGetClass() .withNonnullFields("dimension", "values") - .withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier") + .withIgnoredFields("cacheKeySupplier", "predicateFactory", "cachedOptimizedFilter") .verify(); } @Test public void test_equals_forInFilterDruidPredicateFactory() { - EqualsVerifier.forClass(InFilter.InFilterDruidPredicateFactory.class) + EqualsVerifier.forClass(InDimFilter.InFilterDruidPredicateFactory.class) .usingGetClass() .withNonnullFields("values") .withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier")