diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java index 25688ba87f8..8e13cf1cf50 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java @@ -26,6 +26,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; import com.google.common.base.Supplier; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableSet; @@ -37,17 +38,29 @@ import com.google.common.collect.TreeRangeSet; import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntIterable; +import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.BitmapResultFactory; import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.extraction.ExtractionFn; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; import org.apache.druid.query.lookup.LookupExtractionFn; import org.apache.druid.query.lookup.LookupExtractor; +import org.apache.druid.segment.ColumnSelector; +import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.DimensionHandlerUtils; -import org.apache.druid.segment.filter.InFilter; +import org.apache.druid.segment.IntIteratorUtils; +import org.apache.druid.segment.column.BitmapIndex; +import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; import java.nio.charset.StandardCharsets; @@ -56,12 +69,14 @@ import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.HashSet; +import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; -public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilter +public class InDimFilter extends AbstractOptimizableDimFilter implements Filter { // determined through benchmark that binary search on long[] is faster than HashSet until ~16 elements // Hashing threshold is not applied to String for now, String still uses ImmutableSortedSet @@ -74,12 +89,10 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt private final ExtractionFn extractionFn; @Nullable private final FilterTuning filterTuning; - private final Supplier longPredicateSupplier; - private final Supplier floatPredicateSupplier; - private final Supplier doublePredicateSupplier; + private final DruidPredicateFactory predicateFactory; @JsonIgnore - private byte[] cacheKey; + private final Supplier cacheKeySupplier; @JsonCreator public InDimFilter( @@ -91,8 +104,42 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning ) { - Preconditions.checkNotNull(dimension, "dimension can not be null"); - Preconditions.checkArgument(values != null, "values can not be null"); + this( + dimension, + values, + extractionFn, + filterTuning, + null + ); + } + + /** + * This constructor should be called only in unit tests since accepting a Collection makes copying more likely. + */ + @VisibleForTesting + public InDimFilter(String dimension, Collection values, @Nullable ExtractionFn extractionFn) + { + this( + dimension, + values instanceof Set ? (Set) values : new HashSet<>(values), + extractionFn, + null, + null + ); + } + + /** + * Internal constructor. + */ + private InDimFilter( + final String dimension, + final Set values, + @Nullable final ExtractionFn extractionFn, + @Nullable final FilterTuning filterTuning, + @Nullable final DruidPredicateFactory predicateFactory + ) + { + Preconditions.checkNotNull(values, "values cannot be null"); // The values set can be huge. Try to avoid copying the set if possible. // Note that we may still need to copy values to a list for caching. See getCacheKey(). @@ -101,21 +148,18 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt } else { this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet()); } - this.dimension = dimension; + + this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null"); this.extractionFn = extractionFn; this.filterTuning = filterTuning; - this.longPredicateSupplier = getLongPredicateSupplier(); - this.floatPredicateSupplier = getFloatPredicateSupplier(); - this.doublePredicateSupplier = getDoublePredicateSupplier(); - } - /** - * This constructor should be called only in unit tests since it creates a new hash set wrapping the given values. - */ - @VisibleForTesting - public InDimFilter(String dimension, Collection values, @Nullable ExtractionFn extractionFn) - { - this(dimension, new HashSet<>(values), extractionFn, null); + if (predicateFactory != null) { + this.predicateFactory = predicateFactory; + } else { + this.predicateFactory = new InFilterDruidPredicateFactory(extractionFn, this.values); + } + + this.cacheKeySupplier = Suppliers.memoize(this::computeCacheKey); } @JsonProperty @@ -149,26 +193,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt @Override public byte[] getCacheKey() { - if (cacheKey == null) { - final List sortedValues = new ArrayList<>(values); - sortedValues.sort(Comparator.nullsFirst(Ordering.natural())); - final Hasher hasher = Hashing.sha256().newHasher(); - for (String v : sortedValues) { - if (v == null) { - hasher.putInt(0); - } else { - hasher.putString(v, StandardCharsets.UTF_8); - } - } - cacheKey = new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID) - .appendString(dimension) - .appendByte(DimFilterUtils.STRING_SEPARATOR) - .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) - .appendByte(DimFilterUtils.STRING_SEPARATOR) - .appendByteArray(hasher.hash().asBytes()) - .build(); - } - return cacheKey; + return cacheKeySupplier.get(); } @Override @@ -190,6 +215,197 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt return inFilter; } + @Override + public Filter toFilter() + { + return this; + } + + @Nullable + @Override + public RangeSet getDimensionRangeSet(String dimension) + { + if (!Objects.equals(getDimension(), dimension) || getExtractionFn() != null) { + return null; + } + RangeSet retSet = TreeRangeSet.create(); + for (String value : values) { + String valueEquivalent = NullHandling.nullToEmptyIfNeeded(value); + if (valueEquivalent == null) { + // Case when SQL compatible null handling is enabled + // Range.singleton(null) is invalid, so use the fact that + // only null values are less than empty string. + retSet.add(Range.lessThan("")); + } else { + retSet.add(Range.singleton(valueEquivalent)); + } + } + return retSet; + } + + @Override + public Set getRequiredColumns() + { + return ImmutableSet.of(dimension); + } + + @Override + public T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory bitmapResultFactory) + { + if (extractionFn == null) { + final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); + return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(values, bitmapIndex)); + } else { + return Filters.matchPredicate( + dimension, + selector, + bitmapResultFactory, + predicateFactory.makeStringPredicate() + ); + } + } + + @Override + public double estimateSelectivity(BitmapIndexSelector indexSelector) + { + if (extractionFn == null) { + final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); + return Filters.estimateSelectivity( + bitmapIndex, + IntIteratorUtils.toIntList(getBitmapIndexIterable(values, bitmapIndex).iterator()), + indexSelector.getNumRows() + ); + } else { + return Filters.estimateSelectivity( + dimension, + indexSelector, + predicateFactory.makeStringPredicate() + ); + } + } + + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + { + return Filters.makeValueMatcher(factory, dimension, predicateFactory); + } + + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + return DimensionHandlerUtils.makeVectorProcessor( + dimension, + VectorValueMatcherColumnProcessorFactory.instance(), + factory + ).makeMatcher(predicateFactory); + } + + @Override + public boolean canVectorizeMatcher() + { + return true; + } + + @Override + public boolean supportsRequiredColumnRewrite() + { + return true; + } + + @Override + public Filter rewriteRequiredColumns(Map columnRewrites) + { + String rewriteDimensionTo = columnRewrites.get(dimension); + if (rewriteDimensionTo == null) { + throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, dimension); + } + + if (rewriteDimensionTo.equals(dimension)) { + return this; + } else { + return new InDimFilter( + rewriteDimensionTo, + values, + extractionFn, + filterTuning, + predicateFactory + ); + } + } + + @Override + public boolean supportsBitmapIndex(BitmapIndexSelector selector) + { + return selector.getBitmapIndex(dimension) != null; + } + + @Override + public boolean shouldUseBitmapIndex(BitmapIndexSelector selector) + { + return Filters.shouldUseBitmapIndex(this, selector, filterTuning); + } + + @Override + public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) + { + return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); + } + + @Override + public String toString() + { + final DimFilterToStringBuilder builder = new DimFilterToStringBuilder(); + return builder.appendDimension(dimension, extractionFn) + .append(" IN (") + .append(Joiner.on(", ").join(Iterables.transform(values, StringUtils::nullToEmptyNonDruidDataString))) + .append(")") + .appendFilterTuning(filterTuning) + .build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + InDimFilter that = (InDimFilter) o; + return values.equals(that.values) && + dimension.equals(that.dimension) && + Objects.equals(extractionFn, that.extractionFn) && + Objects.equals(filterTuning, that.filterTuning); + } + + @Override + public int hashCode() + { + return Objects.hash(values, dimension, extractionFn, filterTuning); + } + + private byte[] computeCacheKey() + { + final List sortedValues = new ArrayList<>(values); + sortedValues.sort(Comparator.nullsFirst(Ordering.natural())); + final Hasher hasher = Hashing.sha256().newHasher(); + for (String v : sortedValues) { + if (v == null) { + hasher.putInt(0); + } else { + hasher.putString(v, StandardCharsets.UTF_8); + } + } + return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID) + .appendString(dimension) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey()) + .appendByte(DimFilterUtils.STRING_SEPARATOR) + .appendByteArray(hasher.hash().asBytes()) + .build(); + } + private InDimFilter optimizeLookup() { if (extractionFn instanceof LookupExtractionFn @@ -227,83 +443,32 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt return this; } - @Override - public Filter toFilter() + private static Iterable getBitmapIterable(final Set values, final BitmapIndex bitmapIndex) { - return new InFilter( - dimension, - values, - longPredicateSupplier, - floatPredicateSupplier, - doublePredicateSupplier, - extractionFn, - filterTuning - ); + return Filters.bitmapsFromIndexes(getBitmapIndexIterable(values, bitmapIndex), bitmapIndex); } - @Nullable - @Override - public RangeSet getDimensionRangeSet(String dimension) + private static IntIterable getBitmapIndexIterable(final Set values, final BitmapIndex bitmapIndex) { - if (!Objects.equals(getDimension(), dimension) || getExtractionFn() != null) { - return null; - } - RangeSet retSet = TreeRangeSet.create(); - for (String value : values) { - String valueEquivalent = NullHandling.nullToEmptyIfNeeded(value); - if (valueEquivalent == null) { - // Case when SQL compatible null handling is enabled - // Range.singleton(null) is invalid, so use the fact that - // only null values are less than empty string. - retSet.add(Range.lessThan("")); - } else { - retSet.add(Range.singleton(valueEquivalent)); + return () -> new IntIterator() + { + final Iterator iterator = values.iterator(); + + @Override + public boolean hasNext() + { + return iterator.hasNext(); } - } - return retSet; + + @Override + public int nextInt() + { + return bitmapIndex.getIndex(iterator.next()); + } + }; } - @Override - public Set getRequiredColumns() - { - return ImmutableSet.of(dimension); - } - - @Override - public String toString() - { - final DimFilterToStringBuilder builder = new DimFilterToStringBuilder(); - return builder.appendDimension(dimension, extractionFn) - .append(" IN (") - .append(Joiner.on(", ").join(Iterables.transform(values, StringUtils::nullToEmptyNonDruidDataString))) - .append(")") - .appendFilterTuning(filterTuning) - .build(); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - InDimFilter that = (InDimFilter) o; - return values.equals(that.values) && - dimension.equals(that.dimension) && - Objects.equals(extractionFn, that.extractionFn) && - Objects.equals(filterTuning, that.filterTuning); - } - - @Override - public int hashCode() - { - return Objects.hash(values, dimension, extractionFn, filterTuning); - } - - private DruidLongPredicate createLongPredicate() + private static DruidLongPredicate createLongPredicate(final Set values) { LongArrayList longs = new LongArrayList(values.size()); for (String value : values) { @@ -323,17 +488,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt } } - // As the set of filtered values can be large, parsing them as longs should be done only if needed, and only once. - // Pass in a common long predicate supplier to all filters created by .toFilter(), so that - // we only compute the long hashset/array once per query. - // This supplier must be thread-safe, since this DimFilter will be accessed in the query runners. - private Supplier getLongPredicateSupplier() - { - Supplier longPredicate = this::createLongPredicate; - return Suppliers.memoize(longPredicate); - } - - private DruidFloatPredicate createFloatPredicate() + private static DruidFloatPredicate createFloatPredicate(final Set values) { IntArrayList floatBits = new IntArrayList(values.size()); for (String value : values) { @@ -355,13 +510,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt } } - private Supplier getFloatPredicateSupplier() - { - Supplier floatPredicate = this::createFloatPredicate; - return Suppliers.memoize(floatPredicate); - } - - private DruidDoublePredicate createDoublePredicate() + private static DruidDoublePredicate createDoublePredicate(final Set values) { LongArrayList doubleBits = new LongArrayList(values.size()); for (String value : values) { @@ -383,9 +532,89 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt } } - private Supplier getDoublePredicateSupplier() + @VisibleForTesting + public static class InFilterDruidPredicateFactory implements DruidPredicateFactory { - Supplier doublePredicate = this::createDoublePredicate; - return Suppliers.memoize(doublePredicate); + private final ExtractionFn extractionFn; + private final Set values; + private final Supplier longPredicateSupplier; + private final Supplier floatPredicateSupplier; + private final Supplier doublePredicateSupplier; + + InFilterDruidPredicateFactory( + final ExtractionFn extractionFn, + final Set values + ) + { + this.extractionFn = extractionFn; + this.values = values; + + // As the set of filtered values can be large, parsing them as numbers should be done only if needed, and + // only once. Pass in a common long predicate supplier to all filters created by .toFilter(), so that we only + // compute the long hashset/array once per query. This supplier must be thread-safe, since this DimFilter will be + // accessed in the query runners. + this.longPredicateSupplier = Suppliers.memoize(() -> createLongPredicate(values)); + this.floatPredicateSupplier = Suppliers.memoize(() -> createFloatPredicate(values)); + this.doublePredicateSupplier = Suppliers.memoize(() -> createDoublePredicate(values)); + } + + @Override + public Predicate makeStringPredicate() + { + if (extractionFn != null) { + return input -> values.contains(extractionFn.apply(input)); + } else { + return values::contains; + } + } + + @Override + public DruidLongPredicate makeLongPredicate() + { + if (extractionFn != null) { + return input -> values.contains(extractionFn.apply(input)); + } else { + return longPredicateSupplier.get(); + } + } + + @Override + public DruidFloatPredicate makeFloatPredicate() + { + if (extractionFn != null) { + return input -> values.contains(extractionFn.apply(input)); + } else { + return floatPredicateSupplier.get(); + } + } + + @Override + public DruidDoublePredicate makeDoublePredicate() + { + if (extractionFn != null) { + return input -> values.contains(extractionFn.apply(input)); + } + return input -> doublePredicateSupplier.get().applyDouble(input); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o; + return Objects.equals(extractionFn, that.extractionFn) && + Objects.equals(values, that.values); + } + + @Override + public int hashCode() + { + return Objects.hash(extractionFn, values); + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java index 399adb9eaaf..9cbecba671f 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java @@ -160,7 +160,7 @@ public class Filters * * @return an iterable of bitmaps */ - static Iterable bitmapsFromIndexes(final IntIterable indexes, final BitmapIndex bitmapIndex) + public static Iterable bitmapsFromIndexes(final IntIterable indexes, final BitmapIndex bitmapIndex) { // Do not use Iterables.transform() to avoid boxing/unboxing integers. return new Iterable() @@ -404,7 +404,7 @@ public class Filters }; } - static boolean supportsSelectivityEstimation( + public static boolean supportsSelectivityEstimation( Filter filter, String dimension, ColumnSelector columnSelector, diff --git a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java deleted file mode 100644 index dfc4d711d4b..00000000000 --- a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.filter; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Predicate; -import com.google.common.base.Supplier; -import com.google.common.collect.ImmutableSet; -import it.unimi.dsi.fastutil.ints.IntIterable; -import it.unimi.dsi.fastutil.ints.IntIterator; -import org.apache.druid.collections.bitmap.ImmutableBitmap; -import org.apache.druid.java.util.common.IAE; -import org.apache.druid.query.BitmapResultFactory; -import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.query.filter.BitmapIndexSelector; -import org.apache.druid.query.filter.DruidDoublePredicate; -import org.apache.druid.query.filter.DruidFloatPredicate; -import org.apache.druid.query.filter.DruidLongPredicate; -import org.apache.druid.query.filter.DruidPredicateFactory; -import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.FilterTuning; -import org.apache.druid.query.filter.ValueMatcher; -import org.apache.druid.query.filter.vector.VectorValueMatcher; -import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory; -import org.apache.druid.segment.ColumnSelector; -import org.apache.druid.segment.ColumnSelectorFactory; -import org.apache.druid.segment.DimensionHandlerUtils; -import org.apache.druid.segment.IntIteratorUtils; -import org.apache.druid.segment.column.BitmapIndex; -import org.apache.druid.segment.vector.VectorColumnSelectorFactory; - -import java.util.Iterator; -import java.util.Map; -import java.util.Objects; -import java.util.Set; - -/** - * The IN filter. - * For single-valued dimension, this filter returns true if the dimension value matches to one of the - * given {@link #values}. - * For multi-valued dimension, this filter returns true if one of the dimension values matches to one of the - * given {@link #values}. - * - * In SQL-compatible null handling mode, this filter is equivalent to {@code (dimension IN [values])} or - * {@code (dimension IN [non-null values] OR dimension IS NULL)} when {@link #values} contains nulls. - * In default null handling mode, this filter is equivalent to {@code (dimension IN [values])} or - * {@code (dimension IN [non-null values, ''])} when {@link #values} contains nulls. - */ -public class InFilter implements Filter -{ - private final String dimension; - private final Set values; - private final ExtractionFn extractionFn; - private final FilterTuning filterTuning; - private final Supplier longPredicateSupplier; - private final Supplier floatPredicateSupplier; - private final Supplier doublePredicateSupplier; - - public InFilter( - String dimension, - Set values, - Supplier longPredicateSupplier, - Supplier floatPredicateSupplier, - Supplier doublePredicateSupplier, - ExtractionFn extractionFn, - FilterTuning filterTuning - ) - { - this.dimension = dimension; - this.values = values; - this.extractionFn = extractionFn; - this.filterTuning = filterTuning; - this.longPredicateSupplier = longPredicateSupplier; - this.floatPredicateSupplier = floatPredicateSupplier; - this.doublePredicateSupplier = doublePredicateSupplier; - } - - @Override - public T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory bitmapResultFactory) - { - if (extractionFn == null) { - final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension); - return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(bitmapIndex)); - } else { - return Filters.matchPredicate( - dimension, - selector, - bitmapResultFactory, - getPredicateFactory().makeStringPredicate() - ); - } - } - - @Override - public double estimateSelectivity(BitmapIndexSelector indexSelector) - { - if (extractionFn == null) { - final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension); - return Filters.estimateSelectivity( - bitmapIndex, - IntIteratorUtils.toIntList(getBitmapIndexIterable(bitmapIndex).iterator()), - indexSelector.getNumRows() - ); - } else { - return Filters.estimateSelectivity( - dimension, - indexSelector, - getPredicateFactory().makeStringPredicate() - ); - } - } - - private Iterable getBitmapIterable(final BitmapIndex bitmapIndex) - { - return Filters.bitmapsFromIndexes(getBitmapIndexIterable(bitmapIndex), bitmapIndex); - } - - private IntIterable getBitmapIndexIterable(final BitmapIndex bitmapIndex) - { - return () -> new IntIterator() - { - final Iterator iterator = values.iterator(); - - @Override - public boolean hasNext() - { - return iterator.hasNext(); - } - - @Override - public int nextInt() - { - return bitmapIndex.getIndex(iterator.next()); - } - }; - } - - @Override - public ValueMatcher makeMatcher(ColumnSelectorFactory factory) - { - return Filters.makeValueMatcher(factory, dimension, getPredicateFactory()); - } - - @Override - public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) - { - return DimensionHandlerUtils.makeVectorProcessor( - dimension, - VectorValueMatcherColumnProcessorFactory.instance(), - factory - ).makeMatcher(getPredicateFactory()); - } - - @Override - public boolean canVectorizeMatcher() - { - return true; - } - - @Override - public Set getRequiredColumns() - { - return ImmutableSet.of(dimension); - } - - @Override - public boolean supportsRequiredColumnRewrite() - { - return true; - } - - @Override - public Filter rewriteRequiredColumns(Map columnRewrites) - { - String rewriteDimensionTo = columnRewrites.get(dimension); - if (rewriteDimensionTo == null) { - throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, dimension); - } - - return new InFilter( - rewriteDimensionTo, - values, - longPredicateSupplier, - floatPredicateSupplier, - doublePredicateSupplier, - extractionFn, - filterTuning - ); - } - - @Override - public boolean supportsBitmapIndex(BitmapIndexSelector selector) - { - return selector.getBitmapIndex(dimension) != null; - } - - @Override - public boolean shouldUseBitmapIndex(BitmapIndexSelector selector) - { - return Filters.shouldUseBitmapIndex(this, selector, filterTuning); - } - - @Override - public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector) - { - return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector); - } - - private DruidPredicateFactory getPredicateFactory() - { - return new InFilterDruidPredicateFactory(extractionFn, values, longPredicateSupplier, floatPredicateSupplier, doublePredicateSupplier); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - InFilter inFilter = (InFilter) o; - return Objects.equals(dimension, inFilter.dimension) && - Objects.equals(values, inFilter.values) && - Objects.equals(extractionFn, inFilter.extractionFn) && - Objects.equals(filterTuning, inFilter.filterTuning); - } - - @Override - public int hashCode() - { - return Objects.hash(dimension, values, extractionFn, filterTuning); - } - - @VisibleForTesting - static class InFilterDruidPredicateFactory implements DruidPredicateFactory - { - private final ExtractionFn extractionFn; - private final Set values; - private final Supplier longPredicateSupplier; - private final Supplier floatPredicateSupplier; - private final Supplier doublePredicateSupplier; - - InFilterDruidPredicateFactory( - ExtractionFn extractionFn, - Set values, - Supplier longPredicateSupplier, - Supplier floatPredicateSupplier, - Supplier doublePredicateSupplier - ) - { - this.extractionFn = extractionFn; - this.values = values; - this.longPredicateSupplier = longPredicateSupplier; - this.floatPredicateSupplier = floatPredicateSupplier; - this.doublePredicateSupplier = doublePredicateSupplier; - } - - @Override - public Predicate makeStringPredicate() - { - if (extractionFn != null) { - return input -> values.contains(extractionFn.apply(input)); - } else { - return input -> values.contains(input); - } - } - - @Override - public DruidLongPredicate makeLongPredicate() - { - if (extractionFn != null) { - return input -> values.contains(extractionFn.apply(input)); - } else { - return longPredicateSupplier.get(); - } - } - - @Override - public DruidFloatPredicate makeFloatPredicate() - { - if (extractionFn != null) { - return input -> values.contains(extractionFn.apply(input)); - } else { - return floatPredicateSupplier.get(); - } - } - - @Override - public DruidDoublePredicate makeDoublePredicate() - { - if (extractionFn != null) { - return input -> values.contains(extractionFn.apply(input)); - } - return input -> doublePredicateSupplier.get().applyDouble(input); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o; - return Objects.equals(extractionFn, that.extractionFn) && - Objects.equals(values, that.values); - } - - @Override - public int hashCode() - { - return Objects.hash(extractionFn, values); - } - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java index e23a2d4df2b..b5bef656773 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java @@ -224,8 +224,7 @@ public class HashJoinSegmentStorageAdapter implements StorageAdapter final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey(); if (!keyIn.equals(keyCached)) { - // It is a bug if this happens. We expect the comparison to be quick, because in the sane case, identical objects - // will be used and therefore deep equality checks will be unnecessary. + // It is a bug if this happens. The implied key and the cached key should always match. throw new ISE("Pre-analysis mismatch, cannot execute query"); } diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java index 6a0db8463fe..aac0e6339b5 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java @@ -337,11 +337,13 @@ public class InFilterTest extends BaseFilterTest assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.of()); assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of()); assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d")); - assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"), - ImmutableList.of("a", "b", "c", "d", "e", "f")); + assertFilterMatches( + toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); final Map stringMap2 = ImmutableMap.of( - "a", "e" + "a", "e" ); LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false); LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true); @@ -350,8 +352,8 @@ public class InFilterTest extends BaseFilterTest assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, "a"), ImmutableList.of()); final Map stringMap3 = ImmutableMap.of( - "c", "500", - "100", "e" + "c", "500", + "100", "e" ); LookupExtractor mapExtractor3 = new MapLookupExtractor(stringMap3, false); LookupExtractionFn lookupFn3 = new LookupExtractionFn(mapExtractor3, false, null, false, true); @@ -364,8 +366,8 @@ public class InFilterTest extends BaseFilterTest @Test public void testRequiredColumnRewrite() { - InFilter filter = (InFilter) toInFilter("dim0", "a", "c").toFilter(); - InFilter filter2 = (InFilter) toInFilter("dim1", "a", "c").toFilter(); + InDimFilter filter = (InDimFilter) toInFilter("dim0", "a", "c").toFilter(); + InDimFilter filter2 = (InDimFilter) toInFilter("dim1", "a", "c").toFilter(); Assert.assertTrue(filter.supportsRequiredColumnRewrite()); Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); @@ -381,17 +383,17 @@ public class InFilterTest extends BaseFilterTest @Test public void test_equals() { - EqualsVerifier.forClass(InFilter.class) + EqualsVerifier.forClass(InDimFilter.class) .usingGetClass() .withNonnullFields("dimension", "values") - .withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier") + .withIgnoredFields("cacheKeySupplier", "predicateFactory", "cachedOptimizedFilter") .verify(); } @Test public void test_equals_forInFilterDruidPredicateFactory() { - EqualsVerifier.forClass(InFilter.InFilterDruidPredicateFactory.class) + EqualsVerifier.forClass(InDimFilter.InFilterDruidPredicateFactory.class) .usingGetClass() .withNonnullFields("values") .withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier")