Combine InDimFilter, InFilter. (#10119)

* Combine InDimFilter, InFilter.

There are two motivations:

1. Ensure that when HashJoinSegmentStorageAdapter compares its Filter
   to the original one, and it is an "in" type, the comparison is by
   reference and does not need to check deep equality. This is useful
   when the "in" filter is very large.
2. Simplify things. (There isn't a great reason for the DimFilter and
   Filter logic to be separate, and combining them reduces some
   duplication.)

* Fix test.
This commit is contained in:
Gian Merlino 2020-08-06 18:34:21 -07:00 committed by GitHub
parent b6aaf59e8c
commit 170031744e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 375 additions and 481 deletions

View File

@ -26,6 +26,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Supplier; import com.google.common.base.Supplier;
import com.google.common.base.Suppliers; import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
@ -37,17 +38,29 @@ import com.google.common.collect.TreeRangeSet;
import com.google.common.hash.Hasher; import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing; import com.google.common.hash.Hashing;
import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterable;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.cache.CacheKeyBuilder; import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.vector.VectorValueMatcher;
import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory;
import org.apache.druid.query.lookup.LookupExtractionFn; import org.apache.druid.query.lookup.LookupExtractionFn;
import org.apache.druid.query.lookup.LookupExtractor; import org.apache.druid.query.lookup.LookupExtractor;
import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.filter.InFilter; import org.apache.druid.segment.IntIteratorUtils;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
@ -56,12 +69,14 @@ import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilter public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
{ {
// determined through benchmark that binary search on long[] is faster than HashSet until ~16 elements // determined through benchmark that binary search on long[] is faster than HashSet until ~16 elements
// Hashing threshold is not applied to String for now, String still uses ImmutableSortedSet // Hashing threshold is not applied to String for now, String still uses ImmutableSortedSet
@ -74,12 +89,10 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
private final ExtractionFn extractionFn; private final ExtractionFn extractionFn;
@Nullable @Nullable
private final FilterTuning filterTuning; private final FilterTuning filterTuning;
private final Supplier<DruidLongPredicate> longPredicateSupplier; private final DruidPredicateFactory predicateFactory;
private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
private final Supplier<DruidDoublePredicate> doublePredicateSupplier;
@JsonIgnore @JsonIgnore
private byte[] cacheKey; private final Supplier<byte[]> cacheKeySupplier;
@JsonCreator @JsonCreator
public InDimFilter( public InDimFilter(
@ -91,8 +104,42 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
@JsonProperty("filterTuning") @Nullable FilterTuning filterTuning @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning
) )
{ {
Preconditions.checkNotNull(dimension, "dimension can not be null"); this(
Preconditions.checkArgument(values != null, "values can not be null"); dimension,
values,
extractionFn,
filterTuning,
null
);
}
/**
* This constructor should be called only in unit tests since accepting a Collection makes copying more likely.
*/
@VisibleForTesting
public InDimFilter(String dimension, Collection<String> values, @Nullable ExtractionFn extractionFn)
{
this(
dimension,
values instanceof Set ? (Set<String>) values : new HashSet<>(values),
extractionFn,
null,
null
);
}
/**
* Internal constructor.
*/
private InDimFilter(
final String dimension,
final Set<String> values,
@Nullable final ExtractionFn extractionFn,
@Nullable final FilterTuning filterTuning,
@Nullable final DruidPredicateFactory predicateFactory
)
{
Preconditions.checkNotNull(values, "values cannot be null");
// The values set can be huge. Try to avoid copying the set if possible. // The values set can be huge. Try to avoid copying the set if possible.
// Note that we may still need to copy values to a list for caching. See getCacheKey(). // Note that we may still need to copy values to a list for caching. See getCacheKey().
@ -101,21 +148,18 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
} else { } else {
this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet()); this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet());
} }
this.dimension = dimension;
this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null");
this.extractionFn = extractionFn; this.extractionFn = extractionFn;
this.filterTuning = filterTuning; this.filterTuning = filterTuning;
this.longPredicateSupplier = getLongPredicateSupplier();
this.floatPredicateSupplier = getFloatPredicateSupplier(); if (predicateFactory != null) {
this.doublePredicateSupplier = getDoublePredicateSupplier(); this.predicateFactory = predicateFactory;
} else {
this.predicateFactory = new InFilterDruidPredicateFactory(extractionFn, this.values);
} }
/** this.cacheKeySupplier = Suppliers.memoize(this::computeCacheKey);
* This constructor should be called only in unit tests since it creates a new hash set wrapping the given values.
*/
@VisibleForTesting
public InDimFilter(String dimension, Collection<String> values, @Nullable ExtractionFn extractionFn)
{
this(dimension, new HashSet<>(values), extractionFn, null);
} }
@JsonProperty @JsonProperty
@ -149,26 +193,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
@Override @Override
public byte[] getCacheKey() public byte[] getCacheKey()
{ {
if (cacheKey == null) { return cacheKeySupplier.get();
final List<String> sortedValues = new ArrayList<>(values);
sortedValues.sort(Comparator.nullsFirst(Ordering.natural()));
final Hasher hasher = Hashing.sha256().newHasher();
for (String v : sortedValues) {
if (v == null) {
hasher.putInt(0);
} else {
hasher.putString(v, StandardCharsets.UTF_8);
}
}
cacheKey = new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
.appendString(dimension)
.appendByte(DimFilterUtils.STRING_SEPARATOR)
.appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey())
.appendByte(DimFilterUtils.STRING_SEPARATOR)
.appendByteArray(hasher.hash().asBytes())
.build();
}
return cacheKey;
} }
@Override @Override
@ -190,6 +215,197 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
return inFilter; return inFilter;
} }
@Override
public Filter toFilter()
{
return this;
}
@Nullable
@Override
public RangeSet<String> getDimensionRangeSet(String dimension)
{
if (!Objects.equals(getDimension(), dimension) || getExtractionFn() != null) {
return null;
}
RangeSet<String> retSet = TreeRangeSet.create();
for (String value : values) {
String valueEquivalent = NullHandling.nullToEmptyIfNeeded(value);
if (valueEquivalent == null) {
// Case when SQL compatible null handling is enabled
// Range.singleton(null) is invalid, so use the fact that
// only null values are less than empty string.
retSet.add(Range.lessThan(""));
} else {
retSet.add(Range.singleton(valueEquivalent));
}
}
return retSet;
}
@Override
public Set<String> getRequiredColumns()
{
return ImmutableSet.of(dimension);
}
@Override
public <T> T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory<T> bitmapResultFactory)
{
if (extractionFn == null) {
final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension);
return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(values, bitmapIndex));
} else {
return Filters.matchPredicate(
dimension,
selector,
bitmapResultFactory,
predicateFactory.makeStringPredicate()
);
}
}
@Override
public double estimateSelectivity(BitmapIndexSelector indexSelector)
{
if (extractionFn == null) {
final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension);
return Filters.estimateSelectivity(
bitmapIndex,
IntIteratorUtils.toIntList(getBitmapIndexIterable(values, bitmapIndex).iterator()),
indexSelector.getNumRows()
);
} else {
return Filters.estimateSelectivity(
dimension,
indexSelector,
predicateFactory.makeStringPredicate()
);
}
}
@Override
public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
{
return Filters.makeValueMatcher(factory, dimension, predicateFactory);
}
@Override
public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory)
{
return DimensionHandlerUtils.makeVectorProcessor(
dimension,
VectorValueMatcherColumnProcessorFactory.instance(),
factory
).makeMatcher(predicateFactory);
}
@Override
public boolean canVectorizeMatcher()
{
return true;
}
@Override
public boolean supportsRequiredColumnRewrite()
{
return true;
}
@Override
public Filter rewriteRequiredColumns(Map<String, String> columnRewrites)
{
String rewriteDimensionTo = columnRewrites.get(dimension);
if (rewriteDimensionTo == null) {
throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, dimension);
}
if (rewriteDimensionTo.equals(dimension)) {
return this;
} else {
return new InDimFilter(
rewriteDimensionTo,
values,
extractionFn,
filterTuning,
predicateFactory
);
}
}
@Override
public boolean supportsBitmapIndex(BitmapIndexSelector selector)
{
return selector.getBitmapIndex(dimension) != null;
}
@Override
public boolean shouldUseBitmapIndex(BitmapIndexSelector selector)
{
return Filters.shouldUseBitmapIndex(this, selector, filterTuning);
}
@Override
public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector)
{
return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector);
}
@Override
public String toString()
{
final DimFilterToStringBuilder builder = new DimFilterToStringBuilder();
return builder.appendDimension(dimension, extractionFn)
.append(" IN (")
.append(Joiner.on(", ").join(Iterables.transform(values, StringUtils::nullToEmptyNonDruidDataString)))
.append(")")
.appendFilterTuning(filterTuning)
.build();
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
InDimFilter that = (InDimFilter) o;
return values.equals(that.values) &&
dimension.equals(that.dimension) &&
Objects.equals(extractionFn, that.extractionFn) &&
Objects.equals(filterTuning, that.filterTuning);
}
@Override
public int hashCode()
{
return Objects.hash(values, dimension, extractionFn, filterTuning);
}
private byte[] computeCacheKey()
{
final List<String> sortedValues = new ArrayList<>(values);
sortedValues.sort(Comparator.nullsFirst(Ordering.natural()));
final Hasher hasher = Hashing.sha256().newHasher();
for (String v : sortedValues) {
if (v == null) {
hasher.putInt(0);
} else {
hasher.putString(v, StandardCharsets.UTF_8);
}
}
return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
.appendString(dimension)
.appendByte(DimFilterUtils.STRING_SEPARATOR)
.appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey())
.appendByte(DimFilterUtils.STRING_SEPARATOR)
.appendByteArray(hasher.hash().asBytes())
.build();
}
private InDimFilter optimizeLookup() private InDimFilter optimizeLookup()
{ {
if (extractionFn instanceof LookupExtractionFn if (extractionFn instanceof LookupExtractionFn
@ -227,83 +443,32 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
return this; return this;
} }
@Override private static Iterable<ImmutableBitmap> getBitmapIterable(final Set<String> values, final BitmapIndex bitmapIndex)
public Filter toFilter()
{ {
return new InFilter( return Filters.bitmapsFromIndexes(getBitmapIndexIterable(values, bitmapIndex), bitmapIndex);
dimension,
values,
longPredicateSupplier,
floatPredicateSupplier,
doublePredicateSupplier,
extractionFn,
filterTuning
);
} }
@Nullable private static IntIterable getBitmapIndexIterable(final Set<String> values, final BitmapIndex bitmapIndex)
@Override
public RangeSet<String> getDimensionRangeSet(String dimension)
{ {
if (!Objects.equals(getDimension(), dimension) || getExtractionFn() != null) { return () -> new IntIterator()
return null; {
} final Iterator<String> iterator = values.iterator();
RangeSet<String> retSet = TreeRangeSet.create();
for (String value : values) { @Override
String valueEquivalent = NullHandling.nullToEmptyIfNeeded(value); public boolean hasNext()
if (valueEquivalent == null) { {
// Case when SQL compatible null handling is enabled return iterator.hasNext();
// Range.singleton(null) is invalid, so use the fact that
// only null values are less than empty string.
retSet.add(Range.lessThan(""));
} else {
retSet.add(Range.singleton(valueEquivalent));
}
}
return retSet;
} }
@Override @Override
public Set<String> getRequiredColumns() public int nextInt()
{ {
return ImmutableSet.of(dimension); return bitmapIndex.getIndex(iterator.next());
}
};
} }
@Override private static DruidLongPredicate createLongPredicate(final Set<String> values)
public String toString()
{
final DimFilterToStringBuilder builder = new DimFilterToStringBuilder();
return builder.appendDimension(dimension, extractionFn)
.append(" IN (")
.append(Joiner.on(", ").join(Iterables.transform(values, StringUtils::nullToEmptyNonDruidDataString)))
.append(")")
.appendFilterTuning(filterTuning)
.build();
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
InDimFilter that = (InDimFilter) o;
return values.equals(that.values) &&
dimension.equals(that.dimension) &&
Objects.equals(extractionFn, that.extractionFn) &&
Objects.equals(filterTuning, that.filterTuning);
}
@Override
public int hashCode()
{
return Objects.hash(values, dimension, extractionFn, filterTuning);
}
private DruidLongPredicate createLongPredicate()
{ {
LongArrayList longs = new LongArrayList(values.size()); LongArrayList longs = new LongArrayList(values.size());
for (String value : values) { for (String value : values) {
@ -323,17 +488,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
} }
} }
// As the set of filtered values can be large, parsing them as longs should be done only if needed, and only once. private static DruidFloatPredicate createFloatPredicate(final Set<String> values)
// Pass in a common long predicate supplier to all filters created by .toFilter(), so that
// we only compute the long hashset/array once per query.
// This supplier must be thread-safe, since this DimFilter will be accessed in the query runners.
private Supplier<DruidLongPredicate> getLongPredicateSupplier()
{
Supplier<DruidLongPredicate> longPredicate = this::createLongPredicate;
return Suppliers.memoize(longPredicate);
}
private DruidFloatPredicate createFloatPredicate()
{ {
IntArrayList floatBits = new IntArrayList(values.size()); IntArrayList floatBits = new IntArrayList(values.size());
for (String value : values) { for (String value : values) {
@ -355,13 +510,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
} }
} }
private Supplier<DruidFloatPredicate> getFloatPredicateSupplier() private static DruidDoublePredicate createDoublePredicate(final Set<String> values)
{
Supplier<DruidFloatPredicate> floatPredicate = this::createFloatPredicate;
return Suppliers.memoize(floatPredicate);
}
private DruidDoublePredicate createDoublePredicate()
{ {
LongArrayList doubleBits = new LongArrayList(values.size()); LongArrayList doubleBits = new LongArrayList(values.size());
for (String value : values) { for (String value : values) {
@ -383,9 +532,89 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
} }
} }
private Supplier<DruidDoublePredicate> getDoublePredicateSupplier() @VisibleForTesting
public static class InFilterDruidPredicateFactory implements DruidPredicateFactory
{ {
Supplier<DruidDoublePredicate> doublePredicate = this::createDoublePredicate; private final ExtractionFn extractionFn;
return Suppliers.memoize(doublePredicate); private final Set<String> values;
private final Supplier<DruidLongPredicate> longPredicateSupplier;
private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
private final Supplier<DruidDoublePredicate> doublePredicateSupplier;
InFilterDruidPredicateFactory(
final ExtractionFn extractionFn,
final Set<String> values
)
{
this.extractionFn = extractionFn;
this.values = values;
// As the set of filtered values can be large, parsing them as numbers should be done only if needed, and
// only once. Pass in a common long predicate supplier to all filters created by .toFilter(), so that we only
// compute the long hashset/array once per query. This supplier must be thread-safe, since this DimFilter will be
// accessed in the query runners.
this.longPredicateSupplier = Suppliers.memoize(() -> createLongPredicate(values));
this.floatPredicateSupplier = Suppliers.memoize(() -> createFloatPredicate(values));
this.doublePredicateSupplier = Suppliers.memoize(() -> createDoublePredicate(values));
}
@Override
public Predicate<String> makeStringPredicate()
{
if (extractionFn != null) {
return input -> values.contains(extractionFn.apply(input));
} else {
return values::contains;
}
}
@Override
public DruidLongPredicate makeLongPredicate()
{
if (extractionFn != null) {
return input -> values.contains(extractionFn.apply(input));
} else {
return longPredicateSupplier.get();
}
}
@Override
public DruidFloatPredicate makeFloatPredicate()
{
if (extractionFn != null) {
return input -> values.contains(extractionFn.apply(input));
} else {
return floatPredicateSupplier.get();
}
}
@Override
public DruidDoublePredicate makeDoublePredicate()
{
if (extractionFn != null) {
return input -> values.contains(extractionFn.apply(input));
}
return input -> doublePredicateSupplier.get().applyDouble(input);
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o;
return Objects.equals(extractionFn, that.extractionFn) &&
Objects.equals(values, that.values);
}
@Override
public int hashCode()
{
return Objects.hash(extractionFn, values);
}
} }
} }

View File

@ -160,7 +160,7 @@ public class Filters
* *
* @return an iterable of bitmaps * @return an iterable of bitmaps
*/ */
static Iterable<ImmutableBitmap> bitmapsFromIndexes(final IntIterable indexes, final BitmapIndex bitmapIndex) public static Iterable<ImmutableBitmap> bitmapsFromIndexes(final IntIterable indexes, final BitmapIndex bitmapIndex)
{ {
// Do not use Iterables.transform() to avoid boxing/unboxing integers. // Do not use Iterables.transform() to avoid boxing/unboxing integers.
return new Iterable<ImmutableBitmap>() return new Iterable<ImmutableBitmap>()
@ -404,7 +404,7 @@ public class Filters
}; };
} }
static boolean supportsSelectivityEstimation( public static boolean supportsSelectivityEstimation(
Filter filter, Filter filter,
String dimension, String dimension,
ColumnSelector columnSelector, ColumnSelector columnSelector,

View File

@ -1,336 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.filter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableSet;
import it.unimi.dsi.fastutil.ints.IntIterable;
import it.unimi.dsi.fastutil.ints.IntIterator;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.BitmapIndexSelector;
import org.apache.druid.query.filter.DruidDoublePredicate;
import org.apache.druid.query.filter.DruidFloatPredicate;
import org.apache.druid.query.filter.DruidLongPredicate;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.FilterTuning;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.filter.vector.VectorValueMatcher;
import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory;
import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.IntIteratorUtils;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/**
* The IN filter.
* For single-valued dimension, this filter returns true if the dimension value matches to one of the
* given {@link #values}.
* For multi-valued dimension, this filter returns true if one of the dimension values matches to one of the
* given {@link #values}.
*
* In SQL-compatible null handling mode, this filter is equivalent to {@code (dimension IN [values])} or
* {@code (dimension IN [non-null values] OR dimension IS NULL)} when {@link #values} contains nulls.
* In default null handling mode, this filter is equivalent to {@code (dimension IN [values])} or
* {@code (dimension IN [non-null values, ''])} when {@link #values} contains nulls.
*/
public class InFilter implements Filter
{
private final String dimension;
private final Set<String> values;
private final ExtractionFn extractionFn;
private final FilterTuning filterTuning;
private final Supplier<DruidLongPredicate> longPredicateSupplier;
private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
private final Supplier<DruidDoublePredicate> doublePredicateSupplier;
public InFilter(
String dimension,
Set<String> values,
Supplier<DruidLongPredicate> longPredicateSupplier,
Supplier<DruidFloatPredicate> floatPredicateSupplier,
Supplier<DruidDoublePredicate> doublePredicateSupplier,
ExtractionFn extractionFn,
FilterTuning filterTuning
)
{
this.dimension = dimension;
this.values = values;
this.extractionFn = extractionFn;
this.filterTuning = filterTuning;
this.longPredicateSupplier = longPredicateSupplier;
this.floatPredicateSupplier = floatPredicateSupplier;
this.doublePredicateSupplier = doublePredicateSupplier;
}
@Override
public <T> T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory<T> bitmapResultFactory)
{
if (extractionFn == null) {
final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension);
return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(bitmapIndex));
} else {
return Filters.matchPredicate(
dimension,
selector,
bitmapResultFactory,
getPredicateFactory().makeStringPredicate()
);
}
}
@Override
public double estimateSelectivity(BitmapIndexSelector indexSelector)
{
if (extractionFn == null) {
final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension);
return Filters.estimateSelectivity(
bitmapIndex,
IntIteratorUtils.toIntList(getBitmapIndexIterable(bitmapIndex).iterator()),
indexSelector.getNumRows()
);
} else {
return Filters.estimateSelectivity(
dimension,
indexSelector,
getPredicateFactory().makeStringPredicate()
);
}
}
private Iterable<ImmutableBitmap> getBitmapIterable(final BitmapIndex bitmapIndex)
{
return Filters.bitmapsFromIndexes(getBitmapIndexIterable(bitmapIndex), bitmapIndex);
}
private IntIterable getBitmapIndexIterable(final BitmapIndex bitmapIndex)
{
return () -> new IntIterator()
{
final Iterator<String> iterator = values.iterator();
@Override
public boolean hasNext()
{
return iterator.hasNext();
}
@Override
public int nextInt()
{
return bitmapIndex.getIndex(iterator.next());
}
};
}
@Override
public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
{
return Filters.makeValueMatcher(factory, dimension, getPredicateFactory());
}
@Override
public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory)
{
return DimensionHandlerUtils.makeVectorProcessor(
dimension,
VectorValueMatcherColumnProcessorFactory.instance(),
factory
).makeMatcher(getPredicateFactory());
}
@Override
public boolean canVectorizeMatcher()
{
return true;
}
@Override
public Set<String> getRequiredColumns()
{
return ImmutableSet.of(dimension);
}
@Override
public boolean supportsRequiredColumnRewrite()
{
return true;
}
@Override
public Filter rewriteRequiredColumns(Map<String, String> columnRewrites)
{
String rewriteDimensionTo = columnRewrites.get(dimension);
if (rewriteDimensionTo == null) {
throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, dimension);
}
return new InFilter(
rewriteDimensionTo,
values,
longPredicateSupplier,
floatPredicateSupplier,
doublePredicateSupplier,
extractionFn,
filterTuning
);
}
@Override
public boolean supportsBitmapIndex(BitmapIndexSelector selector)
{
return selector.getBitmapIndex(dimension) != null;
}
@Override
public boolean shouldUseBitmapIndex(BitmapIndexSelector selector)
{
return Filters.shouldUseBitmapIndex(this, selector, filterTuning);
}
@Override
public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector)
{
return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector);
}
private DruidPredicateFactory getPredicateFactory()
{
return new InFilterDruidPredicateFactory(extractionFn, values, longPredicateSupplier, floatPredicateSupplier, doublePredicateSupplier);
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
InFilter inFilter = (InFilter) o;
return Objects.equals(dimension, inFilter.dimension) &&
Objects.equals(values, inFilter.values) &&
Objects.equals(extractionFn, inFilter.extractionFn) &&
Objects.equals(filterTuning, inFilter.filterTuning);
}
@Override
public int hashCode()
{
return Objects.hash(dimension, values, extractionFn, filterTuning);
}
@VisibleForTesting
static class InFilterDruidPredicateFactory implements DruidPredicateFactory
{
private final ExtractionFn extractionFn;
private final Set<String> values;
private final Supplier<DruidLongPredicate> longPredicateSupplier;
private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
private final Supplier<DruidDoublePredicate> doublePredicateSupplier;
InFilterDruidPredicateFactory(
ExtractionFn extractionFn,
Set<String> values,
Supplier<DruidLongPredicate> longPredicateSupplier,
Supplier<DruidFloatPredicate> floatPredicateSupplier,
Supplier<DruidDoublePredicate> doublePredicateSupplier
)
{
this.extractionFn = extractionFn;
this.values = values;
this.longPredicateSupplier = longPredicateSupplier;
this.floatPredicateSupplier = floatPredicateSupplier;
this.doublePredicateSupplier = doublePredicateSupplier;
}
@Override
public Predicate<String> makeStringPredicate()
{
if (extractionFn != null) {
return input -> values.contains(extractionFn.apply(input));
} else {
return input -> values.contains(input);
}
}
@Override
public DruidLongPredicate makeLongPredicate()
{
if (extractionFn != null) {
return input -> values.contains(extractionFn.apply(input));
} else {
return longPredicateSupplier.get();
}
}
@Override
public DruidFloatPredicate makeFloatPredicate()
{
if (extractionFn != null) {
return input -> values.contains(extractionFn.apply(input));
} else {
return floatPredicateSupplier.get();
}
}
@Override
public DruidDoublePredicate makeDoublePredicate()
{
if (extractionFn != null) {
return input -> values.contains(extractionFn.apply(input));
}
return input -> doublePredicateSupplier.get().applyDouble(input);
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o;
return Objects.equals(extractionFn, that.extractionFn) &&
Objects.equals(values, that.values);
}
@Override
public int hashCode()
{
return Objects.hash(extractionFn, values);
}
}
}

View File

@ -224,8 +224,7 @@ public class HashJoinSegmentStorageAdapter implements StorageAdapter
final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey(); final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey();
if (!keyIn.equals(keyCached)) { if (!keyIn.equals(keyCached)) {
// It is a bug if this happens. We expect the comparison to be quick, because in the sane case, identical objects // It is a bug if this happens. The implied key and the cached key should always match.
// will be used and therefore deep equality checks will be unnecessary.
throw new ISE("Pre-analysis mismatch, cannot execute query"); throw new ISE("Pre-analysis mismatch, cannot execute query");
} }

View File

@ -337,8 +337,10 @@ public class InFilterTest extends BaseFilterTest
assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.of()); assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.of());
assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of()); assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of());
assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d")); assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d"));
assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"), assertFilterMatches(
ImmutableList.of("a", "b", "c", "d", "e", "f")); toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"),
ImmutableList.of("a", "b", "c", "d", "e", "f")
);
final Map<String, String> stringMap2 = ImmutableMap.of( final Map<String, String> stringMap2 = ImmutableMap.of(
"a", "e" "a", "e"
@ -364,8 +366,8 @@ public class InFilterTest extends BaseFilterTest
@Test @Test
public void testRequiredColumnRewrite() public void testRequiredColumnRewrite()
{ {
InFilter filter = (InFilter) toInFilter("dim0", "a", "c").toFilter(); InDimFilter filter = (InDimFilter) toInFilter("dim0", "a", "c").toFilter();
InFilter filter2 = (InFilter) toInFilter("dim1", "a", "c").toFilter(); InDimFilter filter2 = (InDimFilter) toInFilter("dim1", "a", "c").toFilter();
Assert.assertTrue(filter.supportsRequiredColumnRewrite()); Assert.assertTrue(filter.supportsRequiredColumnRewrite());
Assert.assertTrue(filter2.supportsRequiredColumnRewrite()); Assert.assertTrue(filter2.supportsRequiredColumnRewrite());
@ -381,17 +383,17 @@ public class InFilterTest extends BaseFilterTest
@Test @Test
public void test_equals() public void test_equals()
{ {
EqualsVerifier.forClass(InFilter.class) EqualsVerifier.forClass(InDimFilter.class)
.usingGetClass() .usingGetClass()
.withNonnullFields("dimension", "values") .withNonnullFields("dimension", "values")
.withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier") .withIgnoredFields("cacheKeySupplier", "predicateFactory", "cachedOptimizedFilter")
.verify(); .verify();
} }
@Test @Test
public void test_equals_forInFilterDruidPredicateFactory() public void test_equals_forInFilterDruidPredicateFactory()
{ {
EqualsVerifier.forClass(InFilter.InFilterDruidPredicateFactory.class) EqualsVerifier.forClass(InDimFilter.InFilterDruidPredicateFactory.class)
.usingGetClass() .usingGetClass()
.withNonnullFields("values") .withNonnullFields("values")
.withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier") .withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier")