Avoid sorting values in InDimFilter if possible (#9800)

* Avoid sorting values in InDimFilter if possible

* tests

* more tests

* fix and and or filters

* fix build

* false and true vector matchers

* fix vector matchers

* checkstyle

* in filter null handling

* remove wrong test

* address comments

* remove unnecessary null check

* redundant separator

* address comments

* typo

* tests
This commit is contained in:
Jihoon Son 2020-05-06 15:26:36 -07:00 committed by GitHub
parent ce03f31a73
commit 6674d721bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 717 additions and 146 deletions

View File

@ -94,6 +94,11 @@ public class NullHandling
//CHECKSTYLE.ON: Regexp
}
public static boolean needsEmptyToNull(@Nullable String value)
{
return replaceWithDefault() && Strings.isNullOrEmpty(value);
}
@Nullable
public static String defaultStringValue()
{

View File

@ -348,7 +348,7 @@ public class IngestSegmentFirehoseFactoryTimelineTest
DATA_SOURCE,
testCase.interval,
null,
new TrueDimFilter(),
TrueDimFilter.instance(),
Arrays.asList(DIMENSIONS),
Arrays.asList(METRICS),
// Split as much as possible

View File

@ -23,7 +23,7 @@ import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.query.aggregation.AggregatorFactory;
@ -58,6 +58,7 @@ import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
*/
@ -202,7 +203,9 @@ public class Druids
public TimeseriesQueryBuilder filters(String dimensionName, String value, String... values)
{
dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values), null, null);
final Set<String> filterValues = Sets.newHashSet(values);
filterValues.add(value);
dimFilter = new InDimFilter(dimensionName, filterValues, null, null);
return this;
}

View File

@ -33,6 +33,7 @@ import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
*/
@ -72,8 +73,20 @@ public class AndDimFilter implements DimFilter
@Override
public DimFilter optimize()
{
List<DimFilter> elements = DimFilters.optimize(fields);
return elements.size() == 1 ? elements.get(0) : new AndDimFilter(elements);
List<DimFilter> elements = DimFilters.optimize(fields)
.stream()
.filter(filter -> !(filter instanceof TrueDimFilter))
.collect(Collectors.toList());
if (elements.isEmpty()) {
// All elements were TrueDimFilter after optimization
return TrueDimFilter.instance();
} else if (elements.size() == 1) {
return elements.get(0);
} else if (elements.stream().anyMatch(filter -> filter instanceof FalseDimFilter)) {
return FalseDimFilter.instance();
} else {
return new AndDimFilter(elements);
}
}
@Override

View File

@ -47,7 +47,8 @@ import java.util.Set;
@JsonSubTypes.Type(name = "interval", value = IntervalDimFilter.class),
@JsonSubTypes.Type(name = "like", value = LikeDimFilter.class),
@JsonSubTypes.Type(name = "expression", value = ExpressionDimFilter.class),
@JsonSubTypes.Type(name = "true", value = TrueDimFilter.class)
@JsonSubTypes.Type(name = "true", value = TrueDimFilter.class),
@JsonSubTypes.Type(name = "false", value = FalseDimFilter.class)
})
public interface DimFilter extends Cacheable
{
@ -78,6 +79,7 @@ public interface DimFilter extends Cacheable
* @return a RangeSet that represent the possible range of the input dimension, or null if it is not possible to
* determine for this DimFilter.
*/
@Nullable
RangeSet<String> getDimensionRangeSet(String dimension);
/**

View File

@ -52,6 +52,7 @@ public class DimFilterUtils
static final byte COLUMN_COMPARISON_CACHE_ID = 0xD;
static final byte EXPRESSION_CACHE_ID = 0xE;
static final byte TRUE_CACHE_ID = 0xF;
static final byte FALSE_CACHE_ID = 0x11;
public static final byte BLOOM_DIM_FILTER_CACHE_ID = 0x10;
public static final byte STRING_SEPARATOR = (byte) 0xFF;

View File

@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.filter;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.RangeSet;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.filter.FalseFilter;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.Set;
public class FalseDimFilter implements DimFilter
{
private static final FalseDimFilter INSTANCE = new FalseDimFilter();
private static final byte[] CACHE_KEY = new CacheKeyBuilder(DimFilterUtils.FALSE_CACHE_ID).build();
@JsonCreator
public static FalseDimFilter instance()
{
return INSTANCE;
}
private FalseDimFilter()
{
}
@Override
public DimFilter optimize()
{
return this;
}
@Override
public Filter toFilter()
{
return FalseFilter.instance();
}
@Nullable
@Override
public RangeSet<String> getDimensionRangeSet(String dimension)
{
return ImmutableRangeSet.of();
}
@Override
public Set<String> getRequiredColumns()
{
return Collections.emptySet();
}
@Override
public byte[] getCacheKey()
{
return CACHE_KEY;
}
@Override
public int hashCode()
{
return DimFilterUtils.FALSE_CACHE_ID;
}
@Override
public boolean equals(Object o)
{
return o != null && o.getClass() == this.getClass();
}
@Override
public String toString()
{
return "FALSE";
}
}

View File

@ -20,6 +20,7 @@
package org.apache.druid.query.filter;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
@ -29,16 +30,18 @@ import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Ordering;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.collect.TreeRangeSet;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.lookup.LookupExtractionFn;
@ -47,14 +50,16 @@ import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.filter.InFilter;
import javax.annotation.Nullable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
public class InDimFilter implements DimFilter
{
@ -63,7 +68,7 @@ public class InDimFilter implements DimFilter
public static final int NUMERIC_HASHING_THRESHOLD = 16;
// Values can contain `null` object
private final SortedSet<String> values;
private final Set<String> values;
private final String dimension;
@Nullable
private final ExtractionFn extractionFn;
@ -73,10 +78,15 @@ public class InDimFilter implements DimFilter
private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
private final Supplier<DruidDoublePredicate> doublePredicateSupplier;
@JsonIgnore
private byte[] cacheKey;
@JsonCreator
public InDimFilter(
@JsonProperty("dimension") String dimension,
@JsonProperty("values") Collection<String> values,
// This 'values' collection instance can be reused if possible to avoid copying a big collection.
// Callers should _not_ modify the collection after it is passed to this constructor.
@JsonProperty("values") Set<String> values,
@JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn,
@JsonProperty("filterTuning") @Nullable FilterTuning filterTuning
)
@ -84,9 +94,12 @@ public class InDimFilter implements DimFilter
Preconditions.checkNotNull(dimension, "dimension can not be null");
Preconditions.checkArgument(values != null, "values can not be null");
this.values = new TreeSet<>(Comparators.naturalNullsFirst());
for (String value : values) {
this.values.add(NullHandling.emptyToNullIfNeeded(value));
// The values set can be huge. Try to avoid copying the set if possible.
// Note that we may still need to copy values to a list for caching. See getCacheKey().
if ((NullHandling.sqlCompatible() || values.stream().noneMatch(NullHandling::needsEmptyToNull))) {
this.values = values;
} else {
this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet());
}
this.dimension = dimension;
this.extractionFn = extractionFn;
@ -96,10 +109,13 @@ public class InDimFilter implements DimFilter
this.doublePredicateSupplier = getDoublePredicateSupplier();
}
/**
* This constructor should be called only in unit tests since it creates a new hash set wrapping the given values.
*/
@VisibleForTesting
public InDimFilter(String dimension, Collection<String> values, @Nullable ExtractionFn extractionFn)
{
this(dimension, values, extractionFn, null);
this(dimension, new HashSet<>(values), extractionFn, null);
}
@JsonProperty
@ -115,6 +131,7 @@ public class InDimFilter implements DimFilter
}
@Nullable
@JsonInclude(JsonInclude.Include.NON_NULL)
@JsonProperty
public ExtractionFn getExtractionFn()
{
@ -132,31 +149,40 @@ public class InDimFilter implements DimFilter
@Override
public byte[] getCacheKey()
{
boolean hasNull = false;
for (String value : values) {
if (value == null) {
hasNull = true;
break;
if (cacheKey == null) {
final List<String> sortedValues = new ArrayList<>(values);
sortedValues.sort(Comparator.nullsFirst(Ordering.natural()));
final Hasher hasher = Hashing.sha256().newHasher();
for (String v : sortedValues) {
if (v == null) {
hasher.putInt(0);
} else {
hasher.putString(v, StandardCharsets.UTF_8);
}
}
cacheKey = new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
.appendString(dimension)
.appendByte(DimFilterUtils.STRING_SEPARATOR)
.appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey())
.appendByte(DimFilterUtils.STRING_SEPARATOR)
.appendByteArray(hasher.hash().asBytes())
.build();
}
return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
.appendString(dimension)
.appendByte(DimFilterUtils.STRING_SEPARATOR)
.appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey())
.appendByte(DimFilterUtils.STRING_SEPARATOR)
.appendByte(hasNull ? NullHandling.IS_NULL_BYTE : NullHandling.IS_NOT_NULL_BYTE)
.appendByte(DimFilterUtils.STRING_SEPARATOR)
.appendStrings(values).build();
return cacheKey;
}
@Override
public DimFilter optimize()
{
InDimFilter inFilter = optimizeLookup();
if (inFilter.values.isEmpty()) {
return FalseDimFilter.instance();
}
if (inFilter.values.size() == 1) {
return new SelectorDimFilter(
inFilter.dimension,
inFilter.values.first(),
inFilter.values.iterator().next(),
inFilter.getExtractionFn(),
filterTuning
);
@ -171,7 +197,7 @@ public class InDimFilter implements DimFilter
LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
LookupExtractor lookup = exFn.getLookup();
final List<String> keys = new ArrayList<>();
final Set<String> keys = new HashSet<>();
for (String value : values) {
// We cannot do an unapply()-based optimization if the selector value
@ -215,6 +241,7 @@ public class InDimFilter implements DimFilter
);
}
@Nullable
@Override
public RangeSet<String> getDimensionRangeSet(String dimension)
{
@ -302,7 +329,7 @@ public class InDimFilter implements DimFilter
// This supplier must be thread-safe, since this DimFilter will be accessed in the query runners.
private Supplier<DruidLongPredicate> getLongPredicateSupplier()
{
Supplier<DruidLongPredicate> longPredicate = () -> createLongPredicate();
Supplier<DruidLongPredicate> longPredicate = this::createLongPredicate;
return Suppliers.memoize(longPredicate);
}
@ -330,7 +357,7 @@ public class InDimFilter implements DimFilter
private Supplier<DruidFloatPredicate> getFloatPredicateSupplier()
{
Supplier<DruidFloatPredicate> floatPredicate = () -> createFloatPredicate();
Supplier<DruidFloatPredicate> floatPredicate = this::createFloatPredicate;
return Suppliers.memoize(floatPredicate);
}
@ -358,7 +385,7 @@ public class InDimFilter implements DimFilter
private Supplier<DruidDoublePredicate> getDoublePredicateSupplier()
{
Supplier<DruidDoublePredicate> doublePredicate = () -> createDoublePredicate();
Supplier<DruidDoublePredicate> doublePredicate = this::createDoublePredicate;
return Suppliers.memoize(doublePredicate);
}
}

View File

@ -60,10 +60,17 @@ public class NotDimFilter implements DimFilter
return ByteBuffer.allocate(1 + subKey.length).put(DimFilterUtils.NOT_CACHE_ID).put(subKey).array();
}
@SuppressWarnings("ObjectEquality")
@Override
public DimFilter optimize()
{
return new NotDimFilter(this.getField().optimize());
final DimFilter optimized = this.getField().optimize();
if (optimized == FalseDimFilter.instance()) {
return TrueDimFilter.instance();
} else if (optimized == TrueDimFilter.instance()) {
return FalseDimFilter.instance();
}
return new NotDimFilter(optimized);
}
@Override

View File

@ -34,6 +34,7 @@ import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
*/
@ -44,9 +45,7 @@ public class OrDimFilter implements DimFilter
private final List<DimFilter> fields;
@JsonCreator
public OrDimFilter(
@JsonProperty("fields") List<DimFilter> fields
)
public OrDimFilter(@JsonProperty("fields") List<DimFilter> fields)
{
fields = DimFilters.filterNulls(fields);
Preconditions.checkArgument(fields.size() > 0, "OR operator requires at least one field");
@ -82,8 +81,20 @@ public class OrDimFilter implements DimFilter
@Override
public DimFilter optimize()
{
List<DimFilter> elements = DimFilters.optimize(fields);
return elements.size() == 1 ? elements.get(0) : new OrDimFilter(elements);
List<DimFilter> elements = DimFilters.optimize(fields)
.stream()
.filter(filter -> !(filter instanceof FalseDimFilter))
.collect(Collectors.toList());
if (elements.isEmpty()) {
// All elements were FalseDimFilter after optimization
return FalseDimFilter.instance();
} else if (elements.size() == 1) {
return elements.get(0);
} else if (elements.stream().anyMatch(filter -> filter instanceof TrueDimFilter)) {
return TrueDimFilter.instance();
} else {
return new OrDimFilter(elements);
}
}
@Override

View File

@ -96,7 +96,7 @@ public class SelectorDimFilter implements DimFilter
@Override
public DimFilter optimize()
{
return new InDimFilter(dimension, Collections.singletonList(value), extractionFn, filterTuning).optimize();
return new InDimFilter(dimension, Collections.singleton(value), extractionFn, filterTuning).optimize();
}
@Override

View File

@ -19,6 +19,7 @@
package org.apache.druid.query.filter;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.google.common.collect.RangeSet;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.filter.TrueFilter;
@ -26,14 +27,25 @@ import org.apache.druid.segment.filter.TrueFilter;
import java.util.Collections;
import java.util.Set;
/**
*/
public class TrueDimFilter implements DimFilter
{
private static final TrueDimFilter INSTANCE = new TrueDimFilter();
private static final byte[] CACHE_KEY = new CacheKeyBuilder(DimFilterUtils.TRUE_CACHE_ID).build();
@JsonCreator
public static TrueDimFilter instance()
{
return INSTANCE;
}
private TrueDimFilter()
{
}
@Override
public byte[] getCacheKey()
{
return new CacheKeyBuilder(DimFilterUtils.TRUE_CACHE_ID).build();
return CACHE_KEY;
}
@Override
@ -59,4 +71,22 @@ public class TrueDimFilter implements DimFilter
{
return Collections.emptySet();
}
@Override
public int hashCode()
{
return DimFilterUtils.TRUE_CACHE_ID;
}
@Override
public boolean equals(Object o)
{
return o != null && o.getClass() == this.getClass();
}
@Override
public String toString()
{
return "TRUE";
}
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.filter.vector;
import org.apache.druid.segment.vector.VectorSizeInspector;
public class FalseVectorMatcher implements VectorValueMatcher
{
private final VectorSizeInspector vectorSizeInspector;
public FalseVectorMatcher(VectorSizeInspector vectorSizeInspector)
{
this.vectorSizeInspector = vectorSizeInspector;
}
@Override
public ReadableVectorMatch match(ReadableVectorMatch mask)
{
return VectorMatch.allFalse();
}
@Override
public int getMaxVectorSize()
{
return vectorSizeInspector.getMaxVectorSize();
}
@Override
public int getCurrentVectorSize()
{
return vectorSizeInspector.getCurrentVectorSize();
}
}

View File

@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.filter.vector;
import org.apache.druid.segment.vector.VectorSizeInspector;
public class TrueVectorMatcher implements VectorValueMatcher
{
private final VectorSizeInspector vectorSizeInspector;
public TrueVectorMatcher(VectorSizeInspector vectorSizeInspector)
{
this.vectorSizeInspector = vectorSizeInspector;
}
@Override
public ReadableVectorMatch match(ReadableVectorMatch mask)
{
// The given mask is all true for its valid selections.
return mask;
}
@Override
public int getMaxVectorSize()
{
return vectorSizeInspector.getMaxVectorSize();
}
@Override
public int getCurrentVectorSize()
{
return vectorSizeInspector.getCurrentVectorSize();
}
}

View File

@ -19,7 +19,7 @@
package org.apache.druid.query.topn;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.query.DataSource;
@ -42,6 +42,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* A Builder for TopNQuery.
@ -230,7 +231,9 @@ public class TopNQueryBuilder
public TopNQueryBuilder filters(String dimensionName, String value, String... values)
{
dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values), null, null);
final Set<String> filterValues = Sets.newHashSet(values);
filterValues.add(value);
dimFilter = new InDimFilter(dimensionName, filterValues, null, null);
return this;
}

View File

@ -23,8 +23,11 @@ import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.filter.BitmapIndexSelector;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.filter.vector.FalseVectorMatcher;
import org.apache.druid.query.filter.vector.VectorValueMatcher;
import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import java.util.Collections;
import java.util.Set;
@ -60,6 +63,12 @@ public class FalseFilter implements Filter
return FalseValueMatcher.instance();
}
@Override
public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory)
{
return new FalseVectorMatcher(factory.getVectorSizeInspector());
}
@Override
public boolean supportsBitmapIndex(BitmapIndexSelector selector)
{
@ -78,6 +87,12 @@ public class FalseFilter implements Filter
return true;
}
@Override
public boolean canVectorizeMatcher()
{
return true;
}
@Override
public Set<String> getRequiredColumns()
{

View File

@ -54,6 +54,11 @@ import java.util.Set;
* given {@link #values}.
* For multi-valued dimension, this filter returns true if one of the dimension values matches to one of the
* given {@link #values}.
*
* In SQL-compatible null handling mode, this filter is equivalent to {@code (dimension IN [values])} or
* {@code (dimension IN [non-null values] OR dimension IS NULL)} when {@link #values} contains nulls.
* In default null handling mode, this filter is equivalent to {@code (dimension IN [values])} or
* {@code (dimension IN [non-null values, ''])} when {@link #values} contains nulls.
*/
public class InFilter implements Filter
{

View File

@ -38,6 +38,11 @@ import java.util.Objects;
import java.util.Set;
/**
* This filter is to select the rows where the {@link #dimension} has the {@link #value}. The value can be null.
* In SQL-compatible null handling mode, this filter is equivalent to {@code dimension = value}
* or {@code dimension IS NULL} when the value is null.
* In default null handling mode, this filter is equivalent to {@code dimension = value} or
* {@code dimension = ''} when the value is null.
*/
public class SelectorFilter implements Filter
{

View File

@ -23,8 +23,11 @@ import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.filter.BitmapIndexSelector;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.filter.vector.TrueVectorMatcher;
import org.apache.druid.query.filter.vector.VectorValueMatcher;
import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import java.util.Collections;
import java.util.Set;
@ -56,6 +59,12 @@ public class TrueFilter implements Filter
return TrueValueMatcher.instance();
}
@Override
public VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory)
{
return new TrueVectorMatcher(factory.getVectorSizeInspector());
}
@Override
public boolean supportsBitmapIndex(BitmapIndexSelector selector)
{
@ -74,6 +83,12 @@ public class TrueFilter implements Filter
return true;
}
@Override
public boolean canVectorizeMatcher()
{
return true;
}
@Override
public Set<String> getRequiredColumns()
{

View File

@ -66,9 +66,9 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe
}
@Override
public int getMaxVectorSize()
public VectorSizeInspector getVectorSizeInspector()
{
return offset.getMaxVectorSize();
return offset;
}
@Override

View File

@ -31,12 +31,20 @@ import javax.annotation.Nullable;
*/
public interface VectorColumnSelectorFactory
{
/**
* Returns a {@link VectorSizeInspector} for the {@link VectorCursor} that generated this object.
*/
VectorSizeInspector getVectorSizeInspector();
/**
* Returns the maximum vector size for the {@link VectorCursor} that generated this object.
*
* @see VectorCursor#getMaxVectorSize()
*/
int getMaxVectorSize();
default int getMaxVectorSize()
{
return getVectorSizeInspector().getMaxVectorSize();
}
/**
* Returns a string-typed, single-value-per-row column selector.

View File

@ -30,17 +30,17 @@ public class FilteredAggregatorFactoryTest
{
Assert.assertEquals("overrideName", new FilteredAggregatorFactory(
new CountAggregatorFactory("foo"),
new TrueDimFilter(),
TrueDimFilter.instance(),
"overrideName"
).getName());
Assert.assertEquals("delegateName", new FilteredAggregatorFactory(
new CountAggregatorFactory("delegateName"),
new TrueDimFilter(),
TrueDimFilter.instance(),
""
).getName());
Assert.assertEquals("delegateName", new FilteredAggregatorFactory(
new CountAggregatorFactory("delegateName"),
new TrueDimFilter(),
TrueDimFilter.instance(),
null
).getName());
}

View File

@ -65,4 +65,39 @@ public class AndDimFilterTest extends InitializedNullHandlingTest
);
Assert.assertEquals(expected, dimFilter.toFilter());
}
@Test
public void testOptimieShortCircuitWithFalseFilter()
{
DimFilter filter = DimFilterTestUtils.and(
DimFilterTestUtils.selector("col1", "1"),
FalseDimFilter.instance()
);
Assert.assertTrue(filter.optimize() instanceof FalseDimFilter);
}
@Test
public void testOptimizeOringTrueFilters()
{
DimFilter filter = DimFilterTestUtils.and(TrueDimFilter.instance(), TrueDimFilter.instance());
Assert.assertSame(TrueDimFilter.instance(), filter.optimize());
}
@Test
public void testOptimizeAndOfSingleFilterUnwrapAnd()
{
DimFilter expected = DimFilterTestUtils.selector("col1", "1");
DimFilter filter = DimFilterTestUtils.and(expected);
Assert.assertEquals(expected, filter.optimize());
}
@Test
public void testOptimizeAndOfMultipleFiltersReturningAsItIs()
{
DimFilter filter = DimFilterTestUtils.and(
DimFilterTestUtils.selector("col1", "1"),
DimFilterTestUtils.selector("col1", "2")
);
Assert.assertEquals(filter, filter.optimize());
}
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.filter;
import com.fasterxml.jackson.databind.ObjectMapper;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
public class FalseDimFilterTest
{
@Test
public void testSerde() throws IOException
{
final ObjectMapper mapper = new DefaultObjectMapper();
final FalseDimFilter original = FalseDimFilter.instance();
final byte[] bytes = mapper.writeValueAsBytes(original);
final FalseDimFilter fromBytes = (FalseDimFilter) mapper.readValue(bytes, DimFilter.class);
Assert.assertSame(original, fromBytes);
}
@Test
public void testEquals()
{
EqualsVerifier.forClass(FalseDimFilter.class).usingGetClass().verify();
}
}

View File

@ -1,85 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.filter;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.inject.Injector;
import com.google.inject.Key;
import org.apache.druid.guice.GuiceInjectors;
import org.apache.druid.guice.annotations.Json;
import org.apache.druid.query.extraction.RegexDimExtractionFn;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
public class InDimFilterSerDesrTest
{
private static ObjectMapper mapper;
private final String serializedFilter =
"{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[\"bad\",\"good\"],\"extractionFn\":null}";
@Before
public void setUp()
{
Injector defaultInjector = GuiceInjectors.makeStartupInjector();
mapper = defaultInjector.getInstance(Key.get(ObjectMapper.class, Json.class));
}
@Test
public void testDeserialization() throws IOException
{
final InDimFilter actualInDimFilter = mapper.readerFor(DimFilter.class).readValue(serializedFilter);
final InDimFilter expectedInDimFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null);
Assert.assertEquals(expectedInDimFilter, actualInDimFilter);
}
@Test
public void testSerialization() throws IOException
{
final InDimFilter dimInFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null);
final String actualSerializedFilter = mapper.writeValueAsString(dimInFilter);
Assert.assertEquals(serializedFilter, actualSerializedFilter);
}
@Test
public void testGetCacheKey()
{
final InDimFilter inDimFilter_1 = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null);
final InDimFilter inDimFilter_2 = new InDimFilter("dimTest", Collections.singletonList("good,bad"), null);
Assert.assertNotEquals(inDimFilter_1.getCacheKey(), inDimFilter_2.getCacheKey());
RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null);
final InDimFilter inDimFilter_3 = new InDimFilter("dimTest", Arrays.asList("good", "bad"), regexFn);
final InDimFilter inDimFilter_4 = new InDimFilter("dimTest", Collections.singletonList("good,bad"), regexFn);
Assert.assertNotEquals(inDimFilter_3.getCacheKey(), inDimFilter_4.getCacheKey());
}
@Test
public void testGetCacheKeyNullValue() throws IOException
{
InDimFilter inDimFilter = mapper.readValue("{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[null]}", InDimFilter.class);
Assert.assertNotNull(inDimFilter.getCacheKey());
}
}

View File

@ -0,0 +1,147 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.filter;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.query.extraction.RegexDimExtractionFn;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
public class InDimFilterTest extends InitializedNullHandlingTest
{
private ObjectMapper mapper = new DefaultObjectMapper();
private final String serializedFilter =
"{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[\"bad\",\"good\"]}";
@Test
public void testDeserialization() throws IOException
{
final InDimFilter actualInDimFilter = mapper.readerFor(DimFilter.class).readValue(serializedFilter);
final InDimFilter expectedInDimFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null);
Assert.assertEquals(expectedInDimFilter, actualInDimFilter);
}
@Test
public void testSerialization() throws IOException
{
final InDimFilter dimInFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null);
final String actualSerializedFilter = mapper.writeValueAsString(dimInFilter);
Assert.assertEquals(serializedFilter, actualSerializedFilter);
}
@Test
public void testGetValuesWithValuesSetOfNonEmptyStringsUseTheGivenSet()
{
final Set<String> values = ImmutableSet.of("v1", "v2", "v3");
final InDimFilter filter = new InDimFilter("dim", values, null, null);
Assert.assertSame(values, filter.getValues());
}
@Test
public void testGetValuesWithValuesSetIncludingEmptyString()
{
final Set<String> values = Sets.newHashSet("v1", "", "v3");
final InDimFilter filter = new InDimFilter("dim", values, null, null);
if (NullHandling.replaceWithDefault()) {
Assert.assertNotSame(values, filter.getValues());
Assert.assertEquals(Sets.newHashSet("v1", null, "v3"), filter.getValues());
} else {
Assert.assertSame(values, filter.getValues());
}
}
@Test
public void testGetCacheKeyReturningSameKeyForValuesOfDifferentOrders()
{
final InDimFilter dimFilter1 = new InDimFilter("dim", ImmutableList.of("v1", "v2"), null);
final InDimFilter dimFilter2 = new InDimFilter("dim", ImmutableList.of("v2", "v1"), null);
Assert.assertArrayEquals(dimFilter1.getCacheKey(), dimFilter2.getCacheKey());
}
@Test
public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfLists()
{
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null);
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Collections.singletonList("good,bad"), null);
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
}
@Test
public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfListsWithExtractFn()
{
RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null);
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList("good", "bad"), regexFn);
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Collections.singletonList("good,bad"), regexFn);
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
}
@Test
public void testGetCacheKeyNullValue() throws IOException
{
InDimFilter inDimFilter = mapper.readValue(
"{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[null]}",
InDimFilter.class
);
Assert.assertNotNull(inDimFilter.getCacheKey());
}
@Test
public void testGetCacheKeyReturningDifferentKeysWithAndWithoutNull()
{
InDimFilter filter1 = new InDimFilter("dim", Arrays.asList("val", null), null);
InDimFilter filter2 = new InDimFilter("dim", Collections.singletonList("val"), null);
Assert.assertFalse(Arrays.equals(filter1.getCacheKey(), filter2.getCacheKey()));
}
@Test
public void testGetCacheKeyReturningCachedCacheKey()
{
final InDimFilter filter = new InDimFilter("dim", ImmutableList.of("v1", "v2"), null);
// Compares the array object, not the elements of the array
Assert.assertSame(filter.getCacheKey(), filter.getCacheKey());
}
@Test
public void testGetDimensionRangeSetValuesOfDifferentOrdersReturningSameResult()
{
final InDimFilter dimFilter1 = new InDimFilter("dim", ImmutableList.of("v1", "v2", "v3"), null);
final InDimFilter dimFilter2 = new InDimFilter("dim", ImmutableList.of("v3", "v2", "v1"), null);
Assert.assertEquals(dimFilter1.getDimensionRangeSet("dim"), dimFilter2.getDimensionRangeSet("dim"));
}
@Test
public void testOptimizeSingleValueInToSelector()
{
final InDimFilter filter = new InDimFilter("dim", Collections.singleton("v1"), null);
Assert.assertEquals(new SelectorDimFilter("dim", "v1", null), filter.optimize());
}
}

View File

@ -50,4 +50,39 @@ public class OrDimFilterTest extends InitializedNullHandlingTest
);
Assert.assertEquals(expected, dimFilter.toFilter());
}
@Test
public void testOptimieShortCircuitWithTrueFilter()
{
DimFilter filter = DimFilterTestUtils.or(
DimFilterTestUtils.selector("col1", "1"),
TrueDimFilter.instance()
);
Assert.assertTrue(filter.optimize() instanceof TrueDimFilter);
}
@Test
public void testOptimizeOringFalseFilters()
{
DimFilter filter = DimFilterTestUtils.or(FalseDimFilter.instance(), FalseDimFilter.instance());
Assert.assertSame(FalseDimFilter.instance(), filter.optimize());
}
@Test
public void testOptimizeOrOfSingleFilterUnwrapOr()
{
DimFilter expected = DimFilterTestUtils.selector("col1", "1");
DimFilter filter = DimFilterTestUtils.or(expected);
Assert.assertEquals(expected, filter.optimize());
}
@Test
public void testOptimizeOrOfMultipleFiltersReturningAsItIs()
{
DimFilter filter = DimFilterTestUtils.or(
DimFilterTestUtils.selector("col1", "1"),
DimFilterTestUtils.selector("col1", "2")
);
Assert.assertEquals(filter, filter.optimize());
}
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.filter;
import com.fasterxml.jackson.databind.ObjectMapper;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
public class TrueDimFilterTest
{
@Test
public void testSerde() throws IOException
{
final ObjectMapper mapper = new DefaultObjectMapper();
final TrueDimFilter original = TrueDimFilter.instance();
final byte[] bytes = mapper.writeValueAsBytes(original);
final TrueDimFilter fromBytes = (TrueDimFilter) mapper.readValue(bytes, DimFilter.class);
Assert.assertSame(original, fromBytes);
}
@Test
public void testEquals()
{
EqualsVerifier.forClass(TrueDimFilter.class).usingGetClass().verify();
}
}

View File

@ -123,7 +123,8 @@ public class ArrayOverlapOperatorConversion extends BaseExpressionDimFilterOpera
return new InDimFilter(
simpleExtractionExpr.getSimpleExtraction().getColumn(),
Sets.newHashSet(arrayElements),
simpleExtractionExpr.getSimpleExtraction().getExtractionFn()
simpleExtractionExpr.getSimpleExtraction().getExtractionFn(),
null
);
}
}

View File

@ -20,6 +20,7 @@
package org.apache.druid.sql.calcite.filtration;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.InDimFilter;
@ -33,6 +34,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class ConvertSelectorsToIns extends BottomUpTransform
{
@ -78,7 +80,7 @@ public class ConvertSelectorsToIns extends BottomUpTransform
final List<SelectorDimFilter> filterList = entry.getValue();
if (filterList.size() > 1) {
// We found a simplification. Remove the old filters and add new ones.
final List<String> values = new ArrayList<>();
final Set<String> values = Sets.newHashSetWithExpectedSize(filterList.size());
for (final SelectorDimFilter selector : filterList) {
values.add(selector.getValue());

View File

@ -23,9 +23,9 @@ import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.ExpressionDimFilter;
import org.apache.druid.query.filter.FalseDimFilter;
import org.apache.druid.query.filter.TrueDimFilter;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.spec.QuerySegmentSpec;
import org.apache.druid.segment.column.RowSignature;
@ -36,12 +36,8 @@ import java.util.Objects;
public class Filtration
{
private static final DimFilter MATCH_NOTHING = new ExpressionDimFilter(
"1 == 2", ExprMacroTable.nil()
);
private static final DimFilter MATCH_EVERYTHING = new ExpressionDimFilter(
"1 == 1", ExprMacroTable.nil()
);
private static final DimFilter MATCH_NOTHING = FalseDimFilter.instance();
private static final DimFilter MATCH_EVERYTHING = TrueDimFilter.instance();
// 1) If "dimFilter" is null, it should be ignored and not affect filtration.
// 2) There is an implicit AND between "intervals" and "dimFilter" (if dimFilter is non-null).