mirror of https://github.com/apache/druid.git
Optimize large InDimFilters (#10312)
* Optimize large InDimFilters
For large InDimFilters, in default mode, the filter does a linear scan of the
set to see if it contains either an empty string or a null. If it does, the
empty strings are converted to nulls by passing through the entire set again.
Instead of this, in default mode, we attempt to remove the empty string from the
values that are passed to the InDimFilter. If an empty string was removed, we
add null to the set.
* code review
* Revert "code review"
This reverts commit 61fe33ebf7.
* code review - less brittle
This commit is contained in:
parent
028442e75e
commit
707b5aae2b
|
@ -34,6 +34,7 @@ import com.google.common.collect.Iterables;
|
||||||
import com.google.common.collect.Ordering;
|
import com.google.common.collect.Ordering;
|
||||||
import com.google.common.collect.Range;
|
import com.google.common.collect.Range;
|
||||||
import com.google.common.collect.RangeSet;
|
import com.google.common.collect.RangeSet;
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
import com.google.common.collect.TreeRangeSet;
|
import com.google.common.collect.TreeRangeSet;
|
||||||
import com.google.common.hash.Hasher;
|
import com.google.common.hash.Hasher;
|
||||||
import com.google.common.hash.Hashing;
|
import com.google.common.hash.Hashing;
|
||||||
|
@ -74,7 +75,6 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
||||||
{
|
{
|
||||||
|
@ -143,10 +143,15 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
||||||
|
|
||||||
// The values set can be huge. Try to avoid copying the set if possible.
|
// The values set can be huge. Try to avoid copying the set if possible.
|
||||||
// Note that we may still need to copy values to a list for caching. See getCacheKey().
|
// Note that we may still need to copy values to a list for caching. See getCacheKey().
|
||||||
if ((NullHandling.sqlCompatible() || values.stream().noneMatch(NullHandling::needsEmptyToNull))) {
|
if (!NullHandling.sqlCompatible() && values.contains("")) {
|
||||||
this.values = values;
|
// In Non sql compatible mode, empty strings should be converted to nulls for the filter.
|
||||||
|
// In sql compatible mode, empty strings and nulls should be treated differently
|
||||||
|
this.values = Sets.newHashSetWithExpectedSize(values.size());
|
||||||
|
for (String v : values) {
|
||||||
|
this.values.add(NullHandling.emptyToNullIfNeeded(v));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet());
|
this.values = values;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null");
|
this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null");
|
||||||
|
|
Loading…
Reference in New Issue