Optimize large InDimFilters (#10312)

* Optimize large InDimFilters

For large InDimFilters, in default mode, the filter does a linear check of the
set to see if it contains either an empty string or a null. If it does, the
empty strings are converted to nulls by passing through the entire set again.

Instead of this, in default mode, we attempt to remove an empty string from the
values that are passed to the InDimFilter. If an empty string was removed, we
add null to the set.

* code review

* Revert "code review"

This reverts commit 61fe33ebf7.

* code review - less brittle
This commit is contained in:
Suneet Saldanha 2020-08-24 16:39:27 -07:00 committed by GitHub
parent 028442e75e
commit 707b5aae2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 9 additions and 4 deletions

View File

@ -34,6 +34,7 @@ import com.google.common.collect.Iterables;
import com.google.common.collect.Ordering; import com.google.common.collect.Ordering;
import com.google.common.collect.Range; import com.google.common.collect.Range;
import com.google.common.collect.RangeSet; import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeRangeSet; import com.google.common.collect.TreeRangeSet;
import com.google.common.hash.Hasher; import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing; import com.google.common.hash.Hashing;
@ -74,7 +75,6 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors;
public class InDimFilter extends AbstractOptimizableDimFilter implements Filter public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
{ {
@ -143,10 +143,15 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
// The values set can be huge. Try to avoid copying the set if possible. // The values set can be huge. Try to avoid copying the set if possible.
// Note that we may still need to copy values to a list for caching. See getCacheKey(). // Note that we may still need to copy values to a list for caching. See getCacheKey().
if ((NullHandling.sqlCompatible() || values.stream().noneMatch(NullHandling::needsEmptyToNull))) { if (!NullHandling.sqlCompatible() && values.contains("")) {
this.values = values; // In Non sql compatible mode, empty strings should be converted to nulls for the filter.
// In sql compatible mode, empty strings and nulls should be treated differently
this.values = Sets.newHashSetWithExpectedSize(values.size());
for (String v : values) {
this.values.add(NullHandling.emptyToNullIfNeeded(v));
}
} else { } else {
this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet()); this.values = values;
} }
this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null"); this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null");