mirror of https://github.com/apache/druid.git
Add lookup optimization for InDimFilter (#2938)
* Add lookup optimization for InDimFilter
* tests for in filter with lookup extraction fn
* refactor
* refactor2 and modified filter test
* make optimizeLookup private
This commit is contained in:
parent
00de26c76a
commit
dcabd4b1ee
|
@ -28,10 +28,14 @@ import com.google.common.collect.ImmutableSortedSet;
|
|||
import com.google.common.collect.Iterables;
|
||||
import com.metamx.common.StringUtils;
|
||||
import io.druid.query.extraction.ExtractionFn;
|
||||
import io.druid.query.lookup.LookupExtractionFn;
|
||||
import io.druid.query.lookup.LookupExtractor;
|
||||
import io.druid.segment.filter.InFilter;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
public class InDimFilter implements DimFilter
|
||||
|
@ -117,6 +121,46 @@ public class InDimFilter implements DimFilter
|
|||
@Override
|
||||
public DimFilter optimize()
|
||||
{
|
||||
InDimFilter inFilter = optimizeLookup();
|
||||
if (inFilter.values.size() == 1) {
|
||||
return new SelectorDimFilter(inFilter.dimension, inFilter.values.first(), inFilter.getExtractionFn());
|
||||
}
|
||||
return inFilter;
|
||||
}
|
||||
|
||||
private InDimFilter optimizeLookup() {
|
||||
if (extractionFn instanceof LookupExtractionFn
|
||||
&& ((LookupExtractionFn) extractionFn).isOptimize()) {
|
||||
LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
|
||||
LookupExtractor lookup = exFn.getLookup();
|
||||
|
||||
final List<String> keys = new ArrayList<>();
|
||||
for (String value : values) {
|
||||
|
||||
// We cannot do an unapply()-based optimization if the selector value
|
||||
// and the replaceMissingValuesWith value are the same, since we have to match on
|
||||
// all values that are not present in the lookup.
|
||||
final String convertedValue = Strings.emptyToNull(value);
|
||||
if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
|
||||
return this;
|
||||
}
|
||||
keys.addAll(lookup.unapply(convertedValue));
|
||||
|
||||
// If retainMissingValues is true and the selector value is not in the lookup map,
|
||||
// there may be row values that match the selector value but are not included
|
||||
// in the lookup map. Match on the selector value as well.
|
||||
// If the selector value is overwritten in the lookup map, don't add selector value to keys.
|
||||
if (exFn.isRetainMissingValue() && lookup.apply(convertedValue) == null) {
|
||||
keys.add(convertedValue);
|
||||
}
|
||||
}
|
||||
|
||||
if (keys.isEmpty()) {
|
||||
return this;
|
||||
} else {
|
||||
return new InDimFilter(dimension, keys, null);
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
@ -24,16 +24,13 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
|||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.metamx.common.StringUtils;
|
||||
import io.druid.query.extraction.ExtractionFn;
|
||||
import io.druid.query.lookup.LookupExtractionFn;
|
||||
import io.druid.query.lookup.LookupExtractor;
|
||||
import io.druid.segment.filter.DimensionPredicateFilter;
|
||||
import io.druid.segment.filter.SelectorFilter;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
|
@ -54,7 +51,7 @@ public class SelectorDimFilter implements DimFilter
|
|||
Preconditions.checkArgument(dimension != null, "dimension must not be null");
|
||||
|
||||
this.dimension = dimension;
|
||||
this.value = value;
|
||||
this.value = Strings.nullToEmpty(value);
|
||||
this.extractionFn = extractionFn;
|
||||
}
|
||||
|
||||
|
@ -78,41 +75,7 @@ public class SelectorDimFilter implements DimFilter
|
|||
@Override
|
||||
public DimFilter optimize()
|
||||
{
|
||||
if (this.getExtractionFn() instanceof LookupExtractionFn
|
||||
&& ((LookupExtractionFn) this.getExtractionFn()).isOptimize()) {
|
||||
LookupExtractionFn exFn = (LookupExtractionFn) this.getExtractionFn();
|
||||
LookupExtractor lookup = exFn.getLookup();
|
||||
|
||||
final String convertedValue = Strings.emptyToNull(value);
|
||||
|
||||
// We cannot do an unapply()-based optimization if the selector value
|
||||
// and the replaceMissingValuesWith value are the same, since we have to match on
|
||||
// all values that are not present in the lookup.
|
||||
if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
|
||||
return this;
|
||||
}
|
||||
|
||||
final String mappingForValue = lookup.apply(convertedValue);
|
||||
final List<String> keys = new ArrayList<>();
|
||||
keys.addAll(lookup.unapply(convertedValue));
|
||||
|
||||
// If retainMissingValues is true and the selector value is not in the lookup map,
|
||||
// there may be row values that match the selector value but are not included
|
||||
// in the lookup map. Match on the selector value as well.
|
||||
// If the selector value is overwritten in the lookup map, don't add selector value to keys.
|
||||
if (exFn.isRetainMissingValue() && mappingForValue == null) {
|
||||
keys.add(convertedValue);
|
||||
}
|
||||
|
||||
if (keys.isEmpty()) {
|
||||
return this;
|
||||
} else if (keys.size() == 1) {
|
||||
return new SelectorDimFilter(dimension, keys.get(0), null);
|
||||
} else {
|
||||
return new InDimFilter(dimension, keys, null);
|
||||
}
|
||||
}
|
||||
return this;
|
||||
return new InDimFilter(dimension, ImmutableList.of(value), extractionFn).optimize();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -35,10 +35,13 @@ import io.druid.data.input.impl.TimestampSpec;
|
|||
import io.druid.js.JavaScriptConfig;
|
||||
import io.druid.query.extraction.ExtractionFn;
|
||||
import io.druid.query.extraction.JavaScriptExtractionFn;
|
||||
import io.druid.query.extraction.MapLookupExtractor;
|
||||
import io.druid.query.filter.BoundDimFilter;
|
||||
import io.druid.query.filter.DimFilter;
|
||||
import io.druid.query.filter.Filter;
|
||||
import io.druid.query.filter.InDimFilter;
|
||||
import io.druid.query.lookup.LookupExtractionFn;
|
||||
import io.druid.query.lookup.LookupExtractor;
|
||||
import io.druid.segment.IndexBuilder;
|
||||
import io.druid.segment.StorageAdapter;
|
||||
import org.joda.time.DateTime;
|
||||
|
@ -260,6 +263,48 @@ public class InFilterTest extends BaseFilterTest
|
|||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatchWithLookupExtractionFn() {
|
||||
final Map<String, String> stringMap = ImmutableMap.of(
|
||||
"a", "HELLO",
|
||||
"10", "HELLO",
|
||||
"def", "HELLO",
|
||||
"c", "BYE"
|
||||
);
|
||||
LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false);
|
||||
LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true);
|
||||
|
||||
assertFilterMatches(toInFilterWithFn("dim0", lookupFn, null, "HELLO"), ImmutableList.of("a"));
|
||||
assertFilterMatches(toInFilterWithFn("dim0", lookupFn, "HELLO", "BYE"), ImmutableList.of("a", "c"));
|
||||
assertFilterMatches(toInFilterWithFn("dim0", lookupFn, "UNKNOWN"), ImmutableList.of("b", "d", "e", "f"));
|
||||
assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "HELLO"), ImmutableList.of("b", "e"));
|
||||
assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.<String>of());
|
||||
assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.<String>of());
|
||||
assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d"));
|
||||
assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"),
|
||||
ImmutableList.of("a", "b", "c", "d", "e", "f"));
|
||||
|
||||
final Map<String, String> stringMap2 = ImmutableMap.of(
|
||||
"a", "e"
|
||||
);
|
||||
LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false);
|
||||
LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true);
|
||||
|
||||
assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, null, "e"), ImmutableList.of("a", "e"));
|
||||
assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, "a"), ImmutableList.<String>of());
|
||||
|
||||
final Map<String, String> stringMap3 = ImmutableMap.of(
|
||||
"c", "500",
|
||||
"100", "e"
|
||||
);
|
||||
LookupExtractor mapExtractor3 = new MapLookupExtractor(stringMap3, false);
|
||||
LookupExtractionFn lookupFn3 = new LookupExtractionFn(mapExtractor3, false, null, false, true);
|
||||
|
||||
assertFilterMatches(toInFilterWithFn("dim0", lookupFn3, null, "c"), ImmutableList.of("a", "b", "d", "e", "f"));
|
||||
assertFilterMatches(toInFilterWithFn("dim0", lookupFn3, "e"), ImmutableList.<String>of());
|
||||
|
||||
}
|
||||
|
||||
private DimFilter toInFilter(String dim, String value, String... values)
|
||||
{
|
||||
return new InDimFilter(dim, Lists.asList(value, values), null);
|
||||
|
|
|
@ -209,11 +209,11 @@ public class SelectorFilterTest extends BaseFilterTest
|
|||
SelectorDimFilter optFilter4Optimized = new SelectorDimFilter("dim0", "5", null);
|
||||
SelectorDimFilter optFilter6Optimized = new SelectorDimFilter("dim0", "5", null);
|
||||
|
||||
Assert.assertTrue(optFilter1 == optFilter1.optimize());
|
||||
Assert.assertTrue(optFilter1.equals(optFilter1.optimize()));
|
||||
Assert.assertTrue(optFilter2Optimized.equals(optFilter2.optimize()));
|
||||
Assert.assertTrue(optFilter3 == optFilter3.optimize());
|
||||
Assert.assertTrue(optFilter3.equals(optFilter3.optimize()));
|
||||
Assert.assertTrue(optFilter4Optimized.equals(optFilter4.optimize()));
|
||||
Assert.assertTrue(optFilter5 == optFilter5.optimize());
|
||||
Assert.assertTrue(optFilter5.equals(optFilter5.optimize()));
|
||||
Assert.assertTrue(optFilter6Optimized.equals(optFilter6.optimize()));
|
||||
|
||||
assertFilterMatches(optFilter1, ImmutableList.of("0", "1", "2", "5"));
|
||||
|
|
Loading…
Reference in New Issue