Add lookup optimization for InDimFilter (#2938)

* Add lookup optimization for InDimFilter

* tests for in filter with lookup extraction fn

* refactor

* refactor2 and modified filter test

* make optimizeLookup private
This commit is contained in:
Dave Li 2016-05-19 19:29:16 -04:00 committed by Fangjin Yang
parent 00de26c76a
commit dcabd4b1ee
4 changed files with 95 additions and 43 deletions

View File

@ -28,10 +28,14 @@ import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.metamx.common.StringUtils; import com.metamx.common.StringUtils;
import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.ExtractionFn;
import io.druid.query.lookup.LookupExtractionFn;
import io.druid.query.lookup.LookupExtractor;
import io.druid.segment.filter.InFilter; import io.druid.segment.filter.InFilter;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects;
import java.util.Set; import java.util.Set;
public class InDimFilter implements DimFilter public class InDimFilter implements DimFilter
@ -117,6 +121,46 @@ public class InDimFilter implements DimFilter
/**
 * Returns a simplified form of this filter.
 *
 * First applies the lookup-based rewrite from {@code optimizeLookup()}; if the resulting filter
 * holds exactly one value it is collapsed into a {@link SelectorDimFilter} on the same dimension,
 * carrying over that filter's extraction function.
 *
 * @return a SelectorDimFilter when exactly one value remains, otherwise the (possibly rewritten)
 *         InDimFilter
 */
@Override @Override
public DimFilter optimize() public DimFilter optimize()
{ {
InDimFilter inFilter = optimizeLookup();
// A single-value IN filter is expressed as a selector filter instead.
if (inFilter.values.size() == 1) {
return new SelectorDimFilter(inFilter.dimension, inFilter.values.first(), inFilter.getExtractionFn());
}
return inFilter;
}
/**
 * Attempts to rewrite this filter into one that needs no extraction function by reverse-mapping
 * each filter value through the lookup ({@code unapply}).
 *
 * The rewrite only happens when {@code extractionFn} is a {@link LookupExtractionFn} whose
 * {@code isOptimize()} returns true. It is abandoned (and {@code this} is returned) when any value,
 * after empty-to-null conversion, equals the replaceMissingValueWith value while
 * retainMissingValue is false, or when no keys were collected at all.
 *
 * @return a new InDimFilter over the collected keys with a null extraction function, or this
 *         filter unchanged when the rewrite does not apply
 */
private InDimFilter optimizeLookup() {
if (extractionFn instanceof LookupExtractionFn
&& ((LookupExtractionFn) extractionFn).isOptimize()) {
LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
LookupExtractor lookup = exFn.getLookup();
final List<String> keys = new ArrayList<>();
for (String value : values) {
// We cannot do an unapply()-based optimization if the selector value
// and the replaceMissingValuesWith value are the same, since we have to match on
// all values that are not present in the lookup.
final String convertedValue = Strings.emptyToNull(value);
if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
return this;
}
keys.addAll(lookup.unapply(convertedValue));
// If retainMissingValues is true and the selector value is not in the lookup map,
// there may be row values that match the selector value but are not included
// in the lookup map. Match on the selector value as well.
// If the selector value is overwritten in the lookup map, don't add selector value to keys.
if (exFn.isRetainMissingValue() && lookup.apply(convertedValue) == null) {
keys.add(convertedValue);
}
}
// No keys collected for any value: keep the original filter, extraction function included.
if (keys.isEmpty()) {
return this;
} else {
// The rewritten filter matches the collected keys directly and carries no extraction function.
return new InDimFilter(dimension, keys, null);
}
}
// Not a lookup extraction function with optimization enabled: leave the filter unchanged.
return this; return this;
} }

View File

@ -24,16 +24,13 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.base.Predicate; import com.google.common.base.Predicate;
import com.google.common.base.Strings; import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.metamx.common.StringUtils; import com.metamx.common.StringUtils;
import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.ExtractionFn;
import io.druid.query.lookup.LookupExtractionFn;
import io.druid.query.lookup.LookupExtractor;
import io.druid.segment.filter.DimensionPredicateFilter; import io.druid.segment.filter.DimensionPredicateFilter;
import io.druid.segment.filter.SelectorFilter; import io.druid.segment.filter.SelectorFilter;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects; import java.util.Objects;
/** /**
@ -54,7 +51,7 @@ public class SelectorDimFilter implements DimFilter
Preconditions.checkArgument(dimension != null, "dimension must not be null"); Preconditions.checkArgument(dimension != null, "dimension must not be null");
this.dimension = dimension; this.dimension = dimension;
this.value = value; this.value = Strings.nullToEmpty(value);
this.extractionFn = extractionFn; this.extractionFn = extractionFn;
} }
@ -78,41 +75,7 @@ public class SelectorDimFilter implements DimFilter
// NOTE(review): this hunk shows both sides of the diff interleaved. Per the hunk header
// (41 lines before, 7 after), the long lookup/unapply body below is the pre-change
// implementation, and the post-change body is the single statement
// "return new InDimFilter(dimension, ImmutableList.of(value), extractionFn).optimize();",
// i.e. the selector now wraps its value in a one-element InDimFilter and delegates to that
// filter's optimize().
@Override @Override
public DimFilter optimize() public DimFilter optimize()
{ {
if (this.getExtractionFn() instanceof LookupExtractionFn return new InDimFilter(dimension, ImmutableList.of(value), extractionFn).optimize();
&& ((LookupExtractionFn) this.getExtractionFn()).isOptimize()) {
LookupExtractionFn exFn = (LookupExtractionFn) this.getExtractionFn();
LookupExtractor lookup = exFn.getLookup();
final String convertedValue = Strings.emptyToNull(value);
// We cannot do an unapply()-based optimization if the selector value
// and the replaceMissingValuesWith value are the same, since we have to match on
// all values that are not present in the lookup.
if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
return this;
}
final String mappingForValue = lookup.apply(convertedValue);
final List<String> keys = new ArrayList<>();
keys.addAll(lookup.unapply(convertedValue));
// If retainMissingValues is true and the selector value is not in the lookup map,
// there may be row values that match the selector value but are not included
// in the lookup map. Match on the selector value as well.
// If the selector value is overwritten in the lookup map, don't add selector value to keys.
if (exFn.isRetainMissingValue() && mappingForValue == null) {
keys.add(convertedValue);
}
if (keys.isEmpty()) {
return this;
} else if (keys.size() == 1) {
return new SelectorDimFilter(dimension, keys.get(0), null);
} else {
return new InDimFilter(dimension, keys, null);
}
}
return this;
} }
@Override @Override

View File

@ -35,10 +35,13 @@ import io.druid.data.input.impl.TimestampSpec;
import io.druid.js.JavaScriptConfig; import io.druid.js.JavaScriptConfig;
import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.ExtractionFn;
import io.druid.query.extraction.JavaScriptExtractionFn; import io.druid.query.extraction.JavaScriptExtractionFn;
import io.druid.query.extraction.MapLookupExtractor;
import io.druid.query.filter.BoundDimFilter; import io.druid.query.filter.BoundDimFilter;
import io.druid.query.filter.DimFilter; import io.druid.query.filter.DimFilter;
import io.druid.query.filter.Filter; import io.druid.query.filter.Filter;
import io.druid.query.filter.InDimFilter; import io.druid.query.filter.InDimFilter;
import io.druid.query.lookup.LookupExtractionFn;
import io.druid.query.lookup.LookupExtractor;
import io.druid.segment.IndexBuilder; import io.druid.segment.IndexBuilder;
import io.druid.segment.StorageAdapter; import io.druid.segment.StorageAdapter;
import org.joda.time.DateTime; import org.joda.time.DateTime;
@ -260,6 +263,48 @@ public class InFilterTest extends BaseFilterTest
); );
} }
/**
 * Checks that IN filters combined with a {@link LookupExtractionFn} match the expected rows for
 * three differently configured lookups.
 */
@Test
public void testMatchWithLookupExtractionFn()
{
  // Lookup 1: three keys map to "HELLO" and one to "BYE"; "UNKNOWN" is passed as the third argument.
  // NOTE(review): the trailing constructor arguments are presumably
  // (retainMissingValue, replaceMissingValueWith, injective, optimize) - confirm against
  // LookupExtractionFn's constructor.
  final Map<String, String> greetingsByKey = ImmutableMap.of(
      "a", "HELLO",
      "10", "HELLO",
      "def", "HELLO",
      "c", "BYE"
  );
  final LookupExtractionFn greetingFn = new LookupExtractionFn(
      new MapLookupExtractor(greetingsByKey, false),
      false,
      "UNKNOWN",
      false,
      true
  );

  assertFilterMatches(toInFilterWithFn("dim0", greetingFn, null, "HELLO"), ImmutableList.of("a"));
  assertFilterMatches(toInFilterWithFn("dim0", greetingFn, "HELLO", "BYE"), ImmutableList.of("a", "c"));
  assertFilterMatches(toInFilterWithFn("dim0", greetingFn, "UNKNOWN"), ImmutableList.of("b", "d", "e", "f"));
  assertFilterMatches(toInFilterWithFn("dim1", greetingFn, "HELLO"), ImmutableList.of("b", "e"));
  assertFilterMatches(toInFilterWithFn("dim1", greetingFn, "N/A"), ImmutableList.<String>of());
  assertFilterMatches(toInFilterWithFn("dim2", greetingFn, "a"), ImmutableList.<String>of());
  assertFilterMatches(toInFilterWithFn("dim2", greetingFn, "HELLO"), ImmutableList.of("a", "d"));
  assertFilterMatches(
      toInFilterWithFn("dim2", greetingFn, "HELLO", "BYE", "UNKNOWN"),
      ImmutableList.of("a", "b", "c", "d", "e", "f")
  );

  // Lookup 2: a single mapping "a" -> "e", built with (true, null) as the second and third arguments.
  final Map<String, String> aToE = ImmutableMap.of("a", "e");
  final LookupExtractionFn aToEFn = new LookupExtractionFn(
      new MapLookupExtractor(aToE, false),
      true,
      null,
      false,
      true
  );

  assertFilterMatches(toInFilterWithFn("dim0", aToEFn, null, "e"), ImmutableList.of("a", "e"));
  assertFilterMatches(toInFilterWithFn("dim0", aToEFn, "a"), ImmutableList.<String>of());

  // Lookup 3: "c" -> "500" and "100" -> "e", built with (false, null) as the second and third arguments.
  final Map<String, String> sparseMapping = ImmutableMap.of(
      "c", "500",
      "100", "e"
  );
  final LookupExtractionFn sparseFn = new LookupExtractionFn(
      new MapLookupExtractor(sparseMapping, false),
      false,
      null,
      false,
      true
  );

  assertFilterMatches(toInFilterWithFn("dim0", sparseFn, null, "c"), ImmutableList.of("a", "b", "d", "e", "f"));
  assertFilterMatches(toInFilterWithFn("dim0", sparseFn, "e"), ImmutableList.<String>of());
}
private DimFilter toInFilter(String dim, String value, String... values) private DimFilter toInFilter(String dim, String value, String... values)
{ {
return new InDimFilter(dim, Lists.asList(value, values), null); return new InDimFilter(dim, Lists.asList(value, values), null);

View File

@ -209,11 +209,11 @@ public class SelectorFilterTest extends BaseFilterTest
SelectorDimFilter optFilter4Optimized = new SelectorDimFilter("dim0", "5", null); SelectorDimFilter optFilter4Optimized = new SelectorDimFilter("dim0", "5", null);
SelectorDimFilter optFilter6Optimized = new SelectorDimFilter("dim0", "5", null); SelectorDimFilter optFilter6Optimized = new SelectorDimFilter("dim0", "5", null);
Assert.assertTrue(optFilter1 == optFilter1.optimize()); Assert.assertTrue(optFilter1.equals(optFilter1.optimize()));
Assert.assertTrue(optFilter2Optimized.equals(optFilter2.optimize())); Assert.assertTrue(optFilter2Optimized.equals(optFilter2.optimize()));
Assert.assertTrue(optFilter3 == optFilter3.optimize()); Assert.assertTrue(optFilter3.equals(optFilter3.optimize()));
Assert.assertTrue(optFilter4Optimized.equals(optFilter4.optimize())); Assert.assertTrue(optFilter4Optimized.equals(optFilter4.optimize()));
Assert.assertTrue(optFilter5 == optFilter5.optimize()); Assert.assertTrue(optFilter5.equals(optFilter5.optimize()));
Assert.assertTrue(optFilter6Optimized.equals(optFilter6.optimize())); Assert.assertTrue(optFilter6Optimized.equals(optFilter6.optimize()));
assertFilterMatches(optFilter1, ImmutableList.of("0", "1", "2", "5")); assertFilterMatches(optFilter1, ImmutableList.of("0", "1", "2", "5"));