From dcabd4b1ee1e87f83c7524b1a62cc5ad1b17687b Mon Sep 17 00:00:00 2001 From: Dave Li Date: Thu, 19 May 2016 19:29:16 -0400 Subject: [PATCH] Add lookup optimization for InDimFilter (#2938) * Add lookup optimization for InDimFilter * tests for in filter with lookup extraction fn * refactor * refactor2 and modified filter test * make optimizeLookup private --- .../io/druid/query/filter/InDimFilter.java | 44 ++++++++++++++++++ .../druid/query/filter/SelectorDimFilter.java | 43 ++---------------- .../io/druid/segment/filter/InFilterTest.java | 45 +++++++++++++++++++ .../segment/filter/SelectorFilterTest.java | 6 +-- 4 files changed, 95 insertions(+), 43 deletions(-) diff --git a/processing/src/main/java/io/druid/query/filter/InDimFilter.java b/processing/src/main/java/io/druid/query/filter/InDimFilter.java index f6d3861ac9a..6c896cd8e09 100644 --- a/processing/src/main/java/io/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/InDimFilter.java @@ -28,10 +28,14 @@ import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Iterables; import com.metamx.common.StringUtils; import io.druid.query.extraction.ExtractionFn; +import io.druid.query.lookup.LookupExtractionFn; +import io.druid.query.lookup.LookupExtractor; import io.druid.segment.filter.InFilter; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.Set; public class InDimFilter implements DimFilter @@ -117,6 +121,46 @@ public class InDimFilter implements DimFilter @Override public DimFilter optimize() { + InDimFilter inFilter = optimizeLookup(); + if (inFilter.values.size() == 1) { + return new SelectorDimFilter(inFilter.dimension, inFilter.values.first(), inFilter.getExtractionFn()); + } + return inFilter; + } + + private InDimFilter optimizeLookup() { + if (extractionFn instanceof LookupExtractionFn + && ((LookupExtractionFn) extractionFn).isOptimize()) { + LookupExtractionFn exFn = (LookupExtractionFn) extractionFn; + LookupExtractor lookup = exFn.getLookup(); + + final List keys = new ArrayList<>(); + for (String value : values) { + + // We cannot do an unapply()-based optimization if the selector value + // and the replaceMissingValuesWith value are the same, since we have to match on + // all values that are not present in the lookup. + final String convertedValue = Strings.emptyToNull(value); + if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) { + return this; + } + keys.addAll(lookup.unapply(convertedValue)); + + // If retainMissingValues is true and the selector value is not in the lookup map, + // there may be row values that match the selector value but are not included + // in the lookup map. Match on the selector value as well. + // If the selector value is overwritten in the lookup map, don't add selector value to keys. + if (exFn.isRetainMissingValue() && lookup.apply(convertedValue) == null) { + keys.add(convertedValue); + } + } + + if (keys.isEmpty()) { + return this; + } else { + return new InDimFilter(dimension, keys, null); + } + } return this; } diff --git a/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java b/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java index 9c697da33fb..1bde907f8b0 100644 --- a/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java @@ -24,16 +24,13 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; import com.metamx.common.StringUtils; import io.druid.query.extraction.ExtractionFn; -import io.druid.query.lookup.LookupExtractionFn; -import io.druid.query.lookup.LookupExtractor; import io.druid.segment.filter.DimensionPredicateFilter; import io.druid.segment.filter.SelectorFilter; import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; import java.util.Objects; /** @@ -54,7 +51,7 @@ public class SelectorDimFilter implements DimFilter Preconditions.checkArgument(dimension != null, "dimension must not be null"); this.dimension = dimension; - this.value = value; + this.value = Strings.nullToEmpty(value); this.extractionFn = extractionFn; } @@ -78,41 +75,7 @@ public class SelectorDimFilter implements DimFilter @Override public DimFilter optimize() { - if (this.getExtractionFn() instanceof LookupExtractionFn - && ((LookupExtractionFn) this.getExtractionFn()).isOptimize()) { - LookupExtractionFn exFn = (LookupExtractionFn) this.getExtractionFn(); - LookupExtractor lookup = exFn.getLookup(); - - final String convertedValue = Strings.emptyToNull(value); - - // We cannot do an unapply()-based optimization if the selector value - // and the replaceMissingValuesWith value are the same, since we have to match on - // all values that are not present in the lookup. - if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) { - return this; - } - - final String mappingForValue = lookup.apply(convertedValue); - final List keys = new ArrayList<>(); - keys.addAll(lookup.unapply(convertedValue)); - - // If retainMissingValues is true and the selector value is not in the lookup map, - // there may be row values that match the selector value but are not included - // in the lookup map. Match on the selector value as well. - // If the selector value is overwritten in the lookup map, don't add selector value to keys. - if (exFn.isRetainMissingValue() && mappingForValue == null) { - keys.add(convertedValue); - } - - if (keys.isEmpty()) { - return this; - } else if (keys.size() == 1) { - return new SelectorDimFilter(dimension, keys.get(0), null); - } else { - return new InDimFilter(dimension, keys, null); - } - } - return this; + return new InDimFilter(dimension, ImmutableList.of(value), extractionFn).optimize(); } @Override diff --git a/processing/src/test/java/io/druid/segment/filter/InFilterTest.java b/processing/src/test/java/io/druid/segment/filter/InFilterTest.java index 81427ea8a1f..b5f1b912f7a 100644 --- a/processing/src/test/java/io/druid/segment/filter/InFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/InFilterTest.java @@ -35,10 +35,13 @@ import io.druid.data.input.impl.TimestampSpec; import io.druid.js.JavaScriptConfig; import io.druid.query.extraction.ExtractionFn; import io.druid.query.extraction.JavaScriptExtractionFn; +import io.druid.query.extraction.MapLookupExtractor; import io.druid.query.filter.BoundDimFilter; import io.druid.query.filter.DimFilter; import io.druid.query.filter.Filter; import io.druid.query.filter.InDimFilter; +import io.druid.query.lookup.LookupExtractionFn; +import io.druid.query.lookup.LookupExtractor; import io.druid.segment.IndexBuilder; import io.druid.segment.StorageAdapter; import org.joda.time.DateTime; @@ -260,6 +263,48 @@ public class InFilterTest extends BaseFilterTest ); } + @Test + public void testMatchWithLookupExtractionFn() { + final Map stringMap = ImmutableMap.of( + "a", "HELLO", + "10", "HELLO", + "def", "HELLO", + "c", "BYE" + ); + LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); + LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); + + assertFilterMatches(toInFilterWithFn("dim0", lookupFn, null, "HELLO"), ImmutableList.of("a")); + assertFilterMatches(toInFilterWithFn("dim0", lookupFn, "HELLO", "BYE"), ImmutableList.of("a", "c")); + assertFilterMatches(toInFilterWithFn("dim0", lookupFn, "UNKNOWN"), ImmutableList.of("b", "d", "e", "f")); + assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "HELLO"), ImmutableList.of("b", "e")); + assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.of()); + assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of()); + assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d")); + assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"), + ImmutableList.of("a", "b", "c", "d", "e", "f")); + + final Map stringMap2 = ImmutableMap.of( + "a", "e" + ); + LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false); + LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true); + + assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, null, "e"), ImmutableList.of("a", "e")); + assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, "a"), ImmutableList.of()); + + final Map stringMap3 = ImmutableMap.of( + "c", "500", + "100", "e" + ); + LookupExtractor mapExtractor3 = new MapLookupExtractor(stringMap3, false); + LookupExtractionFn lookupFn3 = new LookupExtractionFn(mapExtractor3, false, null, false, true); + + assertFilterMatches(toInFilterWithFn("dim0", lookupFn3, null, "c"), ImmutableList.of("a", "b", "d", "e", "f")); + assertFilterMatches(toInFilterWithFn("dim0", lookupFn3, "e"), ImmutableList.of()); + + } + private DimFilter toInFilter(String dim, String value, String... values) { return new InDimFilter(dim, Lists.asList(value, values), null); diff --git a/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java b/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java index 94b1bf845a6..adbd5dc82b3 100644 --- a/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java @@ -209,11 +209,11 @@ public class SelectorFilterTest extends BaseFilterTest SelectorDimFilter optFilter4Optimized = new SelectorDimFilter("dim0", "5", null); SelectorDimFilter optFilter6Optimized = new SelectorDimFilter("dim0", "5", null); - Assert.assertTrue(optFilter1 == optFilter1.optimize()); + Assert.assertTrue(optFilter1.equals(optFilter1.optimize())); Assert.assertTrue(optFilter2Optimized.equals(optFilter2.optimize())); - Assert.assertTrue(optFilter3 == optFilter3.optimize()); + Assert.assertTrue(optFilter3.equals(optFilter3.optimize())); Assert.assertTrue(optFilter4Optimized.equals(optFilter4.optimize())); - Assert.assertTrue(optFilter5 == optFilter5.optimize()); + Assert.assertTrue(optFilter5.equals(optFilter5.optimize())); Assert.assertTrue(optFilter6Optimized.equals(optFilter6.optimize())); assertFilterMatches(optFilter1, ImmutableList.of("0", "1", "2", "5"));