mirror of https://github.com/apache/druid.git
Add lookup optimization for InDimFilter (#2938)
* Add lookup optimization for InDimFilter * tests for in filter with lookup extraction fn * refactor * refactor2 and modified filter test * make optimizeLookup private
This commit is contained in:
parent
00de26c76a
commit
dcabd4b1ee
|
@ -28,10 +28,14 @@ import com.google.common.collect.ImmutableSortedSet;
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
import com.metamx.common.StringUtils;
|
import com.metamx.common.StringUtils;
|
||||||
import io.druid.query.extraction.ExtractionFn;
|
import io.druid.query.extraction.ExtractionFn;
|
||||||
|
import io.druid.query.lookup.LookupExtractionFn;
|
||||||
|
import io.druid.query.lookup.LookupExtractor;
|
||||||
import io.druid.segment.filter.InFilter;
|
import io.druid.segment.filter.InFilter;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
public class InDimFilter implements DimFilter
|
public class InDimFilter implements DimFilter
|
||||||
|
@ -117,6 +121,46 @@ public class InDimFilter implements DimFilter
|
||||||
@Override
|
@Override
|
||||||
public DimFilter optimize()
|
public DimFilter optimize()
|
||||||
{
|
{
|
||||||
|
InDimFilter inFilter = optimizeLookup();
|
||||||
|
if (inFilter.values.size() == 1) {
|
||||||
|
return new SelectorDimFilter(inFilter.dimension, inFilter.values.first(), inFilter.getExtractionFn());
|
||||||
|
}
|
||||||
|
return inFilter;
|
||||||
|
}
|
||||||
|
|
||||||
|
private InDimFilter optimizeLookup() {
|
||||||
|
if (extractionFn instanceof LookupExtractionFn
|
||||||
|
&& ((LookupExtractionFn) extractionFn).isOptimize()) {
|
||||||
|
LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
|
||||||
|
LookupExtractor lookup = exFn.getLookup();
|
||||||
|
|
||||||
|
final List<String> keys = new ArrayList<>();
|
||||||
|
for (String value : values) {
|
||||||
|
|
||||||
|
// We cannot do an unapply()-based optimization if the selector value
|
||||||
|
// and the replaceMissingValuesWith value are the same, since we have to match on
|
||||||
|
// all values that are not present in the lookup.
|
||||||
|
final String convertedValue = Strings.emptyToNull(value);
|
||||||
|
if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
keys.addAll(lookup.unapply(convertedValue));
|
||||||
|
|
||||||
|
// If retainMissingValues is true and the selector value is not in the lookup map,
|
||||||
|
// there may be row values that match the selector value but are not included
|
||||||
|
// in the lookup map. Match on the selector value as well.
|
||||||
|
// If the selector value is overwritten in the lookup map, don't add selector value to keys.
|
||||||
|
if (exFn.isRetainMissingValue() && lookup.apply(convertedValue) == null) {
|
||||||
|
keys.add(convertedValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keys.isEmpty()) {
|
||||||
|
return this;
|
||||||
|
} else {
|
||||||
|
return new InDimFilter(dimension, keys, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,16 +24,13 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.base.Predicate;
|
import com.google.common.base.Predicate;
|
||||||
import com.google.common.base.Strings;
|
import com.google.common.base.Strings;
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
import com.metamx.common.StringUtils;
|
import com.metamx.common.StringUtils;
|
||||||
import io.druid.query.extraction.ExtractionFn;
|
import io.druid.query.extraction.ExtractionFn;
|
||||||
import io.druid.query.lookup.LookupExtractionFn;
|
|
||||||
import io.druid.query.lookup.LookupExtractor;
|
|
||||||
import io.druid.segment.filter.DimensionPredicateFilter;
|
import io.druid.segment.filter.DimensionPredicateFilter;
|
||||||
import io.druid.segment.filter.SelectorFilter;
|
import io.druid.segment.filter.SelectorFilter;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -54,7 +51,7 @@ public class SelectorDimFilter implements DimFilter
|
||||||
Preconditions.checkArgument(dimension != null, "dimension must not be null");
|
Preconditions.checkArgument(dimension != null, "dimension must not be null");
|
||||||
|
|
||||||
this.dimension = dimension;
|
this.dimension = dimension;
|
||||||
this.value = value;
|
this.value = Strings.nullToEmpty(value);
|
||||||
this.extractionFn = extractionFn;
|
this.extractionFn = extractionFn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -78,41 +75,7 @@ public class SelectorDimFilter implements DimFilter
|
||||||
@Override
|
@Override
|
||||||
public DimFilter optimize()
|
public DimFilter optimize()
|
||||||
{
|
{
|
||||||
if (this.getExtractionFn() instanceof LookupExtractionFn
|
return new InDimFilter(dimension, ImmutableList.of(value), extractionFn).optimize();
|
||||||
&& ((LookupExtractionFn) this.getExtractionFn()).isOptimize()) {
|
|
||||||
LookupExtractionFn exFn = (LookupExtractionFn) this.getExtractionFn();
|
|
||||||
LookupExtractor lookup = exFn.getLookup();
|
|
||||||
|
|
||||||
final String convertedValue = Strings.emptyToNull(value);
|
|
||||||
|
|
||||||
// We cannot do an unapply()-based optimization if the selector value
|
|
||||||
// and the replaceMissingValuesWith value are the same, since we have to match on
|
|
||||||
// all values that are not present in the lookup.
|
|
||||||
if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
final String mappingForValue = lookup.apply(convertedValue);
|
|
||||||
final List<String> keys = new ArrayList<>();
|
|
||||||
keys.addAll(lookup.unapply(convertedValue));
|
|
||||||
|
|
||||||
// If retainMissingValues is true and the selector value is not in the lookup map,
|
|
||||||
// there may be row values that match the selector value but are not included
|
|
||||||
// in the lookup map. Match on the selector value as well.
|
|
||||||
// If the selector value is overwritten in the lookup map, don't add selector value to keys.
|
|
||||||
if (exFn.isRetainMissingValue() && mappingForValue == null) {
|
|
||||||
keys.add(convertedValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (keys.isEmpty()) {
|
|
||||||
return this;
|
|
||||||
} else if (keys.size() == 1) {
|
|
||||||
return new SelectorDimFilter(dimension, keys.get(0), null);
|
|
||||||
} else {
|
|
||||||
return new InDimFilter(dimension, keys, null);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -35,10 +35,13 @@ import io.druid.data.input.impl.TimestampSpec;
|
||||||
import io.druid.js.JavaScriptConfig;
|
import io.druid.js.JavaScriptConfig;
|
||||||
import io.druid.query.extraction.ExtractionFn;
|
import io.druid.query.extraction.ExtractionFn;
|
||||||
import io.druid.query.extraction.JavaScriptExtractionFn;
|
import io.druid.query.extraction.JavaScriptExtractionFn;
|
||||||
|
import io.druid.query.extraction.MapLookupExtractor;
|
||||||
import io.druid.query.filter.BoundDimFilter;
|
import io.druid.query.filter.BoundDimFilter;
|
||||||
import io.druid.query.filter.DimFilter;
|
import io.druid.query.filter.DimFilter;
|
||||||
import io.druid.query.filter.Filter;
|
import io.druid.query.filter.Filter;
|
||||||
import io.druid.query.filter.InDimFilter;
|
import io.druid.query.filter.InDimFilter;
|
||||||
|
import io.druid.query.lookup.LookupExtractionFn;
|
||||||
|
import io.druid.query.lookup.LookupExtractor;
|
||||||
import io.druid.segment.IndexBuilder;
|
import io.druid.segment.IndexBuilder;
|
||||||
import io.druid.segment.StorageAdapter;
|
import io.druid.segment.StorageAdapter;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
|
@ -260,6 +263,48 @@ public class InFilterTest extends BaseFilterTest
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMatchWithLookupExtractionFn() {
|
||||||
|
final Map<String, String> stringMap = ImmutableMap.of(
|
||||||
|
"a", "HELLO",
|
||||||
|
"10", "HELLO",
|
||||||
|
"def", "HELLO",
|
||||||
|
"c", "BYE"
|
||||||
|
);
|
||||||
|
LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false);
|
||||||
|
LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true);
|
||||||
|
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim0", lookupFn, null, "HELLO"), ImmutableList.of("a"));
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim0", lookupFn, "HELLO", "BYE"), ImmutableList.of("a", "c"));
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim0", lookupFn, "UNKNOWN"), ImmutableList.of("b", "d", "e", "f"));
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "HELLO"), ImmutableList.of("b", "e"));
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.<String>of());
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.<String>of());
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d"));
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"),
|
||||||
|
ImmutableList.of("a", "b", "c", "d", "e", "f"));
|
||||||
|
|
||||||
|
final Map<String, String> stringMap2 = ImmutableMap.of(
|
||||||
|
"a", "e"
|
||||||
|
);
|
||||||
|
LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false);
|
||||||
|
LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true);
|
||||||
|
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, null, "e"), ImmutableList.of("a", "e"));
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, "a"), ImmutableList.<String>of());
|
||||||
|
|
||||||
|
final Map<String, String> stringMap3 = ImmutableMap.of(
|
||||||
|
"c", "500",
|
||||||
|
"100", "e"
|
||||||
|
);
|
||||||
|
LookupExtractor mapExtractor3 = new MapLookupExtractor(stringMap3, false);
|
||||||
|
LookupExtractionFn lookupFn3 = new LookupExtractionFn(mapExtractor3, false, null, false, true);
|
||||||
|
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim0", lookupFn3, null, "c"), ImmutableList.of("a", "b", "d", "e", "f"));
|
||||||
|
assertFilterMatches(toInFilterWithFn("dim0", lookupFn3, "e"), ImmutableList.<String>of());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private DimFilter toInFilter(String dim, String value, String... values)
|
private DimFilter toInFilter(String dim, String value, String... values)
|
||||||
{
|
{
|
||||||
return new InDimFilter(dim, Lists.asList(value, values), null);
|
return new InDimFilter(dim, Lists.asList(value, values), null);
|
||||||
|
|
|
@ -209,11 +209,11 @@ public class SelectorFilterTest extends BaseFilterTest
|
||||||
SelectorDimFilter optFilter4Optimized = new SelectorDimFilter("dim0", "5", null);
|
SelectorDimFilter optFilter4Optimized = new SelectorDimFilter("dim0", "5", null);
|
||||||
SelectorDimFilter optFilter6Optimized = new SelectorDimFilter("dim0", "5", null);
|
SelectorDimFilter optFilter6Optimized = new SelectorDimFilter("dim0", "5", null);
|
||||||
|
|
||||||
Assert.assertTrue(optFilter1 == optFilter1.optimize());
|
Assert.assertTrue(optFilter1.equals(optFilter1.optimize()));
|
||||||
Assert.assertTrue(optFilter2Optimized.equals(optFilter2.optimize()));
|
Assert.assertTrue(optFilter2Optimized.equals(optFilter2.optimize()));
|
||||||
Assert.assertTrue(optFilter3 == optFilter3.optimize());
|
Assert.assertTrue(optFilter3.equals(optFilter3.optimize()));
|
||||||
Assert.assertTrue(optFilter4Optimized.equals(optFilter4.optimize()));
|
Assert.assertTrue(optFilter4Optimized.equals(optFilter4.optimize()));
|
||||||
Assert.assertTrue(optFilter5 == optFilter5.optimize());
|
Assert.assertTrue(optFilter5.equals(optFilter5.optimize()));
|
||||||
Assert.assertTrue(optFilter6Optimized.equals(optFilter6.optimize()));
|
Assert.assertTrue(optFilter6Optimized.equals(optFilter6.optimize()));
|
||||||
|
|
||||||
assertFilterMatches(optFilter1, ImmutableList.of("0", "1", "2", "5"));
|
assertFilterMatches(optFilter1, ImmutableList.of("0", "1", "2", "5"));
|
||||||
|
|
Loading…
Reference in New Issue