Merge pull request #1578 from b-slim/fix_extraction_filter_2

Fix UT and documentation to the extraction filter
This commit is contained in:
cheddar 2015-09-01 10:46:20 -07:00
commit 4f61b42f40
9 changed files with 490 additions and 35 deletions

View File

@@ -185,7 +185,7 @@ A filtered aggregator wraps any given aggregator, but only aggregates the values
This makes it possible to compute the results of a filtered and an unfiltered aggregation simultaneously, without having to issue multiple queries, and use both results as part of post-aggregations.
*Limitations:* The filtered aggregator currently only supports 'or', 'and', 'selector' and 'not' filters, i.e. matching one or multiple dimensions against a single value.
*Limitations:* The filtered aggregator currently only supports 'or', 'and', 'selector', 'not' and 'extraction' filters, i.e. matching one or multiple dimensions against a single value.
*Note:* If only the filtered results are required, consider putting the filter on the query itself, which will be much faster since it does not require scanning all the data.
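For instance, a minimal sketch of an aggregations list that computes a total row count and a filtered row count in one pass (the `product` dimension and the aggregator names are illustrative, not part of this diff):
```json
[
  { "type" : "count", "name" : "rows" },
  {
    "type" : "filtered",
    "filter" : { "type" : "selector", "dimension" : "product", "value" : "product_1" },
    "aggregator" : { "type" : "count", "name" : "product_1_rows" }
  }
]
```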

View File

@@ -80,3 +80,33 @@ The following matches any dimension values for the dimension `name` between `'bar'` and `'foo'`
"function" : "function(x) { return(x >= 'bar' && x <= 'foo') }"
}
```
### Extraction filter
The extraction filter matches a dimension using a specific [extraction function](./dimensionspecs.html#extraction-functions).
The filter matches every dimension value for which the extraction function has a transformation entry `input_key=output_value` such that
`output_value` equals the filter's `value` and `input_key` is present as a value of the dimension.
**Example**
The following matches dimension values in `[product_1, product_3, product_5]` for the column `product`:
```json
{
"filter": {
"type": "extraction",
"dimension": "product",
"value": "bar_1",
"extractionFn": {
"type": "lookup",
"lookup": {
"type": "map",
"map": {
"product_1": "bar_1",
"product_5": "bar_1",
"product_3": "bar_1"
}
}
}
}
}
```
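Because this PR makes null and empty dimension values equivalent (see the filter and column changes below), a lookup keyed on the empty string can match rows where the dimension is null or absent. A sketch mirroring the `null_column` test later in this diff:
```json
{
  "filter": {
    "type": "extraction",
    "dimension": "null_column",
    "value": "NULLorEMPTY",
    "extractionFn": {
      "type": "lookup",
      "lookup": {
        "type": "map",
        "map": { "": "NULLorEMPTY" }
      }
    }
  }
}
```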

View File

@@ -24,13 +24,10 @@ import com.google.common.base.Strings;
import com.google.inject.Inject;
import io.druid.query.extraction.namespace.ExtractionNamespaceFunctionFactory;
import io.druid.query.extraction.namespace.KafkaExtractionNamespace;
import io.druid.query.extraction.namespace.URIExtractionNamespace;
import io.druid.server.namespace.cache.NamespaceExtractionCacheManager;
import javax.annotation.Nullable;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentMap;
/**
*

View File

@@ -19,7 +19,6 @@
package io.druid.server.namespace;
import com.fasterxml.jackson.core.Version;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -27,19 +26,13 @@ import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.inject.Binder;
import com.google.inject.Injector;
import com.google.inject.Provides;
import com.google.inject.TypeLiteral;
import com.google.inject.multibindings.MapBinder;
import com.google.inject.name.Named;
import io.druid.guice.LazySingleton;
import io.druid.guice.LifecycleModule;
import io.druid.guice.annotations.Json;
import io.druid.initialization.DruidModule;
import io.druid.query.extraction.namespace.ExtractionNamespace;
import io.druid.query.extraction.namespace.ExtractionNamespaceFunctionFactory;
import io.druid.query.extraction.namespace.KafkaExtractionNamespace;
import io.druid.server.namespace.cache.NamespaceExtractionCacheManager;
import java.io.IOException;
import java.util.List;

View File

@@ -43,7 +43,6 @@ public class ExtractionDimFilter implements DimFilter
)
{
Preconditions.checkArgument(dimension != null, "dimension must not be null");
Preconditions.checkArgument(value != null, "value must not be null");
Preconditions.checkArgument(extractionFn != null || dimExtractionFn != null, "extraction function must not be null");
this.dimension = dimension;

View File

@@ -17,6 +17,7 @@
package io.druid.segment.column;
import com.google.common.base.Strings;
import com.metamx.common.guava.CloseQuietly;
import io.druid.segment.data.CachingIndexed;
import io.druid.segment.data.IndexedInts;
@@ -71,7 +72,8 @@ public class SimpleDictionaryEncodedColumn
@Override
public String lookupName(int id)
{
return cachedLookups.get(id);
// Converting empty strings to null ensures that null and empty values are equivalent for extraction functions
return Strings.emptyToNull(cachedLookups.get(id));
}
@Override

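The interplay with the filter-side `nullToEmpty` calls (below) can be seen in a tiny standalone sketch; the class name is illustrative, and `Strings` is Guava's, as imported above:
```java
import com.google.common.base.Strings;

class NullEmptyEquivalenceSketch
{
  public static void main(String[] args)
  {
    // Column side (lookupName above): a stored empty string surfaces as null.
    System.out.println(Strings.emptyToNull(""));   // prints "null"
    // Filter side (ExtractionFilter below): a null extraction result compares as "".
    System.out.println(Strings.nullToEmpty(null)); // prints ""
    // Together, "", null, and an absent dimension value all compare as equal.
  }
}
```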
View File

@@ -17,18 +17,22 @@
package io.druid.segment.filter;
import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.bitmap.WrappedImmutableConciseBitmap;
import io.druid.query.extraction.ExtractionFn;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;
import io.druid.segment.ColumnSelectorFactory;
import io.druid.segment.DimensionSelector;
import io.druid.segment.data.Indexed;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
import io.druid.segment.data.IndexedInts;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
/**
@@ -39,27 +43,50 @@ public class ExtractionFilter implements Filter
private final String value;
private final ExtractionFn fn;
public ExtractionFilter(
String dimension,
String value,
ExtractionFn fn
)
public ExtractionFilter(String dimension, String value, ExtractionFn fn)
{
this.dimension = dimension;
this.value = value;
this.value = Strings.nullToEmpty(value);
this.fn = fn;
}
private List<Filter> makeFilters(BitmapIndexSelector selector)
{
final Indexed<String> allDimVals = selector.getDimensionValues(dimension);
Indexed<String> allDimVals = selector.getDimensionValues(dimension);
final List<Filter> filters = Lists.newArrayList();
if (allDimVals != null) {
for (int i = 0; i < allDimVals.size(); i++) {
String dimVal = allDimVals.get(i);
if (value.equals(fn.apply(dimVal))) {
filters.add(new SelectorFilter(dimension, dimVal));
if (allDimVals == null) {
// The dimension has no dictionary in this segment: substitute a single-entry
// Indexed holding only null, so the loop below still evaluates fn(null) once.
allDimVals = new Indexed<String>()
{
@Override
public Iterator<String> iterator()
{
return null;
}
@Override
public Class<? extends String> getClazz()
{
return null;
}
@Override
public int size() { return 1; }
@Override
public String get(int index) { return null; }
@Override
public int indexOf(String value)
{
return 0;
}
};
}
for (int i = 0; i < allDimVals.size(); i++) {
String dimVal = allDimVals.get(i);
if (value.equals(Strings.nullToEmpty(fn.apply(dimVal)))) {
filters.add(new SelectorFilter(dimension, dimVal));
}
}
@@ -79,13 +106,48 @@ public class ExtractionFilter implements Filter
@Override
public ValueMatcher makeMatcher(ValueMatcherFactory factory)
{
throw new UnsupportedOperationException();
return factory.makeValueMatcher(
dimension, new Predicate<String>()
{
@Override
public boolean apply(String input)
{
// Assume that null, absent, and empty dimension values are equivalent from the Druid perspective
return value.equals(Strings.nullToEmpty(fn.apply(Strings.emptyToNull(input))));
}
}
);
}
@Override
public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
public ValueMatcher makeMatcher(ColumnSelectorFactory columnSelectorFactory)
{
throw new UnsupportedOperationException();
final DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimension, null);
if (dimensionSelector == null) {
return new BooleanValueMatcher(value.equals(Strings.nullToEmpty(fn.apply(null))));
} else {
final BitSet bitSetOfIds = new BitSet(dimensionSelector.getValueCardinality());
for (int i = 0; i < dimensionSelector.getValueCardinality(); i++) {
if (value.equals(Strings.nullToEmpty(fn.apply(dimensionSelector.lookupName(i))))) {
bitSetOfIds.set(i);
}
}
return new ValueMatcher()
{
@Override
public boolean matches()
{
final IndexedInts row = dimensionSelector.getRow();
final int size = row.size();
for (int i = 0; i < size; ++i) {
if (bitSetOfIds.get(row.get(i))) {
return true;
}
}
return false;
}
};
}
}
}
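Both matchers above follow the same pattern: precompute, once per segment, the set of dictionary ids whose extracted (null-to-empty-normalized) value equals the filter value, then test each row's ids against that set. A self-contained sketch of that pattern, with illustrative names (`matchingIds`, `rowMatches`) rather than Druid's API:
```java
import java.util.BitSet;
import java.util.List;
import java.util.function.Function;

class ExtractionMatcherSketch
{
  // Precompute which dictionary ids survive the extraction function.
  static BitSet matchingIds(List<String> dictionary, Function<String, String> fn, String value)
  {
    final BitSet ids = new BitSet(dictionary.size());
    for (int i = 0; i < dictionary.size(); i++) {
      final String extracted = fn.apply(dictionary.get(i));
      // Normalize null to "" so null and empty extraction results compare as equal.
      if (value.equals(extracted == null ? "" : extracted)) {
        ids.set(i);
      }
    }
    return ids;
  }

  // A (possibly multi-valued) row matches if any of its dictionary ids is in the set.
  static boolean rowMatches(int[] rowIds, BitSet matching)
  {
    for (int id : rowIds) {
      if (matching.get(id)) {
        return true;
      }
    }
    return false;
  }
}
```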

View File

@@ -36,6 +36,7 @@ import io.druid.granularity.QueryGranularity;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.query.BySegmentResultValue;
import io.druid.query.BySegmentResultValueClass;
import io.druid.query.Druids;
import io.druid.query.FinalizeResultsQueryRunner;
import io.druid.query.Query;
import io.druid.query.QueryRunner;
@@ -46,6 +47,7 @@ import io.druid.query.TestQueryRunners;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.DoubleMaxAggregatorFactory;
import io.druid.query.aggregation.DoubleSumAggregatorFactory;
import io.druid.query.aggregation.FilteredAggregatorFactory;
import io.druid.query.aggregation.JavaScriptAggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
@@ -63,6 +65,7 @@ import io.druid.query.extraction.MapLookupExtractor;
import io.druid.query.extraction.RegexDimExtractionFn;
import io.druid.query.extraction.TimeFormatExtractionFn;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.ExtractionDimFilter;
import io.druid.query.filter.JavaScriptDimFilter;
import io.druid.query.filter.OrDimFilter;
import io.druid.query.filter.RegexDimFilter;
@@ -3842,4 +3845,270 @@ public class GroupByQueryRunnerTest
TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(fullQuery, Maps.newHashMap()), "");
exec.shutdownNow();
}
// Extraction filter tests
@Test
public void testGroupByWithExtractionDimFilter()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("automotive", "automotiveAndBusinessAndNewsAndMezzanine");
extractionMap.put("business", "automotiveAndBusinessAndNewsAndMezzanine");
extractionMap.put("mezzanine", "automotiveAndBusinessAndNewsAndMezzanine");
extractionMap.put("news", "automotiveAndBusinessAndNewsAndMezzanine");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
List<DimFilter> dimFilters = Lists.<DimFilter>newArrayList(
new ExtractionDimFilter("quality", "automotiveAndBusinessAndNewsAndMezzanine", lookupExtractionFn, null),
new SelectorDimFilter("quality", "entertainment"),
new SelectorDimFilter("quality", "health"),
new SelectorDimFilter("quality", "premium"),
new SelectorDimFilter("quality", "technology"),
new SelectorDimFilter("quality", "travel")
);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(
Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec(
"quality",
"alias"
)
)
)
.setAggregatorSpecs(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build())
.build();
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)
);
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithExtractionDimFilterCaseMappingValueIsNullOrEmpty()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("automotive", "automotive0");
extractionMap.put("business", "business0");
extractionMap.put("entertainment", "entertainment0");
extractionMap.put("health", "health0");
extractionMap.put("mezzanine", null);
extractionMap.put("news", "");
extractionMap.put("premium", "premium0");
extractionMap.put("technology", "technology0");
extractionMap.put("travel", "travel0");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(
Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec(
"quality",
"alias"
)
)
)
.setAggregatorSpecs(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null))
.build();
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L)
);
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithExtractionDimFilterWhenSearchValueNotInTheMap()
{
Map<String, String> extractionMap = new HashMap<>();
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(
Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec(
"quality",
"alias"
)
)
)
.setAggregatorSpecs(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(
new ExtractionDimFilter(
"quality",
"NOT_THERE",
lookupExtractionFn,
null
)
).build();
List<Row> expectedResults = Arrays.asList();
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithExtractionDimFilterKeyIsNull()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("", "NULLorEMPTY");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(
Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec(
"null_column",
"alias"
)
)
)
.setAggregatorSpecs(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(
new ExtractionDimFilter(
"null_column",
"NULLorEMPTY",
lookupExtractionFn,
null
)
).build();
List<Row> expectedResults = Arrays
.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, "rows", 13L, "idx", 5827L)
);
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithAggregatorFilterAndExtractionFunction()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("automotive", "automotive0");
extractionMap.put("business", "business0");
extractionMap.put("entertainment", "entertainment0");
extractionMap.put("health", "health0");
extractionMap.put("mezzanine", "mezzanineANDnews");
extractionMap.put("news", "mezzanineANDnews");
extractionMap.put("premium", "premium0");
extractionMap.put("technology", "technology0");
extractionMap.put("travel", "travel0");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "missing", true);
DimFilter filter = new ExtractionDimFilter("quality", "mezzanineANDnews", lookupExtractionFn, null);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(
Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec(
"quality",
"alias"
)
)
)
.setAggregatorSpecs(
Arrays.asList(
new FilteredAggregatorFactory(QueryRunnerTestHelper.rowsCount, filter),
(AggregatorFactory) new FilteredAggregatorFactory(
new LongSumAggregatorFactory(
"idx",
"index"
), filter
)
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.build();
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 0L, "idx", 0L)
);
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
}

View File

@@ -41,6 +41,7 @@ import io.druid.query.TestQueryRunners;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.DoubleMaxAggregatorFactory;
import io.druid.query.aggregation.DoubleMinAggregatorFactory;
import io.druid.query.aggregation.FilteredAggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
import io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory;
import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
@@ -54,6 +55,7 @@ import io.druid.query.extraction.RegexDimExtractionFn;
import io.druid.query.extraction.TimeFormatExtractionFn;
import io.druid.query.filter.AndDimFilter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.ExtractionDimFilter;
import io.druid.query.filter.SelectorDimFilter;
import io.druid.query.spec.MultipleIntervalSegmentSpec;
import io.druid.query.timeseries.TimeseriesQuery;
@@ -158,9 +160,7 @@ public class TopNQueryRunnerTest
QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()
);
final QueryRunner<Result<TopNResultValue>> mergeRunner = chest.mergeResults(runner);
return mergeRunner.run(
query, context
);
return mergeRunner.run(query, context);
}
@Test
@@ -1611,7 +1611,6 @@ public class TopNQueryRunnerTest
}
@Test
public void testTopNDimExtractionFastTopNOptimalWithReplaceMissing()
{
@@ -3119,8 +3118,10 @@ public class TopNQueryRunnerTest
);
assertExpectedResults(expectedResults, query);
}
@Test
public void testAlphaNumericTopNWithNullPreviousStop(){
public void testAlphaNumericTopNWithNullPreviousStop()
{
TopNQuery query = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryGranularity.ALL)
@@ -3149,4 +3150,106 @@ public class TopNQueryRunnerTest
);
TestHelper.assertExpectedResults(expectedResults, runner.run(query, new HashMap<String, Object>()));
}
@Test
public void testTopNWithExtractionFilter()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("spot", "spot0");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
TopNQuery query = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.dimension(QueryRunnerTestHelper.marketDimension)
.metric("rows")
.threshold(3)
.intervals(QueryRunnerTestHelper.firstToThird)
.aggregators(QueryRunnerTestHelper.commonAggregators)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
.filters(
new ExtractionDimFilter(
QueryRunnerTestHelper.marketDimension,
"spot0",
lookupExtractionFn,
null
)
)
.build();
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
new Result<>(
new DateTime("2011-04-01T00:00:00.000Z"),
new TopNResultValue(
Arrays.<Map<String, Object>>asList(
ImmutableMap.<String, Object>of(
QueryRunnerTestHelper.marketDimension, "spot",
"rows", 18L,
"index", 2231.8768157958984D,
"addRowsIndexConstant", 2250.8768157958984D,
"uniques", QueryRunnerTestHelper.UNIQUES_9
)
)
)
)
);
assertExpectedResults(expectedResults, query);
}
@Test
public void testTopNWithExtractionFilterAndFilteredAggregatorCaseNoExistingValue()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("", "NULL");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
DimFilter extractionFilter = new ExtractionDimFilter("null_column", "NULL", lookupExtractionFn, null);
TopNQueryBuilder topNQueryBuilder = new TopNQueryBuilder()
.dataSource(QueryRunnerTestHelper.dataSource)
.granularity(QueryRunnerTestHelper.allGran)
.dimension("null_column")
.metric(QueryRunnerTestHelper.indexMetric)
.threshold(4)
.intervals(QueryRunnerTestHelper.fullOnInterval)
.aggregators(
Lists.newArrayList(
Iterables.concat(
QueryRunnerTestHelper.commonAggregators, Lists.newArrayList(
new FilteredAggregatorFactory(
new DoubleMaxAggregatorFactory("maxIndex", "index"),
extractionFilter
),
new DoubleMinAggregatorFactory("minIndex", "index")
)
)
)
)
.postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant));
TopNQuery topNQueryWithNULLValueExtraction = topNQueryBuilder
.filters(extractionFilter)
.build();
Map<String, Object> map = Maps.newHashMap();
map.put("null_column", null);
map.put("rows", 1209L);
map.put("index", 503332.5071372986D);
map.put("addRowsIndexConstant", 504542.5071372986D);
map.put("uniques", QueryRunnerTestHelper.UNIQUES_9);
map.put("maxIndex", 1870.06103515625D);
map.put("minIndex", 59.02102279663086D);
List<Result<TopNResultValue>> expectedResults = Arrays.asList(
new Result<>(
new DateTime("2011-01-12T00:00:00.000Z"),
new TopNResultValue(
Arrays.asList(
map
)
)
)
);
assertExpectedResults(expectedResults, topNQueryWithNULLValueExtraction);
}
}