mirror of https://github.com/apache/druid.git

Merge pull request #1578 from b-slim/fix_extraction_filter_2
Fix UT and documentation for the extraction filter

Commit 4f61b42f40
@@ -185,7 +185,7 @@ A filtered aggregator wraps any given aggregator, but only aggregates the values
 This makes it possible to compute the results of a filtered and an unfiltered aggregation simultaneously, without having to issue multiple queries, and use both results as part of post-aggregations.
 
-*Limitations:* The filtered aggregator currently only supports 'or', 'and', 'selector' and 'not' filters, i.e. matching one or multiple dimensions against a single value.
+*Limitations:* The filtered aggregator currently only supports 'or', 'and', 'selector', 'not' and 'extraction' filters, i.e. matching one or multiple dimensions against a single value.
 
 *Note:* If only the filtered results are required, consider putting the filter on the query itself, which will be much faster since it does not require scanning all the data.

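Editor's note: the filtered-aggregator/extraction-filter combination this doc change describes is exercised by the new tests later in this commit. A minimal sketch built from the same constructor calls those tests use (the wrapper class and the choice of columns are illustrative only, not part of the commit):

```java
import io.druid.query.aggregation.FilteredAggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import io.druid.query.extraction.LookupExtractionFn;
import io.druid.query.extraction.MapLookupExtractor;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.ExtractionDimFilter;

import java.util.HashMap;
import java.util.Map;

public class FilteredAggregatorSketch
{
  public static FilteredAggregatorFactory filteredIdx()
  {
    // Map two raw "quality" values onto one merged key, as in the tests below.
    Map<String, String> extractionMap = new HashMap<>();
    extractionMap.put("mezzanine", "mezzanineANDnews");
    extractionMap.put("news", "mezzanineANDnews");

    LookupExtractionFn lookupExtractionFn =
        new LookupExtractionFn(new MapLookupExtractor(extractionMap), false, null, true);
    DimFilter filter = new ExtractionDimFilter("quality", "mezzanineANDnews", lookupExtractionFn, null);

    // Sums "index" only over rows whose extracted "quality" matches the filter
    // value; an unfiltered twin of the same aggregator can run in the same query.
    return new FilteredAggregatorFactory(new LongSumAggregatorFactory("idx", "index"), filter);
  }
}
```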
@@ -80,3 +80,33 @@ The following matches any dimension values for the dimension `name` between `'bar'` and `'foo'`
   "function" : "function(x) { return(x >= 'bar' && x <= 'foo') }"
 }
 ```
+
+### Extraction filter
+
+The extraction filter matches a dimension using a specific [extraction function](./dimensionspecs.html#extraction-functions).
+It matches any dimension value `input_key` for which the extraction function produces an `output_value` equal to the filter's `value`.
+
+**Example**
+
+The following matches dimension values in `[product_1, product_3, product_5]` for the column `product`:
+
+```json
+{
+  "filter": {
+    "type": "extraction",
+    "dimension": "product",
+    "value": "bar_1",
+    "extractionFn": {
+      "type": "lookup",
+      "lookup": {
+        "type": "map",
+        "map": {
+          "product_1": "bar_1",
+          "product_5": "bar_1",
+          "product_3": "bar_1"
+        }
+      }
+    }
+  }
+}
+```

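Editor's note: the Java changes later in this commit show how such a filter is evaluated on the bitmap path — every dictionary value of `product` is run through the extraction function, and each match becomes a selector filter. A self-contained sketch of that decomposition (plain Java; the dictionary contents are hypothetical, the real logic lives in `ExtractionFilter.makeFilters` below):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ExtractionFilterDecomposition
{
  public static void main(String[] args)
  {
    // The lookup map and filter value from the JSON example above.
    Map<String, String> lookup = new HashMap<>();
    lookup.put("product_1", "bar_1");
    lookup.put("product_5", "bar_1");
    lookup.put("product_3", "bar_1");
    String filterValue = "bar_1";

    // Hypothetical dictionary of values stored for the "product" column.
    List<String> dictionary =
        Arrays.asList("product_1", "product_2", "product_3", "product_4", "product_5");

    // Each dictionary value whose extraction output equals the filter value
    // becomes a selector filter on that raw value.
    List<String> selectorValues = new ArrayList<>();
    for (String dimVal : dictionary) {
      if (filterValue.equals(lookup.get(dimVal))) {
        selectorValues.add(dimVal);
      }
    }
    System.out.println(selectorValues); // [product_1, product_3, product_5]
  }
}
```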
@@ -24,13 +24,10 @@ import com.google.common.base.Strings;
 import com.google.inject.Inject;
 import io.druid.query.extraction.namespace.ExtractionNamespaceFunctionFactory;
 import io.druid.query.extraction.namespace.KafkaExtractionNamespace;
 import io.druid.query.extraction.namespace.URIExtractionNamespace;
 import io.druid.server.namespace.cache.NamespaceExtractionCacheManager;
 
 import javax.annotation.Nullable;
 import java.util.Map;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentMap;
 
 /**
  *

@@ -19,7 +19,6 @@
 
 package io.druid.server.namespace;
 
 import com.fasterxml.jackson.core.Version;
 import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.Module;
 import com.fasterxml.jackson.databind.ObjectMapper;

@@ -27,19 +26,13 @@ import com.fasterxml.jackson.databind.module.SimpleModule;
 import com.google.common.base.Throwables;
 import com.google.common.collect.ImmutableList;
 import com.google.inject.Binder;
 import com.google.inject.Injector;
 import com.google.inject.Provides;
 import com.google.inject.TypeLiteral;
 import com.google.inject.multibindings.MapBinder;
 import com.google.inject.name.Named;
 import io.druid.guice.LazySingleton;
 import io.druid.guice.LifecycleModule;
 import io.druid.guice.annotations.Json;
 import io.druid.initialization.DruidModule;
 import io.druid.query.extraction.namespace.ExtractionNamespace;
 import io.druid.query.extraction.namespace.ExtractionNamespaceFunctionFactory;
 import io.druid.query.extraction.namespace.KafkaExtractionNamespace;
 import io.druid.server.namespace.cache.NamespaceExtractionCacheManager;
 
 import java.io.IOException;
 import java.util.List;

@@ -43,7 +43,6 @@ public class ExtractionDimFilter implements DimFilter
   )
   {
     Preconditions.checkArgument(dimension != null, "dimension must not be null");
-    Preconditions.checkArgument(value != null, "value must not be null");
     Preconditions.checkArgument(extractionFn != null || dimExtractionFn != null, "extraction function must not be null");
 
     this.dimension = dimension;

@@ -17,6 +17,7 @@
 
 package io.druid.segment.column;
 
+import com.google.common.base.Strings;
 import com.metamx.common.guava.CloseQuietly;
 import io.druid.segment.data.CachingIndexed;
 import io.druid.segment.data.IndexedInts;

@@ -71,7 +72,8 @@ public class SimpleDictionaryEncodedColumn
   @Override
   public String lookupName(int id)
   {
-    return cachedLookups.get(id);
+    // Empty-to-null ensures that null and empty are equivalent for the extraction function
+    return Strings.emptyToNull(cachedLookups.get(id));
   }
 
   @Override

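Editor's note: the empty/null convention this change establishes (mirrored by `Strings.nullToEmpty` in the filter below) is easiest to see in isolation. A small sketch using only Guava's `Strings` helpers (the wrapper class is illustrative):

```java
import com.google.common.base.Strings;

public class NullEmptyEquivalence
{
  public static void main(String[] args)
  {
    // Druid stores absent dimension values as empty strings in the dictionary;
    // emptyToNull normalizes them to null before the extraction function runs...
    System.out.println(Strings.emptyToNull(""));    // null
    System.out.println(Strings.emptyToNull("foo")); // foo

    // ...and nullToEmpty normalizes the extraction result and the filter value,
    // so null and "" compare equal on both sides of the match.
    System.out.println(Strings.nullToEmpty(null).equals(Strings.nullToEmpty(""))); // true
  }
}
```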
@@ -17,18 +17,22 @@
 
 package io.druid.segment.filter;
 
+import com.google.common.base.Predicate;
+import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
 import com.metamx.collections.bitmap.ImmutableBitmap;
 import com.metamx.collections.bitmap.WrappedImmutableConciseBitmap;
 import io.druid.query.extraction.ExtractionFn;
 import io.druid.query.filter.BitmapIndexSelector;
 import io.druid.query.filter.Filter;
 import io.druid.query.filter.ValueMatcher;
 import io.druid.query.filter.ValueMatcherFactory;
 import io.druid.segment.ColumnSelectorFactory;
+import io.druid.segment.DimensionSelector;
 import io.druid.segment.data.Indexed;
 import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
+import io.druid.segment.data.IndexedInts;
 
+import java.util.BitSet;
+import java.util.Iterator;
 import java.util.List;
 
 /**

@@ -39,29 +43,52 @@ public class ExtractionFilter implements Filter
   private final String value;
   private final ExtractionFn fn;
 
-  public ExtractionFilter(
-      String dimension,
-      String value,
-      ExtractionFn fn
-  )
+  public ExtractionFilter(String dimension, String value, ExtractionFn fn)
   {
     this.dimension = dimension;
-    this.value = value;
+    this.value = Strings.nullToEmpty(value);
     this.fn = fn;
   }
 
   private List<Filter> makeFilters(BitmapIndexSelector selector)
   {
-    final Indexed<String> allDimVals = selector.getDimensionValues(dimension);
+    Indexed<String> allDimVals = selector.getDimensionValues(dimension);
     final List<Filter> filters = Lists.newArrayList();
-    if (allDimVals != null) {
+    if (allDimVals == null) {
+      allDimVals = new Indexed<String>()
+      {
+        @Override
+        public Iterator<String> iterator()
+        {
+          return null;
+        }
+
+        @Override
+        public Class<? extends String> getClazz()
+        {
+          return null;
+        }
+
+        @Override
+        public int size() { return 1; }
+
+        @Override
+        public String get(int index) { return null; }
+
+        @Override
+        public int indexOf(String value)
+        {
+          return 0;
+        }
+      };
+    }
 
     for (int i = 0; i < allDimVals.size(); i++) {
       String dimVal = allDimVals.get(i);
-      if (value.equals(fn.apply(dimVal))) {
+      if (value.equals(Strings.nullToEmpty(fn.apply(dimVal)))) {
        filters.add(new SelectorFilter(dimension, dimVal));
       }
     }
-    }
 
     return filters;
   }

@@ -79,13 +106,48 @@ public class ExtractionFilter implements Filter
   @Override
   public ValueMatcher makeMatcher(ValueMatcherFactory factory)
   {
-    throw new UnsupportedOperationException();
+    return factory.makeValueMatcher(
+        dimension, new Predicate<String>()
+        {
+          @Override
+          public boolean apply(String input)
+          {
+            // Assuming that null, absent, and empty dimension values are equivalent from the Druid perspective
+            return value.equals(Strings.nullToEmpty(fn.apply(Strings.emptyToNull(input))));
+          }
+        }
+    );
   }
 
   @Override
-  public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
+  public ValueMatcher makeMatcher(ColumnSelectorFactory columnSelectorFactory)
   {
-    throw new UnsupportedOperationException();
+    final DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimension, null);
+    if (dimensionSelector == null) {
+      return new BooleanValueMatcher(value.equals(Strings.nullToEmpty(fn.apply(null))));
+    } else {
+      final BitSet bitSetOfIds = new BitSet(dimensionSelector.getValueCardinality());
+      for (int i = 0; i < dimensionSelector.getValueCardinality(); i++) {
+        if (value.equals(Strings.nullToEmpty(fn.apply(dimensionSelector.lookupName(i))))) {
+          bitSetOfIds.set(i);
+        }
+      }
+      return new ValueMatcher()
+      {
+        @Override
+        public boolean matches()
+        {
+          final IndexedInts row = dimensionSelector.getRow();
+          final int size = row.size();
+          for (int i = 0; i < size; ++i) {
+            if (bitSetOfIds.get(row.get(i))) {
+              return true;
+            }
+          }
+          return false;
+        }
+      };
+    }
+  }
 
 }

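Editor's note: both new matchers apply the same comparison rule. A self-contained sketch of that rule (the map-backed `Function` is a hypothetical stand-in for `ExtractionFn`, assuming, as the tests below suggest, that the lookup treats a null key like the empty string; only Guava's `Strings` helpers are real API):

```java
import com.google.common.base.Function;
import com.google.common.base.Strings;

import java.util.HashMap;
import java.util.Map;

public class ExtractionMatchRule
{
  // Hypothetical stand-in for ExtractionFn.apply: a map lookup that returns
  // null when the key is absent (i.e. retainMissingValue = false, no replacement).
  static Function<String, String> fn(final Map<String, String> lookup)
  {
    return new Function<String, String>()
    {
      @Override
      public String apply(String input)
      {
        return lookup.get(Strings.nullToEmpty(input));
      }
    };
  }

  // The rule used by both matchers: normalize the row value empty->null before
  // extraction, normalize the extraction result null->empty before comparing.
  static boolean matches(String filterValue, Function<String, String> fn, String rowValue)
  {
    return Strings.nullToEmpty(filterValue)
                  .equals(Strings.nullToEmpty(fn.apply(Strings.emptyToNull(rowValue))));
  }

  public static void main(String[] args)
  {
    Map<String, String> lookup = new HashMap<>();
    lookup.put("", "NULLorEMPTY");

    // A null row, an empty row, and an absent map key behave identically:
    System.out.println(matches("NULLorEMPTY", fn(lookup), null)); // true
    System.out.println(matches("NULLorEMPTY", fn(lookup), ""));   // true
    System.out.println(matches("", fn(lookup), "no_such_key"));   // true (fn returns null -> "")
  }
}
```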
@@ -36,6 +36,7 @@ import io.druid.granularity.QueryGranularity;
 import io.druid.jackson.DefaultObjectMapper;
 import io.druid.query.BySegmentResultValue;
 import io.druid.query.BySegmentResultValueClass;
+import io.druid.query.Druids;
 import io.druid.query.FinalizeResultsQueryRunner;
 import io.druid.query.Query;
 import io.druid.query.QueryRunner;

@@ -46,6 +47,7 @@ import io.druid.query.TestQueryRunners;
 import io.druid.query.aggregation.AggregatorFactory;
 import io.druid.query.aggregation.DoubleMaxAggregatorFactory;
 import io.druid.query.aggregation.DoubleSumAggregatorFactory;
+import io.druid.query.aggregation.FilteredAggregatorFactory;
 import io.druid.query.aggregation.JavaScriptAggregatorFactory;
 import io.druid.query.aggregation.LongSumAggregatorFactory;
 import io.druid.query.aggregation.PostAggregator;

@@ -63,6 +65,7 @@ import io.druid.query.extraction.MapLookupExtractor;
 import io.druid.query.extraction.RegexDimExtractionFn;
 import io.druid.query.extraction.TimeFormatExtractionFn;
 import io.druid.query.filter.DimFilter;
+import io.druid.query.filter.ExtractionDimFilter;
 import io.druid.query.filter.JavaScriptDimFilter;
 import io.druid.query.filter.OrDimFilter;
 import io.druid.query.filter.RegexDimFilter;

@@ -3842,4 +3845,270 @@ public class GroupByQueryRunnerTest
     TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(fullQuery, Maps.newHashMap()), "");
     exec.shutdownNow();
   }
+
+  // Extraction Filters testing
+
+  @Test
+  public void testGroupByWithExtractionDimFilter()
+  {
+    Map<String, String> extractionMap = new HashMap<>();
+    extractionMap.put("automotive", "automotiveAndBusinessAndNewsAndMezzanine");
+    extractionMap.put("business", "automotiveAndBusinessAndNewsAndMezzanine");
+    extractionMap.put("mezzanine", "automotiveAndBusinessAndNewsAndMezzanine");
+    extractionMap.put("news", "automotiveAndBusinessAndNewsAndMezzanine");
+
+    MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
+    LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
+
+    List<DimFilter> dimFilters = Lists.<DimFilter>newArrayList(
+        new ExtractionDimFilter("quality", "automotiveAndBusinessAndNewsAndMezzanine", lookupExtractionFn, null),
+        new SelectorDimFilter("quality", "entertainment"),
+        new SelectorDimFilter("quality", "health"),
+        new SelectorDimFilter("quality", "premium"),
+        new SelectorDimFilter("quality", "technology"),
+        new SelectorDimFilter("quality", "travel")
+    );
+
+    GroupByQuery query = GroupByQuery.builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
+        .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build())
+        .build();
+
+    List<Row> expectedResults = Arrays.asList(
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
+
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)
+    );
+
+    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, "");
+  }
+
+  @Test
+  public void testGroupByWithExtractionDimFilterCaseMappingValueIsNullOrEmpty()
+  {
+    Map<String, String> extractionMap = new HashMap<>();
+    extractionMap.put("automotive", "automotive0");
+    extractionMap.put("business", "business0");
+    extractionMap.put("entertainment", "entertainment0");
+    extractionMap.put("health", "health0");
+    extractionMap.put("mezzanine", null);
+    extractionMap.put("news", "");
+    extractionMap.put("premium", "premium0");
+    extractionMap.put("technology", "technology0");
+    extractionMap.put("travel", "travel0");
+
+    MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
+    LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
+
+    GroupByQuery query = GroupByQuery.builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
+        .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null))
+        .build();
+
+    List<Row> expectedResults = Arrays.asList(
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L)
+    );
+
+    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, "");
+  }
+
+  @Test
+  public void testGroupByWithExtractionDimFilterWhenSearchValueNotInTheMap()
+  {
+    Map<String, String> extractionMap = new HashMap<>();
+    MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
+    LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
+
+    GroupByQuery query = GroupByQuery.builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
+        .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .setDimFilter(new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null))
+        .build();
+
+    List<Row> expectedResults = Arrays.asList();
+
+    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, "");
+  }
+
+  @Test
+  public void testGroupByWithExtractionDimFilterKeyisNull()
+  {
+    Map<String, String> extractionMap = new HashMap<>();
+    extractionMap.put("", "NULLorEMPTY");
+
+    MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
+    LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
+
+    GroupByQuery query = GroupByQuery.builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("null_column", "alias")))
+        .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .setDimFilter(new ExtractionDimFilter("null_column", "NULLorEMPTY", lookupExtractionFn, null))
+        .build();
+
+    List<Row> expectedResults = Arrays.asList(
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, "rows", 13L, "idx", 5827L)
+    );
+
+    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, "");
+  }
+
+  @Test
+  public void testGroupByWithAggregatorFilterAndExtractionFunction()
+  {
+    Map<String, String> extractionMap = new HashMap<>();
+    extractionMap.put("automotive", "automotive0");
+    extractionMap.put("business", "business0");
+    extractionMap.put("entertainment", "entertainment0");
+    extractionMap.put("health", "health0");
+    extractionMap.put("mezzanine", "mezzanineANDnews");
+    extractionMap.put("news", "mezzanineANDnews");
+    extractionMap.put("premium", "premium0");
+    extractionMap.put("technology", "technology0");
+    extractionMap.put("travel", "travel0");
+
+    MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
+    LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "missing", true);
+    DimFilter filter = new ExtractionDimFilter("quality", "mezzanineANDnews", lookupExtractionFn, null);
+
+    GroupByQuery query = GroupByQuery.builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
+        .setAggregatorSpecs(
+            Arrays.asList(
+                new FilteredAggregatorFactory(QueryRunnerTestHelper.rowsCount, filter),
+                (AggregatorFactory) new FilteredAggregatorFactory(new LongSumAggregatorFactory("idx", "index"), filter)
+            )
+        )
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .build();
+
+    List<Row> expectedResults = Arrays.asList(
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 0L, "idx", 0L),
+
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 0L, "idx", 0L),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 0L, "idx", 0L)
+    );
+
+    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
+    TestHelper.assertExpectedObjects(expectedResults, results, "");
+  }
+
 }

@@ -41,6 +41,7 @@ import io.druid.query.TestQueryRunners;
 import io.druid.query.aggregation.AggregatorFactory;
 import io.druid.query.aggregation.DoubleMaxAggregatorFactory;
 import io.druid.query.aggregation.DoubleMinAggregatorFactory;
+import io.druid.query.aggregation.FilteredAggregatorFactory;
 import io.druid.query.aggregation.PostAggregator;
 import io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory;
 import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;

@@ -54,6 +55,7 @@ import io.druid.query.extraction.RegexDimExtractionFn;
 import io.druid.query.extraction.TimeFormatExtractionFn;
 import io.druid.query.filter.AndDimFilter;
 import io.druid.query.filter.DimFilter;
+import io.druid.query.filter.ExtractionDimFilter;
 import io.druid.query.filter.SelectorDimFilter;
 import io.druid.query.spec.MultipleIntervalSegmentSpec;
 import io.druid.query.timeseries.TimeseriesQuery;

@@ -158,9 +160,7 @@ public class TopNQueryRunnerTest
         QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()
     );
     final QueryRunner<Result<TopNResultValue>> mergeRunner = chest.mergeResults(runner);
-    return mergeRunner.run(
-        query, context
-    );
+    return mergeRunner.run(query, context);
   }
 
   @Test

@@ -1611,7 +1611,6 @@ public class TopNQueryRunnerTest
   }
 
 
-
   @Test
   public void testTopNDimExtractionFastTopNOptimalWithReplaceMissing()
   {

@@ -3119,8 +3118,10 @@ public class TopNQueryRunnerTest
     );
     assertExpectedResults(expectedResults, query);
   }
 
   @Test
-  public void testAlphaNumericTopNWithNullPreviousStop(){
+  public void testAlphaNumericTopNWithNullPreviousStop()
+  {
     TopNQuery query = new TopNQueryBuilder()
         .dataSource(QueryRunnerTestHelper.dataSource)
         .granularity(QueryGranularity.ALL)

@@ -3149,4 +3150,106 @@ public class TopNQueryRunnerTest
     );
     TestHelper.assertExpectedResults(expectedResults, runner.run(query, new HashMap<String, Object>()));
   }
+
+  @Test
+  public void testTopNWithExtractionFilter()
+  {
+    Map<String, String> extractionMap = new HashMap<>();
+    extractionMap.put("spot", "spot0");
+    MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
+    LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
+
+    TopNQuery query = new TopNQueryBuilder()
+        .dataSource(QueryRunnerTestHelper.dataSource)
+        .granularity(QueryRunnerTestHelper.allGran)
+        .dimension(QueryRunnerTestHelper.marketDimension)
+        .metric("rows")
+        .threshold(3)
+        .intervals(QueryRunnerTestHelper.firstToThird)
+        .aggregators(QueryRunnerTestHelper.commonAggregators)
+        .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
+        .filters(new ExtractionDimFilter(QueryRunnerTestHelper.marketDimension, "spot0", lookupExtractionFn, null))
+        .build();
+
+    List<Result<TopNResultValue>> expectedResults = Arrays.asList(
+        new Result<>(
+            new DateTime("2011-04-01T00:00:00.000Z"),
+            new TopNResultValue(
+                Arrays.<Map<String, Object>>asList(
+                    ImmutableMap.<String, Object>of(
+                        QueryRunnerTestHelper.marketDimension, "spot",
+                        "rows", 18L,
+                        "index", 2231.8768157958984D,
+                        "addRowsIndexConstant", 2250.8768157958984D,
+                        "uniques", QueryRunnerTestHelper.UNIQUES_9
+                    )
+                )
+            )
+        )
+    );
+
+    assertExpectedResults(expectedResults, query);
+  }
+
+  @Test
+  public void testTopNWithExtractionFilterAndFilteredAggregatorCaseNoExistingValue()
+  {
+    Map<String, String> extractionMap = new HashMap<>();
+    extractionMap.put("", "NULL");
+
+    MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
+    LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
+    DimFilter extractionFilter = new ExtractionDimFilter("null_column", "NULL", lookupExtractionFn, null);
+    TopNQueryBuilder topNQueryBuilder = new TopNQueryBuilder()
+        .dataSource(QueryRunnerTestHelper.dataSource)
+        .granularity(QueryRunnerTestHelper.allGran)
+        .dimension("null_column")
+        .metric(QueryRunnerTestHelper.indexMetric)
+        .threshold(4)
+        .intervals(QueryRunnerTestHelper.fullOnInterval)
+        .aggregators(
+            Lists.newArrayList(
+                Iterables.concat(
+                    QueryRunnerTestHelper.commonAggregators,
+                    Lists.newArrayList(
+                        new FilteredAggregatorFactory(new DoubleMaxAggregatorFactory("maxIndex", "index"), extractionFilter),
+                        new DoubleMinAggregatorFactory("minIndex", "index")
+                    )
+                )
+            )
+        )
+        .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant));
+    TopNQuery topNQueryWithNULLValueExtraction = topNQueryBuilder
+        .filters(extractionFilter)
+        .build();
+
+    Map<String, Object> map = Maps.newHashMap();
+    map.put("null_column", null);
+    map.put("rows", 1209L);
+    map.put("index", 503332.5071372986D);
+    map.put("addRowsIndexConstant", 504542.5071372986D);
+    map.put("uniques", QueryRunnerTestHelper.UNIQUES_9);
+    map.put("maxIndex", 1870.06103515625D);
+    map.put("minIndex", 59.02102279663086D);
+    List<Result<TopNResultValue>> expectedResults = Arrays.asList(
+        new Result<>(
+            new DateTime("2011-01-12T00:00:00.000Z"),
+            new TopNResultValue(Arrays.asList(map))
+        )
+    );
+    assertExpectedResults(expectedResults, topNQueryWithNULLValueExtraction);
+  }
+
 }