mirror of https://github.com/apache/druid.git
optimize makeMatcher
This commit is contained in:
parent
7848429cbf
commit
64d638a386
|
@ -185,7 +185,7 @@ A filtered aggregator wraps any given aggregator, but only aggregates the values
|
|||
|
||||
This makes it possible to compute the results of a filtered and an unfiltered aggregation simultaneously, without having to issue multiple queries, and use both results as part of post-aggregations.
|
||||
|
||||
*Limitations:* The filtered aggregator currently only supports 'or', 'and', 'selector' and 'not' filters, i.e. matching one or multiple dimensions against a single value.
|
||||
*Limitations:* The filtered aggregator currently only supports 'or', 'and', 'selector', 'not' and 'extraction' filters, i.e. matching one or multiple dimensions against a single value.
|
||||
|
||||
*Note:* If only the filtered results are required, consider putting the filter on the query itself, which will be much faster since it does not require scanning all the data.
|
||||
|
||||
|
|
|
@ -105,10 +105,7 @@ The following matches dimension values in `[product_1, product_3, product_5]` fo
|
|||
"product_5": "bar_1",
|
||||
"product_3": "bar_1"
|
||||
}
|
||||
},
|
||||
"replaceMissingValueWith": "",
|
||||
"retainMissingValue": false,
|
||||
"injective": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -72,6 +72,7 @@ public class SimpleDictionaryEncodedColumn
|
|||
@Override
|
||||
public String lookupName(int id)
|
||||
{
|
||||
// Map an empty cached value to null so that null and empty dimension values are treated as equivalent by extraction functions.
|
||||
return Strings.emptyToNull(cachedLookups.get(id));
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@ import io.druid.segment.DimensionSelector;
|
|||
import io.druid.segment.data.Indexed;
|
||||
import io.druid.segment.data.IndexedInts;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
@ -50,8 +52,36 @@ public class ExtractionFilter implements Filter
|
|||
|
||||
private List<Filter> makeFilters(BitmapIndexSelector selector)
|
||||
{
|
||||
final Indexed<String> allDimVals = selector.getDimensionValues(dimension);
|
||||
Indexed<String> allDimVals = selector.getDimensionValues(dimension);
|
||||
final List<Filter> filters = Lists.newArrayList();
|
||||
if (allDimVals == null) {
|
||||
allDimVals = new Indexed<String>()
|
||||
{
|
||||
@Override
|
||||
public Iterator<String> iterator()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Class<? extends String> getClazz()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() { return 1; }
|
||||
|
||||
@Override
|
||||
public String get(int index) { return null;}
|
||||
|
||||
@Override
|
||||
public int indexOf(String value)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
if (allDimVals != null) {
|
||||
for (int i = 0; i < allDimVals.size(); i++) {
|
||||
String dimVal = allDimVals.get(i);
|
||||
|
@ -59,8 +89,6 @@ public class ExtractionFilter implements Filter
|
|||
filters.add(new SelectorFilter(dimension, dimVal));
|
||||
}
|
||||
}
|
||||
} else if (value.equals(Strings.nullToEmpty(fn.apply(null)))) {
|
||||
filters.add(new SelectorFilter(dimension, null));
|
||||
}
|
||||
return filters;
|
||||
}
|
||||
|
@ -98,6 +126,12 @@ public class ExtractionFilter implements Filter
|
|||
if (dimensionSelector == null) {
|
||||
return new BooleanValueMatcher(value.equals(Strings.nullToEmpty(fn.apply(null))));
|
||||
} else {
|
||||
final BitSet bitSetOfIds = new BitSet(dimensionSelector.getValueCardinality());
|
||||
for (int i = 0; i < dimensionSelector.getValueCardinality(); i++) {
|
||||
if (value.equals(Strings.nullToEmpty(fn.apply(dimensionSelector.lookupName(i))))) {
|
||||
bitSetOfIds.set(i);
|
||||
}
|
||||
}
|
||||
return new ValueMatcher()
|
||||
{
|
||||
@Override
|
||||
|
@ -106,7 +140,7 @@ public class ExtractionFilter implements Filter
|
|||
final IndexedInts row = dimensionSelector.getRow();
|
||||
final int size = row.size();
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (value.equals(Strings.nullToEmpty(fn.apply(dimensionSelector.lookupName(row.get(i)))))) {
|
||||
if (bitSetOfIds.get(row.get(i))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,6 +47,7 @@ import io.druid.query.TestQueryRunners;
|
|||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.query.aggregation.DoubleMaxAggregatorFactory;
|
||||
import io.druid.query.aggregation.DoubleSumAggregatorFactory;
|
||||
import io.druid.query.aggregation.FilteredAggregatorFactory;
|
||||
import io.druid.query.aggregation.JavaScriptAggregatorFactory;
|
||||
import io.druid.query.aggregation.LongSumAggregatorFactory;
|
||||
import io.druid.query.aggregation.PostAggregator;
|
||||
|
@ -245,243 +246,6 @@ public class GroupByQueryRunnerTest
|
|||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithExtractionDimFilterOptimazitionWithEmptyResult()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
extractionMap.put("automotive", "automotive0");
|
||||
extractionMap.put("business", "business0");
|
||||
extractionMap.put("entertainment", "entertainment0");
|
||||
extractionMap.put("health", "health0");
|
||||
extractionMap.put("mezzanine", "mezzanine0");
|
||||
extractionMap.put("news", "news0");
|
||||
extractionMap.put("premium", "premium0");
|
||||
extractionMap.put("technology", "technology0");
|
||||
extractionMap.put("travel", "travel0");
|
||||
|
||||
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
|
||||
|
||||
List<DimFilter> dimFilters = Lists.<DimFilter>newArrayList(
|
||||
new ExtractionDimFilter("quality", "Missing_value", lookupExtractionFn, null),
|
||||
new ExtractionDimFilter("quality", "business0", lookupExtractionFn, null),
|
||||
new SelectorDimFilter("quality", "entertainment"),
|
||||
new SelectorDimFilter("quality", "health"),
|
||||
new ExtractionDimFilter("quality", "mezzanine0", lookupExtractionFn, null),
|
||||
new ExtractionDimFilter("quality", "news0", lookupExtractionFn, null),
|
||||
new SelectorDimFilter("quality", "premium"),
|
||||
new SelectorDimFilter("quality", "technology"),
|
||||
new SelectorDimFilter("quality", "travel")
|
||||
);
|
||||
|
||||
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build())
|
||||
.build();
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
|
||||
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L));
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithExtractionDimFilterOptimazitionOneToOne()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
extractionMap.put("automotive", "automotive0");
|
||||
extractionMap.put("business", "business0");
|
||||
extractionMap.put("entertainment", "entertainment0");
|
||||
extractionMap.put("health", "health0");
|
||||
extractionMap.put("mezzanine", "mezzanine0");
|
||||
extractionMap.put("news", "news0");
|
||||
extractionMap.put("premium", "premium0");
|
||||
extractionMap.put("technology", "technology0");
|
||||
extractionMap.put("travel", "travel0");
|
||||
|
||||
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
|
||||
|
||||
List<DimFilter> dimFilters = Lists.<DimFilter>newArrayList(
|
||||
new ExtractionDimFilter("quality", "automotive0", lookupExtractionFn, null),
|
||||
new ExtractionDimFilter("quality", "business0", lookupExtractionFn, null),
|
||||
new SelectorDimFilter("quality", "entertainment"),
|
||||
new SelectorDimFilter("quality", "health"),
|
||||
new ExtractionDimFilter("quality", "mezzanine0", lookupExtractionFn, null),
|
||||
new ExtractionDimFilter("quality", "news0", lookupExtractionFn, null),
|
||||
new SelectorDimFilter("quality", "premium"),
|
||||
new SelectorDimFilter("quality", "technology"),
|
||||
new SelectorDimFilter("quality", "travel")
|
||||
);
|
||||
|
||||
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build())
|
||||
.build();
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
|
||||
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L));
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithExtractionDimFilterOptimazitionManyToOne()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
extractionMap.put("mezzanine", "newsANDmezzanine");
|
||||
extractionMap.put("news", "newsANDmezzanine");
|
||||
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.setDimFilter(new ExtractionDimFilter("quality", "newsANDmezzanine", lookupExtractionFn, null))
|
||||
.build();
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L));
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithExtractionDimFilterCaseNullValue()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
extractionMap.put("automotive", "automotive0");
|
||||
extractionMap.put("business", "business0");
|
||||
extractionMap.put("entertainment", "entertainment0");
|
||||
extractionMap.put("health", "health0");
|
||||
extractionMap.put("mezzanine", "");
|
||||
extractionMap.put("news", null);
|
||||
extractionMap.put("premium", "premium0");
|
||||
extractionMap.put("technology", "technology0");
|
||||
extractionMap.put("travel", "travel0");
|
||||
|
||||
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null))
|
||||
.build();
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L));
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
@Test public void testGroupByWithExtractionDimFilterWhenValueNotThere()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
extractionMap.put("mezzanine", "");
|
||||
extractionMap.put("news", null);
|
||||
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
|
||||
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
|
||||
)
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.setDimFilter(new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null)).build();
|
||||
List<Row> expectedResults = Arrays.asList();
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
|
||||
@Test public void testGroupByWithExtractionDimFilterNullDims()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
extractionMap.put("", "EMPTY");
|
||||
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
|
||||
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("null_column", "alias")))
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.setDimFilter(new ExtractionDimFilter("null_column", "EMPTY", lookupExtractionFn, null)).build();
|
||||
List<Row> expectedResults = Arrays
|
||||
.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, "rows", 13L, "idx", 5827L));
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithRebucketRename()
|
||||
{
|
||||
|
@ -558,7 +322,6 @@ public class GroupByQueryRunnerTest
|
|||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testGroupByWithSimpleRenameRetainMissingNonInjective()
|
||||
{
|
||||
|
@ -731,7 +494,10 @@ public class GroupByQueryRunnerTest
|
|||
.setDimensions(
|
||||
Lists.<DimensionSpec>newArrayList(
|
||||
new ExtractionDimensionSpec(
|
||||
"quality", "alias", new LookupExtractionFn(new MapLookupExtractor(map), false, "MISSING", true), null
|
||||
"quality",
|
||||
"alias",
|
||||
new LookupExtractionFn(new MapLookupExtractor(map), false, "MISSING", true),
|
||||
null
|
||||
)
|
||||
)
|
||||
)
|
||||
|
@ -4024,4 +3790,270 @@ public class GroupByQueryRunnerTest
|
|||
TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(fullQuery, Maps.newHashMap()), "");
|
||||
exec.shutdownNow();
|
||||
}
|
||||
|
||||
// Extraction filter tests
|
||||
|
||||
@Test
|
||||
public void testGroupByWithExtractionDimFilter()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
extractionMap.put("automotive", "automotiveAndBusinessAndNewsAndMezzanine");
|
||||
extractionMap.put("business", "automotiveAndBusinessAndNewsAndMezzanine");
|
||||
extractionMap.put("mezzanine", "automotiveAndBusinessAndNewsAndMezzanine");
|
||||
extractionMap.put("news", "automotiveAndBusinessAndNewsAndMezzanine");
|
||||
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
|
||||
|
||||
List<DimFilter> dimFilters = Lists.<DimFilter>newArrayList(
|
||||
new ExtractionDimFilter("quality", "automotiveAndBusinessAndNewsAndMezzanine", lookupExtractionFn, null),
|
||||
new SelectorDimFilter("quality", "entertainment"),
|
||||
new SelectorDimFilter("quality", "health"),
|
||||
new SelectorDimFilter("quality", "premium"),
|
||||
new SelectorDimFilter("quality", "technology"),
|
||||
new SelectorDimFilter("quality", "travel")
|
||||
);
|
||||
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(
|
||||
Lists.<DimensionSpec>newArrayList(
|
||||
new DefaultDimensionSpec(
|
||||
"quality",
|
||||
"alias"
|
||||
)
|
||||
)
|
||||
)
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(
|
||||
QueryRunnerTestHelper.rowsCount,
|
||||
new LongSumAggregatorFactory("idx", "index")
|
||||
)
|
||||
)
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build())
|
||||
.build();
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
|
||||
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)
|
||||
);
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithExtractionDimFilterCaseMappingValueIsNullOrEmpty()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
extractionMap.put("automotive", "automotive0");
|
||||
extractionMap.put("business", "business0");
|
||||
extractionMap.put("entertainment", "entertainment0");
|
||||
extractionMap.put("health", "health0");
|
||||
extractionMap.put("mezzanine", null);
|
||||
extractionMap.put("news", "");
|
||||
extractionMap.put("premium", "premium0");
|
||||
extractionMap.put("technology", "technology0");
|
||||
extractionMap.put("travel", "travel0");
|
||||
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(
|
||||
Lists.<DimensionSpec>newArrayList(
|
||||
new DefaultDimensionSpec(
|
||||
"quality",
|
||||
"alias"
|
||||
)
|
||||
)
|
||||
)
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(
|
||||
QueryRunnerTestHelper.rowsCount,
|
||||
new LongSumAggregatorFactory("idx", "index")
|
||||
)
|
||||
)
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null))
|
||||
.build();
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L)
|
||||
);
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithExtractionDimFilterWhenSearchValueNotInTheMap()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
|
||||
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(
|
||||
Lists.<DimensionSpec>newArrayList(
|
||||
new DefaultDimensionSpec(
|
||||
"quality",
|
||||
"alias"
|
||||
)
|
||||
)
|
||||
)
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(
|
||||
QueryRunnerTestHelper.rowsCount,
|
||||
new LongSumAggregatorFactory("idx", "index")
|
||||
)
|
||||
)
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.setDimFilter(
|
||||
new ExtractionDimFilter(
|
||||
"quality",
|
||||
"NOT_THERE",
|
||||
lookupExtractionFn,
|
||||
null
|
||||
)
|
||||
).build();
|
||||
List<Row> expectedResults = Arrays.asList();
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testGroupByWithExtractionDimFilterKeyisNull()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
extractionMap.put("", "NULLorEMPTY");
|
||||
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
|
||||
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(
|
||||
Lists.<DimensionSpec>newArrayList(
|
||||
new DefaultDimensionSpec(
|
||||
"null_column",
|
||||
"alias"
|
||||
)
|
||||
)
|
||||
)
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(
|
||||
QueryRunnerTestHelper.rowsCount,
|
||||
new LongSumAggregatorFactory("idx", "index")
|
||||
)
|
||||
)
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.setDimFilter(
|
||||
new ExtractionDimFilter(
|
||||
"null_column",
|
||||
"NULLorEMPTY",
|
||||
lookupExtractionFn,
|
||||
null
|
||||
)
|
||||
).build();
|
||||
List<Row> expectedResults = Arrays
|
||||
.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, "rows", 13L, "idx", 5827L)
|
||||
);
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGroupByWithAggregatorFilterAndExtractionFunction()
|
||||
{
|
||||
Map<String, String> extractionMap = new HashMap<>();
|
||||
extractionMap.put("automotive", "automotive0");
|
||||
extractionMap.put("business", "business0");
|
||||
extractionMap.put("entertainment", "entertainment0");
|
||||
extractionMap.put("health", "health0");
|
||||
extractionMap.put("mezzanine", "mezzanineANDnews");
|
||||
extractionMap.put("news", "mezzanineANDnews");
|
||||
extractionMap.put("premium", "premium0");
|
||||
extractionMap.put("technology", "technology0");
|
||||
extractionMap.put("travel", "travel0");
|
||||
|
||||
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
|
||||
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "missing", true);
|
||||
DimFilter filter = new ExtractionDimFilter("quality","mezzanineANDnews",lookupExtractionFn,null);
|
||||
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
|
||||
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
|
||||
.setDimensions(
|
||||
Lists.<DimensionSpec>newArrayList(
|
||||
new DefaultDimensionSpec(
|
||||
"quality",
|
||||
"alias"
|
||||
)
|
||||
)
|
||||
)
|
||||
.setAggregatorSpecs(
|
||||
Arrays.asList(
|
||||
new FilteredAggregatorFactory(QueryRunnerTestHelper.rowsCount, filter),
|
||||
(AggregatorFactory) new FilteredAggregatorFactory(
|
||||
new LongSumAggregatorFactory(
|
||||
"idx",
|
||||
"index"
|
||||
), filter
|
||||
)
|
||||
)
|
||||
)
|
||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||
.build();
|
||||
List<Row> expectedResults = Arrays.asList(
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 0L, "idx", 0L),
|
||||
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 0L, "idx", 0L),
|
||||
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 0L, "idx", 0L)
|
||||
);
|
||||
|
||||
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
|
||||
TestHelper.assertExpectedObjects(expectedResults, results, "");
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue