optimize makeMatcher

This commit is contained in:
Slim Bouguerra 2015-08-04 09:07:47 -05:00
parent 7848429cbf
commit 64d638a386
5 changed files with 312 additions and 248 deletions

View File

@ -185,7 +185,7 @@ A filtered aggregator wraps any given aggregator, but only aggregates the values
This makes it possible to compute the results of a filtered and an unfiltered aggregation simultaneously, without having to issue multiple queries, and use both results as part of post-aggregations. This makes it possible to compute the results of a filtered and an unfiltered aggregation simultaneously, without having to issue multiple queries, and use both results as part of post-aggregations.
*Limitations:* The filtered aggregator currently only supports 'or', 'and', 'selector' and 'not' filters, i.e. matching one or multiple dimensions against a single value. *Limitations:* The filtered aggregator currently only supports 'or', 'and', 'selector', 'not' and 'Extraction' filters, i.e. matching one or multiple dimensions against a single value.
*Note:* If only the filtered results are required, consider putting the filter on the query itself, which will be much faster since it does not require scanning all the data. *Note:* If only the filtered results are required, consider putting the filter on the query itself, which will be much faster since it does not require scanning all the data.

View File

@ -105,10 +105,7 @@ The following matches dimension values in `[product_1, product_3, product_5]` fo
"product_5": "bar_1", "product_5": "bar_1",
"product_3": "bar_1" "product_3": "bar_1"
} }
}, }
"replaceMissingValueWith": "",
"retainMissingValue": false,
"injective": false
} }
} }
} }

View File

@ -72,6 +72,7 @@ public class SimpleDictionaryEncodedColumn
@Override @Override
public String lookupName(int id) public String lookupName(int id)
{ {
//Empty to Null will ensure that null and empty are equivalent for extraction function
return Strings.emptyToNull(cachedLookups.get(id)); return Strings.emptyToNull(cachedLookups.get(id));
} }

View File

@ -31,6 +31,8 @@ import io.druid.segment.DimensionSelector;
import io.druid.segment.data.Indexed; import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts; import io.druid.segment.data.IndexedInts;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List; import java.util.List;
/** /**
@ -50,8 +52,36 @@ public class ExtractionFilter implements Filter
private List<Filter> makeFilters(BitmapIndexSelector selector) private List<Filter> makeFilters(BitmapIndexSelector selector)
{ {
final Indexed<String> allDimVals = selector.getDimensionValues(dimension); Indexed<String> allDimVals = selector.getDimensionValues(dimension);
final List<Filter> filters = Lists.newArrayList(); final List<Filter> filters = Lists.newArrayList();
if (allDimVals == null) {
allDimVals = new Indexed<String>()
{
@Override
public Iterator<String> iterator()
{
return null;
}
@Override
public Class<? extends String> getClazz()
{
return null;
}
@Override
public int size() { return 1; }
@Override
public String get(int index) { return null;}
@Override
public int indexOf(String value)
{
return 0;
}
};
}
if (allDimVals != null) { if (allDimVals != null) {
for (int i = 0; i < allDimVals.size(); i++) { for (int i = 0; i < allDimVals.size(); i++) {
String dimVal = allDimVals.get(i); String dimVal = allDimVals.get(i);
@ -59,8 +89,6 @@ public class ExtractionFilter implements Filter
filters.add(new SelectorFilter(dimension, dimVal)); filters.add(new SelectorFilter(dimension, dimVal));
} }
} }
} else if (value.equals(Strings.nullToEmpty(fn.apply(null)))) {
filters.add(new SelectorFilter(dimension, null));
} }
return filters; return filters;
} }
@ -98,6 +126,12 @@ public class ExtractionFilter implements Filter
if (dimensionSelector == null) { if (dimensionSelector == null) {
return new BooleanValueMatcher(value.equals(Strings.nullToEmpty(fn.apply(null)))); return new BooleanValueMatcher(value.equals(Strings.nullToEmpty(fn.apply(null))));
} else { } else {
final BitSet bitSetOfIds = new BitSet(dimensionSelector.getValueCardinality());
for (int i = 0; i < dimensionSelector.getValueCardinality(); i++) {
if (value.equals(Strings.nullToEmpty(fn.apply(dimensionSelector.lookupName(i))))) {
bitSetOfIds.set(i);
}
}
return new ValueMatcher() return new ValueMatcher()
{ {
@Override @Override
@ -106,7 +140,7 @@ public class ExtractionFilter implements Filter
final IndexedInts row = dimensionSelector.getRow(); final IndexedInts row = dimensionSelector.getRow();
final int size = row.size(); final int size = row.size();
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
if (value.equals(Strings.nullToEmpty(fn.apply(dimensionSelector.lookupName(row.get(i)))))) { if (bitSetOfIds.get(row.get(i))) {
return true; return true;
} }
} }

View File

@ -47,6 +47,7 @@ import io.druid.query.TestQueryRunners;
import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.DoubleMaxAggregatorFactory; import io.druid.query.aggregation.DoubleMaxAggregatorFactory;
import io.druid.query.aggregation.DoubleSumAggregatorFactory; import io.druid.query.aggregation.DoubleSumAggregatorFactory;
import io.druid.query.aggregation.FilteredAggregatorFactory;
import io.druid.query.aggregation.JavaScriptAggregatorFactory; import io.druid.query.aggregation.JavaScriptAggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory; import io.druid.query.aggregation.LongSumAggregatorFactory;
import io.druid.query.aggregation.PostAggregator; import io.druid.query.aggregation.PostAggregator;
@ -245,243 +246,6 @@ public class GroupByQueryRunnerTest
TestHelper.assertExpectedObjects(expectedResults, results, ""); TestHelper.assertExpectedObjects(expectedResults, results, "");
} }
@Test
public void testGroupByWithExtractionDimFilterOptimazitionWithEmptyResult()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("automotive", "automotive0");
extractionMap.put("business", "business0");
extractionMap.put("entertainment", "entertainment0");
extractionMap.put("health", "health0");
extractionMap.put("mezzanine", "mezzanine0");
extractionMap.put("news", "news0");
extractionMap.put("premium", "premium0");
extractionMap.put("technology", "technology0");
extractionMap.put("travel", "travel0");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
List<DimFilter> dimFilters = Lists.<DimFilter>newArrayList(
new ExtractionDimFilter("quality", "Missing_value", lookupExtractionFn, null),
new ExtractionDimFilter("quality", "business0", lookupExtractionFn, null),
new SelectorDimFilter("quality", "entertainment"),
new SelectorDimFilter("quality", "health"),
new ExtractionDimFilter("quality", "mezzanine0", lookupExtractionFn, null),
new ExtractionDimFilter("quality", "news0", lookupExtractionFn, null),
new SelectorDimFilter("quality", "premium"),
new SelectorDimFilter("quality", "technology"),
new SelectorDimFilter("quality", "travel")
);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
.setAggregatorSpecs(
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build())
.build();
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithExtractionDimFilterOptimazitionOneToOne()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("automotive", "automotive0");
extractionMap.put("business", "business0");
extractionMap.put("entertainment", "entertainment0");
extractionMap.put("health", "health0");
extractionMap.put("mezzanine", "mezzanine0");
extractionMap.put("news", "news0");
extractionMap.put("premium", "premium0");
extractionMap.put("technology", "technology0");
extractionMap.put("travel", "travel0");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
List<DimFilter> dimFilters = Lists.<DimFilter>newArrayList(
new ExtractionDimFilter("quality", "automotive0", lookupExtractionFn, null),
new ExtractionDimFilter("quality", "business0", lookupExtractionFn, null),
new SelectorDimFilter("quality", "entertainment"),
new SelectorDimFilter("quality", "health"),
new ExtractionDimFilter("quality", "mezzanine0", lookupExtractionFn, null),
new ExtractionDimFilter("quality", "news0", lookupExtractionFn, null),
new SelectorDimFilter("quality", "premium"),
new SelectorDimFilter("quality", "technology"),
new SelectorDimFilter("quality", "travel")
);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
.setAggregatorSpecs(
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build())
.build();
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithExtractionDimFilterOptimazitionManyToOne()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("mezzanine", "newsANDmezzanine");
extractionMap.put("news", "newsANDmezzanine");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
.setAggregatorSpecs(
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(new ExtractionDimFilter("quality", "newsANDmezzanine", lookupExtractionFn, null))
.build();
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithExtractionDimFilterCaseNullValue()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("automotive", "automotive0");
extractionMap.put("business", "business0");
extractionMap.put("entertainment", "entertainment0");
extractionMap.put("health", "health0");
extractionMap.put("mezzanine", "");
extractionMap.put("news", null);
extractionMap.put("premium", "premium0");
extractionMap.put("technology", "technology0");
extractionMap.put("travel", "travel0");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
.setAggregatorSpecs(
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null))
.build();
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test public void testGroupByWithExtractionDimFilterWhenValueNotThere()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("mezzanine", "");
extractionMap.put("news", null);
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
.setAggregatorSpecs(
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null)).build();
List<Row> expectedResults = Arrays.asList();
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test public void testGroupByWithExtractionDimFilterNullDims()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("", "EMPTY");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("null_column", "alias")))
.setAggregatorSpecs(
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(new ExtractionDimFilter("null_column", "EMPTY", lookupExtractionFn, null)).build();
List<Row> expectedResults = Arrays
.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, "rows", 13L, "idx", 5827L));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test @Test
public void testGroupByWithRebucketRename() public void testGroupByWithRebucketRename()
{ {
@ -558,7 +322,6 @@ public class GroupByQueryRunnerTest
} }
@Test @Test
public void testGroupByWithSimpleRenameRetainMissingNonInjective() public void testGroupByWithSimpleRenameRetainMissingNonInjective()
{ {
@ -731,7 +494,10 @@ public class GroupByQueryRunnerTest
.setDimensions( .setDimensions(
Lists.<DimensionSpec>newArrayList( Lists.<DimensionSpec>newArrayList(
new ExtractionDimensionSpec( new ExtractionDimensionSpec(
"quality", "alias", new LookupExtractionFn(new MapLookupExtractor(map), false, "MISSING", true), null "quality",
"alias",
new LookupExtractionFn(new MapLookupExtractor(map), false, "MISSING", true),
null
) )
) )
) )
@ -4024,4 +3790,270 @@ public class GroupByQueryRunnerTest
TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(fullQuery, Maps.newHashMap()), ""); TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(fullQuery, Maps.newHashMap()), "");
exec.shutdownNow(); exec.shutdownNow();
} }
// Extraction Filters testing
@Test
public void testGroupByWithExtractionDimFilter()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("automotive", "automotiveAndBusinessAndNewsAndMezzanine");
extractionMap.put("business", "automotiveAndBusinessAndNewsAndMezzanine");
extractionMap.put("mezzanine", "automotiveAndBusinessAndNewsAndMezzanine");
extractionMap.put("news", "automotiveAndBusinessAndNewsAndMezzanine");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
List<DimFilter> dimFilters = Lists.<DimFilter>newArrayList(
new ExtractionDimFilter("quality", "automotiveAndBusinessAndNewsAndMezzanine", lookupExtractionFn, null),
new SelectorDimFilter("quality", "entertainment"),
new SelectorDimFilter("quality", "health"),
new SelectorDimFilter("quality", "premium"),
new SelectorDimFilter("quality", "technology"),
new SelectorDimFilter("quality", "travel")
);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(
Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec(
"quality",
"alias"
)
)
)
.setAggregatorSpecs(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build())
.build();
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)
);
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithExtractionDimFilterCaseMappingValueIsNullOrEmpty()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("automotive", "automotive0");
extractionMap.put("business", "business0");
extractionMap.put("entertainment", "entertainment0");
extractionMap.put("health", "health0");
extractionMap.put("mezzanine", null);
extractionMap.put("news", "");
extractionMap.put("premium", "premium0");
extractionMap.put("technology", "technology0");
extractionMap.put("travel", "travel0");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(
Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec(
"quality",
"alias"
)
)
)
.setAggregatorSpecs(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null))
.build();
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L)
);
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithExtractionDimFilterWhenSearchValueNotInTheMap()
{
Map<String, String> extractionMap = new HashMap<>();
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(
Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec(
"quality",
"alias"
)
)
)
.setAggregatorSpecs(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(
new ExtractionDimFilter(
"quality",
"NOT_THERE",
lookupExtractionFn,
null
)
).build();
List<Row> expectedResults = Arrays.asList();
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithExtractionDimFilterKeyisNull()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("", "NULLorEMPTY");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(
Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec(
"null_column",
"alias"
)
)
)
.setAggregatorSpecs(
Arrays.asList(
QueryRunnerTestHelper.rowsCount,
new LongSumAggregatorFactory("idx", "index")
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.setDimFilter(
new ExtractionDimFilter(
"null_column",
"NULLorEMPTY",
lookupExtractionFn,
null
)
).build();
List<Row> expectedResults = Arrays
.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, "rows", 13L, "idx", 5827L)
);
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
@Test
public void testGroupByWithAggregatorFilterAndExtractionFunction()
{
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("automotive", "automotive0");
extractionMap.put("business", "business0");
extractionMap.put("entertainment", "entertainment0");
extractionMap.put("health", "health0");
extractionMap.put("mezzanine", "mezzanineANDnews");
extractionMap.put("news", "mezzanineANDnews");
extractionMap.put("premium", "premium0");
extractionMap.put("technology", "technology0");
extractionMap.put("travel", "travel0");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "missing", true);
DimFilter filter = new ExtractionDimFilter("quality","mezzanineANDnews",lookupExtractionFn,null);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
.setDimensions(
Lists.<DimensionSpec>newArrayList(
new DefaultDimensionSpec(
"quality",
"alias"
)
)
)
.setAggregatorSpecs(
Arrays.asList(
new FilteredAggregatorFactory(QueryRunnerTestHelper.rowsCount, filter),
(AggregatorFactory) new FilteredAggregatorFactory(
new LongSumAggregatorFactory(
"idx",
"index"
), filter
)
)
)
.setGranularity(QueryRunnerTestHelper.dayGran)
.build();
List<Row> expectedResults = Arrays.asList(
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 0L, "idx", 0L),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 0L, "idx", 0L)
);
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
} }