From 65654ddbf95be684819285097f4c0ffb83229e66 Mon Sep 17 00:00:00 2001 From: Slim Bouguerra Date: Mon, 27 Jul 2015 16:23:14 -0500 Subject: [PATCH 1/7] adding documentation about extraction filter --- docs/content/querying/filters.md | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/content/querying/filters.md b/docs/content/querying/filters.md index 0fe931dbb96..2cc780bcd89 100644 --- a/docs/content/querying/filters.md +++ b/docs/content/querying/filters.md @@ -80,3 +80,36 @@ The following matches any dimension values for the dimension `name` between `'ba "function" : "function(x) { return(x >= 'bar' && x <= 'foo') }" } ``` + +### Extraction filter + +Extraction filter matches a dimension using some specific [Extraction function](./dimensionspecs.html#extraction-functions). +The filter matches the values for which the extraction function has transformation entry `input_key=output_value` where + `output_value` is equal to the filter `value` and `input_key` is present as dimension. 
+ +**Example** +The following matches dimension values in `[product_1, product_3, product_5]` for the column `product` + +```json +{ + "filter": { + "type": "extraction", + "dimension": "product", + "value": "bar_1", + "extractionFn": { + "type": "lookup", + "lookup": { + "type": "map", + "map": { + "product_1": "bar_1", + "product_5": "bar_1", + "product_3": "bar_1" + } + }, + "replaceMissingValueWith": "", + "retainMissingValue": false, + "injective": false + } + } +} +``` From dda0790a60ef3738552a539eb41e637adfa3e5ea Mon Sep 17 00:00:00 2001 From: Slim Bouguerra Date: Fri, 24 Jul 2015 09:54:12 -0500 Subject: [PATCH 2/7] Fix extractionFilter by implementing make matcher Fix getBitmapIndex to consider the case were dim is null Unit Test for exractionFn with empty result and null_column UT for TopN queries with Extraction filter refactor in Extractiuon fileter makematcher for realtime segment and clean code in b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java fix to make sure that empty string are converted to null --- .../column/SimpleDictionaryEncodedColumn.java | 3 +- .../segment/filter/ExtractionFilter.java | 51 ++++- .../query/groupby/GroupByQueryRunnerTest.java | 201 ++++++++++++++++++ .../druid/query/topn/TopNQueryRunnerTest.java | 93 +++++++- 4 files changed, 335 insertions(+), 13 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java b/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java index 1cbf45e01d1..9a7419f8929 100644 --- a/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java +++ b/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java @@ -17,6 +17,7 @@ package io.druid.segment.column; +import com.google.common.base.Strings; import com.metamx.common.guava.CloseQuietly; import io.druid.segment.data.CachingIndexed; import io.druid.segment.data.IndexedInts; @@ -71,7 +72,7 @@ public 
class SimpleDictionaryEncodedColumn @Override public String lookupName(int id) { - return cachedLookups.get(id); + return Strings.emptyToNull(cachedLookups.get(id)); } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java index e2a0d7cfe1c..d89000c5bd5 100644 --- a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java @@ -17,17 +17,19 @@ package io.druid.segment.filter; +import com.google.common.base.Predicate; +import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.WrappedImmutableConciseBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; import io.druid.segment.ColumnSelectorFactory; +import io.druid.segment.DimensionSelector; import io.druid.segment.data.Indexed; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import io.druid.segment.data.IndexedInts; import java.util.List; @@ -54,15 +56,20 @@ public class ExtractionFilter implements Filter { final Indexed allDimVals = selector.getDimensionValues(dimension); final List filters = Lists.newArrayList(); - if (allDimVals != null) { - for (int i = 0; i < allDimVals.size(); i++) { + if (allDimVals != null) + { + for (int i = 0; i < allDimVals.size(); i++) + { String dimVal = allDimVals.get(i); - if (value.equals(fn.apply(dimVal))) { + if (value.equals(fn.apply(dimVal))) + { filters.add(new SelectorFilter(dimension, dimVal)); } } + } else if (value.equals(fn.apply(null))) + { + filters.add(new SelectorFilter(dimension, null)); } - return filters; } @@ -79,13 +86,39 @@ public class ExtractionFilter 
implements Filter @Override public ValueMatcher makeMatcher(ValueMatcherFactory factory) { - throw new UnsupportedOperationException(); + return factory.makeValueMatcher(dimension, new Predicate() + { + @Override public boolean apply(String input) + { + // Assuming that a null/absent/empty dimension are equivalent from the druid perspective + return value.equals(fn.apply(Strings.emptyToNull(input))); + } + }); } @Override - public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + public ValueMatcher makeMatcher(ColumnSelectorFactory columnSelectorFactory) { - throw new UnsupportedOperationException(); + final DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimension, null); + if (dimensionSelector == null) { + return new BooleanValueMatcher(Strings.isNullOrEmpty(fn.apply(value))); + } else { + return new ValueMatcher() + { + @Override + public boolean matches() + { + final IndexedInts row = dimensionSelector.getRow(); + final int size = row.size(); + for (int i = 0; i < size; ++i) { + if (value.equals(fn.apply(dimensionSelector.lookupName(row.get(i))))) { + return true; + } + } + return false; + } + }; + } } } diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 6f03974c96f..1132cf330f9 100644 --- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -36,6 +36,7 @@ import io.druid.granularity.QueryGranularity; import io.druid.jackson.DefaultObjectMapper; import io.druid.query.BySegmentResultValue; import io.druid.query.BySegmentResultValueClass; +import io.druid.query.Druids; import io.druid.query.FinalizeResultsQueryRunner; import io.druid.query.Query; import io.druid.query.QueryRunner; @@ -63,6 +64,7 @@ import io.druid.query.extraction.MapLookupExtractor; import 
io.druid.query.extraction.RegexDimExtractionFn; import io.druid.query.extraction.TimeFormatExtractionFn; import io.druid.query.filter.DimFilter; +import io.druid.query.filter.ExtractionDimFilter; import io.druid.query.filter.JavaScriptDimFilter; import io.druid.query.filter.OrDimFilter; import io.druid.query.filter.RegexDimFilter; @@ -243,6 +245,205 @@ public class GroupByQueryRunnerTest TestHelper.assertExpectedObjects(expectedResults, results, ""); } + @Test + public void testGroupByWithExtractionDimFilterOptimazitionWithEmptyResult() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("automotive", "automotive0"); + extractionMap.put("business", "business0"); + extractionMap.put("entertainment", "entertainment0"); + extractionMap.put("health", "health0"); + extractionMap.put("mezzanine", "mezzanine0"); + extractionMap.put("news", "news0"); + extractionMap.put("premium", "premium0"); + extractionMap.put("technology", "technology0"); + extractionMap.put("travel", "travel0"); + + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + + List dimFilters = Lists.newArrayList( + new ExtractionDimFilter("quality", "Missing_value", lookupExtractionFn, null), + new ExtractionDimFilter("quality", "business0", lookupExtractionFn, null), + new SelectorDimFilter("quality", "entertainment"), + new SelectorDimFilter("quality", "health"), + new ExtractionDimFilter("quality", "mezzanine0", lookupExtractionFn, null), + new ExtractionDimFilter("quality", "news0", lookupExtractionFn, null), + new SelectorDimFilter("quality", "premium"), + new SelectorDimFilter("quality", "technology"), + new SelectorDimFilter("quality", "travel") + ); + + + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(Lists.newArrayList(new 
DefaultDimensionSpec("quality", "alias"))) + .setAggregatorSpecs( + Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build()) + .build(); + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L), + + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L), + 
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + + } + + @Test + public void testGroupByWithExtractionDimFilterOptimazitionOneToOne() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("automotive", "automotive0"); + extractionMap.put("business", "business0"); + extractionMap.put("entertainment", "entertainment0"); + extractionMap.put("health", "health0"); + extractionMap.put("mezzanine", "mezzanine0"); + extractionMap.put("news", "news0"); + extractionMap.put("premium", "premium0"); + extractionMap.put("technology", "technology0"); + extractionMap.put("travel", "travel0"); + + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + + List dimFilters = Lists.newArrayList( + new ExtractionDimFilter("quality", "automotive0", lookupExtractionFn, null), + new ExtractionDimFilter("quality", "business0", lookupExtractionFn, null), + new SelectorDimFilter("quality", "entertainment"), + new SelectorDimFilter("quality", "health"), + new ExtractionDimFilter("quality", "mezzanine0", lookupExtractionFn, null), + new ExtractionDimFilter("quality", "news0", lookupExtractionFn, null), + new SelectorDimFilter("quality", "premium"), + new SelectorDimFilter("quality", "technology"), + new SelectorDimFilter("quality", "travel") + ); + + + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) + .setAggregatorSpecs( + Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) + .setGranularity(QueryRunnerTestHelper.dayGran) + 
.setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build()) + .build(); + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L), + + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 
97L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + + } + + @Test + public void testGroupByWithExtractionDimFilterOptimazitionManyToOne() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("mezzanine", "newsANDmezzanine"); + extractionMap.put("news", "newsANDmezzanine"); + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) + .setAggregatorSpecs( + Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(new ExtractionDimFilter("quality", "newsANDmezzanine", lookupExtractionFn, null)) + .build(); + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L)); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } + + @Test public void testGroupByWithExtractionDimFilterEmptyResult() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("mezzanine", "mezzanine0"); + 
+ MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) + .setAggregatorSpecs( + Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null)).build(); + List expectedResults = Arrays.asList(); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } + + @Test public void testGroupByWithExtractionDimFilterNullDims() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("", "EMPTY"); + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("null_column", "alias"))) + .setAggregatorSpecs( + Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(new ExtractionDimFilter("null_column", "EMPTY", lookupExtractionFn, null)).build(); + List expectedResults = Arrays + .asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, "rows", 13L, "idx", 5827L)); + + Iterable 
results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } + @Test public void testGroupByWithRebucketRename() { diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index 20b35d4e88d..149c949b84d 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -41,6 +41,7 @@ import io.druid.query.TestQueryRunners; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.DoubleMaxAggregatorFactory; import io.druid.query.aggregation.DoubleMinAggregatorFactory; +import io.druid.query.aggregation.FilteredAggregatorFactory; import io.druid.query.aggregation.PostAggregator; import io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory; import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; @@ -54,6 +55,7 @@ import io.druid.query.extraction.RegexDimExtractionFn; import io.druid.query.extraction.TimeFormatExtractionFn; import io.druid.query.filter.AndDimFilter; import io.druid.query.filter.DimFilter; +import io.druid.query.filter.ExtractionDimFilter; import io.druid.query.filter.SelectorDimFilter; import io.druid.query.spec.MultipleIntervalSegmentSpec; import io.druid.query.timeseries.TimeseriesQuery; @@ -158,9 +160,7 @@ public class TopNQueryRunnerTest QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator() ); final QueryRunner> mergeRunner = chest.mergeResults(runner); - return mergeRunner.run( - query, context - ); + return mergeRunner.run(query, context); } @Test @@ -3149,4 +3149,91 @@ public class TopNQueryRunnerTest ); TestHelper.assertExpectedResults(expectedResults, runner.run(query, new HashMap())); } + + @Test + public void testTopNWithExtractionFilter() + { + Map extractionMap = new HashMap<>(); + 
extractionMap.put("spot", "spot0"); + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + + TopNQuery query = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension(QueryRunnerTestHelper.marketDimension) + .metric("rows") + .threshold(3) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .filters(new ExtractionDimFilter(QueryRunnerTestHelper.marketDimension, "spot0", lookupExtractionFn, null)) + .build(); + + List> expectedResults = Arrays.asList( + new Result<>( + new DateTime("2011-04-01T00:00:00.000Z"), + new TopNResultValue( + Arrays.>asList( + ImmutableMap.of( + QueryRunnerTestHelper.marketDimension, "spot", + "rows", 18L, + "index", 2231.8768157958984D, + "addRowsIndexConstant", 2250.8768157958984D, + "uniques", QueryRunnerTestHelper.UNIQUES_9 + ) + ) + ) + ) + ); + + assertExpectedResults(expectedResults, query); + } + + @Test + public void testTopNWithExtractionFilterNoExistingValue() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("","NULL"); + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + DimFilter extractionFilter = new ExtractionDimFilter("null_column", "NULL", lookupExtractionFn, null); + TopNQueryBuilder topNQueryBuilder = new TopNQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(QueryRunnerTestHelper.allGran) + .dimension("null_column") + .metric(QueryRunnerTestHelper.indexMetric) + .threshold(4) + .intervals(QueryRunnerTestHelper.fullOnInterval) + 
.aggregators(Lists.newArrayList(Iterables.concat(QueryRunnerTestHelper.commonAggregators, Lists.newArrayList( + new FilteredAggregatorFactory(new DoubleMaxAggregatorFactory("maxIndex", "index"), + extractionFilter), + //new DoubleMaxAggregatorFactory("maxIndex", "index"), + new DoubleMinAggregatorFactory("minIndex", "index"))))) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)); + TopNQuery topNQueryWithNULLValueExtraction = topNQueryBuilder + .filters(extractionFilter) + .build(); + + Map map = Maps.newHashMap(); + map.put("null_column", null); + map.put("rows", 1209L); + map.put("index", 503332.5071372986D); + map.put("addRowsIndexConstant", 504542.5071372986D); + map.put("uniques", QueryRunnerTestHelper.UNIQUES_9); + map.put("maxIndex", 1870.06103515625D); + map.put("minIndex", 59.02102279663086D); + List> expectedResults = Arrays.asList( + new Result<>( + new DateTime("2011-01-12T00:00:00.000Z"), + new TopNResultValue( + Arrays.asList( + map + ) + ) + ) + ); + assertExpectedResults(expectedResults, topNQueryWithNULLValueExtraction); + } + } From 83de5a47166eadad81c1e38e0cff26f4d710ccf7 Mon Sep 17 00:00:00 2001 From: Slim Bouguerra Date: Fri, 31 Jul 2015 11:30:47 -0500 Subject: [PATCH 3/7] addressing reviewers comments --- docs/content/querying/filters.md | 2 +- .../segment/filter/ExtractionFilter.java | 41 +++++++-------- .../query/groupby/GroupByQueryRunnerTest.java | 44 ++++++++++++++-- .../druid/query/topn/TopNQueryRunnerTest.java | 52 ++++++++++++------- 4 files changed, 94 insertions(+), 45 deletions(-) diff --git a/docs/content/querying/filters.md b/docs/content/querying/filters.md index 2cc780bcd89..9191af86127 100644 --- a/docs/content/querying/filters.md +++ b/docs/content/querying/filters.md @@ -84,7 +84,7 @@ The following matches any dimension values for the dimension `name` between `'ba ### Extraction filter Extraction filter matches a dimension using some specific [Extraction 
function](./dimensionspecs.html#extraction-functions). -The filter matches the values for which the extraction function has transformation entry `input_key=output_value` where +The following filter matches the values for which the extraction function has transformation entry `input_key=output_value` where `output_value` is equal to the filter `value` and `input_key` is present as dimension. **Example** diff --git a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java index d89000c5bd5..98e16ea9095 100644 --- a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java @@ -41,11 +41,7 @@ public class ExtractionFilter implements Filter private final String value; private final ExtractionFn fn; - public ExtractionFilter( - String dimension, - String value, - ExtractionFn fn - ) + public ExtractionFilter(String dimension, String value, ExtractionFn fn) { this.dimension = dimension; this.value = value; @@ -56,18 +52,14 @@ public class ExtractionFilter implements Filter { final Indexed allDimVals = selector.getDimensionValues(dimension); final List filters = Lists.newArrayList(); - if (allDimVals != null) - { - for (int i = 0; i < allDimVals.size(); i++) - { + if (allDimVals != null) { + for (int i = 0; i < allDimVals.size(); i++) { String dimVal = allDimVals.get(i); - if (value.equals(fn.apply(dimVal))) - { + if (value.equals(Strings.nullToEmpty(fn.apply(dimVal)))) { filters.add(new SelectorFilter(dimension, dimVal)); } } - } else if (value.equals(fn.apply(null))) - { + } else if (value.equals(Strings.nullToEmpty(fn.apply(null)))) { filters.add(new SelectorFilter(dimension, null)); } return filters; @@ -86,14 +78,17 @@ public class ExtractionFilter implements Filter @Override public ValueMatcher makeMatcher(ValueMatcherFactory factory) { - return factory.makeValueMatcher(dimension, new 
Predicate() - { - @Override public boolean apply(String input) - { - // Assuming that a null/absent/empty dimension are equivalent from the druid perspective - return value.equals(fn.apply(Strings.emptyToNull(input))); - } - }); + return factory.makeValueMatcher( + dimension, new Predicate() + { + @Override + public boolean apply(String input) + { + // Assuming that a null/absent/empty dimension are equivalent from the druid perspective + return value.equals(Strings.nullToEmpty(fn.apply(Strings.emptyToNull(input)))); + } + } + ); } @Override @@ -101,7 +96,7 @@ public class ExtractionFilter implements Filter { final DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimension, null); if (dimensionSelector == null) { - return new BooleanValueMatcher(Strings.isNullOrEmpty(fn.apply(value))); + return new BooleanValueMatcher(value.equals(Strings.nullToEmpty(fn.apply(null)))); } else { return new ValueMatcher() { @@ -111,7 +106,7 @@ public class ExtractionFilter implements Filter final IndexedInts row = dimensionSelector.getRow(); final int size = row.size(); for (int i = 0; i < size; ++i) { - if (value.equals(fn.apply(dimensionSelector.lookupName(row.get(i))))) { + if (value.equals(Strings.nullToEmpty(fn.apply(dimensionSelector.lookupName(row.get(i)))))) { return true; } } diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 1132cf330f9..8b00079e923 100644 --- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -400,10 +400,46 @@ public class GroupByQueryRunnerTest TestHelper.assertExpectedObjects(expectedResults, results, ""); } - @Test public void testGroupByWithExtractionDimFilterEmptyResult() + @Test + public void testGroupByWithExtractionDimFilterCaseNullValue() { Map extractionMap = new HashMap<>(); - 
extractionMap.put("mezzanine", "mezzanine0"); + extractionMap.put("automotive", "automotive0"); + extractionMap.put("business", "business0"); + extractionMap.put("entertainment", "entertainment0"); + extractionMap.put("health", "health0"); + extractionMap.put("mezzanine", ""); + extractionMap.put("news", null); + extractionMap.put("premium", "premium0"); + extractionMap.put("technology", "technology0"); + extractionMap.put("travel", "travel0"); + + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) + .setAggregatorSpecs( + Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null)) + .build(); + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L)); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } + + @Test public void testGroupByWithExtractionDimFilterWhenValueNotThere() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("mezzanine", ""); + extractionMap.put("news", null); MapLookupExtractor mapLookupExtractor 
= new MapLookupExtractor(extractionMap); LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); @@ -412,7 +448,8 @@ public class GroupByQueryRunnerTest .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) .setAggregatorSpecs( - Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) + Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) + ) .setGranularity(QueryRunnerTestHelper.dayGran) .setDimFilter(new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null)).build(); List expectedResults = Arrays.asList(); @@ -421,6 +458,7 @@ public class GroupByQueryRunnerTest TestHelper.assertExpectedObjects(expectedResults, results, ""); } + @Test public void testGroupByWithExtractionDimFilterNullDims() { Map extractionMap = new HashMap<>(); diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index 149c949b84d..acc7996c8ca 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -1611,7 +1611,6 @@ public class TopNQueryRunnerTest } - @Test public void testTopNDimExtractionFastTopNOptimalWithReplaceMissing() { @@ -3119,8 +3118,10 @@ public class TopNQueryRunnerTest ); assertExpectedResults(expectedResults, query); } + @Test - public void testAlphaNumericTopNWithNullPreviousStop(){ + public void testAlphaNumericTopNWithNullPreviousStop() + { TopNQuery query = new TopNQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryGranularity.ALL) @@ -3159,15 +3160,22 @@ public class TopNQueryRunnerTest LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); TopNQuery query = new 
TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource) - .granularity(QueryRunnerTestHelper.allGran) - .dimension(QueryRunnerTestHelper.marketDimension) - .metric("rows") - .threshold(3) - .intervals(QueryRunnerTestHelper.firstToThird) - .aggregators(QueryRunnerTestHelper.commonAggregators) - .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) - .filters(new ExtractionDimFilter(QueryRunnerTestHelper.marketDimension, "spot0", lookupExtractionFn, null)) - .build(); + .granularity(QueryRunnerTestHelper.allGran) + .dimension(QueryRunnerTestHelper.marketDimension) + .metric("rows") + .threshold(3) + .intervals(QueryRunnerTestHelper.firstToThird) + .aggregators(QueryRunnerTestHelper.commonAggregators) + .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) + .filters( + new ExtractionDimFilter( + QueryRunnerTestHelper.marketDimension, + "spot0", + lookupExtractionFn, + null + ) + ) + .build(); List> expectedResults = Arrays.asList( new Result<>( @@ -3190,10 +3198,10 @@ public class TopNQueryRunnerTest } @Test - public void testTopNWithExtractionFilterNoExistingValue() + public void testTopNWithExtractionFilterAndFilteredAggregatorCaseNoExistingValue() { Map extractionMap = new HashMap<>(); - extractionMap.put("","NULL"); + extractionMap.put("", "NULL"); MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); @@ -3205,11 +3213,19 @@ public class TopNQueryRunnerTest .metric(QueryRunnerTestHelper.indexMetric) .threshold(4) .intervals(QueryRunnerTestHelper.fullOnInterval) - .aggregators(Lists.newArrayList(Iterables.concat(QueryRunnerTestHelper.commonAggregators, Lists.newArrayList( - new FilteredAggregatorFactory(new DoubleMaxAggregatorFactory("maxIndex", "index"), - extractionFilter), - //new DoubleMaxAggregatorFactory("maxIndex", "index"), - new DoubleMinAggregatorFactory("minIndex", 
"index"))))) + .aggregators( + Lists.newArrayList( + Iterables.concat( + QueryRunnerTestHelper.commonAggregators, Lists.newArrayList( + new FilteredAggregatorFactory( + new DoubleMaxAggregatorFactory("maxIndex", "index"), + extractionFilter + ), + new DoubleMinAggregatorFactory("minIndex", "index") + ) + ) + ) + ) .postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)); TopNQuery topNQueryWithNULLValueExtraction = topNQueryBuilder .filters(extractionFilter) From 7848429cbf58d9ca829e9b27abf55c68776b1fee Mon Sep 17 00:00:00 2001 From: Slim Bouguerra Date: Mon, 3 Aug 2015 14:50:52 -0500 Subject: [PATCH 4/7] unused imports --- .../server/namespace/KafkaExtractionNamespaceFactory.java | 3 --- .../server/namespace/KafkaExtractionNamespaceModule.java | 7 ------- 2 files changed, 10 deletions(-) diff --git a/extensions/kafka-extraction-namespace/src/main/java/io/druid/server/namespace/KafkaExtractionNamespaceFactory.java b/extensions/kafka-extraction-namespace/src/main/java/io/druid/server/namespace/KafkaExtractionNamespaceFactory.java index 9718a9a2f7d..28ee21fbc6b 100644 --- a/extensions/kafka-extraction-namespace/src/main/java/io/druid/server/namespace/KafkaExtractionNamespaceFactory.java +++ b/extensions/kafka-extraction-namespace/src/main/java/io/druid/server/namespace/KafkaExtractionNamespaceFactory.java @@ -24,13 +24,10 @@ import com.google.common.base.Strings; import com.google.inject.Inject; import io.druid.query.extraction.namespace.ExtractionNamespaceFunctionFactory; import io.druid.query.extraction.namespace.KafkaExtractionNamespace; -import io.druid.query.extraction.namespace.URIExtractionNamespace; -import io.druid.server.namespace.cache.NamespaceExtractionCacheManager; import javax.annotation.Nullable; import java.util.Map; import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentMap; /** * diff --git a/extensions/kafka-extraction-namespace/src/main/java/io/druid/server/namespace/KafkaExtractionNamespaceModule.java 
b/extensions/kafka-extraction-namespace/src/main/java/io/druid/server/namespace/KafkaExtractionNamespaceModule.java index d66f4016461..0edbeb47e52 100644 --- a/extensions/kafka-extraction-namespace/src/main/java/io/druid/server/namespace/KafkaExtractionNamespaceModule.java +++ b/extensions/kafka-extraction-namespace/src/main/java/io/druid/server/namespace/KafkaExtractionNamespaceModule.java @@ -19,7 +19,6 @@ package io.druid.server.namespace; -import com.fasterxml.jackson.core.Version; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.Module; import com.fasterxml.jackson.databind.ObjectMapper; @@ -27,19 +26,13 @@ import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; -import com.google.inject.Injector; import com.google.inject.Provides; -import com.google.inject.TypeLiteral; -import com.google.inject.multibindings.MapBinder; import com.google.inject.name.Named; import io.druid.guice.LazySingleton; import io.druid.guice.LifecycleModule; import io.druid.guice.annotations.Json; import io.druid.initialization.DruidModule; -import io.druid.query.extraction.namespace.ExtractionNamespace; -import io.druid.query.extraction.namespace.ExtractionNamespaceFunctionFactory; import io.druid.query.extraction.namespace.KafkaExtractionNamespace; -import io.druid.server.namespace.cache.NamespaceExtractionCacheManager; import java.io.IOException; import java.util.List; From 64d638a386fd752056ea18193e96b7cff81de409 Mon Sep 17 00:00:00 2001 From: Slim Bouguerra Date: Tue, 4 Aug 2015 09:07:47 -0500 Subject: [PATCH 5/7] optimize makeMatcher --- docs/content/querying/aggregations.md | 2 +- docs/content/querying/filters.md | 5 +- .../column/SimpleDictionaryEncodedColumn.java | 1 + .../segment/filter/ExtractionFilter.java | 42 +- .../query/groupby/GroupByQueryRunnerTest.java | 510 ++++++++++-------- 5 files changed, 312 
insertions(+), 248 deletions(-) diff --git a/docs/content/querying/aggregations.md b/docs/content/querying/aggregations.md index 4c487f4fda5..4ccd3c774ea 100644 --- a/docs/content/querying/aggregations.md +++ b/docs/content/querying/aggregations.md @@ -185,7 +185,7 @@ A filtered aggregator wraps any given aggregator, but only aggregates the values This makes it possible to compute the results of a filtered and an unfiltered aggregation simultaneously, without having to issue multiple queries, and use both results as part of post-aggregations. -*Limitations:* The filtered aggregator currently only supports 'or', 'and', 'selector' and 'not' filters, i.e. matching one or multiple dimensions against a single value. +*Limitations:* The filtered aggregator currently only supports 'or', 'and', 'selector', 'not' and 'extraction' filters, i.e. matching one or multiple dimensions against a single value. *Note:* If only the filtered results are required, consider putting the filter on the query itself, which will be much faster since it does not require scanning all the data. 
diff --git a/docs/content/querying/filters.md b/docs/content/querying/filters.md index 9191af86127..bea6b31abee 100644 --- a/docs/content/querying/filters.md +++ b/docs/content/querying/filters.md @@ -105,10 +105,7 @@ The following matches dimension values in `[product_1, product_3, product_5]` fo "product_5": "bar_1", "product_3": "bar_1" } - }, - "replaceMissingValueWith": "", - "retainMissingValue": false, - "injective": false + } } } } diff --git a/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java b/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java index 9a7419f8929..0eb913eee5d 100644 --- a/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java +++ b/processing/src/main/java/io/druid/segment/column/SimpleDictionaryEncodedColumn.java @@ -72,6 +72,7 @@ public class SimpleDictionaryEncodedColumn @Override public String lookupName(int id) { + //Empty to Null will ensure that null and empty are equivalent for extraction function return Strings.emptyToNull(cachedLookups.get(id)); } diff --git a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java index 98e16ea9095..443def751d7 100644 --- a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java @@ -31,6 +31,8 @@ import io.druid.segment.DimensionSelector; import io.druid.segment.data.Indexed; import io.druid.segment.data.IndexedInts; +import java.util.BitSet; +import java.util.Iterator; import java.util.List; /** @@ -50,8 +52,36 @@ public class ExtractionFilter implements Filter private List makeFilters(BitmapIndexSelector selector) { - final Indexed allDimVals = selector.getDimensionValues(dimension); + Indexed allDimVals = selector.getDimensionValues(dimension); final List filters = Lists.newArrayList(); + if (allDimVals == null) { + 
allDimVals = new Indexed() + { + @Override + public Iterator iterator() + { + return null; + } + + @Override + public Class getClazz() + { + return null; + } + + @Override + public int size() { return 1; } + + @Override + public String get(int index) { return null;} + + @Override + public int indexOf(String value) + { + return 0; + } + }; + } if (allDimVals != null) { for (int i = 0; i < allDimVals.size(); i++) { String dimVal = allDimVals.get(i); @@ -59,8 +89,6 @@ public class ExtractionFilter implements Filter filters.add(new SelectorFilter(dimension, dimVal)); } } - } else if (value.equals(Strings.nullToEmpty(fn.apply(null)))) { - filters.add(new SelectorFilter(dimension, null)); } return filters; } @@ -98,6 +126,12 @@ public class ExtractionFilter implements Filter if (dimensionSelector == null) { return new BooleanValueMatcher(value.equals(Strings.nullToEmpty(fn.apply(null)))); } else { + final BitSet bitSetOfIds = new BitSet(dimensionSelector.getValueCardinality()); + for (int i = 0; i < dimensionSelector.getValueCardinality(); i++) { + if (value.equals(Strings.nullToEmpty(fn.apply(dimensionSelector.lookupName(i))))) { + bitSetOfIds.set(i); + } + } return new ValueMatcher() { @Override @@ -106,7 +140,7 @@ public class ExtractionFilter implements Filter final IndexedInts row = dimensionSelector.getRow(); final int size = row.size(); for (int i = 0; i < size; ++i) { - if (value.equals(Strings.nullToEmpty(fn.apply(dimensionSelector.lookupName(row.get(i)))))) { + if (bitSetOfIds.get(row.get(i))) { return true; } } diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 8b00079e923..fa5f99064e2 100644 --- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -47,6 +47,7 @@ import io.druid.query.TestQueryRunners; import 
io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.DoubleMaxAggregatorFactory; import io.druid.query.aggregation.DoubleSumAggregatorFactory; +import io.druid.query.aggregation.FilteredAggregatorFactory; import io.druid.query.aggregation.JavaScriptAggregatorFactory; import io.druid.query.aggregation.LongSumAggregatorFactory; import io.druid.query.aggregation.PostAggregator; @@ -245,243 +246,6 @@ public class GroupByQueryRunnerTest TestHelper.assertExpectedObjects(expectedResults, results, ""); } - @Test - public void testGroupByWithExtractionDimFilterOptimazitionWithEmptyResult() - { - Map extractionMap = new HashMap<>(); - extractionMap.put("automotive", "automotive0"); - extractionMap.put("business", "business0"); - extractionMap.put("entertainment", "entertainment0"); - extractionMap.put("health", "health0"); - extractionMap.put("mezzanine", "mezzanine0"); - extractionMap.put("news", "news0"); - extractionMap.put("premium", "premium0"); - extractionMap.put("technology", "technology0"); - extractionMap.put("travel", "travel0"); - - - MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); - LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); - - List dimFilters = Lists.newArrayList( - new ExtractionDimFilter("quality", "Missing_value", lookupExtractionFn, null), - new ExtractionDimFilter("quality", "business0", lookupExtractionFn, null), - new SelectorDimFilter("quality", "entertainment"), - new SelectorDimFilter("quality", "health"), - new ExtractionDimFilter("quality", "mezzanine0", lookupExtractionFn, null), - new ExtractionDimFilter("quality", "news0", lookupExtractionFn, null), - new SelectorDimFilter("quality", "premium"), - new SelectorDimFilter("quality", "technology"), - new SelectorDimFilter("quality", "travel") - ); - - - GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) - 
.setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setAggregatorSpecs( - Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) - .setGranularity(QueryRunnerTestHelper.dayGran) - .setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build()) - .build(); - List expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L), - - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L), - 
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)); - - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); - TestHelper.assertExpectedObjects(expectedResults, results, ""); - - } - - @Test - public void testGroupByWithExtractionDimFilterOptimazitionOneToOne() - { - Map extractionMap = new HashMap<>(); - extractionMap.put("automotive", "automotive0"); - extractionMap.put("business", "business0"); - extractionMap.put("entertainment", "entertainment0"); - extractionMap.put("health", "health0"); - extractionMap.put("mezzanine", "mezzanine0"); - extractionMap.put("news", "news0"); - extractionMap.put("premium", "premium0"); - extractionMap.put("technology", "technology0"); - extractionMap.put("travel", "travel0"); - - - MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); - LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); - - List dimFilters = Lists.newArrayList( - new ExtractionDimFilter("quality", "automotive0", lookupExtractionFn, null), - new ExtractionDimFilter("quality", "business0", lookupExtractionFn, null), - new SelectorDimFilter("quality", "entertainment"), - new SelectorDimFilter("quality", "health"), - new ExtractionDimFilter("quality", "mezzanine0", lookupExtractionFn, null), - new ExtractionDimFilter("quality", "news0", lookupExtractionFn, null), - new SelectorDimFilter("quality", "premium"), - new SelectorDimFilter("quality", "technology"), - new SelectorDimFilter("quality", "travel") - ); - - - GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) - .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setAggregatorSpecs( - 
Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) - .setGranularity(QueryRunnerTestHelper.dayGran) - .setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build()) - .build(); - List expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L), - - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", 
"premium", "rows", 3L, "idx", 2505L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)); - - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); - TestHelper.assertExpectedObjects(expectedResults, results, ""); - - } - - @Test - public void testGroupByWithExtractionDimFilterOptimazitionManyToOne() - { - Map extractionMap = new HashMap<>(); - extractionMap.put("mezzanine", "newsANDmezzanine"); - extractionMap.put("news", "newsANDmezzanine"); - - MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); - LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); - GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) - .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setAggregatorSpecs( - Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) - .setGranularity(QueryRunnerTestHelper.dayGran) - .setDimFilter(new ExtractionDimFilter("quality", "newsANDmezzanine", lookupExtractionFn, null)) - .build(); - List expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L)); - - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); - TestHelper.assertExpectedObjects(expectedResults, results, ""); - } - - @Test - public 
void testGroupByWithExtractionDimFilterCaseNullValue() - { - Map extractionMap = new HashMap<>(); - extractionMap.put("automotive", "automotive0"); - extractionMap.put("business", "business0"); - extractionMap.put("entertainment", "entertainment0"); - extractionMap.put("health", "health0"); - extractionMap.put("mezzanine", ""); - extractionMap.put("news", null); - extractionMap.put("premium", "premium0"); - extractionMap.put("technology", "technology0"); - extractionMap.put("travel", "travel0"); - - - MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); - LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); - GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) - .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setAggregatorSpecs( - Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) - .setGranularity(QueryRunnerTestHelper.dayGran) - .setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null)) - .build(); - List expectedResults = Arrays.asList( - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L)); - - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); - TestHelper.assertExpectedObjects(expectedResults, results, ""); - } - - @Test public void testGroupByWithExtractionDimFilterWhenValueNotThere() - { - Map extractionMap = new HashMap<>(); - extractionMap.put("mezzanine", ""); - 
extractionMap.put("news", null); - - MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); - LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); - - GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) - .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setAggregatorSpecs( - Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")) - ) - .setGranularity(QueryRunnerTestHelper.dayGran) - .setDimFilter(new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null)).build(); - List expectedResults = Arrays.asList(); - - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); - TestHelper.assertExpectedObjects(expectedResults, results, ""); - } - - - @Test public void testGroupByWithExtractionDimFilterNullDims() - { - Map extractionMap = new HashMap<>(); - extractionMap.put("", "EMPTY"); - - MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); - LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); - - GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) - .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) - .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("null_column", "alias"))) - .setAggregatorSpecs( - Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) - .setGranularity(QueryRunnerTestHelper.dayGran) - .setDimFilter(new ExtractionDimFilter("null_column", "EMPTY", lookupExtractionFn, null)).build(); - List expectedResults = Arrays - .asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L), - GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, 
"rows", 13L, "idx", 5827L)); - - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); - TestHelper.assertExpectedObjects(expectedResults, results, ""); - } - @Test public void testGroupByWithRebucketRename() { @@ -558,7 +322,6 @@ public class GroupByQueryRunnerTest } - @Test public void testGroupByWithSimpleRenameRetainMissingNonInjective() { @@ -731,7 +494,10 @@ public class GroupByQueryRunnerTest .setDimensions( Lists.newArrayList( new ExtractionDimensionSpec( - "quality", "alias", new LookupExtractionFn(new MapLookupExtractor(map), false, "MISSING", true), null + "quality", + "alias", + new LookupExtractionFn(new MapLookupExtractor(map), false, "MISSING", true), + null ) ) ) @@ -4024,4 +3790,270 @@ public class GroupByQueryRunnerTest TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(fullQuery, Maps.newHashMap()), ""); exec.shutdownNow(); } + + // Extraction Filters testing + + @Test + public void testGroupByWithExtractionDimFilter() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("automotive", "automotiveAndBusinessAndNewsAndMezzanine"); + extractionMap.put("business", "automotiveAndBusinessAndNewsAndMezzanine"); + extractionMap.put("mezzanine", "automotiveAndBusinessAndNewsAndMezzanine"); + extractionMap.put("news", "automotiveAndBusinessAndNewsAndMezzanine"); + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + + List dimFilters = Lists.newArrayList( + new ExtractionDimFilter("quality", "automotiveAndBusinessAndNewsAndMezzanine", lookupExtractionFn, null), + new SelectorDimFilter("quality", "entertainment"), + new SelectorDimFilter("quality", "health"), + new SelectorDimFilter("quality", "premium"), + new SelectorDimFilter("quality", "technology"), + new SelectorDimFilter("quality", "travel") + ); + + GroupByQuery query = 
GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions( + Lists.newArrayList( + new DefaultDimensionSpec( + "quality", + "alias" + ) + ) + ) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build()) + .build(); + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L), + + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L), + 
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L) + ); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + + } + + @Test + public void testGroupByWithExtractionDimFilterCaseMappingValueIsNullOrEmpty() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("automotive", "automotive0"); + extractionMap.put("business", "business0"); + extractionMap.put("entertainment", "entertainment0"); + extractionMap.put("health", "health0"); + extractionMap.put("mezzanine", null); + extractionMap.put("news", ""); + extractionMap.put("premium", "premium0"); + extractionMap.put("technology", "technology0"); + extractionMap.put("travel", "travel0"); + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions( + Lists.newArrayList( + new DefaultDimensionSpec( + "quality", + "alias" + ) + ) + ) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(new ExtractionDimFilter("quality", "", lookupExtractionFn, null)) + .build(); + List expectedResults = Arrays.asList( 
+ GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L) + ); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } + + @Test + public void testGroupByWithExtractionDimFilterWhenSearchValueNotInTheMap() + { + Map extractionMap = new HashMap<>(); + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true); + + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions( + Lists.newArrayList( + new DefaultDimensionSpec( + "quality", + "alias" + ) + ) + ) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter( + new ExtractionDimFilter( + "quality", + "NOT_THERE", + lookupExtractionFn, + null + ) + ).build(); + List expectedResults = Arrays.asList(); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } + + + @Test + public void testGroupByWithExtractionDimFilterKeyisNull() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("", "NULLorEMPTY"); + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, 
null, true); + + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions( + Lists.newArrayList( + new DefaultDimensionSpec( + "null_column", + "alias" + ) + ) + ) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter( + new ExtractionDimFilter( + "null_column", + "NULLorEMPTY", + lookupExtractionFn, + null + ) + ).build(); + List expectedResults = Arrays + .asList( + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, "rows", 13L, "idx", 5827L) + ); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } + + @Test + public void testGroupByWithAggregatorFilterAndExtractionFunction() + { + Map extractionMap = new HashMap<>(); + extractionMap.put("automotive", "automotive0"); + extractionMap.put("business", "business0"); + extractionMap.put("entertainment", "entertainment0"); + extractionMap.put("health", "health0"); + extractionMap.put("mezzanine", "mezzanineANDnews"); + extractionMap.put("news", "mezzanineANDnews"); + extractionMap.put("premium", "premium0"); + extractionMap.put("technology", "technology0"); + extractionMap.put("travel", "travel0"); + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap); + LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "missing", true); + DimFilter filter = new ExtractionDimFilter("quality","mezzanineANDnews",lookupExtractionFn,null); + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + 
.setDimensions( + Lists.newArrayList( + new DefaultDimensionSpec( + "quality", + "alias" + ) + ) + ) + .setAggregatorSpecs( + Arrays.asList( + new FilteredAggregatorFactory(QueryRunnerTestHelper.rowsCount, filter), + (AggregatorFactory) new FilteredAggregatorFactory( + new LongSumAggregatorFactory( + "idx", + "index" + ), filter + ) + ) + ) + .setGranularity(QueryRunnerTestHelper.dayGran) + .build(); + List expectedResults = Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 0L, "idx", 0L), + + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), + 
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 0L, "idx", 0L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 0L, "idx", 0L) + ); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + + } + } From f0bc362981cdefd38eb93f266a51ed1d979c1246 Mon Sep 17 00:00:00 2001 From: Slim Bouguerra Date: Fri, 7 Aug 2015 12:38:41 -0500 Subject: [PATCH 6/7] clean code if is not needed anymore --- .../io/druid/segment/filter/ExtractionFilter.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java index 443def751d7..c47a8720a06 100644 --- a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java @@ -82,14 +82,14 @@ public class ExtractionFilter implements Filter } }; } - if (allDimVals != null) { - for (int i = 0; i < allDimVals.size(); i++) { - String dimVal = allDimVals.get(i); - if (value.equals(Strings.nullToEmpty(fn.apply(dimVal)))) { - filters.add(new SelectorFilter(dimension, dimVal)); - } + + for (int i = 0; i < allDimVals.size(); i++) { + String dimVal = allDimVals.get(i); + if (value.equals(Strings.nullToEmpty(fn.apply(dimVal)))) { + filters.add(new SelectorFilter(dimension, dimVal)); } } + return filters; } From 7549f02578080968983f609f8715cc6c7e9d505f Mon Sep 17 00:00:00 2001 From: Slim Bouguerra Date: Mon, 17 Aug 2015 15:09:37 -0500 Subject: [PATCH 7/7] support the case filter value is null --- 
.../main/java/io/druid/query/filter/ExtractionDimFilter.java | 1 - .../src/main/java/io/druid/segment/filter/ExtractionFilter.java | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java b/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java index 7de7d51c8db..329ee1e6d0f 100644 --- a/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java @@ -43,7 +43,6 @@ public class ExtractionDimFilter implements DimFilter ) { Preconditions.checkArgument(dimension != null, "dimension must not be null"); - Preconditions.checkArgument(value != null, "value must not be null"); Preconditions.checkArgument(extractionFn != null || dimExtractionFn != null, "extraction function must not be null"); this.dimension = dimension; diff --git a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java index c47a8720a06..252209af1b9 100644 --- a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java @@ -46,7 +46,7 @@ public class ExtractionFilter implements Filter public ExtractionFilter(String dimension, String value, ExtractionFn fn) { this.dimension = dimension; - this.value = value; + this.value = Strings.nullToEmpty(value); this.fn = fn; }