From be30d450d7481a9d0f047b8c7582b7022c49451c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?=
Date: Wed, 9 Apr 2014 17:16:57 -0700
Subject: [PATCH] support dimension extraction functions in group by

---
Notes (review):
    * GroupByQueryEngine: when a row is materialized from the key buffer, the
      DimExtractionFn of the i-th dimension spec (if non-null) is applied to
      the looked-up dimension value before it is put into the event map;
      otherwise the looked-up value is used unchanged.
    * NOTE(review): the extraction function is fetched with
      dimensionSpecs.get(i) while i iterates over `dimensions`; this assumes
      both lists are index-aligned -- confirm against the code that builds
      `dimensions`.
    * GroupByQueryRunnerTest: adds testGroupByWithDimExtractionFn, grouping on
      "quality" through RegexDimExtractionFn("(\\w{1})") under the output name
      "alias".
    * NOTE(review): PartialDimExtractionFn is imported by this patch but not
      referenced in the added test -- verify whether the import is needed.
    * NOTE(review): `List expectedResults` / `Iterable results` appear as raw
      types here; angle-bracketed text may have been lost from this copy of the
      patch (the From header also lacks an address) -- verify against the
      repository.
    * Leading whitespace inside the hunks below was reconstructed; verify it
      against the target files before applying without --ignore-whitespace.

 .../query/groupby/GroupByQueryEngine.java     |  8 +++-
 .../query/groupby/GroupByQueryRunnerTest.java | 44 +++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java
index b8f63a5c4a5..888a034b9ae 100644
--- a/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java
+++ b/processing/src/main/java/io/druid/query/groupby/GroupByQueryEngine.java
@@ -44,6 +44,7 @@ import io.druid.query.aggregation.AggregatorFactory;
 import io.druid.query.aggregation.BufferAggregator;
 import io.druid.query.aggregation.PostAggregator;
 import io.druid.query.dimension.DimensionSpec;
+import io.druid.query.extraction.DimExtractionFn;
 import io.druid.segment.Cursor;
 import io.druid.segment.DimensionSelector;
 import io.druid.segment.StorageAdapter;
@@ -398,9 +399,14 @@ public class GroupByQueryEngine
               ByteBuffer keyBuffer = input.getKey().duplicate();
               for (int i = 0; i < dimensions.size(); ++i) {
                 final DimensionSelector dimSelector = dimensions.get(i);
+                final DimExtractionFn fn = dimensionSpecs.get(i).getDimExtractionFn();
                 final int dimVal = keyBuffer.getInt();
                 if (dimSelector.getValueCardinality() != dimVal) {
-                  theEvent.put(dimNames.get(i), dimSelector.lookupName(dimVal));
+                  if(fn != null) {
+                    theEvent.put(dimNames.get(i), fn.apply(dimSelector.lookupName(dimVal)));
+                  } else {
+                    theEvent.put(dimNames.get(i), dimSelector.lookupName(dimVal));
+                  }
                 }
               }
 
diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
index f336a26884e..c70492e3340 100644
--- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
+++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
@@ -45,6 +45,9 @@ import io.druid.query.aggregation.LongSumAggregatorFactory;
 import io.druid.query.aggregation.MaxAggregatorFactory;
 import io.druid.query.dimension.DefaultDimensionSpec;
 import io.druid.query.dimension.DimensionSpec;
+import io.druid.query.dimension.ExtractionDimensionSpec;
+import io.druid.query.extraction.PartialDimExtractionFn;
+import io.druid.query.extraction.RegexDimExtractionFn;
 import io.druid.query.filter.JavaScriptDimFilter;
 import io.druid.query.filter.RegexDimFilter;
 import io.druid.query.groupby.having.EqualToHavingSpec;
@@ -178,6 +181,47 @@ public class GroupByQueryRunnerTest
     TestHelper.assertExpectedObjects(expectedResults, results, "");
   }
 
+  @Test
+  public void testGroupByWithDimExtractionFn()
+  {
+    GroupByQuery query = GroupByQuery
+        .builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(Lists.newArrayList(new ExtractionDimensionSpec("quality", "alias", new RegexDimExtractionFn("(\\w{1})"))))
+        .setAggregatorSpecs(
+            Arrays.asList(
+                QueryRunnerTestHelper.rowsCount,
+                new LongSumAggregatorFactory("idx", "index")
+            )
+        )
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .build();
+
+    List expectedResults = Arrays.asList(
+        createExpectedRow("2011-04-01", "alias", "a", "rows", 1L, "idx", 135L),
+        createExpectedRow("2011-04-01", "alias", "b", "rows", 1L, "idx", 118L),
+        createExpectedRow("2011-04-01", "alias", "e", "rows", 1L, "idx", 158L),
+        createExpectedRow("2011-04-01", "alias", "h", "rows", 1L, "idx", 120L),
+        createExpectedRow("2011-04-01", "alias", "m", "rows", 3L, "idx", 2870L),
+        createExpectedRow("2011-04-01", "alias", "n", "rows", 1L, "idx", 121L),
+        createExpectedRow("2011-04-01", "alias", "p", "rows", 3L, "idx", 2900L),
+        createExpectedRow("2011-04-01", "alias", "t", "rows", 2L, "idx", 197L),
+
+        createExpectedRow("2011-04-02", "alias", "a", "rows", 1L, "idx", 147L),
+        createExpectedRow("2011-04-02", "alias", "b", "rows", 1L, "idx", 112L),
+        createExpectedRow("2011-04-02", "alias", "e", "rows", 1L, "idx", 166L),
+        createExpectedRow("2011-04-02", "alias", "h", "rows", 1L, "idx", 113L),
+        createExpectedRow("2011-04-02", "alias", "m", "rows", 3L, "idx", 2447L),
+        createExpectedRow("2011-04-02", "alias", "n", "rows", 1L, "idx", 114L),
+        createExpectedRow("2011-04-02", "alias", "p", "rows", 3L, "idx", 2505L),
+        createExpectedRow("2011-04-02", "alias", "t", "rows", 2L, "idx", 223L)
+    );
+
+    Iterable results = runQuery(query);
+    TestHelper.assertExpectedObjects(expectedResults, results, "");
+  }
+
   @Test
   public void testGroupByWithTimeZone()
   {