From fdc7e88a7d963ac68ac40290818633768ad495b7 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Wed, 6 Jul 2016 08:08:54 -0700
Subject: [PATCH] Allow queries with no aggregators. (#3216)

This is actually reasonable for a groupBy or lexicographic topN that is
being used to do a "COUNT DISTINCT" kind of query. No aggregators are
needed for that query, and including a dummy aggregator wastes 8 bytes
per row.

It's kind of silly for timeseries, but why not.
---
 docs/content/querying/groupbyquery.md         |  2 +-
 docs/content/querying/timeseriesquery.md      |  2 +-
 docs/content/querying/topnquery.md            |  2 +-
 .../src/main/java/io/druid/query/Queries.java |  1 -
 .../io/druid/query/groupby/GroupByQuery.java  |  3 +-
 .../query/timeseries/TimeseriesQuery.java     |  2 +-
 .../druid/query/topn/PooledTopNAlgorithm.java |  2 +-
 .../java/io/druid/query/topn/TopNQuery.java   |  2 +-
 .../query/groupby/GroupByQueryRunnerTest.java | 37 +++++++++++++++++++
 .../timeseries/TimeseriesQueryRunnerTest.java | 34 +++++++++++++++++
 .../druid/query/topn/TopNQueryRunnerTest.java | 33 +++++++++++++++++
 11 files changed, 111 insertions(+), 9 deletions(-)

diff --git a/docs/content/querying/groupbyquery.md b/docs/content/querying/groupbyquery.md
index 62000c561fd..692339b267f 100644
--- a/docs/content/querying/groupbyquery.md
+++ b/docs/content/querying/groupbyquery.md
@@ -67,7 +67,7 @@ There are 11 main parts to a groupBy query:
 |having|See [Having](../querying/having.html).|no|
 |granularity|Defines the granularity of the query. See [Granularities](../querying/granularities.html)|yes|
 |filter|See [Filters](../querying/filters.html)|no|
-|aggregations|See [Aggregations](../querying/aggregations.html)|yes|
+|aggregations|See [Aggregations](../querying/aggregations.html)|no|
 |postAggregations|See [Post Aggregations](../querying/post-aggregations.html)|no|
 |intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
 |context|An additional JSON Object which can be used to specify certain flags.|no|
diff --git a/docs/content/querying/timeseriesquery.md b/docs/content/querying/timeseriesquery.md
index 955439575a2..08175c7f264 100644
--- a/docs/content/querying/timeseriesquery.md
+++ b/docs/content/querying/timeseriesquery.md
@@ -54,7 +54,7 @@ There are 7 main parts to a timeseries query:
 |intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
 |granularity|Defines the granularity to bucket query results. See [Granularities](../querying/granularities.html)|yes|
 |filter|See [Filters](../querying/filters.html)|no|
-|aggregations|See [Aggregations](../querying/aggregations.html)|yes|
+|aggregations|See [Aggregations](../querying/aggregations.html)|no|
 |postAggregations|See [Post Aggregations](../querying/post-aggregations.html)|no|
 |context|See [Context](../querying/query-context.html)|no|
diff --git a/docs/content/querying/topnquery.md b/docs/content/querying/topnquery.md
index d0acd4630e0..fe19b83e32b 100644
--- a/docs/content/querying/topnquery.md
+++ b/docs/content/querying/topnquery.md
@@ -79,7 +79,7 @@ There are 11 parts to a topN query.
 |intervals|A JSON Object representing ISO-8601 Intervals. This defines the time ranges to run the query over.|yes|
 |granularity|Defines the granularity to bucket query results. See [Granularities](../querying/granularities.html)|yes|
 |filter|See [Filters](../querying/filters.html)|no|
-|aggregations|See [Aggregations](../querying/aggregations.html)|yes|
+|aggregations|See [Aggregations](../querying/aggregations.html)|no|
 |postAggregations|See [Post Aggregations](../querying/post-aggregations.html)|no|
 |dimension|A String or JSON object defining the dimension that you want the top taken for. For more info, see [DimensionSpecs](../querying/dimensionspecs.html)|yes|
 |threshold|An integer defining the N in the topN (i.e. how many results you want in the top list)|yes|
diff --git a/processing/src/main/java/io/druid/query/Queries.java b/processing/src/main/java/io/druid/query/Queries.java
index 014eb9ae1e4..26afcc1bfc8 100644
--- a/processing/src/main/java/io/druid/query/Queries.java
+++ b/processing/src/main/java/io/druid/query/Queries.java
@@ -37,7 +37,6 @@ public class Queries
   )
   {
     Preconditions.checkNotNull(aggFactories, "aggregations cannot be null");
-    Preconditions.checkArgument(aggFactories.size() > 0, "Must have at least one AggregatorFactory");

     final Set<String> aggNames = Sets.newHashSet();
     for (AggregatorFactory aggFactory : aggFactories) {
diff --git a/processing/src/main/java/io/druid/query/groupby/GroupByQuery.java b/processing/src/main/java/io/druid/query/groupby/GroupByQuery.java
index f19c53754c7..c76274e7499 100644
--- a/processing/src/main/java/io/druid/query/groupby/GroupByQuery.java
+++ b/processing/src/main/java/io/druid/query/groupby/GroupByQuery.java
@@ -98,13 +98,12 @@ public class GroupByQuery extends BaseQuery<Row>
     for (DimensionSpec spec : this.dimensions) {
       Preconditions.checkArgument(spec != null, "dimensions has null DimensionSpec");
     }
-    this.aggregatorSpecs = aggregatorSpecs;
+    this.aggregatorSpecs = aggregatorSpecs == null ? ImmutableList.<AggregatorFactory>of() : aggregatorSpecs;
     this.postAggregatorSpecs = postAggregatorSpecs == null ? ImmutableList.<PostAggregator>of() : postAggregatorSpecs;
     this.havingSpec = havingSpec;
     this.limitSpec = (limitSpec == null) ? new NoopLimitSpec() : limitSpec;

     Preconditions.checkNotNull(this.granularity, "Must specify a granularity");
-    Preconditions.checkNotNull(this.aggregatorSpecs, "Must specify at least one aggregator");
     Queries.verifyAggregations(this.aggregatorSpecs, this.postAggregatorSpecs);

     Function<Sequence<Row>, Sequence<Row>> postProcFn =
diff --git a/processing/src/main/java/io/druid/query/timeseries/TimeseriesQuery.java b/processing/src/main/java/io/druid/query/timeseries/TimeseriesQuery.java
index 60c32e482a6..7e3af2167fc 100644
--- a/processing/src/main/java/io/druid/query/timeseries/TimeseriesQuery.java
+++ b/processing/src/main/java/io/druid/query/timeseries/TimeseriesQuery.java
@@ -62,7 +62,7 @@ public class TimeseriesQuery extends BaseQuery<Result<TimeseriesResultValue>>
     super(dataSource, querySegmentSpec, descending, context);
     this.dimFilter = dimFilter;
     this.granularity = granularity;
-    this.aggregatorSpecs = aggregatorSpecs;
+    this.aggregatorSpecs = aggregatorSpecs == null ? ImmutableList.<AggregatorFactory>of() : aggregatorSpecs;
     this.postAggregatorSpecs = postAggregatorSpecs == null ? ImmutableList.<PostAggregator>of() : postAggregatorSpecs;

     Queries.verifyAggregations(this.aggregatorSpecs, this.postAggregatorSpecs);
diff --git a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java
index 521d1903e55..e96162052c6 100644
--- a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java
+++ b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java
@@ -97,7 +97,7 @@ public class PooledTopNAlgorithm
       numBytesPerRecord += aggregatorSizes[i];
     }

-    final int numValuesPerPass = numBytesToWorkWith / numBytesPerRecord;
+    final int numValuesPerPass = numBytesPerRecord > 0 ? numBytesToWorkWith / numBytesPerRecord : cardinality;

     return PooledTopNParams.builder()
         .withDimSelector(dimSelector)
diff --git a/processing/src/main/java/io/druid/query/topn/TopNQuery.java b/processing/src/main/java/io/druid/query/topn/TopNQuery.java
index c06a7134add..71f4c23d3bd 100644
--- a/processing/src/main/java/io/druid/query/topn/TopNQuery.java
+++ b/processing/src/main/java/io/druid/query/topn/TopNQuery.java
@@ -73,7 +73,7 @@ public class TopNQuery extends BaseQuery<Result<TopNResultValue>>

     this.dimFilter = dimFilter;
     this.granularity = granularity;
-    this.aggregatorSpecs = aggregatorSpecs;
+    this.aggregatorSpecs = aggregatorSpecs == null ? ImmutableList.<AggregatorFactory>of() : aggregatorSpecs;
     this.postAggregatorSpecs = postAggregatorSpecs == null ? ImmutableList.<PostAggregator>of() : postAggregatorSpecs;

     Preconditions.checkNotNull(dimensionSpec, "dimensionSpec can't be null");
diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
index 7c5365ab432..983cced4434 100644
--- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
+++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java
@@ -333,6 +333,43 @@ public class GroupByQueryRunnerTest
     TestHelper.assertExpectedObjects(expectedResults, results, "");
   }

+  @Test
+  public void testGroupByNoAggregators()
+  {
+    GroupByQuery query = GroupByQuery
+        .builder()
+        .setDataSource(QueryRunnerTestHelper.dataSource)
+        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
+        .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias")))
+        .setGranularity(QueryRunnerTestHelper.dayGran)
+        .build();
+
+    List<Row> expectedResults = Arrays.asList(
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel"),
+
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment"),
+        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health"),
GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine"), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news"), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium"), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology"), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel") + ); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } + @Test public void testMultiValueDimension() { diff --git a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java index 00741cc60be..86dc97e2c3b 100644 --- a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -183,6 +183,40 @@ public class TimeseriesQueryRunnerTest Assert.assertEquals(lastResult.toString(), expectedLast, lastResult.getTimestamp()); } + @Test + public void testTimeseriesNoAggregators() + { + QueryGranularity gran = QueryGranularities.DAY; + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() + .dataSource(QueryRunnerTestHelper.dataSource) + .granularity(gran) + .intervals(QueryRunnerTestHelper.fullOnInterval) + .descending(descending) + .build(); + + Iterable> results = Sequences.toList( + runner.run(query, CONTEXT), + Lists.>newArrayList() + ); + + final DateTime expectedLast = descending ? + QueryRunnerTestHelper.earliest : + QueryRunnerTestHelper.last; + + Result lastResult = null; + for (Result result : results) { + DateTime current = result.getTimestamp(); + Assert.assertFalse( + String.format("Timestamp[%s] > expectedLast[%s]", current, expectedLast), + descending ? 
+      );
+      Assert.assertEquals(ImmutableMap.of(), result.getValue().getBaseObject());
+      lastResult = result;
+    }
+
+    Assert.assertEquals(lastResult.toString(), expectedLast, lastResult.getTimestamp());
+  }
+
   @Test
   public void testFullOnTimeseriesMaxMin()
   {
diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java
index 8a2ac0d03d1..e63467cc9ed 100644
--- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java
+++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java
@@ -1339,6 +1339,39 @@ public class TopNQueryRunnerTest
     assertExpectedResults(expectedResults, query);
   }

+  @Test
+  public void testTopNLexicographicNoAggregators()
+  {
+    TopNQuery query = new TopNQueryBuilder()
+        .dataSource(QueryRunnerTestHelper.dataSource)
+        .granularity(QueryRunnerTestHelper.allGran)
+        .dimension(QueryRunnerTestHelper.marketDimension)
+        .metric(new LexicographicTopNMetricSpec(""))
+        .threshold(4)
+        .intervals(QueryRunnerTestHelper.firstToThird)
+        .build();
+
+    List<Result<TopNResultValue>> expectedResults = Arrays.asList(
+        new Result<>(
+            new DateTime("2011-04-01T00:00:00.000Z"),
+            new TopNResultValue(
+                Arrays.<Map<String, String>>asList(
+                    ImmutableMap.of(
+                        QueryRunnerTestHelper.marketDimension, "spot"
+                    ),
+                    ImmutableMap.of(
+                        QueryRunnerTestHelper.marketDimension, "total_market"
+                    ),
+                    ImmutableMap.of(
+                        QueryRunnerTestHelper.marketDimension, "upfront"
+                    )
+                )
+            )
+        )
+    );
+    assertExpectedResults(expectedResults, query);
+  }
+
   @Test
   public void testTopNLexicographicWithPreviousStop()
   {
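
To make the motivation in the commit message concrete: before this change, Queries.verifyAggregations rejected an empty aggregator list, so a "which distinct values exist" style groupBy had to carry a throwaway aggregator just to pass validation. The sketch below contrasts that workaround with the query shape the patch permits. It is illustrative only and not part of the patch: the NoAggregatorsExample class is hypothetical, it borrows the test fixtures referenced in the hunks above (QueryRunnerTestHelper and friends), and Druid's stock CountAggregatorFactory stands in for the "dummy aggregator" the commit message mentions.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import io.druid.query.QueryRunnerTestHelper;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.groupby.GroupByQuery;

public class NoAggregatorsExample
{
  // Before this patch: validation demanded at least one aggregator, so a
  // COUNT DISTINCT-style groupBy had to drag along a throwaway count,
  // spending 8 bytes of aggregation space per row for a value nobody reads.
  static GroupByQuery withDummyCount()
  {
    return GroupByQuery.builder()
                       .setDataSource(QueryRunnerTestHelper.dataSource)
                       .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
                       .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias")))
                       .setAggregatorSpecs(ImmutableList.<AggregatorFactory>of(new CountAggregatorFactory("ignored")))
                       .setGranularity(QueryRunnerTestHelper.dayGran)
                       .build();
  }

  // After this patch: the aggregator list can simply be omitted, and each
  // result row carries only the grouping key ("alias" here), which is exactly
  // the distinct-values result the commit message describes.
  static GroupByQuery distinctValuesOnly()
  {
    return GroupByQuery.builder()
                       .setDataSource(QueryRunnerTestHelper.dataSource)
                       .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
                       .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias")))
                       .setGranularity(QueryRunnerTestHelper.dayGran)
                       .build();
  }
}

The new testGroupByNoAggregators shows the resulting rows: one row per day and quality value, with no metric columns at all.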
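
The PooledTopNAlgorithm hunk is the one subtle piece. With no aggregators, aggregatorSizes is empty, numBytesPerRecord stays 0, and the old unconditional division would throw ArithmeticException. Below is a minimal restatement of the guard for readability; the variable names come straight from the hunk, but the standalone helper method itself is hypothetical and not part of the patch.

  // How many dimension values one pass over the pooled buffer can handle.
  static int numValuesPerPass(int numBytesToWorkWith, int numBytesPerRecord, int cardinality)
  {
    // Normal case: the buffer budget divided by the per-value aggregation footprint.
    // No-aggregator case: there is no per-value state to store, so the pass can
    // simply cover the full dimension cardinality instead of dividing by zero.
    return numBytesPerRecord > 0 ? numBytesToWorkWith / numBytesPerRecord : cardinality;
  }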