From f4d0ea7bc82bb185a467f7f9fa2a085068995573 Mon Sep 17 00:00:00 2001
From: Abhishek Radhakrishnan
Date: Tue, 18 Jul 2023 12:37:10 -0700
Subject: [PATCH] Add support for earliest `aggregatorMergeStrategy` (#14598)

* Add EARLIEST aggregator merge strategy.

- More unit tests.
- Include the aggregators analysis type by default in tests.

* Docs.

* Some comments and a test

* Collapse into individual code blocks.
---
 docs/querying/segmentmetadataquery.md         |  12 +-
 .../SegmentMetadataQueryQueryToolChest.java   |  17 +-
 .../metadata/AggregatorMergeStrategy.java     |   1 +
 ...egmentMetadataQueryQueryToolChestTest.java | 416 +++++++++++++++++-
 .../metadata/SegmentMetadataQueryTest.java    |  71 +--
 5 files changed, 459 insertions(+), 58 deletions(-)

diff --git a/docs/querying/segmentmetadataquery.md b/docs/querying/segmentmetadataquery.md
index 22176ee2647..3e1b4a5a245 100644
--- a/docs/querying/segmentmetadataquery.md
+++ b/docs/querying/segmentmetadataquery.md
@@ -62,7 +62,7 @@ There are several main parts to a segment metadata query:
 |merge|Merge all individual segment metadata results into a single result|no|
 |context|See [Context](../querying/query-context.md)|no|
 |analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "interval", "minmax"], but can be overridden using the [segment metadata query config](../configuration/index.md#segmentmetadata-query-config). See section [analysisTypes](#analysistypes) for more details.|no|
-|aggregatorMergeStrategy| The strategy Druid uses to merge aggregators across segments. If true and if the `aggregators` analysis type is enabled, `aggregatorMergeStrategy` defaults to `strict`. Possible values include `strict`, `lenient`, and `latest`. See [`aggregatorMergeStrategy`](#aggregatormergestrategy) for details.|no|
+|aggregatorMergeStrategy| The strategy Druid uses to merge aggregators across segments. If the `aggregators` analysis type is enabled, `aggregatorMergeStrategy` defaults to `strict`. Possible values include `strict`, `lenient`, `earliest`, and `latest`. See [`aggregatorMergeStrategy`](#aggregatormergestrategy) for details.|no|
 |lenientAggregatorMerge|Deprecated. Use `aggregatorMergeStrategy` property instead. If true, and if the `aggregators` analysis type is enabled, Druid merges aggregators leniently.|no|
 
 The format of the result is:
@@ -186,7 +186,7 @@ Currently, there is no API for retrieving this information.
 * `aggregators` in the result will contain the list of aggregators usable for querying metric columns. This may be
   null if the aggregators are unknown or unmergeable (if merging is enabled).
 
-* Merging can be `strict`, `lenient`, or `latest`. See [`aggregatorMergeStrategy`](#aggregatormergestrategy) for details.
+* Merging can be `strict`, `lenient`, `earliest`, or `latest`. See [`aggregatorMergeStrategy`](#aggregatormergestrategy) for details.
 
 * The form of the result is a map of column name to aggregator.
 
@@ -201,10 +201,12 @@ Conflicts between aggregator metadata across segments can occur if some segments
 two segments use incompatible aggregators for the same column, such as `longSum` changed to `doubleSum`.
 
 Druid supports the following aggregator merge strategies:
-- `strict`: If there are any segments with unknown aggregators or any conflicts of any kind, the merged aggregators list is `null`.
-- `lenient`: Druid ignores segments with unknown aggregators. Conflicts between aggregators set the aggregator for that particular column to null.
-- `latest`: In the event of conflicts between segments, Druid selects the aggregator from the most recent segment for
-  the aggregator for that particular column.
+- `strict`: If there are any segments with unknown aggregators or any conflicts of any kind, the merged aggregators
+  list is `null`.
+- `lenient`: Druid ignores segments with unknown aggregators. Conflicts between aggregators set the aggregator for
+  that particular column to null.
+- `earliest`: In the event of conflicts between segments, Druid selects the aggregator from the earliest segment
+  for that particular column.
+- `latest`: In the event of conflicts between segments, Druid selects the aggregator from the most recent segment
+  for that particular column.
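+
+For example, a query like the following (the datasource name and interval are illustrative) asks Druid to merge
+aggregator metadata across segments using the `earliest` strategy:
+
+```json
+{
+  "queryType": "segmentMetadata",
+  "dataSource": "sample_datasource",
+  "intervals": ["2013-01-01/2014-01-01"],
+  "merge": true,
+  "analysisTypes": ["aggregators"],
+  "aggregatorMergeStrategy": "earliest"
+}
+```
+
+If an older segment reports a column such as `foo` as `longSum` and a newer segment reports it as `doubleSum`,
+this query resolves the conflict in favor of the `longSum` definition from the older segment.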
diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java
index 655b95e503f..4bb73d845ec 100644
--- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java
+++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java
@@ -277,6 +277,7 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest<SegmentAnalysis, SegmentMetadataQuery>
+    } else if (AggregatorMergeStrategy.EARLIEST == aggregatorMergeStrategy) {
+      // The segment analyses are already ordered above, where arg1 is the analysis pertaining to the latest interval
+      // followed by arg2. So for the earliest strategy, the iteration order should be arg2 and arg1.
+      for (SegmentAnalysis analysis : ImmutableList.of(arg2, arg1)) {
+        if (analysis.getAggregators() != null) {
+          for (Map.Entry<String, AggregatorFactory> entry : analysis.getAggregators().entrySet()) {
+            aggregators.putIfAbsent(entry.getKey(), entry.getValue());
+          }
+        }
+      }
     } else if (AggregatorMergeStrategy.LATEST == aggregatorMergeStrategy) {
       // The segment analyses are already ordered above, where arg1 is the analysis pertaining to the latest interval
-      // followed by arg2.
+      // followed by arg2. So for latest strategy, the iteration order should be arg1 and arg2.
       for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
         if (analysis.getAggregators() != null) {
           for (Map.Entry<String, AggregatorFactory> entry : analysis.getAggregators().entrySet()) {
-            final String aggregatorName = entry.getKey();
-            final AggregatorFactory aggregator = entry.getValue();
-            aggregators.putIfAbsent(aggregatorName, aggregator);
+            aggregators.putIfAbsent(entry.getKey(), entry.getValue());
           }
         }
       }
diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/AggregatorMergeStrategy.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/AggregatorMergeStrategy.java
index d7c013131ab..9155bdab347 100644
--- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/AggregatorMergeStrategy.java
+++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/AggregatorMergeStrategy.java
@@ -28,6 +28,7 @@ public enum AggregatorMergeStrategy
 {
   STRICT,
   LENIENT,
+  EARLIEST,
   LATEST;
 
   @JsonValue
diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java
index 4e77087c796..1926e14eae5 100644
--- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java
+++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java
@@ -32,6 +32,7 @@ import org.apache.druid.query.CacheStrategy;
 import org.apache.druid.query.DataSource;
 import org.apache.druid.query.Druids;
 import org.apache.druid.query.TableDataSource;
+import org.apache.druid.query.UnionDataSource;
 import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory;
 import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
@@ -62,8 +63,18 @@ import java.util.stream.Collectors;
 
 public class SegmentMetadataQueryQueryToolChestTest
 {
private static final DataSource TEST_DATASOURCE = new TableDataSource("dummy"); - private static final SegmentId TEST_SEGMENT_ID1 = SegmentId.of(TEST_DATASOURCE.toString(), Intervals.of("2020-01-01/2020-01-02"), "test", 0); - private static final SegmentId TEST_SEGMENT_ID2 = SegmentId.of(TEST_DATASOURCE.toString(), Intervals.of("2021-01-01/2021-01-02"), "test", 0); + private static final SegmentId TEST_SEGMENT_ID1 = SegmentId.of( + TEST_DATASOURCE.toString(), + Intervals.of("2020-01-01/2020-01-02"), + "test", + 0 + ); + private static final SegmentId TEST_SEGMENT_ID2 = SegmentId.of( + TEST_DATASOURCE.toString(), + Intervals.of("2021-01-01/2021-01-02"), + "test", + 0 + ); @Test public void testCacheStrategy() throws Exception @@ -162,19 +173,19 @@ public class SegmentMetadataQueryQueryToolChestTest Assert.assertEquals( new SegmentAnalysis( - "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", - null, - new LinkedHashMap<>(), - 0, - 0, - ImmutableMap.of( - "foo", new LongSumAggregatorFactory("foo", "foo"), - "bar", new DoubleSumAggregatorFactory("bar", "bar"), - "baz", new DoubleSumAggregatorFactory("baz", "baz") - ), - null, - null, - null + "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleSumAggregatorFactory("bar", "bar"), + "baz", new DoubleSumAggregatorFactory("baz", "baz") + ), + null, + null, + null ), mergeStrict(analysis1, analysis2) ); @@ -198,6 +209,25 @@ public class SegmentMetadataQueryQueryToolChestTest mergeLenient(analysis1, analysis2) ); + Assert.assertEquals( + new SegmentAnalysis( + "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleSumAggregatorFactory("bar", "bar"), + "baz", new DoubleSumAggregatorFactory("baz", "baz") + ), + null, + null, + null + ), + mergeEarliest(analysis1, analysis2) + ); + Assert.assertEquals( new SegmentAnalysis( "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", @@ -292,6 +322,25 @@ public class SegmentMetadataQueryQueryToolChestTest mergeLenient(analysis1, analysis2) ); + Assert.assertEquals( + new SegmentAnalysis( + "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", + expectedIntervals, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "baz", new DoubleSumAggregatorFactory("baz", "baz"), + "bar", new DoubleSumAggregatorFactory("bar", "bar") + ), + null, + null, + null + ), + mergeEarliest(analysis1, analysis2) + ); + Assert.assertEquals( new SegmentAnalysis( "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", @@ -374,6 +423,24 @@ public class SegmentMetadataQueryQueryToolChestTest mergeLenient(analysis1, analysis2) ); + Assert.assertEquals( + new SegmentAnalysis( + "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleSumAggregatorFactory("bar", "bar") + ), + null, + null, + null + ), + mergeEarliest(analysis1, analysis2) + ); + Assert.assertEquals( new SegmentAnalysis( "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", @@ -449,6 +516,21 @@ public class SegmentMetadataQueryQueryToolChestTest mergeLenient(analysis1, analysis2) ); + Assert.assertEquals( + new SegmentAnalysis( + 
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", + null, + new LinkedHashMap<>(), + 0, + 0, + null, + null, + null, + null + ), + mergeEarliest(analysis1, analysis2) + ); + Assert.assertEquals( new SegmentAnalysis( "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", @@ -552,6 +634,48 @@ public class SegmentMetadataQueryQueryToolChestTest ) ); + Assert.assertEquals( + new SegmentAnalysis( + "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleSumAggregatorFactory("bar", "bar"), + "baz", new LongMaxAggregatorFactory("baz", "baz") + ), + null, + null, + null + ), + mergeEarliest(analysis1, analysis2) + ); + + // Simulate multi-level earliest merge + Assert.assertEquals( + new SegmentAnalysis( + "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleSumAggregatorFactory("bar", "bar"), + "baz", new LongMaxAggregatorFactory("baz", "baz") + ), + null, + null, + null + ), + mergeEarliest( + mergeEarliest(analysis1, analysis2), + mergeEarliest(analysis1, analysis2) + ) + ); + Assert.assertEquals( new SegmentAnalysis( "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", @@ -571,7 +695,7 @@ public class SegmentMetadataQueryQueryToolChestTest mergeLatest(analysis1, analysis2) ); - // Simulate multi-level lenient merge + // Simulate multi-level latest merge Assert.assertEquals( new SegmentAnalysis( "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", @@ -683,6 +807,48 @@ public class SegmentMetadataQueryQueryToolChestTest ) ); + Assert.assertEquals( + new SegmentAnalysis( + "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleMaxAggregatorFactory("bar", "bar"), + "baz", new LongMaxAggregatorFactory("baz", "baz") + ), + null, + null, + null + ), + mergeEarliest(analysis1, analysis2) + ); + + // Simulate multi-level earliest merge + Assert.assertEquals( + new SegmentAnalysis( + "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleMaxAggregatorFactory("bar", "bar"), + "baz", new LongMaxAggregatorFactory("baz", "baz") + ), + null, + null, + null + ), + mergeEarliest( + mergeEarliest(analysis1, analysis2), + mergeEarliest(analysis1, analysis2) + ) + ); + Assert.assertEquals( new SegmentAnalysis( "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", @@ -702,7 +868,7 @@ public class SegmentMetadataQueryQueryToolChestTest mergeLatest(analysis1, analysis2) ); - // Simulate multi-level lenient merge + // Simulate multi-level latest merge Assert.assertEquals( new SegmentAnalysis( "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", @@ -729,8 +895,18 @@ public class SegmentMetadataQueryQueryToolChestTest @Test public void testMergeAggregatorsConflictWithEqualSegmentIntervalsAndDifferentPartitions() { - final SegmentId segmentId1 = SegmentId.of(TEST_DATASOURCE.toString(), Intervals.of("2023-01-01/2023-01-02"), "test", 1); - final SegmentId segmentId2 = SegmentId.of(TEST_DATASOURCE.toString(), Intervals.of("2023-01-01/2023-01-02"), "test", 2); + final SegmentId 
segmentId1 = SegmentId.of( + TEST_DATASOURCE.toString(), + Intervals.of("2023-01-01/2023-01-02"), + "test", + 1 + ); + final SegmentId segmentId2 = SegmentId.of( + TEST_DATASOURCE.toString(), + Intervals.of("2023-01-01/2023-01-02"), + "test", + 2 + ); final SegmentAnalysis analysis1 = new SegmentAnalysis( segmentId1.toString(), @@ -817,6 +993,48 @@ public class SegmentMetadataQueryQueryToolChestTest ) ); + Assert.assertEquals( + new SegmentAnalysis( + "dummy_2023-01-01T00:00:00.000Z_2023-01-02T00:00:00.000Z_merged_2", + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleSumAggregatorFactory("bar", "bar"), + "baz", new LongMaxAggregatorFactory("baz", "baz") + ), + null, + null, + null + ), + mergeEarliest(analysis1, analysis2) + ); + + // Simulate multi-level earliest merge + Assert.assertEquals( + new SegmentAnalysis( + "dummy_2023-01-01T00:00:00.000Z_2023-01-02T00:00:00.000Z_merged_2", + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleSumAggregatorFactory("bar", "bar"), + "baz", new LongMaxAggregatorFactory("baz", "baz") + ), + null, + null, + null + ), + mergeEarliest( + mergeEarliest(analysis1, analysis2), + mergeEarliest(analysis1, analysis2) + ) + ); + Assert.assertEquals( new SegmentAnalysis( "dummy_2023-01-01T00:00:00.000Z_2023-01-02T00:00:00.000Z_merged_2", @@ -836,7 +1054,7 @@ public class SegmentMetadataQueryQueryToolChestTest mergeLatest(analysis1, analysis2) ); - // Simulate multi-level lenient merge + // Simulate multi-level latest merge Assert.assertEquals( new SegmentAnalysis( "dummy_2023-01-01T00:00:00.000Z_2023-01-02T00:00:00.000Z_merged_2", @@ -976,6 +1194,12 @@ public class SegmentMetadataQueryQueryToolChestTest Assert.assertFalse(mergeLenient(analysis2, analysis3).isRollup()); Assert.assertTrue(mergeLenient(analysis4, analysis5).isRollup()); + Assert.assertNull(mergeEarliest(analysis1, analysis2).isRollup()); + Assert.assertNull(mergeEarliest(analysis1, analysis4).isRollup()); + Assert.assertNull(mergeEarliest(analysis2, analysis4).isRollup()); + Assert.assertFalse(mergeEarliest(analysis2, analysis3).isRollup()); + Assert.assertTrue(mergeEarliest(analysis4, analysis5).isRollup()); + Assert.assertNull(mergeLatest(analysis1, analysis2).isRollup()); Assert.assertNull(mergeLatest(analysis1, analysis4).isRollup()); Assert.assertNull(mergeLatest(analysis2, analysis4).isRollup()); @@ -1042,6 +1266,146 @@ public class SegmentMetadataQueryQueryToolChestTest ); } + + @Test + public void testMergeWithUnionDatasource() + { + final SegmentAnalysis analysis1 = new SegmentAnalysis( + TEST_SEGMENT_ID1.toString(), + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleSumAggregatorFactory("bar", "bar") + ), + null, + null, + null + ); + final SegmentAnalysis analysis2 = new SegmentAnalysis( + TEST_SEGMENT_ID2.toString(), + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new DoubleMaxAggregatorFactory("bar", "bar"), + "baz", new LongMaxAggregatorFactory("baz", "baz") + ), + null, + null, + false + ); + + final SegmentAnalysis expectedMergedAnalysis = new SegmentAnalysis( + "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged", + null, + new LinkedHashMap<>(), + 0, + 0, + ImmutableMap.of( + "foo", new LongSumAggregatorFactory("foo", "foo"), + "bar", new 
DoubleMaxAggregatorFactory("bar", "bar"), + "baz", new LongMaxAggregatorFactory("baz", "baz") + ), + null, + null, + null + ); + + Assert.assertEquals( + expectedMergedAnalysis, + SegmentMetadataQueryQueryToolChest.finalizeAnalysis( + SegmentMetadataQueryQueryToolChest.mergeAnalyses( + new UnionDataSource( + ImmutableList.of( + new TableDataSource("foo"), + new TableDataSource("dummy") + ) + ).getTableNames(), + analysis1, + analysis2, + AggregatorMergeStrategy.LATEST + ) + ) + ); + + Assert.assertEquals( + expectedMergedAnalysis, + SegmentMetadataQueryQueryToolChest.finalizeAnalysis( + SegmentMetadataQueryQueryToolChest.mergeAnalyses( + new UnionDataSource( + ImmutableList.of( + new TableDataSource("dummy"), + new TableDataSource("foo"), + new TableDataSource("bar") + ) + ).getTableNames(), + analysis1, + analysis2, + AggregatorMergeStrategy.LATEST + ) + ) + ); + } + + @Test + public void testMergeWithNullAnalyses() + { + final SegmentAnalysis analysis1 = new SegmentAnalysis( + TEST_SEGMENT_ID1.toString(), + null, + new LinkedHashMap<>(), + 0, + 0, + null, + null, + null, + null + ); + final SegmentAnalysis analysis2 = new SegmentAnalysis( + TEST_SEGMENT_ID2.toString(), + null, + new LinkedHashMap<>(), + 0, + 0, + null, + null, + null, + false + ); + + Assert.assertEquals( + analysis1, + SegmentMetadataQueryQueryToolChest + .mergeAnalyses(TEST_DATASOURCE.getTableNames(), analysis1, null, AggregatorMergeStrategy.STRICT) + ); + Assert.assertEquals( + analysis2, + SegmentMetadataQueryQueryToolChest + .mergeAnalyses(TEST_DATASOURCE.getTableNames(), null, analysis2, AggregatorMergeStrategy.STRICT) + ); + Assert.assertNull( + SegmentMetadataQueryQueryToolChest + .mergeAnalyses(TEST_DATASOURCE.getTableNames(), null, null, AggregatorMergeStrategy.STRICT) + ); + Assert.assertNull( + SegmentMetadataQueryQueryToolChest + .mergeAnalyses(TEST_DATASOURCE.getTableNames(), null, null, AggregatorMergeStrategy.LENIENT) + ); + Assert.assertNull( + SegmentMetadataQueryQueryToolChest + .mergeAnalyses(TEST_DATASOURCE.getTableNames(), null, null, AggregatorMergeStrategy.EARLIEST) + ); + Assert.assertNull( + SegmentMetadataQueryQueryToolChest + .mergeAnalyses(TEST_DATASOURCE.getTableNames(), null, null, AggregatorMergeStrategy.LATEST) + ); + } + private static SegmentAnalysis mergeStrict(SegmentAnalysis analysis1, SegmentAnalysis analysis2) { return SegmentMetadataQueryQueryToolChest.finalizeAnalysis( @@ -1066,6 +1430,18 @@ public class SegmentMetadataQueryQueryToolChestTest ); } + private static SegmentAnalysis mergeEarliest(SegmentAnalysis analysis1, SegmentAnalysis analysis2) + { + return SegmentMetadataQueryQueryToolChest.finalizeAnalysis( + SegmentMetadataQueryQueryToolChest.mergeAnalyses( + TEST_DATASOURCE.getTableNames(), + analysis1, + analysis2, + AggregatorMergeStrategy.EARLIEST + ) + ); + } + private static SegmentAnalysis mergeLatest(SegmentAnalysis analysis1, SegmentAnalysis analysis2) { return SegmentMetadataQueryQueryToolChest.finalizeAnalysis( diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index 0e13b9f5986..e919799deb5 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -96,6 +96,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest QueryRunnerTestHelper.NOOP_QUERYWATCHER ); private static 
final ObjectMapper MAPPER = new DefaultObjectMapper(); + private static final String DATASOURCE = "testDatasource"; @SuppressWarnings("unchecked") public static QueryRunner makeMMappedQueryRunner( @@ -176,8 +177,8 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest boolean bitmaps ) { - final SegmentId id1 = SegmentId.dummy(differentIds ? "testSegment1" : "testSegment"); - final SegmentId id2 = SegmentId.dummy(differentIds ? "testSegment2" : "testSegment"); + final SegmentId id1 = SegmentId.dummy(differentIds ? "testSegment1" : DATASOURCE); + final SegmentId id2 = SegmentId.dummy(differentIds ? "testSegment2" : DATASOURCE); this.runner1 = mmap1 ? makeMMappedQueryRunner(id1, rollup1, bitmaps, FACTORY) : makeIncrementalIndexQueryRunner(id1, rollup1, bitmaps, FACTORY); @@ -191,14 +192,15 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest this.differentIds = differentIds; this.bitmaps = bitmaps; testQuery = Druids.newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .intervals("2013/2014") .toInclude(new ListColumnIncluderator(Arrays.asList("__time", "index", "placement"))) .analysisTypes( SegmentMetadataQuery.AnalysisType.CARDINALITY, SegmentMetadataQuery.AnalysisType.SIZE, SegmentMetadataQuery.AnalysisType.INTERVAL, - SegmentMetadataQuery.AnalysisType.MINMAX + SegmentMetadataQuery.AnalysisType.MINMAX, + SegmentMetadataQuery.AnalysisType.AGGREGATORS ) .merge(true) .build(); @@ -213,6 +215,12 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest overallSize1 = mmap1 ? 201345 : 200831; overallSize2 = mmap2 ? 201345 : 200831; } + + final Map expectedAggregators = new HashMap<>(); + for (AggregatorFactory agg : TestIndex.METRIC_AGGS) { + expectedAggregators.put(agg.getName(), agg.getCombiningFactory()); + } + expectedSegmentAnalysis1 = new SegmentAnalysis( id1.toString(), ImmutableList.of(Intervals.of("2011-01-12T00:00:00.000Z/2011-04-15T00:00:00.001Z")), @@ -258,7 +266,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest ), overallSize1, 1209, - null, + expectedAggregators, null, null, null @@ -309,7 +317,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest // null_column will be included only for incremental index, which makes a little bigger result than expected overallSize2, 1209, - null, + expectedAggregators, null, null, null @@ -329,7 +337,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest public void testSegmentMetadataQueryWithRollupMerge() { SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis( - differentIds ? "merged" : SegmentId.dummy("testSegment").toString(), + differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(), null, new LinkedHashMap<>( ImmutableMap.of( @@ -385,7 +393,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest SegmentMetadataQuery query = Druids .newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .intervals("2013/2014") .toInclude(new ListColumnIncluderator(Arrays.asList("placement", "placementish"))) .analysisTypes(SegmentMetadataQuery.AnalysisType.ROLLUP) @@ -403,7 +411,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest public void testSegmentMetadataQueryWithHasMultipleValuesMerge() { SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis( - differentIds ? "merged" : SegmentId.dummy("testSegment").toString(), + differentIds ? 
"merged" : SegmentId.dummy(DATASOURCE).toString(), null, new LinkedHashMap<>( ImmutableMap.of( @@ -459,7 +467,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest SegmentMetadataQuery query = Druids .newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .intervals("2013/2014") .toInclude(new ListColumnIncluderator(Arrays.asList("placement", "placementish"))) .analysisTypes(SegmentMetadataQuery.AnalysisType.CARDINALITY) @@ -477,7 +485,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest public void testSegmentMetadataQueryWithComplexColumnMerge() { SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis( - differentIds ? "merged" : SegmentId.dummy("testSegment").toString(), + differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(), null, new LinkedHashMap<>( ImmutableMap.of( @@ -533,7 +541,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest SegmentMetadataQuery query = Druids .newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .intervals("2013/2014") .toInclude(new ListColumnIncluderator(Arrays.asList("placement", "quality_uniques"))) .analysisTypes(SegmentMetadataQuery.AnalysisType.CARDINALITY) @@ -621,8 +629,13 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest ColumnAnalysis analysis ) { + final Map expectedAggregators = new HashMap<>(); + for (AggregatorFactory agg : TestIndex.METRIC_AGGS) { + expectedAggregators.put(agg.getName(), agg.getCombiningFactory()); + } + SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis( - differentIds ? "merged" : SegmentId.dummy("testSegment").toString(), + differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(), ImmutableList.of(expectedSegmentAnalysis1.getIntervals().get(0)), new LinkedHashMap<>( ImmutableMap.of( @@ -656,7 +669,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest ), expectedSegmentAnalysis1.getSize() + expectedSegmentAnalysis2.getSize(), expectedSegmentAnalysis1.getNumRows() + expectedSegmentAnalysis2.getNumRows(), - null, + expectedAggregators, null, null, null @@ -692,7 +705,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest public void testSegmentMetadataQueryWithNoAnalysisTypesMerge() { SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis( - differentIds ? "merged" : SegmentId.dummy("testSegment").toString(), + differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(), null, new LinkedHashMap<>( ImmutableMap.of( @@ -736,7 +749,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest SegmentMetadataQuery query = Druids .newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .intervals("2013/2014") .toInclude(new ListColumnIncluderator(Collections.singletonList("placement"))) .analysisTypes() @@ -758,7 +771,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest expectedAggregators.put(agg.getName(), agg.getCombiningFactory()); } SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis( - differentIds ? "merged" : SegmentId.dummy("testSegment").toString(), + differentIds ? 
"merged" : SegmentId.dummy(DATASOURCE).toString(), null, new LinkedHashMap<>( ImmutableMap.of( @@ -802,7 +815,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest SegmentMetadataQuery query = Druids .newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .intervals("2013/2014") .toInclude(new ListColumnIncluderator(Collections.singletonList("placement"))) .analysisTypes(SegmentMetadataQuery.AnalysisType.AGGREGATORS) @@ -824,7 +837,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest expectedAggregators.put(agg.getName(), agg.getCombiningFactory()); } SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis( - differentIds ? "merged" : SegmentId.dummy("testSegment").toString(), + differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(), null, new LinkedHashMap<>( ImmutableMap.of( @@ -868,7 +881,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest SegmentMetadataQuery query = Druids .newSegmentMetadataQueryBuilder() - .dataSource("testing222") + .dataSource(DATASOURCE) .intervals("2013/2014") .toInclude(new ListColumnIncluderator(Collections.singletonList("placement"))) .analysisTypes(SegmentMetadataQuery.AnalysisType.AGGREGATORS) @@ -887,7 +900,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest public void testSegmentMetadataQueryWithTimestampSpecMerge() { SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis( - differentIds ? "merged" : SegmentId.dummy("testSegment").toString(), + differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(), null, new LinkedHashMap<>( ImmutableMap.of( @@ -931,7 +944,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest SegmentMetadataQuery query = Druids .newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .intervals("2013/2014") .toInclude(new ListColumnIncluderator(Collections.singletonList("placement"))) .analysisTypes(SegmentMetadataQuery.AnalysisType.TIMESTAMPSPEC) @@ -949,7 +962,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest public void testSegmentMetadataQueryWithQueryGranularityMerge() { SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis( - differentIds ? "merged" : SegmentId.dummy("testSegment").toString(), + differentIds ? 
"merged" : SegmentId.dummy(DATASOURCE).toString(), null, new LinkedHashMap<>( ImmutableMap.of( @@ -993,7 +1006,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest SegmentMetadataQuery query = Druids .newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .intervals("2013/2014") .toInclude(new ListColumnIncluderator(Collections.singletonList("placement"))) .analysisTypes(SegmentMetadataQuery.AnalysisType.QUERYGRANULARITY) @@ -1150,7 +1163,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest public void testDefaultIntervalAndFiltering() { SegmentMetadataQuery testQuery = Druids.newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .toInclude(new ListColumnIncluderator(Collections.singletonList("placement"))) .merge(true) .build(); @@ -1410,12 +1423,12 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest public void testCacheKeyWithListColumnIncluderator() { SegmentMetadataQuery oneColumnQuery = Druids.newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .toInclude(new ListColumnIncluderator(Collections.singletonList("foo"))) .build(); SegmentMetadataQuery twoColumnQuery = Druids.newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .toInclude(new ListColumnIncluderator(Arrays.asList("fo", "o"))) .build(); @@ -1436,12 +1449,12 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest public void testAnanlysisTypesBeingSet() { SegmentMetadataQuery query1 = Druids.newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .toInclude(new ListColumnIncluderator(Collections.singletonList("foo"))) .build(); SegmentMetadataQuery query2 = Druids.newSegmentMetadataQueryBuilder() - .dataSource("testing") + .dataSource(DATASOURCE) .toInclude(new ListColumnIncluderator(Collections.singletonList("foo"))) .analysisTypes(SegmentMetadataQuery.AnalysisType.MINMAX) .build();