Add support for earliest `aggregatorMergeStrategy` (#14598)

* Add EARLIEST aggregator merge strategy.

- More unit tests.
- Include the aggregators analysis type by default in tests.

* Docs.

* Some comments and a test

* Collapse into individual code blocks.
Abhishek Radhakrishnan 2023-07-18 12:37:10 -07:00 committed by GitHub
parent 913416c669
commit f4d0ea7bc8
5 changed files with 459 additions and 58 deletions


@@ -62,7 +62,7 @@ There are several main parts to a segment metadata query:
|merge|Merge all individual segment metadata results into a single result|no|
|context|See [Context](../querying/query-context.md)|no|
|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "interval", "minmax"], but can be overridden using the [segment metadata query config](../configuration/index.md#segmentmetadata-query-config). See section [analysisTypes](#analysistypes) for more details.|no|
|aggregatorMergeStrategy|The strategy Druid uses to merge aggregators across segments. If `merge` is true and the `aggregators` analysis type is enabled, `aggregatorMergeStrategy` defaults to `strict`. Possible values include `strict`, `lenient`, `earliest`, and `latest`. See [`aggregatorMergeStrategy`](#aggregatormergestrategy) for details.|no|
|lenientAggregatorMerge|Deprecated. Use `aggregatorMergeStrategy` property instead. If true, and if the `aggregators` analysis type is enabled, Druid merges aggregators leniently.|no|
The format of the result is:
@@ -186,7 +186,7 @@ Currently, there is no API for retrieving this information.
* `aggregators` in the result will contain the list of aggregators usable for querying metric columns. This may be
null if the aggregators are unknown or unmergeable (if merging is enabled).
* Merging can be `strict`, `lenient`, `earliest`, or `latest`. See [`aggregatorMergeStrategy`](#aggregatormergestrategy) for details.
* The form of the result is a map of column name to aggregator.
@@ -201,10 +201,12 @@ Conflicts between aggregator metadata across segments can occur if some segments
two segments use incompatible aggregators for the same column, such as `longSum` changed to `doubleSum`.
Druid supports the following aggregator merge strategies:
- `strict`: If there are any segments with unknown aggregators or any conflicts of any kind, the merged aggregators
list is `null`.
- `lenient`: Druid ignores segments with unknown aggregators. Conflicts between aggregators set the aggregator for
that particular column to null.
- `earliest`: In the event of conflicts between segments, Druid selects the aggregator from the earliest segment
for that particular column.
- `latest`: In the event of conflicts between segments, Druid selects the aggregator from the most recent segment
for that particular column.
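A minimal sketch of a query that opts into the new strategy, modeled on how the tests in this commit build queries. It assumes `Druids.SegmentMetadataQueryBuilder` exposes an `aggregatorMergeStrategy` setter mirroring the JSON property; the `wikipedia` datasource name is illustrative:

import org.apache.druid.query.Druids;
import org.apache.druid.query.metadata.metadata.AggregatorMergeStrategy;
import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery;

public class EarliestMergeQueryExample
{
  public static SegmentMetadataQuery buildQuery()
  {
    return Druids.newSegmentMetadataQueryBuilder()
                 .dataSource("wikipedia")                                      // illustrative datasource
                 .intervals("2013/2014")
                 .analysisTypes(SegmentMetadataQuery.AnalysisType.AGGREGATORS) // aggregator analysis must be enabled
                 .merge(true)                                                  // merging must be enabled for a strategy to apply
                 .aggregatorMergeStrategy(AggregatorMergeStrategy.EARLIEST)    // assumed builder setter
                 .build();
  }
}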


@@ -277,6 +277,7 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest<SegmentAn
SegmentId mergedSegmentId = null;
// A union datasource can contain multiple datasources, so we iterate through all of them to parse the segment id.
for (String dataSource : dataSources) {
final SegmentId id1 = SegmentId.tryParse(dataSource, arg1.getId());
final SegmentId id2 = SegmentId.tryParse(dataSource, arg2.getId());
@@ -364,15 +365,23 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest<SegmentAn
aggregators.put(aggregator.getName(), aggregator);
}
}
} else if (AggregatorMergeStrategy.EARLIEST == aggregatorMergeStrategy) {
// The segment analyses are already ordered above, where arg1 is the analysis pertaining to the latest interval
// followed by arg2. So for earliest strategy, the iteration order should be arg2 and arg1.
for (SegmentAnalysis analysis : ImmutableList.of(arg2, arg1)) {
if (analysis.getAggregators() != null) {
for (Map.Entry<String, AggregatorFactory> entry : analysis.getAggregators().entrySet()) {
aggregators.putIfAbsent(entry.getKey(), entry.getValue());
}
}
}
} else if (AggregatorMergeStrategy.LATEST == aggregatorMergeStrategy) {
// The segment analyses are already ordered above, where arg1 is the analysis pertaining to the latest interval
// followed by arg2. So for latest strategy, the iteration order should be arg1 and arg2.
for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
if (analysis.getAggregators() != null) {
for (Map.Entry<String, AggregatorFactory> entry : analysis.getAggregators().entrySet()) {
aggregators.putIfAbsent(entry.getKey(), entry.getValue());
}
}
}
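Both the `EARLIEST` and `LATEST` branches above rely on `putIfAbsent`, which keeps the first value written for each key, so iteration order alone decides which segment's aggregator survives a conflict. A standalone sketch of that first-writer-wins behavior; the maps of column name to aggregator type are illustrative stand-ins for the real `SegmentAnalysis` objects:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class PutIfAbsentOrderSketch
{
  public static void main(String[] args)
  {
    // arg1 stands in for the analysis of the later interval, arg2 for the earlier one.
    final Map<String, String> arg1 = Map.of("bar", "doubleMax", "baz", "longMax");
    final Map<String, String> arg2 = Map.of("foo", "longSum", "bar", "doubleSum");

    final Map<String, String> earliest = new LinkedHashMap<>();
    for (Map<String, String> analysis : List.of(arg2, arg1)) { // earlier analysis first
      analysis.forEach(earliest::putIfAbsent);
    }

    final Map<String, String> latest = new LinkedHashMap<>();
    for (Map<String, String> analysis : List.of(arg1, arg2)) { // later analysis first
      analysis.forEach(latest::putIfAbsent);
    }

    System.out.println(earliest.get("bar")); // doubleSum: the earlier segment wins the conflict
    System.out.println(latest.get("bar"));   // doubleMax: the later segment wins the conflict
  }
}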


@@ -28,6 +28,7 @@ public enum AggregatorMergeStrategy
{
STRICT,
LENIENT,
EARLIEST,
LATEST;
@JsonValue
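The documented JSON values are lowercase (`strict`, `lenient`, `earliest`, `latest`) while the enum constants are uppercase, so the `@JsonValue` accessor presumably returns the lowercase name. A sketch of the expected round-trip under that assumption, using a stock Jackson mapper:

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.query.metadata.metadata.AggregatorMergeStrategy;

public class MergeStrategyJsonRoundTrip
{
  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new ObjectMapper();
    // Assumption: @JsonValue maps EARLIEST to "earliest", matching the documented property values.
    final String json = mapper.writeValueAsString(AggregatorMergeStrategy.EARLIEST);
    System.out.println(json); // expected: "earliest"
    final AggregatorMergeStrategy parsed = mapper.readValue(json, AggregatorMergeStrategy.class);
    System.out.println(parsed == AggregatorMergeStrategy.EARLIEST); // expected: true
  }
}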


@@ -32,6 +32,7 @@ import org.apache.druid.query.CacheStrategy;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.Druids;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.UnionDataSource;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
@@ -62,8 +63,18 @@ import java.util.stream.Collectors;
public class SegmentMetadataQueryQueryToolChestTest
{
private static final DataSource TEST_DATASOURCE = new TableDataSource("dummy");
private static final SegmentId TEST_SEGMENT_ID1 = SegmentId.of(
TEST_DATASOURCE.toString(),
Intervals.of("2020-01-01/2020-01-02"),
"test",
0
);
private static final SegmentId TEST_SEGMENT_ID2 = SegmentId.of(
TEST_DATASOURCE.toString(),
Intervals.of("2021-01-01/2021-01-02"),
"test",
0
);
@Test
public void testCacheStrategy() throws Exception
@@ -162,19 +173,19 @@ public class SegmentMetadataQueryQueryToolChestTest
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleSumAggregatorFactory("bar", "bar"),
"baz", new DoubleSumAggregatorFactory("baz", "baz")
),
null,
null,
null
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleSumAggregatorFactory("bar", "bar"),
"baz", new DoubleSumAggregatorFactory("baz", "baz")
),
null,
null,
null
),
mergeStrict(analysis1, analysis2)
);
@@ -198,6 +209,25 @@ public class SegmentMetadataQueryQueryToolChestTest
mergeLenient(analysis1, analysis2)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleSumAggregatorFactory("bar", "bar"),
"baz", new DoubleSumAggregatorFactory("baz", "baz")
),
null,
null,
null
),
mergeEarliest(analysis1, analysis2)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
@@ -292,6 +322,25 @@ public class SegmentMetadataQueryQueryToolChestTest
mergeLenient(analysis1, analysis2)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
expectedIntervals,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"baz", new DoubleSumAggregatorFactory("baz", "baz"),
"bar", new DoubleSumAggregatorFactory("bar", "bar")
),
null,
null,
null
),
mergeEarliest(analysis1, analysis2)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
@@ -374,6 +423,24 @@ public class SegmentMetadataQueryQueryToolChestTest
mergeLenient(analysis1, analysis2)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleSumAggregatorFactory("bar", "bar")
),
null,
null,
null
),
mergeEarliest(analysis1, analysis2)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
@@ -449,6 +516,21 @@ public class SegmentMetadataQueryQueryToolChestTest
mergeLenient(analysis1, analysis2)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
null,
new LinkedHashMap<>(),
0,
0,
null,
null,
null,
null
),
mergeEarliest(analysis1, analysis2)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
@@ -552,6 +634,48 @@ public class SegmentMetadataQueryQueryToolChestTest
)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleSumAggregatorFactory("bar", "bar"),
"baz", new LongMaxAggregatorFactory("baz", "baz")
),
null,
null,
null
),
mergeEarliest(analysis1, analysis2)
);
// Simulate multi-level earliest merge
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleSumAggregatorFactory("bar", "bar"),
"baz", new LongMaxAggregatorFactory("baz", "baz")
),
null,
null,
null
),
mergeEarliest(
mergeEarliest(analysis1, analysis2),
mergeEarliest(analysis1, analysis2)
)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
@@ -571,7 +695,7 @@ public class SegmentMetadataQueryQueryToolChestTest
mergeLatest(analysis1, analysis2)
);
// Simulate multi-level latest merge
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
@@ -683,6 +807,48 @@ public class SegmentMetadataQueryQueryToolChestTest
)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleMaxAggregatorFactory("bar", "bar"),
"baz", new LongMaxAggregatorFactory("baz", "baz")
),
null,
null,
null
),
mergeEarliest(analysis1, analysis2)
);
// Simulate multi-level earliest merge
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleMaxAggregatorFactory("bar", "bar"),
"baz", new LongMaxAggregatorFactory("baz", "baz")
),
null,
null,
null
),
mergeEarliest(
mergeEarliest(analysis1, analysis2),
mergeEarliest(analysis1, analysis2)
)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
@@ -702,7 +868,7 @@ public class SegmentMetadataQueryQueryToolChestTest
mergeLatest(analysis1, analysis2)
);
// Simulate multi-level latest merge
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
@@ -729,8 +895,18 @@ public class SegmentMetadataQueryQueryToolChestTest
@Test
public void testMergeAggregatorsConflictWithEqualSegmentIntervalsAndDifferentPartitions()
{
final SegmentId segmentId1 = SegmentId.of(
TEST_DATASOURCE.toString(),
Intervals.of("2023-01-01/2023-01-02"),
"test",
1
);
final SegmentId segmentId2 = SegmentId.of(
TEST_DATASOURCE.toString(),
Intervals.of("2023-01-01/2023-01-02"),
"test",
2
);
final SegmentAnalysis analysis1 = new SegmentAnalysis(
segmentId1.toString(),
@@ -817,6 +993,48 @@ public class SegmentMetadataQueryQueryToolChestTest
)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2023-01-01T00:00:00.000Z_2023-01-02T00:00:00.000Z_merged_2",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleSumAggregatorFactory("bar", "bar"),
"baz", new LongMaxAggregatorFactory("baz", "baz")
),
null,
null,
null
),
mergeEarliest(analysis1, analysis2)
);
// Simulate multi-level earliest merge
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2023-01-01T00:00:00.000Z_2023-01-02T00:00:00.000Z_merged_2",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleSumAggregatorFactory("bar", "bar"),
"baz", new LongMaxAggregatorFactory("baz", "baz")
),
null,
null,
null
),
mergeEarliest(
mergeEarliest(analysis1, analysis2),
mergeEarliest(analysis1, analysis2)
)
);
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2023-01-01T00:00:00.000Z_2023-01-02T00:00:00.000Z_merged_2",
@@ -836,7 +1054,7 @@ public class SegmentMetadataQueryQueryToolChestTest
mergeLatest(analysis1, analysis2)
);
// Simulate multi-level latest merge
Assert.assertEquals(
new SegmentAnalysis(
"dummy_2023-01-01T00:00:00.000Z_2023-01-02T00:00:00.000Z_merged_2",
@@ -976,6 +1194,12 @@ public class SegmentMetadataQueryQueryToolChestTest
Assert.assertFalse(mergeLenient(analysis2, analysis3).isRollup());
Assert.assertTrue(mergeLenient(analysis4, analysis5).isRollup());
Assert.assertNull(mergeEarliest(analysis1, analysis2).isRollup());
Assert.assertNull(mergeEarliest(analysis1, analysis4).isRollup());
Assert.assertNull(mergeEarliest(analysis2, analysis4).isRollup());
Assert.assertFalse(mergeEarliest(analysis2, analysis3).isRollup());
Assert.assertTrue(mergeEarliest(analysis4, analysis5).isRollup());
Assert.assertNull(mergeLatest(analysis1, analysis2).isRollup());
Assert.assertNull(mergeLatest(analysis1, analysis4).isRollup());
Assert.assertNull(mergeLatest(analysis2, analysis4).isRollup());
@@ -1042,6 +1266,146 @@ public class SegmentMetadataQueryQueryToolChestTest
);
}
@Test
public void testMergeWithUnionDatasource()
{
final SegmentAnalysis analysis1 = new SegmentAnalysis(
TEST_SEGMENT_ID1.toString(),
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleSumAggregatorFactory("bar", "bar")
),
null,
null,
null
);
final SegmentAnalysis analysis2 = new SegmentAnalysis(
TEST_SEGMENT_ID2.toString(),
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleMaxAggregatorFactory("bar", "bar"),
"baz", new LongMaxAggregatorFactory("baz", "baz")
),
null,
null,
false
);
final SegmentAnalysis expectedMergedAnalysis = new SegmentAnalysis(
"dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_merged",
null,
new LinkedHashMap<>(),
0,
0,
ImmutableMap.of(
"foo", new LongSumAggregatorFactory("foo", "foo"),
"bar", new DoubleMaxAggregatorFactory("bar", "bar"),
"baz", new LongMaxAggregatorFactory("baz", "baz")
),
null,
null,
null
);
Assert.assertEquals(
expectedMergedAnalysis,
SegmentMetadataQueryQueryToolChest.finalizeAnalysis(
SegmentMetadataQueryQueryToolChest.mergeAnalyses(
new UnionDataSource(
ImmutableList.of(
new TableDataSource("foo"),
new TableDataSource("dummy")
)
).getTableNames(),
analysis1,
analysis2,
AggregatorMergeStrategy.LATEST
)
)
);
Assert.assertEquals(
expectedMergedAnalysis,
SegmentMetadataQueryQueryToolChest.finalizeAnalysis(
SegmentMetadataQueryQueryToolChest.mergeAnalyses(
new UnionDataSource(
ImmutableList.of(
new TableDataSource("dummy"),
new TableDataSource("foo"),
new TableDataSource("bar")
)
).getTableNames(),
analysis1,
analysis2,
AggregatorMergeStrategy.LATEST
)
)
);
}
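The union test passes because `SegmentId.tryParse` only succeeds when handed the datasource name that actually prefixes the segment id string, which is why the tool chest iterates the union's table names until one parses. A small sketch of that behavior; the id string mirrors the `dummy` segment above, and the exact parse outcomes are assumptions about `tryParse`:

import org.apache.druid.timeline.SegmentId;

public class TryParseSketch
{
  public static void main(String[] args)
  {
    final String id = "dummy_2021-01-01T00:00:00.000Z_2021-01-02T00:00:00.000Z_test";
    // Succeeds: "dummy" matches the id's datasource prefix.
    System.out.println(SegmentId.tryParse("dummy", id) != null); // expected: true
    // Fails: "foo" does not, so the caller moves on to the next table name.
    System.out.println(SegmentId.tryParse("foo", id) == null);   // expected: true
  }
}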
@Test
public void testMergeWithNullAnalyses()
{
final SegmentAnalysis analysis1 = new SegmentAnalysis(
TEST_SEGMENT_ID1.toString(),
null,
new LinkedHashMap<>(),
0,
0,
null,
null,
null,
null
);
final SegmentAnalysis analysis2 = new SegmentAnalysis(
TEST_SEGMENT_ID2.toString(),
null,
new LinkedHashMap<>(),
0,
0,
null,
null,
null,
false
);
Assert.assertEquals(
analysis1,
SegmentMetadataQueryQueryToolChest
.mergeAnalyses(TEST_DATASOURCE.getTableNames(), analysis1, null, AggregatorMergeStrategy.STRICT)
);
Assert.assertEquals(
analysis2,
SegmentMetadataQueryQueryToolChest
.mergeAnalyses(TEST_DATASOURCE.getTableNames(), null, analysis2, AggregatorMergeStrategy.STRICT)
);
Assert.assertNull(
SegmentMetadataQueryQueryToolChest
.mergeAnalyses(TEST_DATASOURCE.getTableNames(), null, null, AggregatorMergeStrategy.STRICT)
);
Assert.assertNull(
SegmentMetadataQueryQueryToolChest
.mergeAnalyses(TEST_DATASOURCE.getTableNames(), null, null, AggregatorMergeStrategy.LENIENT)
);
Assert.assertNull(
SegmentMetadataQueryQueryToolChest
.mergeAnalyses(TEST_DATASOURCE.getTableNames(), null, null, AggregatorMergeStrategy.EARLIEST)
);
Assert.assertNull(
SegmentMetadataQueryQueryToolChest
.mergeAnalyses(TEST_DATASOURCE.getTableNames(), null, null, AggregatorMergeStrategy.LATEST)
);
}
private static SegmentAnalysis mergeStrict(SegmentAnalysis analysis1, SegmentAnalysis analysis2)
{
return SegmentMetadataQueryQueryToolChest.finalizeAnalysis(
@@ -1066,6 +1430,18 @@ public class SegmentMetadataQueryQueryToolChestTest
);
}
private static SegmentAnalysis mergeEarliest(SegmentAnalysis analysis1, SegmentAnalysis analysis2)
{
return SegmentMetadataQueryQueryToolChest.finalizeAnalysis(
SegmentMetadataQueryQueryToolChest.mergeAnalyses(
TEST_DATASOURCE.getTableNames(),
analysis1,
analysis2,
AggregatorMergeStrategy.EARLIEST
)
);
}
private static SegmentAnalysis mergeLatest(SegmentAnalysis analysis1, SegmentAnalysis analysis2)
{
return SegmentMetadataQueryQueryToolChest.finalizeAnalysis(


@@ -96,6 +96,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
QueryRunnerTestHelper.NOOP_QUERYWATCHER
);
private static final ObjectMapper MAPPER = new DefaultObjectMapper();
private static final String DATASOURCE = "testDatasource";
@SuppressWarnings("unchecked")
public static QueryRunner makeMMappedQueryRunner(
@@ -176,8 +177,8 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
boolean bitmaps
)
{
final SegmentId id1 = SegmentId.dummy(differentIds ? "testSegment1" : "testSegment");
final SegmentId id2 = SegmentId.dummy(differentIds ? "testSegment2" : "testSegment");
final SegmentId id1 = SegmentId.dummy(differentIds ? "testSegment1" : DATASOURCE);
final SegmentId id2 = SegmentId.dummy(differentIds ? "testSegment2" : DATASOURCE);
this.runner1 = mmap1
? makeMMappedQueryRunner(id1, rollup1, bitmaps, FACTORY)
: makeIncrementalIndexQueryRunner(id1, rollup1, bitmaps, FACTORY);
@@ -191,14 +192,15 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
this.differentIds = differentIds;
this.bitmaps = bitmaps;
testQuery = Druids.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.intervals("2013/2014")
.toInclude(new ListColumnIncluderator(Arrays.asList("__time", "index", "placement")))
.analysisTypes(
SegmentMetadataQuery.AnalysisType.CARDINALITY,
SegmentMetadataQuery.AnalysisType.SIZE,
SegmentMetadataQuery.AnalysisType.INTERVAL,
SegmentMetadataQuery.AnalysisType.MINMAX,
SegmentMetadataQuery.AnalysisType.AGGREGATORS
)
.merge(true)
.build();
@@ -213,6 +215,12 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
overallSize1 = mmap1 ? 201345 : 200831;
overallSize2 = mmap2 ? 201345 : 200831;
}
final Map<String, AggregatorFactory> expectedAggregators = new HashMap<>();
for (AggregatorFactory agg : TestIndex.METRIC_AGGS) {
expectedAggregators.put(agg.getName(), agg.getCombiningFactory());
}
expectedSegmentAnalysis1 = new SegmentAnalysis(
id1.toString(),
ImmutableList.of(Intervals.of("2011-01-12T00:00:00.000Z/2011-04-15T00:00:00.001Z")),
@@ -258,7 +266,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
),
overallSize1,
1209,
expectedAggregators,
null,
null,
null
@@ -309,7 +317,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
// null_column will be included only for incremental index, which makes a little bigger result than expected
overallSize2,
1209,
expectedAggregators,
null,
null,
null
@@ -329,7 +337,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
public void testSegmentMetadataQueryWithRollupMerge()
{
SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(),
null,
new LinkedHashMap<>(
ImmutableMap.of(
@@ -385,7 +393,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
SegmentMetadataQuery query = Druids
.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.intervals("2013/2014")
.toInclude(new ListColumnIncluderator(Arrays.asList("placement", "placementish")))
.analysisTypes(SegmentMetadataQuery.AnalysisType.ROLLUP)
@@ -403,7 +411,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
public void testSegmentMetadataQueryWithHasMultipleValuesMerge()
{
SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(),
null,
new LinkedHashMap<>(
ImmutableMap.of(
@@ -459,7 +467,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
SegmentMetadataQuery query = Druids
.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.intervals("2013/2014")
.toInclude(new ListColumnIncluderator(Arrays.asList("placement", "placementish")))
.analysisTypes(SegmentMetadataQuery.AnalysisType.CARDINALITY)
@@ -477,7 +485,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
public void testSegmentMetadataQueryWithComplexColumnMerge()
{
SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(),
null,
new LinkedHashMap<>(
ImmutableMap.of(
@@ -533,7 +541,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
SegmentMetadataQuery query = Druids
.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.intervals("2013/2014")
.toInclude(new ListColumnIncluderator(Arrays.asList("placement", "quality_uniques")))
.analysisTypes(SegmentMetadataQuery.AnalysisType.CARDINALITY)
@@ -621,8 +629,13 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
ColumnAnalysis analysis
)
{
final Map<String, AggregatorFactory> expectedAggregators = new HashMap<>();
for (AggregatorFactory agg : TestIndex.METRIC_AGGS) {
expectedAggregators.put(agg.getName(), agg.getCombiningFactory());
}
SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(),
ImmutableList.of(expectedSegmentAnalysis1.getIntervals().get(0)),
new LinkedHashMap<>(
ImmutableMap.of(
@@ -656,7 +669,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
),
expectedSegmentAnalysis1.getSize() + expectedSegmentAnalysis2.getSize(),
expectedSegmentAnalysis1.getNumRows() + expectedSegmentAnalysis2.getNumRows(),
expectedAggregators,
null,
null,
null
@@ -692,7 +705,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
public void testSegmentMetadataQueryWithNoAnalysisTypesMerge()
{
SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(),
null,
new LinkedHashMap<>(
ImmutableMap.of(
@@ -736,7 +749,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
SegmentMetadataQuery query = Druids
.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.intervals("2013/2014")
.toInclude(new ListColumnIncluderator(Collections.singletonList("placement")))
.analysisTypes()
@@ -758,7 +771,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
expectedAggregators.put(agg.getName(), agg.getCombiningFactory());
}
SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(),
null,
new LinkedHashMap<>(
ImmutableMap.of(
@@ -802,7 +815,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
SegmentMetadataQuery query = Druids
.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.intervals("2013/2014")
.toInclude(new ListColumnIncluderator(Collections.singletonList("placement")))
.analysisTypes(SegmentMetadataQuery.AnalysisType.AGGREGATORS)
@@ -824,7 +837,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
expectedAggregators.put(agg.getName(), agg.getCombiningFactory());
}
SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(),
null,
new LinkedHashMap<>(
ImmutableMap.of(
@@ -868,7 +881,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
SegmentMetadataQuery query = Druids
.newSegmentMetadataQueryBuilder()
.dataSource("testing222")
.dataSource(DATASOURCE)
.intervals("2013/2014")
.toInclude(new ListColumnIncluderator(Collections.singletonList("placement")))
.analysisTypes(SegmentMetadataQuery.AnalysisType.AGGREGATORS)
@@ -887,7 +900,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
public void testSegmentMetadataQueryWithTimestampSpecMerge()
{
SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(),
null,
new LinkedHashMap<>(
ImmutableMap.of(
@@ -931,7 +944,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
SegmentMetadataQuery query = Druids
.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.intervals("2013/2014")
.toInclude(new ListColumnIncluderator(Collections.singletonList("placement")))
.analysisTypes(SegmentMetadataQuery.AnalysisType.TIMESTAMPSPEC)
@@ -949,7 +962,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
public void testSegmentMetadataQueryWithQueryGranularityMerge()
{
SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
differentIds ? "merged" : SegmentId.dummy(DATASOURCE).toString(),
null,
new LinkedHashMap<>(
ImmutableMap.of(
@@ -993,7 +1006,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
SegmentMetadataQuery query = Druids
.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.intervals("2013/2014")
.toInclude(new ListColumnIncluderator(Collections.singletonList("placement")))
.analysisTypes(SegmentMetadataQuery.AnalysisType.QUERYGRANULARITY)
@@ -1150,7 +1163,7 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
public void testDefaultIntervalAndFiltering()
{
SegmentMetadataQuery testQuery = Druids.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.toInclude(new ListColumnIncluderator(Collections.singletonList("placement")))
.merge(true)
.build();
@@ -1410,12 +1423,12 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
public void testCacheKeyWithListColumnIncluderator()
{
SegmentMetadataQuery oneColumnQuery = Druids.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.toInclude(new ListColumnIncluderator(Collections.singletonList("foo")))
.build();
SegmentMetadataQuery twoColumnQuery = Druids.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.toInclude(new ListColumnIncluderator(Arrays.asList("fo", "o")))
.build();
@@ -1436,12 +1449,12 @@ public class SegmentMetadataQueryTest extends InitializedNullHandlingTest
public void testAnanlysisTypesBeingSet()
{
SegmentMetadataQuery query1 = Druids.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.toInclude(new ListColumnIncluderator(Collections.singletonList("foo")))
.build();
SegmentMetadataQuery query2 = Druids.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.dataSource(DATASOURCE)
.toInclude(new ListColumnIncluderator(Collections.singletonList("foo")))
.analysisTypes(SegmentMetadataQuery.AnalysisType.MINMAX)
.build();