From ed322a4bebc89ba666adefaf9c1e6e298e44be35 Mon Sep 17 00:00:00 2001 From: Himanshu Date: Tue, 13 Dec 2016 20:01:21 -0600 Subject: [PATCH] remove size from default analysisTypes list for segmentMetadata query (#3773) --- docs/content/querying/segmentmetadataquery.md | 2 +- .../resources/hadoop/batch_hadoop_queries.json | 8 ++++---- .../resources/queries/twitterstream_queries.json | 12 ++++++------ .../queries/wikipedia_editstream_queries.json | 6 +++--- .../metadata/metadata/SegmentMetadataQuery.java | 1 - .../druid/query/metadata/SegmentAnalyzerTest.java | 15 +++------------ .../SegmentMetadataQueryQueryToolChestTest.java | 2 +- .../query/metadata/SegmentMetadataQueryTest.java | 7 ++++++- .../metadata/SegmentMetadataUnionQueryTest.java | 6 ++++++ 9 files changed, 30 insertions(+), 29 deletions(-) diff --git a/docs/content/querying/segmentmetadataquery.md b/docs/content/querying/segmentmetadataquery.md index 9189e1fc83c..820db6cadfc 100644 --- a/docs/content/querying/segmentmetadataquery.md +++ b/docs/content/querying/segmentmetadataquery.md @@ -32,7 +32,7 @@ There are several main parts to a segment metadata query: |toInclude|A JSON Object representing what columns should be included in the result. Defaults to "all".|no| |merge|Merge all individual segment metadata results into a single result|no| |context|See [Context](../querying/query-context.html)|no| -|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "size", "interval", "minmax"]. See section [analysisTypes](#analysistypes) for more details.|no| +|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "interval", "minmax"]. See section [analysisTypes](#analysistypes) for more details.|no| |lenientAggregatorMerge|If true, and if the "aggregators" analysisType is enabled, aggregators will be merged leniently. See below for details.|no| The format of the result is: diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json index ded986fc55c..40101e23f22 100644 --- a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json +++ b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json @@ -16,7 +16,7 @@ "columns": { "location": { "type": "STRING", - "size": 10140, + "size": 0, "hasMultipleValues": false, "minValue": "location_1", "maxValue": "location_5", @@ -43,7 +43,7 @@ }, "__time": { "type": "LONG", - "size": 10140, + "size": 0, "hasMultipleValues": false, "minValue": null, "maxValue": null, @@ -52,7 +52,7 @@ }, "product": { "type": "STRING", - "size": 9531, + "size": 0, "hasMultipleValues": false, "minValue": "product_1", "maxValue": "product_9", @@ -60,7 +60,7 @@ "errorMessage": null } }, - "size": 34881, + "size": 0, "numRows": 1014, "aggregators": null, "timestampSpec": null, diff --git a/integration-tests/src/test/resources/queries/twitterstream_queries.json b/integration-tests/src/test/resources/queries/twitterstream_queries.json index fdc1a513492..901360e716c 100644 --- a/integration-tests/src/test/resources/queries/twitterstream_queries.json +++ b/integration-tests/src/test/resources/queries/twitterstream_queries.json @@ -598,14 +598,14 @@ "has_links": { "type": "STRING", "hasMultipleValues": false, - "size": 7773438, + "size": 0, "cardinality": 2, "minValue":"No", "maxValue":"Yes", "errorMessage": null } }, - "size": 747056474, + "size": 0, "numRows": 3702583, "aggregators": null, "timestampSpec": null, @@ -619,14 +619,14 @@ "has_links": { "type": "STRING", "hasMultipleValues": false, - "size": 7901000, + "size": 0, "cardinality": 2, "minValue":"No", "maxValue":"Yes", "errorMessage": null } }, - "size": 755796690, + "size": 0, "numRows": 3743002, "aggregators": null, "timestampSpec": null, @@ -640,14 +640,14 @@ "has_links": { "type": "STRING", "hasMultipleValues": false, - "size": 7405654, + "size": 0, "cardinality": 2, "minValue":"No", "maxValue":"Yes", "errorMessage": null } }, - "size": 706893542, + "size": 0, "numRows":3502959, "aggregators": null, "timestampSpec": null, diff --git a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json index 5873a87d95b..1fe441252c7 100644 --- a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json +++ b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json @@ -1048,7 +1048,7 @@ "country_name": { "type": "STRING", "hasMultipleValues": false, - "size": 41922148, + "size": 0, "cardinality": 208, "minValue":"", "maxValue":"mmx._unknown", @@ -1057,14 +1057,14 @@ "language": { "type": "STRING", "hasMultipleValues": false, - "size": 8924222, + "size": 0, "cardinality": 36, "minValue":"ar", "maxValue":"zh", "errorMessage": null } }, - "size": 902457341, + "size": 0, "numRows": 4462111, "aggregators": null, "timestampSpec": null, diff --git a/processing/src/main/java/io/druid/query/metadata/metadata/SegmentMetadataQuery.java b/processing/src/main/java/io/druid/query/metadata/metadata/SegmentMetadataQuery.java index dbac143ba3f..3d9ab5b117e 100644 --- a/processing/src/main/java/io/druid/query/metadata/metadata/SegmentMetadataQuery.java +++ b/processing/src/main/java/io/druid/query/metadata/metadata/SegmentMetadataQuery.java @@ -86,7 +86,6 @@ public class SegmentMetadataQuery extends BaseQuery public static final EnumSet DEFAULT_ANALYSIS_TYPES = EnumSet.of( AnalysisType.CARDINALITY, - AnalysisType.SIZE, AnalysisType.INTERVAL, AnalysisType.MINMAX ); diff --git a/processing/src/test/java/io/druid/query/metadata/SegmentAnalyzerTest.java b/processing/src/test/java/io/druid/query/metadata/SegmentAnalyzerTest.java index 748ccb634eb..5c927be6b2f 100644 --- a/processing/src/test/java/io/druid/query/metadata/SegmentAnalyzerTest.java +++ b/processing/src/test/java/io/druid/query/metadata/SegmentAnalyzerTest.java @@ -91,11 +91,7 @@ public class SegmentAnalyzerTest final ColumnAnalysis columnAnalysis = columns.get(metric); Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType()); - if (analyses == null) { - Assert.assertTrue(metric, columnAnalysis.getSize() > 0); - } else { - Assert.assertEquals(metric, 0, columnAnalysis.getSize()); - } + Assert.assertEquals(metric, 0, columnAnalysis.getSize()); Assert.assertNull(metric, columnAnalysis.getCardinality()); } } @@ -131,12 +127,11 @@ public class SegmentAnalyzerTest Assert.assertNull(columnAnalysis); } else { Assert.assertEquals(dimension, ValueType.STRING.name(), columnAnalysis.getType()); + Assert.assertEquals(dimension, 0, columnAnalysis.getSize()); if (analyses == null) { - Assert.assertTrue(dimension, columnAnalysis.getSize() > 0); Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0); } else { Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue()); - Assert.assertEquals(dimension, 0, columnAnalysis.getSize()); } } } @@ -145,11 +140,7 @@ public class SegmentAnalyzerTest final ColumnAnalysis columnAnalysis = columns.get(metric); Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType()); - if (analyses == null) { - Assert.assertTrue(metric, columnAnalysis.getSize() > 0); - } else { - Assert.assertEquals(metric, 0, columnAnalysis.getSize()); - } + Assert.assertEquals(metric, 0, columnAnalysis.getSize()); Assert.assertNull(metric, columnAnalysis.getCardinality()); } } diff --git a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java index 24da3e4b9b1..8449f3e39d1 100644 --- a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java +++ b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java @@ -63,7 +63,7 @@ public class SegmentMetadataQueryQueryToolChestTest new SegmentMetadataQueryQueryToolChest(null).getCacheStrategy(query); // Test cache key generation - byte[] expectedKey = {0x04, 0x01, (byte) 0xFF, 0x00, 0x01, 0x02, 0x04}; + byte[] expectedKey = {0x04, 0x01, (byte) 0xFF, 0x00, 0x02, 0x04}; byte[] actualKey = strategy.computeCacheKey(query); Assert.assertArrayEquals(expectedKey, actualKey); diff --git a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryTest.java index e312399d2f0..d84db7f6f33 100644 --- a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryTest.java @@ -154,7 +154,12 @@ public class SegmentMetadataQueryTest .dataSource("testing") .intervals("2013/2014") .toInclude(new ListColumnIncluderator(Arrays.asList("__time", "index", "placement"))) - .analysisTypes(null) + .analysisTypes( + SegmentMetadataQuery.AnalysisType.CARDINALITY, + SegmentMetadataQuery.AnalysisType.SIZE, + SegmentMetadataQuery.AnalysisType.INTERVAL, + SegmentMetadataQuery.AnalysisType.MINMAX + ) .merge(true) .build(); diff --git a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataUnionQueryTest.java b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataUnionQueryTest.java index e6b2a0bb90e..701d28159bd 100644 --- a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataUnionQueryTest.java +++ b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataUnionQueryTest.java @@ -121,6 +121,12 @@ public class SegmentMetadataUnionQueryTest .dataSource(QueryRunnerTestHelper.unionDataSource) .intervals(QueryRunnerTestHelper.fullOnInterval) .toInclude(new ListColumnIncluderator(Lists.newArrayList("placement"))) + .analysisTypes( + SegmentMetadataQuery.AnalysisType.CARDINALITY, + SegmentMetadataQuery.AnalysisType.SIZE, + SegmentMetadataQuery.AnalysisType.INTERVAL, + SegmentMetadataQuery.AnalysisType.MINMAX + ) .build(); List result = Sequences.toList(runner.run(query, Maps.newHashMap()), Lists.newArrayList()); TestHelper.assertExpectedObjects(ImmutableList.of(expected), result, "failed SegmentMetadata union query");