From 5baa22148e7d8c1ff0919e516656ec460457ab6c Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 10 Nov 2021 18:46:29 -0800 Subject: [PATCH] revert ColumnAnalysis type, add typeSignature and use it for DruidSchema (#11895) * revert ColumnAnalysis type, add typeSignature and use it for DruidSchema * review stuffs * maybe null * better maybe null * Update docs/querying/segmentmetadataquery.md * Update docs/querying/segmentmetadataquery.md Co-authored-by: Charles Smith * fix null right * sad * oops * Update batch_hadoop_queries.json Co-authored-by: Charles Smith --- docs/querying/segmentmetadataquery.md | 8 +- .../hadoop/batch_hadoop_queries.json | 5 + .../queries/twitterstream_queries.json | 3 + .../queries/wikipedia_editstream_queries.json | 2 + .../druid/query/metadata/SegmentAnalyzer.java | 18 +- .../SegmentMetadataQueryQueryToolChest.java | 52 +---- .../metadata/metadata/ColumnAnalysis.java | 47 +++- .../metadata/metadata/ColumnIncluderator.java | 4 +- .../metadata/SegmentMetadataQuery.java | 32 +-- .../apache/druid/query/DoubleStorageTest.java | 19 +- .../query/metadata/SegmentAnalyzerTest.java | 3 +- ...egmentMetadataQueryQueryToolChestTest.java | 6 +- .../metadata/SegmentMetadataQueryTest.java | 27 ++- .../SegmentMetadataUnionQueryTest.java | 2 + .../metadata/metadata/ColumnAnalysisTest.java | 204 ++++++++++++++++-- .../druid/sql/calcite/schema/DruidSchema.java | 10 +- 16 files changed, 317 insertions(+), 125 deletions(-) diff --git a/docs/querying/segmentmetadataquery.md b/docs/querying/segmentmetadataquery.md index 779d51cd95c..b7ae9377e5a 100644 --- a/docs/querying/segmentmetadataquery.md +++ b/docs/querying/segmentmetadataquery.md @@ -87,9 +87,11 @@ The format of the result is: } ] ``` -Dimension columns will have type `STRING`, `FLOAT`, `DOUBLE`, or `LONG`. -Metric columns will have type `FLOAT`, `DOUBLE`, or `LONG`, or the name of the underlying complex type such as `hyperUnique` in case of COMPLEX metric. -Timestamp column will have type `LONG`. +All columns contain a `typeSignature` that Druid uses to represent the column type information internally. The `typeSignature` is typically the same value used to identify the JSON type information at query or ingest time. One of: `STRING`, `FLOAT`, `DOUBLE`, `LONG`, or `COMPLEX`, e.g. `COMPLEX`. + +Columns also have a legacy `type` name. For some column types, the value may match the `typeSignature` (`STRING`, `FLOAT`, `DOUBLE`, or `LONG`). For `COMPLEX` columns, the `type` only contains the name of the underlying complex type such as `hyperUnique`. + +New applications should use `typeSignature`, not `type`. If the `errorMessage` field is non-null, you should not trust the other fields in the response. Their contents are undefined. diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json index 2a390b2ec1f..50397b1d7f4 100644 --- a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json +++ b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json @@ -15,6 +15,7 @@ "id": "merged", "columns": { "location": { + "typeSignature": "STRING", "type": "STRING", "size": 0, "hasMultipleValues": false, @@ -25,6 +26,7 @@ "errorMessage": null }, "user_id_sketch": { + "typeSignature": "COMPLEX", "type": "thetaSketch", "size": 0, "hasMultipleValues": false, @@ -35,6 +37,7 @@ "errorMessage": null }, "other_metric": { + "typeSignature": "COMPLEX", "type": "thetaSketch", "size": 0, "hasMultipleValues": false, @@ -45,6 +48,7 @@ "errorMessage": null }, "__time": { + "typeSignature": "LONG", "type": "LONG", "size": 0, "hasMultipleValues": false, @@ -55,6 +59,7 @@ "errorMessage": null }, "product": { + "typeSignature": "STRING", "type": "STRING", "size": 0, "hasMultipleValues": false, diff --git a/integration-tests/src/test/resources/queries/twitterstream_queries.json b/integration-tests/src/test/resources/queries/twitterstream_queries.json index a0796fd4f9b..cdd4057eb57 100644 --- a/integration-tests/src/test/resources/queries/twitterstream_queries.json +++ b/integration-tests/src/test/resources/queries/twitterstream_queries.json @@ -596,6 +596,7 @@ "intervals":["2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z"], "columns":{ "has_links":{ + "typeSignature": "STRING", "type":"STRING", "hasMultipleValues":false, "size":0, @@ -618,6 +619,7 @@ "intervals":["2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z"], "columns":{ "has_links":{ + "typeSignature": "STRING", "type":"STRING", "hasMultipleValues":false, "size":0, @@ -640,6 +642,7 @@ "intervals":["2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z"], "columns":{ "has_links":{ + "typeSignature": "STRING", "type":"STRING", "hasMultipleValues":false, "size":0, diff --git a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json index 90cd3cbc0e1..59a5c6ca70b 100644 --- a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json +++ b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json @@ -1402,6 +1402,7 @@ "intervals":["2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z"], "columns":{ "country_name":{ + "typeSignature": "STRING", "type":"STRING", "hasMultipleValues":false, "size":0, @@ -1412,6 +1413,7 @@ "hasNulls":true }, "language":{ + "typeSignature": "STRING", "type":"STRING", "hasMultipleValues":false, "size":0, diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 876a5a169fa..600b773540c 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -43,6 +43,7 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ColumnTypeFactory; import org.apache.druid.segment.column.ComplexColumn; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.TypeSignature; @@ -194,7 +195,8 @@ public class SegmentAnalyzer } return new ColumnAnalysis( - capabilities.asTypeString(), + capabilities.toColumnType(), + capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, @@ -250,7 +252,8 @@ public class SegmentAnalyzer } return new ColumnAnalysis( - capabilities.asTypeString(), + capabilities.toColumnType(), + capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, @@ -328,7 +331,8 @@ public class SegmentAnalyzer } return new ColumnAnalysis( - capabilities.asTypeString(), + capabilities.toColumnType(), + capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, @@ -347,8 +351,6 @@ public class SegmentAnalyzer final TypeSignature typeSignature = capabilities == null ? ColumnType.UNKNOWN_COMPLEX : capabilities; final String typeName = typeSignature.getComplexTypeName(); - // serialize using asTypeString (which is also used for JSON so can easily round-trip complex type info back into ColumnType) - final String serdeTypeName = typeSignature.asTypeString(); try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) { final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue(); final boolean hasNulls = capabilities != null && capabilities.hasNulls().isMaybeTrue(); @@ -363,7 +365,8 @@ public class SegmentAnalyzer final Function inputSizeFn = serde.inputSizeFn(); if (inputSizeFn == null) { return new ColumnAnalysis( - serdeTypeName, + ColumnTypeFactory.ofType(typeSignature), + typeName, hasMultipleValues, hasNulls, 0, @@ -381,7 +384,8 @@ public class SegmentAnalyzer } return new ColumnAnalysis( - serdeTypeName, + ColumnTypeFactory.ofType(typeSignature), + typeName, hasMultipleValues, hasNulls, size, diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java index a40bd69ee55..1bb24ef2e71 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java @@ -23,7 +23,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Functions; -import com.google.common.base.Predicate; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -58,8 +57,6 @@ import org.apache.druid.timeline.LogicalSegment; import org.joda.time.DateTime; import org.joda.time.Interval; -import javax.annotation.Nullable; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -75,16 +72,10 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest TYPE_REFERENCE = new TypeReference() { }; - private static final byte[] SEGMENT_METADATA_CACHE_PREFIX = new byte[]{0x4}; + private static final byte SEGMENT_METADATA_CACHE_PREFIX = 0x4; private static final byte SEGMENT_METADATA_QUERY = 0x16; - private static final Function MERGE_TRANSFORM_FN = new Function() - { - @Override - public SegmentAnalysis apply(SegmentAnalysis analysis) - { - return finalizeAnalysis(analysis); - } - }; + private static final Function MERGE_TRANSFORM_FN = + SegmentMetadataQueryQueryToolChest::finalizeAnalysis; private final SegmentMetadataQueryConfig config; private final GenericQueryMetricsFactory queryMetricsFactory; @@ -195,13 +186,9 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest prepareForCache(boolean isResultLevelCache) { - return new Function() - { - @Override - public SegmentAnalysis apply(@Nullable SegmentAnalysis input) - { - return input; - } - }; + return input -> input; } @Override public Function pullFromCache(boolean isResultLevelCache) { - return new Function() - { - @Override - public SegmentAnalysis apply(@Nullable SegmentAnalysis input) - { - return input; - } - }; + return input -> input; } }; } @@ -266,14 +239,7 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest() - { - @Override - public boolean apply(T input) - { - return (input.getInterval().overlaps(targetInterval)); - } - } + input -> (input.getInterval().overlaps(targetInterval)) ) ); } diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java index 4edb4b7517d..4d68cfa32d4 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java @@ -22,6 +22,8 @@ package org.apache.druid.query.metadata.metadata; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeInfo; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.column.ColumnType; import java.util.Objects; @@ -33,10 +35,11 @@ public class ColumnAnalysis public static ColumnAnalysis error(String reason) { - return new ColumnAnalysis("STRING", false, false, -1, null, null, null, ERROR_PREFIX + reason); + return new ColumnAnalysis(ColumnType.STRING, "STRING", false, false, -1, null, null, null, ERROR_PREFIX + reason); } private final String type; + private final ColumnType typeSignature; private final boolean hasMultipleValues; private final boolean hasNulls; private final long size; @@ -47,6 +50,7 @@ public class ColumnAnalysis @JsonCreator public ColumnAnalysis( + @JsonProperty("typeSignature") ColumnType typeSignature, @JsonProperty("type") String type, @JsonProperty("hasMultipleValues") boolean hasMultipleValues, @JsonProperty("hasNulls") boolean hasNulls, @@ -57,6 +61,7 @@ public class ColumnAnalysis @JsonProperty("errorMessage") String errorMessage ) { + this.typeSignature = typeSignature; this.type = type; this.hasMultipleValues = hasMultipleValues; this.hasNulls = hasNulls; @@ -68,6 +73,13 @@ public class ColumnAnalysis } @JsonProperty + public ColumnType getTypeSignature() + { + return typeSignature; + } + + @JsonProperty + @Deprecated public String getType() { return type; @@ -135,8 +147,20 @@ public class ColumnAnalysis return rhs; } - if (!type.equals(rhs.getType())) { - return ColumnAnalysis.error("cannot_merge_diff_types"); + if (!Objects.equals(type, rhs.getType())) { + return ColumnAnalysis.error( + StringUtils.format("cannot_merge_diff_types: [%s] and [%s]", type, rhs.getType()) + ); + } + + if (!Objects.equals(typeSignature, rhs.getTypeSignature())) { + return ColumnAnalysis.error( + StringUtils.format( + "cannot_merge_diff_types: [%s] and [%s]", + typeSignature.asTypeString(), + rhs.getTypeSignature().asTypeString() + ) + ); } Integer cardinality = getCardinality(); @@ -153,6 +177,7 @@ public class ColumnAnalysis Comparable newMax = choose(maxValue, rhs.maxValue, true); return new ColumnAnalysis( + typeSignature, type, multipleValues, hasNulls || rhs.hasNulls, @@ -180,7 +205,8 @@ public class ColumnAnalysis public String toString() { return "ColumnAnalysis{" + - "type='" + type + '\'' + + "typeSignature='" + typeSignature + '\'' + + ", type=" + type + ", hasMultipleValues=" + hasMultipleValues + ", hasNulls=" + hasNulls + ", size=" + size + @@ -204,6 +230,7 @@ public class ColumnAnalysis return hasMultipleValues == that.hasMultipleValues && hasNulls == that.hasNulls && size == that.size && + Objects.equals(typeSignature, that.typeSignature) && Objects.equals(type, that.type) && Objects.equals(cardinality, that.cardinality) && Objects.equals(minValue, that.minValue) && @@ -214,6 +241,16 @@ public class ColumnAnalysis @Override public int hashCode() { - return Objects.hash(type, hasMultipleValues, hasNulls, size, cardinality, minValue, maxValue, errorMessage); + return Objects.hash( + typeSignature, + type, + hasMultipleValues, + hasNulls, + size, + cardinality, + minValue, + maxValue, + errorMessage + ); } } diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnIncluderator.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnIncluderator.java index 600b2773d2e..cf092f458d7 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnIncluderator.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnIncluderator.java @@ -21,6 +21,7 @@ package org.apache.druid.query.metadata.metadata; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; +import org.apache.druid.java.util.common.Cacheable; /** */ @@ -30,8 +31,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; @JsonSubTypes.Type(name = "all", value = AllColumnIncluderator.class), @JsonSubTypes.Type(name = "list", value = ListColumnIncluderator.class) }) -public interface ColumnIncluderator +public interface ColumnIncluderator extends Cacheable { boolean include(String columnName); - byte[] getCacheKey(); } diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/SegmentMetadataQuery.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/SegmentMetadataQuery.java index 3888f8c989b..746dc4f224b 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/SegmentMetadataQuery.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/SegmentMetadataQuery.java @@ -23,7 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonValue; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.druid.java.util.common.Cacheable; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.BaseQuery; @@ -38,7 +38,6 @@ import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.query.spec.QuerySegmentSpec; import org.joda.time.Interval; -import java.nio.ByteBuffer; import java.util.EnumSet; import java.util.List; import java.util.Map; @@ -46,16 +45,9 @@ import java.util.Objects; public class SegmentMetadataQuery extends BaseQuery { - /** - * The SegmentMetadataQuery cache key may contain UTF-8 column name strings. - * Prepend 0xFF before the analysisTypes as a separator to avoid - * any potential confusion with string values. - */ - public static final byte[] ANALYSIS_TYPES_CACHE_PREFIX = new byte[] {(byte) 0xFF}; - private static final QuerySegmentSpec DEFAULT_SEGMENT_SPEC = new MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY); - public enum AnalysisType + public enum AnalysisType implements Cacheable { CARDINALITY, SIZE, @@ -79,6 +71,7 @@ public class SegmentMetadataQuery extends BaseQuery return valueOf(StringUtils.toUpperCase(name)); } + @Override public byte[] getCacheKey() { return new byte[] {(byte) this.ordinal()}; @@ -193,25 +186,6 @@ public class SegmentMetadataQuery extends BaseQuery return analysisTypes.contains(AnalysisType.ROLLUP); } - public byte[] getAnalysisTypesCacheKey() - { - int size = 1; - List typeBytesList = Lists.newArrayListWithExpectedSize(analysisTypes.size()); - for (AnalysisType analysisType : analysisTypes) { - final byte[] bytes = analysisType.getCacheKey(); - typeBytesList.add(bytes); - size += bytes.length; - } - - final ByteBuffer bytes = ByteBuffer.allocate(size); - bytes.put(ANALYSIS_TYPES_CACHE_PREFIX); - for (byte[] typeBytes : typeBytesList) { - bytes.put(typeBytes); - } - - return bytes.array(); - } - @Override public Query withOverriddenContext(Map contextOverride) { diff --git a/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java b/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java index db7c57d2916..14baf31304c 100644 --- a/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java +++ b/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java @@ -52,6 +52,7 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; @@ -156,7 +157,8 @@ public class DoubleStorageTest ImmutableMap.of( TIME_COLUMN, new ColumnAnalysis( - ValueType.LONG.toString(), + ColumnType.LONG, + ValueType.LONG.name(), false, false, 100, @@ -167,7 +169,8 @@ public class DoubleStorageTest ), DIM_NAME, new ColumnAnalysis( - ValueType.STRING.toString(), + ColumnType.STRING, + ValueType.STRING.name(), false, false, 120, @@ -178,7 +181,8 @@ public class DoubleStorageTest ), DIM_FLOAT_NAME, new ColumnAnalysis( - ValueType.DOUBLE.toString(), + ColumnType.DOUBLE, + ValueType.DOUBLE.name(), false, false, 80, @@ -201,7 +205,8 @@ public class DoubleStorageTest ImmutableMap.of( TIME_COLUMN, new ColumnAnalysis( - ValueType.LONG.toString(), + ColumnType.LONG, + ValueType.LONG.name(), false, false, 100, @@ -212,7 +217,8 @@ public class DoubleStorageTest ), DIM_NAME, new ColumnAnalysis( - ValueType.STRING.toString(), + ColumnType.STRING, + ValueType.STRING.name(), false, false, 120, @@ -223,7 +229,8 @@ public class DoubleStorageTest ), DIM_FLOAT_NAME, new ColumnAnalysis( - ValueType.FLOAT.toString(), + ColumnType.FLOAT, + ValueType.FLOAT.name(), false, false, 80, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java index 14ed7833f04..9e2781f466d 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java @@ -295,7 +295,8 @@ public class SegmentAnalyzerTest extends InitializedNullHandlingTest Map analyses = analyzer.analyze(segment); ColumnAnalysis columnAnalysis = analyses.get(invalid_aggregator); Assert.assertFalse(columnAnalysis.isError()); - Assert.assertEquals("COMPLEX", columnAnalysis.getType()); + Assert.assertEquals("invalid_complex_column_type", columnAnalysis.getType()); + Assert.assertEquals(ColumnType.ofComplex("invalid_complex_column_type"), columnAnalysis.getTypeSignature()); } // Persist the index. diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java index 752db63d983..b93c160b290 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java @@ -37,6 +37,7 @@ import org.apache.druid.query.metadata.metadata.ColumnAnalysis; import org.apache.druid.query.metadata.metadata.SegmentAnalysis; import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; import org.apache.druid.query.spec.LegacySegmentSpec; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.timeline.LogicalSegment; import org.joda.time.Interval; @@ -69,7 +70,7 @@ public class SegmentMetadataQueryQueryToolChestTest new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()).getCacheStrategy(query); // Test cache key generation - byte[] expectedKey = {0x04, 0x01, (byte) 0xFF, 0x00, 0x02, 0x04}; + byte[] expectedKey = {0x04, 0x09, 0x01, 0x0A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x02, 0x04}; byte[] actualKey = strategy.computeCacheKey(query); Assert.assertArrayEquals(expectedKey, actualKey); @@ -79,7 +80,8 @@ public class SegmentMetadataQueryQueryToolChestTest ImmutableMap.of( "placement", new ColumnAnalysis( - ValueType.STRING.toString(), + ColumnType.STRING, + ValueType.STRING.name(), true, false, 10881, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index 3e6acc1b62f..499a061b348 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -52,6 +52,7 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.TestIndex; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.timeline.LogicalSegment; @@ -207,6 +208,7 @@ public class SegmentMetadataQueryTest ImmutableMap.of( "__time", new ColumnAnalysis( + ColumnType.LONG, ValueType.LONG.toString(), false, false, @@ -218,6 +220,7 @@ public class SegmentMetadataQueryTest ), "index", new ColumnAnalysis( + ColumnType.DOUBLE, ValueType.DOUBLE.toString(), false, false, @@ -229,6 +232,7 @@ public class SegmentMetadataQueryTest ), "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -251,6 +255,7 @@ public class SegmentMetadataQueryTest ImmutableMap.of( "__time", new ColumnAnalysis( + ColumnType.LONG, ValueType.LONG.toString(), false, false, @@ -262,6 +267,7 @@ public class SegmentMetadataQueryTest ), "index", new ColumnAnalysis( + ColumnType.DOUBLE, ValueType.DOUBLE.toString(), false, false, @@ -273,6 +279,7 @@ public class SegmentMetadataQueryTest ), "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -310,6 +317,7 @@ public class SegmentMetadataQueryTest ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -321,6 +329,7 @@ public class SegmentMetadataQueryTest ), "placementish", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), true, false, @@ -380,6 +389,7 @@ public class SegmentMetadataQueryTest ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -391,6 +401,7 @@ public class SegmentMetadataQueryTest ), "placementish", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), true, false, @@ -450,6 +461,7 @@ public class SegmentMetadataQueryTest ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -461,7 +473,8 @@ public class SegmentMetadataQueryTest ), "quality_uniques", new ColumnAnalysis( - "COMPLEX", + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", false, true, 0, @@ -521,6 +534,7 @@ public class SegmentMetadataQueryTest size2 = mmap2 ? 10881 : 10764; } ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -543,6 +557,7 @@ public class SegmentMetadataQueryTest size2 = mmap2 ? 6882 : 6808; } ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -565,6 +580,7 @@ public class SegmentMetadataQueryTest size2 = mmap2 ? 9765 : 9660; } ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -588,6 +604,7 @@ public class SegmentMetadataQueryTest ImmutableMap.of( "__time", new ColumnAnalysis( + ColumnType.LONG, ValueType.LONG.toString(), false, false, @@ -599,6 +616,7 @@ public class SegmentMetadataQueryTest ), "index", new ColumnAnalysis( + ColumnType.DOUBLE, ValueType.DOUBLE.toString(), false, false, @@ -654,6 +672,7 @@ public class SegmentMetadataQueryTest ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -717,6 +736,7 @@ public class SegmentMetadataQueryTest ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -776,6 +796,7 @@ public class SegmentMetadataQueryTest ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -835,6 +856,7 @@ public class SegmentMetadataQueryTest ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -1299,6 +1321,7 @@ public class SegmentMetadataQueryTest public void testLongNullableColumn() { ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.LONG, ValueType.LONG.toString(), false, NullHandling.replaceWithDefault() ? false : true, @@ -1315,6 +1338,7 @@ public class SegmentMetadataQueryTest public void testDoubleNullableColumn() { ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.DOUBLE, ValueType.DOUBLE.toString(), false, NullHandling.replaceWithDefault() ? false : true, @@ -1332,6 +1356,7 @@ public class SegmentMetadataQueryTest public void testFloatNullableColumn() { ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.FLOAT, ValueType.FLOAT.toString(), false, NullHandling.replaceWithDefault() ? false : true, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java index 2680d4b30e2..c8862c57c67 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java @@ -36,6 +36,7 @@ import org.apache.druid.segment.IncrementalIndexSegment; import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.TestIndex; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Test; @@ -101,6 +102,7 @@ public class SegmentMetadataUnionQueryTest extends InitializedNullHandlingTest ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java b/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java index 798d67a06fb..9b1f75b5d76 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java @@ -21,6 +21,7 @@ package org.apache.druid.query.metadata.metadata; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnType; import org.junit.Assert; import org.junit.Test; @@ -36,13 +37,43 @@ public class ColumnAnalysisTest @Test public void testFoldStringColumns() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, false, 1L, 2, "aaA", "Zzz", null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", true, false, 3L, 4, "aAA", "ZZz", null); + final ColumnAnalysis analysis1 = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + 1L, + 2, + "aaA", + "Zzz", + null + ); + final ColumnAnalysis analysis2 = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + true, + false, + 3L, + 4, + "aAA", + "ZZz", + null + ); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", true, false, 4L, 4, "aAA", "Zzz", null); + final ColumnAnalysis expected = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + true, + false, + 4L, + 4, + "aAA", + "Zzz", + null + ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); @@ -56,7 +87,17 @@ public class ColumnAnalysisTest @Test public void testFoldWithNull() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, false, 1L, 2, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + 1L, + 2, + null, + null, + null + ); Assert.assertEquals(analysis1, analysis1.fold(null)); assertSerDe(analysis1); } @@ -64,13 +105,43 @@ public class ColumnAnalysisTest @Test public void testFoldComplexColumns() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis( + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", + false, + false, + 0L, + null, + null, + null, + null + ); + final ColumnAnalysis analysis2 = new ColumnAnalysis( + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", + false, + false, + 0L, + null, + null, + null, + null + ); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); + final ColumnAnalysis expected = new ColumnAnalysis( + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", + false, + false, + 0L, + null, + null, + null, + null + ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); @@ -84,26 +155,83 @@ public class ColumnAnalysisTest @Test public void testFoldDifferentTypes() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, false, 1L, 1, null, null, null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("COMPLEX", false, false, 2L, 2, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis( + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", + false, + false, + 1L, + 1, + null, + null, + null + ); + final ColumnAnalysis analysis2 = new ColumnAnalysis( + ColumnType.UNKNOWN_COMPLEX, + "COMPLEX", + false, + false, + 2L, + 2, + null, + null, + null + ); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis( - "STRING", + final ColumnAnalysis expected = ColumnAnalysis.error("cannot_merge_diff_types: [hyperUnique] and [COMPLEX]"); + final ColumnAnalysis expected2 = ColumnAnalysis.error("cannot_merge_diff_types: [COMPLEX] and [hyperUnique]"); + ColumnAnalysis fold1 = analysis1.fold(analysis2); + ColumnAnalysis fold2 = analysis2.fold(analysis1); + Assert.assertEquals(expected, fold1); + Assert.assertEquals(expected2, fold2); + + assertSerDe(fold1); + assertSerDe(fold2); + } + + @Test + public void testFoldDifferentTypeSignatures() throws Exception + { + // not sure that this should be able to happen in real life but if messages are artifically constructed + final ColumnAnalysis analysis1 = new ColumnAnalysis( + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", false, false, - -1L, + 1L, + 1, null, null, + null + ); + final ColumnAnalysis analysis2 = new ColumnAnalysis( + ColumnType.UNKNOWN_COMPLEX, + "hyperUnique", + false, + false, + 2L, + 2, null, - "error:cannot_merge_diff_types" + null, + null + ); + + assertSerDe(analysis1); + assertSerDe(analysis2); + + final ColumnAnalysis expected = ColumnAnalysis.error( + "cannot_merge_diff_types: [COMPLEX] and [COMPLEX]" + ); + final ColumnAnalysis expected2 = ColumnAnalysis.error( + "cannot_merge_diff_types: [COMPLEX] and [COMPLEX]" ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); - Assert.assertEquals(expected, fold2); + Assert.assertEquals(expected2, fold2); assertSerDe(fold1); assertSerDe(fold2); @@ -118,7 +246,17 @@ public class ColumnAnalysisTest assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:foo"); + final ColumnAnalysis expected = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + -1L, + null, + null, + null, + "error:foo" + ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); @@ -132,12 +270,32 @@ public class ColumnAnalysisTest public void testFoldErrorAndNoError() throws Exception { final ColumnAnalysis analysis1 = ColumnAnalysis.error("foo"); - final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", false, false, 2L, 2, "a", "z", null); + final ColumnAnalysis analysis2 = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + 2L, + 2, + "a", + "z", + null + ); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:foo"); + final ColumnAnalysis expected = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + -1L, + null, + null, + null, + "error:foo" + ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); @@ -156,7 +314,17 @@ public class ColumnAnalysisTest assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:multiple_errors"); + final ColumnAnalysis expected = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + -1L, + null, + null, + null, + "error:multiple_errors" + ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java index 0d01d6fa678..0c1dffa3d0e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java @@ -884,15 +884,9 @@ public class DruidSchema extends AbstractSchema continue; } - ColumnType valueType = null; - try { - valueType = ColumnType.fromString(entry.getValue().getType()); - } - catch (IllegalArgumentException ignored) { - } + ColumnType valueType = entry.getValue().getTypeSignature(); - // Assume unrecognized types are some flavor of COMPLEX. This throws away information about exactly - // what kind of complex column it is, which we may want to preserve some day. + // this shouldn't happen, but if it does assume types are some flavor of COMPLEX. if (valueType == null) { valueType = ColumnType.UNKNOWN_COMPLEX; }