revert ColumnAnalysis type, add typeSignature and use it for DruidSchema (#11895)

* revert ColumnAnalysis type, add typeSignature and use it for DruidSchema

* review stuffs

* maybe null

* better maybe null

* Update docs/querying/segmentmetadataquery.md

* Update docs/querying/segmentmetadataquery.md

Co-authored-by: Charles Smith <techdocsmith@gmail.com>

* fix null right

* sad

* oops

* Update batch_hadoop_queries.json

Co-authored-by: Charles Smith <techdocsmith@gmail.com>
Clint Wylie 2021-11-10 18:46:29 -08:00 committed by GitHub
parent cdd1c2876c
commit 5baa22148e
16 changed files with 317 additions and 125 deletions

View File

@ -87,9 +87,11 @@ The format of the result is:
} ]
```
Dimension columns will have type `STRING`, `FLOAT`, `DOUBLE`, or `LONG`.
Metric columns will have type `FLOAT`, `DOUBLE`, or `LONG`, or the name of the underlying complex type such as `hyperUnique` in the case of a COMPLEX metric.
The timestamp column will have type `LONG`.
All columns contain a `typeSignature` that Druid uses to represent the column type information internally. The `typeSignature` is typically the same value used to identify the JSON type information at query or ingest time, and is one of `STRING`, `FLOAT`, `DOUBLE`, `LONG`, or `COMPLEX<typeName>`, for example `COMPLEX<hyperUnique>`.
Columns also have a legacy `type` name. For some column types, the value may match the `typeSignature` (`STRING`, `FLOAT`, `DOUBLE`, or `LONG`). For `COMPLEX` columns, the `type` only contains the name of the underlying complex type such as `hyperUnique`.
New applications should use `typeSignature`, not `type`.
If the `errorMessage` field is non-null, you should not trust the other fields in the response. Their contents are
undefined.
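For example, a single column entry in a segment metadata response illustrating the relationship between `typeSignature` and the legacy `type` field might look like the following sketch (column names and values are illustrative only):

```json
{
  "columns": {
    "product": {
      "typeSignature": "STRING",
      "type": "STRING",
      "hasMultipleValues": false,
      "hasNulls": true,
      "size": 0,
      "errorMessage": null
    },
    "user_id_sketch": {
      "typeSignature": "COMPLEX<thetaSketch>",
      "type": "thetaSketch",
      "hasMultipleValues": false,
      "hasNulls": false,
      "size": 0,
      "errorMessage": null
    }
  }
}
```

Note that for the `COMPLEX` column the legacy `type` carries only the underlying complex type name (`thetaSketch`), while `typeSignature` carries the full `COMPLEX<thetaSketch>` signature.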

View File

@ -15,6 +15,7 @@
"id": "merged",
"columns": {
"location": {
"typeSignature": "STRING",
"type": "STRING",
"size": 0,
"hasMultipleValues": false,
@ -25,6 +26,7 @@
"errorMessage": null
},
"user_id_sketch": {
"typeSignature": "COMPLEX<thetaSketch>",
"type": "thetaSketch",
"size": 0,
"hasMultipleValues": false,
@ -35,6 +37,7 @@
"errorMessage": null
},
"other_metric": {
"typeSignature": "COMPLEX<thetaSketch>",
"type": "thetaSketch",
"size": 0,
"hasMultipleValues": false,
@ -45,6 +48,7 @@
"errorMessage": null
},
"__time": {
"typeSignature": "LONG",
"type": "LONG",
"size": 0,
"hasMultipleValues": false,
@ -55,6 +59,7 @@
"errorMessage": null
},
"product": {
"typeSignature": "STRING",
"type": "STRING",
"size": 0,
"hasMultipleValues": false,

View File

@ -596,6 +596,7 @@
"intervals":["2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z"],
"columns":{
"has_links":{
"typeSignature": "STRING",
"type":"STRING",
"hasMultipleValues":false,
"size":0,
@ -618,6 +619,7 @@
"intervals":["2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z"],
"columns":{
"has_links":{
"typeSignature": "STRING",
"type":"STRING",
"hasMultipleValues":false,
"size":0,
@ -640,6 +642,7 @@
"intervals":["2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z"],
"columns":{
"has_links":{
"typeSignature": "STRING",
"type":"STRING",
"hasMultipleValues":false,
"size":0,

View File

@ -1402,6 +1402,7 @@
"intervals":["2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z"],
"columns":{
"country_name":{
"typeSignature": "STRING",
"type":"STRING",
"hasMultipleValues":false,
"size":0,
@ -1412,6 +1413,7 @@
"hasNulls":true
},
"language":{
"typeSignature": "STRING",
"type":"STRING",
"hasMultipleValues":false,
"size":0,

View File

@ -43,6 +43,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ColumnTypeFactory;
import org.apache.druid.segment.column.ComplexColumn;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.TypeSignature;
@ -194,7 +195,8 @@ public class SegmentAnalyzer
}
return new ColumnAnalysis(
capabilities.asTypeString(),
capabilities.toColumnType(),
capabilities.getType().name(),
capabilities.hasMultipleValues().isTrue(),
capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls
size,
@ -250,7 +252,8 @@ public class SegmentAnalyzer
}
return new ColumnAnalysis(
capabilities.asTypeString(),
capabilities.toColumnType(),
capabilities.getType().name(),
capabilities.hasMultipleValues().isTrue(),
capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls
size,
@ -328,7 +331,8 @@ public class SegmentAnalyzer
}
return new ColumnAnalysis(
capabilities.asTypeString(),
capabilities.toColumnType(),
capabilities.getType().name(),
capabilities.hasMultipleValues().isTrue(),
capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls
size,
@ -347,8 +351,6 @@ public class SegmentAnalyzer
final TypeSignature<ValueType> typeSignature = capabilities == null ? ColumnType.UNKNOWN_COMPLEX : capabilities;
final String typeName = typeSignature.getComplexTypeName();
// serialize using asTypeString (which is also used for JSON so can easily round-trip complex type info back into ColumnType)
final String serdeTypeName = typeSignature.asTypeString();
try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) {
final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue();
final boolean hasNulls = capabilities != null && capabilities.hasNulls().isMaybeTrue();
@ -363,7 +365,8 @@ public class SegmentAnalyzer
final Function<Object, Long> inputSizeFn = serde.inputSizeFn();
if (inputSizeFn == null) {
return new ColumnAnalysis(
serdeTypeName,
ColumnTypeFactory.ofType(typeSignature),
typeName,
hasMultipleValues,
hasNulls,
0,
@ -381,7 +384,8 @@ public class SegmentAnalyzer
}
return new ColumnAnalysis(
serdeTypeName,
ColumnTypeFactory.ofType(typeSignature),
typeName,
hasMultipleValues,
hasNulls,
size,

View File

@ -23,7 +23,6 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
@ -58,8 +57,6 @@ import org.apache.druid.timeline.LogicalSegment;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
@ -75,16 +72,10 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest<SegmentAn
private static final TypeReference<SegmentAnalysis> TYPE_REFERENCE = new TypeReference<SegmentAnalysis>()
{
};
private static final byte[] SEGMENT_METADATA_CACHE_PREFIX = new byte[]{0x4};
private static final byte SEGMENT_METADATA_CACHE_PREFIX = 0x4;
private static final byte SEGMENT_METADATA_QUERY = 0x16;
private static final Function<SegmentAnalysis, SegmentAnalysis> MERGE_TRANSFORM_FN = new Function<SegmentAnalysis, SegmentAnalysis>()
{
@Override
public SegmentAnalysis apply(SegmentAnalysis analysis)
{
return finalizeAnalysis(analysis);
}
};
private static final Function<SegmentAnalysis, SegmentAnalysis> MERGE_TRANSFORM_FN =
SegmentMetadataQueryQueryToolChest::finalizeAnalysis;
private final SegmentMetadataQueryConfig config;
private final GenericQueryMetricsFactory queryMetricsFactory;
@ -195,13 +186,9 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest<SegmentAn
public byte[] computeCacheKey(SegmentMetadataQuery query)
{
SegmentMetadataQuery updatedQuery = query.withFinalizedAnalysisTypes(config);
byte[] includerBytes = updatedQuery.getToInclude().getCacheKey();
byte[] analysisTypesBytes = updatedQuery.getAnalysisTypesCacheKey();
return ByteBuffer.allocate(1 + includerBytes.length + analysisTypesBytes.length)
.put(SEGMENT_METADATA_CACHE_PREFIX)
.put(includerBytes)
.put(analysisTypesBytes)
.array();
return new CacheKeyBuilder(SEGMENT_METADATA_CACHE_PREFIX).appendCacheable(updatedQuery.getToInclude())
.appendCacheables(updatedQuery.getAnalysisTypes())
.build();
}
@Override
@ -223,27 +210,13 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest<SegmentAn
@Override
public Function<SegmentAnalysis, SegmentAnalysis> prepareForCache(boolean isResultLevelCache)
{
return new Function<SegmentAnalysis, SegmentAnalysis>()
{
@Override
public SegmentAnalysis apply(@Nullable SegmentAnalysis input)
{
return input;
}
};
return input -> input;
}
@Override
public Function<SegmentAnalysis, SegmentAnalysis> pullFromCache(boolean isResultLevelCache)
{
return new Function<SegmentAnalysis, SegmentAnalysis>()
{
@Override
public SegmentAnalysis apply(@Nullable SegmentAnalysis input)
{
return input;
}
};
return input -> input;
}
};
}
@ -266,14 +239,7 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest<SegmentAn
return Lists.newArrayList(
Iterables.filter(
segments,
new Predicate<T>()
{
@Override
public boolean apply(T input)
{
return (input.getInterval().overlaps(targetInterval));
}
}
input -> (input.getInterval().overlaps(targetInterval))
)
);
}

View File

@ -22,6 +22,8 @@ package org.apache.druid.query.metadata.metadata;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.segment.column.ColumnType;
import java.util.Objects;
@ -33,10 +35,11 @@ public class ColumnAnalysis
public static ColumnAnalysis error(String reason)
{
return new ColumnAnalysis("STRING", false, false, -1, null, null, null, ERROR_PREFIX + reason);
return new ColumnAnalysis(ColumnType.STRING, "STRING", false, false, -1, null, null, null, ERROR_PREFIX + reason);
}
private final String type;
private final ColumnType typeSignature;
private final boolean hasMultipleValues;
private final boolean hasNulls;
private final long size;
@ -47,6 +50,7 @@ public class ColumnAnalysis
@JsonCreator
public ColumnAnalysis(
@JsonProperty("typeSignature") ColumnType typeSignature,
@JsonProperty("type") String type,
@JsonProperty("hasMultipleValues") boolean hasMultipleValues,
@JsonProperty("hasNulls") boolean hasNulls,
@ -57,6 +61,7 @@ public class ColumnAnalysis
@JsonProperty("errorMessage") String errorMessage
)
{
this.typeSignature = typeSignature;
this.type = type;
this.hasMultipleValues = hasMultipleValues;
this.hasNulls = hasNulls;
@ -68,6 +73,13 @@ public class ColumnAnalysis
}
@JsonProperty
public ColumnType getTypeSignature()
{
return typeSignature;
}
@JsonProperty
@Deprecated
public String getType()
{
return type;
@ -135,8 +147,20 @@ public class ColumnAnalysis
return rhs;
}
if (!type.equals(rhs.getType())) {
return ColumnAnalysis.error("cannot_merge_diff_types");
if (!Objects.equals(type, rhs.getType())) {
return ColumnAnalysis.error(
StringUtils.format("cannot_merge_diff_types: [%s] and [%s]", type, rhs.getType())
);
}
if (!Objects.equals(typeSignature, rhs.getTypeSignature())) {
return ColumnAnalysis.error(
StringUtils.format(
"cannot_merge_diff_types: [%s] and [%s]",
typeSignature.asTypeString(),
rhs.getTypeSignature().asTypeString()
)
);
}
Integer cardinality = getCardinality();
@ -153,6 +177,7 @@ public class ColumnAnalysis
Comparable newMax = choose(maxValue, rhs.maxValue, true);
return new ColumnAnalysis(
typeSignature,
type,
multipleValues,
hasNulls || rhs.hasNulls,
@ -180,7 +205,8 @@ public class ColumnAnalysis
public String toString()
{
return "ColumnAnalysis{" +
"type='" + type + '\'' +
"typeSignature='" + typeSignature + '\'' +
", type=" + type +
", hasMultipleValues=" + hasMultipleValues +
", hasNulls=" + hasNulls +
", size=" + size +
@ -204,6 +230,7 @@ public class ColumnAnalysis
return hasMultipleValues == that.hasMultipleValues &&
hasNulls == that.hasNulls &&
size == that.size &&
Objects.equals(typeSignature, that.typeSignature) &&
Objects.equals(type, that.type) &&
Objects.equals(cardinality, that.cardinality) &&
Objects.equals(minValue, that.minValue) &&
@ -214,6 +241,16 @@ public class ColumnAnalysis
@Override
public int hashCode()
{
return Objects.hash(type, hasMultipleValues, hasNulls, size, cardinality, minValue, maxValue, errorMessage);
return Objects.hash(
typeSignature,
type,
hasMultipleValues,
hasNulls,
size,
cardinality,
minValue,
maxValue,
errorMessage
);
}
}

View File

@ -21,6 +21,7 @@ package org.apache.druid.query.metadata.metadata;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import org.apache.druid.java.util.common.Cacheable;
/**
*/
@ -30,8 +31,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
@JsonSubTypes.Type(name = "all", value = AllColumnIncluderator.class),
@JsonSubTypes.Type(name = "list", value = ListColumnIncluderator.class)
})
public interface ColumnIncluderator
public interface ColumnIncluderator extends Cacheable
{
boolean include(String columnName);
byte[] getCacheKey();
}

View File

@ -23,7 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.druid.java.util.common.Cacheable;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.BaseQuery;
@ -38,7 +38,6 @@ import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.spec.QuerySegmentSpec;
import org.joda.time.Interval;
import java.nio.ByteBuffer;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
@ -46,16 +45,9 @@ import java.util.Objects;
public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
{
/**
* The SegmentMetadataQuery cache key may contain UTF-8 column name strings.
* Prepend 0xFF before the analysisTypes as a separator to avoid
* any potential confusion with string values.
*/
public static final byte[] ANALYSIS_TYPES_CACHE_PREFIX = new byte[] {(byte) 0xFF};
private static final QuerySegmentSpec DEFAULT_SEGMENT_SPEC = new MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY);
public enum AnalysisType
public enum AnalysisType implements Cacheable
{
CARDINALITY,
SIZE,
@ -79,6 +71,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
return valueOf(StringUtils.toUpperCase(name));
}
@Override
public byte[] getCacheKey()
{
return new byte[] {(byte) this.ordinal()};
@ -193,25 +186,6 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
return analysisTypes.contains(AnalysisType.ROLLUP);
}
public byte[] getAnalysisTypesCacheKey()
{
int size = 1;
List<byte[]> typeBytesList = Lists.newArrayListWithExpectedSize(analysisTypes.size());
for (AnalysisType analysisType : analysisTypes) {
final byte[] bytes = analysisType.getCacheKey();
typeBytesList.add(bytes);
size += bytes.length;
}
final ByteBuffer bytes = ByteBuffer.allocate(size);
bytes.put(ANALYSIS_TYPES_CACHE_PREFIX);
for (byte[] typeBytes : typeBytesList) {
bytes.put(typeBytes);
}
return bytes.array();
}
@Override
public Query<SegmentAnalysis> withOverriddenContext(Map<String, Object> contextOverride)
{

View File

@ -52,6 +52,7 @@ import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
@ -156,7 +157,8 @@ public class DoubleStorageTest
ImmutableMap.of(
TIME_COLUMN,
new ColumnAnalysis(
ValueType.LONG.toString(),
ColumnType.LONG,
ValueType.LONG.name(),
false,
false,
100,
@ -167,7 +169,8 @@ public class DoubleStorageTest
),
DIM_NAME,
new ColumnAnalysis(
ValueType.STRING.toString(),
ColumnType.STRING,
ValueType.STRING.name(),
false,
false,
120,
@ -178,7 +181,8 @@ public class DoubleStorageTest
),
DIM_FLOAT_NAME,
new ColumnAnalysis(
ValueType.DOUBLE.toString(),
ColumnType.DOUBLE,
ValueType.DOUBLE.name(),
false,
false,
80,
@ -201,7 +205,8 @@ public class DoubleStorageTest
ImmutableMap.of(
TIME_COLUMN,
new ColumnAnalysis(
ValueType.LONG.toString(),
ColumnType.LONG,
ValueType.LONG.name(),
false,
false,
100,
@ -212,7 +217,8 @@ public class DoubleStorageTest
),
DIM_NAME,
new ColumnAnalysis(
ValueType.STRING.toString(),
ColumnType.STRING,
ValueType.STRING.name(),
false,
false,
120,
@ -223,7 +229,8 @@ public class DoubleStorageTest
),
DIM_FLOAT_NAME,
new ColumnAnalysis(
ValueType.FLOAT.toString(),
ColumnType.FLOAT,
ValueType.FLOAT.name(),
false,
false,
80,

View File

@ -295,7 +295,8 @@ public class SegmentAnalyzerTest extends InitializedNullHandlingTest
Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
ColumnAnalysis columnAnalysis = analyses.get(invalid_aggregator);
Assert.assertFalse(columnAnalysis.isError());
Assert.assertEquals("COMPLEX<invalid_complex_column_type>", columnAnalysis.getType());
Assert.assertEquals("invalid_complex_column_type", columnAnalysis.getType());
Assert.assertEquals(ColumnType.ofComplex("invalid_complex_column_type"), columnAnalysis.getTypeSignature());
}
// Persist the index.

View File

@ -37,6 +37,7 @@ import org.apache.druid.query.metadata.metadata.ColumnAnalysis;
import org.apache.druid.query.metadata.metadata.SegmentAnalysis;
import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery;
import org.apache.druid.query.spec.LegacySegmentSpec;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.timeline.LogicalSegment;
import org.joda.time.Interval;
@ -69,7 +70,7 @@ public class SegmentMetadataQueryQueryToolChestTest
new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()).getCacheStrategy(query);
// Test cache key generation
byte[] expectedKey = {0x04, 0x01, (byte) 0xFF, 0x00, 0x02, 0x04};
byte[] expectedKey = {0x04, 0x09, 0x01, 0x0A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x02, 0x04};
byte[] actualKey = strategy.computeCacheKey(query);
Assert.assertArrayEquals(expectedKey, actualKey);
@ -79,7 +80,8 @@ public class SegmentMetadataQueryQueryToolChestTest
ImmutableMap.of(
"placement",
new ColumnAnalysis(
ValueType.STRING.toString(),
ColumnType.STRING,
ValueType.STRING.name(),
true,
false,
10881,

View File

@ -52,6 +52,7 @@ import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.TestIndex;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.timeline.LogicalSegment;
@ -207,6 +208,7 @@ public class SegmentMetadataQueryTest
ImmutableMap.of(
"__time",
new ColumnAnalysis(
ColumnType.LONG,
ValueType.LONG.toString(),
false,
false,
@ -218,6 +220,7 @@ public class SegmentMetadataQueryTest
),
"index",
new ColumnAnalysis(
ColumnType.DOUBLE,
ValueType.DOUBLE.toString(),
false,
false,
@ -229,6 +232,7 @@ public class SegmentMetadataQueryTest
),
"placement",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -251,6 +255,7 @@ public class SegmentMetadataQueryTest
ImmutableMap.of(
"__time",
new ColumnAnalysis(
ColumnType.LONG,
ValueType.LONG.toString(),
false,
false,
@ -262,6 +267,7 @@ public class SegmentMetadataQueryTest
),
"index",
new ColumnAnalysis(
ColumnType.DOUBLE,
ValueType.DOUBLE.toString(),
false,
false,
@ -273,6 +279,7 @@ public class SegmentMetadataQueryTest
),
"placement",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -310,6 +317,7 @@ public class SegmentMetadataQueryTest
ImmutableMap.of(
"placement",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -321,6 +329,7 @@ public class SegmentMetadataQueryTest
),
"placementish",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
true,
false,
@ -380,6 +389,7 @@ public class SegmentMetadataQueryTest
ImmutableMap.of(
"placement",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -391,6 +401,7 @@ public class SegmentMetadataQueryTest
),
"placementish",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
true,
false,
@ -450,6 +461,7 @@ public class SegmentMetadataQueryTest
ImmutableMap.of(
"placement",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -461,7 +473,8 @@ public class SegmentMetadataQueryTest
),
"quality_uniques",
new ColumnAnalysis(
"COMPLEX<hyperUnique>",
ColumnType.ofComplex("hyperUnique"),
"hyperUnique",
false,
true,
0,
@ -521,6 +534,7 @@ public class SegmentMetadataQueryTest
size2 = mmap2 ? 10881 : 10764;
}
ColumnAnalysis analysis = new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -543,6 +557,7 @@ public class SegmentMetadataQueryTest
size2 = mmap2 ? 6882 : 6808;
}
ColumnAnalysis analysis = new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -565,6 +580,7 @@ public class SegmentMetadataQueryTest
size2 = mmap2 ? 9765 : 9660;
}
ColumnAnalysis analysis = new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -588,6 +604,7 @@ public class SegmentMetadataQueryTest
ImmutableMap.of(
"__time",
new ColumnAnalysis(
ColumnType.LONG,
ValueType.LONG.toString(),
false,
false,
@ -599,6 +616,7 @@ public class SegmentMetadataQueryTest
),
"index",
new ColumnAnalysis(
ColumnType.DOUBLE,
ValueType.DOUBLE.toString(),
false,
false,
@ -654,6 +672,7 @@ public class SegmentMetadataQueryTest
ImmutableMap.of(
"placement",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -717,6 +736,7 @@ public class SegmentMetadataQueryTest
ImmutableMap.of(
"placement",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -776,6 +796,7 @@ public class SegmentMetadataQueryTest
ImmutableMap.of(
"placement",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -835,6 +856,7 @@ public class SegmentMetadataQueryTest
ImmutableMap.of(
"placement",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,
@ -1299,6 +1321,7 @@ public class SegmentMetadataQueryTest
public void testLongNullableColumn()
{
ColumnAnalysis analysis = new ColumnAnalysis(
ColumnType.LONG,
ValueType.LONG.toString(),
false,
NullHandling.replaceWithDefault() ? false : true,
@ -1315,6 +1338,7 @@ public class SegmentMetadataQueryTest
public void testDoubleNullableColumn()
{
ColumnAnalysis analysis = new ColumnAnalysis(
ColumnType.DOUBLE,
ValueType.DOUBLE.toString(),
false,
NullHandling.replaceWithDefault() ? false : true,
@ -1332,6 +1356,7 @@ public class SegmentMetadataQueryTest
public void testFloatNullableColumn()
{
ColumnAnalysis analysis = new ColumnAnalysis(
ColumnType.FLOAT,
ValueType.FLOAT.toString(),
false,
NullHandling.replaceWithDefault() ? false : true,

View File

@ -36,6 +36,7 @@ import org.apache.druid.segment.IncrementalIndexSegment;
import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.TestIndex;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Test;
@ -101,6 +102,7 @@ public class SegmentMetadataUnionQueryTest extends InitializedNullHandlingTest
ImmutableMap.of(
"placement",
new ColumnAnalysis(
ColumnType.STRING,
ValueType.STRING.toString(),
false,
false,

View File

@ -21,6 +21,7 @@ package org.apache.druid.query.metadata.metadata;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnType;
import org.junit.Assert;
import org.junit.Test;
@ -36,13 +37,43 @@ public class ColumnAnalysisTest
@Test
public void testFoldStringColumns() throws Exception
{
final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, false, 1L, 2, "aaA", "Zzz", null);
final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", true, false, 3L, 4, "aAA", "ZZz", null);
final ColumnAnalysis analysis1 = new ColumnAnalysis(
ColumnType.STRING,
"STRING",
false,
false,
1L,
2,
"aaA",
"Zzz",
null
);
final ColumnAnalysis analysis2 = new ColumnAnalysis(
ColumnType.STRING,
"STRING",
true,
false,
3L,
4,
"aAA",
"ZZz",
null
);
assertSerDe(analysis1);
assertSerDe(analysis2);
final ColumnAnalysis expected = new ColumnAnalysis("STRING", true, false, 4L, 4, "aAA", "Zzz", null);
final ColumnAnalysis expected = new ColumnAnalysis(
ColumnType.STRING,
"STRING",
true,
false,
4L,
4,
"aAA",
"Zzz",
null
);
ColumnAnalysis fold1 = analysis1.fold(analysis2);
ColumnAnalysis fold2 = analysis2.fold(analysis1);
@ -56,7 +87,17 @@ public class ColumnAnalysisTest
@Test
public void testFoldWithNull() throws Exception
{
final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, false, 1L, 2, null, null, null);
final ColumnAnalysis analysis1 = new ColumnAnalysis(
ColumnType.STRING,
"STRING",
false,
false,
1L,
2,
null,
null,
null
);
Assert.assertEquals(analysis1, analysis1.fold(null));
assertSerDe(analysis1);
}
@ -64,13 +105,43 @@ public class ColumnAnalysisTest
@Test
public void testFoldComplexColumns() throws Exception
{
final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null);
final ColumnAnalysis analysis2 = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null);
final ColumnAnalysis analysis1 = new ColumnAnalysis(
ColumnType.ofComplex("hyperUnique"),
"hyperUnique",
false,
false,
0L,
null,
null,
null,
null
);
final ColumnAnalysis analysis2 = new ColumnAnalysis(
ColumnType.ofComplex("hyperUnique"),
"hyperUnique",
false,
false,
0L,
null,
null,
null,
null
);
assertSerDe(analysis1);
assertSerDe(analysis2);
final ColumnAnalysis expected = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null);
final ColumnAnalysis expected = new ColumnAnalysis(
ColumnType.ofComplex("hyperUnique"),
"hyperUnique",
false,
false,
0L,
null,
null,
null,
null
);
ColumnAnalysis fold1 = analysis1.fold(analysis2);
ColumnAnalysis fold2 = analysis2.fold(analysis1);
@ -84,26 +155,83 @@ public class ColumnAnalysisTest
@Test
public void testFoldDifferentTypes() throws Exception
{
final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, false, 1L, 1, null, null, null);
final ColumnAnalysis analysis2 = new ColumnAnalysis("COMPLEX", false, false, 2L, 2, null, null, null);
final ColumnAnalysis analysis1 = new ColumnAnalysis(
ColumnType.ofComplex("hyperUnique"),
"hyperUnique",
false,
false,
1L,
1,
null,
null,
null
);
final ColumnAnalysis analysis2 = new ColumnAnalysis(
ColumnType.UNKNOWN_COMPLEX,
"COMPLEX",
false,
false,
2L,
2,
null,
null,
null
);
assertSerDe(analysis1);
assertSerDe(analysis2);
final ColumnAnalysis expected = new ColumnAnalysis(
"STRING",
final ColumnAnalysis expected = ColumnAnalysis.error("cannot_merge_diff_types: [hyperUnique] and [COMPLEX]");
final ColumnAnalysis expected2 = ColumnAnalysis.error("cannot_merge_diff_types: [COMPLEX] and [hyperUnique]");
ColumnAnalysis fold1 = analysis1.fold(analysis2);
ColumnAnalysis fold2 = analysis2.fold(analysis1);
Assert.assertEquals(expected, fold1);
Assert.assertEquals(expected2, fold2);
assertSerDe(fold1);
assertSerDe(fold2);
}
@Test
public void testFoldDifferentTypeSignatures() throws Exception
{
// not sure this should be able to happen in real life, but it could if messages are artificially constructed
final ColumnAnalysis analysis1 = new ColumnAnalysis(
ColumnType.ofComplex("hyperUnique"),
"hyperUnique",
false,
false,
1L,
1,
null,
null,
null
);
final ColumnAnalysis analysis2 = new ColumnAnalysis(
ColumnType.UNKNOWN_COMPLEX,
"hyperUnique",
false,
false,
2L,
2,
null,
"error:cannot_merge_diff_types"
null,
null
);
assertSerDe(analysis1);
assertSerDe(analysis2);
final ColumnAnalysis expected = ColumnAnalysis.error(
"cannot_merge_diff_types: [COMPLEX<hyperUnique>] and [COMPLEX]"
);
final ColumnAnalysis expected2 = ColumnAnalysis.error(
"cannot_merge_diff_types: [COMPLEX] and [COMPLEX<hyperUnique>]"
);
ColumnAnalysis fold1 = analysis1.fold(analysis2);
ColumnAnalysis fold2 = analysis2.fold(analysis1);
Assert.assertEquals(expected, fold1);
Assert.assertEquals(expected, fold2);
Assert.assertEquals(expected2, fold2);
assertSerDe(fold1);
assertSerDe(fold2);
@ -118,7 +246,17 @@ public class ColumnAnalysisTest
assertSerDe(analysis1);
assertSerDe(analysis2);
final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:foo");
final ColumnAnalysis expected = new ColumnAnalysis(
ColumnType.STRING,
"STRING",
false,
false,
-1L,
null,
null,
null,
"error:foo"
);
ColumnAnalysis fold1 = analysis1.fold(analysis2);
ColumnAnalysis fold2 = analysis2.fold(analysis1);
Assert.assertEquals(expected, fold1);
@ -132,12 +270,32 @@ public class ColumnAnalysisTest
public void testFoldErrorAndNoError() throws Exception
{
final ColumnAnalysis analysis1 = ColumnAnalysis.error("foo");
final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", false, false, 2L, 2, "a", "z", null);
final ColumnAnalysis analysis2 = new ColumnAnalysis(
ColumnType.STRING,
"STRING",
false,
false,
2L,
2,
"a",
"z",
null
);
assertSerDe(analysis1);
assertSerDe(analysis2);
final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:foo");
final ColumnAnalysis expected = new ColumnAnalysis(
ColumnType.STRING,
"STRING",
false,
false,
-1L,
null,
null,
null,
"error:foo"
);
ColumnAnalysis fold1 = analysis1.fold(analysis2);
ColumnAnalysis fold2 = analysis2.fold(analysis1);
Assert.assertEquals(expected, fold1);
@ -156,7 +314,17 @@ public class ColumnAnalysisTest
assertSerDe(analysis1);
assertSerDe(analysis2);
final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:multiple_errors");
final ColumnAnalysis expected = new ColumnAnalysis(
ColumnType.STRING,
"STRING",
false,
false,
-1L,
null,
null,
null,
"error:multiple_errors"
);
ColumnAnalysis fold1 = analysis1.fold(analysis2);
ColumnAnalysis fold2 = analysis2.fold(analysis1);
Assert.assertEquals(expected, fold1);

View File

@ -884,15 +884,9 @@ public class DruidSchema extends AbstractSchema
continue;
}
ColumnType valueType = null;
try {
valueType = ColumnType.fromString(entry.getValue().getType());
}
catch (IllegalArgumentException ignored) {
}
ColumnType valueType = entry.getValue().getTypeSignature();
// Assume unrecognized types are some flavor of COMPLEX. This throws away information about exactly
// what kind of complex column it is, which we may want to preserve some day.
// this shouldn't happen, but if it does, assume the type is some flavor of COMPLEX.
if (valueType == null) {
valueType = ColumnType.UNKNOWN_COMPLEX;
}