From 6c196a5ea2100b5b5c5e81fe75b30ad5d3a63f83 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 9 Nov 2021 15:18:07 -0800 Subject: [PATCH] Remove StorageAdapter.getColumnTypeName. (#11893) * Remove StorageAdapter.getColumnTypeName. It was only used by SegmentAnalyzer, and isn't necessary anymore due to the recent improvements to ColumnCapabilities. Also: tidy ColumnDescriptor.read slightly by removing an instanceof check, and moving the relevant logic into ComplexColumnPartSerde. * Fix spellings. --- .../druid/query/metadata/SegmentAnalyzer.java | 14 +++-- .../segment/QueryableIndexStorageAdapter.java | 23 -------- .../druid/segment/RowBasedStorageAdapter.java | 8 --- .../apache/druid/segment/StorageAdapter.java | 7 --- .../segment/column/ColumnDescriptor.java | 5 -- .../IncrementalIndexStorageAdapter.java | 15 ----- .../join/HashJoinSegmentStorageAdapter.java | 15 ----- .../segment/serde/ComplexColumnPartSerde.java | 12 ++-- .../druid/segment/serde/ComplexMetrics.java | 12 ++++ .../query/metadata/SegmentAnalyzerTest.java | 56 ++++++++++++++----- .../topn/TopNMetricSpecOptimizationsTest.java | 7 --- .../segment/RowBasedStorageAdapterTest.java | 22 ++++---- .../HashJoinSegmentStorageAdapterTest.java | 27 +++++---- .../coordination/ServerManagerTest.java | 8 --- 14 files changed, 94 insertions(+), 137 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 6e96cd971c3..876a5a169fa 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -45,6 +45,8 @@ import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ComplexColumn; import org.apache.druid.segment.column.DictionaryEncodedColumn; +import org.apache.druid.segment.column.TypeSignature; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; import org.apache.druid.segment.serde.ComplexMetricSerde; @@ -137,7 +139,7 @@ public class SegmentAnalyzer } break; case COMPLEX: - analysis = analyzeComplexColumn(capabilities, columnHolder, storageAdapter.getColumnTypeName(columnName)); + analysis = analyzeComplexColumn(capabilities, columnHolder); break; default: log.warn("Unknown column type[%s].", capabilities.asTypeString()); @@ -339,19 +341,21 @@ public class SegmentAnalyzer private ColumnAnalysis analyzeComplexColumn( @Nullable final ColumnCapabilities capabilities, - @Nullable final ColumnHolder columnHolder, - final String typeName + @Nullable final ColumnHolder columnHolder ) { + final TypeSignature typeSignature = capabilities == null ? ColumnType.UNKNOWN_COMPLEX : capabilities; + final String typeName = typeSignature.getComplexTypeName(); + // serialize using asTypeString (which is also used for JSON so can easily round-trip complex type info back into ColumnType) - final String serdeTypeName = ColumnType.ofComplex(typeName).asTypeString(); + final String serdeTypeName = typeSignature.asTypeString(); try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) { final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue(); final boolean hasNulls = capabilities != null && capabilities.hasNulls().isMaybeTrue(); long size = 0; if (analyzingSize() && complexColumn != null) { - final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName); + final ComplexMetricSerde serde = typeName == null ? null : ComplexMetrics.getSerdeForType(typeName); if (serde == null) { return ColumnAnalysis.error(StringUtils.format("unknown_complex_%s", typeName)); } diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java index 35a8443a35b..cb070e1e492 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java @@ -36,7 +36,6 @@ import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.BitmapIndex; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; -import org.apache.druid.segment.column.ComplexColumn; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.NumericColumn; import org.apache.druid.segment.data.Indexed; @@ -172,28 +171,6 @@ public class QueryableIndexStorageAdapter implements StorageAdapter return getColumnCapabilities(index, column); } - @Override - @Nullable - public String getColumnTypeName(String columnName) - { - final ColumnHolder columnHolder = index.getColumnHolder(columnName); - - if (columnHolder == null) { - return null; - } - - try (final BaseColumn col = columnHolder.getColumn()) { - if (col instanceof ComplexColumn) { - return ((ComplexColumn) col).getTypeName(); - } else { - return columnHolder.getCapabilities().asTypeString(); - } - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - @Override public DateTime getMaxIngestedEventTime() { diff --git a/processing/src/main/java/org/apache/druid/segment/RowBasedStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/RowBasedStorageAdapter.java index d962accfcb7..843972564ac 100644 --- a/processing/src/main/java/org/apache/druid/segment/RowBasedStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/RowBasedStorageAdapter.java @@ -120,14 +120,6 @@ public class RowBasedStorageAdapter implements StorageAdapter return RowBasedColumnSelectorFactory.getColumnCapabilities(rowSignature, column); } - @Nullable - @Override - public String getColumnTypeName(String column) - { - final ColumnCapabilities columnCapabilities = getColumnCapabilities(column); - return columnCapabilities != null ? columnCapabilities.asTypeString() : null; - } - @Override public int getNumRows() { diff --git a/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java index 2aa4b774506..470a1de1d99 100644 --- a/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/StorageAdapter.java @@ -66,13 +66,6 @@ public interface StorageAdapter extends CursorFactory, ColumnInspector @Nullable ColumnCapabilities getColumnCapabilities(String column); - /** - * Like {@link ColumnCapabilities#getType()}, but may return a more descriptive string for complex columns. - * @param column column name - * @return type name - */ - @Nullable - String getColumnTypeName(String column); int getNumRows(); DateTime getMaxIngestedEventTime(); Metadata getMetadata(); diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnDescriptor.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnDescriptor.java index 0783f7586c1..056ae85e220 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnDescriptor.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnDescriptor.java @@ -26,11 +26,9 @@ import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; import org.apache.druid.segment.serde.ColumnPartSerde; -import org.apache.druid.segment.serde.ComplexColumnPartSerde; import org.apache.druid.segment.serde.Serializer; import javax.annotation.Nullable; - import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; @@ -106,9 +104,6 @@ public class ColumnDescriptor implements Serializer .setFileMapper(smooshedFiles); for (ColumnPartSerde part : parts) { - if (part instanceof ComplexColumnPartSerde) { - builder.setComplexTypeName(((ComplexColumnPartSerde) part).getTypeName()); - } part.getDeserializer().read(buffer, builder, columnConfig); } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 86451472209..83458f4aa11 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -239,21 +239,6 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter ); } - @Override - public String getColumnTypeName(String column) - { - final String metricType = index.getMetricType(column); - if (metricType != null) { - return metricType; - } - ColumnCapabilities columnCapabilities = getColumnCapabilities(column); - if (columnCapabilities != null) { - return columnCapabilities.asTypeString(); - } else { - return null; - } - } - @Override public DateTime getMaxIngestedEventTime() { diff --git a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java index f27a279fbb7..401bb1b2f74 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java @@ -188,21 +188,6 @@ public class HashJoinSegmentStorageAdapter implements StorageAdapter } } - @Nullable - @Override - public String getColumnTypeName(String column) - { - final Optional maybeClause = getClauseForColumn(column); - - if (maybeClause.isPresent()) { - final JoinableClause clause = maybeClause.get(); - final ColumnCapabilities capabilities = clause.getJoinable().getColumnCapabilities(clause.unprefix(column)); - return capabilities != null ? capabilities.asTypeString() : null; - } else { - return baseAdapter.getColumnTypeName(column); - } - } - @Override public int getNumRows() { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java index 848645d86a1..651d2c6a5d9 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/ComplexColumnPartSerde.java @@ -78,12 +78,14 @@ public class ComplexColumnPartSerde implements ColumnPartSerde public Deserializer getDeserializer() { return (buffer, builder, columnConfig) -> { + // we don't currently know if complex column can have nulls (or can be multi-valued, but not making that change + // since it isn't supported anywhere in the query engines) + // longer term this needs to be captured by making the serde provide this information, and then this should + // no longer be set to true but rather the actual values + builder.setHasNulls(ColumnCapabilities.Capable.TRUE); + builder.setComplexTypeName(typeName); + if (serde != null) { - // we don't currently know if complex column can have nulls (or can be multi-valued, but not making that change - // since it isn't supported anywhere in the query engines) - // longer term this needs to be captured by making the serde provide this information, and then this should - // no longer be set to true but rather the actual values - builder.setHasNulls(ColumnCapabilities.Capable.TRUE); serde.deserializeColumn(buffer, builder, columnConfig); } }; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java b/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java index 03cd44bd7d7..1a3389ee6a5 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java @@ -69,4 +69,16 @@ public class ComplexMetrics } }); } + + /** + * Unregister a serde name -> ComplexMetricSerde mapping. + * + * If the specified serde key string is not in use, does nothing. + * + * Only expected to be used in tests. + */ + public static void unregisterSerde(String type) + { + COMPLEX_SERIALIZERS.remove(type); + } } diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java index 8dd259148f3..14ed7833f04 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java @@ -42,7 +42,6 @@ import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; import org.apache.druid.query.spec.LegacySegmentSpec; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.IncrementalIndexSegment; -import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.Segment; import org.apache.druid.segment.TestIndex; @@ -59,9 +58,12 @@ import org.apache.druid.segment.serde.ComplexMetrics; import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.SegmentId; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; import javax.annotation.Nullable; +import java.io.File; import java.io.IOException; import java.net.URL; import java.nio.ByteBuffer; @@ -77,6 +79,9 @@ public class SegmentAnalyzerTest extends InitializedNullHandlingTest private static final EnumSet EMPTY_ANALYSES = EnumSet.noneOf(SegmentMetadataQuery.AnalysisType.class); + @Rule + public final TemporaryFolder temporaryFolder = new TemporaryFolder(); + @Test public void testIncrementalWorks() { @@ -282,20 +287,43 @@ public class SegmentAnalyzerTest extends InitializedNullHandlingTest .setMaxRowCount(10000) .build(); IncrementalIndex incrementalIndex = TestIndex.loadIncrementalIndex(retVal, source); - QueryableIndex queryableIndex = TestIndex.persistRealtimeAndLoadMMapped(incrementalIndex); - SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE)); - QueryableIndexSegment segment = new QueryableIndexSegment( - queryableIndex, - SegmentId.dummy("ds") - ); - Map analyses = analyzer.analyze(segment); - ColumnAnalysis invalidColumnAnalysis = analyses.get(invalid_aggregator); - Assert.assertTrue(invalidColumnAnalysis.isError()); - // Run a segment metadata query also to verify it doesn't break - final List results = getSegmentAnalysises(segment, EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE)); - for (SegmentAnalysis result : results) { - Assert.assertTrue(result.getColumns().get(invalid_aggregator).isError()); + // Analyze the in-memory segment. + { + SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE)); + IncrementalIndexSegment segment = new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("ds")); + Map analyses = analyzer.analyze(segment); + ColumnAnalysis columnAnalysis = analyses.get(invalid_aggregator); + Assert.assertFalse(columnAnalysis.isError()); + Assert.assertEquals("COMPLEX", columnAnalysis.getType()); + } + + // Persist the index. + final File segmentFile = TestIndex.INDEX_MERGER.persist( + incrementalIndex, + temporaryFolder.newFolder(), + TestIndex.INDEX_SPEC, + null + ); + + // Unload the complex serde, then analyze the persisted segment. + ComplexMetrics.unregisterSerde(InvalidAggregatorFactory.TYPE); + { + SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE)); + QueryableIndexSegment segment = new QueryableIndexSegment( + TestIndex.INDEX_IO.loadIndex(segmentFile), + SegmentId.dummy("ds") + ); + Map analyses = analyzer.analyze(segment); + ColumnAnalysis invalidColumnAnalysis = analyses.get(invalid_aggregator); + Assert.assertTrue(invalidColumnAnalysis.isError()); + Assert.assertEquals("error:unknown_complex_invalid_complex_column_type", invalidColumnAnalysis.getErrorMessage()); + + // Run a segment metadata query also to verify it doesn't break + final List results = getSegmentAnalysises(segment, EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE)); + for (SegmentAnalysis result : results) { + Assert.assertTrue(result.getColumns().get(invalid_aggregator).isError()); + } } } diff --git a/processing/src/test/java/org/apache/druid/query/topn/TopNMetricSpecOptimizationsTest.java b/processing/src/test/java/org/apache/druid/query/topn/TopNMetricSpecOptimizationsTest.java index 4d322b7941c..0174b845250 100644 --- a/processing/src/test/java/org/apache/druid/query/topn/TopNMetricSpecOptimizationsTest.java +++ b/processing/src/test/java/org/apache/druid/query/topn/TopNMetricSpecOptimizationsTest.java @@ -302,13 +302,6 @@ public class TopNMetricSpecOptimizationsTest return null; } - @Nullable - @Override - public String getColumnTypeName(String column) - { - return null; - } - @Override public int getNumRows() { diff --git a/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java index 21d35bcd6c6..b3ab7b71063 100644 --- a/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/RowBasedStorageAdapterTest.java @@ -198,7 +198,10 @@ public class RowBasedStorageAdapterTest if (valueType == null || valueType == ValueType.COMPLEX) { return i -> null; } else { - return i -> DimensionHandlerUtils.convertObjectToType(i, ROW_SIGNATURE.getColumnType(columnName).orElse(null)); + return i -> DimensionHandlerUtils.convertObjectToType( + i, + ROW_SIGNATURE.getColumnType(columnName).orElse(null) + ); } } } @@ -399,26 +402,23 @@ public class RowBasedStorageAdapterTest } @Test - public void test_getColumnTypeName() + public void test_getColumnTypeString() { final RowBasedStorageAdapter adapter = createIntAdapter(0, 1, 2); for (String columnName : ROW_SIGNATURE.getColumnNames()) { if (UNKNOWN_TYPE_NAME.equals(columnName)) { - Assert.assertNull(columnName, adapter.getColumnTypeName(columnName)); + Assert.assertNull(columnName, adapter.getColumnCapabilities(columnName)); } else { - Assert.assertEquals(columnName, ValueType.valueOf(columnName).name(), adapter.getColumnTypeName(columnName)); + Assert.assertEquals( + columnName, + ValueType.valueOf(columnName).name(), + adapter.getColumnCapabilities(columnName).asTypeString() + ); } } } - @Test - public void test_getColumnTypeName_nonexistent() - { - final RowBasedStorageAdapter adapter = createIntAdapter(0, 1, 2); - Assert.assertNull(adapter.getColumnTypeName("nonexistent")); - } - @Test public void test_getNumRows() { diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java index 4ee41a2cef0..99002e59c14 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java @@ -241,31 +241,30 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag } @Test - public void test_getColumnTypeName_factToCountryFactColumn() - { - Assert.assertEquals("hyperUnique", makeFactToCountrySegment().getColumnTypeName("channel_uniques")); - } - - @Test - public void test_getColumnTypeName_factToCountryJoinColumn() + public void test_getColumnCapabilities_complexTypeName_factToCountryFactColumn() { Assert.assertEquals( - "STRING", - makeFactToCountrySegment().getColumnTypeName(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryName") + "hyperUnique", + makeFactToCountrySegment().getColumnCapabilities("channel_uniques").getComplexTypeName() ); } @Test - public void test_getColumnTypeName_factToCountryNonexistentFactColumn() + public void test_getColumnCapabilities_typeString_factToCountryFactColumn() { - Assert.assertNull(makeFactToCountrySegment().getColumnTypeName("nonexistent")); + Assert.assertEquals( + "COMPLEX", + makeFactToCountrySegment().getColumnCapabilities("channel_uniques").asTypeString() + ); } @Test - public void test_getColumnTypeName_factToCountryNonexistentJoinColumn() + public void test_getColumnCapabilities_typeString_factToCountryJoinColumn() { - Assert.assertNull( - makeFactToCountrySegment().getColumnTypeName(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "nonexistent") + Assert.assertEquals( + "STRING", + makeFactToCountrySegment().getColumnCapabilities(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryName") + .asTypeString() ); } diff --git a/server/src/test/java/org/apache/druid/server/coordination/ServerManagerTest.java b/server/src/test/java/org/apache/druid/server/coordination/ServerManagerTest.java index a76cc5b25be..2105c52b751 100644 --- a/server/src/test/java/org/apache/druid/server/coordination/ServerManagerTest.java +++ b/server/src/test/java/org/apache/druid/server/coordination/ServerManagerTest.java @@ -103,7 +103,6 @@ import org.junit.Test; import org.junit.rules.ExpectedException; import javax.annotation.Nullable; - import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -912,13 +911,6 @@ public class ServerManagerTest return null; } - @Nullable - @Override - public String getColumnTypeName(String column) - { - return null; - } - @Override public int getNumRows() {