From c72f96a4babdf5055912bb0fb5eb2236cfe0ef23 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 11 Aug 2020 11:07:17 -0700 Subject: [PATCH] fix bug with expressions on sparse string realtime columns without explicit null valued rows (#10248) * fix bug with realtime expressions on sparse string columns * fix test * add comment back * push capabilities for dimensions to dimension indexers since they know things * style * style * fixes * getting a bit carried away * missed one * fix it * benchmark build fix * review stuffs * javadoc and comments * add comment * more strict check * fix missed usaged of impl instead of interface --- .../StringDimensionIndexerBenchmark.java | 2 +- .../druid/indexer/IndexGeneratorJob.java | 4 +- .../epinephelinae/GroupByQueryEngineV2.java | 2 +- .../druid/query/metadata/SegmentAnalyzer.java | 2 +- .../druid/query/topn/TopNQueryEngine.java | 2 +- .../druid/segment/DimensionHandlerUtils.java | 12 +- .../druid/segment/DimensionIndexer.java | 2 + .../druid/segment/DoubleDimensionIndexer.java | 12 ++ .../druid/segment/FloatDimensionIndexer.java | 12 ++ .../apache/druid/segment/IndexMergerV9.java | 70 +++++++++++- .../druid/segment/LongDimensionIndexer.java | 13 +++ .../druid/segment/StringDimensionHandler.java | 6 +- .../druid/segment/StringDimensionIndexer.java | 56 ++++++++- .../segment/column/ColumnCapabilities.java | 87 +++++++++++++- .../column/ColumnCapabilitiesImpl.java | 106 +++++++++--------- .../segment/filter/ExpressionFilter.java | 2 +- .../apache/druid/segment/filter/Filters.java | 2 +- .../segment/incremental/IncrementalIndex.java | 95 ++++++++-------- .../IncrementalIndexStorageAdapter.java | 63 ++++++++++- ...yableIndexVectorColumnSelectorFactory.java | 6 +- .../segment/virtual/ExpressionSelectors.java | 6 +- .../druid/query/lookup/LookupSegmentTest.java | 4 +- .../druid/segment/IndexMergerTestBase.java | 27 ----- .../QueryableIndexColumnCapabilitiesTest.java | 18 +-- .../RowBasedColumnSelectorFactoryTest.java | 12 +- .../column/ColumnCapabilitiesImplTest.java | 6 +- .../HashJoinSegmentStorageAdapterTest.java | 4 +- .../join/table/IndexedTableJoinableTest.java | 4 +- ...Test.java => ExpressionSelectorsTest.java} | 103 ++++++++++++++++- .../virtual/ExpressionVirtualColumnTest.java | 4 +- .../druid/segment/realtime/plumber/Sink.java | 6 +- 31 files changed, 553 insertions(+), 197 deletions(-) rename processing/src/test/java/org/apache/druid/segment/virtual/{ExpressionColumnValueSelectorTest.java => ExpressionSelectorsTest.java} (67%) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java index 52fc85194e7..2e4490bd26c 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java @@ -59,7 +59,7 @@ public class StringDimensionIndexerBenchmark @Setup public void setup() { - indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true); + indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true, false); for (int i = 0; i < cardinality; i++) { indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + i, true); diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java index 5dfdcac9c52..72bbaee335a 100644 --- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java +++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java @@ -47,7 +47,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.BaseProgressIndicator; import org.apache.druid.segment.ProgressIndicator; import org.apache.druid.segment.QueryableIndex; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.indexing.TuningConfigs; @@ -289,7 +289,7 @@ public class IndexGeneratorJob implements Jobby AggregatorFactory[] aggs, HadoopDruidIndexerConfig config, Iterable oldDimOrder, - Map oldCapabilities + Map oldCapabilities ) { final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig(); diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index d3aaa4fd3ee..b4cda9b54d9 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -340,7 +340,7 @@ public class GroupByQueryEngineV2 // Now check column capabilities. final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); - return (columnCapabilities != null && !columnCapabilities.hasMultipleValues().isMaybeTrue()) || + return (columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse()) || (missingMeansNonExistent && columnCapabilities == null); }); } diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 659b55b1680..15a081d8fca 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -227,7 +227,7 @@ public class SegmentAnalyzer min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0)); max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1)); } - } else if (capabilities.isDictionaryEncoded()) { + } else if (capabilities.isDictionaryEncoded().isTrue()) { // fallback if no bitmap index DictionaryEncodedColumn theColumn = (DictionaryEncodedColumn) columnHolder.getColumn(); cardinality = theColumn.getCardinality(); diff --git a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java index eb22dc9b4ae..daad69fc7a6 100644 --- a/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/topn/TopNQueryEngine.java @@ -194,7 +194,7 @@ public class TopNQueryEngine } if (capabilities != null && capabilities.getType() == ValueType.STRING) { // string columns must use the on heap algorithm unless they have the following capabilites - return capabilities.isDictionaryEncoded() && capabilities.areDictionaryValuesUnique().isTrue(); + return capabilities.isDictionaryEncoded().isTrue() && capabilities.areDictionaryValuesUnique().isTrue(); } else { // non-strings are not eligible to use the pooled algorithm, and should use a heap algorithm return false; diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 9e5a12921c7..f5b7e9feb34 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -73,16 +73,16 @@ public final class DimensionHandlerUtils ) { if (capabilities == null) { - return new StringDimensionHandler(dimensionName, multiValueHandling, true); + return new StringDimensionHandler(dimensionName, multiValueHandling, true, false); } multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; if (capabilities.getType() == ValueType.STRING) { - if (!capabilities.isDictionaryEncoded()) { + if (!capabilities.isDictionaryEncoded().isTrue()) { throw new IAE("String column must have dictionary encoding."); } - return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes()); + return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes(), capabilities.hasSpatialIndexes()); } if (capabilities.getType() == ValueType.LONG) { @@ -98,7 +98,7 @@ public final class DimensionHandlerUtils } // Return a StringDimensionHandler by default (null columns will be treated as String typed) - return new StringDimensionHandler(dimensionName, multiValueHandling, true); + return new StringDimensionHandler(dimensionName, multiValueHandling, true, false); } public static List getValueTypesFromDimensionSpecs(List dimSpecs) @@ -226,11 +226,11 @@ public final class DimensionHandlerUtils capabilities = ColumnCapabilitiesImpl.copyOf(capabilities) .setType(ValueType.STRING) .setDictionaryValuesUnique( - capabilities.isDictionaryEncoded() && + capabilities.isDictionaryEncoded().isTrue() && fn.getExtractionType() == ExtractionFn.ExtractionType.ONE_TO_ONE ) .setDictionaryValuesSorted( - capabilities.isDictionaryEncoded() && fn.preservesOrdering() + capabilities.isDictionaryEncoded().isTrue() && fn.preservesOrdering() ); } diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java index cf7631db08b..277deb94e9b 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionIndexer.java @@ -22,6 +22,7 @@ package org.apache.druid.segment; import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.MutableBitmap; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -236,6 +237,7 @@ public interface DimensionIndexer IncrementalIndex.DimensionDesc desc ); + ColumnCapabilities getColumnCapabilities(); /** * Compares the row values for this DimensionIndexer's dimension from a Row key. * diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index b802f755513..677ed41ba23 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -38,6 +41,9 @@ public class DoubleDimensionIndexer implements DimensionIndexer DOUBLE_COMPARATOR = Comparators.naturalNullsFirst(); + private final ColumnCapabilitiesImpl capabilities = + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE); + @Override public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -89,6 +95,12 @@ public class DoubleDimensionIndexer implements DimensionIndexer FLOAT_COMPARATOR = Comparators.naturalNullsFirst(); + private final ColumnCapabilitiesImpl capabilities = + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT); + @Override public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -90,6 +96,12 @@ public class FloatDimensionIndexer implements DimensionIndexer - ColumnCapabilitiesImpl.snapshot(capabilities) - .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities))); + ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, DIMENSION_CAPABILITY_MERGE_LOGIC) + ); } for (String metric : adapter.getMetricNames()) { ColumnCapabilities capabilities = adapter.getCapabilities(metric); capabilitiesMap.compute(metric, (m, existingCapabilities) -> - ColumnCapabilitiesImpl.snapshot(capabilities) - .merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities))); + ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, METRIC_CAPABILITY_MERGE_LOGIC) + ); metricsValueTypes.put(metric, capabilities.getType()); metricTypeNames.put(metric, adapter.getMetricType(metric)); } @@ -1011,7 +1068,10 @@ public class IndexMergerV9 implements IndexMerger { Map handlers = new LinkedHashMap<>(); for (int i = 0; i < mergedDimensions.size(); i++) { - ColumnCapabilities capabilities = dimCapabilities.get(i); + ColumnCapabilities capabilities = ColumnCapabilitiesImpl.snapshot( + dimCapabilities.get(i), + DIMENSION_CAPABILITY_MERGE_LOGIC + ); String dimName = mergedDimensions.get(i); DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null); handlers.put(dimName, handler); diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index f2a91278f6b..266405504b4 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; @@ -38,6 +41,10 @@ public class LongDimensionIndexer implements DimensionIndexer { public static final Comparator LONG_COMPARATOR = Comparators.naturalNullsFirst(); + private final ColumnCapabilitiesImpl capabilities = + ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG); + + @Override public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { @@ -90,6 +97,12 @@ public class LongDimensionIndexer implements DimensionIndexer return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } + @Override + public ColumnCapabilities getColumnCapabilities() + { + return capabilities; + } + @Override public DimensionSelector makeDimensionSelector( DimensionSpec spec, diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java index 65ff7f356d7..b7fdad15c19 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java @@ -98,12 +98,14 @@ public class StringDimensionHandler implements DimensionHandler makeIndexer() { - return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes); + return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index c0200e1e2ef..bca0a5c72a9 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -40,6 +40,9 @@ import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.ArrayBasedIndexedInts; import org.apache.druid.segment.data.CloseableIndexed; import org.apache.druid.segment.data.IndexedInts; @@ -74,7 +77,7 @@ public class StringDimensionIndexer implements DimensionIndexer valueToId = new Object2IntOpenHashMap<>(); @@ -233,17 +236,19 @@ public class StringDimensionIndexer implements DimensionIndexer extends AbstractIndex imp private final Map dimensionDescs; private final List dimensionDescsList; - private final Map columnCapabilities; + // dimension capabilities are provided by the indexers + private final Map timeAndMetricsColumnCapabilities; private final AtomicInteger numEntries = new AtomicInteger(); private final AtomicLong bytesInMemory = new AtomicLong(); @@ -287,7 +289,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp this.deserializeComplexMetrics = deserializeComplexMetrics; this.reportParseExceptions = reportParseExceptions; - this.columnCapabilities = new HashMap<>(); + this.timeAndMetricsColumnCapabilities = new HashMap<>(); this.metadata = new Metadata( null, getCombiningAggregators(metrics), @@ -302,7 +304,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp for (AggregatorFactory metric : metrics) { MetricDesc metricDesc = new MetricDesc(metricDescs.size(), metric); metricDescs.put(metricDesc.getName(), metricDesc); - columnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities()); + timeAndMetricsColumnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities()); } DimensionsSpec dimensionsSpec = incrementalIndexSchema.getDimensionsSpec(); @@ -312,24 +314,22 @@ public abstract class IncrementalIndex extends AbstractIndex imp for (DimensionSchema dimSchema : dimensionsSpec.getDimensions()) { ValueType type = TYPE_MAP.get(dimSchema.getValueType()); String dimName = dimSchema.getName(); - ColumnCapabilitiesImpl capabilities = makeCapabilitiesFromValueType(type); + ColumnCapabilitiesImpl capabilities = makeDefaultCapabilitiesFromValueType(type); capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex()); if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) { capabilities.setHasSpatialIndexes(true); - } else { - DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities( - dimName, - capabilities, - dimSchema.getMultiValueHandling() - ); - addNewDimension(dimName, capabilities, handler); } - columnCapabilities.put(dimName, capabilities); + DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities( + dimName, + capabilities, + dimSchema.getMultiValueHandling() + ); + addNewDimension(dimName, handler); } //__time capabilities - columnCapabilities.put( + timeAndMetricsColumnCapabilities.put( ColumnHolder.TIME_COLUMN_NAME, ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG) ); @@ -589,9 +589,13 @@ public abstract class IncrementalIndex extends AbstractIndex imp return row; } - public Map getColumnCapabilities() + public Map getColumnCapabilities() { - return columnCapabilities; + ImmutableMap.Builder builder = + ImmutableMap.builder().putAll(timeAndMetricsColumnCapabilities); + + dimensionDescs.forEach((dimension, desc) -> builder.put(dimension, desc.getCapabilities())); + return builder.build(); } /** @@ -658,23 +662,22 @@ public abstract class IncrementalIndex extends AbstractIndex imp continue; } boolean wasNewDim = false; - ColumnCapabilitiesImpl capabilities; DimensionDesc desc = dimensionDescs.get(dimension); if (desc != null) { - capabilities = desc.getCapabilities(); absentDimensions.remove(dimension); } else { wasNewDim = true; - capabilities = columnCapabilities.get(dimension); - if (capabilities == null) { - // For schemaless type discovery, assume everything is a String for now, can change later. - capabilities = makeCapabilitiesFromValueType(ValueType.STRING); - columnCapabilities.put(dimension, capabilities); - } - DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null); - desc = addNewDimension(dimension, capabilities, handler); + desc = addNewDimension( + dimension, + DimensionHandlerUtils.getHandlerFromCapabilities( + dimension, + // for schemaless type discovery, everything is a String. this should probably try to autodetect + // based on the value to use a better handler + makeDefaultCapabilitiesFromValueType(ValueType.STRING), + null + ) + ); } - DimensionHandler handler = desc.getHandler(); DimensionIndexer indexer = desc.getIndexer(); Object dimsKey = null; try { @@ -684,13 +687,6 @@ public abstract class IncrementalIndex extends AbstractIndex imp parseExceptionMessages.add(pe.getMessage()); } dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey); - // Set column capabilities as data is coming in - if (!capabilities.hasMultipleValues().isTrue() && - dimsKey != null && - handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) { - capabilities.setHasMultipleValues(true); - } - if (wasNewDim) { // unless this is the first row we are processing, all newly discovered columns will be sparse if (maxIngestedEventTime != null) { @@ -928,7 +924,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp } } - private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type) + private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type) { if (type == ValueType.STRING) { // we start out as not having multiple values, but this might change as we encounter them @@ -949,7 +945,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp */ public void loadDimensionIterable( Iterable oldDimensionOrder, - Map oldColumnCapabilities + Map oldColumnCapabilities ) { synchronized (dimensionDescs) { @@ -958,19 +954,21 @@ public abstract class IncrementalIndex extends AbstractIndex imp } for (String dim : oldDimensionOrder) { if (dimensionDescs.get(dim) == null) { - ColumnCapabilitiesImpl capabilities = oldColumnCapabilities.get(dim); - columnCapabilities.put(dim, capabilities); + ColumnCapabilitiesImpl capabilities = ColumnCapabilitiesImpl.snapshot( + oldColumnCapabilities.get(dim), + IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC + ); DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null); - addNewDimension(dim, capabilities, handler); + addNewDimension(dim, handler); } } } } @GuardedBy("dimensionDescs") - private DimensionDesc addNewDimension(String dim, ColumnCapabilitiesImpl capabilities, DimensionHandler handler) + private DimensionDesc addNewDimension(String dim, DimensionHandler handler) { - DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, capabilities, handler); + DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, handler); dimensionDescs.put(dim, desc); dimensionDescsList.add(desc); return desc; @@ -998,7 +996,10 @@ public abstract class IncrementalIndex extends AbstractIndex imp @Nullable public ColumnCapabilities getCapabilities(String column) { - return columnCapabilities.get(column); + if (dimensionDescs.containsKey(column)) { + return dimensionDescs.get(column).getCapabilities(); + } + return timeAndMetricsColumnCapabilities.get(column); } public Metadata getMetadata() @@ -1080,15 +1081,13 @@ public abstract class IncrementalIndex extends AbstractIndex imp { private final int index; private final String name; - private final ColumnCapabilitiesImpl capabilities; private final DimensionHandler handler; private final DimensionIndexer indexer; - public DimensionDesc(int index, String name, ColumnCapabilitiesImpl capabilities, DimensionHandler handler) + public DimensionDesc(int index, String name, DimensionHandler handler) { this.index = index; this.name = name; - this.capabilities = capabilities; this.handler = handler; this.indexer = handler.makeIndexer(); } @@ -1103,9 +1102,9 @@ public abstract class IncrementalIndex extends AbstractIndex imp return name; } - public ColumnCapabilitiesImpl getCapabilities() + public ColumnCapabilities getCapabilities() { - return capabilities; + return indexer.getColumnCapabilities(); } public DimensionHandler getHandler() @@ -1124,7 +1123,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp private final int index; private final String name; private final String type; - private final ColumnCapabilitiesImpl capabilities; + private final ColumnCapabilities capabilities; public MetricDesc(int index, AggregatorFactory factory) { @@ -1163,7 +1162,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp return type; } - public ColumnCapabilitiesImpl getCapabilities() + public ColumnCapabilities getCapabilities() { return capabilities; } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 8e8520d458b..9beb16820ab 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -52,6 +52,62 @@ import java.util.Iterator; */ public class IncrementalIndexStorageAdapter implements StorageAdapter { + private static final ColumnCapabilities.CoercionLogic STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC = + new ColumnCapabilities.CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return false; + } + + @Override + public boolean dictionaryValuesSorted() + { + return false; + } + + @Override + public boolean dictionaryValuesUnique() + { + return true; + } + + @Override + public boolean multipleValues() + { + return true; + } + }; + + private static final ColumnCapabilities.CoercionLogic SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC = + new ColumnCapabilities.CoercionLogic() + { + @Override + public boolean dictionaryEncoded() + { + return true; + } + + @Override + public boolean dictionaryValuesSorted() + { + return true; + } + + @Override + public boolean dictionaryValuesUnique() + { + return true; + } + + @Override + public boolean multipleValues() + { + return false; + } + }; + final IncrementalIndex index; public IncrementalIndexStorageAdapter(IncrementalIndex index) @@ -154,7 +210,7 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter // to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of // multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could // be removed. - return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true); + return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC); } /** @@ -165,7 +221,10 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter */ public ColumnCapabilities getSnapshotColumnCapabilities(String column) { - return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column)); + return ColumnCapabilitiesImpl.snapshot( + index.getCapabilities(column), + SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC + ); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java index 269ac38429b..48c56c9ef6d 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -83,9 +83,9 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe spec -> { final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); if (holder == null - || !holder.getCapabilities().isDictionaryEncoded() + || holder.getCapabilities().isDictionaryEncoded().isFalse() || holder.getCapabilities().getType() != ValueType.STRING - || !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { + || holder.getCapabilities().hasMultipleValues().isFalse()) { throw new ISE( "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", spec.getDimension() @@ -119,7 +119,7 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe spec -> { final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); if (holder == null - || !holder.getCapabilities().isDictionaryEncoded() + || !holder.getCapabilities().isDictionaryEncoded().isTrue() || holder.getCapabilities().getType() != ValueType.STRING) { // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls. return NilVectorSelector.create(offset); diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java index 5ab4e4694a2..17c6a3f3989 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionSelectors.java @@ -153,8 +153,8 @@ public class ExpressionSelectors ); } else if (capabilities != null && capabilities.getType() == ValueType.STRING - && capabilities.isDictionaryEncoded() - && !capabilities.hasMultipleValues().isMaybeTrue() + && capabilities.isDictionaryEncoded().isTrue() + && capabilities.hasMultipleValues().isFalse() && exprDetails.getArrayBindings().isEmpty()) { // Optimization for expressions that hit one scalar string column and nothing else. return new SingleStringInputCachingExpressionColumnValueSelector( @@ -225,7 +225,7 @@ public class ExpressionSelectors // not treating it as an array and not wanting to output an array if (capabilities != null && capabilities.getType() == ValueType.STRING - && capabilities.isDictionaryEncoded() + && capabilities.isDictionaryEncoded().isTrue() && !capabilities.hasMultipleValues().isUnknown() && !exprDetails.hasInputArrays() && !exprDetails.isOutputArray() diff --git a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java index 3ca72aa5a69..96167fd38bd 100644 --- a/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/query/lookup/LookupSegmentTest.java @@ -138,7 +138,7 @@ public class LookupSegmentTest // reporting complete single-valued capabilities. It would be good to change this in the future, so query engines // running on top of lookups can take advantage of singly-valued optimizations. Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); - Assert.assertFalse(capabilities.isDictionaryEncoded()); + Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue()); } @Test @@ -151,7 +151,7 @@ public class LookupSegmentTest // running on top of lookups can take advantage of singly-valued optimizations. Assert.assertEquals(ValueType.STRING, capabilities.getType()); Assert.assertTrue(capabilities.hasMultipleValues().isUnknown()); - Assert.assertFalse(capabilities.isDictionaryEncoded()); + Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java index b253614cc3d..86e8503f997 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java @@ -45,7 +45,6 @@ import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.StringDictionaryEncodedColumn; @@ -2012,32 +2011,6 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest Assert.assertEquals(-1, dictIdSeeker.seek(5)); } - @Test(expected = IllegalArgumentException.class) - public void testCloser() throws Exception - { - final long timestamp = System.currentTimeMillis(); - IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null); - IncrementalIndexTest.populateIndex(timestamp, toPersist); - ColumnCapabilitiesImpl capabilities = (ColumnCapabilitiesImpl) toPersist.getCapabilities("dim1"); - capabilities.setHasSpatialIndexes(true); - - final File tempDir = temporaryFolder.newFolder(); - final File v8TmpDir = new File(tempDir, "v8-tmp"); - final File v9TmpDir = new File(tempDir, "v9-tmp"); - - try { - indexMerger.persist(toPersist, tempDir, indexSpec, null); - } - finally { - if (v8TmpDir.exists()) { - Assert.fail("v8-tmp dir not clean."); - } - if (v9TmpDir.exists()) { - Assert.fail("v9-tmp dir not clean."); - } - } - } - @Test public void testMultiValueHandling() throws Exception { diff --git a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java index c7783e99219..57689a46cb0 100644 --- a/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java +++ b/processing/src/test/java/org/apache/druid/segment/QueryableIndexColumnCapabilitiesTest.java @@ -150,20 +150,25 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin ColumnCapabilities caps = INC_INDEX.getCapabilities("d1"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isMaybeTrue()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); // multi-value is unknown unless explicitly set to 'true' Assert.assertTrue(caps.hasMultipleValues().isUnknown()); // at index merge or query time we 'complete' the capabilities to take a snapshot of the current state, // coercing any 'UNKNOWN' values to false - Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue()); + Assert.assertFalse( + ColumnCapabilitiesImpl.snapshot(caps, IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC) + .hasMultipleValues() + .isMaybeTrue() + ); Assert.assertFalse(caps.hasSpatialIndexes()); caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -176,7 +181,7 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin ColumnCapabilities caps = INC_INDEX.getCapabilities("d2"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); @@ -185,7 +190,7 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities(); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertTrue(caps.hasBitmapIndexes()); - Assert.assertTrue(caps.isDictionaryEncoded()); + Assert.assertTrue(caps.isDictionaryEncoded().isTrue()); Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isTrue()); @@ -199,12 +204,11 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m4").getCapabilities(), ValueType.COMPLEX); } - private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueType valueType) { Assert.assertEquals(valueType, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); diff --git a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java index e12dac4743c..a802b819b57 100644 --- a/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/segment/RowBasedColumnSelectorFactoryTest.java @@ -51,7 +51,7 @@ public class RowBasedColumnSelectorFactoryTest RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, ColumnHolder.TIME_COLUMN_NAME); Assert.assertEquals(ValueType.LONG, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -65,7 +65,7 @@ public class RowBasedColumnSelectorFactoryTest RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, STRING_COLUMN_NAME); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); @@ -79,7 +79,7 @@ public class RowBasedColumnSelectorFactoryTest RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, LONG_COLUMN_NAME); Assert.assertEquals(ValueType.LONG, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -93,7 +93,7 @@ public class RowBasedColumnSelectorFactoryTest RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, FLOAT_COLUMN_NAME); Assert.assertEquals(ValueType.FLOAT, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -107,7 +107,7 @@ public class RowBasedColumnSelectorFactoryTest RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, DOUBLE_COLUMN_NAME); Assert.assertEquals(ValueType.DOUBLE, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue()); @@ -121,7 +121,7 @@ public class RowBasedColumnSelectorFactoryTest RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, COMPLEX_COLUMN_NAME); Assert.assertEquals(ValueType.COMPLEX, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); diff --git a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java index e221edd9c73..ce98506dc7b 100644 --- a/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java +++ b/processing/src/test/java/org/apache/druid/segment/column/ColumnCapabilitiesImplTest.java @@ -44,8 +44,7 @@ public class ColumnCapabilitiesImplTest ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class); Assert.assertEquals(ValueType.COMPLEX, cc.getType()); - Assert.assertTrue(cc.isDictionaryEncoded()); - Assert.assertFalse(cc.isRunLengthEncoded()); + Assert.assertTrue(cc.isDictionaryEncoded().isTrue()); Assert.assertTrue(cc.hasSpatialIndexes()); Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); @@ -69,8 +68,7 @@ public class ColumnCapabilitiesImplTest ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class); Assert.assertEquals(ValueType.COMPLEX, cc.getType()); - Assert.assertTrue(cc.isDictionaryEncoded()); - Assert.assertTrue(cc.isRunLengthEncoded()); + Assert.assertTrue(cc.isDictionaryEncoded().isTrue()); Assert.assertTrue(cc.hasSpatialIndexes()); Assert.assertTrue(cc.hasMultipleValues().isTrue()); Assert.assertTrue(cc.hasBitmapIndexes()); diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java index 7b80bd2094c..6406d7afe09 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java @@ -200,7 +200,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag Assert.assertEquals(ValueType.STRING, capabilities.getType()); Assert.assertTrue(capabilities.hasBitmapIndexes()); - Assert.assertTrue(capabilities.isDictionaryEncoded()); + Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue()); Assert.assertTrue(capabilities.areDictionaryValuesSorted().isTrue()); Assert.assertTrue(capabilities.areDictionaryValuesUnique().isTrue()); } @@ -216,7 +216,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag Assert.assertFalse(capabilities.hasBitmapIndexes()); Assert.assertFalse(capabilities.areDictionaryValuesUnique().isTrue()); Assert.assertFalse(capabilities.areDictionaryValuesSorted().isTrue()); - Assert.assertTrue(capabilities.isDictionaryEncoded()); + Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java index c75232c9be9..61b56377b5d 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java @@ -137,7 +137,7 @@ public class IndexedTableJoinableTest { final ColumnCapabilities capabilities = target.getColumnCapabilities("str"); Assert.assertEquals(ValueType.STRING, capabilities.getType()); - Assert.assertTrue(capabilities.isDictionaryEncoded()); + Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue()); Assert.assertFalse(capabilities.hasBitmapIndexes()); Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(capabilities.hasSpatialIndexes()); @@ -148,7 +148,7 @@ public class IndexedTableJoinableTest { final ColumnCapabilities capabilities = target.getColumnCapabilities("long"); Assert.assertEquals(ValueType.LONG, capabilities.getType()); - Assert.assertFalse(capabilities.isDictionaryEncoded()); + Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue()); Assert.assertFalse(capabilities.hasBitmapIndexes()); Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue()); Assert.assertFalse(capabilities.hasSpatialIndexes()); diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java similarity index 67% rename from processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java rename to processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java index 0b7a66db20f..08b18c44ff7 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionColumnValueSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionSelectorsTest.java @@ -21,20 +21,41 @@ package org.apache.druid.segment.virtual; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.common.guava.SettableSupplier; +import org.apache.druid.data.input.MapBasedInputRow; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.math.expr.Parser; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.BaseSingleValueDimensionSelector; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.Cursor; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.TestObjectColumnSelector; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.incremental.IncrementalIndex; +import org.apache.druid.segment.incremental.IncrementalIndexSchema; +import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; +import org.apache.druid.segment.incremental.IndexSizeExceededException; +import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Test; import java.util.ArrayList; +import java.util.Collections; import java.util.List; -public class ExpressionColumnValueSelectorTest +public class ExpressionSelectorsTest extends InitializedNullHandlingTest { @Test public void testSupplierFromDimensionSelector() @@ -231,6 +252,86 @@ public class ExpressionColumnValueSelectorTest ); } + @Test + public void testIncrementIndexStringSelector() throws IndexSizeExceededException + { + // This test covers a regression caused by ColumnCapabilites.isDictionaryEncoded not matching the value of + // DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an + // exception trying to make an optimized string expression selector that was not appropriate to use for the + // underlying dimension selector. + // This occurred during schemaless ingestion with spare dimension values and no explicit null rows, so the + // conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details + IncrementalIndexSchema schema = new IncrementalIndexSchema( + 0, + new TimestampSpec("time", "millis", DateTimes.nowUtc()), + Granularities.NONE, + VirtualColumns.EMPTY, + DimensionsSpec.EMPTY, + new AggregatorFactory[]{new CountAggregatorFactory("count")}, + true + ); + + IncrementalIndex index = new IncrementalIndex.Builder().setMaxRowCount(100).setIndexSchema(schema).buildOnheap(); + index.add( + new MapBasedInputRow( + DateTimes.nowUtc().getMillis(), + ImmutableList.of("x"), + ImmutableMap.of("x", "foo") + ) + ); + index.add( + new MapBasedInputRow( + DateTimes.nowUtc().plusMillis(1000).getMillis(), + ImmutableList.of("y"), + ImmutableMap.of("y", "foo") + ) + ); + + IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index); + + Sequence cursors = adapter.makeCursors( + null, + Intervals.ETERNITY, + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + int rowsProcessed = cursors.map(cursor -> { + DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector( + cursor.getColumnSelectorFactory(), + Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()), + null + ); + DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector( + cursor.getColumnSelectorFactory(), + Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()), + null + ); + int rowCount = 0; + while (!cursor.isDone()) { + Object x = xExprSelector.getObject(); + Object y = yExprSelector.getObject(); + List expectedFoo = Collections.singletonList("foofoo"); + List expectedNull = NullHandling.replaceWithDefault() + ? Collections.singletonList("foo") + : Collections.singletonList(null); + if (rowCount == 0) { + Assert.assertEquals(expectedFoo, x); + Assert.assertEquals(expectedNull, y); + } else { + Assert.assertEquals(expectedNull, x); + Assert.assertEquals(expectedFoo, y); + } + rowCount++; + cursor.advance(); + } + return rowCount; + }).accumulate(0, (in, acc) -> in + acc); + + Assert.assertEquals(2, rowsProcessed); + } + private static DimensionSelector dimensionSelectorFromSupplier( final Supplier supplier ) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java index 16e090dc3d5..84bf5fd7e74 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/ExpressionVirtualColumnTest.java @@ -810,7 +810,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest ColumnCapabilities caps = X_PLUS_Y.capabilities("expr"); Assert.assertEquals(ValueType.FLOAT, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); @@ -820,7 +820,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest caps = Z_CONCAT_X.capabilities("expr"); Assert.assertEquals(ValueType.STRING, caps.getType()); Assert.assertFalse(caps.hasBitmapIndexes()); - Assert.assertFalse(caps.isDictionaryEncoded()); + Assert.assertFalse(caps.isDictionaryEncoded().isTrue()); Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue()); Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue()); Assert.assertTrue(caps.hasMultipleValues().isUnknown()); diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java index 7002ac22fa6..2324a2aca6a 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java @@ -29,7 +29,7 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.ReferenceCountingSegment; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexAddResult; import org.apache.druid.segment.incremental.IncrementalIndexSchema; @@ -377,7 +377,7 @@ public class Sink implements Iterable, Overshadowable FireHydrant lastHydrant = hydrants.get(numHydrants - 1); newCount = lastHydrant.getCount() + 1; if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) { - Map oldCapabilities; + Map oldCapabilities; if (lastHydrant.hasSwapped()) { oldCapabilities = new HashMap<>(); ReferenceCountingSegment segment = lastHydrant.getIncrementedSegment(); @@ -385,7 +385,7 @@ public class Sink implements Iterable, Overshadowable QueryableIndex oldIndex = segment.asQueryableIndex(); for (String dim : oldIndex.getAvailableDimensions()) { dimOrder.add(dim); - oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumnHolder(dim).getCapabilities()); + oldCapabilities.put(dim, oldIndex.getColumnHolder(dim).getCapabilities()); } } finally {