From 277aaa5c572725b7cacb4e7be36bad164a50f889 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sun, 2 Jul 2023 19:37:15 -0700 Subject: [PATCH] remove druid.processing.columnCache.sizeBytes and CachingIndexed, combine string column implementations (#14500) * combine string column implementations changes: * generic indexed, front-coded, and auto string columns now all share the same column and index supplier implementations * remove CachingIndexed implementation, which I think is largely no longer needed since many things have switched to using ByteBuffer directly, avoiding the cost of creating Strings * remove ColumnConfig.columnCacheSizeBytes since CachingIndexed was the only user --- .../druid/benchmark/BoundFilterBenchmark.java | 8 +- ...ryEncodedStringIndexSupplierBenchmark.java | 11 +- .../DimensionPredicateFilterBenchmark.java | 9 +- .../benchmark/FilterPartitionBenchmark.java | 5 - .../FilteredAggregatorBenchmark.java | 5 - .../GroupByTypeInterfaceBenchmark.java | 5 - .../druid/benchmark/InFilterBenchmark.java | 9 +- .../benchmark/JoinAndLookupBenchmark.java | 6 +- .../druid/benchmark/LikeFilterBenchmark.java | 9 +- .../benchmark/TopNTypeInterfaceBenchmark.java | 5 - ...arLongsEncodeDataFromSegmentBenchmark.java | 6 +- .../indexing/IndexMergeBenchmark.java | 6 +- .../indexing/IndexPersistBenchmark.java | 6 +- .../benchmark/query/GroupByBenchmark.java | 5 - .../druid/benchmark/query/ScanBenchmark.java | 6 +- .../benchmark/query/SearchBenchmark.java | 5 - .../benchmark/query/TimeseriesBenchmark.java | 5 - .../druid/benchmark/query/TopNBenchmark.java | 5 - .../timecompare/TimeCompareBenchmark.java | 5 - docs/configuration/index.md | 4 - .../msq/indexing/WorkerChatHandlerTest.java | 3 +- .../druid/msq/test/CalciteMSQTestsHelper.java | 3 +- .../apache/druid/msq/test/MSQTestBase.java | 3 +- .../druid/msq/test/MSQTestWorkerContext.java | 6 +- .../druid/indexing/common/TestUtils.java | 6 +- .../common/task/AppenderatorsTest.java | 6 +- 
.../common/task/BatchAppenderatorsTest.java | 8 +- .../common/task/CompactionTaskTest.java | 3 +- pom.xml | 4 +- .../druid/query/DruidProcessingConfig.java | 7 - .../org/apache/druid/segment/IndexIO.java | 27 +- .../apache/druid/segment/MMappedIndex.java | 8 - .../druid/segment/column/ColumnConfig.java | 18 +- .../IndexedStringDruidPredicateIndex.java | 29 - .../IndexedUtf8LexicographicalRangeIndex.java | 17 - .../column/StringDictionaryEncodedColumn.java | 807 ------------------ .../StringUtf8DictionaryEncodedColumn.java | 323 ++++++- .../druid/segment/data/CachingIndexed.java | 158 ---- .../NestedFieldDictionaryEncodedColumn.java | 6 +- .../ScalarStringColumnAndIndexSupplier.java | 115 +-- .../druid/segment/nested/VariantColumn.java | 4 +- .../DictionaryEncodedColumnPartSerde.java | 123 +-- .../DictionaryEncodedColumnSupplier.java | 100 --- .../DictionaryEncodedStringIndexSupplier.java | 127 --- .../NestedCommonFormatColumnPartSerde.java | 223 +++-- ...ava => StringUtf8ColumnIndexSupplier.java} | 38 +- ...gUtf8DictionaryEncodedColumnSupplier.java} | 17 +- .../query/DruidProcessingConfigTest.java | 4 - .../aggregation/AggregationTestHelper.java | 20 - ...ByLimitPushDownInsufficientBufferTest.java | 5 - ...roupByLimitPushDownMultiNodeMergeTest.java | 5 - .../groupby/GroupByMultiSegmentTest.java | 5 - .../groupby/NestedQueryPushDownTest.java | 6 +- .../query/metadata/SegmentAnalyzerTest.java | 4 +- ...ColumnSelectorColumnIndexSelectorTest.java | 4 +- .../segment/CustomSegmentizerFactoryTest.java | 3 +- .../apache/druid/segment/IndexBuilder.java | 4 +- .../IndexIONullColumnsCompatibilityTest.java | 2 +- .../druid/segment/IndexMergerTestBase.java | 6 +- .../org/apache/druid/segment/TestHelper.java | 22 +- .../druid/segment/V9IndexLoaderTest.java | 3 +- .../filter/ExtractionDimFilterTest.java | 7 +- .../PredicateValueMatcherFactoryTest.java | 18 +- .../segment/filter/ValueMatchersTest.java | 32 +- .../druid/segment/join/JoinTestHelper.java | 2 +- 
.../BroadcastSegmentIndexedTableTest.java | 3 +- ...MappedQueryableSegmentizerFactoryTest.java | 3 +- .../loading/SegmentizerFactoryTest.java | 5 - .../nested/NestedDataColumnSupplierTest.java | 5 - .../NestedFieldColumnIndexSupplierTest.java | 12 - ...tionaryEncodedStringIndexSupplierTest.java | 9 +- .../serde/NullColumnPartSerdeTest.java | 16 +- .../virtual/DummyStringVirtualColumn.java | 12 +- .../LocalDataStorageDruidModuleTest.java | 2 +- ...edSegmensSinksBatchAppenderatorTester.java | 6 +- ...DefaultOfflineAppenderatorFactoryTest.java | 5 - ...enAndClosedSegmentsAppenderatorTester.java | 5 - .../StreamAppenderatorTester.java | 5 - ...tManagerBroadcastJoinIndexedTableTest.java | 3 +- .../SegmentManagerThreadSafetyTest.java | 3 +- .../org/apache/druid/cli/DumpSegment.java | 5 - .../apache/druid/cli/ValidateSegments.java | 5 - 82 files changed, 669 insertions(+), 1901 deletions(-) delete mode 100644 processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java delete mode 100644 processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java delete mode 100644 processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java delete mode 100644 processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java rename processing/src/main/java/org/apache/druid/segment/serde/{StringFrontCodedColumnIndexSupplier.java => StringUtf8ColumnIndexSupplier.java} (80%) rename processing/src/main/java/org/apache/druid/segment/serde/{StringFrontCodedDictionaryEncodedColumnSupplier.java => StringUtf8DictionaryEncodedColumnSupplier.java} (80%) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java index 8f242c40f2f..e1dd7fe4f2f 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java +++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/BoundFilterBenchmark.java @@ -36,7 +36,7 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.filter.BoundFilter; import org.apache.druid.segment.filter.Filters; -import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier; +import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -161,10 +161,6 @@ public class BoundFilterBenchmark final BitmapFactory bitmapFactory = new RoaringBitmapFactory(); final BitmapSerdeFactory serdeFactory = RoaringBitmapSerdeFactory.getInstance(); final List ints = generateInts(); - final GenericIndexed dictionary = GenericIndexed.fromIterable( - FluentIterable.from(ints).transform(Object::toString), - GenericIndexed.STRING_STRATEGY - ); final GenericIndexed bitmaps = GenericIndexed.fromIterable( FluentIterable.from(ints) .transform( @@ -183,7 +179,7 @@ public class BoundFilterBenchmark ); selector = new MockColumnIndexSelector( bitmapFactory, - new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, dictionaryUtf8, bitmaps, null) + new StringUtf8ColumnIndexSupplier<>(bitmapFactory, dictionaryUtf8::singleThreaded, bitmaps, null) ); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java index cc3f68aba11..73cba8c5d97 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/DictionaryEncodedStringIndexSupplierBenchmark.java @@ -32,7 +32,7 @@ import org.apache.druid.segment.column.StringValueSetIndex; import 
org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier; +import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -93,11 +93,6 @@ public class DictionaryEncodedStringIndexSupplierBenchmark final BitmapFactory bitmapFactory = new RoaringBitmapFactory(); final BitmapSerdeFactory serdeFactory = RoaringBitmapSerdeFactory.getInstance(); final Iterable ints = intGenerator(); - final GenericIndexed dictionary = GenericIndexed.fromIterable( - FluentIterable.from(ints) - .transform(Object::toString), - GenericIndexed.STRING_STRATEGY - ); final GenericIndexed dictionaryUtf8 = GenericIndexed.fromIterable( FluentIterable.from(ints) .transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))), @@ -115,8 +110,8 @@ public class DictionaryEncodedStringIndexSupplierBenchmark .iterator(), serdeFactory.getObjectStrategy() ); - DictionaryEncodedStringIndexSupplier indexSupplier = - new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, dictionaryUtf8, bitmaps, null); + StringUtf8ColumnIndexSupplier indexSupplier = + new StringUtf8ColumnIndexSupplier<>(bitmapFactory, dictionaryUtf8::singleThreaded, bitmaps, null); stringValueSetIndex = (IndexedUtf8ValueSetIndex) indexSupplier.as(StringValueSetIndex.class); List filterValues = new ArrayList<>(); List nonFilterValues = new ArrayList<>(); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java index 810ad4219c2..602f838680b 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java +++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/DimensionPredicateFilterBenchmark.java @@ -38,7 +38,7 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.filter.DimensionPredicateFilter; import org.apache.druid.segment.filter.Filters; -import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier; +import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -122,11 +122,6 @@ public class DimensionPredicateFilterBenchmark final BitmapFactory bitmapFactory = new RoaringBitmapFactory(); final BitmapSerdeFactory serdeFactory = RoaringBitmapSerdeFactory.getInstance(); final List ints = generateInts(); - final GenericIndexed dictionary = GenericIndexed.fromIterable( - FluentIterable.from(ints) - .transform(Object::toString), - GenericIndexed.STRING_STRATEGY - ); final GenericIndexed dictionaryUtf8 = GenericIndexed.fromIterable( FluentIterable.from(ints) .transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))), @@ -145,7 +140,7 @@ public class DimensionPredicateFilterBenchmark ); selector = new MockColumnIndexSelector( bitmapFactory, - new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, dictionaryUtf8, bitmaps, null) + new StringUtf8ColumnIndexSupplier<>(bitmapFactory, dictionaryUtf8::singleThreaded, bitmaps, null) ); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/FilterPartitionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/FilterPartitionBenchmark.java index 775aa0bb1ed..bc5c79c0af3 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/FilterPartitionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/FilterPartitionBenchmark.java @@ -137,11 +137,6 @@ public class FilterPartitionBenchmark JSON_MAPPER, 
new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java index 4d077d44388..ac6d568ec44 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/FilteredAggregatorBenchmark.java @@ -140,11 +140,6 @@ public class FilteredAggregatorBenchmark JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/GroupByTypeInterfaceBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/GroupByTypeInterfaceBenchmark.java index 3cb2f60b97d..87a421df5fc 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/GroupByTypeInterfaceBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/GroupByTypeInterfaceBenchmark.java @@ -158,11 +158,6 @@ public class GroupByTypeInterfaceBenchmark JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java index 967e3fae0b7..7bdcaf1c6ab 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/InFilterBenchmark.java @@ -32,7 +32,7 @@ import 
org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.filter.Filters; -import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier; +import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -85,11 +85,6 @@ public class InFilterBenchmark final BitmapFactory bitmapFactory = new RoaringBitmapFactory(); final BitmapSerdeFactory serdeFactory = RoaringBitmapSerdeFactory.getInstance(); final Iterable ints = intGenerator(); - final GenericIndexed dictionary = GenericIndexed.fromIterable( - FluentIterable.from(ints) - .transform(Object::toString), - GenericIndexed.STRING_STRATEGY - ); final GenericIndexed dictionaryUtf8 = GenericIndexed.fromIterable( FluentIterable.from(ints) .transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))), @@ -109,7 +104,7 @@ public class InFilterBenchmark ); selector = new MockColumnIndexSelector( bitmapFactory, - new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, dictionaryUtf8, bitmaps, null) + new StringUtf8ColumnIndexSupplier<>(bitmapFactory, dictionaryUtf8::singleThreaded, bitmaps, null) ); inFilter = new InDimFilter( "dummy", diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/JoinAndLookupBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/JoinAndLookupBenchmark.java index d2fed4b0fab..7ac9931da48 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/JoinAndLookupBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/JoinAndLookupBenchmark.java @@ -95,9 +95,6 @@ public class JoinAndLookupBenchmark @Param({"500000"}) int rows; - @Param({"0", "16384"}) - int columnCacheSizeBytes; - private File tmpDir = null; private QueryableIndex index = null; 
private Segment baseSegment = null; @@ -123,8 +120,7 @@ public class JoinAndLookupBenchmark public void setup() throws IOException { tmpDir = FileUtils.createTempDir(); - ColumnConfig columnConfig = () -> columnCacheSizeBytes; - index = JoinTestHelper.createFactIndexBuilder(columnConfig, tmpDir, rows).buildMMappedIndex(); + index = JoinTestHelper.createFactIndexBuilder(ColumnConfig.DEFAULT, tmpDir, rows).buildMMappedIndex(); final String prefix = "c."; diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java index ee000f45de3..0bbfadf50a2 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/LikeFilterBenchmark.java @@ -37,7 +37,7 @@ import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import org.apache.druid.segment.filter.Filters; -import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier; +import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -122,11 +122,6 @@ public class LikeFilterBenchmark final BitmapFactory bitmapFactory = new RoaringBitmapFactory(); final BitmapSerdeFactory serdeFactory = RoaringBitmapSerdeFactory.getInstance(); final List ints = generateInts(); - final GenericIndexed dictionary = GenericIndexed.fromIterable( - FluentIterable.from(ints) - .transform(Object::toString), - GenericIndexed.STRING_STRATEGY - ); final GenericIndexed dictionaryUtf8 = GenericIndexed.fromIterable( FluentIterable.from(ints) .transform(i -> ByteBuffer.wrap(StringUtils.toUtf8(String.valueOf(i)))), @@ -145,7 +140,7 @@ public class LikeFilterBenchmark ); selector = new 
MockColumnIndexSelector( bitmapFactory, - new DictionaryEncodedStringIndexSupplier(bitmapFactory, dictionary, dictionaryUtf8, bitmaps, null) + new StringUtf8ColumnIndexSupplier<>(bitmapFactory, dictionaryUtf8::singleThreaded, bitmaps, null) ); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/TopNTypeInterfaceBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/TopNTypeInterfaceBenchmark.java index b96716aaf6e..df66a36a553 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/TopNTypeInterfaceBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/TopNTypeInterfaceBenchmark.java @@ -138,11 +138,6 @@ public class TopNTypeInterfaceBenchmark JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java index 1fc373f713a..5834f25b284 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/compression/ColumnarLongsEncodeDataFromSegmentBenchmark.java @@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.LongsColumn; import org.apache.druid.segment.column.ValueType; @@ -118,10 +119,7 @@ public class ColumnarLongsEncodeDataFromSegmentBenchmark extends BaseColumnarLon File dataFile = new 
File(dir, getColumnDataFileName(segmentName, columnName)); if (!dataFile.exists()) { - final IndexIO indexIO = new IndexIO( - new DefaultObjectMapper(), - () -> 0 - ); + final IndexIO indexIO = new IndexIO(new DefaultObjectMapper(), ColumnConfig.DEFAULT); try (final QueryableIndex index = indexIO.loadIndex(new File(segmentPath))) { final Set columnNames = new LinkedHashSet<>(); columnNames.add(ColumnHolder.TIME_COLUMN_NAME); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/IndexMergeBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/IndexMergeBenchmark.java index a394c1d6842..b9e43059c57 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/IndexMergeBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/IndexMergeBenchmark.java @@ -31,6 +31,7 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.generator.DataGenerator; import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; @@ -105,10 +106,7 @@ public class IndexMergeBenchmark InjectableValues.Std injectableValues = new InjectableValues.Std(); injectableValues.addValue(ExprMacroTable.class, ExprMacroTable.nil()); JSON_MAPPER.setInjectableValues(injectableValues); - INDEX_IO = new IndexIO( - JSON_MAPPER, - () -> 0 - ); + INDEX_IO = new IndexIO(JSON_MAPPER, ColumnConfig.DEFAULT); } @Setup diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/IndexPersistBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/IndexPersistBenchmark.java index cd3dae8b26e..14d296fda61 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/IndexPersistBenchmark.java +++ 
b/benchmarks/src/test/java/org/apache/druid/benchmark/indexing/IndexPersistBenchmark.java @@ -30,6 +30,7 @@ import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.generator.DataGenerator; import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; @@ -74,10 +75,7 @@ public class IndexPersistBenchmark static { NullHandling.initializeForTests(); JSON_MAPPER = new DefaultObjectMapper(); - INDEX_IO = new IndexIO( - JSON_MAPPER, - () -> 0 - ); + INDEX_IO = new IndexIO(JSON_MAPPER, ColumnConfig.DEFAULT); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/GroupByBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/GroupByBenchmark.java index 8fd6ca75e9f..08c51b9edf2 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/GroupByBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/GroupByBenchmark.java @@ -174,11 +174,6 @@ public class GroupByBenchmark ), new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/ScanBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/ScanBenchmark.java index 76c4f036f9b..b7bfe56ff02 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/ScanBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/ScanBenchmark.java @@ -64,6 +64,7 @@ import 
org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexSegment; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.generator.DataGenerator; import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; @@ -136,10 +137,7 @@ public class ScanBenchmark static { JSON_MAPPER = new DefaultObjectMapper(); - INDEX_IO = new IndexIO( - JSON_MAPPER, - () -> 0 - ); + INDEX_IO = new IndexIO(JSON_MAPPER, ColumnConfig.DEFAULT); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); } diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SearchBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SearchBenchmark.java index 322a577e786..9a383f119cb 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SearchBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SearchBenchmark.java @@ -143,11 +143,6 @@ public class SearchBenchmark JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/TimeseriesBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/TimeseriesBenchmark.java index ee54b17ceaa..97d20ca68ae 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/TimeseriesBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/TimeseriesBenchmark.java @@ -136,11 +136,6 @@ public class TimeseriesBenchmark JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new 
IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/TopNBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/TopNBenchmark.java index 843eae489ae..e69bd15a5f0 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/TopNBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/TopNBenchmark.java @@ -134,11 +134,6 @@ public class TopNBenchmark JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/timecompare/TimeCompareBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/timecompare/TimeCompareBenchmark.java index 002608cf8eb..6baf2478ada 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/timecompare/TimeCompareBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/timecompare/TimeCompareBenchmark.java @@ -160,11 +160,6 @@ public class TimeCompareBenchmark JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 9698b28e431..b741728aad8 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -1478,7 +1478,6 @@ Processing properties set on the MiddleManager will be passed through to Peons. |`druid.processing.formatString`|Realtime and Historical processes use this format string to name their processing threads.|processing-%s| |`druid.processing.numMergeBuffers`|The number of direct memory buffers available for merging query results. 
The buffers are sized by `druid.processing.buffer.sizeBytes`. This property is effectively a concurrency limit for queries that require merging buffers. If you are using any queries that require merge buffers (currently, just groupBy v2) then you should have at least two of these.|`max(2, druid.processing.numThreads / 4)`| |`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)| -|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)| |`druid.processing.fifo`|Enables the processing queue to treat tasks of equal priority in a FIFO manner.|`true`| |`druid.processing.tmpDir`|Path where temporary files created while processing a query should be stored. If specified, this configuration takes priority over the default `java.io.tmpdir` path.|path represented by `java.io.tmpdir`| |`druid.processing.intermediaryData.storage.type`|Storage type for intermediary segments of data shuffle between native parallel index tasks.
Set to `local` to store segment files in the local storage of the MiddleManager or Indexer.
Set to `deepstore` to use configured deep storage for better fault tolerance during rolling updates. When the storage type is `deepstore`, Druid stores the data in the `shuffle-data` directory under the configured deep storage path. Druid does not support automated cleanup for the `shuffle-data` directory. You can set up cloud storage lifecycle rules for automated cleanup of data at the `shuffle-data` prefix location.|`local`| @@ -1628,7 +1627,6 @@ Druid uses Jetty to serve HTTP requests. |`druid.processing.formatString`|Indexer processes use this format string to name their processing threads.|processing-%s| |`druid.processing.numMergeBuffers`|The number of direct memory buffers available for merging query results. The buffers are sized by `druid.processing.buffer.sizeBytes`. This property is effectively a concurrency limit for queries that require merging buffers. If you are using any queries that require merge buffers (currently, just groupBy v2) then you should have at least two of these.|`max(2, druid.processing.numThreads / 4)`| |`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)| -|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). 
Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)| |`druid.processing.fifo`|If the processing queue should treat tasks of equal priority in a FIFO manner|`true`| |`druid.processing.tmpDir`|Path where temporary files created while processing a query should be stored. If specified, this configuration takes priority over the default `java.io.tmpdir` path.|path represented by `java.io.tmpdir`| @@ -1738,7 +1736,6 @@ Druid uses Jetty to serve HTTP requests. |`druid.processing.formatString`|Realtime and Historical processes use this format string to name their processing threads.|processing-%s| |`druid.processing.numMergeBuffers`|The number of direct memory buffers available for merging query results. The buffers are sized by `druid.processing.buffer.sizeBytes`. This property is effectively a concurrency limit for queries that require merging buffers. If you are using any queries that require merge buffers (currently, just groupBy v2) then you should have at least two of these.|`max(2, druid.processing.numThreads / 4)`| |`druid.processing.numThreads`|The number of processing threads to have available for parallel processing of segments. Our rule of thumb is `num_cores - 1`, which means that even under heavy load there will still be one core available to do background tasks like talking with ZooKeeper and pulling down segments. If only one core is available, this property defaults to the value `1`.|Number of cores - 1 (or 1)| -|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). 
Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)| |`druid.processing.fifo`|If the processing queue should treat tasks of equal priority in a FIFO manner|`true`| |`druid.processing.tmpDir`|Path where temporary files created while processing a query should be stored. If specified, this configuration takes priority over the default `java.io.tmpdir` path.|path represented by `java.io.tmpdir`| @@ -1917,7 +1914,6 @@ The broker uses processing configs for nested groupBy queries. |`druid.processing.buffer.poolCacheInitialCount`|initializes the number of buffers allocated on the intermediate results pool. Note that pool can create more buffers if necessary.|`0`| |`druid.processing.buffer.poolCacheMaxCount`|processing buffer pool caches the buffers for later use, this is the maximum count cache will grow to. note that pool can create more buffers than it can cache if necessary.|Integer.MAX_VALUE| |`druid.processing.numMergeBuffers`|The number of direct memory buffers available for merging query results. The buffers are sized by `druid.processing.buffer.sizeBytes`. This property is effectively a concurrency limit for queries that require merging buffers. If you are using any queries that require merge buffers (currently, just groupBy v2) then you should have at least two of these.|`max(2, druid.processing.numThreads / 4)`| -|`druid.processing.columnCache.sizeBytes`|Maximum size in bytes for the dimension value lookup cache. Any value greater than `0` enables the cache. It is currently disabled by default. Enabling the lookup cache can significantly improve the performance of aggregators operating on dimension values, such as the JavaScript aggregator, or cardinality aggregator, but can slow things down if the cache hit rate is low (i.e. dimensions with few repeating values). 
Enabling it may also require additional garbage collection tuning to avoid long GC pauses.|`0` (disabled)| |`druid.processing.fifo`|If the processing queue should treat tasks of equal priority in a FIFO manner|`true`| |`druid.processing.tmpDir`|Path where temporary files created while processing a query should be stored. If specified, this configuration takes priority over the default `java.io.tmpdir` path.|path represented by `java.io.tmpdir`| |`druid.processing.merge.useParallelMergePool`|Enable automatic parallel merging for Brokers on a dedicated async ForkJoinPool. If `false`, instead merges will be done serially on the `HTTP` thread pool.|`true`| diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/WorkerChatHandlerTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/WorkerChatHandlerTest.java index 9b148ac2548..b0dbacee242 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/WorkerChatHandlerTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/WorkerChatHandlerTest.java @@ -34,6 +34,7 @@ import org.apache.druid.msq.kernel.WorkOrder; import org.apache.druid.msq.statistics.ClusterByStatisticsSnapshot; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.apache.druid.server.security.AuthConfig; import org.apache.druid.server.security.AuthenticationResult; @@ -68,7 +69,7 @@ public class WorkerChatHandlerTest public void setUp() { ObjectMapper mapper = new DefaultObjectMapper(); - IndexIO indexIO = new IndexIO(mapper, () -> 0); + IndexIO indexIO = new IndexIO(mapper, ColumnConfig.DEFAULT); IndexMergerV9 indexMerger = new IndexMergerV9( mapper, indexIO, diff --git 
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java index ee2f3b65b1b..39fa01b3da4 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteMSQTestsHelper.java @@ -61,6 +61,7 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexStorageAdapter; import org.apache.druid.segment.Segment; import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.loading.LocalDataSegmentPusher; @@ -137,7 +138,7 @@ public class CalciteMSQTestsHelper ) ); ObjectMapper testMapper = MSQTestBase.setupObjectMapper(dummyInjector); - IndexIO indexIO = new IndexIO(testMapper, () -> 0); + IndexIO indexIO = new IndexIO(testMapper, ColumnConfig.DEFAULT); SegmentCacheManager segmentCacheManager = null; try { segmentCacheManager = new SegmentCacheManagerFactory(testMapper).manufacturate(temporaryFolder.newFolder( diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index 772279ada79..736ec2f430d 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -122,6 +122,7 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexStorageAdapter; import org.apache.druid.segment.Segment; import org.apache.druid.segment.StorageAdapter; +import 
org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.loading.DataSegmentPusher; @@ -356,7 +357,7 @@ public class MSQTestBase extends BaseCalciteQueryTest ); ObjectMapper secondMapper = setupObjectMapper(secondInjector); - indexIO = new IndexIO(secondMapper, () -> 0); + indexIO = new IndexIO(secondMapper, ColumnConfig.DEFAULT); try { segmentCacheManager = new SegmentCacheManagerFactory(secondMapper).manufacturate(tmpFolder.newFolder("test")); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java index 655077008db..a6f98b3ba85 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java @@ -40,6 +40,7 @@ import org.apache.druid.msq.kernel.QueryDefinition; import org.apache.druid.msq.querykit.DataSegmentProvider; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.NoopRowIngestionMeters; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider; @@ -114,10 +115,7 @@ public class MSQTestWorkerContext implements WorkerContext @Override public FrameContext frameContext(QueryDefinition queryDef, int stageNumber) { - IndexIO indexIO = new IndexIO( - mapper, - () -> 0 - ); + IndexIO indexIO = new IndexIO(mapper, ColumnConfig.DEFAULT); IndexMergerV9 indexMerger = new IndexMergerV9( mapper, indexIO, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/TestUtils.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/TestUtils.java index abb4273cd41..44d629b2276 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/TestUtils.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/TestUtils.java @@ -46,6 +46,7 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexMergerV9Factory; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.loading.LocalDataSegmentPuller; import org.apache.druid.segment.loading.LocalLoadSpec; @@ -82,10 +83,7 @@ public class TestUtils public TestUtils() { this.jsonMapper = new DefaultObjectMapper(); - indexIO = new IndexIO( - jsonMapper, - () -> 0 - ); + indexIO = new IndexIO(jsonMapper, ColumnConfig.DEFAULT); indexMergerV9Factory = new IndexMergerV9Factory( jsonMapper, indexIO, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorsTest.java index 62258ceb48d..c0489c61b42 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/AppenderatorsTest.java @@ -38,6 +38,7 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMerger; import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.AppendableIndexSpec; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.RowIngestionMeters; @@ -179,10 +180,7 @@ public class AppenderatorsTest ); metrics = new FireDepartmentMetrics(); 
- IndexIO indexIO = new IndexIO( - objectMapper, - () -> 0 - ); + IndexIO indexIO = new IndexIO(objectMapper, ColumnConfig.DEFAULT); IndexMergerV9 indexMerger = new IndexMergerV9( objectMapper, indexIO, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/BatchAppenderatorsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/BatchAppenderatorsTest.java index d0ba1de3ce1..f8100772235 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/BatchAppenderatorsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/BatchAppenderatorsTest.java @@ -41,6 +41,7 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMerger; import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.AppendableIndexSpec; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.RowIngestionMeters; @@ -184,10 +185,7 @@ public class BatchAppenderatorsTest ); metrics = new FireDepartmentMetrics(); - IndexIO indexIO = new IndexIO( - objectMapper, - () -> 0 - ); + IndexIO indexIO = new IndexIO(objectMapper, ColumnConfig.DEFAULT); IndexMergerV9 indexMerger = new IndexMergerV9( objectMapper, indexIO, @@ -574,7 +572,7 @@ public class BatchAppenderatorsTest .config(config) .joinableFactory(NoopJoinableFactory.INSTANCE) .jsonMapper(mapper) - .indexIO(new IndexIO(new ObjectMapper(), () -> 0)) + .indexIO(new IndexIO(new ObjectMapper(), ColumnConfig.DEFAULT)) .indexMergerV9(indexMergerV9) .taskReportFileWriter(new NoopTestTaskReportFileWriter()) .authorizerMapper(AuthTestUtils.TEST_AUTHORIZER_MAPPER) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java index ad9841bf26a..2f6d53e1a9a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/CompactionTaskTest.java @@ -104,6 +104,7 @@ import org.apache.druid.segment.SimpleQueryableIndex; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; @@ -1983,7 +1984,7 @@ public class CompactionTaskTest Map segmentFileMap ) { - super(mapper, () -> 0); + super(mapper, ColumnConfig.DEFAULT); queryableIndexMap = Maps.newHashMapWithExpectedSize(segmentFileMap.size()); for (Entry entry : segmentFileMap.entrySet()) { diff --git a/pom.xml b/pom.xml index 6d0d1337498..934a25b88be 100644 --- a/pom.xml +++ b/pom.xml @@ -236,7 +236,7 @@ false - + sigar @@ -246,7 +246,7 @@ - + ${repoOrgId} diff --git a/processing/src/main/java/org/apache/druid/query/DruidProcessingConfig.java b/processing/src/main/java/org/apache/druid/query/DruidProcessingConfig.java index ac6c270bd3d..823c9e71efb 100644 --- a/processing/src/main/java/org/apache/druid/query/DruidProcessingConfig.java +++ b/processing/src/main/java/org/apache/druid/query/DruidProcessingConfig.java @@ -142,13 +142,6 @@ public abstract class DruidProcessingConfig extends ExecutorServiceConfig implem return DEFAULT_NUM_MERGE_BUFFERS; } - @Override - @Config(value = "${base_path}.columnCache.sizeBytes") - public int columnCacheSizeBytes() - { - return 0; - } - @Override @Config(value = "${base_path}.indexes.skipValueRangeIndexScale") public double skipValueRangeIndexScale() diff --git 
a/processing/src/main/java/org/apache/druid/segment/IndexIO.java b/processing/src/main/java/org/apache/druid/segment/IndexIO.java index c88edbb122d..f2d57a5517e 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexIO.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexIO.java @@ -65,10 +65,10 @@ import org.apache.druid.segment.data.IndexedIterable; import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.segment.data.VSizeColumnarMultiInts; import org.apache.druid.segment.serde.ComplexColumnPartSupplier; -import org.apache.druid.segment.serde.DictionaryEncodedColumnSupplier; -import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier; import org.apache.druid.segment.serde.FloatNumericColumnSupplier; import org.apache.druid.segment.serde.LongNumericColumnSupplier; +import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; +import org.apache.druid.segment.serde.StringUtf8DictionaryEncodedColumnSupplier; import org.joda.time.Interval; import javax.annotation.Nullable; @@ -109,7 +109,7 @@ public class IndexIO this.mapper = Preconditions.checkNotNull(mapper, "null ObjectMapper"); Preconditions.checkNotNull(columnConfig, "null ColumnConfig"); ImmutableMap.Builder indexLoadersBuilder = ImmutableMap.builder(); - LegacyIndexLoader legacyIndexLoader = new LegacyIndexLoader(new DefaultIndexIOHandler(), columnConfig); + LegacyIndexLoader legacyIndexLoader = new LegacyIndexLoader(new DefaultIndexIOHandler()); for (int i = 0; i <= V8_VERSION; i++) { indexLoadersBuilder.put(i, legacyIndexLoader); } @@ -364,7 +364,6 @@ public class IndexIO metrics.put(metric, holder); } - Map> dimValueLookups = new HashMap<>(); Map> dimValueUtf8Lookups = new HashMap<>(); Map dimColumns = new HashMap<>(); Map> bitmaps = new HashMap<>(); @@ -379,8 +378,6 @@ public class IndexIO fileDimensionName ); - // Duplicate the first buffer since we are reading the dictionary twice. 
- dimValueLookups.put(dimension, GenericIndexed.read(dimBuffer.duplicate(), GenericIndexed.STRING_STRATEGY)); dimValueUtf8Lookups.put(dimension, GenericIndexed.read(dimBuffer, GenericIndexed.UTF8_STRATEGY)); dimColumns.put(dimension, VSizeColumnarMultiInts.readFromByteBuffer(dimBuffer)); } @@ -410,7 +407,6 @@ public class IndexIO dataInterval, timestamps, metrics, - dimValueLookups, dimValueUtf8Lookups, dimColumns, bitmaps, @@ -432,12 +428,10 @@ public class IndexIO static class LegacyIndexLoader implements IndexLoader { private final IndexIOHandler legacyHandler; - private final ColumnConfig columnConfig; - LegacyIndexLoader(IndexIOHandler legacyHandler, ColumnConfig columnConfig) + LegacyIndexLoader(IndexIOHandler legacyHandler) { this.legacyHandler = legacyHandler; - this.columnConfig = columnConfig; } @Override @@ -452,21 +446,18 @@ public class IndexIO .setType(ValueType.STRING) .setHasMultipleValues(true) .setDictionaryEncodedColumnSupplier( - new DictionaryEncodedColumnSupplier( - index.getDimValueLookup(dimension), - index.getDimValueUtf8Lookup(dimension), + new StringUtf8DictionaryEncodedColumnSupplier<>( + index.getDimValueUtf8Lookup(dimension)::singleThreaded, null, - Suppliers.ofInstance(index.getDimColumn(dimension)), - columnConfig.columnCacheSizeBytes() + Suppliers.ofInstance(index.getDimColumn(dimension)) ) ); GenericIndexed bitmaps = index.getBitmapIndexes().get(dimension); ImmutableRTree spatialIndex = index.getSpatialIndexes().get(dimension); builder.setIndexSupplier( - new DictionaryEncodedStringIndexSupplier( + new StringUtf8ColumnIndexSupplier<>( new ConciseBitmapFactory(), - index.getDimValueLookup(dimension), - index.getDimValueUtf8Lookup(dimension), + index.getDimValueUtf8Lookup(dimension)::singleThreaded, bitmaps, spatialIndex ), diff --git a/processing/src/main/java/org/apache/druid/segment/MMappedIndex.java b/processing/src/main/java/org/apache/druid/segment/MMappedIndex.java index 737a2be9f20..e0fdcebd809 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/MMappedIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/MMappedIndex.java @@ -42,7 +42,6 @@ public class MMappedIndex final Interval dataInterval; final CompressedColumnarLongsSupplier timestamps; final Map metrics; - final Map> dimValueLookups; final Map> dimValueUtf8Lookups; final Map dimColumns; final Map> invertedIndexes; @@ -55,7 +54,6 @@ public class MMappedIndex Interval dataInterval, CompressedColumnarLongsSupplier timestamps, Map metrics, - Map> dimValueLookups, Map> dimValueUtf8Lookups, Map dimColumns, Map> invertedIndexes, @@ -68,7 +66,6 @@ public class MMappedIndex this.dataInterval = dataInterval; this.timestamps = timestamps; this.metrics = metrics; - this.dimValueLookups = dimValueLookups; this.dimValueUtf8Lookups = dimValueUtf8Lookups; this.dimColumns = dimColumns; this.invertedIndexes = invertedIndexes; @@ -97,11 +94,6 @@ public class MMappedIndex return metrics.get(metric); } - public GenericIndexed getDimValueLookup(String dimension) - { - return dimValueLookups.get(dimension); - } - public GenericIndexed getDimValueUtf8Lookup(String dimension) { return dimValueUtf8Lookups.get(dimension); diff --git a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java index 273f0dfb765..bad9dc6a6f3 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/column/ColumnConfig.java @@ -21,7 +21,23 @@ package org.apache.druid.segment.column; public interface ColumnConfig { - int columnCacheSizeBytes(); + ColumnConfig DEFAULT = new ColumnConfig() {}; + + ColumnConfig ALWAYS_USE_INDEXES = new ColumnConfig() + { + + @Override + public double skipValueRangeIndexScale() + { + return 1.0; + } + + @Override + public double skipValuePredicateIndexScale() + { + return 1.0; + } + }; /** * If the total number of rows in a 
column multiplied by this value is smaller than the total number of bitmap diff --git a/processing/src/main/java/org/apache/druid/segment/column/IndexedStringDruidPredicateIndex.java b/processing/src/main/java/org/apache/druid/segment/column/IndexedStringDruidPredicateIndex.java index 49badda066c..9c5aa9af121 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/IndexedStringDruidPredicateIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/column/IndexedStringDruidPredicateIndex.java @@ -31,41 +31,12 @@ import java.util.NoSuchElementException; public final class IndexedStringDruidPredicateIndex> implements DruidPredicateIndex { - static final ColumnConfig ALWAYS_USE_INDEXES = new ColumnConfig() - { - @Override - public int columnCacheSizeBytes() - { - return 0; - } - - @Override - public double skipValueRangeIndexScale() - { - return 1.0; - } - - @Override - public double skipValuePredicateIndexScale() - { - return 1.0; - } - }; private final BitmapFactory bitmapFactory; private final TDictionary dictionary; private final Indexed bitmaps; private final ColumnConfig columnConfig; private final int numRows; - public IndexedStringDruidPredicateIndex( - BitmapFactory bitmapFactory, - TDictionary dictionary, - Indexed bitmaps - ) - { - this(bitmapFactory, dictionary, bitmaps, ALWAYS_USE_INDEXES, Integer.MAX_VALUE); - } - public IndexedStringDruidPredicateIndex( BitmapFactory bitmapFactory, TDictionary dictionary, diff --git a/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8LexicographicalRangeIndex.java b/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8LexicographicalRangeIndex.java index 3f5c121e9ed..5ddb01ba02c 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8LexicographicalRangeIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/column/IndexedUtf8LexicographicalRangeIndex.java @@ -47,23 +47,6 @@ public final class IndexedUtf8LexicographicalRangeIndex 
bitmaps, - boolean hasNull - ) - { - this( - bitmapFactory, - dictionary, - bitmaps, - hasNull, - IndexedStringDruidPredicateIndex.ALWAYS_USE_INDEXES, - Integer.MAX_VALUE - ); - } - public IndexedUtf8LexicographicalRangeIndex( BitmapFactory bitmapFactory, TDictionary dictionary, diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java deleted file mode 100644 index 69b3af4c140..00000000000 --- a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java +++ /dev/null @@ -1,807 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.column; - -import com.google.common.base.Predicate; -import com.google.common.base.Predicates; -import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.query.filter.ValueMatcher; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; -import org.apache.druid.segment.AbstractDimensionSelector; -import org.apache.druid.segment.DimensionSelectorUtils; -import org.apache.druid.segment.IdLookup; -import org.apache.druid.segment.data.ColumnarInts; -import org.apache.druid.segment.data.ColumnarMultiInts; -import org.apache.druid.segment.data.Indexed; -import org.apache.druid.segment.data.IndexedInts; -import org.apache.druid.segment.data.ReadableOffset; -import org.apache.druid.segment.data.SingleIndexedInt; -import org.apache.druid.segment.filter.BooleanValueMatcher; -import org.apache.druid.segment.historical.HistoricalDimensionSelector; -import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector; -import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; -import org.apache.druid.segment.vector.ReadableVectorInspector; -import org.apache.druid.segment.vector.ReadableVectorOffset; -import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; -import org.apache.druid.segment.vector.VectorObjectSelector; -import org.apache.druid.utils.CloseableUtils; - -import javax.annotation.Nullable; -import java.io.Closeable; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.List; - -/** - * - */ -public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn -{ - @Nullable - private final ColumnarInts column; - @Nullable - private final ColumnarMultiInts multiValueColumn; - private final Indexed dictionary; - private final Indexed dictionaryUtf8; - - public StringDictionaryEncodedColumn( - @Nullable ColumnarInts singleValueColumn, - @Nullable 
ColumnarMultiInts multiValueColumn, - Indexed dictionary, - Indexed dictionaryUtf8 - ) - { - this.column = singleValueColumn; - this.multiValueColumn = multiValueColumn; - this.dictionary = dictionary; - this.dictionaryUtf8 = dictionaryUtf8; - } - - @Override - public int length() - { - return hasMultipleValues() ? multiValueColumn.size() : column.size(); - } - - @Override - public boolean hasMultipleValues() - { - return column == null; - } - - @Override - public int getSingleValueRow(int rowNum) - { - return column.get(rowNum); - } - - @Override - public IndexedInts getMultiValueRow(int rowNum) - { - return multiValueColumn.get(rowNum); - } - - @Override - @Nullable - public String lookupName(int id) - { - return dictionary.get(id); - } - - - /** - * Returns the value for a particular dictionary id as UTF-8 bytes. - * - * The returned buffer is in big-endian order. It is not reused, so callers may modify the position, limit, byte - * order, etc of the buffer. - * - * The returned buffer points to the original data, so callers must take care not to use it outside the valid - * lifetime of this column. 
- * - * @param id id to lookup the dictionary value for - * - * @return dictionary value for the given id, or null if the value is itself null - */ - @Nullable - public ByteBuffer lookupNameUtf8(int id) - { - return dictionaryUtf8.get(id); - } - - @Override - public int lookupId(String name) - { - return dictionary.indexOf(name); - } - - @Override - public int getCardinality() - { - return dictionary.size(); - } - - @Override - public HistoricalDimensionSelector makeDimensionSelector( - final ReadableOffset offset, - @Nullable final ExtractionFn extractionFn - ) - { - abstract class QueryableDimensionSelector extends AbstractDimensionSelector - implements HistoricalDimensionSelector, IdLookup - { - @Override - public int getValueCardinality() - { - /* - This is technically wrong if - extractionFn != null && (extractionFn.getExtractionType() != ExtractionFn.ExtractionType.ONE_TO_ONE || - !extractionFn.preservesOrdering()) - However current behavior allows some GroupBy-V1 queries to work that wouldn't work otherwise and doesn't - cause any problems due to special handling of extractionFn everywhere. - See https://github.com/apache/druid/pull/8433 - */ - return getCardinality(); - } - - @Override - public String lookupName(int id) - { - final String value = StringDictionaryEncodedColumn.this.lookupName(id); - return extractionFn == null ? value : extractionFn.apply(value); - } - - @Nullable - @Override - public ByteBuffer lookupNameUtf8(int id) - { - return StringDictionaryEncodedColumn.this.lookupNameUtf8(id); - } - - @Override - public boolean supportsLookupNameUtf8() - { - return true; - } - - @Override - public boolean nameLookupPossibleInAdvance() - { - return true; - } - - @Nullable - @Override - public IdLookup idLookup() - { - return extractionFn == null ? 
this : null; - } - - @Override - public int lookupId(String name) - { - if (extractionFn != null) { - throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function"); - } - return StringDictionaryEncodedColumn.this.lookupId(name); - } - } - - if (hasMultipleValues()) { - class MultiValueDimensionSelector extends QueryableDimensionSelector - { - @Override - public IndexedInts getRow() - { - return multiValueColumn.get(offset.getOffset()); - } - - @Override - public IndexedInts getRow(int offset) - { - return multiValueColumn.get(offset); - } - - @Override - public ValueMatcher makeValueMatcher(@Nullable String value) - { - return DimensionSelectorUtils.makeValueMatcherGeneric(this, value); - } - - @Override - public ValueMatcher makeValueMatcher(Predicate predicate) - { - return DimensionSelectorUtils.makeValueMatcherGeneric(this, predicate); - } - - @Nullable - @Override - public Object getObject() - { - return defaultGetObject(); - } - - @Override - public Class classOfObject() - { - return Object.class; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("multiValueColumn", multiValueColumn); - inspector.visit("offset", offset); - inspector.visit("extractionFn", extractionFn); - } - } - return new MultiValueDimensionSelector(); - } else { - class SingleValueQueryableDimensionSelector extends QueryableDimensionSelector - implements SingleValueHistoricalDimensionSelector - { - private final SingleIndexedInt row = new SingleIndexedInt(); - - @Override - public IndexedInts getRow() - { - row.setValue(getRowValue()); - return row; - } - - public int getRowValue() - { - return column.get(offset.getOffset()); - } - - @Override - public IndexedInts getRow(int offset) - { - row.setValue(getRowValue(offset)); - return row; - } - - @Override - public int getRowValue(int offset) - { - return column.get(offset); - } - - @Override - public ValueMatcher makeValueMatcher(final @Nullable 
String value) - { - if (extractionFn == null) { - final int valueId = lookupId(value); - if (valueId >= 0) { - return new ValueMatcher() - { - @Override - public boolean matches() - { - return getRowValue() == valueId; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("column", StringDictionaryEncodedColumn.this); - } - }; - } else { - return BooleanValueMatcher.of(false); - } - } else { - // Employ caching BitSet optimization - return makeValueMatcher(Predicates.equalTo(value)); - } - } - - @Override - public ValueMatcher makeValueMatcher(final Predicate predicate) - { - final BitSet checkedIds = new BitSet(getCardinality()); - final BitSet matchingIds = new BitSet(getCardinality()); - - // Lazy matcher; only check an id if matches() is called. - return new ValueMatcher() - { - @Override - public boolean matches() - { - final int id = getRowValue(); - - if (checkedIds.get(id)) { - return matchingIds.get(id); - } else { - final boolean matches = predicate.apply(lookupName(id)); - checkedIds.set(id); - if (matches) { - matchingIds.set(id); - } - return matches; - } - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("column", StringDictionaryEncodedColumn.this); - } - }; - } - - @Override - public Object getObject() - { - return lookupName(getRowValue()); - } - - @Override - public Class classOfObject() - { - return String.class; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("column", column); - inspector.visit("offset", offset); - inspector.visit("extractionFn", extractionFn); - } - } - return new SingleValueQueryableDimensionSelector(); - } - } - - @Override - public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset) - { - final class StringVectorSelector extends StringSingleValueDimensionVectorSelector - { - public 
StringVectorSelector() - { - super(column, offset); - } - - @Override - public int getValueCardinality() - { - return getCardinality(); - } - - @Nullable - @Override - public String lookupName(final int id) - { - return StringDictionaryEncodedColumn.this.lookupName(id); - } - - @Nullable - @Override - public ByteBuffer lookupNameUtf8(int id) - { - return StringDictionaryEncodedColumn.this.lookupNameUtf8(id); - } - - @Override - public int lookupId(@Nullable final String name) - { - return StringDictionaryEncodedColumn.this.lookupId(name); - } - } - - return new StringVectorSelector(); - } - - @Override - public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(final ReadableVectorOffset offset) - { - final class MultiStringVectorSelector extends StringMultiValueDimensionVectorSelector - { - - public MultiStringVectorSelector() - { - super(multiValueColumn, offset); - } - - @Override - public int getValueCardinality() - { - return getCardinality(); - } - - @Nullable - @Override - public String lookupName(final int id) - { - return StringDictionaryEncodedColumn.this.lookupName(id); - } - - @Nullable - @Override - public ByteBuffer lookupNameUtf8(int id) - { - return StringDictionaryEncodedColumn.this.lookupNameUtf8(id); - } - - @Override - public int lookupId(@Nullable final String name) - { - return StringDictionaryEncodedColumn.this.lookupId(name); - } - } - return new MultiStringVectorSelector(); - } - - @Override - public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) - { - if (!hasMultipleValues()) { - final class StringVectorSelector extends StringVectorObjectSelector - { - public StringVectorSelector() - { - super(column, offset); - } - - @Nullable - @Override - public String lookupName(int id) - { - return StringDictionaryEncodedColumn.this.lookupName(id); - } - } - - return new StringVectorSelector(); - } else { - final class MultiStringVectorSelector extends MultiValueStringVectorObjectSelector - { - public 
MultiStringVectorSelector() - { - super(multiValueColumn, offset); - } - - @Nullable - @Override - public String lookupName(int id) - { - return StringDictionaryEncodedColumn.this.lookupName(id); - } - } - - return new MultiStringVectorSelector(); - } - } - - @Override - public void close() throws IOException - { - CloseableUtils.closeAll( - dictionary instanceof Closeable ? (Closeable) dictionary : null /* Dictionary may be CachingIndexed */, - column, - multiValueColumn - ); - } - - /** - * Base type for a {@link SingleValueDimensionVectorSelector} for a dictionary encoded {@link ColumnType#STRING} - * built around a {@link ColumnarInts}. Dictionary not included - BYO dictionary lookup methods. - * - * Assumes that all implementations return true for {@link #supportsLookupNameUtf8()}. - */ - public abstract static class StringSingleValueDimensionVectorSelector - implements SingleValueDimensionVectorSelector, IdLookup - { - private final ColumnarInts column; - private final ReadableVectorOffset offset; - private final int[] vector; - private int id = ReadableVectorInspector.NULL_ID; - - public StringSingleValueDimensionVectorSelector( - ColumnarInts column, - ReadableVectorOffset offset - ) - { - this.column = column; - this.offset = offset; - this.vector = new int[offset.getMaxVectorSize()]; - } - - @Override - public int[] getRowVector() - { - if (id == offset.getId()) { - return vector; - } - - if (offset.isContiguous()) { - column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize()); - } else { - column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize()); - } - - id = offset.getId(); - return vector; - } - - @Override - public boolean supportsLookupNameUtf8() - { - return true; - } - - @Override - public boolean nameLookupPossibleInAdvance() - { - return true; - } - - @Nullable - @Override - public IdLookup idLookup() - { - return this; - } - - @Override - public int getCurrentVectorSize() - { - return offset.getCurrentVectorSize(); 
- } - - @Override - public int getMaxVectorSize() - { - return offset.getMaxVectorSize(); - } - } - - /** - * Base type for a {@link MultiValueDimensionVectorSelector} for a dictionary encoded {@link ColumnType#STRING} - * built around a {@link ColumnarMultiInts}. Dictionary not included - BYO dictionary lookup methods. - * - * Assumes that all implementations return true for {@link #supportsLookupNameUtf8()}. - */ - public abstract static class StringMultiValueDimensionVectorSelector - implements MultiValueDimensionVectorSelector, IdLookup - { - private final ColumnarMultiInts multiValueColumn; - private final ReadableVectorOffset offset; - - private final IndexedInts[] vector; - private int id = ReadableVectorInspector.NULL_ID; - - public StringMultiValueDimensionVectorSelector( - ColumnarMultiInts multiValueColumn, - ReadableVectorOffset offset - ) - { - this.multiValueColumn = multiValueColumn; - this.offset = offset; - this.vector = new IndexedInts[offset.getMaxVectorSize()]; - } - - @Override - public IndexedInts[] getRowVector() - { - if (id == offset.getId()) { - return vector; - } - - if (offset.isContiguous()) { - final int currentOffset = offset.getStartOffset(); - final int numRows = offset.getCurrentVectorSize(); - - for (int i = 0; i < numRows; i++) { - // Must use getUnshared, otherwise all elements in the vector could be the same shared object. - vector[i] = multiValueColumn.getUnshared(i + currentOffset); - } - } else { - final int[] offsets = offset.getOffsets(); - final int numRows = offset.getCurrentVectorSize(); - - for (int i = 0; i < numRows; i++) { - // Must use getUnshared, otherwise all elements in the vector could be the same shared object. 
- vector[i] = multiValueColumn.getUnshared(offsets[i]); - } - } - - id = offset.getId(); - return vector; - } - - @Override - public boolean supportsLookupNameUtf8() - { - return true; - } - - @Override - public boolean nameLookupPossibleInAdvance() - { - return true; - } - - @Nullable - @Override - public IdLookup idLookup() - { - return this; - } - @Override - public int getCurrentVectorSize() - { - return offset.getCurrentVectorSize(); - } - - @Override - public int getMaxVectorSize() - { - return offset.getMaxVectorSize(); - } - } - - /** - * Base type for a {@link VectorObjectSelector} for a dictionary encoded {@link ColumnType#STRING} - * built around a {@link ColumnarInts}. Dictionary not included - BYO dictionary lookup methods. - */ - public abstract static class StringVectorObjectSelector implements VectorObjectSelector - { - private final ColumnarInts column; - private final ReadableVectorOffset offset; - - private final int[] vector; - private final Object[] strings; - private int id = ReadableVectorInspector.NULL_ID; - - public StringVectorObjectSelector( - ColumnarInts column, - ReadableVectorOffset offset - ) - { - this.column = column; - this.offset = offset; - this.vector = new int[offset.getMaxVectorSize()]; - this.strings = new Object[offset.getMaxVectorSize()]; - } - - @Override - public Object[] getObjectVector() - { - if (id == offset.getId()) { - return strings; - } - - if (offset.isContiguous()) { - column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize()); - } else { - column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize()); - } - for (int i = 0; i < offset.getCurrentVectorSize(); i++) { - strings[i] = lookupName(vector[i]); - } - id = offset.getId(); - - return strings; - } - - @Override - public int getMaxVectorSize() - { - return offset.getMaxVectorSize(); - } - - @Override - public int getCurrentVectorSize() - { - return offset.getCurrentVectorSize(); - } - - @Nullable - public abstract String 
lookupName(int id); - } - - /** - * Base type for a {@link VectorObjectSelector} for a dictionary encoded {@link ColumnType#STRING} - * built around a {@link ColumnarMultiInts}. Dictionary not included - BYO dictionary lookup methods. - */ - public abstract static class MultiValueStringVectorObjectSelector implements VectorObjectSelector - { - private final ColumnarMultiInts multiValueColumn; - private final ReadableVectorOffset offset; - - private final IndexedInts[] vector; - private final Object[] strings; - private int id = ReadableVectorInspector.NULL_ID; - - public MultiValueStringVectorObjectSelector( - ColumnarMultiInts multiValueColumn, - ReadableVectorOffset offset - ) - { - this.multiValueColumn = multiValueColumn; - this.offset = offset; - this.vector = new IndexedInts[offset.getMaxVectorSize()]; - this.strings = new Object[offset.getMaxVectorSize()]; - } - - @Nullable - public abstract String lookupName(int id); - - @Override - public Object[] getObjectVector() - { - if (id == offset.getId()) { - return strings; - } - - if (offset.isContiguous()) { - final int currentOffset = offset.getStartOffset(); - final int numRows = offset.getCurrentVectorSize(); - - for (int i = 0; i < numRows; i++) { - // Must use getUnshared, otherwise all elements in the vector could be the same shared object. - vector[i] = multiValueColumn.getUnshared(i + currentOffset); - } - } else { - final int[] offsets = offset.getOffsets(); - final int numRows = offset.getCurrentVectorSize(); - - for (int i = 0; i < numRows; i++) { - // Must use getUnshared, otherwise all elements in the vector could be the same shared object. 
- vector[i] = multiValueColumn.getUnshared(offsets[i]); - } - } - - for (int i = 0; i < offset.getCurrentVectorSize(); i++) { - IndexedInts ithRow = vector[i]; - if (ithRow.size() == 0) { - strings[i] = null; - } else if (ithRow.size() == 1) { - strings[i] = lookupName(ithRow.get(0)); - } else { - List row = new ArrayList<>(ithRow.size()); - // noinspection SSBasedInspection - for (int j = 0; j < ithRow.size(); j++) { - row.add(lookupName(ithRow.get(j))); - } - strings[i] = row; - } - } - - id = offset.getId(); - return strings; - } - - @Override - public int getMaxVectorSize() - { - return offset.getMaxVectorSize(); - } - - @Override - public int getCurrentVectorSize() - { - return offset.getCurrentVectorSize(); - } - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java index baf7a4be9c7..63f2b36313c 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringUtf8DictionaryEncodedColumn.java @@ -39,6 +39,7 @@ import org.apache.druid.segment.historical.HistoricalDimensionSelector; import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector; import org.apache.druid.segment.nested.NestedCommonFormatColumn; import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorInspector; import org.apache.druid.segment.vector.ReadableVectorOffset; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -47,19 +48,18 @@ import org.apache.druid.utils.CloseableUtils; import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.BitSet; +import java.util.List; /** - * 
{@link DictionaryEncodedColumn} for a column which has only a UTF-8 dictionary, no String dictionary. + * {@link DictionaryEncodedColumn} for a column which has a {@link ByteBuffer} based UTF-8 dictionary. *

- * This class is otherwise nearly identical to {@link StringDictionaryEncodedColumn} other than lacking a - * String dictionary. *

* Implements {@link NestedCommonFormatColumn} so it can be used as a reader for single value string specializations * of {@link org.apache.druid.segment.AutoTypeColumnIndexer}. */ -public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColumn, - NestedCommonFormatColumn +public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColumn, NestedCommonFormatColumn { @Nullable private final ColumnarInts column; @@ -364,7 +364,7 @@ public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColum @Override public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset) { - final class StringVectorSelector extends StringDictionaryEncodedColumn.StringSingleValueDimensionVectorSelector + final class StringVectorSelector extends StringSingleValueDimensionVectorSelector { public StringVectorSelector() { @@ -404,7 +404,7 @@ public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColum @Override public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(final ReadableVectorOffset offset) { - final class MultiStringVectorSelector extends StringDictionaryEncodedColumn.StringMultiValueDimensionVectorSelector + final class MultiStringVectorSelector extends StringMultiValueDimensionVectorSelector { public MultiStringVectorSelector() { @@ -446,7 +446,7 @@ public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColum public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) { if (!hasMultipleValues()) { - final class StringVectorSelector extends StringDictionaryEncodedColumn.StringVectorObjectSelector + final class StringVectorSelector extends StringVectorObjectSelector { public StringVectorSelector() { @@ -462,7 +462,7 @@ public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColum } return new StringVectorSelector(); } else { - final class MultiStringVectorSelector extends 
StringDictionaryEncodedColumn.MultiValueStringVectorObjectSelector + final class MultiStringVectorSelector extends MultiValueStringVectorObjectSelector { public MultiStringVectorSelector() { @@ -497,4 +497,309 @@ public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColum { return new StringEncodingStrategies.Utf8ToStringIndexed(utf8Dictionary); } + + + + /** + * Base type for a {@link SingleValueDimensionVectorSelector} for a dictionary encoded {@link ColumnType#STRING} + * built around a {@link ColumnarInts}. Dictionary not included - BYO dictionary lookup methods. + * + * Assumes that all implementations return true for {@link #supportsLookupNameUtf8()}. + */ + public abstract static class StringSingleValueDimensionVectorSelector + implements SingleValueDimensionVectorSelector, IdLookup + { + private final ColumnarInts column; + private final ReadableVectorOffset offset; + private final int[] vector; + private int id = ReadableVectorInspector.NULL_ID; + + public StringSingleValueDimensionVectorSelector( + ColumnarInts column, + ReadableVectorOffset offset + ) + { + this.column = column; + this.offset = offset; + this.vector = new int[offset.getMaxVectorSize()]; + } + + @Override + public int[] getRowVector() + { + if (id == offset.getId()) { + return vector; + } + + if (offset.isContiguous()) { + column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize()); + } else { + column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize()); + } + + id = offset.getId(); + return vector; + } + + @Override + public boolean supportsLookupNameUtf8() + { + return true; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return true; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return this; + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + } + + /** + * 
Base type for a {@link MultiValueDimensionVectorSelector} for a dictionary encoded {@link ColumnType#STRING} + * built around a {@link ColumnarMultiInts}. Dictionary not included - BYO dictionary lookup methods. + * + * Assumes that all implementations return true for {@link #supportsLookupNameUtf8()}. + */ + public abstract static class StringMultiValueDimensionVectorSelector + implements MultiValueDimensionVectorSelector, IdLookup + { + private final ColumnarMultiInts multiValueColumn; + private final ReadableVectorOffset offset; + + private final IndexedInts[] vector; + private int id = ReadableVectorInspector.NULL_ID; + + public StringMultiValueDimensionVectorSelector( + ColumnarMultiInts multiValueColumn, + ReadableVectorOffset offset + ) + { + this.multiValueColumn = multiValueColumn; + this.offset = offset; + this.vector = new IndexedInts[offset.getMaxVectorSize()]; + } + + @Override + public IndexedInts[] getRowVector() + { + if (id == offset.getId()) { + return vector; + } + + if (offset.isContiguous()) { + final int currentOffset = offset.getStartOffset(); + final int numRows = offset.getCurrentVectorSize(); + + for (int i = 0; i < numRows; i++) { + // Must use getUnshared, otherwise all elements in the vector could be the same shared object. + vector[i] = multiValueColumn.getUnshared(i + currentOffset); + } + } else { + final int[] offsets = offset.getOffsets(); + final int numRows = offset.getCurrentVectorSize(); + + for (int i = 0; i < numRows; i++) { + // Must use getUnshared, otherwise all elements in the vector could be the same shared object. 
+ vector[i] = multiValueColumn.getUnshared(offsets[i]); + } + } + + id = offset.getId(); + return vector; + } + + @Override + public boolean supportsLookupNameUtf8() + { + return true; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return true; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return this; + } + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + } + + /** + * Base type for a {@link VectorObjectSelector} for a dictionary encoded {@link ColumnType#STRING} + * built around a {@link ColumnarInts}. Dictionary not included - BYO dictionary lookup methods. + */ + public abstract static class StringVectorObjectSelector implements VectorObjectSelector + { + private final ColumnarInts column; + private final ReadableVectorOffset offset; + + private final int[] vector; + private final Object[] strings; + private int id = ReadableVectorInspector.NULL_ID; + + public StringVectorObjectSelector( + ColumnarInts column, + ReadableVectorOffset offset + ) + { + this.column = column; + this.offset = offset; + this.vector = new int[offset.getMaxVectorSize()]; + this.strings = new Object[offset.getMaxVectorSize()]; + } + + @Override + public Object[] getObjectVector() + { + if (id == offset.getId()) { + return strings; + } + + if (offset.isContiguous()) { + column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize()); + } else { + column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize()); + } + for (int i = 0; i < offset.getCurrentVectorSize(); i++) { + strings[i] = lookupName(vector[i]); + } + id = offset.getId(); + + return strings; + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + + @Nullable + public abstract String 
lookupName(int id); + } + + /** + * Base type for a {@link VectorObjectSelector} for a dictionary encoded {@link ColumnType#STRING} + * built around a {@link ColumnarMultiInts}. Dictionary not included - BYO dictionary lookup methods. + */ + public abstract static class MultiValueStringVectorObjectSelector implements VectorObjectSelector + { + private final ColumnarMultiInts multiValueColumn; + private final ReadableVectorOffset offset; + + private final IndexedInts[] vector; + private final Object[] strings; + private int id = ReadableVectorInspector.NULL_ID; + + public MultiValueStringVectorObjectSelector( + ColumnarMultiInts multiValueColumn, + ReadableVectorOffset offset + ) + { + this.multiValueColumn = multiValueColumn; + this.offset = offset; + this.vector = new IndexedInts[offset.getMaxVectorSize()]; + this.strings = new Object[offset.getMaxVectorSize()]; + } + + @Nullable + public abstract String lookupName(int id); + + @Override + public Object[] getObjectVector() + { + if (id == offset.getId()) { + return strings; + } + + if (offset.isContiguous()) { + final int currentOffset = offset.getStartOffset(); + final int numRows = offset.getCurrentVectorSize(); + + for (int i = 0; i < numRows; i++) { + // Must use getUnshared, otherwise all elements in the vector could be the same shared object. + vector[i] = multiValueColumn.getUnshared(i + currentOffset); + } + } else { + final int[] offsets = offset.getOffsets(); + final int numRows = offset.getCurrentVectorSize(); + + for (int i = 0; i < numRows; i++) { + // Must use getUnshared, otherwise all elements in the vector could be the same shared object. 
+ vector[i] = multiValueColumn.getUnshared(offsets[i]); + } + } + + for (int i = 0; i < offset.getCurrentVectorSize(); i++) { + IndexedInts ithRow = vector[i]; + if (ithRow.size() == 0) { + strings[i] = null; + } else if (ithRow.size() == 1) { + strings[i] = lookupName(ithRow.get(0)); + } else { + List row = new ArrayList<>(ithRow.size()); + // noinspection SSBasedInspection + for (int j = 0; j < ithRow.size(); j++) { + row.add(lookupName(ithRow.get(j))); + } + strings[i] = row; + } + } + + id = offset.getId(); + return strings; + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + } } diff --git a/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java b/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java deleted file mode 100644 index 6632eaa95e6..00000000000 --- a/processing/src/main/java/org/apache/druid/segment/data/CachingIndexed.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.data; - -import org.apache.druid.java.util.common.Pair; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; - -import javax.annotation.Nullable; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.function.ToIntFunction; - -public class CachingIndexed implements CloseableIndexed -{ - private static final int INITIAL_CACHE_CAPACITY = 16384; - - private static final Logger log = new Logger(CachingIndexed.class); - - private final Indexed delegate; - private final ToIntFunction sizeFn; - @Nullable - private final SizedLRUMap cachedValues; - - /** - * Creates a CachingIndexed wrapping the given GenericIndexed with a value lookup cache - * - * CachingIndexed objects are not thread safe and should only be used by a single thread at a time. - * CachingIndexed objects must be closed to release any underlying cache resources. - * - * @param delegate the Indexed to wrap with a lookup cache. 
- * @param sizeFn function that determines the size in bytes of an object - * @param lookupCacheSize maximum size in bytes of the lookup cache if greater than zero - */ - public CachingIndexed(Indexed delegate, final ToIntFunction sizeFn, final int lookupCacheSize) - { - this.delegate = delegate; - this.sizeFn = sizeFn; - - if (lookupCacheSize > 0) { - log.debug("Allocating column cache of max size[%d]", lookupCacheSize); - cachedValues = new SizedLRUMap<>(INITIAL_CACHE_CAPACITY, lookupCacheSize); - } else { - cachedValues = null; - } - } - - @Override - public int size() - { - return delegate.size(); - } - - @Override - public T get(int index) - { - if (cachedValues != null) { - final T cached = cachedValues.getValue(index); - if (cached != null) { - return cached; - } - - final T value = delegate.get(index); - cachedValues.put(index, value, sizeFn.applyAsInt(value)); - return value; - } else { - return delegate.get(index); - } - } - - @Override - public int indexOf(@Nullable T value) - { - return delegate.indexOf(value); - } - - @Override - public boolean isSorted() - { - return delegate.isSorted(); - } - - @Override - public Iterator iterator() - { - return delegate.iterator(); - } - - @Override - public void close() - { - if (cachedValues != null) { - log.debug("Closing column cache"); - cachedValues.clear(); - } - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("cachedValues", cachedValues != null); - inspector.visit("delegate", delegate); - } - - private static class SizedLRUMap extends LinkedHashMap> - { - private final int maxBytes; - private int numBytes = 0; - - SizedLRUMap(int initialCapacity, int maxBytes) - { - super(initialCapacity, 0.75f, true); - this.maxBytes = maxBytes; - } - - @Override - protected boolean removeEldestEntry(Map.Entry> eldest) - { - if (numBytes > maxBytes) { - numBytes -= eldest.getValue().lhs; - return true; - } - return false; - } - - public void put(K key, @Nullable V 
value, int size) - { - final int totalSize = size + 48; // add approximate object overhead - numBytes += totalSize; - super.put(key, new Pair<>(totalSize, value)); - } - - @Nullable - public V getValue(Object key) - { - final Pair sizeValuePair = super.get(key); - return sizeValuePair == null ? null : sizeValuePair.rhs; - } - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java index 6dd627d652f..b21760416fe 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldDictionaryEncodedColumn.java @@ -40,7 +40,7 @@ import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.LongColumnSelector; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.StringDictionaryEncodedColumn; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.column.Types; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.ColumnarDoubles; @@ -678,7 +678,7 @@ public class NestedFieldDictionaryEncodedColumn stringDictionary; - final Supplier frontCodedStringDictionarySupplier; + final Supplier> dictionarySupplier; final ByteBuffer stringDictionaryBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile( mapper, @@ -92,17 +77,19 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier stringDictionary; - private final Supplier frontCodedStringDictionarySupplier; + private final Supplier> dictionarySupplier; private final Supplier encodedColumnSupplier; private final GenericIndexed valueIndexes; - private final ImmutableBitmap nullValueBitmap; - private final BitmapFactory bitmapFactory; - private final 
ColumnConfig columnConfig; - private final int numRows; + private final ColumnIndexSupplier stringIndexSupplier; private ScalarStringColumnAndIndexSupplier( - GenericIndexed stringDictionary, - Supplier frontCodedStringDictionarySupplier, + Supplier> dictionarySupplier, Supplier encodedColumnSupplier, GenericIndexed valueIndexes, BitmapSerdeFactory serdeFactory, @@ -176,27 +158,23 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier( + serdeFactory.getBitmapFactory(), + dictionarySupplier, + valueIndexes, + null, + columnConfig, + numRows + ); } @Override public NestedCommonFormatColumn get() { - if (frontCodedStringDictionarySupplier != null) { - return new StringUtf8DictionaryEncodedColumn( - encodedColumnSupplier.get(), - null, - frontCodedStringDictionarySupplier.get() - ); - } - return new StringUtf8DictionaryEncodedColumn(encodedColumnSupplier.get(), null, stringDictionary.singleThreaded()); + return new StringUtf8DictionaryEncodedColumn(encodedColumnSupplier.get(), null, dictionarySupplier.get()); } @Nullable @@ -204,44 +182,7 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier T as(Class clazz) { if (valueIndexes != null) { - final Indexed singleThreadedBitmaps = valueIndexes.singleThreaded(); - final Indexed utf8Dictionary = frontCodedStringDictionarySupplier == null - ? 
stringDictionary.singleThreaded() - : frontCodedStringDictionarySupplier.get(); - if (clazz.equals(NullValueIndex.class)) { - final BitmapColumnIndex nullIndex = new SimpleImmutableBitmapIndex(nullValueBitmap); - return (T) (NullValueIndex) () -> nullIndex; - } else if (clazz.equals(StringValueSetIndex.class)) { - return (T) new IndexedUtf8ValueSetIndex<>( - bitmapFactory, - utf8Dictionary, - singleThreadedBitmaps - ); - } else if (clazz.equals(DruidPredicateIndex.class)) { - return (T) new IndexedStringDruidPredicateIndex<>( - bitmapFactory, - new StringEncodingStrategies.Utf8ToStringIndexed(utf8Dictionary), - singleThreadedBitmaps, - columnConfig, - numRows - ); - } else if (clazz.equals(LexicographicalRangeIndex.class)) { - return (T) new IndexedUtf8LexicographicalRangeIndex<>( - bitmapFactory, - utf8Dictionary, - singleThreadedBitmaps, - utf8Dictionary.get(0) == null, - columnConfig, - numRows - ); - } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) - || clazz.equals(DictionaryEncodedValueIndex.class)) { - return (T) new IndexedStringDictionaryEncodedStringValueIndex<>( - bitmapFactory, - new StringEncodingStrategies.Utf8ToStringIndexed(utf8Dictionary), - valueIndexes - ); - } + return stringIndexSupplier.as(clazz); } return null; } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java index befe16c5511..bf09e9fe157 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/VariantColumn.java @@ -40,8 +40,8 @@ import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ColumnTypeFactory; import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.StringDictionaryEncodedColumn; import 
org.apache.druid.segment.column.StringEncodingStrategies; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.FixedIndexed; import org.apache.druid.segment.data.FrontCodedIntArrayIndexed; @@ -668,7 +668,7 @@ public class VariantColumn> @Override public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(ReadableVectorOffset offset) { - final class StringVectorSelector extends StringDictionaryEncodedColumn.StringSingleValueDimensionVectorSelector + final class StringVectorSelector extends StringUtf8DictionaryEncodedColumn.StringSingleValueDimensionVectorSelector { public StringVectorSelector() { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java index b1eef3307b6..20ca158d1c6 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnPartSerde.java @@ -48,6 +48,7 @@ import org.apache.druid.segment.data.FrontCodedIndexed; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.GenericIndexedWriter; import org.apache.druid.segment.data.ImmutableRTreeObjectStrategy; +import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.V3CompressedVSizeColumnarMultiIntsSupplier; import org.apache.druid.segment.data.VSizeColumnarInts; import org.apache.druid.segment.data.VSizeColumnarMultiInts; @@ -313,16 +314,21 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde final int dictionaryStartPosition = buffer.position(); final byte dictionaryVersion = buffer.get(); + final Supplier> dictionarySupplier; if (dictionaryVersion == EncodedStringDictionaryWriter.VERSION) { final byte encodingId = 
buffer.get(); if (encodingId == StringEncodingStrategy.FRONT_CODED_ID) { - readFrontCodedColumn(buffer, builder, rVersion, rFlags, hasMultipleValues); + dictionarySupplier = FrontCodedIndexed.read(buffer, byteOrder); } else if (encodingId == StringEncodingStrategy.UTF8_ID) { // this cannot happen naturally right now since generic indexed is written in the 'legacy' format, but // this provides backwards compatibility should we switch at some point in the future to always // writing dictionaryVersion - readGenericIndexedColumn(buffer, builder, columnConfig, rVersion, rFlags, hasMultipleValues); + dictionarySupplier = GenericIndexed.read( + buffer, + GenericIndexed.UTF8_STRATEGY, + builder.getFileMapper() + )::singleThreaded; } else { throw new ISE("impossible, unknown encoding strategy id: %s", encodingId); } @@ -331,101 +337,13 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde // as dictionaryVersion is actually also the GenericIndexed version, so we reset start position so the // GenericIndexed version can be correctly read buffer.position(dictionaryStartPosition); - readGenericIndexedColumn(buffer, builder, columnConfig, rVersion, rFlags, hasMultipleValues); - } - } - - private void readGenericIndexedColumn( - ByteBuffer buffer, - ColumnBuilder builder, - ColumnConfig columnConfig, - VERSION rVersion, - int rFlags, - boolean hasMultipleValues - ) - { - // Duplicate the first buffer since we are reading the dictionary twice. 
- final GenericIndexed rDictionary = GenericIndexed.read( - buffer.duplicate(), - GenericIndexed.STRING_STRATEGY, - builder.getFileMapper() - ); - - final GenericIndexed rDictionaryUtf8 = GenericIndexed.read( - buffer, - GenericIndexed.UTF8_STRATEGY, - builder.getFileMapper() - ); - - final WritableSupplier rSingleValuedColumn; - final WritableSupplier rMultiValuedColumn; - - if (hasMultipleValues) { - rMultiValuedColumn = readMultiValuedColumn(rVersion, buffer, rFlags); - rSingleValuedColumn = null; - } else { - rSingleValuedColumn = readSingleValuedColumn(rVersion, buffer); - rMultiValuedColumn = null; - } - - final String firstDictionaryEntry = rDictionary.get(0); - - DictionaryEncodedColumnSupplier dictionaryEncodedColumnSupplier = new DictionaryEncodedColumnSupplier( - rDictionary, - rDictionaryUtf8, - rSingleValuedColumn, - rMultiValuedColumn, - columnConfig.columnCacheSizeBytes() - ); - - builder.setHasMultipleValues(hasMultipleValues) - .setHasNulls(firstDictionaryEntry == null) - .setDictionaryEncodedColumnSupplier(dictionaryEncodedColumnSupplier); - - GenericIndexed rBitmaps = null; - ImmutableRTree rSpatialIndex = null; - if (!Feature.NO_BITMAP_INDEX.isSet(rFlags)) { - rBitmaps = GenericIndexed.read( + dictionarySupplier = GenericIndexed.read( buffer, - bitmapSerdeFactory.getObjectStrategy(), + GenericIndexed.UTF8_STRATEGY, builder.getFileMapper() - ); + )::singleThreaded; } - if (buffer.hasRemaining()) { - rSpatialIndex = new ImmutableRTreeObjectStrategy( - bitmapSerdeFactory.getBitmapFactory() - ).fromByteBufferWithSize(buffer); - } - - if (rBitmaps != null || rSpatialIndex != null) { - builder.setIndexSupplier( - new DictionaryEncodedStringIndexSupplier( - bitmapSerdeFactory.getBitmapFactory(), - rDictionary, - rDictionaryUtf8, - rBitmaps, - rSpatialIndex - ), - rBitmaps != null, - rSpatialIndex != null - ); - } - } - - private void readFrontCodedColumn( - ByteBuffer buffer, - ColumnBuilder builder, - VERSION rVersion, - int rFlags, - boolean 
hasMultipleValues - ) - { - final Supplier rUtf8Dictionary = FrontCodedIndexed.read( - buffer, - byteOrder - ); - final WritableSupplier rSingleValuedColumn; final WritableSupplier rMultiValuedColumn; @@ -437,17 +355,16 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde rMultiValuedColumn = null; } - final boolean hasNulls = rUtf8Dictionary.get().get(0) == null; + final boolean hasNulls = dictionarySupplier.get().get(0) == null; - StringFrontCodedDictionaryEncodedColumnSupplier dictionaryEncodedColumnSupplier = - new StringFrontCodedDictionaryEncodedColumnSupplier( - rUtf8Dictionary, - rSingleValuedColumn, - rMultiValuedColumn - ); + final StringUtf8DictionaryEncodedColumnSupplier supplier = new StringUtf8DictionaryEncodedColumnSupplier<>( + dictionarySupplier, + rSingleValuedColumn, + rMultiValuedColumn + ); builder.setHasMultipleValues(hasMultipleValues) .setHasNulls(hasNulls) - .setDictionaryEncodedColumnSupplier(dictionaryEncodedColumnSupplier); + .setDictionaryEncodedColumnSupplier(supplier); GenericIndexed rBitmaps = null; ImmutableRTree rSpatialIndex = null; @@ -467,9 +384,9 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde if (rBitmaps != null || rSpatialIndex != null) { builder.setIndexSupplier( - new StringFrontCodedColumnIndexSupplier( + new StringUtf8ColumnIndexSupplier( bitmapSerdeFactory.getBitmapFactory(), - rUtf8Dictionary, + dictionarySupplier, rBitmaps, rSpatialIndex ), diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java deleted file mode 100644 index a8148ca6a25..00000000000 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedColumnSupplier.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.serde; - -import com.google.common.base.Supplier; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.StringDictionaryEncodedColumn; -import org.apache.druid.segment.data.CachingIndexed; -import org.apache.druid.segment.data.ColumnarInts; -import org.apache.druid.segment.data.ColumnarMultiInts; -import org.apache.druid.segment.data.GenericIndexed; -import org.apache.druid.segment.data.Indexed; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; - -/** - * - */ -public class DictionaryEncodedColumnSupplier implements Supplier> -{ - private final GenericIndexed dictionary; - private final GenericIndexed dictionaryUtf8; - private final @Nullable Supplier singleValuedColumn; - private final @Nullable Supplier multiValuedColumn; - private final int lookupCacheSize; - - public DictionaryEncodedColumnSupplier( - GenericIndexed dictionary, - GenericIndexed dictionaryUtf8, - @Nullable Supplier singleValuedColumn, - @Nullable Supplier multiValuedColumn, - int lookupCacheSize - ) - { - this.dictionary = dictionary; - this.dictionaryUtf8 = dictionaryUtf8; - this.singleValuedColumn = singleValuedColumn; - this.multiValuedColumn = 
multiValuedColumn; - this.lookupCacheSize = lookupCacheSize; - } - - @Override - public DictionaryEncodedColumn get() - { - final Indexed cacheWrappedDictionary; - final Indexed singleThreadedDictionaryUtf8 = dictionaryUtf8.singleThreaded(); - - if (lookupCacheSize > 0) { - cacheWrappedDictionary = new CachingIndexed<>( - dictionary.singleThreaded(), - s -> s == null ? 0 : s.length() * Character.BYTES, - lookupCacheSize - ); - } else { - cacheWrappedDictionary = dictionary.singleThreaded(); - } - - if (NullHandling.mustCombineNullAndEmptyInDictionary(singleThreadedDictionaryUtf8)) { - return new StringDictionaryEncodedColumn( - singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null, - multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null, - CombineFirstTwoEntriesIndexed.returnNull(cacheWrappedDictionary), - CombineFirstTwoEntriesIndexed.returnNull(singleThreadedDictionaryUtf8) - ); - } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(singleThreadedDictionaryUtf8)) { - return new StringDictionaryEncodedColumn( - singleValuedColumn != null ? singleValuedColumn.get() : null, - multiValuedColumn != null ? multiValuedColumn.get() : null, - new ReplaceFirstValueWithNullIndexed<>(cacheWrappedDictionary), - new ReplaceFirstValueWithNullIndexed<>(singleThreadedDictionaryUtf8) - ); - } else { - return new StringDictionaryEncodedColumn( - singleValuedColumn != null ? singleValuedColumn.get() : null, - multiValuedColumn != null ? 
multiValuedColumn.get() : null, - cacheWrappedDictionary, - singleThreadedDictionaryUtf8 - ); - } - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java deleted file mode 100644 index d7dc25b9796..00000000000 --- a/processing/src/main/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplier.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.segment.serde; - -import org.apache.druid.collections.bitmap.BitmapFactory; -import org.apache.druid.collections.bitmap.ImmutableBitmap; -import org.apache.druid.collections.spatial.ImmutableRTree; -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.segment.column.BitmapColumnIndex; -import org.apache.druid.segment.column.ColumnIndexSupplier; -import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.DictionaryEncodedValueIndex; -import org.apache.druid.segment.column.DruidPredicateIndex; -import org.apache.druid.segment.column.IndexedStringDictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.IndexedStringDruidPredicateIndex; -import org.apache.druid.segment.column.IndexedUtf8LexicographicalRangeIndex; -import org.apache.druid.segment.column.IndexedUtf8ValueSetIndex; -import org.apache.druid.segment.column.LexicographicalRangeIndex; -import org.apache.druid.segment.column.NullValueIndex; -import org.apache.druid.segment.column.SimpleImmutableBitmapIndex; -import org.apache.druid.segment.column.SpatialIndex; -import org.apache.druid.segment.column.StringValueSetIndex; -import org.apache.druid.segment.column.Utf8ValueSetIndex; -import org.apache.druid.segment.data.GenericIndexed; -import org.apache.druid.segment.data.Indexed; - -import javax.annotation.Nullable; -import java.nio.ByteBuffer; - -public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier -{ - private final BitmapFactory bitmapFactory; - private final GenericIndexed dictionary; - private final GenericIndexed dictionaryUtf8; - @Nullable - private final GenericIndexed bitmaps; - @Nullable - private final ImmutableRTree indexedTree; - - public DictionaryEncodedStringIndexSupplier( - BitmapFactory bitmapFactory, - GenericIndexed dictionary, - GenericIndexed dictionaryUtf8, - @Nullable GenericIndexed bitmaps, - @Nullable ImmutableRTree indexedTree - ) - 
{ - this.bitmapFactory = bitmapFactory; - this.dictionary = dictionary; - this.dictionaryUtf8 = dictionaryUtf8; - this.bitmaps = bitmaps; - this.indexedTree = indexedTree; - } - - @Nullable - @Override - @SuppressWarnings("unchecked") - public T as(Class clazz) - { - if (bitmaps != null) { - Indexed singleThreadedStrings = dictionary.singleThreaded(); - Indexed singleThreadedUtf8 = dictionaryUtf8.singleThreaded(); - Indexed singleThreadedBitmaps = bitmaps.singleThreaded(); - - if (NullHandling.mustCombineNullAndEmptyInDictionary(singleThreadedUtf8)) { - singleThreadedStrings = CombineFirstTwoEntriesIndexed.returnNull(singleThreadedStrings); - singleThreadedUtf8 = CombineFirstTwoEntriesIndexed.returnNull(singleThreadedUtf8); - singleThreadedBitmaps = CombineFirstTwoEntriesIndexed.unionBitmaps(bitmapFactory, singleThreadedBitmaps); - } else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(singleThreadedUtf8)) { - singleThreadedStrings = new ReplaceFirstValueWithNullIndexed<>(singleThreadedStrings); - singleThreadedUtf8 = new ReplaceFirstValueWithNullIndexed<>(singleThreadedUtf8); - } - - if (clazz.equals(NullValueIndex.class)) { - final BitmapColumnIndex nullIndex; - final ByteBuffer firstValue = singleThreadedUtf8.get(0); - if (NullHandling.isNullOrEquivalent(firstValue)) { - nullIndex = new SimpleImmutableBitmapIndex(singleThreadedBitmaps.get(0)); - } else { - nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); - } - return (T) (NullValueIndex) () -> nullIndex; - } else if (clazz.equals(StringValueSetIndex.class)) { - return (T) new IndexedUtf8ValueSetIndex<>(bitmapFactory, singleThreadedUtf8, singleThreadedBitmaps); - } else if (clazz.equals(Utf8ValueSetIndex.class)) { - return (T) new IndexedUtf8ValueSetIndex<>(bitmapFactory, singleThreadedUtf8, singleThreadedBitmaps); - } else if (clazz.equals(DruidPredicateIndex.class)) { - return (T) new IndexedStringDruidPredicateIndex<>(bitmapFactory, singleThreadedStrings, 
singleThreadedBitmaps); - } else if (clazz.equals(LexicographicalRangeIndex.class)) { - return (T) new IndexedUtf8LexicographicalRangeIndex<>( - bitmapFactory, - singleThreadedUtf8, - singleThreadedBitmaps, - NullHandling.isNullOrEquivalent(singleThreadedStrings.get(0)) - ); - } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) - || clazz.equals(DictionaryEncodedValueIndex.class)) { - return (T) new IndexedStringDictionaryEncodedStringValueIndex<>( - bitmapFactory, - singleThreadedStrings, - singleThreadedBitmaps - ); - } - } - if (indexedTree != null && clazz.equals(SpatialIndex.class)) { - return (T) (SpatialIndex) () -> indexedTree; - } - return null; - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java index 42e83985690..9b107b1d78b 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/NestedCommonFormatColumnPartSerde.java @@ -23,7 +23,9 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; +import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.BitmapSerdeFactory; @@ -128,106 +130,18 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde public Deserializer getDeserializer() { if (isVariantType || logicalType.isArray()) { - return ((buffer, builder, columnConfig) -> { - VariantColumnAndIndexSupplier supplier = 
VariantColumnAndIndexSupplier.read( - logicalType, - byteOrder, - bitmapSerdeFactory, - buffer, - builder, - columnConfig - ); - ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); - capabilitiesBuilder.setDictionaryEncoded(true); - capabilitiesBuilder.setDictionaryValuesSorted(true); - capabilitiesBuilder.setDictionaryValuesUnique(true); - builder.setType(logicalType); - builder.setNestedCommonFormatColumnSupplier(supplier); - builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); - builder.setFilterable(true); - }); + return new VariantColumnDeserializer(); } if (logicalType.is(ValueType.STRING)) { - return ((buffer, builder, columnConfig) -> { - ScalarStringColumnAndIndexSupplier supplier = ScalarStringColumnAndIndexSupplier.read( - byteOrder, - bitmapSerdeFactory, - buffer, - builder, - columnConfig - ); - ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); - capabilitiesBuilder.setDictionaryEncoded(true); - capabilitiesBuilder.setDictionaryValuesSorted(true); - capabilitiesBuilder.setDictionaryValuesUnique(true); - builder.setType(logicalType); - builder.setNestedCommonFormatColumnSupplier(supplier); - builder.setIndexSupplier(supplier, true, false); - builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); - builder.setFilterable(true); - }); + return new StringColumnDeserializer(); } if (logicalType.is(ValueType.LONG)) { - return ((buffer, builder, columnConfig) -> { - ScalarLongColumnAndIndexSupplier supplier = ScalarLongColumnAndIndexSupplier.read( - byteOrder, - bitmapSerdeFactory, - buffer, - builder, - columnConfig - ); - ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); - capabilitiesBuilder.setDictionaryEncoded(true); - capabilitiesBuilder.setDictionaryValuesSorted(true); - capabilitiesBuilder.setDictionaryValuesUnique(true); - 
builder.setType(logicalType); - builder.setNestedCommonFormatColumnSupplier(supplier); - builder.setIndexSupplier(supplier, true, false); - builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); - builder.setFilterable(true); - }); + return new LongColumnDeserializer(); } if (logicalType.is(ValueType.DOUBLE)) { - return ((buffer, builder, columnConfig) -> { - ScalarDoubleColumnAndIndexSupplier supplier = ScalarDoubleColumnAndIndexSupplier.read( - byteOrder, - bitmapSerdeFactory, - buffer, - builder, - columnConfig - ); - ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); - capabilitiesBuilder.setDictionaryEncoded(true); - capabilitiesBuilder.setDictionaryValuesSorted(true); - capabilitiesBuilder.setDictionaryValuesUnique(true); - builder.setType(logicalType); - builder.setNestedCommonFormatColumnSupplier(supplier); - builder.setIndexSupplier(supplier, true, false); - builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); - builder.setFilterable(true); - }); + return new DoubleColumnDeserializer(); } - return (buffer, builder, columnConfig) -> { - NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read( - logicalType, - hasNulls, - buffer, - builder, - columnConfig, - bitmapSerdeFactory, - byteOrder - ); - ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); - capabilitiesBuilder.setDictionaryEncoded(true); - capabilitiesBuilder.setDictionaryValuesSorted(true); - capabilitiesBuilder.setDictionaryValuesUnique(true); - ColumnType simpleType = supplier.getLogicalType(); - ColumnType logicalType = simpleType == null ? 
ColumnType.NESTED_DATA : simpleType; - builder.setType(logicalType); - builder.setNestedCommonFormatColumnSupplier(supplier); - builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, hasNulls)); - builder.setFilterable(true); - }; + return new NestedColumnDeserializer(); } @JsonProperty @@ -260,6 +174,129 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde return bitmapSerdeFactory; } + private class StringColumnDeserializer implements Deserializer + { + @Override + public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig) + { + ScalarStringColumnAndIndexSupplier supplier = ScalarStringColumnAndIndexSupplier.read( + byteOrder, + bitmapSerdeFactory, + buffer, + builder, + columnConfig + ); + ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); + capabilitiesBuilder.setDictionaryEncoded(true); + capabilitiesBuilder.setDictionaryValuesSorted(true); + capabilitiesBuilder.setDictionaryValuesUnique(true); + builder.setType(logicalType); + builder.setNestedCommonFormatColumnSupplier(supplier); + builder.setIndexSupplier(supplier, true, false); + builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); + builder.setFilterable(true); + } + } + + private class LongColumnDeserializer implements Deserializer + { + @Override + public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig) + { + ScalarLongColumnAndIndexSupplier supplier = ScalarLongColumnAndIndexSupplier.read( + byteOrder, + bitmapSerdeFactory, + buffer, + builder, + columnConfig + ); + ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); + capabilitiesBuilder.setDictionaryEncoded(true); + capabilitiesBuilder.setDictionaryValuesSorted(true); + capabilitiesBuilder.setDictionaryValuesUnique(true); + builder.setType(logicalType); + builder.setNestedCommonFormatColumnSupplier(supplier); + 
builder.setIndexSupplier(supplier, true, false); + builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); + builder.setFilterable(true); + } + } + + private class DoubleColumnDeserializer implements Deserializer + { + @Override + public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig) + { + ScalarDoubleColumnAndIndexSupplier supplier = ScalarDoubleColumnAndIndexSupplier.read( + byteOrder, + bitmapSerdeFactory, + buffer, + builder, + columnConfig + ); + ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); + capabilitiesBuilder.setDictionaryEncoded(true); + capabilitiesBuilder.setDictionaryValuesSorted(true); + capabilitiesBuilder.setDictionaryValuesUnique(true); + builder.setType(logicalType); + builder.setNestedCommonFormatColumnSupplier(supplier); + builder.setIndexSupplier(supplier, true, false); + builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); + builder.setFilterable(true); + } + } + + private class VariantColumnDeserializer implements Deserializer + { + @Override + public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig) + { + VariantColumnAndIndexSupplier supplier = VariantColumnAndIndexSupplier.read( + logicalType, + byteOrder, + bitmapSerdeFactory, + buffer, + builder, + columnConfig + ); + ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); + capabilitiesBuilder.setDictionaryEncoded(true); + capabilitiesBuilder.setDictionaryValuesSorted(true); + capabilitiesBuilder.setDictionaryValuesUnique(true); + builder.setType(logicalType); + builder.setNestedCommonFormatColumnSupplier(supplier); + builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, capabilitiesBuilder.hasNulls().isTrue())); + builder.setFilterable(true); + } + } + + private class NestedColumnDeserializer implements Deserializer + { + @Override 
+ public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig) + { + NestedDataColumnSupplier supplier = NestedDataColumnSupplier.read( + logicalType, + hasNulls, + buffer, + builder, + columnConfig, + bitmapSerdeFactory, + byteOrder + ); + ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder(); + capabilitiesBuilder.setDictionaryEncoded(true); + capabilitiesBuilder.setDictionaryValuesSorted(true); + capabilitiesBuilder.setDictionaryValuesUnique(true); + ColumnType simpleType = supplier.getLogicalType(); + ColumnType logicalType = simpleType == null ? ColumnType.NESTED_DATA : simpleType; + builder.setType(logicalType); + builder.setNestedCommonFormatColumnSupplier(supplier); + builder.setColumnFormat(new NestedCommonFormatColumn.Format(logicalType, hasNulls)); + builder.setFilterable(true); + } + } + public static class SerializerBuilder { private ColumnType logicalType; diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java similarity index 80% rename from processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java rename to processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java index a617e3b409f..de9c791c7eb 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8ColumnIndexSupplier.java @@ -25,6 +25,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.collections.spatial.ImmutableRTree; import org.apache.druid.common.config.NullHandling; import org.apache.druid.segment.column.BitmapColumnIndex; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnIndexSupplier; import 
org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; import org.apache.druid.segment.column.DictionaryEncodedValueIndex; @@ -39,17 +40,16 @@ import org.apache.druid.segment.column.SimpleImmutableBitmapIndex; import org.apache.druid.segment.column.SpatialIndex; import org.apache.druid.segment.column.StringEncodingStrategies; import org.apache.druid.segment.column.StringValueSetIndex; -import org.apache.druid.segment.data.FrontCodedIndexed; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.Indexed; import javax.annotation.Nullable; import java.nio.ByteBuffer; -public class StringFrontCodedColumnIndexSupplier implements ColumnIndexSupplier +public class StringUtf8ColumnIndexSupplier> implements ColumnIndexSupplier { private final BitmapFactory bitmapFactory; - private final Supplier utf8Dictionary; + private final Supplier utf8Dictionary; @Nullable private final GenericIndexed bitmaps; @@ -57,17 +57,34 @@ public class StringFrontCodedColumnIndexSupplier implements ColumnIndexSupplier @Nullable private final ImmutableRTree indexedTree; - public StringFrontCodedColumnIndexSupplier( + private final ColumnConfig columnConfig; + private final int numRows; + + public StringUtf8ColumnIndexSupplier( BitmapFactory bitmapFactory, - Supplier utf8Dictionary, + Supplier utf8Dictionary, @Nullable GenericIndexed bitmaps, @Nullable ImmutableRTree indexedTree ) + { + this(bitmapFactory, utf8Dictionary, bitmaps, indexedTree, ColumnConfig.ALWAYS_USE_INDEXES, Integer.MAX_VALUE); + } + + public StringUtf8ColumnIndexSupplier( + BitmapFactory bitmapFactory, + Supplier utf8Dictionary, + @Nullable GenericIndexed bitmaps, + @Nullable ImmutableRTree indexedTree, + @Nullable ColumnConfig columnConfig, + int numRows + ) { this.bitmapFactory = bitmapFactory; this.bitmaps = bitmaps; this.utf8Dictionary = utf8Dictionary; this.indexedTree = indexedTree; + this.columnConfig = columnConfig; + this.numRows = numRows; } @Nullable @@ -90,7 +107,8 @@ 
public class StringFrontCodedColumnIndexSupplier implements ColumnIndexSupplier final BitmapColumnIndex nullIndex; final ByteBuffer firstValue = dict.get(0); if (NullHandling.isNullOrEquivalent(firstValue)) { - nullIndex = new SimpleImmutableBitmapIndex(singleThreadedBitmaps.get(0)); + ImmutableBitmap bitmap = singleThreadedBitmaps.get(0); + nullIndex = new SimpleImmutableBitmapIndex(bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap); } else { nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap()); } @@ -105,14 +123,18 @@ public class StringFrontCodedColumnIndexSupplier implements ColumnIndexSupplier return (T) new IndexedStringDruidPredicateIndex<>( bitmapFactory, new StringEncodingStrategies.Utf8ToStringIndexed(dict), - singleThreadedBitmaps + singleThreadedBitmaps, + columnConfig, + numRows ); } else if (clazz.equals(LexicographicalRangeIndex.class)) { return (T) new IndexedUtf8LexicographicalRangeIndex<>( bitmapFactory, dict, singleThreadedBitmaps, - dict.get(0) == null + dict.get(0) == null, + columnConfig, + numRows ); } else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class)) { diff --git a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8DictionaryEncodedColumnSupplier.java similarity index 80% rename from processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java rename to processing/src/main/java/org/apache/druid/segment/serde/StringUtf8DictionaryEncodedColumnSupplier.java index 23bc28acff1..c0a8a0885e4 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/StringFrontCodedDictionaryEncodedColumnSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/StringUtf8DictionaryEncodedColumnSupplier.java @@ -22,26 +22,25 @@ package 
org.apache.druid.segment.serde; import com.google.common.base.Supplier; import org.apache.druid.common.config.NullHandling; import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.StringDictionaryEncodedColumn; import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.ColumnarInts; import org.apache.druid.segment.data.ColumnarMultiInts; -import org.apache.druid.segment.data.FrontCodedIndexed; +import org.apache.druid.segment.data.Indexed; import javax.annotation.Nullable; +import java.nio.ByteBuffer; /** - * {@link DictionaryEncodedColumnSupplier} but for columns using a {@link StringUtf8DictionaryEncodedColumn} - * instead of the traditional {@link StringDictionaryEncodedColumn} + * Supplier for {@link StringUtf8DictionaryEncodedColumn} */ -public class StringFrontCodedDictionaryEncodedColumnSupplier implements Supplier> +public class StringUtf8DictionaryEncodedColumnSupplier> implements Supplier> { - private final Supplier utf8Dictionary; + private final Supplier utf8Dictionary; private final @Nullable Supplier singleValuedColumn; private final @Nullable Supplier multiValuedColumn; - public StringFrontCodedDictionaryEncodedColumnSupplier( - Supplier utf8Dictionary, + public StringUtf8DictionaryEncodedColumnSupplier( + Supplier utf8Dictionary, @Nullable Supplier singleValuedColumn, @Nullable Supplier multiValuedColumn ) @@ -54,7 +53,7 @@ public class StringFrontCodedDictionaryEncodedColumnSupplier implements Supplier @Override public DictionaryEncodedColumn get() { - final FrontCodedIndexed suppliedUtf8Dictionary = utf8Dictionary.get(); + final TIndexed suppliedUtf8Dictionary = utf8Dictionary.get(); if (NullHandling.mustCombineNullAndEmptyInDictionary(suppliedUtf8Dictionary)) { return new StringUtf8DictionaryEncodedColumn( diff --git a/processing/src/test/java/org/apache/druid/query/DruidProcessingConfigTest.java 
b/processing/src/test/java/org/apache/druid/query/DruidProcessingConfigTest.java index e63d6e83d80..8d9f7137280 100644 --- a/processing/src/test/java/org/apache/druid/query/DruidProcessingConfigTest.java +++ b/processing/src/test/java/org/apache/druid/query/DruidProcessingConfigTest.java @@ -97,7 +97,6 @@ public class DruidProcessingConfigTest Assert.assertEquals(Integer.MAX_VALUE, config.poolCacheMaxCount()); Assert.assertEquals(NUM_PROCESSORS - 1, config.getNumThreads()); Assert.assertEquals(Math.max(2, config.getNumThreads() / 4), config.getNumMergeBuffers()); - Assert.assertEquals(0, config.columnCacheSizeBytes()); Assert.assertTrue(config.isFifo()); Assert.assertEquals(System.getProperty("java.io.tmpdir"), config.getTmpDir()); Assert.assertEquals(BUFFER_SIZE, config.intermediateComputeSizeBytes()); @@ -112,7 +111,6 @@ public class DruidProcessingConfigTest Assert.assertEquals(Integer.MAX_VALUE, config.poolCacheMaxCount()); Assert.assertTrue(config.getNumThreads() == 1); Assert.assertEquals(Math.max(2, config.getNumThreads() / 4), config.getNumMergeBuffers()); - Assert.assertEquals(0, config.columnCacheSizeBytes()); Assert.assertTrue(config.isFifo()); Assert.assertEquals(System.getProperty("java.io.tmpdir"), config.getTmpDir()); Assert.assertEquals(BUFFER_SIZE, config.intermediateComputeSizeBytes()); @@ -138,7 +136,6 @@ public class DruidProcessingConfigTest props.setProperty("druid.processing.buffer.sizeBytes", "1"); props.setProperty("druid.processing.buffer.poolCacheMaxCount", "1"); props.setProperty("druid.processing.numThreads", "256"); - props.setProperty("druid.processing.columnCache.sizeBytes", "1"); props.setProperty("druid.processing.fifo", "false"); props.setProperty("druid.processing.tmpDir", "/test/path"); @@ -156,7 +153,6 @@ public class DruidProcessingConfigTest Assert.assertEquals(1, config.poolCacheMaxCount()); Assert.assertEquals(256, config.getNumThreads()); Assert.assertEquals(64, config.getNumMergeBuffers()); - Assert.assertEquals(1, 
config.columnCacheSizeBytes()); Assert.assertFalse(config.isFifo()); Assert.assertEquals("/test/path", config.getTmpDir()); Assert.assertEquals(0, config.getNumInitalBuffersForIntermediatePool()); diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java b/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java index 564ff92198e..c60520fbfa4 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/AggregationTestHelper.java @@ -168,11 +168,6 @@ public class AggregationTestHelper implements Closeable mapper, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); @@ -208,11 +203,6 @@ public class AggregationTestHelper implements Closeable mapper, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); @@ -260,11 +250,6 @@ public class AggregationTestHelper implements Closeable mapper, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); @@ -304,11 +289,6 @@ public class AggregationTestHelper implements Closeable mapper, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByLimitPushDownInsufficientBufferTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByLimitPushDownInsufficientBufferTest.java index 3378a494b29..941505cf499 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByLimitPushDownInsufficientBufferTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByLimitPushDownInsufficientBufferTest.java @@ -119,11 +119,6 @@ public class GroupByLimitPushDownInsufficientBufferTest extends InitializedNullH JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - 
return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByLimitPushDownMultiNodeMergeTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByLimitPushDownMultiNodeMergeTest.java index 831fe8e391e..2b50fcb4a5e 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByLimitPushDownMultiNodeMergeTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByLimitPushDownMultiNodeMergeTest.java @@ -128,11 +128,6 @@ public class GroupByLimitPushDownMultiNodeMergeTest JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByMultiSegmentTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByMultiSegmentTest.java index 2098a2fffe9..27c60c5d0fe 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByMultiSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByMultiSegmentTest.java @@ -113,11 +113,6 @@ public class GroupByMultiSegmentTest JSON_MAPPER, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/NestedQueryPushDownTest.java b/processing/src/test/java/org/apache/druid/query/groupby/NestedQueryPushDownTest.java index f04dacff2c8..def6630144d 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/NestedQueryPushDownTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/NestedQueryPushDownTest.java @@ -74,6 +74,7 @@ 
import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.Segment; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.OnheapIncrementalIndex; @@ -116,10 +117,7 @@ public class NestedQueryPushDownTest extends InitializedNullHandlingTest ExprMacroTable.nil() ) ); - INDEX_IO = new IndexIO( - JSON_MAPPER, - () -> 0 - ); + INDEX_IO = new IndexIO(JSON_MAPPER, ColumnConfig.DEFAULT); INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); } diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java index dc0dc72d255..4fdbe950c33 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java @@ -56,7 +56,7 @@ import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.StringDictionaryEncodedColumn; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.segment.data.ObjectStrategy; @@ -434,7 +434,7 @@ public class SegmentAnalyzerTest extends InitializedNullHandlingTest ColumnHolder holder = EasyMock.createMock(ColumnHolder.class); EasyMock.expect(mockIndex.getColumnHolder("x")).andReturn(holder).atLeastOnce(); - StringDictionaryEncodedColumn 
dictionaryEncodedColumn = EasyMock.createMock(StringDictionaryEncodedColumn.class); + StringUtf8DictionaryEncodedColumn dictionaryEncodedColumn = EasyMock.createMock(StringUtf8DictionaryEncodedColumn.class); EasyMock.expect(holder.getColumn()).andReturn(dictionaryEncodedColumn).atLeastOnce(); dictionaryEncodedColumn.close(); diff --git a/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java b/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java index f875be0ce33..9d71c0c1e37 100644 --- a/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/ColumnSelectorColumnIndexSelectorTest.java @@ -28,7 +28,7 @@ import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex; -import org.apache.druid.segment.column.StringDictionaryEncodedColumn; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.serde.NoIndexesColumnIndexSupplier; import org.easymock.EasyMock; @@ -62,7 +62,7 @@ public class ColumnSelectorColumnIndexSelectorTest ColumnHolder holder = EasyMock.createMock(ColumnHolder.class); EasyMock.expect(index.getColumnHolder(STRING_DICTIONARY_COLUMN_NAME)).andReturn(holder).anyTimes(); - StringDictionaryEncodedColumn stringColumn = EasyMock.createMock(StringDictionaryEncodedColumn.class); + StringUtf8DictionaryEncodedColumn stringColumn = EasyMock.createMock(StringUtf8DictionaryEncodedColumn.class); EasyMock.expect(holder.getCapabilities()).andReturn( ColumnCapabilitiesImpl.createDefault() .setType(ColumnType.STRING) diff --git a/processing/src/test/java/org/apache/druid/segment/CustomSegmentizerFactoryTest.java 
b/processing/src/test/java/org/apache/druid/segment/CustomSegmentizerFactoryTest.java index 7715a7eeef6..227636b3eef 100644 --- a/processing/src/test/java/org/apache/druid/segment/CustomSegmentizerFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/segment/CustomSegmentizerFactoryTest.java @@ -27,6 +27,7 @@ import org.apache.druid.jackson.SegmentizerModule; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory; import org.apache.druid.segment.loading.SegmentLoadingException; @@ -58,7 +59,7 @@ public class CustomSegmentizerFactoryTest extends InitializedNullHandlingTest final ObjectMapper mapper = new DefaultObjectMapper(); mapper.registerModule(new SegmentizerModule()); mapper.registerSubtypes(new NamedType(CustomSegmentizerFactory.class, "customSegmentFactory")); - final IndexIO indexIO = new IndexIO(mapper, () -> 0); + final IndexIO indexIO = new IndexIO(mapper, ColumnConfig.DEFAULT); mapper.setInjectableValues( new InjectableValues.Std() diff --git a/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java b/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java index 8632a6ee01f..a04583a3b32 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexBuilder.java @@ -98,7 +98,7 @@ public class IndexBuilder public static IndexBuilder create() { - return new IndexBuilder(TestHelper.JSON_MAPPER, TestHelper.NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG); + return new IndexBuilder(TestHelper.JSON_MAPPER, ColumnConfig.ALWAYS_USE_INDEXES); } public static IndexBuilder create(ColumnConfig columnConfig) @@ -108,7 +108,7 @@ public class IndexBuilder public static 
IndexBuilder create(ObjectMapper jsonMapper) { - return new IndexBuilder(jsonMapper, TestHelper.NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG); + return new IndexBuilder(jsonMapper, ColumnConfig.ALWAYS_USE_INDEXES); } public static IndexBuilder create(ObjectMapper jsonMapper, ColumnConfig columnConfig) diff --git a/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java b/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java index 90d235c4a03..a8d114af9d1 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexIONullColumnsCompatibilityTest.java @@ -118,7 +118,7 @@ public class IndexIONullColumnsCompatibilityTest extends InitializedNullHandling @Test public void testV9LoaderThatIgnoresmptyColumns() throws IOException { - QueryableIndex queryableIndex = new V9IndexLoaderExceptEmptyColumns(TestHelper.NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG).load( + QueryableIndex queryableIndex = new V9IndexLoaderExceptEmptyColumns(ColumnConfig.ALWAYS_USE_INDEXES).load( segmentDir, TestHelper.makeJsonMapper(), false, diff --git a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java index 5a80e18a69e..30c41bea70f 100644 --- a/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java +++ b/processing/src/test/java/org/apache/druid/segment/IndexMergerTestBase.java @@ -48,7 +48,7 @@ import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.DictionaryEncodedColumn; -import org.apache.druid.segment.column.StringDictionaryEncodedColumn; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import 
org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.BitmapValues; @@ -548,12 +548,12 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest DictionaryEncodedColumn encodedColumn = (DictionaryEncodedColumn) index.getColumnHolder("dim2").getColumn(); Object obj; if (encodedColumn.hasMultipleValues()) { - Field field = StringDictionaryEncodedColumn.class.getDeclaredField("multiValueColumn"); + Field field = StringUtf8DictionaryEncodedColumn.class.getDeclaredField("multiValueColumn"); field.setAccessible(true); obj = field.get(encodedColumn); } else { - Field field = StringDictionaryEncodedColumn.class.getDeclaredField("column"); + Field field = StringUtf8DictionaryEncodedColumn.class.getDeclaredField("column"); field.setAccessible(true); obj = field.get(encodedColumn); diff --git a/processing/src/test/java/org/apache/druid/segment/TestHelper.java b/processing/src/test/java/org/apache/druid/segment/TestHelper.java index b68aafa7887..6aee1c433d9 100644 --- a/processing/src/test/java/org/apache/druid/segment/TestHelper.java +++ b/processing/src/test/java/org/apache/druid/segment/TestHelper.java @@ -62,26 +62,6 @@ import java.util.stream.IntStream; public class TestHelper { public static final ObjectMapper JSON_MAPPER = makeJsonMapper(); - public static final ColumnConfig NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG = new ColumnConfig() - { - @Override - public int columnCacheSizeBytes() - { - return 0; - } - - @Override - public double skipValueRangeIndexScale() - { - return 1.0; - } - - @Override - public double skipValuePredicateIndexScale() - { - return 1.0; - } - }; public static IndexMergerV9 getTestIndexMergerV9(SegmentWriteOutMediumFactory segmentWriteOutMediumFactory) { @@ -90,7 +70,7 @@ public class TestHelper public static IndexIO getTestIndexIO() { - return getTestIndexIO(NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG); + return 
getTestIndexIO(ColumnConfig.ALWAYS_USE_INDEXES); } public static IndexIO getTestIndexIO(ColumnConfig columnConfig) diff --git a/processing/src/test/java/org/apache/druid/segment/V9IndexLoaderTest.java b/processing/src/test/java/org/apache/druid/segment/V9IndexLoaderTest.java index a9435f9a2f4..980b23e140c 100644 --- a/processing/src/test/java/org/apache/druid/segment/V9IndexLoaderTest.java +++ b/processing/src/test/java/org/apache/druid/segment/V9IndexLoaderTest.java @@ -25,6 +25,7 @@ import org.apache.druid.jackson.DefaultObjectMapper; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.segment.IndexIO.V9IndexLoader; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; @@ -119,7 +120,7 @@ public class V9IndexLoaderTest extends InitializedNullHandlingTest { private CannotDeserializeCountColumnV9IndexLoader() { - super(() -> 0); + super(ColumnConfig.DEFAULT); } @Override diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java index f9fd5171a41..692a2d499f4 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ExtractionDimFilterTest.java @@ -42,7 +42,7 @@ import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ConciseBitmapSerdeFactory; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; -import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier; +import org.apache.druid.segment.serde.StringUtf8ColumnIndexSupplier; import org.apache.druid.testing.InitializedNullHandlingTest; import 
org.junit.Assert; import org.junit.Test; @@ -116,13 +116,12 @@ public class ExtractionDimFilterTest extends InitializedNullHandlingTest public ColumnIndexSupplier getIndexSupplier(String column) { if ("foo".equals(column)) { - return new DictionaryEncodedStringIndexSupplier( + return new StringUtf8ColumnIndexSupplier<>( factory, - GenericIndexed.fromIterable(Collections.singletonList("foo1"), GenericIndexed.STRING_STRATEGY), GenericIndexed.fromIterable( Collections.singletonList(ByteBuffer.wrap(StringUtils.toUtf8("foo1"))), GenericIndexed.UTF8_STRATEGY - ), + )::singleThreaded, GenericIndexed.fromIterable(Collections.singletonList(foo1BitMap), serdeFactory.getObjectStrategy()), null ); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/PredicateValueMatcherFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/filter/PredicateValueMatcherFactoryTest.java index f7525e3f9c4..3a7418421f5 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/PredicateValueMatcherFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/PredicateValueMatcherFactoryTest.java @@ -32,7 +32,7 @@ import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VSizeColumnarInts; import org.apache.druid.segment.data.VSizeColumnarMultiInts; import org.apache.druid.segment.selector.TestColumnValueSelector; -import org.apache.druid.segment.serde.DictionaryEncodedColumnSupplier; +import org.apache.druid.segment.serde.StringUtf8DictionaryEncodedColumnSupplier; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Assert; import org.junit.Test; @@ -67,8 +67,7 @@ public class PredicateValueMatcherFactoryTest extends InitializedNullHandlingTes public void testDimensionProcessorMultiValuedDimensionMatchingValue() { // Emulate multi-valued dimension - final DictionaryEncodedColumnSupplier columnSupplier = new DictionaryEncodedColumnSupplier( - 
GenericIndexed.fromIterable(ImmutableList.of("v1", "v2", "v3"), GenericIndexed.STRING_STRATEGY), + final StringUtf8DictionaryEncodedColumnSupplier columnSupplier = new StringUtf8DictionaryEncodedColumnSupplier<>( GenericIndexed.fromIterable( ImmutableList.of( ByteBuffer.wrap(StringUtils.toUtf8("v1")), @@ -76,10 +75,9 @@ public class PredicateValueMatcherFactoryTest extends InitializedNullHandlingTes ByteBuffer.wrap(StringUtils.toUtf8("v3")) ), GenericIndexed.UTF8_STRATEGY - ), + )::singleThreaded, null, - () -> VSizeColumnarMultiInts.fromIterable(ImmutableList.of(VSizeColumnarInts.fromArray(new int[]{1}))), - 0 + () -> VSizeColumnarMultiInts.fromIterable(ImmutableList.of(VSizeColumnarInts.fromArray(new int[]{1}))) ); final ValueMatcher matcher = forSelector("v2") .makeDimensionProcessor(columnSupplier.get().makeDimensionSelector(new SimpleAscendingOffset(1), null), true); @@ -90,8 +88,7 @@ public class PredicateValueMatcherFactoryTest extends InitializedNullHandlingTes public void testDimensionProcessorMultiValuedDimensionNotMatchingValue() { // Emulate multi-valued dimension - final DictionaryEncodedColumnSupplier columnSupplier = new DictionaryEncodedColumnSupplier( - GenericIndexed.fromIterable(ImmutableList.of("v1", "v2", "v3"), GenericIndexed.STRING_STRATEGY), + final StringUtf8DictionaryEncodedColumnSupplier columnSupplier = new StringUtf8DictionaryEncodedColumnSupplier( GenericIndexed.fromIterable( ImmutableList.of( ByteBuffer.wrap(StringUtils.toUtf8("v1")), @@ -99,10 +96,9 @@ public class PredicateValueMatcherFactoryTest extends InitializedNullHandlingTes ByteBuffer.wrap(StringUtils.toUtf8("v3")) ), GenericIndexed.UTF8_STRATEGY - ), + )::singleThreaded, null, - () -> VSizeColumnarMultiInts.fromIterable(ImmutableList.of(VSizeColumnarInts.fromArray(new int[]{1}))), - 0 + () -> VSizeColumnarMultiInts.fromIterable(ImmutableList.of(VSizeColumnarInts.fromArray(new int[]{1}))) ); final ValueMatcher matcher = forSelector("v3") 
.makeDimensionProcessor(columnSupplier.get().makeDimensionSelector(new SimpleAscendingOffset(1), null), true); diff --git a/processing/src/test/java/org/apache/druid/segment/filter/ValueMatchersTest.java b/processing/src/test/java/org/apache/druid/segment/filter/ValueMatchersTest.java index 98631d3dc9c..0f450a60117 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/ValueMatchersTest.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/ValueMatchersTest.java @@ -26,7 +26,7 @@ import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.data.GenericIndexed; import org.apache.druid.segment.data.VSizeColumnarInts; import org.apache.druid.segment.data.VSizeColumnarMultiInts; -import org.apache.druid.segment.serde.DictionaryEncodedColumnSupplier; +import org.apache.druid.segment.serde.StringUtf8DictionaryEncodedColumnSupplier; import org.apache.druid.segment.vector.NilVectorSelector; import org.apache.druid.segment.vector.NoFilterVectorOffset; import org.apache.druid.testing.InitializedNullHandlingTest; @@ -38,50 +38,44 @@ import java.nio.ByteBuffer; public class ValueMatchersTest extends InitializedNullHandlingTest { - private DictionaryEncodedColumnSupplier supplierSingleConstant; - private DictionaryEncodedColumnSupplier supplierSingle; - private DictionaryEncodedColumnSupplier supplierMulti; + private StringUtf8DictionaryEncodedColumnSupplier supplierSingleConstant; + private StringUtf8DictionaryEncodedColumnSupplier supplierSingle; + private StringUtf8DictionaryEncodedColumnSupplier supplierMulti; @Before public void setup() { - supplierSingleConstant = new DictionaryEncodedColumnSupplier( - GenericIndexed.fromIterable(ImmutableList.of("value"), GenericIndexed.STRING_STRATEGY), + supplierSingleConstant = new StringUtf8DictionaryEncodedColumnSupplier<>( GenericIndexed.fromIterable( ImmutableList.of(ByteBuffer.wrap(StringUtils.toUtf8("value"))), GenericIndexed.UTF8_STRATEGY - ), + )::singleThreaded, () -> 
VSizeColumnarInts.fromArray(new int[]{0}), - null, - 0 + null ); - supplierSingle = new DictionaryEncodedColumnSupplier( - GenericIndexed.fromIterable(ImmutableList.of("value", "value2"), GenericIndexed.STRING_STRATEGY), + supplierSingle = new StringUtf8DictionaryEncodedColumnSupplier<>( GenericIndexed.fromIterable( ImmutableList.of( ByteBuffer.wrap(StringUtils.toUtf8("value")), ByteBuffer.wrap(StringUtils.toUtf8("value2")) ), GenericIndexed.UTF8_STRATEGY - ), + )::singleThreaded, () -> VSizeColumnarInts.fromArray(new int[]{0, 0, 1, 0, 1}), - null, - 0 + null ); - supplierMulti = new DictionaryEncodedColumnSupplier( - GenericIndexed.fromIterable(ImmutableList.of("value"), GenericIndexed.STRING_STRATEGY), + supplierMulti = new StringUtf8DictionaryEncodedColumnSupplier<>( GenericIndexed.fromIterable( ImmutableList.of(ByteBuffer.wrap(StringUtils.toUtf8("value"))), GenericIndexed.UTF8_STRATEGY - ), + )::singleThreaded, null, () -> VSizeColumnarMultiInts.fromIterable( ImmutableList.of( VSizeColumnarInts.fromArray(new int[]{0, 0}), VSizeColumnarInts.fromArray(new int[]{0}) ) - ), - 0 + ) ); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java b/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java index e579f0f0792..a5202260efe 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java +++ b/processing/src/test/java/org/apache/druid/segment/join/JoinTestHelper.java @@ -188,7 +188,7 @@ public class JoinTestHelper public static IndexBuilder createFactIndexBuilder(final File tmpDir) throws IOException { - return createFactIndexBuilder(TestHelper.NO_CACHE_ALWAYS_USE_INDEXES_COLUMN_CONFIG, tmpDir, -1); + return createFactIndexBuilder(ColumnConfig.ALWAYS_USE_INDEXES, tmpDir, -1); } public static IndexBuilder createFactIndexBuilder( diff --git a/processing/src/test/java/org/apache/druid/segment/join/table/BroadcastSegmentIndexedTableTest.java 
b/processing/src/test/java/org/apache/druid/segment/join/table/BroadcastSegmentIndexedTableTest.java index 8b11984354a..22ff1f3c5c2 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/table/BroadcastSegmentIndexedTableTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/table/BroadcastSegmentIndexedTableTest.java @@ -47,6 +47,7 @@ import org.apache.druid.segment.SegmentLazyLoadFailCallback; import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.TestIndex; import org.apache.druid.segment.column.BaseColumn; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory; @@ -102,7 +103,7 @@ public class BroadcastSegmentIndexedTableTest extends InitializedNullHandlingTes { final ObjectMapper mapper = new DefaultObjectMapper(); mapper.registerModule(new SegmentizerModule()); - final IndexIO indexIO = new IndexIO(mapper, () -> 0); + final IndexIO indexIO = new IndexIO(mapper, ColumnConfig.DEFAULT); mapper.setInjectableValues( new InjectableValues.Std() .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE) diff --git a/processing/src/test/java/org/apache/druid/segment/loading/BroadcastJoinableMMappedQueryableSegmentizerFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/loading/BroadcastJoinableMMappedQueryableSegmentizerFactoryTest.java index 3f4f5ae9db7..9ce676f5ba6 100644 --- a/processing/src/test/java/org/apache/druid/segment/loading/BroadcastJoinableMMappedQueryableSegmentizerFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/segment/loading/BroadcastJoinableMMappedQueryableSegmentizerFactoryTest.java @@ -36,6 +36,7 @@ import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.Segment; import org.apache.druid.segment.SegmentLazyLoadFailCallback; import 
org.apache.druid.segment.TestIndex; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.join.table.BroadcastSegmentIndexedTable; import org.apache.druid.segment.join.table.IndexedTable; @@ -67,7 +68,7 @@ public class BroadcastJoinableMMappedQueryableSegmentizerFactoryTest extends Ini { final ObjectMapper mapper = new DefaultObjectMapper(); mapper.registerModule(new SegmentizerModule()); - final IndexIO indexIO = new IndexIO(mapper, () -> 0); + final IndexIO indexIO = new IndexIO(mapper, ColumnConfig.DEFAULT); mapper.setInjectableValues( new InjectableValues.Std() .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE) diff --git a/processing/src/test/java/org/apache/druid/segment/loading/SegmentizerFactoryTest.java b/processing/src/test/java/org/apache/druid/segment/loading/SegmentizerFactoryTest.java index 9873ebb188d..584722bdcca 100644 --- a/processing/src/test/java/org/apache/druid/segment/loading/SegmentizerFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/segment/loading/SegmentizerFactoryTest.java @@ -46,11 +46,6 @@ public class SegmentizerFactoryTest mapper, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 777; - } } ); mapper.setInjectableValues( diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index 1182569c198..d6131b2d820 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -100,11 +100,6 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest private static final ColumnConfig ALWAYS_USE_INDEXES = new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 
0; - } @Override public double skipValueRangeIndexScale() diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java index fbe51d298dc..9503e55cc57 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplierTest.java @@ -66,12 +66,6 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT private static final int ROW_COUNT = 10; static final ColumnConfig ALWAYS_USE_INDEXES = new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } - @Override public double skipValueRangeIndexScale() { @@ -1470,12 +1464,6 @@ public class NestedFieldColumnIndexSupplierTest extends InitializedNullHandlingT { ColumnConfig twentyPercent = new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } - @Override public double skipValueRangeIndexScale() { diff --git a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java index 87abedee1d7..7537dc53c92 100644 --- a/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/serde/DictionaryEncodedStringIndexSupplierTest.java @@ -52,7 +52,7 @@ public class DictionaryEncodedStringIndexSupplierTest extends InitializedNullHan @Test public void testStringColumnWithNullValueSetIndex() throws IOException { - DictionaryEncodedStringIndexSupplier indexSupplier = makeStringWithNullsSupplier(); + StringUtf8ColumnIndexSupplier indexSupplier = makeStringWithNullsSupplier(); StringValueSetIndex valueSetIndex = 
indexSupplier.as(StringValueSetIndex.class); Assert.assertNotNull(valueSetIndex); @@ -102,7 +102,7 @@ public class DictionaryEncodedStringIndexSupplierTest extends InitializedNullHan checkBitmap(bitmap); } - private DictionaryEncodedStringIndexSupplier makeStringWithNullsSupplier() throws IOException + private StringUtf8ColumnIndexSupplier makeStringWithNullsSupplier() throws IOException { ByteBuffer stringBuffer = ByteBuffer.allocate(1 << 12); ByteBuffer byteBuffer = ByteBuffer.allocate(1 << 12); @@ -164,10 +164,9 @@ public class DictionaryEncodedStringIndexSupplierTest extends InitializedNullHan writeToBuffer(bitmapsBuffer, bitmapWriter); GenericIndexed bitmaps = GenericIndexed.read(bitmapsBuffer, roaringFactory.getObjectStrategy()); - return new DictionaryEncodedStringIndexSupplier( + return new StringUtf8ColumnIndexSupplier<>( roaringFactory.getBitmapFactory(), - GenericIndexed.read(stringBuffer, GenericIndexed.STRING_STRATEGY), - GenericIndexed.read(byteBuffer, GenericIndexed.UTF8_STRATEGY), + GenericIndexed.read(byteBuffer, GenericIndexed.UTF8_STRATEGY)::singleThreaded, bitmaps, null ); diff --git a/processing/src/test/java/org/apache/druid/segment/serde/NullColumnPartSerdeTest.java b/processing/src/test/java/org/apache/druid/segment/serde/NullColumnPartSerdeTest.java index df7a1029844..2c3455478d0 100644 --- a/processing/src/test/java/org/apache/druid/segment/serde/NullColumnPartSerdeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/serde/NullColumnPartSerdeTest.java @@ -49,9 +49,7 @@ import java.nio.ByteBuffer; public class NullColumnPartSerdeTest extends InitializedNullHandlingTest { - private static final String COLUMN_NAME = "missing"; private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0); - private static final ColumnConfig A_CONFIG = () -> 0; @Test public void testSerde() throws JsonProcessingException @@ -68,7 +66,7 @@ public class NullColumnPartSerdeTest extends InitializedNullHandlingTest { final NullColumnPartSerde 
partSerde = new NullColumnPartSerde(10, RoaringBitmapSerdeFactory.getInstance()); final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.DOUBLE); - partSerde.getDeserializer().read(EMPTY_BUFFER, builder, A_CONFIG); + partSerde.getDeserializer().read(EMPTY_BUFFER, builder, ColumnConfig.DEFAULT); final ColumnCapabilities columnCapabilities = builder.build().getCapabilities(); Assert.assertTrue(Types.is(columnCapabilities, ValueType.DOUBLE)); Assert.assertTrue(columnCapabilities.hasNulls().isTrue()); @@ -85,7 +83,7 @@ public class NullColumnPartSerdeTest extends InitializedNullHandlingTest { final NullColumnPartSerde partSerde = new NullColumnPartSerde(10, RoaringBitmapSerdeFactory.getInstance()); final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.STRING); - partSerde.getDeserializer().read(EMPTY_BUFFER, builder, A_CONFIG); + partSerde.getDeserializer().read(EMPTY_BUFFER, builder, ColumnConfig.DEFAULT); ColumnHolder holder = builder.build(); BaseColumn theColumn = holder.getColumn(); @@ -107,7 +105,7 @@ public class NullColumnPartSerdeTest extends InitializedNullHandlingTest { final NullColumnPartSerde partSerde = new NullColumnPartSerde(10, RoaringBitmapSerdeFactory.getInstance()); final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.STRING); - partSerde.getDeserializer().read(EMPTY_BUFFER, builder, A_CONFIG); + partSerde.getDeserializer().read(EMPTY_BUFFER, builder, ColumnConfig.DEFAULT); ColumnHolder holder = builder.build(); BaseColumn theColumn = holder.getColumn(); @@ -135,7 +133,7 @@ public class NullColumnPartSerdeTest extends InitializedNullHandlingTest { final NullColumnPartSerde partSerde = new NullColumnPartSerde(10, RoaringBitmapSerdeFactory.getInstance()); final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.STRING); - partSerde.getDeserializer().read(EMPTY_BUFFER, builder, A_CONFIG); + partSerde.getDeserializer().read(EMPTY_BUFFER, builder, ColumnConfig.DEFAULT); ColumnHolder holder = 
builder.build(); BaseColumn theColumn = holder.getColumn(); @@ -154,7 +152,7 @@ public class NullColumnPartSerdeTest extends InitializedNullHandlingTest { final NullColumnPartSerde partSerde = new NullColumnPartSerde(10, RoaringBitmapSerdeFactory.getInstance()); final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.DOUBLE); - partSerde.getDeserializer().read(EMPTY_BUFFER, builder, A_CONFIG); + partSerde.getDeserializer().read(EMPTY_BUFFER, builder, ColumnConfig.DEFAULT); ColumnHolder holder = builder.build(); BaseColumn theColumn = holder.getColumn(); @@ -175,7 +173,7 @@ public class NullColumnPartSerdeTest extends InitializedNullHandlingTest { final NullColumnPartSerde partSerde = new NullColumnPartSerde(10, RoaringBitmapSerdeFactory.getInstance()); final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.DOUBLE); - partSerde.getDeserializer().read(EMPTY_BUFFER, builder, A_CONFIG); + partSerde.getDeserializer().read(EMPTY_BUFFER, builder, ColumnConfig.DEFAULT); ColumnHolder holder = builder.build(); BaseColumn theColumn = holder.getColumn(); @@ -199,7 +197,7 @@ public class NullColumnPartSerdeTest extends InitializedNullHandlingTest { final NullColumnPartSerde partSerde = new NullColumnPartSerde(10, RoaringBitmapSerdeFactory.getInstance()); final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.DOUBLE); - partSerde.getDeserializer().read(EMPTY_BUFFER, builder, A_CONFIG); + partSerde.getDeserializer().read(EMPTY_BUFFER, builder, ColumnConfig.DEFAULT); ColumnHolder holder = builder.build(); Assert.assertNull(holder.getIndexSupplier()); } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/DummyStringVirtualColumn.java b/processing/src/test/java/org/apache/druid/segment/virtual/DummyStringVirtualColumn.java index 8253b27bb59..cacec12e666 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/DummyStringVirtualColumn.java +++ 
b/processing/src/test/java/org/apache/druid/segment/virtual/DummyStringVirtualColumn.java @@ -38,7 +38,7 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.StringDictionaryEncodedColumn; +import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.ReadableOffset; @@ -95,7 +95,7 @@ public class DummyStringVirtualColumn implements VirtualColumn return DimensionSelector.constant(null); } - StringDictionaryEncodedColumn stringCol = toStringDictionaryEncodedColumn(holder.getColumn()); + StringUtf8DictionaryEncodedColumn stringCol = toStringDictionaryEncodedColumn(holder.getColumn()); DimensionSelector baseDimensionSelector = stringCol.makeDimensionSelector( offset, @@ -145,7 +145,7 @@ public class DummyStringVirtualColumn implements VirtualColumn return NilColumnValueSelector.instance(); } - StringDictionaryEncodedColumn stringCol = toStringDictionaryEncodedColumn(holder.getColumn()); + StringUtf8DictionaryEncodedColumn stringCol = toStringDictionaryEncodedColumn(holder.getColumn()); return stringCol.makeColumnValueSelector(offset); } else { return null; @@ -222,13 +222,13 @@ public class DummyStringVirtualColumn implements VirtualColumn return new byte[0]; } - private StringDictionaryEncodedColumn toStringDictionaryEncodedColumn(BaseColumn column) + private StringUtf8DictionaryEncodedColumn toStringDictionaryEncodedColumn(BaseColumn column) { - if (!(column instanceof StringDictionaryEncodedColumn)) { + if (!(column instanceof StringUtf8DictionaryEncodedColumn)) { throw new IAE("I can only work with StringDictionaryEncodedColumn"); } - return (StringDictionaryEncodedColumn) column; + return (StringUtf8DictionaryEncodedColumn) column; } private 
DimensionSelector disableValueMatchers(DimensionSelector base) diff --git a/server/src/test/java/org/apache/druid/guice/LocalDataStorageDruidModuleTest.java b/server/src/test/java/org/apache/druid/guice/LocalDataStorageDruidModuleTest.java index 2d1f6a477f5..8a7c1deb9ad 100644 --- a/server/src/test/java/org/apache/druid/guice/LocalDataStorageDruidModuleTest.java +++ b/server/src/test/java/org/apache/druid/guice/LocalDataStorageDruidModuleTest.java @@ -54,7 +54,7 @@ public class LocalDataStorageDruidModuleTest new LocalDataStorageDruidModule(), (Module) binder -> { binder.bind(new TypeLiteral>(){}).toInstance(ImmutableList.of()); - binder.bind(ColumnConfig.class).toInstance(() -> 0); + binder.bind(ColumnConfig.class).toInstance(ColumnConfig.DEFAULT); binder.bind(StorageLocationSelectorStrategy.class) .toInstance(new RandomStorageLocationSelectorStrategy(ImmutableList.of())); } diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmensSinksBatchAppenderatorTester.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmensSinksBatchAppenderatorTester.java index 858b989acf4..c1d2bf7a3a5 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmensSinksBatchAppenderatorTester.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/ClosedSegmensSinksBatchAppenderatorTester.java @@ -38,6 +38,7 @@ import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexMerger; import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.AppendableIndexSpec; import org.apache.druid.segment.incremental.ParseExceptionHandler; import org.apache.druid.segment.incremental.RowIngestionMeters; @@ -183,10 +184,7 @@ public class ClosedSegmensSinksBatchAppenderatorTester implements AutoCloseable ); metrics = new 
FireDepartmentMetrics(); - IndexIO indexIO = new IndexIO( - objectMapper, - () -> 0 - ); + IndexIO indexIO = new IndexIO(objectMapper, ColumnConfig.DEFAULT); IndexMergerV9 indexMerger = new IndexMergerV9( objectMapper, indexIO, diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/DefaultOfflineAppenderatorFactoryTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/DefaultOfflineAppenderatorFactoryTest.java index cf8f2eea0d3..c6c2069b681 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/DefaultOfflineAppenderatorFactoryTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/DefaultOfflineAppenderatorFactoryTest.java @@ -91,11 +91,6 @@ public class DefaultOfflineAppenderatorFactoryTest return 1; } - @Override - public int columnCacheSizeBytes() - { - return 25 * 1024 * 1024; - } } ); binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class); diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsAppenderatorTester.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsAppenderatorTester.java index e2357d95c9b..552dc893baf 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsAppenderatorTester.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/OpenAndClosedSegmentsAppenderatorTester.java @@ -159,11 +159,6 @@ public class OpenAndClosedSegmentsAppenderatorTester implements AutoCloseable objectMapper, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); indexMerger = new IndexMergerV9(objectMapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance()); diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTester.java 
b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTester.java index 413f315f50a..217c90116c3 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTester.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorTester.java @@ -157,11 +157,6 @@ public class StreamAppenderatorTester implements AutoCloseable objectMapper, new ColumnConfig() { - @Override - public int columnCacheSizeBytes() - { - return 0; - } } ); diff --git a/server/src/test/java/org/apache/druid/server/SegmentManagerBroadcastJoinIndexedTableTest.java b/server/src/test/java/org/apache/druid/server/SegmentManagerBroadcastJoinIndexedTableTest.java index 3d54b279df1..97c000ea7b2 100644 --- a/server/src/test/java/org/apache/druid/server/SegmentManagerBroadcastJoinIndexedTableTest.java +++ b/server/src/test/java/org/apache/druid/server/SegmentManagerBroadcastJoinIndexedTableTest.java @@ -42,6 +42,7 @@ import org.apache.druid.segment.IndexMergerV9; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.SegmentLazyLoadFailCallback; import org.apache.druid.segment.TestIndex; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.join.BroadcastTableJoinableFactory; import org.apache.druid.segment.join.JoinConditionAnalysis; @@ -116,7 +117,7 @@ public class SegmentManagerBroadcastJoinIndexedTableTest extends InitializedNull new SimpleModule().registerSubtypes(new NamedType(LocalLoadSpec.class, "local")) ); - indexIO = new IndexIO(objectMapper, () -> 0); + indexIO = new IndexIO(objectMapper, ColumnConfig.DEFAULT); objectMapper.setInjectableValues( new InjectableValues.Std().addValue(LocalDataSegmentPuller.class, segmentPuller) .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE) diff --git 
a/server/src/test/java/org/apache/druid/server/SegmentManagerThreadSafetyTest.java b/server/src/test/java/org/apache/druid/server/SegmentManagerThreadSafetyTest.java index 27abe091615..6eee7aef0f8 100644 --- a/server/src/test/java/org/apache/druid/server/SegmentManagerThreadSafetyTest.java +++ b/server/src/test/java/org/apache/druid/server/SegmentManagerThreadSafetyTest.java @@ -36,6 +36,7 @@ import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.Segment; import org.apache.druid.segment.SegmentLazyLoadFailCallback; import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.loading.DataSegmentPusher; import org.apache.druid.segment.loading.LocalDataSegmentPuller; import org.apache.druid.segment.loading.LocalLoadSpec; @@ -97,7 +98,7 @@ public class SegmentManagerThreadSafetyTest new SimpleModule().registerSubtypes(new NamedType(LocalLoadSpec.class, "local"), new NamedType(TestSegmentizerFactory.class, "test")) ) .setInjectableValues(new Std().addValue(LocalDataSegmentPuller.class, segmentPuller)); - indexIO = new IndexIO(objectMapper, () -> 0); + indexIO = new IndexIO(objectMapper, ColumnConfig.DEFAULT); segmentCacheDir = temporaryFolder.newFolder(); segmentDeepStorageDir = temporaryFolder.newFolder(); segmentCacheManager = new SegmentLocalCacheManager( diff --git a/services/src/main/java/org/apache/druid/cli/DumpSegment.java b/services/src/main/java/org/apache/druid/cli/DumpSegment.java index f78548adc4b..8c865af1664 100644 --- a/services/src/main/java/org/apache/druid/cli/DumpSegment.java +++ b/services/src/main/java/org/apache/druid/cli/DumpSegment.java @@ -761,11 +761,6 @@ public class DumpSegment extends GuiceRunnable return 1; } - @Override - public int columnCacheSizeBytes() - { - return 25 * 1024 * 1024; - } } ); binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class); diff --git a/services/src/main/java/org/apache/druid/cli/ValidateSegments.java 
b/services/src/main/java/org/apache/druid/cli/ValidateSegments.java index de2b2bb6cb1..925d7c81a80 100644 --- a/services/src/main/java/org/apache/druid/cli/ValidateSegments.java +++ b/services/src/main/java/org/apache/druid/cli/ValidateSegments.java @@ -114,11 +114,6 @@ public class ValidateSegments extends GuiceRunnable return 1; } - @Override - public int columnCacheSizeBytes() - { - return 25 * 1024 * 1024; - } } ); binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class);