From f41468fd466ab239d3373755fba28c0d4dbdb4b5 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 7 Apr 2023 03:11:15 -0700 Subject: [PATCH] fix off by one error in FrontCodedIndexedWriter and FrontCodedIntArrayIndexedWriter getCardinality method (#14047) * fix off by one error in FrontCodedIndexedWriter and FrontCodedIntArrayIndexedWriter getCardinality method --- .../segment/data/FrontCodedIndexedWriter.java | 2 +- .../data/FrontCodedIntArrayIndexedWriter.java | 2 +- .../druid/query/NestedDataTestUtils.java | 103 +++++++++++++++--- .../groupby/NestedDataGroupByQueryTest.java | 2 +- .../query/scan/NestedDataScanQueryTest.java | 13 ++- .../segment/data/FrontCodedIndexedTest.java | 1 + .../data/FrontCodedIntArrayIndexedTest.java | 1 + .../NestedFieldColumnSelectorsTest.java | 7 +- 8 files changed, 105 insertions(+), 26 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java index 34e81dfc72e..83b4850f971 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIndexedWriter.java @@ -227,7 +227,7 @@ public class FrontCodedIndexedWriter implements DictionaryWriter @Override public int getCardinality() { - return numWritten; + return numWritten + (hasNulls ? 1 : 0); } private long getBucketOffset(int index) throws IOException diff --git a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedWriter.java b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedWriter.java index 05802fdad72..8116882191b 100644 --- a/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedWriter.java +++ b/processing/src/main/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedWriter.java @@ -227,7 +227,7 @@ public class FrontCodedIntArrayIndexedWriter implements DictionaryWriter @Override public int getCardinality() { - return numWritten; + return numWritten + (hasNulls ? 1 : 0); } private long getBucketOffset(int index) throws IOException diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index 818e2ee48f0..c6b77c211cc 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -43,9 +43,11 @@ import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.IncrementalIndexSegment; import org.apache.druid.segment.IndexBuilder; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.Segment; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.StringEncodingStrategy; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.transform.ExpressionTransform; import org.apache.druid.segment.transform.TransformSpec; @@ -180,7 +182,8 @@ public class NestedDataTestUtils SIMPLE_DATA_TSV_TRANSFORM, COUNT, granularity, - rollup + rollup, + new IndexSpec() ); } @@ -205,7 +208,8 @@ public class NestedDataTestUtils closer, SIMPLE_DATA_FILE, Granularities.NONE, - true + true, + new IndexSpec() ); } @@ -246,7 +250,8 @@ public class NestedDataTestUtils Closer closer, String inputFile, Granularity granularity, - boolean rollup + boolean rollup, + IndexSpec indexSpec ) throws Exception { return createSegments( @@ -259,7 +264,8 @@ public class NestedDataTestUtils TransformSpec.NONE, COUNT, granularity, - rollup + rollup, + indexSpec ); } @@ -288,14 +294,16 @@ public class NestedDataTestUtils TransformSpec.NONE, COUNT, granularity, - rollup + rollup, + new IndexSpec() ); } public static List createSegmentsForJsonInput( TemporaryFolder tempFolder, Closer closer, - String inputFile + String inputFile, + IndexSpec indexSpec ) throws Exception { @@ -304,7 +312,8 @@ public class NestedDataTestUtils closer, inputFile, Granularities.NONE, - true + true, + indexSpec ); } @@ -355,7 +364,8 @@ public class NestedDataTestUtils TransformSpec transformSpec, AggregatorFactory[] aggregators, Granularity queryGranularity, - boolean rollup + boolean rollup, + IndexSpec indexSpec ) throws Exception { return createSegments( @@ -368,7 +378,8 @@ public class NestedDataTestUtils transformSpec, aggregators, queryGranularity, - rollup + rollup, + indexSpec ); } @@ -382,7 +393,8 @@ public class NestedDataTestUtils TransformSpec transformSpec, AggregatorFactory[] aggregators, Granularity queryGranularity, - boolean rollup + boolean rollup, + IndexSpec indexSpec ) throws Exception { final List segments = Lists.newArrayListWithCapacity(inputs.size()); @@ -400,6 +412,7 @@ public class NestedDataTestUtils .withMinTimestamp(0) .build() ) + .indexSpec(indexSpec) .inputSource(inputSource) .inputFormat(inputFormat) .transform(transformSpec) @@ -464,7 +477,8 @@ public class NestedDataTestUtils NestedDataTestUtils.createSegmentsForJsonInput( tempFolder, closer, - jsonInputFile + jsonInputFile, + new IndexSpec() ) ) .add(NestedDataTestUtils.createIncrementalIndexForJsonInput(tempFolder, jsonInputFile)) @@ -514,14 +528,17 @@ public class NestedDataTestUtils NestedDataTestUtils.createSegmentsForJsonInput( tempFolder, closer, - jsonInputFile + jsonInputFile, + new IndexSpec() ) ) - .addAll(NestedDataTestUtils.createSegmentsForJsonInput( - tempFolder, - closer, - jsonInputFile - ) + .addAll( + NestedDataTestUtils.createSegmentsForJsonInput( + tempFolder, + closer, + jsonInputFile, + new IndexSpec() + ) ) .build(); } @@ -536,6 +553,58 @@ public class NestedDataTestUtils return "segments"; } }); + segmentsGenerators.add(new BiFunction>() + { + @Override + public List apply(TemporaryFolder tempFolder, Closer closer) + { + try { + return ImmutableList.builder() + .addAll( + NestedDataTestUtils.createSegmentsForJsonInput( + tempFolder, + closer, + jsonInputFile, + new IndexSpec( + null, + null, + new StringEncodingStrategy.FrontCoded(4, (byte) 0x01), + null, + null, + null, + null + ) + ) + ) + .addAll( + NestedDataTestUtils.createSegmentsForJsonInput( + tempFolder, + closer, + jsonInputFile, + new IndexSpec( + null, + null, + new StringEncodingStrategy.FrontCoded(4, (byte) 0x00), + null, + null, + null, + null + ) + ) + ) + .build(); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public String toString() + { + return "segments-frontcoded"; + } + }); return segmentsGenerators; } } diff --git a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java index 030a40b29c3..c6f9a0b7761 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java @@ -569,7 +569,7 @@ public class NestedDataGroupByQueryTest extends InitializedNullHandlingTest return; } } - if (!"segments".equals(segmentsName)) { + if (!"segments".equals(segmentsName) && !"segments-frontcoded".equals(segmentsName)) { if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) { Throwable t = Assert.assertThrows(RuntimeException.class, runner::get); Assert.assertEquals( diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 6e70d4cf53c..adc379b38b1 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -39,6 +39,7 @@ import org.apache.druid.query.filter.BoundDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.ordering.StringComparators; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.Segment; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.transform.TransformSpec; @@ -135,7 +136,8 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest TransformSpec.NONE, NestedDataTestUtils.COUNT, Granularities.YEAR, - true + true, + new IndexSpec() ) ).build(); @@ -308,7 +310,8 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest closer, NestedDataTestUtils.SIMPLE_DATA_FILE, Granularities.HOUR, - true + true, + new IndexSpec() ); final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); @@ -491,7 +494,8 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest TransformSpec.NONE, NestedDataTestUtils.COUNT, Granularities.DAY, - true + true, + new IndexSpec() ); @@ -550,7 +554,8 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest TransformSpec.NONE, aggs, Granularities.NONE, - true + true, + new IndexSpec() ); diff --git a/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIndexedTest.java b/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIndexedTest.java index 98e6b4bbbf0..8b055188e63 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIndexedTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIndexedTest.java @@ -444,6 +444,7 @@ public class FrontCodedIndexedTest extends InitializedNullHandlingTest } index++; } + Assert.assertEquals(index, writer.getCardinality()); // check 'get' again so that we aren't always reading from current page index = 0; diff --git a/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedTest.java b/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedTest.java index 8a08a2a972a..5466e8ff5a3 100644 --- a/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedTest.java +++ b/processing/src/test/java/org/apache/druid/segment/data/FrontCodedIntArrayIndexedTest.java @@ -414,6 +414,7 @@ public class FrontCodedIntArrayIndexedTest assertSame(index, next, writer.get(index)); index++; } + Assert.assertEquals(index, writer.getCardinality()); // check 'get' again so that we aren't always reading from current page index = 0; diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnSelectorsTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnSelectorsTest.java index 486bb6df4f6..2917b7eb4f8 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedFieldColumnSelectorsTest.java @@ -36,6 +36,7 @@ import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; import org.apache.druid.segment.DoubleColumnSelector; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.LongColumnSelector; import org.apache.druid.segment.Segment; import org.apache.druid.segment.StorageAdapter; @@ -336,7 +337,8 @@ public class NestedFieldColumnSelectorsTest TransformSpec.NONE, NestedDataTestUtils.COUNT, Granularities.NONE, - true + true, + new IndexSpec() ); Assert.assertEquals(1, segments.size()); StorageAdapter storageAdapter = segments.get(0).asStorageAdapter(); @@ -366,7 +368,8 @@ public class NestedFieldColumnSelectorsTest TransformSpec.NONE, NestedDataTestUtils.COUNT, Granularities.NONE, - true + true, + new IndexSpec() ); Assert.assertEquals(1, segments.size()); StorageAdapter storageAdapter = segments.get(0).asStorageAdapter();