fix nested column range index range computation (#13297)

* fix nested column range index range computation

* simplify, add missing bounds check for FixedIndexed
This commit is contained in:
Clint Wylie 2022-11-02 21:37:41 -07:00 committed by GitHub
parent e5ad24ff9f
commit 018f984781
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 160 additions and 17 deletions

View File

@ -22,6 +22,7 @@ package org.apache.druid.segment.data;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.TypeStrategy;
@ -118,6 +119,12 @@ public class FixedIndexed<T> implements Indexed<T>
@Override
public T get(int index)
{
if (index < 0) {
throw new IAE("Index[%s] < 0", index);
}
if (index >= size) {
throw new IAE("Index[%d] >= size[%d]", index, size);
}
if (hasNull) {
if (index == 0) {
return null;

View File

@ -191,6 +191,7 @@ public class NestedFieldLiteralColumnIndexSupplier<TStringDictionary extends Ind
{
int globalStartIndex, globalEndIndex;
int localStartIndex, localEndIndex;
// start with standard range finding in global value dictionary
if (startValue == null) {
globalStartIndex = adjust == 0 ? 1 : adjust; // global index 0 is always the null value
} else {
@ -202,16 +203,6 @@ public class NestedFieldLiteralColumnIndexSupplier<TStringDictionary extends Ind
}
}
// with starting global index settled, now lets find starting local index
int localFound = localDictionary.indexOf(globalStartIndex);
if (localFound < 0) {
// the first valid global index is not within the local dictionary, so the insertion point is where we begin
localStartIndex = -(localFound + 1);
} else {
// valid global index in local dictionary, start here
localStartIndex = localFound;
}
if (endValue == null) {
globalEndIndex = globalDictionary.size() + adjust;
} else {
@ -223,16 +214,32 @@ public class NestedFieldLiteralColumnIndexSupplier<TStringDictionary extends Ind
}
}
globalEndIndex = Math.max(globalStartIndex, globalEndIndex);
// end index is not inclusive, so we find the last value in the local dictionary that falls within the range
int localEndFound = localDictionary.indexOf(globalEndIndex - 1);
if (globalStartIndex == globalEndIndex) {
return new IntIntImmutablePair(0, 0);
}
// with global dictionary id range settled, now lets map that onto a local dictionary id range
int localFound = localDictionary.indexOf(globalStartIndex);
if (localFound < 0) {
// the first valid global index is not within the local dictionary, so the insertion point is where we begin
localStartIndex = -(localFound + 1);
} else {
// valid global index in local dictionary, start here
localStartIndex = localFound;
}
// global end index is exclusive already, so we don't adjust local end index even for missing values
int localEndFound = localDictionary.indexOf(globalEndIndex);
if (localEndFound < 0) {
localEndIndex = -localEndFound;
} else {
// add 1 because the last valid global end value is in the local dictionary, and end index is exclusive
localEndIndex = localEndFound + 1;
localEndIndex = localEndFound;
}
return new IntIntImmutablePair(localStartIndex, Math.min(localDictionary.size(), localEndIndex));
localStartIndex = Math.min(localStartIndex, localDictionary.size());
localEndIndex = Math.max(localStartIndex, Math.min(localDictionary.size(), localEndIndex));
return new IntIntImmutablePair(localStartIndex, localEndIndex);
}

View File

@ -75,6 +75,9 @@ public class FixedIndexedTest extends InitializedNullHandlingTest
Assert.assertEquals(LONGS[i], fixedIndexed.get(i));
Assert.assertEquals(i, fixedIndexed.indexOf(LONGS[i]));
}
Assert.assertThrows(IllegalArgumentException.class, () -> fixedIndexed.get(-1));
Assert.assertThrows(IllegalArgumentException.class, () -> fixedIndexed.get(LONGS.length));
}
@Test

View File

@ -23,6 +23,7 @@ import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.bitmap.MutableBitmap;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.DefaultBitmapResultFactory;
import org.apache.druid.query.filter.DruidPredicateFactory;
@ -35,6 +36,7 @@ import org.apache.druid.segment.column.DruidPredicateIndex;
import org.apache.druid.segment.column.LexicographicalRangeIndex;
import org.apache.druid.segment.column.NullValueIndex;
import org.apache.druid.segment.column.NumericRangeIndex;
import org.apache.druid.segment.column.SpatialIndex;
import org.apache.druid.segment.column.StringValueSetIndex;
import org.apache.druid.segment.column.TypeStrategies;
import org.apache.druid.segment.data.BitmapSerdeFactory;
@ -139,6 +141,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
NullValueIndex nullIndex = indexSupplier.as(NullValueIndex.class);
Assert.assertNotNull(nullIndex);
// sanity check to make sure we don't return indexes we don't support
Assert.assertNull(indexSupplier.as(SpatialIndex.class));
// 10 rows
// local: [b, foo, fooo, z]
// column: [foo, b, fooo, b, z, fooo, z, b, b, foo]
@ -500,6 +505,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class);
Assert.assertNotNull(valueSetIndex);
// sanity check to make sure we don't return indexes we don't support
Assert.assertNull(indexSupplier.as(SpatialIndex.class));
// 10 rows
// local: [1, 3, 100, 300]
// column: [100, 1, 300, 1, 3, 3, 100, 300, 300, 1]
@ -614,6 +622,25 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
Assert.assertEquals(0.5, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 1, 3, 4, 7, 9);
// set index with null
TreeSet<String> treeSet = new TreeSet<>(Comparators.naturalNullsFirst());
treeSet.add(null);
treeSet.add("1");
treeSet.add("3");
treeSet.add("300");
columnIndex = valueSetIndex.forSortedValues(treeSet);
Assert.assertNotNull(columnIndex);
Assert.assertEquals(0.8, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 1, 2, 3, 4, 5, 7, 8, 9);
// null value should really use NullValueIndex, but this works for classic reasons
columnIndex = valueSetIndex.forValue(null);
Assert.assertNotNull(columnIndex);
Assert.assertEquals(0.3, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 2, 5, 8);
}
@Test
@ -677,6 +704,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class);
Assert.assertNotNull(valueSetIndex);
// sanity check to make sure we don't return indexes we don't support
Assert.assertNull(indexSupplier.as(SpatialIndex.class));
// 10 rows
// local: [1.1, 1.2, 3.3, 6.6]
// column: [1.1, 1.1, 1.2, 3.3, 1.2, 6.6, 3.3, 1.2, 1.1, 3.3]
@ -723,10 +753,10 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
forRange = rangeIndex.forRange(1.1, true, 3.3, true);
Assert.assertNotNull(forRange);
Assert.assertEquals(0.6, forRange.estimateSelectivity(10), 0.0);
Assert.assertEquals(0.3, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 2, 3, 4, 6, 7, 9);
checkBitmap(bitmap, 2, 4, 7);
forRange = rangeIndex.forRange(null, true, null, true);
Assert.assertEquals(1.0, forRange.estimateSelectivity(10), 0.0);
@ -737,6 +767,55 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
Assert.assertEquals(1.0, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
forRange = rangeIndex.forRange(1.111, true, 1.19, true);
Assert.assertNotNull(forRange);
Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap);
forRange = rangeIndex.forRange(1.01, true, 1.09, true);
Assert.assertNotNull(forRange);
Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap);
forRange = rangeIndex.forRange(0.05, true, 0.98, true);
Assert.assertNotNull(forRange);
Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap);
forRange = rangeIndex.forRange(0.05, true, 1.1, true);
Assert.assertNotNull(forRange);
Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap);
forRange = rangeIndex.forRange(8.99, true, 10.10, true);
Assert.assertNotNull(forRange);
Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap);
forRange = rangeIndex.forRange(8.99, true, 10.10, true);
Assert.assertNotNull(forRange);
Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap);
forRange = rangeIndex.forRange(10.00, true, 10.10, true);
Assert.assertNotNull(forRange);
Assert.assertEquals(0.0, forRange.estimateSelectivity(10), 0.0);
bitmap = forRange.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap);
}
@Test
@ -805,6 +884,25 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
Assert.assertEquals(0.4, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 2, 4, 7, 9);
// set index with null
TreeSet<String> treeSet = new TreeSet<>(Comparators.naturalNullsFirst());
treeSet.add(null);
treeSet.add("1.2");
treeSet.add("3.3");
treeSet.add("7.7");
columnIndex = valueSetIndex.forSortedValues(treeSet);
Assert.assertNotNull(columnIndex);
Assert.assertEquals(0.7, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 1, 2, 3, 4, 6, 7, 9);
// null value should really use NullValueIndex, but this works for classic reasons
columnIndex = valueSetIndex.forValue(null);
Assert.assertNotNull(columnIndex);
Assert.assertEquals(0.3, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 1, 3, 6);
}
@Test
@ -868,6 +966,9 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
NullValueIndex nullIndex = indexSupplier.as(NullValueIndex.class);
Assert.assertNotNull(nullIndex);
// sanity check to make sure we don't return indexes we don't support
Assert.assertNull(indexSupplier.as(SpatialIndex.class));
// 10 rows
// local: [null, b, z, 1, 300, 1.1, 9.9]
// column: [1, b, null, 9.9, 300, 1, z, null, 1.1, b]
@ -915,6 +1016,26 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
Assert.assertEquals(0.4, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 1, 3, 4, 9);
// set index with null
TreeSet<String> treeSet = new TreeSet<>(Comparators.naturalNullsFirst());
treeSet.add(null);
treeSet.add("b");
treeSet.add("300");
treeSet.add("9.9");
treeSet.add("1.6");
columnIndex = valueSetIndex.forSortedValues(treeSet);
Assert.assertNotNull(columnIndex);
Assert.assertEquals(0.6, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 1, 2, 3, 4, 7, 9);
// null value should really use NullValueIndex, but this works for classic reasons
columnIndex = valueSetIndex.forValue(null);
Assert.assertNotNull(columnIndex);
Assert.assertEquals(0.2, columnIndex.estimateSelectivity(10), 0.0);
bitmap = columnIndex.computeBitmapResult(bitmapResultFactory);
checkBitmap(bitmap, 2, 7);
}
@Test
@ -972,6 +1093,11 @@ public class NestedFieldLiteralColumnIndexSupplierTest extends InitializedNullHa
Assert.assertEquals("300", lowLevelIndex.getValue(4));
Assert.assertEquals("1.1", lowLevelIndex.getValue(5));
Assert.assertEquals("9.9", lowLevelIndex.getValue(6));
Assert.assertEquals(7, lowLevelIndex.getCardinality());
checkBitmap(lowLevelIndex.getBitmap(0), 2, 7);
checkBitmap(lowLevelIndex.getBitmap(1), 1, 9);
checkBitmap(lowLevelIndex.getBitmap(-1));
}
private NestedFieldLiteralColumnIndexSupplier<?> makeSingleTypeStringSupplier() throws IOException