smarter nested column index utilization (#13977)

* smarter nested column index utilization
changes:
* adds skipValueRangeIndexScale and skipValuePredicateIndexScale to ColumnConfig (e.g. DruidProcessingConfig) available as system config via druid.processing.indexes.skipValueRangeIndexScale and druid.processing.indexes.skipValuePredicateIndexScale
* NestedColumnIndexSupplier uses skipValueRangeIndexScale and skipValuePredicateIndexScale to multiply by the total number of rows to be processed to determine the threshold at which we should no longer consider using bitmap indexes because it will be too many operations
* Default values for skipValueRangeIndexScale and skipValuePredicateIndexScale have been initially set to 0.08, but are separate to allow independent tuning
* these are not documented on purpose yet because they are kind of hard to explain, they mainly exist to help conduct larger scale experiments than the jmh benchmarks used to derive the initial set of values
* these changes provide a pretty sweet performance boost for filter processing on nested columns
This commit is contained in:
Clint Wylie 2023-04-06 04:09:24 -07:00 committed by GitHub
parent 030ed911d4
commit b11c0bc249
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
38 changed files with 806 additions and 245 deletions

View File

@ -175,7 +175,31 @@ public class SqlNestedDataBenchmark
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 1005.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
// 28, 29
"SELECT SUM(long1) FROM foo WHERE double3 < 2000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 2000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0"
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 2000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
// 30, 31
"SELECT SUM(long1) FROM foo WHERE double3 < 3000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 3000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
// 32,33
"SELECT SUM(long1) FROM foo WHERE double3 < 5000.0 AND double3 > 1000.0",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) < 5000.0 AND JSON_VALUE(nested, '$.nesteder.double3' RETURNING DOUBLE) > 1000.0",
// 34,35 smaller cardinality like range filter
"SELECT SUM(long1) FROM foo WHERE string1 LIKE '1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') LIKE '1%'",
// 36,37 smaller cardinality like predicate filter
"SELECT SUM(long1) FROM foo WHERE string1 LIKE '%1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string1') LIKE '%1%'",
// 38-39 moderate cardinality like range
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '1%'",
// 40, 41 big cardinality lex range
"SELECT SUM(long1) FROM foo WHERE string5 > '1'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') > '1'",
// 42, 43 big cardinality like predicate filter
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%'",
// 44, 45 big cardinality like filter + selector filter
"SELECT SUM(long1) FROM foo WHERE string5 LIKE '%1%' AND string1 = '1000'",
"SELECT SUM(JSON_VALUE(nested, '$.long1' RETURNING BIGINT)) FROM foo WHERE JSON_VALUE(nested, '$.nesteder.string5') LIKE '%1%' AND JSON_VALUE(nested, '$.nesteder.string1') = '1000'"
);
@Param({"5000000"})
@ -187,7 +211,11 @@ public class SqlNestedDataBenchmark
})
private String vectorize;
@Param({"none", "front-coded-4", "front-coded-16"})
@Param({
"none",
"front-coded-4",
"front-coded-16"
})
private String stringEncoding;
@Param({
@ -220,7 +248,23 @@ public class SqlNestedDataBenchmark
"26",
"27",
"28",
"29"
"29",
"30",
"31",
"32",
"33",
"34",
"35",
"36",
"37",
"38",
"39",
"40",
"41",
"42",
"43",
"44",
"45"
})
private String query;
@ -253,7 +297,7 @@ public class SqlNestedDataBenchmark
ImmutableList.of(
new ExpressionTransform(
"nested",
"json_object('long1', long1, 'nesteder', json_object('string1', string1, 'long2', long2, 'double3',double3))",
"json_object('long1', long1, 'nesteder', json_object('string1', string1, 'long2', long2, 'double3',double3, 'string5', string5))",
TestExprMacroTable.INSTANCE
)
)

View File

@ -149,6 +149,20 @@ public abstract class DruidProcessingConfig extends ExecutorServiceConfig implem
return 0;
}
/**
* Exposes {@link ColumnConfig#skipValueRangeIndexScale()} as the processing config property
* {@code ${base_path}.indexes.skipValueRangeIndexScale}.
* <p>
* The body returns the {@link ColumnConfig} interface default. NOTE(review): presumably the config framework
* behind {@code @Config} substitutes the configured value when the property is set - confirm.
*/
@Override
@Config(value = "${base_path}.indexes.skipValueRangeIndexScale")
public double skipValueRangeIndexScale()
{
return ColumnConfig.super.skipValueRangeIndexScale();
}
/**
* Exposes {@link ColumnConfig#skipValuePredicateIndexScale()} as the processing config property
* {@code ${base_path}.indexes.skipValuePredicateIndexScale}.
* <p>
* The body returns the {@link ColumnConfig} interface default. NOTE(review): presumably the config framework
* behind {@code @Config} substitutes the configured value when the property is set - confirm.
*/
@Override
@Config(value = "${base_path}.indexes.skipValuePredicateIndexScale")
public double skipValuePredicateIndexScale()
{
return ColumnConfig.super.skipValuePredicateIndexScale();
}
@Config(value = "${base_path}.fifo")
public boolean isFifo()
{

View File

@ -77,7 +77,6 @@ public class AutoTypeColumnMerger implements DimensionMergerV9
private final String name;
private final IndexSpec indexSpec;
private final SegmentWriteOutMedium segmentWriteOutMedium;
private final ProgressIndicator progressIndicator;
private final Closer closer;
private NestedCommonFormatColumnSerializer serializer;
@ -87,7 +86,6 @@ public class AutoTypeColumnMerger implements DimensionMergerV9
String name,
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
ProgressIndicator progressIndicator,
Closer closer
)
{
@ -95,7 +93,6 @@ public class AutoTypeColumnMerger implements DimensionMergerV9
this.name = name;
this.indexSpec = indexSpec;
this.segmentWriteOutMedium = segmentWriteOutMedium;
this.progressIndicator = progressIndicator;
this.closer = closer;
}
@ -142,7 +139,6 @@ public class AutoTypeColumnMerger implements DimensionMergerV9
name,
indexSpec,
segmentWriteOutMedium,
progressIndicator,
closer
);
serializer = longSerializer;
@ -152,7 +148,6 @@ public class AutoTypeColumnMerger implements DimensionMergerV9
name,
indexSpec,
segmentWriteOutMedium,
progressIndicator,
closer
);
serializer = doubleSerializer;
@ -162,7 +157,6 @@ public class AutoTypeColumnMerger implements DimensionMergerV9
name,
indexSpec,
segmentWriteOutMedium,
progressIndicator,
closer
);
serializer = stringSerializer;
@ -172,7 +166,6 @@ public class AutoTypeColumnMerger implements DimensionMergerV9
name,
indexSpec,
segmentWriteOutMedium,
progressIndicator,
closer
);
serializer = arraySerializer;
@ -191,7 +184,6 @@ public class AutoTypeColumnMerger implements DimensionMergerV9
name,
indexSpec,
segmentWriteOutMedium,
progressIndicator,
closer
);
serializer = defaultSerializer;

View File

@ -80,7 +80,7 @@ public class NestedCommonFormatColumnHandler implements DimensionHandler<Structu
Closer closer
)
{
return new AutoTypeColumnMerger(name, indexSpec, segmentWriteOutMedium, progress, closer);
return new AutoTypeColumnMerger(name, indexSpec, segmentWriteOutMedium, closer);
}
@Override

View File

@ -54,7 +54,6 @@ public class NestedDataColumnMerger implements DimensionMergerV9
private final String name;
private final IndexSpec indexSpec;
private final SegmentWriteOutMedium segmentWriteOutMedium;
private final ProgressIndicator progressIndicator;
private final Closer closer;
private ColumnDescriptor.Builder descriptorBuilder;
@ -64,7 +63,6 @@ public class NestedDataColumnMerger implements DimensionMergerV9
String name,
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
ProgressIndicator progressIndicator,
Closer closer
)
{
@ -72,7 +70,6 @@ public class NestedDataColumnMerger implements DimensionMergerV9
this.name = name;
this.indexSpec = indexSpec;
this.segmentWriteOutMedium = segmentWriteOutMedium;
this.progressIndicator = progressIndicator;
this.closer = closer;
}
@ -115,7 +112,6 @@ public class NestedDataColumnMerger implements DimensionMergerV9
name,
indexSpec,
segmentWriteOutMedium,
progressIndicator,
closer
);
serializer = defaultSerializer;

View File

@ -80,7 +80,7 @@ public class NestedDataDimensionHandler implements DimensionHandler<StructuredDa
Closer closer
)
{
return new NestedDataColumnMerger(name, indexSpec, segmentWriteOutMedium, progress, closer);
return new NestedDataColumnMerger(name, indexSpec, segmentWriteOutMedium, closer);
}
@Override

View File

@ -22,4 +22,70 @@ package org.apache.druid.segment.column;
/**
* Configuration consulted when reading columns: the column cache size, plus the scale factors that decide when a
* {@link ColumnIndexSupplier} should give up on bitmap indexes and let the filter fall back to a scan.
*/
public interface ColumnConfig
{
int columnCacheSizeBytes();
/**
* If the total number of rows in a column multiplied by this value is smaller than the total number of bitmap
* index operations required to use a {@link LexicographicalRangeIndex} or {@link NumericRangeIndex}, then any
* {@link ColumnIndexSupplier} which chooses to participate in this config will skip computing the index,
* indicated by a return value of null from the 'forRange' methods, to force the filter to be processed
* with a scan using a {@link org.apache.druid.query.filter.ValueMatcher} instead.
* <p>
* For range indexes on columns where every value has an index, the number of bitmap operations is determined by how
* many individual values fall in the range, a subset of the column's total cardinality.
* <p>
* Currently only the {@link org.apache.druid.segment.nested.NestedCommonFormatColumn} implementations of
* {@link ColumnIndexSupplier} support this behavior.
* <p>
* This can make some standalone filters faster in cases where the overhead of walking the value dictionary and
* combining bitmaps to construct a {@link org.apache.druid.segment.BitmapOffset} or
* {@link org.apache.druid.segment.vector.BitmapVectorOffset} can exceed the cost of just doing a full scan
* and using a {@link org.apache.druid.query.filter.ValueMatcher}.
* <p>
* Where this is especially useful is in cases where the range index is used as part of some
* {@link org.apache.druid.segment.filter.AndFilter}, which segment processing partitions into groups of 'pre'
* filters, composed of those which should use indexes, and 'post' filters, which should use a matcher on the offset
* created by the indexes to filter the remaining results. This value pushes what would have been expensive index
* computations in the 'pre' group into using a value matcher as part of the 'post' group instead, sometimes
* providing an order of magnitude or higher performance increase.
*/
default double skipValueRangeIndexScale()
{
// this value was chosen by testing bound filters on double columns with a variety of ranges; this ratio of
// number of bitmaps to total number of rows appeared to be around the threshold where indexes stopped
// performing consistently faster than a full scan + value matcher
return 0.08;
}
/**
* If the total number of rows in a column multiplied by this value is smaller than the total number of bitmap
* index operations required to use a {@link DruidPredicateIndex}, then any {@link ColumnIndexSupplier}
* which chooses to participate in this config will skip computing the index, in favor of doing a full scan and
* using a {@link org.apache.druid.query.filter.ValueMatcher} instead. This is indicated by returning null from
* {@link ColumnIndexSupplier#as(Class)} even though it would have otherwise been able to create a
* {@link BitmapColumnIndex}. For predicate indexes, this is determined by the total value cardinality of the column
* for columns with an index for every value.
* <p>
* Currently only the {@link org.apache.druid.segment.nested.NestedCommonFormatColumn} implementations of
* {@link ColumnIndexSupplier} support this behavior.
* <p>
* This can make some standalone filters faster in cases where the overhead of walking the value dictionary and
* combining bitmaps to construct a {@link org.apache.druid.segment.BitmapOffset} or
* {@link org.apache.druid.segment.vector.BitmapVectorOffset} can exceed the cost of just doing a full scan
* and using a {@link org.apache.druid.query.filter.ValueMatcher}.
* <p>
* Where this is especially useful is in cases where the predicate index is used as part of some
* {@link org.apache.druid.segment.filter.AndFilter}, which segment processing partitions into groups of 'pre'
* filters, composed of those which should use indexes, and 'post' filters, which should use a matcher on the offset
* created by the indexes to filter the remaining results. This value pushes what would have been expensive index
* computations in the 'pre' group into using a value matcher as part of the 'post' group instead, sometimes
* providing an order of magnitude or higher performance increase.
* <p>
* This value is separate from {@link #skipValueRangeIndexScale()} since the dynamics of computing predicate indexes
* are potentially different than the much cheaper range calculations (especially for numeric values), so having a
* separate control knob allows predicate handling to be tuned independently of ranges.
*/
default double skipValuePredicateIndexScale()
{
// starts at the same value as skipValueRangeIndexScale(); kept as its own method so it can be tuned separately
return 0.08;
}
}

View File

@ -44,4 +44,14 @@ public interface ColumnIndexSupplier
*/
@Nullable
<T> T as(Class<T> clazz);
/**
 * Decides whether a range index should be skipped in favor of a scan.
 *
 * @param columnConfig supplies {@link ColumnConfig#skipValueRangeIndexScale()}
 * @param numRows      row count that the scale is multiplied by
 * @param rangeSize    number of dictionary entries covered by the requested range
 * @return true when {@code rangeSize} is strictly greater than {@code ceil(scale * numRows)} truncated to an int
 */
static boolean skipComputingRangeIndexes(ColumnConfig columnConfig, int numRows, int rangeSize)
{
  final double scaledRows = columnConfig.skipValueRangeIndexScale() * numRows;
  final int maxBitmaps = (int) Math.ceil(scaledRows);
  return maxBitmaps < rangeSize;
}
/**
 * Decides whether a predicate index should be skipped in favor of a scan.
 *
 * @param columnConfig          supplies {@link ColumnConfig#skipValuePredicateIndexScale()}
 * @param numRowsToScan         row count that the scale is multiplied by
 * @param dictionaryCardinality number of entries in the value dictionary the predicate would be evaluated against
 * @return true when {@code dictionaryCardinality} is strictly greater than {@code ceil(scale * numRowsToScan)}
 * truncated to an int
 */
static boolean skipComputingPredicateIndexes(ColumnConfig columnConfig, int numRowsToScan, int dictionaryCardinality)
{
  final double scaledRows = columnConfig.skipValuePredicateIndexScale() * numRowsToScan;
  final int maxBitmaps = (int) Math.ceil(scaledRows);
  return maxBitmaps < dictionaryCardinality;
}
}

View File

@ -19,6 +19,8 @@
package org.apache.druid.segment.column;
import org.apache.druid.collections.bitmap.BitmapFactory;
import javax.annotation.Nullable;
/**
@ -41,4 +43,7 @@ public interface DictionaryEncodedStringValueIndex extends DictionaryEncodedValu
*/
@Nullable
String getValue(int index);
/**
* Get the {@link BitmapFactory} associated with this index.
* <p>
* NOTE(review): the suppressed 'unused' warning suggests nothing in the main code path calls this yet - confirm
* the intended consumers.
*/
@SuppressWarnings({"unreachable", "unused"})
BitmapFactory getBitmapFactory();
}

View File

@ -21,10 +21,19 @@ package org.apache.druid.segment.column;
import org.apache.druid.query.filter.DruidPredicateFactory;
import javax.annotation.Nullable;
/**
* Uses a {@link DruidPredicateFactory} to construct a {@link BitmapColumnIndex}
*/
public interface DruidPredicateIndex
{
/**
* Get a {@link BitmapColumnIndex} corresponding to all the rows that match the supplied {@link DruidPredicateFactory}
* <p>
* If this method returns null it indicates that no index is being offered for the supplied predicate and a
* {@link org.apache.druid.query.filter.ValueMatcher} must be used instead.
*
* @param matcherFactory factory for the predicates that select matching values
* @return an index over the matching rows, or null when the caller must fall back to a value matcher
*/
@Nullable
BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory);
}

View File

@ -56,6 +56,12 @@ public final class IndexedStringDictionaryEncodedStringValueIndex<TDictionary ex
return dictionary.get(index);
}
/**
* Returns the value held in {@code bitmapFactory}.
*/
@Override
public BitmapFactory getBitmapFactory()
{
return bitmapFactory;
}
@Override
public ImmutableBitmap getBitmap(int idx)
{

View File

@ -31,24 +31,63 @@ import java.util.NoSuchElementException;
public final class IndexedStringDruidPredicateIndex<TDictionary extends Indexed<String>> implements DruidPredicateIndex
{
/**
* {@link ColumnConfig} whose skip scales are both 1.0, so the skip threshold computed by
* {@link ColumnIndexSupplier#skipComputingRangeIndexes} and
* {@link ColumnIndexSupplier#skipComputingPredicateIndexes} equals the row count passed alongside it. The
* constructors that default to this config pair it with {@link Integer#MAX_VALUE} rows.
*/
static final ColumnConfig ALWAYS_USE_INDEXES = new ColumnConfig()
{
@Override
public int columnCacheSizeBytes()
{
return 0;
}
@Override
public double skipValueRangeIndexScale()
{
// threshold becomes ceil(1.0 * numRows) == numRows
return 1.0;
}
@Override
public double skipValuePredicateIndexScale()
{
// threshold becomes ceil(1.0 * numRows) == numRows
return 1.0;
}
};
private final BitmapFactory bitmapFactory;
private final TDictionary dictionary;
private final Indexed<ImmutableBitmap> bitmaps;
private final ColumnConfig columnConfig;
private final int numRows;
/**
* Creates a predicate index that never skips index computation: it delegates with {@link #ALWAYS_USE_INDEXES}
* (scale 1.0) and {@link Integer#MAX_VALUE} rows, so the skip threshold is {@link Integer#MAX_VALUE} and no
* dictionary size can exceed it.
*/
public IndexedStringDruidPredicateIndex(
BitmapFactory bitmapFactory,
TDictionary dictionary,
Indexed<ImmutableBitmap> bitmaps
)
{
this(bitmapFactory, dictionary, bitmaps, ALWAYS_USE_INDEXES, Integer.MAX_VALUE);
}
/**
 * Creates a predicate index that consults {@code columnConfig} and {@code numRows} to decide whether
 * {@code forPredicate} should return null instead of building an index.
 *
 * @param bitmapFactory factory stored for use by this index
 * @param dictionary    value dictionary; its size is compared against the skip threshold
 * @param bitmaps       bitmaps stored for use by this index
 * @param columnConfig  source of the predicate skip scale; null falls back to {@link #ALWAYS_USE_INDEXES}
 * @param numRows       row count the skip scale is multiplied by
 */
public IndexedStringDruidPredicateIndex(
    BitmapFactory bitmapFactory,
    TDictionary dictionary,
    Indexed<ImmutableBitmap> bitmaps,
    @Nullable ColumnConfig columnConfig,
    int numRows
)
{
  this.bitmapFactory = bitmapFactory;
  this.dictionary = dictionary;
  this.bitmaps = bitmaps;
  // the parameter is declared @Nullable, but forPredicate hands this field to
  // ColumnIndexSupplier.skipComputingPredicateIndexes, which dereferences it; substitute a config here so a
  // null argument does not surface later as a NullPointerException at query time
  this.columnConfig = columnConfig == null ? ALWAYS_USE_INDEXES : columnConfig;
  this.numRows = numRows;
}
@Override
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
if (ColumnIndexSupplier.skipComputingPredicateIndexes(columnConfig, numRows, dictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override

View File

@ -44,21 +44,46 @@ public final class IndexedUtf8LexicographicalRangeIndex<TDictionary extends Inde
private final Indexed<ImmutableBitmap> bitmaps;
private final boolean hasNull;
private final ColumnConfig columnConfig;
private final int numRows;
/**
* Creates a range index that never skips index computation: it delegates with
* {@link IndexedStringDruidPredicateIndex#ALWAYS_USE_INDEXES} (scale 1.0) and {@link Integer#MAX_VALUE} rows, so
* the skip threshold is {@link Integer#MAX_VALUE} and no range size can exceed it.
*/
public IndexedUtf8LexicographicalRangeIndex(
BitmapFactory bitmapFactory,
TDictionary dictionary,
Indexed<ImmutableBitmap> bitmaps,
boolean hasNull
)
{
this(
bitmapFactory,
dictionary,
bitmaps,
hasNull,
IndexedStringDruidPredicateIndex.ALWAYS_USE_INDEXES,
Integer.MAX_VALUE
);
}
/**
 * Creates a range index that consults {@code columnConfig} and {@code numRows} to decide whether
 * {@code forRange} should return null instead of building an index.
 *
 * @param bitmapFactory factory stored for use by this index
 * @param dictionary    value dictionary; must report itself as sorted
 * @param bitmaps       bitmaps stored for use by this index
 * @param hasNull       null-presence flag stored for use by this index
 * @param columnConfig  source of the range skip scale; null falls back to
 *                      {@link IndexedStringDruidPredicateIndex#ALWAYS_USE_INDEXES}
 * @param numRows       row count the skip scale is multiplied by
 * @throws IllegalArgumentException if {@code dictionary} is not sorted
 */
public IndexedUtf8LexicographicalRangeIndex(
    BitmapFactory bitmapFactory,
    TDictionary dictionary,
    Indexed<ImmutableBitmap> bitmaps,
    boolean hasNull,
    @Nullable ColumnConfig columnConfig,
    int numRows
)
{
  Preconditions.checkArgument(dictionary.isSorted(), "Dictionary must be sorted");
  this.bitmapFactory = bitmapFactory;
  this.dictionary = dictionary;
  this.bitmaps = bitmaps;
  this.hasNull = hasNull;
  // the parameter is declared @Nullable, but forRange hands this field to
  // ColumnIndexSupplier.skipComputingRangeIndexes, which dereferences it; substitute a config here so a
  // null argument does not surface later as a NullPointerException at query time
  this.columnConfig = columnConfig == null ? IndexedStringDruidPredicateIndex.ALWAYS_USE_INDEXES : columnConfig;
  this.numRows = numRows;
}
@Override
@Nullable
public BitmapColumnIndex forRange(
@Nullable String startValue,
boolean startStrict,
@ -66,6 +91,11 @@ public final class IndexedUtf8LexicographicalRangeIndex<TDictionary extends Inde
boolean endStrict
)
{
final IntIntPair range = getRange(startValue, startStrict, endValue, endStrict);
final int start = range.leftInt(), end = range.rightInt();
if (ColumnIndexSupplier.skipComputingRangeIndexes(columnConfig, numRows, end - start)) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
@ -94,6 +124,7 @@ public final class IndexedUtf8LexicographicalRangeIndex<TDictionary extends Inde
}
@Override
@Nullable
public BitmapColumnIndex forRange(
@Nullable String startValue,
boolean startStrict,

View File

@ -34,7 +34,11 @@ public interface LexicographicalRangeIndex
* Get a {@link BitmapColumnIndex} corresponding to the values supplied in the specified range. If supplied starting
* value is null, the range will begin at the first non-null value in the underlying value dictionary. If the end
* value is null, the range will extend to the last value in the underlying value dictionary.
* <p>
* If this method returns null it indicates that there is no index available that matches the requested range and a
* {@link org.apache.druid.query.filter.ValueMatcher} must be used instead.
*/
@Nullable
BitmapColumnIndex forRange(
@Nullable String startValue,
boolean startStrict,
@ -47,10 +51,14 @@ public interface LexicographicalRangeIndex
* also match some predicate, such as to match a prefix. If supplied starting value is null, the range will begin at
* the first non-null value in the underlying value dictionary that matches the predicate. If the end value is null,
* the range will extend to the last value in the underlying value dictionary that matches the predicate.
*
* <p>
* If the provided {@code matcher} is always true, it's better to use the other
* {@link #forRange(String, boolean, String, boolean)} method.
* <p>
* If this method returns null it indicates that there is no index available that matches the requested range and a
* {@link org.apache.druid.query.filter.ValueMatcher} must be used instead.
*/
@Nullable
BitmapColumnIndex forRange(
@Nullable String startValue,
boolean startStrict,

View File

@ -32,7 +32,11 @@ public interface NumericRangeIndex
* Get a {@link BitmapColumnIndex} corresponding to the values supplied in the specified range. If supplied starting
* value is null, the range will begin at the first non-null value in the underlying value dictionary. If the end
* value is null, the range will extend to the last value in the underlying value dictionary.
* <p>
* If this method returns null it indicates that there is no index available that matches the requested range and a
* {@link org.apache.druid.query.filter.ValueMatcher} must be used instead.
*/
@Nullable
BitmapColumnIndex forRange(
@Nullable Number startValue,
boolean startStrict,

View File

@ -89,11 +89,13 @@ public class BoundFilter implements Filter
boundDimFilter.getUpper(),
boundDimFilter.isUpperStrict()
);
// preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set
if (boundDimFilter.hasLowerBound() && !NullHandling.isNullOrEquivalent(boundDimFilter.getLower())) {
return rangeBitmaps;
} else {
return wrapRangeIndexWithNullValueIndex(indexSupplier, rangeBitmaps);
if (rangeBitmaps != null) {
// preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set
if (boundDimFilter.hasLowerBound() && !NullHandling.isNullOrEquivalent(boundDimFilter.getLower())) {
return rangeBitmaps;
} else {
return wrapRangeIndexWithNullValueIndex(indexSupplier, rangeBitmaps);
}
}
}
}
@ -112,16 +114,20 @@ public class BoundFilter implements Filter
upper,
boundDimFilter.isUpperStrict()
);
// preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set
if (boundDimFilter.hasLowerBound() && !NullHandling.isNullOrEquivalent(boundDimFilter.getLower())) {
return rangeBitmaps;
} else {
return wrapRangeIndexWithNullValueIndex(indexSupplier, rangeBitmaps);
if (rangeBitmaps != null) {
// preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set
if (boundDimFilter.hasLowerBound() && !NullHandling.isNullOrEquivalent(boundDimFilter.getLower())) {
return rangeBitmaps;
} else {
return wrapRangeIndexWithNullValueIndex(indexSupplier, rangeBitmaps);
}
}
}
}
// fall back to predicate based index if it is available
return Filters.makePredicateIndex(boundDimFilter.getDimension(), selector, getPredicateFactory());
}
@Nullable

View File

@ -95,6 +95,7 @@ public abstract class CompressedNestedDataComplexColumn<TStringDictionary extend
extends NestedDataComplexColumn implements NestedCommonFormatColumn
{
public static final IntTypeStrategy INT_TYPE_STRATEGY = new IntTypeStrategy();
private final ColumnConfig columnConfig;
private final Closer closer;
private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier;
private CompressedVariableSizedBlobColumn compressedRawColumn;
@ -155,6 +156,7 @@ public abstract class CompressedNestedDataComplexColumn<TStringDictionary extend
this.bitmapSerdeFactory = bitmapSerdeFactory;
this.byteOrder = byteOrder;
this.rootFieldPath = rootFieldPath;
this.columnConfig = columnConfig;
}
public abstract List<NestedPathPart> parsePath(String path);
@ -974,17 +976,24 @@ public abstract class CompressedNestedDataComplexColumn<TStringDictionary extend
columnBuilder.setHasMultipleValues(false)
.setHasNulls(hasNull)
.setDictionaryEncodedColumnSupplier(columnSupplier);
final int size;
try (ColumnarInts throwAway = ints.get()) {
size = throwAway.size();
}
columnBuilder.setIndexSupplier(
new NestedFieldColumnIndexSupplier(
types,
bitmapSerdeFactory.getBitmapFactory(),
columnConfig,
rBitmaps,
localDictionarySupplier,
stringDictionarySupplier,
longDictionarySupplier,
doubleDictionarySupplier,
arrayElementDictionarySupplier,
arrayElementBitmaps
arrayElementBitmaps,
size
),
true,
false

View File

@ -34,7 +34,6 @@ import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.IndexMerger;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.ProgressIndicator;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.StringEncodingStrategies;
import org.apache.druid.segment.column.Types;
@ -146,7 +145,6 @@ public class NestedDataColumnSerializer extends NestedCommonFormatColumnSerializ
String name,
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
@SuppressWarnings("unused") ProgressIndicator progressIndicator,
Closer closer
)
{

View File

@ -36,7 +36,6 @@ import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.GenericColumnSerializer;
import org.apache.druid.segment.IndexMerger;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.ProgressIndicator;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.StringEncodingStrategies;
import org.apache.druid.segment.column.Types;
@ -140,7 +139,6 @@ public class NestedDataColumnSerializerV4 implements GenericColumnSerializer<Str
String name,
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
@SuppressWarnings("unused") ProgressIndicator progressIndicator,
Closer closer
)
{

View File

@ -45,6 +45,7 @@ import org.apache.druid.query.filter.DruidLongPredicate;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.segment.IntListUtils;
import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
@ -92,17 +93,21 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
private final int adjustLongId;
private final int adjustDoubleId;
private final ColumnConfig columnConfig;
private final int numRows;
public NestedFieldColumnIndexSupplier(
FieldTypeInfo.TypeSet types,
BitmapFactory bitmapFactory,
ColumnConfig columnConfig,
GenericIndexed<ImmutableBitmap> bitmaps,
Supplier<FixedIndexed<Integer>> localDictionarySupplier,
Supplier<TStringDictionary> globalStringDictionarySupplier,
Supplier<FixedIndexed<Long>> globalLongDictionarySupplier,
Supplier<FixedIndexed<Double>> globalDoubleDictionarySupplier,
@Nullable Supplier<FixedIndexed<Integer>> arrayElementDictionarySupplier,
@Nullable GenericIndexed<ImmutableBitmap> arrayElementBitmaps
@Nullable GenericIndexed<ImmutableBitmap> arrayElementBitmaps,
int numRows
)
{
this.singleType = types.getSingleType();
@ -116,6 +121,8 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
this.arrayElementBitmaps = arrayElementBitmaps;
this.adjustLongId = globalStringDictionarySupplier.get().size();
this.adjustDoubleId = adjustLongId + globalLongDictionarySupplier.get().size();
this.columnConfig = columnConfig;
this.numRows = numRows;
}
@Nullable
@ -255,6 +262,7 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
}
@Nullable
private <T> BitmapColumnIndex makeRangeIndex(
@Nullable T startValue,
boolean startStrict,
@ -276,6 +284,10 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
);
final int startIndex = localRange.leftInt();
final int endIndex = localRange.rightInt();
final int size = endIndex - startIndex;
if (ColumnIndexSupplier.skipComputingRangeIndexes(columnConfig, numRows, size)) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
@ -328,6 +340,12 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
}
}
/**
* Returns the value held in {@code bitmapFactory}.
*/
@Override
public BitmapFactory getBitmapFactory()
{
return bitmapFactory;
}
@Override
public ImmutableBitmap getBitmap(int idx)
{
@ -420,6 +438,7 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
private class NestedStringLexicographicalRangeIndex implements LexicographicalRangeIndex
{
@Override
@Nullable
public BitmapColumnIndex forRange(
@Nullable String startValue,
boolean startStrict,
@ -439,6 +458,7 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
}
@Override
@Nullable
public BitmapColumnIndex forRange(
@Nullable String startValue,
boolean startStrict,
@ -447,24 +467,26 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
Predicate<String> matcher
)
{
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
final Indexed<ByteBuffer> stringDictionary = globalStringDictionarySupplier.get();
final IntIntPair range = getLocalRangeFromDictionary(
StringUtils.toUtf8ByteBuffer(startValue),
startStrict,
StringUtils.toUtf8ByteBuffer(endValue),
endStrict,
localDictionary,
stringDictionary,
0
);
final int start = range.leftInt(), end = range.rightInt();
if (ColumnIndexSupplier.skipComputingRangeIndexes(columnConfig, numRows, end - start)) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
final Indexed<ByteBuffer> stringDictionary = globalStringDictionarySupplier.get();
final IntIntPair range = getLocalRangeFromDictionary(
StringUtils.toUtf8ByteBuffer(startValue),
startStrict,
StringUtils.toUtf8ByteBuffer(endValue),
endStrict,
localDictionary,
stringDictionary,
0
);
final int start = range.leftInt(), end = range.rightInt();
return () -> new Iterator<ImmutableBitmap>()
{
int currIndex = start;
@ -514,18 +536,20 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
private class NestedStringPredicateIndex implements DruidPredicateIndex
{
@Override
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
if (ColumnIndexSupplier.skipComputingPredicateIndexes(columnConfig, numRows, localDictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
public Iterable<ImmutableBitmap> getBitmapIterable()
{
return () -> new Iterator<ImmutableBitmap>()
{
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
final Indexed<ByteBuffer> stringDictionary = globalStringDictionarySupplier.get();
final Predicate<String> stringPredicate = matcherFactory.makeStringPredicate();
@ -697,6 +721,7 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
private class NestedLongNumericRangeIndex implements NumericRangeIndex
{
@Override
@Nullable
public BitmapColumnIndex forRange(
@Nullable Number startValue,
boolean startStrict,
@ -719,8 +744,13 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
private class NestedLongPredicateIndex implements DruidPredicateIndex
{
@Override
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
if (ColumnIndexSupplier.skipComputingPredicateIndexes(columnConfig, numRows, localDictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
@ -728,7 +758,6 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
{
return () -> new Iterator<ImmutableBitmap>()
{
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
final FixedIndexed<Long> longDictionary = globalLongDictionarySupplier.get();
final DruidLongPredicate longPredicate = matcherFactory.makeLongPredicate();
@ -904,6 +933,7 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
private class NestedDoubleNumericRangeIndex implements NumericRangeIndex
{
@Override
@Nullable
public BitmapColumnIndex forRange(
@Nullable Number startValue,
boolean startStrict,
@ -926,8 +956,13 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
private class NestedDoublePredicateIndex implements DruidPredicateIndex
{
@Override
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
if (ColumnIndexSupplier.skipComputingPredicateIndexes(columnConfig, numRows, localDictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
@ -1121,8 +1156,12 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By
private class NestedVariantPredicateIndex extends NestedVariantLiteralIndex implements DruidPredicateIndex
{
@Override
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
if (ColumnIndexSupplier.skipComputingPredicateIndexes(columnConfig, numRows, localDictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override

View File

@ -38,6 +38,7 @@ import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.segment.IntListUtils;
import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
@ -71,7 +72,8 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier<NestedCommon
ByteOrder byteOrder,
BitmapSerdeFactory bitmapSerdeFactory,
ByteBuffer bb,
ColumnBuilder columnBuilder
ColumnBuilder columnBuilder,
ColumnConfig columnConfig
)
{
final byte version = bb.get();
@ -115,11 +117,17 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier<NestedCommon
bitmapSerdeFactory.getObjectStrategy(),
columnBuilder.getFileMapper()
);
final int size;
try (ColumnarDoubles throwAway = doubles.get()) {
size = throwAway.size();
}
return new ScalarDoubleColumnAndIndexSupplier(
doubleDictionarySupplier,
doubles,
rBitmaps,
bitmapSerdeFactory.getBitmapFactory()
bitmapSerdeFactory.getBitmapFactory(),
columnConfig,
size
);
}
catch (IOException ex) {
@ -130,6 +138,8 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier<NestedCommon
}
}
private final Supplier<FixedIndexed<Double>> doubleDictionarySupplier;
private final Supplier<ColumnarDoubles> valueColumnSupplier;
@ -138,12 +148,16 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier<NestedCommon
private final BitmapFactory bitmapFactory;
private final ImmutableBitmap nullValueBitmap;
private final ColumnConfig columnConfig;
private final int numRows;
private ScalarDoubleColumnAndIndexSupplier(
Supplier<FixedIndexed<Double>> longDictionary,
Supplier<ColumnarDoubles> valueColumnSupplier,
GenericIndexed<ImmutableBitmap> valueIndexes,
BitmapFactory bitmapFactory
BitmapFactory bitmapFactory,
ColumnConfig columnConfig,
int numRows
)
{
this.doubleDictionarySupplier = longDictionary;
@ -151,6 +165,8 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier<NestedCommon
this.valueIndexes = valueIndexes;
this.bitmapFactory = bitmapFactory;
this.nullValueBitmap = valueIndexes.get(0) == null ? bitmapFactory.makeEmptyImmutableBitmap() : valueIndexes.get(0);
this.columnConfig = columnConfig;
this.numRows = numRows;
}
@Override
@ -314,6 +330,7 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier<NestedCommon
private class DoubleNumericRangeIndex implements NumericRangeIndex
{
@Nullable
@Override
public BitmapColumnIndex forRange(
@Nullable Number startValue,
@ -332,6 +349,9 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier<NestedCommon
final int startIndex = range.leftInt();
final int endIndex = range.rightInt();
if (ColumnIndexSupplier.skipComputingRangeIndexes(columnConfig, numRows, endIndex - startIndex)) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
@ -360,9 +380,14 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier<NestedCommon
private class DoublePredicateIndex implements DruidPredicateIndex
{
@Nullable
@Override
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
final FixedIndexed<Double> dictionary = doubleDictionarySupplier.get();
if (ColumnIndexSupplier.skipComputingPredicateIndexes(columnConfig, numRows, dictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
@ -443,5 +468,11 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier<NestedCommon
final Double value = dictionary.get(index);
return value == null ? null : String.valueOf(value);
}
/**
 * Returns the {@code bitmapFactory} visible in this scope.
 *
 * NOTE(review): presumably this is the enclosing supplier's field, which its constructor assigns from the
 * {@code bitmapSerdeFactory.getBitmapFactory()} value passed in when the column is read; confirm that no
 * nearer declaration shadows it.
 */
@Override
public BitmapFactory getBitmapFactory()
{
return bitmapFactory;
}
}
}

View File

@ -30,7 +30,6 @@ import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.ProgressIndicator;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.data.ColumnarDoublesSerializer;
import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer;
@ -71,7 +70,6 @@ public class ScalarDoubleColumnSerializer extends NestedCommonFormatColumnSerial
String name,
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
@SuppressWarnings("unused") ProgressIndicator progressIndicator,
Closer closer
)
{

View File

@ -37,6 +37,7 @@ import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.segment.IntListUtils;
import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
@ -70,7 +71,8 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
ByteOrder byteOrder,
BitmapSerdeFactory bitmapSerdeFactory,
ByteBuffer bb,
ColumnBuilder columnBuilder
ColumnBuilder columnBuilder,
ColumnConfig columnConfig
)
{
final byte version = bb.get();
@ -114,11 +116,17 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
longsValueColumn,
byteOrder
);
final int size;
try (ColumnarLongs throwAway = longs.get()) {
size = throwAway.size();
}
return new ScalarLongColumnAndIndexSupplier(
longDictionarySupplier,
longs,
rBitmaps,
bitmapSerdeFactory.getBitmapFactory()
bitmapSerdeFactory.getBitmapFactory(),
columnConfig,
size
);
}
catch (IOException ex) {
@ -129,6 +137,7 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
}
}
private final Supplier<FixedIndexed<Long>> longDictionarySupplier;
private final Supplier<ColumnarLongs> valueColumnSupplier;
@ -138,12 +147,16 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
private final BitmapFactory bitmapFactory;
private final ImmutableBitmap nullValueBitmap;
private final ColumnConfig columnConfig;
private final int numRows;
private ScalarLongColumnAndIndexSupplier(
Supplier<FixedIndexed<Long>> longDictionarySupplier,
Supplier<ColumnarLongs> valueColumnSupplier,
GenericIndexed<ImmutableBitmap> valueIndexes,
BitmapFactory bitmapFactory
BitmapFactory bitmapFactory,
ColumnConfig columnConfig,
int numRows
)
{
this.longDictionarySupplier = longDictionarySupplier;
@ -151,6 +164,8 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
this.valueIndexes = valueIndexes;
this.bitmapFactory = bitmapFactory;
this.nullValueBitmap = valueIndexes.get(0) == null ? bitmapFactory.makeEmptyImmutableBitmap() : valueIndexes.get(0);
this.columnConfig = columnConfig;
this.numRows = numRows;
}
@Override
@ -314,6 +329,7 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
private class LongNumericRangeIndex implements NumericRangeIndex
{
@Nullable
@Override
public BitmapColumnIndex forRange(
@Nullable Number startValue,
@ -332,6 +348,9 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
final int startIndex = range.leftInt();
final int endIndex = range.rightInt();
if (ColumnIndexSupplier.skipComputingRangeIndexes(columnConfig, numRows, endIndex - startIndex)) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
@ -360,9 +379,14 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
private class LongPredicateIndex implements DruidPredicateIndex
{
@Nullable
@Override
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
FixedIndexed<Long> dictionary = longDictionarySupplier.get();
if (ColumnIndexSupplier.skipComputingPredicateIndexes(columnConfig, numRows, dictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
@ -370,7 +394,7 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
{
return () -> new Iterator<ImmutableBitmap>()
{
final Iterator<Long> iterator = longDictionarySupplier.get().iterator();
final Iterator<Long> iterator = dictionary.iterator();
final DruidLongPredicate longPredicate = matcherFactory.makeLongPredicate();
int next;
@ -444,5 +468,11 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
final Long value = dictionary.get(index);
return value == null ? null : String.valueOf(value);
}
/**
 * Returns the {@code bitmapFactory} visible in this scope.
 *
 * NOTE(review): presumably this is the enclosing supplier's field, which its constructor assigns from the
 * {@code bitmapSerdeFactory.getBitmapFactory()} value passed in when the column is read; confirm that no
 * nearer declaration shadows it.
 */
@Override
public BitmapFactory getBitmapFactory()
{
return bitmapFactory;
}
}
}

View File

@ -30,7 +30,6 @@ import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.ProgressIndicator;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.data.ColumnarLongsSerializer;
import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer;
@ -71,7 +70,6 @@ public class ScalarLongColumnSerializer extends NestedCommonFormatColumnSerializ
String name,
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
@SuppressWarnings("unused") ProgressIndicator progressIndicator,
Closer closer
)
{

View File

@ -28,6 +28,7 @@ import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
import org.apache.druid.segment.column.DictionaryEncodedValueIndex;
@ -51,6 +52,7 @@ import org.apache.druid.segment.data.FrontCodedIndexed;
import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.VByte;
import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde;
import javax.annotation.Nullable;
import java.io.IOException;
@ -63,7 +65,8 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier<NestedCommon
ByteOrder byteOrder,
BitmapSerdeFactory bitmapSerdeFactory,
ByteBuffer bb,
ColumnBuilder columnBuilder
ColumnBuilder columnBuilder,
ColumnConfig columnConfig
)
{
final byte version = bb.get();
@ -77,7 +80,7 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier<NestedCommon
final GenericIndexed<ByteBuffer> stringDictionary;
final Supplier<FrontCodedIndexed> frontCodedStringDictionarySupplier;
final ByteBuffer stringDictionaryBuffer = loadInternalFile(
final ByteBuffer stringDictionaryBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile(
mapper,
columnName,
NestedCommonFormatColumnSerializer.STRING_DICTIONARY_FILE_NAME
@ -111,7 +114,7 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier<NestedCommon
stringDictionary = GenericIndexed.read(stringDictionaryBuffer, GenericIndexed.UTF8_STRATEGY, mapper);
frontCodedStringDictionarySupplier = null;
}
final ByteBuffer encodedValueColumn = loadInternalFile(
final ByteBuffer encodedValueColumn = NestedCommonFormatColumnPartSerde.loadInternalFile(
mapper,
columnName,
NestedCommonFormatColumnSerializer.ENCODED_VALUE_COLUMN_FILE_NAME
@ -120,7 +123,7 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier<NestedCommon
encodedValueColumn,
byteOrder
);
final ByteBuffer valueIndexBuffer = loadInternalFile(
final ByteBuffer valueIndexBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile(
mapper,
columnName,
NestedCommonFormatColumnSerializer.BITMAP_INDEX_FILE_NAME
@ -130,12 +133,18 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier<NestedCommon
bitmapSerdeFactory.getObjectStrategy(),
columnBuilder.getFileMapper()
);
final int size;
try (ColumnarInts throwAway = ints.get()) {
size = throwAway.size();
}
return new ScalarStringColumnAndIndexSupplier(
stringDictionary,
frontCodedStringDictionarySupplier,
ints,
valueIndexes,
bitmapSerdeFactory
bitmapSerdeFactory,
columnConfig,
size
);
}
catch (IOException ex) {
@ -147,19 +156,24 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier<NestedCommon
}
private final GenericIndexed<ByteBuffer> stringDictionary;
private final Supplier<FrontCodedIndexed> frontCodedStringDictionarySupplier;
private final Supplier<ColumnarInts> encodedColumnSupplier;
private final GenericIndexed<ImmutableBitmap> valueIndexes;
private final ImmutableBitmap nullValueBitmap;
private final BitmapFactory bitmapFactory;
private final ColumnConfig columnConfig;
private final int numRows;
private ScalarStringColumnAndIndexSupplier(
GenericIndexed<ByteBuffer> stringDictionary,
Supplier<FrontCodedIndexed> frontCodedStringDictionarySupplier,
Supplier<ColumnarInts> encodedColumnSupplier,
GenericIndexed<ImmutableBitmap> valueIndexes,
BitmapSerdeFactory serdeFactory
BitmapSerdeFactory serdeFactory,
ColumnConfig columnConfig,
int numRows
)
{
this.stringDictionary = stringDictionary;
@ -168,6 +182,8 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier<NestedCommon
this.valueIndexes = valueIndexes;
this.bitmapFactory = serdeFactory.getBitmapFactory();
this.nullValueBitmap = valueIndexes.get(0) == null ? bitmapFactory.makeEmptyImmutableBitmap() : valueIndexes.get(0);
this.columnConfig = columnConfig;
this.numRows = numRows;
}
@Override
@ -183,17 +199,6 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier<NestedCommon
return new ScalarStringDictionaryEncodedColumn<>(encodedColumnSupplier.get(), stringDictionary.singleThreaded());
}
/**
 * Maps one internal file of a column and returns its contents as a buffer.
 *
 * The name to map is built by {@link NestedCommonFormatColumnSerializer#getInternalFileName} from the two
 * name arguments, and the lookup itself is done by {@code fileMapper.mapFile}.
 *
 * @param fileMapper       mapper used to look the file up
 * @param filenameBase     first argument handed to {@code getInternalFileName}
 * @param internalFileName second argument handed to {@code getInternalFileName}
 * @return whatever {@code fileMapper.mapFile} returns for the resolved name
 * @throws IOException propagated from the underlying calls
 */
private static ByteBuffer loadInternalFile(
    SmooshedFileMapper fileMapper,
    String filenameBase,
    String internalFileName
) throws IOException
{
  final String resolvedName = NestedCommonFormatColumnSerializer.getInternalFileName(
      filenameBase,
      internalFileName
  );
  return fileMapper.mapFile(resolvedName);
}
@Nullable
@Override
public <T> T as(Class<T> clazz)
@ -216,14 +221,18 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier<NestedCommon
return (T) new IndexedStringDruidPredicateIndex<>(
bitmapFactory,
new StringEncodingStrategies.Utf8ToStringIndexed(utf8Dictionary),
singleThreadedBitmaps
singleThreadedBitmaps,
columnConfig,
numRows
);
} else if (clazz.equals(LexicographicalRangeIndex.class)) {
return (T) new IndexedUtf8LexicographicalRangeIndex<>(
bitmapFactory,
utf8Dictionary,
singleThreadedBitmaps,
utf8Dictionary.get(0) == null
utf8Dictionary.get(0) == null,
columnConfig,
numRows
);
} else if (clazz.equals(DictionaryEncodedStringValueIndex.class)
|| clazz.equals(DictionaryEncodedValueIndex.class)) {

View File

@ -32,7 +32,6 @@ import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.ProgressIndicator;
import org.apache.druid.segment.column.StringEncodingStrategies;
import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer;
import org.apache.druid.segment.data.CompressionStrategy;
@ -69,7 +68,6 @@ public class ScalarStringColumnSerializer extends NestedCommonFormatColumnSerial
String name,
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
@SuppressWarnings("unused") ProgressIndicator progressIndicator,
Closer closer
)
{

View File

@ -28,6 +28,7 @@ import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.NullValueIndex;
@ -56,7 +57,8 @@ public class VariantArrayColumnAndIndexSupplier implements Supplier<NestedCommon
ByteOrder byteOrder,
BitmapSerdeFactory bitmapSerdeFactory,
ByteBuffer bb,
ColumnBuilder columnBuilder
ColumnBuilder columnBuilder,
ColumnConfig columnConfig
)
{
final byte version = bb.get();
@ -157,6 +159,10 @@ public class VariantArrayColumnAndIndexSupplier implements Supplier<NestedCommon
arrayDictionarybuffer,
byteOrder
);
final int size;
try (ColumnarInts throwAway = ints.get()) {
size = throwAway.size();
}
return new VariantArrayColumnAndIndexSupplier(
logicalType,
stringDictionary,
@ -166,7 +172,9 @@ public class VariantArrayColumnAndIndexSupplier implements Supplier<NestedCommon
arrayDictionarySupplier,
ints,
valueIndexes,
bitmapSerdeFactory.getBitmapFactory()
bitmapSerdeFactory.getBitmapFactory(),
columnConfig,
size
);
}
catch (IOException ex) {
@ -199,7 +207,9 @@ public class VariantArrayColumnAndIndexSupplier implements Supplier<NestedCommon
Supplier<FrontCodedIntArrayIndexed> arrayDictionarySupplier,
Supplier<ColumnarInts> encodedValueColumnSupplier,
GenericIndexed<ImmutableBitmap> valueIndexes,
@SuppressWarnings("unused") BitmapFactory bitmapFactory
@SuppressWarnings("unused") BitmapFactory bitmapFactory,
@SuppressWarnings("unused") ColumnConfig columnConfig,
@SuppressWarnings("unused") int numRows
)
{
this.logicalType = logicalType;

View File

@ -33,7 +33,6 @@ import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.ProgressIndicator;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.StringEncodingStrategies;
import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer;
@ -83,7 +82,6 @@ public class VariantArrayColumnSerializer extends NestedCommonFormatColumnSerial
String name,
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
@SuppressWarnings("unused") ProgressIndicator progressIndicator,
Closer closer
)
{

View File

@ -110,7 +110,8 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde
byteOrder,
bitmapSerdeFactory,
buffer,
builder
builder,
columnConfig
);
ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder();
capabilitiesBuilder.setDictionaryEncoded(true);
@ -128,7 +129,8 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde
byteOrder,
bitmapSerdeFactory,
buffer,
builder
builder,
columnConfig
);
ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder();
capabilitiesBuilder.setDictionaryEncoded(true);
@ -146,7 +148,8 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde
byteOrder,
bitmapSerdeFactory,
buffer,
builder
builder,
columnConfig
);
ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder();
capabilitiesBuilder.setDictionaryEncoded(true);
@ -165,7 +168,8 @@ public class NestedCommonFormatColumnPartSerde implements ColumnPartSerde
byteOrder,
bitmapSerdeFactory,
buffer,
builder
builder,
columnConfig
);
ColumnCapabilitiesImpl capabilitiesBuilder = builder.getCapabilitiesBuilder();
capabilitiesBuilder.setDictionaryEncoded(true);

View File

@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.query.BitmapResultFactory;
@ -503,6 +504,7 @@ public class ListFilteredVirtualColumn implements VirtualColumn
}
@Override
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
return new SimpleBitmapColumnIndex()
@ -542,6 +544,7 @@ public class ListFilteredVirtualColumn implements VirtualColumn
}
@Override
@Nullable
public BitmapColumnIndex forRange(
@Nullable String startValue,
boolean startStrict,
@ -553,6 +556,7 @@ public class ListFilteredVirtualColumn implements VirtualColumn
}
@Override
@Nullable
public BitmapColumnIndex forRange(
@Nullable String startValue,
boolean startStrict,
@ -621,6 +625,12 @@ public class ListFilteredVirtualColumn implements VirtualColumn
return delegate.getValue(idMapping.getReverseId(index));
}
/**
 * Forwards to the wrapped {@code delegate} and returns its bitmap factory unchanged.
 */
@Override
public BitmapFactory getBitmapFactory()
{
return delegate.getBitmapFactory();
}
@Override
public ImmutableBitmap getBitmap(int idx)
{

View File

@ -40,7 +40,6 @@ import org.apache.druid.query.filter.SelectorPredicateFactory;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.AutoTypeColumnIndexer;
import org.apache.druid.segment.AutoTypeColumnMerger;
import org.apache.druid.segment.BaseProgressIndicator;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.IndexSpec;
@ -49,6 +48,7 @@ import org.apache.druid.segment.ObjectColumnSelector;
import org.apache.druid.segment.SimpleAscendingOffset;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.DruidPredicateIndex;
@ -94,6 +94,27 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest
private static final String NO_MATCH = "no";
/**
 * {@link ColumnConfig} used by these tests: it reports a column cache size of 0 bytes and a scale of 1.0 for
 * both the value-range and the predicate index skip settings.
 *
 * NOTE(review): per the commit description, each scale is multiplied by the row count to obtain the threshold
 * beyond which bitmap indexes are no longer considered. A scale of 1.0 presumably places that threshold at the
 * full row count so that indexes are always used, as the constant's name suggests; confirm against
 * {@code ColumnIndexSupplier}.
 */
private static final ColumnConfig ALWAYS_USE_INDEXES = new ColumnConfig()
{
// No column cache is configured for the tests.
@Override
public int columnCacheSizeBytes()
{
return 0;
}
// Scale applied when deciding whether to skip value-range indexes.
@Override
public double skipValueRangeIndexScale()
{
return 1.0;
}
// Scale applied when deciding whether to skip predicate indexes.
@Override
public double skipValuePredicateIndexScale()
{
return 1.0;
}
};
@Rule
public final TemporaryFolder tempFolder = new TemporaryFolder();
@ -172,7 +193,6 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest
fileNameBase,
new IndexSpec(),
writeOutMediumFactory.makeSegmentWriteOutMedium(tempFolder.newFolder()),
new BaseProgressIndicator(),
closer
);
@ -230,7 +250,7 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest
false,
baseBuffer,
bob,
() -> 0,
ALWAYS_USE_INDEXES,
bitmapSerdeFactory,
ByteOrder.nativeOrder()
);
@ -267,7 +287,7 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest
false,
baseBuffer,
bob,
() -> 0,
ALWAYS_USE_INDEXES,
bitmapSerdeFactory,
ByteOrder.nativeOrder()
);

View File

@ -37,7 +37,6 @@ import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter;
import org.apache.druid.query.DefaultBitmapResultFactory;
import org.apache.druid.query.filter.SelectorPredicateFactory;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.BaseProgressIndicator;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.IndexSpec;
@ -167,7 +166,6 @@ public class NestedDataColumnSupplierV4Test extends InitializedNullHandlingTest
fileNameBase,
new IndexSpec(),
writeOutMediumFactory.makeSegmentWriteOutMedium(tempFolder.newFolder()),
new BaseProgressIndicator(),
closer
);
@ -219,7 +217,7 @@ public class NestedDataColumnSupplierV4Test extends InitializedNullHandlingTest
NestedDataColumnSupplierV4 supplier = NestedDataColumnSupplierV4.read(
baseBuffer,
bob,
() -> 0,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES,
NestedDataComplexTypeSerde.OBJECT_MAPPER,
new OnlyPositionalReadsTypeStrategy<>(ColumnType.LONG.getStrategy()),
new OnlyPositionalReadsTypeStrategy<>(ColumnType.DOUBLE.getStrategy())
@ -238,7 +236,7 @@ public class NestedDataColumnSupplierV4Test extends InitializedNullHandlingTest
NestedDataColumnSupplierV4 supplier = NestedDataColumnSupplierV4.read(
baseBuffer,
bob,
() -> 0,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES,
NestedDataComplexTypeSerde.OBJECT_MAPPER
);
final String expectedReason = "none";

View File

@ -35,7 +35,6 @@ import org.apache.druid.query.DefaultBitmapResultFactory;
import org.apache.druid.query.filter.SelectorPredicateFactory;
import org.apache.druid.segment.AutoTypeColumnIndexer;
import org.apache.druid.segment.AutoTypeColumnMerger;
import org.apache.druid.segment.BaseProgressIndicator;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.IndexableAdapter;
@ -125,7 +124,6 @@ public class ScalarDoubleColumnSupplierTest extends InitializedNullHandlingTest
fileNameBase,
new IndexSpec(),
writeOutMediumFactory.makeSegmentWriteOutMedium(tempFolder.newFolder()),
new BaseProgressIndicator(),
closer
);
@ -182,7 +180,8 @@ public class ScalarDoubleColumnSupplierTest extends InitializedNullHandlingTest
ByteOrder.nativeOrder(),
bitmapSerdeFactory,
baseBuffer,
bob
bob,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES
);
try (ScalarDoubleColumn column = (ScalarDoubleColumn) supplier.get()) {
smokeTest(supplier, column);
@ -199,7 +198,8 @@ public class ScalarDoubleColumnSupplierTest extends InitializedNullHandlingTest
ByteOrder.nativeOrder(),
bitmapSerdeFactory,
baseBuffer,
bob
bob,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES
);
final String expectedReason = "none";
final AtomicReference<String> failureReason = new AtomicReference<>(expectedReason);

View File

@ -35,7 +35,6 @@ import org.apache.druid.query.DefaultBitmapResultFactory;
import org.apache.druid.query.filter.SelectorPredicateFactory;
import org.apache.druid.segment.AutoTypeColumnIndexer;
import org.apache.druid.segment.AutoTypeColumnMerger;
import org.apache.druid.segment.BaseProgressIndicator;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.IndexableAdapter;
@ -125,7 +124,6 @@ public class ScalarLongColumnSupplierTest extends InitializedNullHandlingTest
fileNameBase,
new IndexSpec(),
writeOutMediumFactory.makeSegmentWriteOutMedium(tempFolder.newFolder()),
new BaseProgressIndicator(),
closer
);
@ -182,7 +180,8 @@ public class ScalarLongColumnSupplierTest extends InitializedNullHandlingTest
ByteOrder.nativeOrder(),
bitmapSerdeFactory,
baseBuffer,
bob
bob,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES
);
try (ScalarLongColumn column = (ScalarLongColumn) supplier.get()) {
smokeTest(supplier, column);
@ -199,7 +198,8 @@ public class ScalarLongColumnSupplierTest extends InitializedNullHandlingTest
ByteOrder.nativeOrder(),
bitmapSerdeFactory,
baseBuffer,
bob
bob,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES
);
final String expectedReason = "none";
final AtomicReference<String> failureReason = new AtomicReference<>(expectedReason);

View File

@ -36,7 +36,6 @@ import org.apache.druid.query.DefaultBitmapResultFactory;
import org.apache.druid.query.filter.SelectorPredicateFactory;
import org.apache.druid.segment.AutoTypeColumnIndexer;
import org.apache.druid.segment.AutoTypeColumnMerger;
import org.apache.druid.segment.BaseProgressIndicator;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.IndexSpec;
@ -128,7 +127,6 @@ public class ScalarStringColumnSupplierTest extends InitializedNullHandlingTest
fileNameBase,
new IndexSpec(),
writeOutMediumFactory.makeSegmentWriteOutMedium(tempFolder.newFolder()),
new BaseProgressIndicator(),
closer
);
@ -185,7 +183,8 @@ public class ScalarStringColumnSupplierTest extends InitializedNullHandlingTest
ByteOrder.nativeOrder(),
bitmapSerdeFactory,
baseBuffer,
bob
bob,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES
);
try (ScalarStringDictionaryEncodedColumn column = (ScalarStringDictionaryEncodedColumn) supplier.get()) {
smokeTest(supplier, column);
@ -202,7 +201,8 @@ public class ScalarStringColumnSupplierTest extends InitializedNullHandlingTest
ByteOrder.nativeOrder(),
bitmapSerdeFactory,
baseBuffer,
bob
bob,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES
);
final String expectedReason = "none";
final AtomicReference<String> failureReason = new AtomicReference<>(expectedReason);

View File

@ -33,7 +33,6 @@ import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter;
import org.apache.druid.query.DefaultBitmapResultFactory;
import org.apache.druid.segment.AutoTypeColumnIndexer;
import org.apache.druid.segment.AutoTypeColumnMerger;
import org.apache.druid.segment.BaseProgressIndicator;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.IndexableAdapter;
@ -122,7 +121,6 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest
fileNameBase,
new IndexSpec(),
writeOutMediumFactory.makeSegmentWriteOutMedium(tempFolder.newFolder()),
new BaseProgressIndicator(),
closer
);
@ -180,7 +178,8 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest
ByteOrder.nativeOrder(),
bitmapSerdeFactory,
baseBuffer,
bob
bob,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES
);
try (VariantArrayColumn column = (VariantArrayColumn) supplier.get()) {
smokeTest(supplier, column);
@ -198,7 +197,8 @@ public class VariantArrayColumnSupplierTest extends InitializedNullHandlingTest
ByteOrder.nativeOrder(),
bitmapSerdeFactory,
baseBuffer,
bob
bob,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES
);
final String expectedReason = "none";
final AtomicReference<String> failureReason = new AtomicReference<>(expectedReason);

View File

@ -2661,6 +2661,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
.build()
),
ImmutableList.of(
new Object[]{NullHandling.defaultStringValue(), 4L},
new Object[]{"100", 2L}
),
RowSignature.builder()
@ -2922,6 +2923,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest
.build()
),
ImmutableList.of(
new Object[]{NullHandling.defaultStringValue(), 4L},
new Object[]{"2.02", 2L}
),
RowSignature.builder()