split out null value index (#12627)

* split out null value index

* gg spotbugs

* fix stuff
This commit is contained in:
Clint Wylie 2022-06-17 15:29:23 -07:00 committed by GitHub
parent 893759de91
commit 18937ffee2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 504 additions and 159 deletions

View File

@ -208,7 +208,10 @@ public class SqlExpressionBenchmark
// 40: LATEST aggregator float
"SELECT LATEST(float3) FROM foo",
// 41: LATEST aggregators on float, long, and double columns together
"SELECT LATEST(float3), LATEST(long1), LATEST(double4) FROM foo"
"SELECT LATEST(float3), LATEST(long1), LATEST(double4) FROM foo",
// 42,43: filter numeric nulls
"SELECT SUM(long5) FROM foo WHERE long5 IS NOT NULL",
"SELECT string2, SUM(long5) FROM foo WHERE long5 IS NOT NULL GROUP BY 1"
);
@Param({"5000000"})
@ -229,7 +232,7 @@ public class SqlExpressionBenchmark
"4",
"5",
"6",
// expressions
// expressions, etc
"7",
"8",
"9",
@ -264,7 +267,9 @@ public class SqlExpressionBenchmark
"38",
"39",
"40",
"41"
"41",
"42",
"43"
})
private String query;

View File

@ -64,7 +64,6 @@ final class PredicateFilteredDimensionSelector extends AbstractDimensionSelector
@Override
public ValueMatcher makeValueMatcher(final String value)
{
final boolean matchNull = predicate.apply(null);
return new ValueMatcher()
{
@Override
@ -82,8 +81,8 @@ final class PredicateFilteredDimensionSelector extends AbstractDimensionSelector
nullRow = false;
}
}
// null should match empty rows in multi-value columns if predicate matches null
return nullRow && value == null && matchNull;
// null should match empty rows in multi-value columns
return nullRow && value == null;
}
@Override
@ -98,7 +97,7 @@ final class PredicateFilteredDimensionSelector extends AbstractDimensionSelector
@Override
public ValueMatcher makeValueMatcher(final Predicate<String> matcherPredicate)
{
final boolean matchNull = predicate.apply(null) && matcherPredicate.apply(null);
final boolean matchNull = matcherPredicate.apply(null);
return new ValueMatcher()
{
@Override

View File

@ -21,8 +21,11 @@ package org.apache.druid.segment.column;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import org.apache.druid.segment.serde.NoIndexesColumnIndexSupplier;
import org.apache.druid.segment.serde.NullValueIndexSupplier;
import javax.annotation.Nullable;
@ -71,6 +74,7 @@ public class ColumnBuilder
public ColumnBuilder setDictionaryEncodedColumnSupplier(Supplier<? extends DictionaryEncodedColumn<?>> columnSupplier)
{
checkColumnSupplierNotSet();
this.columnSupplier = columnSupplier;
this.capabilitiesBuilder.setDictionaryEncoded(true);
this.capabilitiesBuilder.setDictionaryValuesSorted(true);
@ -87,12 +91,14 @@ public class ColumnBuilder
/**
* Sets the supplier used to read this column as a {@link ComplexColumn}.
*
* @param columnSupplier supplier of the complex column
* @return this builder, to allow call chaining
* @throws ISE (from {@code checkColumnSupplierNotSet()}) if a column supplier has already been set
*/
public ColumnBuilder setComplexColumnSupplier(Supplier<? extends ComplexColumn> columnSupplier)
{
// only one column supplier may be assigned per builder; fail fast on a second assignment
checkColumnSupplierNotSet();
this.columnSupplier = columnSupplier;
return this;
}
/**
* Sets the supplier used to read this column as a {@link NumericColumn}.
*
* @param columnSupplier supplier of the numeric column
* @return this builder, to allow call chaining
* @throws ISE (from {@code checkColumnSupplierNotSet()}) if a column supplier has already been set
*/
public ColumnBuilder setNumericColumnSupplier(Supplier<? extends NumericColumn> columnSupplier)
{
// only one column supplier may be assigned per builder; fail fast on a second assignment
checkColumnSupplierNotSet();
this.columnSupplier = columnSupplier;
return this;
}
@ -103,12 +109,20 @@ public class ColumnBuilder
boolean hasSpatial
)
{
checkIndexSupplierNotSet();
this.indexSupplier = indexSupplier;
capabilitiesBuilder.setHasBitmapIndexes(hasBitmapIndex);
capabilitiesBuilder.setHasSpatialIndexes(hasSpatial);
return this;
}
/**
* Installs a {@link NullValueIndexSupplier} wrapping the given bitmap as this column's index supplier. This method
* does not modify {@code capabilitiesBuilder}.
*
* @param nullValueIndex bitmap handed to the {@link NullValueIndexSupplier} constructor; presumably it marks the
*                       rows whose value is null - confirm against the caller
* @return this builder, to allow call chaining
* @throws ISE (from {@code checkIndexSupplierNotSet()}) if an index supplier has already been set
*/
public ColumnBuilder setNullValueIndexSupplier(ImmutableBitmap nullValueIndex)
{
// only one index supplier may be assigned per builder; fail fast on a second assignment
checkIndexSupplierNotSet();
this.indexSupplier = new NullValueIndexSupplier(nullValueIndex);
return this;
}
public ColumnBuilder setHasNulls(boolean nullable)
{
this.capabilitiesBuilder.setHasNulls(nullable);
@ -126,4 +140,19 @@ public class ColumnBuilder
return new SimpleColumnHolder(capabilitiesBuilder, columnSupplier, indexSupplier);
}
/**
* Guards against assigning a column supplier twice.
*
* @throws ISE if {@code columnSupplier} is already non-null
*/
private void checkColumnSupplierNotSet()
{
if (columnSupplier != null) {
throw new ISE("Column supplier already set!");
}
}
/**
* Guards against assigning an index supplier twice. The {@link NoIndexesColumnIndexSupplier} instance is treated
* as the "not set" marker (presumably it is the initial value of {@code indexSupplier}; the field declaration is
* not shown here - confirm).
*
* @throws ISE if {@code indexSupplier} is anything other than the {@link NoIndexesColumnIndexSupplier} instance
*/
private void checkIndexSupplierNotSet()
{
// reference comparison is deliberate: the check is against the instance returned by getInstance()
//noinspection ObjectEquality
if (indexSupplier != NoIndexesColumnIndexSupplier.getInstance()) {
throw new ISE("Index supplier already set!");
}
}
}

View File

@ -25,12 +25,15 @@ import javax.annotation.Nullable;
/**
* An optimized column value {@link BitmapColumnIndex} provider for columns which are stored in 'lexicographical' order,
* allowing short-circuit processing of string value ranges.
* allowing short-circuit processing of string value ranges. This index does not match null values; union the results
* of this index with those of {@link NullValueIndex} if null values should be considered part of the value range.
*/
public interface LexicographicalRangeIndex
{
/**
* Get a {@link BitmapColumnIndex} corresponding to the values supplied in the specified range.
* Get a {@link BitmapColumnIndex} corresponding to the values supplied in the specified range. If supplied starting
* value is null, the range will begin at the first non-null value in the underlying value dictionary. If the end
* value is null, the range will extend to the last value in the underlying value dictionary.
*/
BitmapColumnIndex forRange(
@Nullable String startValue,
@ -41,7 +44,9 @@ public interface LexicographicalRangeIndex
/**
* Get a {@link BitmapColumnIndex} corresponding to the values supplied in the specified range whose dictionary ids
* also match some predicate, such as to match a prefix.
* also match some predicate, such as to match a prefix. If supplied starting value is null, the range will begin at
* the first non-null value in the underlying value dictionary that matches the predicate. If the end value is null,
* the range will extend to the last value in the underlying value dictionary that matches the predicate.
*
* If the provided {@code matcher} is always true, it's better to use the other
* {@link #forRange(String, boolean, String, boolean)} method.

View File

@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.column;
/**
* Provides an index of all null rows in a column, for use with IS NULL / IS NOT NULL filters.
*/
public interface NullValueIndex
{
/**
* Get the {@link BitmapColumnIndex} corresponding to the null rows of the column.
*/
BitmapColumnIndex forNull();
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.column;
/**
* {@link BitmapColumnIndex} with Druid's "default" {@link ColumnIndexCapabilities}. Subclasses only need to supply
* the remaining {@link BitmapColumnIndex} methods.
*/
public abstract class SimpleBitmapColumnIndex implements BitmapColumnIndex
{
// Shared capabilities instance returned by every subclass.
// NOTE(review): the meaning of the two 'true' flags is defined by SimpleColumnIndexCapabilities, which is not
// shown here - confirm before relying on it.
public static final ColumnIndexCapabilities CAPABILITIES = new SimpleColumnIndexCapabilities(true, true);
/**
* @return the shared {@link #CAPABILITIES} instance
*/
@Override
public ColumnIndexCapabilities getIndexCapabilities()
{
return CAPABILITIES;
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.column;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.query.BitmapResultFactory;
/**
 * A {@link SimpleBitmapColumnIndex} backed by one already-computed {@link ImmutableBitmap}.
 */
public final class SimpleImmutableBitmapIndex extends SimpleBitmapColumnIndex
{
  private final ImmutableBitmap bitmap;

  /**
   * @param bitmap the bitmap this index reports on and hands to the {@link BitmapResultFactory}
   */
  public SimpleImmutableBitmapIndex(ImmutableBitmap bitmap)
  {
    this.bitmap = bitmap;
  }

  /**
   * Ratio of the wrapped bitmap's size to {@code totalRows}, capped at 1.
   */
  @Override
  public double estimateSelectivity(int totalRows)
  {
    final double fraction = (double) bitmap.size() / totalRows;
    return Math.min(1, fraction);
  }

  /**
   * Wraps the single bitmap via {@link BitmapResultFactory#wrapDimensionValue}.
   */
  @Override
  public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
  {
    final T wrapped = bitmapResultFactory.wrapDimensionValue(bitmap);
    return wrapped;
  }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.column;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.segment.filter.Filters;
/**
 * {@link SimpleBitmapColumnIndex} for anything which can compute an {@link Iterable} of {@link ImmutableBitmap} in
 * some manner. Both the selectivity estimate and the bitmap result are derived from {@link #getBitmapIterable()}.
 */
public abstract class SimpleImmutableBitmapIterableIndex extends SimpleBitmapColumnIndex
{
  /**
   * Delegates to {@link Filters#estimateSelectivity} over a fresh iterator of the bitmaps.
   */
  @Override
  public double estimateSelectivity(int totalRows)
  {
    final Iterable<ImmutableBitmap> matching = getBitmapIterable();
    return Filters.estimateSelectivity(matching.iterator(), totalRows);
  }

  /**
   * Unions all bitmaps produced by {@link #getBitmapIterable()} through the supplied factory.
   */
  @Override
  public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
  {
    final Iterable<ImmutableBitmap> matching = getBitmapIterable();
    return bitmapResultFactory.unionDimensionValueBitmaps(matching);
  }

  /**
   * The bitmaps making up this index; called once per {@link #estimateSelectivity} or {@link #computeBitmapResult}
   * invocation.
   */
  protected abstract Iterable<ImmutableBitmap> getBitmapIterable();
}

View File

@ -22,8 +22,10 @@ package org.apache.druid.segment.filter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.BoundDimFilter;
import org.apache.druid.query.filter.ColumnIndexSelector;
@ -42,8 +44,10 @@ import org.apache.druid.segment.ColumnProcessors;
import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnIndexCapabilities;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.LexicographicalRangeIndex;
import org.apache.druid.segment.column.NullValueIndex;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import javax.annotation.Nullable;
@ -81,12 +85,50 @@ public class BoundFilter implements Filter
// column
return null;
}
return rangeIndex.forRange(
final BitmapColumnIndex rangeBitmaps = rangeIndex.forRange(
boundDimFilter.getLower(),
boundDimFilter.isLowerStrict(),
boundDimFilter.getUpper(),
boundDimFilter.isUpperStrict()
);
// preserve sad backwards compatible behavior where bound filter matches 'null' if the lower bound is not set
if (boundDimFilter.hasLowerBound() && !NullHandling.isNullOrEquivalent(boundDimFilter.getLower())) {
return rangeBitmaps;
} else {
final NullValueIndex nulls = indexSupplier.as(NullValueIndex.class);
if (nulls == null) {
return null;
}
final BitmapColumnIndex nullBitmap = nulls.forNull();
return new BitmapColumnIndex()
{
@Override
public ColumnIndexCapabilities getIndexCapabilities()
{
return rangeBitmaps.getIndexCapabilities().merge(nullBitmap.getIndexCapabilities());
}
@Override
public double estimateSelectivity(int totalRows)
{
return Math.min(
1.0,
rangeBitmaps.estimateSelectivity(totalRows) + nullBitmap.estimateSelectivity(totalRows)
);
}
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
{
return bitmapResultFactory.union(
ImmutableList.of(
rangeBitmaps.computeBitmapResult(bitmapResultFactory),
nullBitmap.computeBitmapResult(bitmapResultFactory)
)
);
}
};
}
} else {
return Filters.makePredicateIndex(boundDimFilter.getDimension(), selector, getPredicateFactory());
}

View File

@ -35,6 +35,7 @@ import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.NullValueIndex;
import org.apache.druid.segment.column.StringValueSetIndex;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
@ -83,16 +84,25 @@ public class SelectorFilter implements Filter
if (!Filters.checkFilterTuningUseIndex(dimension, selector, filterTuning)) {
return null;
}
final boolean isNull = NullHandling.isNullOrEquivalent(value);
final ColumnIndexSupplier indexSupplier = selector.getIndexSupplier(dimension);
if (indexSupplier == null) {
return Filters.makeNullIndex(NullHandling.isNullOrEquivalent(value), selector);
return Filters.makeNullIndex(isNull, selector);
}
final StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class);
if (valueSetIndex == null) {
// column exists, but has no index
return null;
if (isNull) {
final NullValueIndex nullValueIndex = indexSupplier.as(NullValueIndex.class);
if (nullValueIndex == null) {
return null;
}
return nullValueIndex.forNull();
} else {
final StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class);
if (valueSetIndex == null) {
// column exists, but has no index
return null;
}
return valueSetIndex.forValue(value);
}
return valueSetIndex.forValue(value);
}
@Override

View File

@ -33,18 +33,19 @@ import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.segment.IntListUtils;
import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnIndexCapabilities;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
import org.apache.druid.segment.column.DruidPredicateIndex;
import org.apache.druid.segment.column.LexicographicalRangeIndex;
import org.apache.druid.segment.column.SimpleColumnIndexCapabilities;
import org.apache.druid.segment.column.NullValueIndex;
import org.apache.druid.segment.column.SimpleBitmapColumnIndex;
import org.apache.druid.segment.column.SimpleImmutableBitmapIndex;
import org.apache.druid.segment.column.SimpleImmutableBitmapIterableIndex;
import org.apache.druid.segment.column.SpatialIndex;
import org.apache.druid.segment.column.StringValueSetIndex;
import org.apache.druid.segment.column.Utf8ValueSetIndex;
import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.filter.Filters;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
@ -54,8 +55,6 @@ import java.util.SortedSet;
public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
{
public static final ColumnIndexCapabilities CAPABILITIES = new SimpleColumnIndexCapabilities(true, true);
private final BitmapFactory bitmapFactory;
private final GenericIndexed<String> dictionary;
private final GenericIndexed<ByteBuffer> dictionaryUtf8;
@ -84,35 +83,38 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
@SuppressWarnings("unchecked")
public <T> T as(Class<T> clazz)
{
if (bitmaps != null && clazz.equals(StringValueSetIndex.class)) {
return (T) new GenericIndexedDictionaryEncodedStringValueSetIndex(bitmapFactory, dictionaryUtf8, bitmaps);
} else if (bitmaps != null && clazz.equals(Utf8ValueSetIndex.class)) {
return (T) new GenericIndexedDictionaryEncodedStringValueSetIndex(bitmapFactory, dictionaryUtf8, bitmaps);
} else if (bitmaps != null && clazz.equals(DruidPredicateIndex.class)) {
return (T) new GenericIndexedDictionaryEncodedStringDruidPredicateIndex(bitmapFactory, dictionary, bitmaps);
} else if (bitmaps != null && clazz.equals(LexicographicalRangeIndex.class)) {
return (T) new GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex(
bitmapFactory,
dictionaryUtf8,
bitmaps
);
} else if (bitmaps != null && clazz.equals(DictionaryEncodedStringValueIndex.class)) {
return (T) new GenericIndexedDictionaryEncodedStringValueIndex(bitmapFactory, dictionary, bitmaps);
} else if (indexedTree != null && clazz.equals(SpatialIndex.class)) {
if (bitmaps != null) {
if (clazz.equals(NullValueIndex.class)) {
final BitmapColumnIndex nullIndex;
if (NullHandling.isNullOrEquivalent(dictionary.get(0))) {
nullIndex = new SimpleImmutableBitmapIndex(bitmaps.get(0));
} else {
nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap());
}
return (T) (NullValueIndex) () -> nullIndex;
} else if (clazz.equals(StringValueSetIndex.class)) {
return (T) new GenericIndexedDictionaryEncodedStringValueSetIndex(bitmapFactory, dictionaryUtf8, bitmaps);
} else if (clazz.equals(Utf8ValueSetIndex.class)) {
return (T) new GenericIndexedDictionaryEncodedStringValueSetIndex(bitmapFactory, dictionaryUtf8, bitmaps);
} else if (clazz.equals(DruidPredicateIndex.class)) {
return (T) new GenericIndexedDictionaryEncodedStringDruidPredicateIndex(bitmapFactory, dictionary, bitmaps);
} else if (clazz.equals(LexicographicalRangeIndex.class)) {
return (T) new GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex(
bitmapFactory,
dictionaryUtf8,
bitmaps,
NullHandling.isNullOrEquivalent(dictionary.get(0))
);
} else if (clazz.equals(DictionaryEncodedStringValueIndex.class)) {
return (T) new GenericIndexedDictionaryEncodedStringValueIndex(bitmapFactory, dictionary, bitmaps);
}
}
if (indexedTree != null && clazz.equals(SpatialIndex.class)) {
return (T) (SpatialIndex) () -> indexedTree;
}
return null;
}
private abstract static class DictionaryEncodedStringBitmapColumnIndex implements BitmapColumnIndex
{
@Override
public ColumnIndexCapabilities getIndexCapabilities()
{
return CAPABILITIES;
}
}
private abstract static class BaseGenericIndexedDictionaryEncodedIndex<T>
{
protected final BitmapFactory bitmapFactory;
@ -196,7 +198,7 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
@Override
public BitmapColumnIndex forValue(@Nullable String value)
{
return new DictionaryEncodedStringBitmapColumnIndex()
return new SimpleBitmapColumnIndex()
{
@Override
public double estimateSelectivity(int totalRows)
@ -251,21 +253,10 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
*/
private BitmapColumnIndex getBitmapColumnIndexForSortedIterableUtf8(Iterable<ByteBuffer> valuesUtf8)
{
return new DictionaryEncodedStringBitmapColumnIndex()
return new SimpleImmutableBitmapIterableIndex()
{
@Override
public double estimateSelectivity(int totalRows)
{
return Filters.estimateSelectivity(getBitmapsIterable().iterator(), totalRows);
}
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
{
return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapsIterable());
}
private Iterable<ImmutableBitmap> getBitmapsIterable()
public Iterable<ImmutableBitmap> getBitmapIterable()
{
final int dictionarySize = dictionary.size();
@ -332,24 +323,10 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
@Override
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
return new DictionaryEncodedStringBitmapColumnIndex()
return new SimpleImmutableBitmapIterableIndex()
{
@Override
public double estimateSelectivity(int totalRows)
{
return Filters.estimateSelectivity(
getBitmapIterable().iterator(),
totalRows
);
}
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
{
return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable());
}
private Iterable<ImmutableBitmap> getBitmapIterable()
public Iterable<ImmutableBitmap> getBitmapIterable()
{
return () -> new Iterator<ImmutableBitmap>()
{
@ -406,14 +383,17 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
public static final class GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex
extends BaseGenericIndexedDictionaryEncodedIndex<ByteBuffer> implements LexicographicalRangeIndex
{
private final boolean hasNull;
public GenericIndexedDictionaryEncodedColumnLexicographicalRangeIndex(
BitmapFactory bitmapFactory,
GenericIndexed<ByteBuffer> dictionary,
GenericIndexed<ImmutableBitmap> bitmaps
GenericIndexed<ImmutableBitmap> bitmaps,
boolean hasNull
)
{
super(bitmapFactory, dictionary, bitmaps);
this.hasNull = hasNull;
}
@Override
@ -424,21 +404,10 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
boolean endStrict
)
{
return new DictionaryEncodedStringBitmapColumnIndex()
return new SimpleImmutableBitmapIterableIndex()
{
@Override
public double estimateSelectivity(int totalRows)
{
return Filters.estimateSelectivity(getBitmapIterable().iterator(), totalRows);
}
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
{
return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable());
}
private Iterable<ImmutableBitmap> getBitmapIterable()
public Iterable<ImmutableBitmap> getBitmapIterable()
{
final IntIntPair range = getRange(startValue, startStrict, endValue, endStrict);
final int start = range.leftInt(), end = range.rightInt();
@ -471,22 +440,10 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
Predicate<String> matcher
)
{
return new DictionaryEncodedStringBitmapColumnIndex()
return new SimpleImmutableBitmapIterableIndex()
{
@Override
public double estimateSelectivity(int totalRows)
{
return Filters.estimateSelectivity(getBitmapIterable().iterator(), totalRows);
}
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
{
return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable());
}
private Iterable<ImmutableBitmap> getBitmapIterable()
public Iterable<ImmutableBitmap> getBitmapIterable()
{
final IntIntPair range = getRange(startValue, startStrict, endValue, endStrict);
final int start = range.leftInt(), end = range.rightInt();
@ -552,13 +509,14 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
boolean endStrict
)
{
final int firstValue = hasNull ? 1 : 0;
int startIndex, endIndex;
if (startValue == null) {
startIndex = 0;
startIndex = firstValue;
} else {
final String startValueToUse = NullHandling.emptyToNullIfNeeded(startValue);
final int found = dictionary.indexOf(StringUtils.toUtf8ByteBuffer(startValueToUse));
if (found >= 0) {
if (found >= firstValue) {
startIndex = startStrict ? found + 1 : found;
} else {
startIndex = -(found + 1);
@ -570,7 +528,7 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
} else {
final String endValueToUse = NullHandling.emptyToNullIfNeeded(endValue);
final int found = dictionary.indexOf(StringUtils.toUtf8ByteBuffer(endValueToUse));
if (found >= 0) {
if (found >= firstValue) {
endIndex = endStrict ? found : found + 1;
} else {
endIndex = -(found + 1);

View File

@ -163,7 +163,8 @@ public class DoubleNumericColumnPartSerdeV2 implements ColumnPartSerde
builder.setType(ValueType.DOUBLE)
.setHasMultipleValues(false)
.setHasNulls(hasNulls)
.setNumericColumnSupplier(new DoubleNumericColumnSupplier(column, bitmap));
.setNumericColumnSupplier(new DoubleNumericColumnSupplier(column, bitmap))
.setNullValueIndexSupplier(bitmap);
};
}
}

View File

@ -160,7 +160,8 @@ public class FloatNumericColumnPartSerdeV2 implements ColumnPartSerde
builder.setType(ValueType.FLOAT)
.setHasMultipleValues(false)
.setHasNulls(hasNulls)
.setNumericColumnSupplier(new FloatNumericColumnSupplier(column, bitmap));
.setNumericColumnSupplier(new FloatNumericColumnSupplier(column, bitmap))
.setNullValueIndexSupplier(bitmap);
};
}
}

View File

@ -162,7 +162,8 @@ public class LongNumericColumnPartSerdeV2 implements ColumnPartSerde
builder.setType(ValueType.LONG)
.setHasMultipleValues(false)
.setHasNulls(hasNulls)
.setNumericColumnSupplier(new LongNumericColumnSupplier(column, bitmap));
.setNumericColumnSupplier(new LongNumericColumnSupplier(column, bitmap))
.setNullValueIndexSupplier(bitmap);
};
}
}

View File

@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.NullValueIndex;
import org.apache.druid.segment.column.SimpleImmutableBitmapIndex;
import javax.annotation.Nullable;
/**
 * {@link ColumnIndexSupplier} for columns whose only index is an {@link ImmutableBitmap} of null-valued rows, such as
 * {@link LongNumericColumnPartSerdeV2}, {@link DoubleNumericColumnPartSerdeV2}, and
 * {@link FloatNumericColumnPartSerdeV2}.
 */
public class NullValueIndexSupplier implements ColumnIndexSupplier
{
  private final SimpleImmutableBitmapIndex nullValueIndex;

  /**
   * @param nullValueBitmap bitmap wrapped into the {@link SimpleImmutableBitmapIndex} returned by
   *                        {@link NullValueIndex#forNull()}
   */
  public NullValueIndexSupplier(ImmutableBitmap nullValueBitmap)
  {
    this.nullValueIndex = new SimpleImmutableBitmapIndex(nullValueBitmap);
  }

  /**
   * Only {@link NullValueIndex} is supported.
   *
   * @return a {@link NullValueIndex} when {@code clazz} is {@code NullValueIndex.class}, otherwise null
   */
  @Nullable
  @Override
  @SuppressWarnings("unchecked")
  public <T> T as(Class<T> clazz)
  {
    if (clazz.equals(NullValueIndex.class)) {
      // safe: T is NullValueIndex whenever this branch is taken
      return (T) new NullableNumericNullValueIndex();
    }
    return null;
  }

  /**
   * {@link NullValueIndex} view over the enclosing supplier's wrapped bitmap index.
   */
  private final class NullableNumericNullValueIndex implements NullValueIndex
  {
    @Override
    public BitmapColumnIndex forNull()
    {
      return nullValueIndex;
    }
  }
}

View File

@ -44,14 +44,15 @@ import org.apache.druid.segment.column.BitmapColumnIndex;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexCapabilities;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.DictionaryEncodedStringValueIndex;
import org.apache.druid.segment.column.DruidPredicateIndex;
import org.apache.druid.segment.column.LexicographicalRangeIndex;
import org.apache.druid.segment.column.NullValueIndex;
import org.apache.druid.segment.column.SimpleBitmapColumnIndex;
import org.apache.druid.segment.column.SimpleImmutableBitmapIterableIndex;
import org.apache.druid.segment.column.StringValueSetIndex;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.serde.DictionaryEncodedStringIndexSupplier;
import javax.annotation.Nullable;
import java.util.Collections;
@ -184,16 +185,26 @@ public class ListFilteredVirtualColumn implements VirtualColumn
@Override
public <T> T as(Class<T> clazz)
{
final ColumnHolder holder = columnSelector.getColumnHolder(delegate.getDimension());
if (holder == null) {
return null;
}
DictionaryEncodedStringValueIndex underlyingIndex = holder.getIndexSupplier().as(
// someday maybe we can have a better way to get row count..
final ColumnHolder time = columnSelector.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME);
final int numRows = time.getLength();
ColumnIndexSupplier indexSupplier = holder.getIndexSupplier();
if (indexSupplier == null) {
return null;
}
DictionaryEncodedStringValueIndex underlyingIndex = indexSupplier.as(
DictionaryEncodedStringValueIndex.class
);
if (underlyingIndex == null) {
return null;
}
final IdMapping idMapping;
if (allowList) {
idMapping = ListFilteredDimensionSpec.buildAllowListIdMapping(
@ -209,7 +220,10 @@ public class ListFilteredVirtualColumn implements VirtualColumn
underlyingIndex::getValue
);
}
if (clazz.equals(StringValueSetIndex.class)) {
if (clazz.equals(NullValueIndex.class)) {
return (T) new ListFilteredNullValueIndex(underlyingIndex, idMapping, numRows);
} else if (clazz.equals(StringValueSetIndex.class)) {
return (T) new ListFilteredStringValueSetIndex(underlyingIndex, idMapping);
} else if (clazz.equals(DruidPredicateIndex.class)) {
return (T) new ListFilteredDruidPredicateIndex(underlyingIndex, idMapping);
@ -254,15 +268,6 @@ public class ListFilteredVirtualColumn implements VirtualColumn
'}';
}
private abstract static class BaseVirtualIndex implements BitmapColumnIndex
{
@Override
public ColumnIndexCapabilities getIndexCapabilities()
{
return DictionaryEncodedStringIndexSupplier.CAPABILITIES;
}
}
private static class BaseListFilteredColumnIndex
{
final DictionaryEncodedStringValueIndex delegate;
@ -357,6 +362,46 @@ public class ListFilteredVirtualColumn implements VirtualColumn
}
}
/**
 * {@link NullValueIndex} for a list-filtered column. The null rows are computed as the complement, over
 * {@code numRows}, of the union of the bitmaps of every non-null value that remains in the {@code idMapping}.
 */
private static class ListFilteredNullValueIndex extends BaseListFilteredColumnIndex implements NullValueIndex
{
  private final int numRows;

  private ListFilteredNullValueIndex(DictionaryEncodedStringValueIndex delegate, IdMapping idMapping, int numRows)
  {
    super(delegate, idMapping);
    this.numRows = numRows;
  }

  @Override
  public BitmapColumnIndex forNull()
  {
    return new SimpleImmutableBitmapIterableIndex()
    {
      @Override
      public double estimateSelectivity(int totalRows)
      {
        // selectivity of "is null" is whatever the non-null values leave over
        final double nonNullSelectivity = Filters.estimateSelectivity(getBitmapIterable().iterator(), totalRows);
        return 1.0 - nonNullSelectivity;
      }

      @Override
      public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
      {
        final T nonNullRows = bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable());
        return bitmapResultFactory.complement(nonNullRows, numRows);
      }

      @Override
      protected Iterable<ImmutableBitmap> getBitmapIterable()
      {
        // skip the first mapped entry when it is the null (or null-equivalent) value, so that only bitmaps of
        // non-null values are produced
        final boolean firstIsNull = NullHandling.isNullOrEquivalent(delegate.getValue(idMapping.getReverseId(0)));
        final int start;
        if (firstIsNull) {
          start = 1;
        } else {
          start = 0;
        }
        return getBitmapsInRange(v -> true, start, idMapping.getValueCardinality());
      }
    };
  }
}
private static class ListFilteredStringValueSetIndex extends BaseListFilteredColumnIndex
implements StringValueSetIndex
{
@ -372,9 +417,8 @@ public class ListFilteredVirtualColumn implements VirtualColumn
@Override
public BitmapColumnIndex forValue(@Nullable String value)
{
return new BaseVirtualIndex()
return new SimpleBitmapColumnIndex()
{
@Override
public double estimateSelectivity(int totalRows)
{
@ -401,21 +445,10 @@ public class ListFilteredVirtualColumn implements VirtualColumn
@Override
public BitmapColumnIndex forSortedValues(SortedSet<String> values)
{
return new BaseVirtualIndex()
return new SimpleImmutableBitmapIterableIndex()
{
@Override
public double estimateSelectivity(int totalRows)
{
return Filters.estimateSelectivity(getBitmapsIterable().iterator(), totalRows);
}
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
{
return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapsIterable());
}
private Iterable<ImmutableBitmap> getBitmapsIterable()
public Iterable<ImmutableBitmap> getBitmapIterable()
{
return () -> new Iterator<ImmutableBitmap>()
{
@ -470,9 +503,8 @@ public class ListFilteredVirtualColumn implements VirtualColumn
@Override
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
return new BaseVirtualIndex()
return new SimpleBitmapColumnIndex()
{
@Override
public double estimateSelectivity(int totalRows)
{
@ -527,22 +559,10 @@ public class ListFilteredVirtualColumn implements VirtualColumn
Predicate<String> matcher
)
{
return new BaseVirtualIndex()
return new SimpleImmutableBitmapIterableIndex()
{
@Override
public double estimateSelectivity(int totalRows)
{
return Filters.estimateSelectivity(getBitmapIterable().iterator(), totalRows);
}
@Override
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
{
return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable());
}
private Iterable<ImmutableBitmap> getBitmapIterable()
public Iterable<ImmutableBitmap> getBitmapIterable()
{
int startIndex, endIndex;
if (startValue == null) {

View File

@ -450,6 +450,12 @@ public class BoundFilterTest extends BaseFilterTest
new BoundDimFilter("allow-dim0", "0", "6", false, false, false, null, StringComparators.LEXICOGRAPHIC),
ImmutableList.of("3", "4")
);
// with no lower bound the bound filter also matches null, so rows whose values the virtual column filtered out match too
assertFilterMatchesSkipVectorize(
new BoundDimFilter("allow-dim0", null, "6", false, false, false, null, StringComparators.LEXICOGRAPHIC),
ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")
);
assertFilterMatchesSkipVectorize(
new BoundDimFilter("deny-dim0", "0", "6", false, false, false, null, StringComparators.LEXICOGRAPHIC),
ImmutableList.of("0", "1", "2", "5", "6")
@ -458,6 +464,11 @@ public class BoundFilterTest extends BaseFilterTest
new BoundDimFilter("deny-dim0", "3", "4", false, false, false, null, StringComparators.LEXICOGRAPHIC),
ImmutableList.of()
);
// with no lower bound the bound filter also matches null, so rows whose values the virtual column filtered out match too
assertFilterMatchesSkipVectorize(
new BoundDimFilter("deny-dim0", null, "6", false, false, false, null, StringComparators.LEXICOGRAPHIC),
ImmutableList.of("0", "1", "2", "3", "4", "5", "6")
);
assertFilterMatchesSkipVectorize(
new BoundDimFilter("allow-dim2", "a", "c", false, false, false, null, StringComparators.LEXICOGRAPHIC),
@ -467,6 +478,11 @@ public class BoundFilterTest extends BaseFilterTest
new BoundDimFilter("allow-dim2", "c", "z", false, false, false, null, StringComparators.LEXICOGRAPHIC),
ImmutableList.of()
);
// with no lower bound the bound filter also matches null, so rows whose values the virtual column filtered out match too
assertFilterMatchesSkipVectorize(
new BoundDimFilter("allow-dim2", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC),
ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")
);
assertFilterMatchesSkipVectorize(
new BoundDimFilter("deny-dim2", "a", "b", false, true, false, null, StringComparators.LEXICOGRAPHIC),
@ -476,6 +492,11 @@ public class BoundFilterTest extends BaseFilterTest
new BoundDimFilter("deny-dim2", "c", "z", false, false, false, null, StringComparators.LEXICOGRAPHIC),
ImmutableList.of("4", "7")
);
// with no lower bound the bound filter also matches null, so rows whose values the virtual column filtered out match too
assertFilterMatchesSkipVectorize(
new BoundDimFilter("deny-dim2", null, "z", false, false, false, null, StringComparators.LEXICOGRAPHIC),
ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7")
);
}
@Test

View File

@ -272,6 +272,23 @@ public class LikeFilterTest extends BaseFilterTest
);
}
/**
 * Checks LIKE filters against the "allow-dim0" list-filtered virtual column: a prefix pattern with no
 * matching visible value yields no rows, while prefix and suffix patterns for "3" both yield row "3".
 */
@Test
public void testListFilteredVirtualColumn()
{
  // prefix pattern that matches nothing visible through the virtual column
  final LikeDimFilter prefixOne = new LikeDimFilter("allow-dim0", "1%", null, null);
  assertFilterMatchesSkipVectorize(prefixOne, ImmutableList.of());

  // prefix pattern
  final LikeDimFilter prefixThree = new LikeDimFilter("allow-dim0", "3%", null, null);
  assertFilterMatchesSkipVectorize(prefixThree, ImmutableList.of("3"));

  // suffix pattern
  final LikeDimFilter suffixThree = new LikeDimFilter("allow-dim0", "%3", null, null);
  assertFilterMatchesSkipVectorize(suffixThree, ImmutableList.of("3"));
}
@Test
public void testRequiredColumnRewrite()
{

View File

@ -120,13 +120,26 @@ public class SelectorFilterTest extends BaseFilterTest
{
assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim0", "1", null), ImmutableList.of());
assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim0", "4", null), ImmutableList.of("4"));
assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim0", null, null), ImmutableList.of("0", "1", "2", "5"));
assertFilterMatchesSkipVectorize(new SelectorDimFilter("deny-dim0", "0", null), ImmutableList.of("0"));
assertFilterMatchesSkipVectorize(new SelectorDimFilter("deny-dim0", "4", null), ImmutableList.of());
assertFilterMatchesSkipVectorize(new SelectorDimFilter("deny-dim0", null, null), ImmutableList.of("3", "4"));
assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim2", "b", null), ImmutableList.of());
assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim2", "a", null), ImmutableList.of("0", "3"));
assertFilterMatchesSkipVectorize(new SelectorDimFilter("allow-dim2", null, null), ImmutableList.of("1", "2", "4", "5"));
assertFilterMatchesSkipVectorize(new SelectorDimFilter("deny-dim2", "b", null), ImmutableList.of("0"));
assertFilterMatchesSkipVectorize(new SelectorDimFilter("deny-dim2", "a", null), ImmutableList.of());
if (NullHandling.replaceWithDefault()) {
assertFilterMatchesSkipVectorize(
new SelectorDimFilter("deny-dim2", null, null),
ImmutableList.of("1", "2", "3", "5")
);
} else {
assertFilterMatchesSkipVectorize(
new SelectorDimFilter("deny-dim2", null, null),
ImmutableList.of("1", "3", "5")
);
}
}
@Test

View File

@ -169,12 +169,15 @@ public class ListFilteredVirtualColumnSelectorTest extends InitializedNullHandli
ColumnSelector selector = EasyMock.createMock(ColumnSelector.class);
ColumnHolder holder = EasyMock.createMock(ColumnHolder.class);
ColumnHolder timeHolder = EasyMock.createMock(ColumnHolder.class);
DictionaryEncodedStringValueIndex index = EasyMock.createMock(DictionaryEncodedStringValueIndex.class);
ImmutableBitmap bitmap = EasyMock.createMock(ImmutableBitmap.class);
BitmapFactory bitmapFactory = EasyMock.createMock(BitmapFactory.class);
ColumnIndexSupplier indexSupplier = EasyMock.createMock(ColumnIndexSupplier.class);
EasyMock.expect(selector.getColumnHolder(COLUMN_NAME)).andReturn(holder).atLeastOnce();
EasyMock.expect(selector.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME)).andReturn(timeHolder).atLeastOnce();
EasyMock.expect(timeHolder.getLength()).andReturn(10).anyTimes();
EasyMock.expect(selector.getColumnCapabilities(COLUMN_NAME))
.andReturn(new ColumnCapabilitiesImpl().setType(ColumnType.STRING)
.setDictionaryEncoded(true)
@ -195,7 +198,7 @@ public class ListFilteredVirtualColumnSelectorTest extends InitializedNullHandli
EasyMock.expect(index.getBitmap(2)).andReturn(bitmap).once();
EasyMock.expect(index.hasNulls()).andReturn(true).once();
EasyMock.replay(selector, holder, indexSupplier, index, bitmap, bitmapFactory);
EasyMock.replay(selector, holder, timeHolder, indexSupplier, index, bitmap, bitmapFactory);
ColumnSelectorColumnIndexSelector bitmapIndexSelector = new ColumnSelectorColumnIndexSelector(
new RoaringBitmapFactory(),
@ -217,7 +220,7 @@ public class ListFilteredVirtualColumnSelectorTest extends InitializedNullHandli
Assert.assertEquals(bitmap, listFilteredIndex.getBitmap(1));
Assert.assertTrue(listFilteredIndex.hasNulls());
EasyMock.verify(selector, holder, indexSupplier, index, bitmap, bitmapFactory);
EasyMock.verify(selector, holder, timeHolder, indexSupplier, index, bitmap, bitmapFactory);
}
@Test
@ -233,12 +236,15 @@ public class ListFilteredVirtualColumnSelectorTest extends InitializedNullHandli
ColumnSelector selector = EasyMock.createMock(ColumnSelector.class);
ColumnHolder holder = EasyMock.createMock(ColumnHolder.class);
ColumnHolder timeHolder = EasyMock.createMock(ColumnHolder.class);
DictionaryEncodedStringValueIndex index = EasyMock.createMock(DictionaryEncodedStringValueIndex.class);
ImmutableBitmap bitmap = EasyMock.createMock(ImmutableBitmap.class);
ColumnIndexSupplier indexSupplier = EasyMock.createMock(ColumnIndexSupplier.class);
BitmapFactory bitmapFactory = EasyMock.createMock(BitmapFactory.class);
EasyMock.expect(selector.getColumnHolder(COLUMN_NAME)).andReturn(holder).atLeastOnce();
EasyMock.expect(selector.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME)).andReturn(timeHolder).atLeastOnce();
EasyMock.expect(timeHolder.getLength()).andReturn(10).anyTimes();
EasyMock.expect(selector.getColumnCapabilities(COLUMN_NAME))
.andReturn(new ColumnCapabilitiesImpl().setType(ColumnType.STRING)
.setDictionaryEncoded(true)
@ -256,7 +262,7 @@ public class ListFilteredVirtualColumnSelectorTest extends InitializedNullHandli
EasyMock.expect(index.getBitmap(0)).andReturn(bitmap).once();
EasyMock.expect(index.hasNulls()).andReturn(true).once();
EasyMock.replay(selector, holder, indexSupplier, index, bitmap, bitmapFactory);
EasyMock.replay(selector, holder, timeHolder, indexSupplier, index, bitmap, bitmapFactory);
ColumnSelectorColumnIndexSelector bitmapIndexSelector = new ColumnSelectorColumnIndexSelector(
new RoaringBitmapFactory(),
@ -276,7 +282,7 @@ public class ListFilteredVirtualColumnSelectorTest extends InitializedNullHandli
Assert.assertEquals(bitmap, listFilteredIndex.getBitmap(1));
Assert.assertTrue(listFilteredIndex.hasNulls());
EasyMock.verify(selector, holder, indexSupplier, index, bitmap, bitmapFactory);
EasyMock.verify(selector, holder, timeHolder, indexSupplier, index, bitmap, bitmapFactory);
}
private void assertCapabilities(VirtualizedColumnSelectorFactory selectorFactory, String columnName)