Properly read SQL-compatible segments in default-value mode. (#14142)

* Properly read SQL-compatible segments in default-value mode.

Main changes:

1) Dictionary-encoded and front-coded string columns: in default-value
   mode, detect cases where a dictionary has the empty string in it, then
   either combine it with null (if null is present) or replace it with
   null (if null is not present).

2) Numeric nullable columns: in default-value mode, ignore the null
   value bitmap. This causes all null numbers to be read as zeroes.

Testing strategy:

1) Add a mmappedWithSqlCompatibleNulls case to BaseFilterTest that
   writes segments under SQL-compatible mode, and reads them under
   default-value mode.

2) Unit tests for the new wrapper classes (CombineFirstTwoEntriesIndexed,
   CombineFirstTwoValuesColumnarInts, CombineFirstTwoValuesColumnarMultiInts,
   CombineFirstTwoValuesIndexedInts).

* Fix a mistake, use more singlethreadedness.

* WIP

* Tests, improvements.

* Style.

* See Spot bug.

* Remove unused method.

* Address review comments.

1) Read bitmaps even if we don't retain them.
2) Combine StringFrontCodedDictionaryEncodedColumn and ScalarStringDictionaryEncodedColumn.

* Add missing tests.
This commit is contained in:
Gian Merlino 2023-06-28 10:30:27 -07:00 committed by GitHub
parent 233233c92d
commit 82fbb31c7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 1553 additions and 533 deletions

View File

@ -22,8 +22,10 @@ package org.apache.druid.common.config;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import com.google.inject.Inject;
import org.apache.druid.segment.data.Indexed;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
/**
* Helper class for NullHandling. This class is used to switch between SQL compatible Null Handling behavior
@ -163,4 +165,45 @@ public class NullHandling
{
return replaceWithDefault() ? Strings.isNullOrEmpty(value) : value == null;
}
public static boolean isNullOrEquivalent(@Nullable ByteBuffer buffer)
{
return buffer == null || (replaceWithDefault() && buffer.remaining() == 0);
}
/**
* Given a UTF-8 dictionary, returns whether the first two entries must be coalesced into a single null entry.
* This happens if we are in default-value mode and the first two entries are null and empty string.
*
* This and {@link #mustReplaceFirstValueWithNullInDictionary(Indexed)} are never both true.
*
* Provided to enable compatibility for segments written under {@link #sqlCompatible()} mode but
* read under {@link #replaceWithDefault()} mode.
*/
public static boolean mustCombineNullAndEmptyInDictionary(final Indexed<ByteBuffer> dictionaryUtf8)
{
return NullHandling.replaceWithDefault()
&& dictionaryUtf8.size() >= 2
&& isNullOrEquivalent(dictionaryUtf8.get(0))
&& isNullOrEquivalent(dictionaryUtf8.get(1));
}
/**
* Given a UTF-8 dictionary, returns whether the first entry must be replaced with null. This happens if we
* are in default-value mode and the first entry is an empty string. (Default-value mode expects it to be null.)
*
* This and {@link #mustCombineNullAndEmptyInDictionary(Indexed)} are never both true.
*
* Provided to enable compatibility for segments written under {@link #sqlCompatible()} mode but
* read under {@link #replaceWithDefault()} mode.
*/
public static boolean mustReplaceFirstValueWithNullInDictionary(final Indexed<ByteBuffer> dictionaryUtf8)
{
if (NullHandling.replaceWithDefault() && dictionaryUtf8.size() >= 1) {
final ByteBuffer firstValue = dictionaryUtf8.get(0);
return firstValue != null && firstValue.remaining() == 0;
}
return false;
}
}

View File

@ -27,7 +27,6 @@ import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.AbstractDimensionSelector;
import org.apache.druid.segment.DimensionSelectorUtils;
import org.apache.druid.segment.IdLookup;
import org.apache.druid.segment.data.CachingIndexed;
import org.apache.druid.segment.data.ColumnarInts;
import org.apache.druid.segment.data.ColumnarMultiInts;
import org.apache.druid.segment.data.Indexed;
@ -45,6 +44,7 @@ import org.apache.druid.segment.vector.VectorObjectSelector;
import org.apache.druid.utils.CloseableUtils;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
@ -60,19 +60,19 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn<St
private final ColumnarInts column;
@Nullable
private final ColumnarMultiInts multiValueColumn;
private final CachingIndexed<String> cachedDictionary;
private final Indexed<String> dictionary;
private final Indexed<ByteBuffer> dictionaryUtf8;
public StringDictionaryEncodedColumn(
@Nullable ColumnarInts singleValueColumn,
@Nullable ColumnarMultiInts multiValueColumn,
CachingIndexed<String> dictionary,
Indexed<String> dictionary,
Indexed<ByteBuffer> dictionaryUtf8
)
{
this.column = singleValueColumn;
this.multiValueColumn = multiValueColumn;
this.cachedDictionary = dictionary;
this.dictionary = dictionary;
this.dictionaryUtf8 = dictionaryUtf8;
}
@ -104,7 +104,7 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn<St
@Nullable
public String lookupName(int id)
{
return cachedDictionary.get(id);
return dictionary.get(id);
}
@ -130,13 +130,13 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn<St
@Override
public int lookupId(String name)
{
return cachedDictionary.indexOf(name);
return dictionary.indexOf(name);
}
@Override
public int getCardinality()
{
return cachedDictionary.size();
return dictionary.size();
}
@Override
@ -495,7 +495,11 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn<St
@Override
public void close() throws IOException
{
CloseableUtils.closeAll(cachedDictionary, column, multiValueColumn);
CloseableUtils.closeAll(
dictionary instanceof Closeable ? (Closeable) dictionary : null /* Dictionary may be CachingIndexed */,
column,
multiValueColumn
);
}
/**

View File

@ -30,7 +30,6 @@ import org.apache.druid.segment.DimensionSelectorUtils;
import org.apache.druid.segment.IdLookup;
import org.apache.druid.segment.data.ColumnarInts;
import org.apache.druid.segment.data.ColumnarMultiInts;
import org.apache.druid.segment.data.FrontCodedIndexed;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.ReadableOffset;
@ -51,29 +50,27 @@ import java.nio.ByteBuffer;
import java.util.BitSet;
/**
* {@link DictionaryEncodedColumn<String>} for a column which uses a {@link FrontCodedIndexed} to store its value
* dictionary, which 'delta encodes' strings (instead of {@link org.apache.druid.segment.data.GenericIndexed} like
* {@link StringDictionaryEncodedColumn}).
* {@link DictionaryEncodedColumn<String>} for a column which has only a UTF-8 dictionary, no String dictionary.
* <p>
* This class is otherwise nearly identical to {@link StringDictionaryEncodedColumn} other than the dictionary
* difference.
* This class is otherwise nearly identical to {@link StringDictionaryEncodedColumn} other than lacking a
* String dictionary.
* <p>
* Implements {@link NestedCommonFormatColumn} so it can be used as a reader for single value string specializations
* of {@link org.apache.druid.segment.AutoTypeColumnIndexer}.
*/
public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncodedColumn<String>,
public class StringUtf8DictionaryEncodedColumn implements DictionaryEncodedColumn<String>,
NestedCommonFormatColumn
{
@Nullable
private final ColumnarInts column;
@Nullable
private final ColumnarMultiInts multiValueColumn;
private final FrontCodedIndexed utf8Dictionary;
private final Indexed<ByteBuffer> utf8Dictionary;
public StringFrontCodedDictionaryEncodedColumn(
public StringUtf8DictionaryEncodedColumn(
@Nullable ColumnarInts singleValueColumn,
@Nullable ColumnarMultiInts multiValueColumn,
FrontCodedIndexed utf8Dictionary
Indexed<ByteBuffer> utf8Dictionary
)
{
this.column = singleValueColumn;
@ -102,6 +99,9 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override
public IndexedInts getMultiValueRow(int rowNum)
{
if (!hasMultipleValues()) {
throw new UnsupportedOperationException("Column is not multi-valued");
}
return multiValueColumn.get(rowNum);
}
@ -154,7 +154,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override
public String lookupName(int id)
{
final String value = StringFrontCodedDictionaryEncodedColumn.this.lookupName(id);
final String value = StringUtf8DictionaryEncodedColumn.this.lookupName(id);
return extractionFn == null ? value : extractionFn.apply(value);
}
@ -190,7 +190,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
if (extractionFn != null) {
throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function");
}
return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name);
return StringUtf8DictionaryEncodedColumn.this.lookupId(name);
}
}
@ -291,7 +291,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
inspector.visit("column", StringFrontCodedDictionaryEncodedColumn.this);
inspector.visit("column", StringUtf8DictionaryEncodedColumn.this);
}
};
} else {
@ -332,7 +332,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
inspector.visit("column", StringFrontCodedDictionaryEncodedColumn.this);
inspector.visit("column", StringUtf8DictionaryEncodedColumn.this);
}
};
}
@ -381,7 +381,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override
public String lookupName(final int id)
{
return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id);
return StringUtf8DictionaryEncodedColumn.this.lookupName(id);
}
@Nullable
@ -394,7 +394,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override
public int lookupId(@Nullable String name)
{
return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name);
return StringUtf8DictionaryEncodedColumn.this.lookupId(name);
}
}
@ -421,7 +421,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override
public String lookupName(final int id)
{
return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id);
return StringUtf8DictionaryEncodedColumn.this.lookupName(id);
}
@Nullable
@ -435,7 +435,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override
public int lookupId(@Nullable String name)
{
return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name);
return StringUtf8DictionaryEncodedColumn.this.lookupId(name);
}
}
@ -457,7 +457,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override
public String lookupName(int id)
{
return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id);
return StringUtf8DictionaryEncodedColumn.this.lookupName(id);
}
}
return new StringVectorSelector();
@ -473,7 +473,7 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override
public String lookupName(int id)
{
return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id);
return StringUtf8DictionaryEncodedColumn.this.lookupName(id);
}
}
return new MultiStringVectorSelector();

View File

@ -27,6 +27,7 @@ import javax.annotation.Nullable;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.ToIntFunction;
public class CachingIndexed<T> implements CloseableIndexed<T>
{
@ -34,7 +35,8 @@ public class CachingIndexed<T> implements CloseableIndexed<T>
private static final Logger log = new Logger(CachingIndexed.class);
private final GenericIndexed<T>.BufferIndexed delegate;
private final Indexed<T> delegate;
private final ToIntFunction<T> sizeFn;
@Nullable
private final SizedLRUMap<Integer, T> cachedValues;
@ -44,12 +46,14 @@ public class CachingIndexed<T> implements CloseableIndexed<T>
* CachingIndexed objects are not thread safe and should only be used by a single thread at a time.
* CachingIndexed objects must be closed to release any underlying cache resources.
*
* @param delegate the GenericIndexed to wrap with a lookup cache.
* @param delegate the Indexed to wrap with a lookup cache.
* @param sizeFn function that determines the size in bytes of an object
* @param lookupCacheSize maximum size in bytes of the lookup cache if greater than zero
*/
public CachingIndexed(GenericIndexed<T> delegate, final int lookupCacheSize)
public CachingIndexed(Indexed<T> delegate, final ToIntFunction<T> sizeFn, final int lookupCacheSize)
{
this.delegate = delegate.singleThreaded();
this.delegate = delegate;
this.sizeFn = sizeFn;
if (lookupCacheSize > 0) {
log.debug("Allocating column cache of max size[%d]", lookupCacheSize);
@ -75,7 +79,7 @@ public class CachingIndexed<T> implements CloseableIndexed<T>
}
final T value = delegate.get(index);
cachedValues.put(index, value, delegate.getLastValueSize());
cachedValues.put(index, value, sizeFn.applyAsInt(value));
return value;
} else {
return delegate.get(index);

View File

@ -459,8 +459,6 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
*/
public abstract class BufferIndexed implements Indexed<T>
{
int lastReadSize;
@Override
public int size()
{
@ -492,7 +490,6 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
|| copyValueBuffer.get(startOffset - Integer.BYTES) == NULL_VALUE_SIZE_MARKER)) {
return null;
}
lastReadSize = size;
// ObjectStrategy.fromByteBuffer() is allowed to reset the limit of the buffer. So if the limit is changed,
// position() call could throw an exception, if the position is set beyond the new limit. Calling limit()
@ -511,16 +508,6 @@ public class GenericIndexed<T> implements CloseableIndexed<T>, Serializer
@Nullable
protected abstract ByteBuffer getByteBuffer(int index);
/**
* This method makes no guarantees with respect to thread safety
*
* @return the size in bytes of the last value read
*/
int getLastValueSize()
{
return lastReadSize;
}
@Override
public int indexOf(@Nullable T value)
{

View File

@ -30,6 +30,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnFormat;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde;
@ -45,8 +46,7 @@ import java.util.TreeMap;
*
* @see ScalarDoubleColumn
* @see ScalarLongColumn
* @see ScalarStringDictionaryEncodedColumn
* @see org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn
* @see StringUtf8DictionaryEncodedColumn
* @see VariantColumn
* @see CompressedNestedDataComplexColumn
*/

View File

@ -42,7 +42,7 @@ import org.apache.druid.segment.column.NullValueIndex;
import org.apache.druid.segment.column.SimpleImmutableBitmapIndex;
import org.apache.druid.segment.column.StringEncodingStrategies;
import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn;
import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn;
import org.apache.druid.segment.column.StringValueSetIndex;
import org.apache.druid.segment.data.BitmapSerdeFactory;
import org.apache.druid.segment.data.ColumnarInts;
@ -190,13 +190,13 @@ public class ScalarStringColumnAndIndexSupplier implements Supplier<NestedCommon
public NestedCommonFormatColumn get()
{
if (frontCodedStringDictionarySupplier != null) {
return new StringFrontCodedDictionaryEncodedColumn(
return new StringUtf8DictionaryEncodedColumn(
encodedColumnSupplier.get(),
null,
frontCodedStringDictionarySupplier.get()
);
}
return new ScalarStringDictionaryEncodedColumn<>(encodedColumnSupplier.get(), stringDictionary.singleThreaded());
return new StringUtf8DictionaryEncodedColumn(encodedColumnSupplier.get(), null, stringDictionary.singleThreaded());
}
@Nullable

View File

@ -33,6 +33,7 @@ import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.column.StringEncodingStrategies;
import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn;
import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer;
import org.apache.druid.segment.data.CompressionStrategy;
import org.apache.druid.segment.data.DictionaryWriter;
@ -45,10 +46,8 @@ import java.nio.ByteBuffer;
import java.nio.channels.WritableByteChannel;
/**
* Serializer for a string {@link NestedCommonFormatColumn} that can be read with either
* {@link ScalarStringDictionaryEncodedColumn} or
* {@link org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn} (if written with a front-coded
* dictionary).
* Serializer for a string {@link NestedCommonFormatColumn} that can be read with
* {@link StringUtf8DictionaryEncodedColumn}.
*/
public class ScalarStringColumnSerializer extends NestedCommonFormatColumnSerializer
{

View File

@ -1,382 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.nested;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.AbstractDimensionSelector;
import org.apache.druid.segment.IdLookup;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.StringDictionaryEncodedColumn;
import org.apache.druid.segment.column.StringEncodingStrategies;
import org.apache.druid.segment.data.ColumnarInts;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.ReadableOffset;
import org.apache.druid.segment.data.SingleIndexedInt;
import org.apache.druid.segment.filter.BooleanValueMatcher;
import org.apache.druid.segment.historical.HistoricalDimensionSelector;
import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector;
import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
import org.apache.druid.segment.vector.ReadableVectorOffset;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorObjectSelector;
import org.apache.druid.utils.CloseableUtils;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.BitSet;
/**
* {@link NestedCommonFormatColumn} specialization for {@link ColumnType#STRING} with a generic buffer based utf8
* dictionary. This is used when not using the more specific
* {@link org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn}, and only supports single value
* strings.
*/
public class ScalarStringDictionaryEncodedColumn<TIndexed extends Indexed<ByteBuffer>>
implements DictionaryEncodedColumn<String>, NestedCommonFormatColumn
{
private final ColumnarInts column;
private final TIndexed utf8Dictionary;
public ScalarStringDictionaryEncodedColumn(
ColumnarInts singleValueColumn,
TIndexed utf8Dictionary
)
{
this.column = singleValueColumn;
this.utf8Dictionary = utf8Dictionary;
}
@Override
public int length()
{
return column.size();
}
@Override
public boolean hasMultipleValues()
{
return false;
}
@Override
public int getSingleValueRow(int rowNum)
{
return column.get(rowNum);
}
@Override
public IndexedInts getMultiValueRow(int rowNum)
{
throw new UnsupportedOperationException("Column is not multi-valued");
}
@Override
@Nullable
public String lookupName(int id)
{
final ByteBuffer buffer = utf8Dictionary.get(id);
if (buffer == null) {
return null;
}
return StringUtils.fromUtf8(buffer);
}
@Override
public int lookupId(String name)
{
return utf8Dictionary.indexOf(StringUtils.toUtf8ByteBuffer(name));
}
@Override
public int getCardinality()
{
return utf8Dictionary.size();
}
@Override
public HistoricalDimensionSelector makeDimensionSelector(
final ReadableOffset offset,
@Nullable final ExtractionFn extractionFn
)
{
class SingleValueQueryableDimensionSelector extends AbstractDimensionSelector
implements SingleValueHistoricalDimensionSelector, IdLookup, HistoricalDimensionSelector
{
private final SingleIndexedInt row = new SingleIndexedInt();
@Override
public int getValueCardinality()
{
/*
This is technically wrong if
extractionFn != null && (extractionFn.getExtractionType() != ExtractionFn.ExtractionType.ONE_TO_ONE ||
!extractionFn.preservesOrdering())
However current behavior allows some GroupBy-V1 queries to work that wouldn't work otherwise and doesn't
cause any problems due to special handling of extractionFn everywhere.
See https://github.com/apache/druid/pull/8433
*/
return getCardinality();
}
@Override
public String lookupName(int id)
{
final String value = ScalarStringDictionaryEncodedColumn.this.lookupName(id);
return extractionFn == null ? value : extractionFn.apply(value);
}
@Nullable
@Override
public ByteBuffer lookupNameUtf8(int id)
{
return utf8Dictionary.get(id);
}
@Override
public boolean supportsLookupNameUtf8()
{
return true;
}
@Override
public boolean nameLookupPossibleInAdvance()
{
return true;
}
@Nullable
@Override
public IdLookup idLookup()
{
return extractionFn == null ? this : null;
}
@Override
public int lookupId(String name)
{
if (extractionFn != null) {
throw new UnsupportedOperationException("cannot perform lookup when applying an extraction function");
}
return ScalarStringDictionaryEncodedColumn.this.lookupId(name);
}
@Override
public IndexedInts getRow()
{
row.setValue(getRowValue());
return row;
}
public int getRowValue()
{
return column.get(offset.getOffset());
}
@Override
public IndexedInts getRow(int offset)
{
row.setValue(getRowValue(offset));
return row;
}
@Override
public int getRowValue(int offset)
{
return column.get(offset);
}
@Override
public ValueMatcher makeValueMatcher(final @Nullable String value)
{
if (extractionFn == null) {
final int valueId = lookupId(value);
if (valueId >= 0) {
return new ValueMatcher()
{
@Override
public boolean matches()
{
return getRowValue() == valueId;
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
inspector.visit("column", ScalarStringDictionaryEncodedColumn.this);
}
};
} else {
return BooleanValueMatcher.of(false);
}
} else {
// Employ caching BitSet optimization
return makeValueMatcher(Predicates.equalTo(value));
}
}
@Override
public ValueMatcher makeValueMatcher(final Predicate<String> predicate)
{
final BitSet checkedIds = new BitSet(getCardinality());
final BitSet matchingIds = new BitSet(getCardinality());
// Lazy matcher; only check an id if matches() is called.
return new ValueMatcher()
{
@Override
public boolean matches()
{
final int id = getRowValue();
if (checkedIds.get(id)) {
return matchingIds.get(id);
} else {
final boolean matches = predicate.apply(lookupName(id));
checkedIds.set(id);
if (matches) {
matchingIds.set(id);
}
return matches;
}
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
inspector.visit("column", ScalarStringDictionaryEncodedColumn.this);
}
};
}
@Override
public Object getObject()
{
return lookupName(getRowValue());
}
@Override
public Class classOfObject()
{
return String.class;
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
inspector.visit("column", column);
inspector.visit("offset", offset);
inspector.visit("extractionFn", extractionFn);
}
}
return new SingleValueQueryableDimensionSelector();
}
@Override
public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset)
{
final class StringVectorSelector extends StringDictionaryEncodedColumn.StringSingleValueDimensionVectorSelector
{
public StringVectorSelector()
{
super(column, offset);
}
@Override
public int getValueCardinality()
{
return getCardinality();
}
@Nullable
@Override
public String lookupName(final int id)
{
return ScalarStringDictionaryEncodedColumn.this.lookupName(id);
}
@Nullable
@Override
public ByteBuffer lookupNameUtf8(int id)
{
return utf8Dictionary.get(id);
}
@Override
public int lookupId(@Nullable String name)
{
return ScalarStringDictionaryEncodedColumn.this.lookupId(name);
}
}
return new StringVectorSelector();
}
@Override
public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset)
{
throw new UnsupportedOperationException("Column not multi-valued");
}
@Override
public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset)
{
final class StringVectorSelector extends StringDictionaryEncodedColumn.StringVectorObjectSelector
{
public StringVectorSelector()
{
super(column, offset);
}
@Nullable
@Override
public String lookupName(int id)
{
return ScalarStringDictionaryEncodedColumn.this.lookupName(id);
}
}
return new StringVectorSelector();
}
@Override
public void close() throws IOException
{
CloseableUtils.closeAll(column);
}
@Override
public ColumnType getLogicalType()
{
return ColumnType.STRING;
}
@Override
public Indexed<String> getStringDictionary()
{
return new StringEncodingStrategies.Utf8ToStringIndexed(utf8Dictionary);
}
}

View File

@ -0,0 +1,196 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import com.google.common.collect.ImmutableList;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.data.Indexed;
import javax.annotation.Nullable;
import java.util.Iterator;
import java.util.Objects;
/**
* An {@link Indexed} that delegates to an underyling instance, but combines the first two entries.
*
* Unlike {@link CombineFirstTwoValuesIndexedInts}, this class combines the first two *entries*.
* So [0, 1, 2] becomes [(something), 2]. The first two entries, 0 and 1, were replaced with (something). That something
* is given by {@link #newFirstValue()}.
*
* Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but
* read under {@link NullHandling#replaceWithDefault()} mode.
*
* Important note: {@link #isSorted()} returns the same value as the underlying delegate. In this case, this class
* assumes that {@link #newFirstValue()} is the lowest possible value in the universe: including anything in
* {@link #delegate} and anything that might be passed to {@link #indexOf(Object)}. Callers must ensure that this
* precondition is met.
*
* @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed)
*/
public abstract class CombineFirstTwoEntriesIndexed<T> implements Indexed<T>
{
private static final int FIRST_ID = 0;
protected final Indexed<T> delegate;
protected CombineFirstTwoEntriesIndexed(Indexed<T> delegate)
{
this.delegate = delegate;
if (delegate.size() < 2) {
throw new ISE("Size[%s] must be >= 2", delegate.size());
}
}
/**
* Combine the first two values into a literal null.
*/
public static <T> CombineFirstTwoEntriesIndexed<T> returnNull(final Indexed<T> delegate)
{
return new CombineFirstTwoEntriesIndexed<T>(delegate)
{
@Nullable
@Override
protected T newFirstValue()
{
return null;
}
};
}
/**
* Union the first two bitmaps.
*/
public static CombineFirstTwoEntriesIndexed<ImmutableBitmap> unionBitmaps(
final BitmapFactory bitmapFactory,
final Indexed<ImmutableBitmap> delegate
)
{
return new CombineFirstTwoEntriesIndexed<ImmutableBitmap>(delegate)
{
@Nullable
@Override
protected ImmutableBitmap newFirstValue()
{
return bitmapFactory.union(ImmutableList.of(delegate.get(FIRST_ID), delegate.get(FIRST_ID + 1)));
}
};
}
@Nullable
protected abstract T newFirstValue();
@Override
public int size()
{
return delegate.size() - 1;
}
@Nullable
@Override
public T get(int index)
{
if (index == FIRST_ID) {
return newFirstValue();
} else {
return delegate.get(index + 1);
}
}
@Override
public int indexOf(@Nullable T value)
{
if (Objects.equals(newFirstValue(), value)) {
return FIRST_ID;
} else {
final int index = delegate.indexOf(value);
if (index > FIRST_ID + 1) {
// Item found, index needs adjustment.
return index - 1;
} else if (index >= 0) {
// Item found, but shadowed, so really not found.
// Insertion point is after FIRST_ID. (See class-level javadoc: newFirstValue is required to be
// lower than all elements in the universe.)
return -2;
} else if (index >= -2) {
// Item not found, and insertion point is prior to, or within, the shadowed portion of delegate. Return
// insertion point immediately after newFirstValue, since that value is required to be lower than all elements
// in the universe.
return -2;
} else {
// Item not found, and insertion point is after the shadowed portion of delegate. Adjust and return.
return index + 1;
}
}
}
@Override
public Iterator<T> iterator()
{
final Iterator<T> it = delegate.iterator();
// Skip first two values.
//CHECKSTYLE.OFF: Regexp
it.next();
it.next();
//CHECKSTYLE.ON: Regexp
class CoalescingIndexedIterator implements Iterator<T>
{
boolean returnedFirstValue;
@Override
public boolean hasNext()
{
return !returnedFirstValue || it.hasNext();
}
@Override
public T next()
{
if (!returnedFirstValue) {
returnedFirstValue = true;
return newFirstValue();
} else {
return it.next();
}
}
}
return new CoalescingIndexedIterator();
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
delegate.inspectRuntimeShape(inspector);
}
@Override
public boolean isSorted()
{
return delegate.isSorted();
}
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.segment.data.ColumnarInts;
import org.apache.druid.segment.data.Indexed;
import java.io.IOException;
/**
* A {@link ColumnarInts} facade over {@link CombineFirstTwoValuesIndexedInts}.
*
* Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but
* read under {@link NullHandling#replaceWithDefault()} mode.
*
* @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed)
*/
public class CombineFirstTwoValuesColumnarInts extends CombineFirstTwoValuesIndexedInts implements ColumnarInts
{
public CombineFirstTwoValuesColumnarInts(ColumnarInts delegate)
{
super(delegate);
}
@Override
public void close() throws IOException
{
((ColumnarInts) delegate).close();
}
}

View File

@ -0,0 +1,106 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import com.google.common.collect.Iterators;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.data.ColumnarMultiInts;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.ZeroIndexedInts;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Iterator;
/**
* A {@link ColumnarMultiInts} that delegates to an underyling instance, but applies
* {@link CombineFirstTwoValuesIndexedInts} to each row's set of values.
*
* Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but
* read under {@link NullHandling#replaceWithDefault()} mode.
*
* @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed)
*/
public class CombineFirstTwoValuesColumnarMultiInts implements ColumnarMultiInts
{
private final ColumnarMultiInts delegate;
private final CombineFirstTwoValuesIndexedInts rowValues;
public CombineFirstTwoValuesColumnarMultiInts(ColumnarMultiInts delegate)
{
this.delegate = delegate;
this.rowValues = new CombineFirstTwoValuesIndexedInts(ZeroIndexedInts.instance());
}
@Override
public IndexedInts get(int index)
{
rowValues.delegate = delegate.get(index);
return rowValues;
}
@Override
public IndexedInts getUnshared(int index)
{
return new CombineFirstTwoValuesIndexedInts(delegate.getUnshared(index));
}
@Override
public int size()
{
return delegate.size();
}
@Override
public int indexOf(@Nullable IndexedInts value)
{
// No ColumnarMultiInts implement this method
throw new UnsupportedOperationException("Reverse lookup not allowed.");
}
@Override
public boolean isSorted()
{
return delegate.isSorted();
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
delegate.inspectRuntimeShape(inspector);
}
@Override
public Iterator<IndexedInts> iterator()
{
return Iterators.transform(
delegate.iterator(),
CombineFirstTwoValuesIndexedInts::new
);
}
@Override
public void close() throws IOException
{
delegate.close();
}
}

View File

@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.IndexedInts;
/**
* A {@link IndexedInts} that delegates to an underyling instance, but combines the values 0 and 1 into 0, and shifts
* all other values down by one. For example:
*
* - [2, 0, 1] => [1, 0, 0]
* - [3, 2, 1] => [2, 1, 0]
* - [0, 1, 0] => [0, 0, 0]
*
* Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but
* read under {@link NullHandling#replaceWithDefault()} mode.
*
* @see NullHandling#mustCombineNullAndEmptyInDictionary(Indexed)
*/
public class CombineFirstTwoValuesIndexedInts implements IndexedInts
{
private static final int ZERO_ID = 0;
IndexedInts delegate;
public CombineFirstTwoValuesIndexedInts(IndexedInts delegate)
{
this.delegate = delegate;
}
@Override
public int size()
{
return delegate.size();
}
@Override
public int get(int index)
{
final int i = delegate.get(index);
if (i == ZERO_ID) {
return i;
} else {
return i - 1;
}
}
@Override
public void get(int[] out, int start, int length)
{
delegate.get(out, start, length);
for (int i = 0; i < length; i++) {
if (out[i] != ZERO_ID) {
out[i]--;
}
}
}
@Override
public void get(int[] out, int[] indexes, int length)
{
delegate.get(out, indexes, length);
for (int i = 0; i < length; i++) {
if (out[i] != ZERO_ID) {
out[i]--;
}
}
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
delegate.inspectRuntimeShape(inspector);
}
}

View File

@ -20,17 +20,20 @@
package org.apache.druid.segment.serde;
import com.google.common.base.Supplier;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.StringDictionaryEncodedColumn;
import org.apache.druid.segment.data.CachingIndexed;
import org.apache.druid.segment.data.ColumnarInts;
import org.apache.druid.segment.data.ColumnarMultiInts;
import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.Indexed;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
/**
*
*/
public class DictionaryEncodedColumnSupplier implements Supplier<DictionaryEncodedColumn<?>>
{
@ -58,11 +61,40 @@ public class DictionaryEncodedColumnSupplier implements Supplier<DictionaryEncod
@Override
public DictionaryEncodedColumn<?> get()
{
return new StringDictionaryEncodedColumn(
singleValuedColumn != null ? singleValuedColumn.get() : null,
multiValuedColumn != null ? multiValuedColumn.get() : null,
new CachingIndexed<>(dictionary, lookupCacheSize),
dictionaryUtf8.singleThreaded()
);
final Indexed<String> cacheWrappedDictionary;
final Indexed<ByteBuffer> singleThreadedDictionaryUtf8 = dictionaryUtf8.singleThreaded();
if (lookupCacheSize > 0) {
cacheWrappedDictionary = new CachingIndexed<>(
dictionary.singleThreaded(),
s -> s == null ? 0 : s.length() * Character.BYTES,
lookupCacheSize
);
} else {
cacheWrappedDictionary = dictionary.singleThreaded();
}
if (NullHandling.mustCombineNullAndEmptyInDictionary(singleThreadedDictionaryUtf8)) {
return new StringDictionaryEncodedColumn(
singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null,
multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null,
CombineFirstTwoEntriesIndexed.returnNull(cacheWrappedDictionary),
CombineFirstTwoEntriesIndexed.returnNull(singleThreadedDictionaryUtf8)
);
} else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(singleThreadedDictionaryUtf8)) {
return new StringDictionaryEncodedColumn(
singleValuedColumn != null ? singleValuedColumn.get() : null,
multiValuedColumn != null ? multiValuedColumn.get() : null,
new ReplaceFirstValueWithNullIndexed<>(cacheWrappedDictionary),
new ReplaceFirstValueWithNullIndexed<>(singleThreadedDictionaryUtf8)
);
} else {
return new StringDictionaryEncodedColumn(
singleValuedColumn != null ? singleValuedColumn.get() : null,
multiValuedColumn != null ? multiValuedColumn.get() : null,
cacheWrappedDictionary,
singleThreadedDictionaryUtf8
);
}
}
}

View File

@ -75,13 +75,24 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
public <T> T as(Class<T> clazz)
{
if (bitmaps != null) {
final Indexed<String> singleThreadedStrings = dictionary.singleThreaded();
final Indexed<ByteBuffer> singleThreadedUtf8 = dictionaryUtf8.singleThreaded();
final Indexed<ImmutableBitmap> singleThreadedBitmaps = bitmaps.singleThreaded();
Indexed<String> singleThreadedStrings = dictionary.singleThreaded();
Indexed<ByteBuffer> singleThreadedUtf8 = dictionaryUtf8.singleThreaded();
Indexed<ImmutableBitmap> singleThreadedBitmaps = bitmaps.singleThreaded();
if (NullHandling.mustCombineNullAndEmptyInDictionary(singleThreadedUtf8)) {
singleThreadedStrings = CombineFirstTwoEntriesIndexed.returnNull(singleThreadedStrings);
singleThreadedUtf8 = CombineFirstTwoEntriesIndexed.returnNull(singleThreadedUtf8);
singleThreadedBitmaps = CombineFirstTwoEntriesIndexed.unionBitmaps(bitmapFactory, singleThreadedBitmaps);
} else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(singleThreadedUtf8)) {
singleThreadedStrings = new ReplaceFirstValueWithNullIndexed<>(singleThreadedStrings);
singleThreadedUtf8 = new ReplaceFirstValueWithNullIndexed<>(singleThreadedUtf8);
}
if (clazz.equals(NullValueIndex.class)) {
final BitmapColumnIndex nullIndex;
if (NullHandling.isNullOrEquivalent(dictionary.get(0))) {
nullIndex = new SimpleImmutableBitmapIndex(bitmaps.get(0));
final ByteBuffer firstValue = singleThreadedUtf8.get(0);
if (NullHandling.isNullOrEquivalent(firstValue)) {
nullIndex = new SimpleImmutableBitmapIndex(singleThreadedBitmaps.get(0));
} else {
nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap());
}
@ -97,13 +108,14 @@ public class DictionaryEncodedStringIndexSupplier implements ColumnIndexSupplier
bitmapFactory,
singleThreadedUtf8,
singleThreadedBitmaps,
NullHandling.isNullOrEquivalent(dictionary.get(0))
NullHandling.isNullOrEquivalent(singleThreadedStrings.get(0))
);
} else if (clazz.equals(DictionaryEncodedStringValueIndex.class) || clazz.equals(DictionaryEncodedValueIndex.class)) {
} else if (clazz.equals(DictionaryEncodedStringValueIndex.class)
|| clazz.equals(DictionaryEncodedValueIndex.class)) {
return (T) new IndexedStringDictionaryEncodedStringValueIndex<>(
bitmapFactory,
singleThreadedStrings,
bitmaps
singleThreadedBitmaps
);
}
}

View File

@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Supplier;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.BitmapSerde;
@ -154,7 +155,14 @@ public class DoubleNumericColumnPartSerdeV2 implements ColumnPartSerde
final ImmutableBitmap bitmap;
final boolean hasNulls;
if (buffer.hasRemaining()) {
bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer);
if (NullHandling.sqlCompatible()) {
bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer);
} else {
// Read from the buffer (to advance its position) but do not actually retain the bitmaps.
bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer);
bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap();
}
hasNulls = !bitmap.isEmpty();
} else {
bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap();

View File

@ -22,6 +22,7 @@ package org.apache.druid.segment.serde;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.BitmapSerde;
@ -150,8 +151,15 @@ public class FloatNumericColumnPartSerdeV2 implements ColumnPartSerde
buffer.position(initialPos + offset);
final ImmutableBitmap bitmap;
final boolean hasNulls;
if (buffer.hasRemaining()) {
bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer);
if (buffer.hasRemaining() && NullHandling.sqlCompatible()) {
if (NullHandling.sqlCompatible()) {
bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer);
} else {
// Read from the buffer (to advance its position) but do not actually retain the bitmaps.
bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer);
bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap();
}
hasNulls = !bitmap.isEmpty();
} else {
bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap();

View File

@ -22,6 +22,7 @@ package org.apache.druid.segment.serde;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.BitmapSerde;
@ -152,8 +153,15 @@ public class LongNumericColumnPartSerdeV2 implements ColumnPartSerde
buffer.position(initialPos + offset);
final ImmutableBitmap bitmap;
final boolean hasNulls;
if (buffer.hasRemaining()) {
bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer);
if (buffer.hasRemaining() && NullHandling.sqlCompatible()) {
if (NullHandling.sqlCompatible()) {
bitmap = bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer);
} else {
// Read from the buffer (to advance its position) but do not actually retain the bitmaps.
bitmapSerdeFactory.getObjectStrategy().fromByteBufferWithSize(buffer);
bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap();
}
hasNulls = !bitmap.isEmpty();
} else {
bitmap = bitmapSerdeFactory.getBitmapFactory().makeEmptyImmutableBitmap();

View File

@ -0,0 +1,131 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.data.Indexed;
import javax.annotation.Nullable;
import java.util.Iterator;
/**
* An Indexed that replaces the first value with a literal null.
*
* Provided to enable compatibility for segments written under {@link NullHandling#sqlCompatible()} mode but
* read under {@link NullHandling#replaceWithDefault()} mode.
*
* Important note: {@link #isSorted()} returns the same value as the underlying delegate. In this case, this class
* assumes that {@code null} is the lowest possible value in the universe: including anything in {@link #delegate} and
* anything that might be passed to {@link #indexOf(Object)}. Callers must ensure that this precondition is met.
*
* @see NullHandling#mustReplaceFirstValueWithNullInDictionary(Indexed)
*/
public class ReplaceFirstValueWithNullIndexed<T> implements Indexed<T>
{
private final Indexed<T> delegate;
public ReplaceFirstValueWithNullIndexed(Indexed<T> delegate)
{
this.delegate = delegate;
if (delegate.size() < 1) {
throw new ISE("Size[%s] must be >= 1", delegate.size());
}
}
@Override
public int size()
{
return delegate.size();
}
@Nullable
@Override
public T get(int index)
{
if (index == 0) {
return null;
} else {
return delegate.get(index);
}
}
@Override
public int indexOf(@Nullable T value)
{
if (value == null) {
return 0;
} else {
final int result = delegate.indexOf(value);
if (result == 0 || result == -1) {
return -2;
} else {
return result;
}
}
}
@Override
public boolean isSorted()
{
return delegate.isSorted();
}
@Override
public Iterator<T> iterator()
{
final Iterator<T> it = delegate.iterator();
// Skip first value.
it.next();
class ReplaceFirstValueWithNullIndexedIterator implements Iterator<T>
{
boolean returnedNull;
@Override
public boolean hasNext()
{
return !returnedNull || it.hasNext();
}
@Override
public T next()
{
if (!returnedNull) {
returnedNull = true;
return null;
} else {
return it.next();
}
}
}
return new ReplaceFirstValueWithNullIndexedIterator();
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
delegate.inspectRuntimeShape(inspector);
}
}

View File

@ -44,11 +44,11 @@ import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.Indexed;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
public class StringFrontCodedColumnIndexSupplier implements ColumnIndexSupplier
{
private final BitmapFactory bitmapFactory;
private final Supplier<StringEncodingStrategies.Utf8ToStringIndexed> dictionary;
private final Supplier<FrontCodedIndexed> utf8Dictionary;
@Nullable
@ -67,21 +67,30 @@ public class StringFrontCodedColumnIndexSupplier implements ColumnIndexSupplier
this.bitmapFactory = bitmapFactory;
this.bitmaps = bitmaps;
this.utf8Dictionary = utf8Dictionary;
this.dictionary = () -> new StringEncodingStrategies.Utf8ToStringIndexed(this.utf8Dictionary.get());
this.indexedTree = indexedTree;
}
@Nullable
@Override
@SuppressWarnings("unchecked")
public <T> T as(Class<T> clazz)
{
if (bitmaps != null) {
final Indexed<ImmutableBitmap> singleThreadedBitmaps = bitmaps.singleThreaded();
Indexed<ByteBuffer> dict = utf8Dictionary.get();
Indexed<ImmutableBitmap> singleThreadedBitmaps = bitmaps.singleThreaded();
if (NullHandling.mustCombineNullAndEmptyInDictionary(dict)) {
dict = CombineFirstTwoEntriesIndexed.returnNull(dict);
singleThreadedBitmaps = CombineFirstTwoEntriesIndexed.unionBitmaps(bitmapFactory, singleThreadedBitmaps);
} else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(dict)) {
dict = new ReplaceFirstValueWithNullIndexed<>(dict);
}
if (clazz.equals(NullValueIndex.class)) {
final BitmapColumnIndex nullIndex;
final StringEncodingStrategies.Utf8ToStringIndexed stringDictionary = dictionary.get();
if (NullHandling.isNullOrEquivalent(stringDictionary.get(0))) {
nullIndex = new SimpleImmutableBitmapIndex(bitmaps.get(0));
final ByteBuffer firstValue = dict.get(0);
if (NullHandling.isNullOrEquivalent(firstValue)) {
nullIndex = new SimpleImmutableBitmapIndex(singleThreadedBitmaps.get(0));
} else {
nullIndex = new SimpleImmutableBitmapIndex(bitmapFactory.makeEmptyImmutableBitmap());
}
@ -89,17 +98,16 @@ public class StringFrontCodedColumnIndexSupplier implements ColumnIndexSupplier
} else if (clazz.equals(StringValueSetIndex.class)) {
return (T) new IndexedUtf8ValueSetIndex<>(
bitmapFactory,
utf8Dictionary.get(),
dict,
singleThreadedBitmaps
);
} else if (clazz.equals(DruidPredicateIndex.class)) {
return (T) new IndexedStringDruidPredicateIndex<>(
bitmapFactory,
dictionary.get(),
new StringEncodingStrategies.Utf8ToStringIndexed(dict),
singleThreadedBitmaps
);
} else if (clazz.equals(LexicographicalRangeIndex.class)) {
final FrontCodedIndexed dict = utf8Dictionary.get();
return (T) new IndexedUtf8LexicographicalRangeIndex<>(
bitmapFactory,
dict,
@ -108,10 +116,11 @@ public class StringFrontCodedColumnIndexSupplier implements ColumnIndexSupplier
);
} else if (clazz.equals(DictionaryEncodedStringValueIndex.class)
|| clazz.equals(DictionaryEncodedValueIndex.class)) {
// Need string dictionary instead of UTF8 dictionary
return (T) new IndexedStringDictionaryEncodedStringValueIndex<>(
bitmapFactory,
dictionary.get(),
bitmaps
new StringEncodingStrategies.Utf8ToStringIndexed(dict),
singleThreadedBitmaps
);
}
}

View File

@ -20,9 +20,10 @@
package org.apache.druid.segment.serde;
import com.google.common.base.Supplier;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.StringDictionaryEncodedColumn;
import org.apache.druid.segment.column.StringFrontCodedDictionaryEncodedColumn;
import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn;
import org.apache.druid.segment.data.ColumnarInts;
import org.apache.druid.segment.data.ColumnarMultiInts;
import org.apache.druid.segment.data.FrontCodedIndexed;
@ -30,7 +31,7 @@ import org.apache.druid.segment.data.FrontCodedIndexed;
import javax.annotation.Nullable;
/**
* {@link DictionaryEncodedColumnSupplier} but for columns using a {@link StringFrontCodedDictionaryEncodedColumn}
* {@link DictionaryEncodedColumnSupplier} but for columns using a {@link StringUtf8DictionaryEncodedColumn}
* instead of the traditional {@link StringDictionaryEncodedColumn}
*/
public class StringFrontCodedDictionaryEncodedColumnSupplier implements Supplier<DictionaryEncodedColumn<?>>
@ -53,10 +54,26 @@ public class StringFrontCodedDictionaryEncodedColumnSupplier implements Supplier
@Override
public DictionaryEncodedColumn<?> get()
{
return new StringFrontCodedDictionaryEncodedColumn(
singleValuedColumn != null ? singleValuedColumn.get() : null,
multiValuedColumn != null ? multiValuedColumn.get() : null,
utf8Dictionary.get()
);
final FrontCodedIndexed suppliedUtf8Dictionary = utf8Dictionary.get();
if (NullHandling.mustCombineNullAndEmptyInDictionary(suppliedUtf8Dictionary)) {
return new StringUtf8DictionaryEncodedColumn(
singleValuedColumn != null ? new CombineFirstTwoValuesColumnarInts(singleValuedColumn.get()) : null,
multiValuedColumn != null ? new CombineFirstTwoValuesColumnarMultiInts(multiValuedColumn.get()) : null,
CombineFirstTwoEntriesIndexed.returnNull(suppliedUtf8Dictionary)
);
} else if (NullHandling.mustReplaceFirstValueWithNullInDictionary(suppliedUtf8Dictionary)) {
return new StringUtf8DictionaryEncodedColumn(
singleValuedColumn != null ? singleValuedColumn.get() : null,
multiValuedColumn != null ? multiValuedColumn.get() : null,
new ReplaceFirstValueWithNullIndexed<>(suppliedUtf8Dictionary)
);
} else {
return new StringUtf8DictionaryEncodedColumn(
singleValuedColumn != null ? singleValuedColumn.get() : null,
multiValuedColumn != null ? multiValuedColumn.get() : null,
suppliedUtf8Dictionary
);
}
}
}

View File

@ -19,10 +19,14 @@
package org.apache.druid.common.config;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.segment.data.ListIndexed;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Test;
import java.util.Collections;
import static org.apache.druid.common.config.NullHandling.replaceWithDefault;
import static org.junit.Assert.assertEquals;
@ -99,4 +103,86 @@ public final class NullHandlingTest extends InitializedNullHandlingTest
NullHandling.initializeForTests();
}
}
@Test
public void test_mustCombineNullAndEmptyInDictionary()
{
Assert.assertFalse(
NullHandling.mustCombineNullAndEmptyInDictionary(
new ListIndexed<>(Collections.singletonList(null))
)
);
Assert.assertFalse(
NullHandling.mustCombineNullAndEmptyInDictionary(
new ListIndexed<>(StringUtils.toUtf8ByteBuffer("foo"))
)
);
Assert.assertFalse(
NullHandling.mustCombineNullAndEmptyInDictionary(
new ListIndexed<>(StringUtils.toUtf8ByteBuffer(""))
)
);
Assert.assertFalse(
NullHandling.mustCombineNullAndEmptyInDictionary(
new ListIndexed<>(StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo"))
)
);
Assert.assertEquals(
NullHandling.replaceWithDefault(),
NullHandling.mustCombineNullAndEmptyInDictionary(
new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer(""))
)
);
Assert.assertEquals(
NullHandling.replaceWithDefault(),
NullHandling.mustCombineNullAndEmptyInDictionary(
new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo")))
);
}
@Test
public void test_mustReplaceFirstValueWithNullInDictionary()
{
Assert.assertFalse(
NullHandling.mustReplaceFirstValueWithNullInDictionary(
new ListIndexed<>(Collections.singletonList(null))
)
);
Assert.assertFalse(
NullHandling.mustReplaceFirstValueWithNullInDictionary(
new ListIndexed<>(StringUtils.toUtf8ByteBuffer("foo"))
)
);
Assert.assertEquals(
NullHandling.replaceWithDefault(),
NullHandling.mustReplaceFirstValueWithNullInDictionary(
new ListIndexed<>(StringUtils.toUtf8ByteBuffer(""))
)
);
Assert.assertEquals(
NullHandling.replaceWithDefault(),
NullHandling.mustReplaceFirstValueWithNullInDictionary(
new ListIndexed<>(StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo"))
)
);
Assert.assertFalse(
NullHandling.mustReplaceFirstValueWithNullInDictionary(
new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer(""))
)
);
Assert.assertFalse(
NullHandling.mustReplaceFirstValueWithNullInDictionary(
new ListIndexed<>(null, StringUtils.toUtf8ByteBuffer(""), StringUtils.toUtf8ByteBuffer("foo")))
);
}
}

View File

@ -116,6 +116,11 @@ public class IndexBuilder
return new IndexBuilder(jsonMapper, columnConfig);
}
public IndexIO getIndexIO()
{
return indexIO;
}
public IndexBuilder schema(IncrementalIndexSchema schema)
{
this.schema = schema;
@ -198,12 +203,6 @@ public class IndexBuilder
return this;
}
public IndexBuilder maxRows(int maxRows)
{
this.maxRows = maxRows;
return this;
}
public IndexBuilder intermediaryPersistSize(int rows)
{
this.intermediatePersistSize = rows;
@ -231,7 +230,7 @@ public class IndexBuilder
return buildIncrementalIndexWithRows(schema, maxRows, rows);
}
public QueryableIndex buildMMappedIndex()
public File buildMMappedIndexFile()
{
Preconditions.checkNotNull(indexMerger, "indexMerger");
Preconditions.checkNotNull(tmpDir, "tmpDir");
@ -255,16 +254,14 @@ public class IndexBuilder
// queryable index instead of the incremental index, which also mimics the behavior of real ingestion tasks
// which persist incremental indexes as intermediate segments and then merges all the intermediate segments to
// publish
return indexIO.loadIndex(
indexMerger.merge(
adapters,
schema.isRollup(),
schema.getMetrics(),
tmpDir,
schema.getDimensionsSpec(),
indexSpec,
Integer.MAX_VALUE
)
return indexMerger.merge(
adapters,
schema.isRollup(),
schema.getMetrics(),
tmpDir,
schema.getDimensionsSpec(),
indexSpec,
Integer.MAX_VALUE
);
}
catch (IOException e) {
@ -272,6 +269,15 @@ public class IndexBuilder
}
}
public QueryableIndex buildMMappedIndex()
{
try {
return indexIO.loadIndex(buildMMappedIndexFile());
}
catch (IOException e) {
throw new RuntimeException(e);
}
}
public QueryableIndex buildMMappedMergedIndex()
{

View File

@ -140,7 +140,7 @@ public class IndexMergerNullHandlingTest
// Compute all unique values, the same way that IndexMerger is expected to do it.
final Set<String> uniqueValues = new HashSet<>();
for (Map<String, Object> m : subsetList) {
final List<String> dValues = normalize(m.get("d"), hasMultipleValues);
final List<String> dValues = normalize(m.get("d"));
uniqueValues.addAll(dValues);
if (nullFlavors.contains(m)) {
@ -167,7 +167,7 @@ public class IndexMergerNullHandlingTest
subsetList.toString(),
ImmutableMultiset.copyOf(
subsetList.stream()
.map(m -> normalize(m.get("d"), hasMultipleValues))
.map(m -> normalize(m.get("d")))
.distinct() // Distinct values only, because we expect rollup.
.collect(Collectors.toList())
),
@ -224,7 +224,7 @@ public class IndexMergerNullHandlingTest
/**
* Normalize an input value the same way that IndexMerger is expected to do it.
*/
private static List<String> normalize(final Object value, final boolean hasMultipleValues)
private static List<String> normalize(final Object value)
{
final List<String> retVal = new ArrayList<>();
@ -237,7 +237,7 @@ public class IndexMergerNullHandlingTest
if (list.isEmpty()) {
// empty lists become nulls in single valued columns
// they sometimes also become nulls in multi-valued columns (see comments in getRow())
retVal.add(NullHandling.emptyToNullIfNeeded(null));
retVal.add(null);
} else {
retVal.addAll(list.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toList()));
}

View File

@ -104,6 +104,7 @@ import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory;
import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
@ -111,6 +112,8 @@ import org.junit.runners.Parameterized;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
@ -146,11 +149,17 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest
static final TimestampSpec DEFAULT_TIMESTAMP_SPEC = new TimestampSpec(TIMESTAMP_COLUMN, "iso", DateTimes.of("2000"));
static final DimensionsSpec DEFAULT_DIM_SPEC = new DimensionsSpec(
ImmutableList.<DimensionSchema>builder()
.addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim0", "dim1", "dim2", "dim3", "timeDim")))
.add(new DoubleDimensionSchema("d0"))
.add(new FloatDimensionSchema("f0"))
.add(new LongDimensionSchema("l0"))
.build()
.addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of(
"dim0",
"dim1",
"dim2",
"dim3",
"timeDim"
)))
.add(new DoubleDimensionSchema("d0"))
.add(new FloatDimensionSchema("f0"))
.add(new LongDimensionSchema("l0"))
.build()
);
static final InputRowParser<Map<String, Object>> DEFAULT_PARSER = new MapInputRowParser(
@ -350,32 +359,35 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest
.put(
"mmappedAutoTypesMerged",
input -> {
input.mapSchema(
schema ->
new IncrementalIndexSchema(
schema.getMinTimestamp(),
schema.getTimestampSpec(),
schema.getGran(),
schema.getVirtualColumns(),
schema.getDimensionsSpec().withDimensions(
schema.getDimensionsSpec()
.getDimensions()
.stream()
.map(
dimensionSchema -> new AutoTypeColumnSchema(dimensionSchema.getName())
)
.collect(Collectors.toList())
),
schema.getMetrics(),
schema.isRollup()
final QueryableIndex index =
input
.mapSchema(
schema ->
new IncrementalIndexSchema(
schema.getMinTimestamp(),
schema.getTimestampSpec(),
schema.getGran(),
schema.getVirtualColumns(),
schema.getDimensionsSpec().withDimensions(
schema.getDimensionsSpec()
.getDimensions()
.stream()
.map(
dimensionSchema -> new AutoTypeColumnSchema(dimensionSchema.getName())
)
.collect(Collectors.toList())
),
schema.getMetrics(),
schema.isRollup()
)
)
);
// if 1 row per segment some of the columns have null values for the row which causes 'auto'
// typing default value coercion to be lost in default value mode, so make sure there is at
// least one number in each segment for these tests to pass correctly because the column
// is typeless and so doesn't write out zeros like regular numbers do
input.intermediaryPersistSize(3);
final QueryableIndex index = input.buildMMappedMergedIndex();
// if 1 row per segment some of the columns have null values for the row which causes 'auto'
// typing default value coercion to be lost in default value mode, so make sure there is at
// least one number in each segment for these tests to pass correctly because the column
// is typeless and so doesn't write out zeros like regular numbers do
.intermediaryPersistSize(3)
.buildMMappedIndex();
return Pair.of(new QueryableIndexStorageAdapter(index), index);
}
)
@ -393,15 +405,38 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest
return Pair.of(new QueryableIndexStorageAdapter(index), index);
}
)
.put(
"mmappedWithSqlCompatibleNulls",
input -> {
// Build mmapped index in SQL-compatible null handling mode; read it in default-value mode.
Assume.assumeTrue(NullHandling.replaceWithDefault());
final File file;
try {
NullHandling.initializeForTestsWithValues(false, null);
Assert.assertTrue(NullHandling.sqlCompatible());
file = input.buildMMappedIndexFile();
}
finally {
NullHandling.initializeForTests();
}
Assert.assertTrue(NullHandling.replaceWithDefault());
try {
final QueryableIndex index = input.getIndexIO().loadIndex(file);
return Pair.of(new QueryableIndexStorageAdapter(index), index);
}
catch (IOException e) {
throw new RuntimeException(e);
}
}
)
.put(
"rowBasedWithoutTypeSignature",
input -> Pair.of(input.buildRowBasedSegmentWithoutTypeSignature().asStorageAdapter(), () -> {
})
input -> Pair.of(input.buildRowBasedSegmentWithoutTypeSignature().asStorageAdapter(), () -> {})
)
.put(
"rowBasedWithTypeSignature",
input -> Pair.of(input.buildRowBasedSegmentWithTypeSignature().asStorageAdapter(), () -> {
})
input -> Pair.of(input.buildRowBasedSegmentWithTypeSignature().asStorageAdapter(), () -> {})
)
.put("frame (row-based)", input -> {
final FrameSegment segment = input.buildFrameSegment(FrameType.ROW_BASED);
@ -811,7 +846,6 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest
}
return values;
}
}

View File

@ -45,6 +45,7 @@ import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.DruidPredicateIndex;
import org.apache.druid.segment.column.NullValueIndex;
import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn;
import org.apache.druid.segment.column.StringValueSetIndex;
import org.apache.druid.segment.data.BitmapSerdeFactory;
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
@ -192,7 +193,7 @@ public class ScalarStringColumnSupplierTest extends InitializedNullHandlingTest
bob,
NestedFieldColumnIndexSupplierTest.ALWAYS_USE_INDEXES
);
try (ScalarStringDictionaryEncodedColumn column = (ScalarStringDictionaryEncodedColumn) supplier.get()) {
try (StringUtf8DictionaryEncodedColumn column = (StringUtf8DictionaryEncodedColumn) supplier.get()) {
smokeTest(supplier, column);
}
}
@ -225,7 +226,7 @@ public class ScalarStringColumnSupplierTest extends InitializedNullHandlingTest
try {
threadsStartLatch.await();
for (int iter = 0; iter < 5000; iter++) {
try (ScalarStringDictionaryEncodedColumn column = (ScalarStringDictionaryEncodedColumn) supplier.get()) {
try (StringUtf8DictionaryEncodedColumn column = (StringUtf8DictionaryEncodedColumn) supplier.get()) {
smokeTest(supplier, column);
}
}
@ -241,7 +242,7 @@ public class ScalarStringColumnSupplierTest extends InitializedNullHandlingTest
Assert.assertEquals(expectedReason, failureReason.get());
}
private void smokeTest(ScalarStringColumnAndIndexSupplier supplier, ScalarStringDictionaryEncodedColumn column)
private void smokeTest(ScalarStringColumnAndIndexSupplier supplier, StringUtf8DictionaryEncodedColumn column)
{
SimpleAscendingOffset offset = new SimpleAscendingOffset(data.size());
ColumnValueSelector<?> valueSelector = column.makeColumnValueSelector(offset);

View File

@ -0,0 +1,162 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.ListIndexed;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.hamcrest.CoreMatchers;
import org.hamcrest.MatcherAssert;
import org.junit.Assert;
import org.junit.Test;
import org.junit.internal.matchers.ThrowableMessageMatcher;
import javax.annotation.Nullable;
import java.util.Collections;
/**
* Test for {@link CombineFirstTwoEntriesIndexed}.
*/
public class CombineFirstTwoEntriesIndexedTest extends InitializedNullHandlingTest
{
@Test
public void testSizeZero()
{
final IllegalStateException e = Assert.assertThrows(
IllegalStateException.class,
() -> wrap(Indexed.empty(), "xyz")
);
MatcherAssert.assertThat(
e,
ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Size[0] must be >= 2"))
);
}
@Test
public void testSizeOne()
{
final IllegalStateException e = Assert.assertThrows(
IllegalStateException.class,
() -> wrap(new ListIndexed<>("foo"), "xyz")
);
MatcherAssert.assertThat(
e,
ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Size[1] must be >= 2"))
);
}
@Test
public void testSizeTwo()
{
final CombineFirstTwoEntriesIndexed<String> indexed = wrap(new ListIndexed<>("bar", "foo"), "xyz");
Assert.assertEquals(0, indexed.indexOf("xyz"));
Assert.assertEquals(-2, indexed.indexOf("foo"));
Assert.assertEquals(-2, indexed.indexOf("bar"));
Assert.assertEquals(-2, indexed.indexOf("baz"));
Assert.assertEquals(-2, indexed.indexOf("qux"));
Assert.assertEquals(-2, indexed.indexOf(""));
Assert.assertEquals(-2, indexed.indexOf(null));
Assert.assertEquals(1, indexed.size());
Assert.assertEquals("xyz", indexed.get(0));
Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed.
Assert.assertEquals(ImmutableList.of("xyz"), ImmutableList.copyOf(indexed));
}
@Test
public void testSizeThree()
{
final CombineFirstTwoEntriesIndexed<String> indexed = wrap(new ListIndexed<>("bar", "baz", "foo"), "xyz");
Assert.assertEquals(0, indexed.indexOf("xyz"));
Assert.assertEquals(1, indexed.indexOf("foo"));
Assert.assertEquals(-2, indexed.indexOf("bar"));
Assert.assertEquals(-2, indexed.indexOf("baz"));
Assert.assertEquals(-2, indexed.indexOf("qux"));
Assert.assertEquals(-2, indexed.indexOf(""));
Assert.assertEquals(-2, indexed.indexOf(null));
Assert.assertEquals("xyz", indexed.get(0));
Assert.assertEquals("foo", indexed.get(1));
Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed.
Assert.assertEquals(ImmutableList.of("xyz", "foo"), ImmutableList.copyOf(indexed));
}
@Test
public void testSizeTwoSorted()
{
final CombineFirstTwoEntriesIndexed<String> indexed = wrap(
GenericIndexed.fromArray(
new String[]{"bar", "foo"},
GenericIndexed.STRING_STRATEGY
),
null
);
Assert.assertEquals(0, indexed.indexOf(null));
Assert.assertEquals(-2, indexed.indexOf("foo"));
Assert.assertEquals(-2, indexed.indexOf("bar"));
Assert.assertEquals(-2, indexed.indexOf("baz"));
Assert.assertEquals(-2, indexed.indexOf("qux"));
Assert.assertEquals(-2, indexed.indexOf(""));
Assert.assertEquals(1, indexed.size());
Assert.assertNull(indexed.get(0));
Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed.
Assert.assertEquals(Collections.singletonList(null), Lists.newArrayList(indexed));
}
@Test
public void testSizeThreeSorted()
{
final CombineFirstTwoEntriesIndexed<String> indexed = wrap(
GenericIndexed.fromArray(
new String[]{"bar", "baz", "foo"},
GenericIndexed.STRING_STRATEGY
),
null
);
Assert.assertEquals(0, indexed.indexOf(null));
Assert.assertEquals(1, indexed.indexOf("foo"));
Assert.assertEquals(-2, indexed.indexOf("bar"));
Assert.assertEquals(-2, indexed.indexOf("baz"));
Assert.assertEquals(-3, indexed.indexOf("qux"));
Assert.assertEquals(-2, indexed.indexOf(""));
Assert.assertEquals(2, indexed.size());
Assert.assertNull(indexed.get(0));
Assert.assertEquals("foo", indexed.get(1));
Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class-level note in CombineFirstTwoEntriesIndexed.
Assert.assertEquals(Lists.newArrayList(null, "foo"), Lists.newArrayList(indexed));
}
private <T> CombineFirstTwoEntriesIndexed<T> wrap(final Indexed<T> indexed, @Nullable final T newFirstValue)
{
return new CombineFirstTwoEntriesIndexed<T>(indexed)
{
@Override
protected T newFirstValue()
{
return newFirstValue;
}
};
}
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import org.apache.druid.segment.data.ArrayBasedIndexedInts;
import org.junit.Test;
/**
* Test for {@link CombineFirstTwoValuesColumnarInts}.
*/
public class CombineFirstTwoValuesColumnarIntsTest
{
@Test
public void testCombineFirstTwoValues()
{
// (expectedCombined, original)
assertCombine(new int[]{0, 1, 2}, new int[]{1, 2, 3});
assertCombine(new int[]{0, 0, 1, 2}, new int[]{0, 1, 2, 3});
assertCombine(new int[]{2, 0, 1, 0, 4, 0}, new int[]{3, 0, 2, 1, 5, 0});
}
private static void assertCombine(final int[] expectedCombined, final int[] original)
{
CombineFirstTwoValuesIndexedIntsTest.assertCombine(
expectedCombined,
original,
arr -> new CombineFirstTwoValuesIndexedInts(new ArrayBasedIndexedInts(arr))
);
}
}

View File

@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import org.apache.druid.segment.data.ColumnarMultiInts;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.VSizeColumnarInts;
import org.apache.druid.segment.data.VSizeColumnarMultiInts;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.util.List;
/**
* Test for {@link CombineFirstTwoValuesColumnarMultiInts}.
*/
public class CombineFirstTwoValuesColumnarMultiIntsTest
{
private ColumnarMultiInts original;
private CombineFirstTwoValuesColumnarMultiInts combined;
@Before
public void setUp()
{
original = VSizeColumnarMultiInts.fromIterable(
ImmutableList.of(
VSizeColumnarInts.fromArray(new int[]{1, 2, 3}),
VSizeColumnarInts.fromArray(new int[]{0, 1, 2, 3}),
VSizeColumnarInts.fromArray(new int[]{3, 0, 2, 1, 5, 0})
)
);
combined = new CombineFirstTwoValuesColumnarMultiInts(original);
}
@Test
public void testSize()
{
Assert.assertEquals(original.size(), combined.size());
}
@Test
public void testGet()
{
assertEquals(new int[]{0, 1, 2}, combined.get(0));
assertEquals(new int[]{0, 0, 1, 2}, combined.get(1));
assertEquals(new int[]{2, 0, 1, 0, 4, 0}, combined.get(2));
// "get" reuses a holder
Assert.assertSame(combined.get(1), combined.get(0));
}
@Test
public void testGetUnshared()
{
assertEquals(new int[]{0, 1, 2}, combined.getUnshared(0));
assertEquals(new int[]{0, 0, 1, 2}, combined.getUnshared(1));
assertEquals(new int[]{2, 0, 1, 0, 4, 0}, combined.getUnshared(2));
// Unlike "get", "getUnshared" does not reuse a holder
Assert.assertNotSame(combined.getUnshared(1), combined.getUnshared(0));
}
@Test
public void testIndexOf()
{
Assert.assertThrows(
UnsupportedOperationException.class,
() -> combined.indexOf(original.get(0))
);
}
@Test
public void testIsSorted()
{
Assert.assertFalse(combined.isSorted());
}
@Test
public void testIterator()
{
final List<IndexedInts> fromIterator = Lists.newArrayList(combined.iterator());
Assert.assertEquals(3, fromIterator.size());
assertEquals(new int[]{0, 1, 2}, fromIterator.get(0));
assertEquals(new int[]{0, 0, 1, 2}, fromIterator.get(1));
assertEquals(new int[]{2, 0, 1, 0, 4, 0}, fromIterator.get(2));
}
public void assertEquals(final int[] expected, final IndexedInts actual)
{
final int sz = actual.size();
final int[] actualArray = new int[sz];
for (int i = 0; i < sz; i++) {
actualArray[i] = actual.get(i);
}
Assert.assertArrayEquals(expected, actualArray);
}
}

View File

@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import it.unimi.dsi.fastutil.ints.IntArrays;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.segment.data.ArrayBasedIndexedInts;
import org.apache.druid.segment.data.IndexedInts;
import org.junit.Assert;
import org.junit.Test;
import java.util.Arrays;
import java.util.function.Function;
/**
* Test for {@link CombineFirstTwoValuesIndexedInts}.
*/
public class CombineFirstTwoValuesIndexedIntsTest
{
@Test
public void testCombineFirstTwoValues()
{
// (expectedCombined, original)
assertCombine(new int[]{0, 1, 2}, new int[]{1, 2, 3});
assertCombine(new int[]{0, 0, 1, 2}, new int[]{0, 1, 2, 3});
assertCombine(new int[]{2, 0, 1, 0, 4, 0}, new int[]{3, 0, 2, 1, 5, 0});
}
private static void assertCombine(final int[] expectedCombined, final int[] original)
{
assertCombine(
expectedCombined,
original,
arr -> new CombineFirstTwoValuesIndexedInts(new ArrayBasedIndexedInts(arr))
);
}
static void assertCombine(
final int[] expectedCombined,
final int[] original,
final Function<int[], IndexedInts> combineFn
)
{
final IndexedInts combined = combineFn.apply(original);
// Check size.
Assert.assertEquals(
StringUtils.format("%s (size)", Arrays.toString(original)),
expectedCombined.length,
combined.size()
);
// Check regular get.
final int[] arr = new int[expectedCombined.length];
for (int i = 0; i < expectedCombined.length; i++) {
arr[i] = combined.get(i);
}
Assert.assertArrayEquals(StringUtils.format("%s (get)", Arrays.toString(original)), expectedCombined, arr);
// Check contiguous vector get.
Arrays.fill(arr, Integer.MIN_VALUE);
combined.get(arr, 0, arr.length);
Assert.assertArrayEquals(
StringUtils.format("%s (contiguous vector get)", Arrays.toString(original)),
expectedCombined,
arr
);
// Check noncontiguous vector get.
final int[] indexes = new int[expectedCombined.length];
for (int i = 0; i < expectedCombined.length; i++) {
indexes[indexes.length - 1 - i] = i; // Fetch backwards.
}
Arrays.fill(arr, Integer.MIN_VALUE);
combined.get(arr, indexes, arr.length);
final int[] expectedCombinedReversed = IntArrays.reverse(IntArrays.copy(expectedCombined));
Assert.assertArrayEquals(
StringUtils.format("%s (noncontiguous vector get, reversed)", Arrays.toString(original)),
expectedCombinedReversed,
arr
);
}
}

View File

@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.serde;
import com.google.common.collect.Lists;
import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.ListIndexed;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.hamcrest.CoreMatchers;
import org.hamcrest.MatcherAssert;
import org.junit.Assert;
import org.junit.Test;
import org.junit.internal.matchers.ThrowableMessageMatcher;
import java.util.Collections;
/**
* Test for {@link ReplaceFirstValueWithNullIndexed}.
*/
public class ReplaceFirstValueWithNullIndexedTest extends InitializedNullHandlingTest
{
@Test
public void testSizeZero()
{
final IllegalStateException e = Assert.assertThrows(
IllegalStateException.class,
() -> new ReplaceFirstValueWithNullIndexed<>(Indexed.empty())
);
MatcherAssert.assertThat(
e,
ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString("Size[0] must be >= 1"))
);
}
@Test
public void testSizeOne()
{
final ReplaceFirstValueWithNullIndexed<String> indexed =
new ReplaceFirstValueWithNullIndexed<>(new ListIndexed<>("bar"));
Assert.assertEquals(0, indexed.indexOf(null));
Assert.assertEquals(-2, indexed.indexOf(""));
Assert.assertEquals(-2, indexed.indexOf("foo"));
Assert.assertEquals(-2, indexed.indexOf("bar"));
Assert.assertEquals(-2, indexed.indexOf("baz"));
Assert.assertEquals(-2, indexed.indexOf("qux"));
Assert.assertEquals(1, indexed.size());
Assert.assertNull(indexed.get(0));
Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed.
Assert.assertEquals(Collections.singletonList(null), Lists.newArrayList(indexed));
}
@Test
public void testSizeTwo()
{
final ReplaceFirstValueWithNullIndexed<String> indexed =
new ReplaceFirstValueWithNullIndexed<>(new ListIndexed<>("bar", "foo"));
Assert.assertEquals(0, indexed.indexOf(null));
Assert.assertEquals(1, indexed.indexOf("foo"));
Assert.assertEquals(-2, indexed.indexOf(""));
Assert.assertEquals(-2, indexed.indexOf("bar"));
Assert.assertEquals(-2, indexed.indexOf("baz"));
Assert.assertEquals(-2, indexed.indexOf("qux"));
Assert.assertEquals(2, indexed.size());
Assert.assertNull(indexed.get(0));
Assert.assertEquals("foo", indexed.get(1));
Assert.assertFalse(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed.
Assert.assertEquals(Lists.newArrayList(null, "foo"), Lists.newArrayList(indexed));
}
@Test
public void testSizeOneSorted()
{
final ReplaceFirstValueWithNullIndexed<String> indexed =
new ReplaceFirstValueWithNullIndexed<>(
GenericIndexed.fromArray(
new String[]{"bar"},
GenericIndexed.STRING_STRATEGY
)
);
Assert.assertEquals(0, indexed.indexOf(null));
Assert.assertEquals(-2, indexed.indexOf(""));
Assert.assertEquals(-2, indexed.indexOf("foo"));
Assert.assertEquals(-2, indexed.indexOf("bar"));
Assert.assertEquals(-2, indexed.indexOf("baz"));
Assert.assertEquals(-2, indexed.indexOf("qux"));
Assert.assertEquals(1, indexed.size());
Assert.assertNull(indexed.get(0));
Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed.
Assert.assertEquals(Collections.singletonList(null), Lists.newArrayList(indexed));
}
@Test
public void testSizeTwoSorted()
{
final ReplaceFirstValueWithNullIndexed<String> indexed =
new ReplaceFirstValueWithNullIndexed<>(
GenericIndexed.fromArray(
new String[]{"bar", "foo"},
GenericIndexed.STRING_STRATEGY
)
);
Assert.assertEquals(0, indexed.indexOf(null));
Assert.assertEquals(1, indexed.indexOf("foo"));
Assert.assertEquals(-2, indexed.indexOf(""));
Assert.assertEquals(-2, indexed.indexOf("bar"));
Assert.assertEquals(-2, indexed.indexOf("baz"));
Assert.assertEquals(-3, indexed.indexOf("qux"));
Assert.assertEquals(2, indexed.size());
Assert.assertNull(indexed.get(0));
Assert.assertEquals("foo", indexed.get(1));
Assert.assertTrue(indexed.isSorted()); // Matches delegate. See class doc for ReplaceFirstValueWithNullIndexed.
Assert.assertEquals(Lists.newArrayList(null, "foo"), Lists.newArrayList(indexed));
}
}