mirror of https://github.com/apache/druid.git
reduce heap footprint of ingesting auto typed columns by pushing compression and index generation into writeTo (#14615)
This commit is contained in:
parent
c2e6758580
commit
024ce40f1a
|
@ -99,11 +99,11 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt
|
|||
throw InvalidInput.exception("Invalid equality filter on column [%s], matchValueType cannot be null", column);
|
||||
}
|
||||
this.matchValueType = matchValueType;
|
||||
if (matchValue == null) {
|
||||
throw InvalidInput.exception("Invalid equality filter on column [%s], matchValue cannot be null", column);
|
||||
}
|
||||
this.matchValue = matchValue;
|
||||
this.matchValueEval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(matchValueType), matchValue);
|
||||
if (matchValueEval.value() == null) {
|
||||
throw InvalidInput.exception("Invalid equality filter on column [%s], matchValue cannot be null", column);
|
||||
}
|
||||
this.filterTuning = filterTuning;
|
||||
this.predicateFactory = new EqualityPredicateFactory(matchValueEval);
|
||||
}
|
||||
|
@ -239,6 +239,8 @@ public class EqualityFilter extends AbstractOptimizableDimFilter implements Filt
|
|||
|
||||
final ValueIndexes valueIndexes = indexSupplier.as(ValueIndexes.class);
|
||||
if (valueIndexes != null) {
|
||||
// matchValueEval.value() cannot be null here due to check in the constructor
|
||||
//noinspection DataFlowIssue
|
||||
return valueIndexes.forValue(matchValueEval.value(), matchValueType);
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,6 @@ import org.apache.druid.io.Channels;
|
|||
import org.apache.druid.java.util.common.ISE;
|
||||
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
|
||||
import org.apache.druid.segment.column.TypeStrategy;
|
||||
import org.apache.druid.segment.serde.Serializer;
|
||||
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
|
||||
import org.apache.druid.segment.writeout.WriteOutBytes;
|
||||
|
||||
|
@ -39,7 +38,7 @@ import java.util.Iterator;
|
|||
/**
|
||||
* Writer for a {@link FixedIndexed}
|
||||
*/
|
||||
public class FixedIndexedWriter<T> implements Serializer
|
||||
public class FixedIndexedWriter<T> implements DictionaryWriter<T>
|
||||
{
|
||||
private static final int PAGE_SIZE = 4096;
|
||||
private final SegmentWriteOutMedium segmentWriteOutMedium;
|
||||
|
@ -73,11 +72,19 @@ public class FixedIndexedWriter<T> implements Serializer
|
|||
this.isSorted = isSorted;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSorted()
|
||||
{
|
||||
return isSorted;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open() throws IOException
|
||||
{
|
||||
this.valuesOut = segmentWriteOutMedium.makeWriteOutBytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCardinality()
|
||||
{
|
||||
return hasNulls ? numWritten + 1 : numWritten;
|
||||
|
@ -89,6 +96,7 @@ public class FixedIndexedWriter<T> implements Serializer
|
|||
return Byte.BYTES + Byte.BYTES + Integer.BYTES + valuesOut.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(@Nullable T objectToWrite) throws IOException
|
||||
{
|
||||
if (prevObject != null && isSorted && comparator.compare(prevObject, objectToWrite) >= 0) {
|
||||
|
@ -140,6 +148,7 @@ public class FixedIndexedWriter<T> implements Serializer
|
|||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@Override
|
||||
@Nullable
|
||||
public T get(int index) throws IOException
|
||||
{
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package org.apache.druid.segment.index;
|
||||
|
||||
import org.apache.druid.collections.bitmap.BitmapFactory;
|
||||
import org.apache.druid.query.BitmapResultFactory;
|
||||
import org.apache.druid.query.filter.ColumnIndexSelector;
|
||||
import org.apache.druid.segment.column.ColumnIndexCapabilities;
|
||||
|
@ -26,11 +27,16 @@ import org.apache.druid.segment.column.SimpleColumnIndexCapabilities;
|
|||
|
||||
public class AllFalseBitmapColumnIndex implements BitmapColumnIndex
|
||||
{
|
||||
private final ColumnIndexSelector selector;
|
||||
private final BitmapFactory bitmapFactory;
|
||||
|
||||
public AllFalseBitmapColumnIndex(ColumnIndexSelector indexSelector)
|
||||
{
|
||||
this.selector = indexSelector;
|
||||
this(indexSelector.getBitmapFactory());
|
||||
}
|
||||
|
||||
public AllFalseBitmapColumnIndex(BitmapFactory bitmapFactory)
|
||||
{
|
||||
this.bitmapFactory = bitmapFactory;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -48,6 +54,6 @@ public class AllFalseBitmapColumnIndex implements BitmapColumnIndex
|
|||
@Override
|
||||
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
|
||||
{
|
||||
return bitmapResultFactory.wrapAllFalse(selector.getBitmapFactory().makeEmptyImmutableBitmap());
|
||||
return bitmapResultFactory.wrapAllFalse(bitmapFactory.makeEmptyImmutableBitmap());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.druid.segment.index.semantic.StringValueSetIndexes;
|
|||
import org.apache.druid.segment.index.semantic.Utf8ValueSetIndexes;
|
||||
import org.apache.druid.segment.index.semantic.ValueIndexes;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Comparator;
|
||||
|
@ -101,7 +102,7 @@ public final class IndexedUtf8ValueIndexes<TDictionary extends Indexed<ByteBuffe
|
|||
|
||||
@Nullable
|
||||
@Override
|
||||
public BitmapColumnIndex forValue(Object value, TypeSignature<ValueType> valueType)
|
||||
public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType> valueType)
|
||||
{
|
||||
if (valueType.isPrimitive()) {
|
||||
return forValue(
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.index.semantic;
|
||||
|
||||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.segment.column.TypeSignature;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.index.BitmapColumnIndex;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
public interface ArrayElementIndexes
|
||||
{
|
||||
/**
|
||||
* Get the {@link ImmutableBitmap} corresponding to rows with array elements matching the supplied value. Generates
|
||||
* an empty bitmap when passed a value that doesn't exist in any array. May return null if a value index cannot be
|
||||
* computed for the supplied value type.
|
||||
*
|
||||
* @param value value to match against any array element in a row
|
||||
* @param valueType type of the value to match, used to assist conversion from the match value type to the column
|
||||
* value type
|
||||
* @return {@link ImmutableBitmap} corresponding to the rows with array elements which match the value, or
|
||||
* null if an index connot be computed for the supplied value type
|
||||
*/
|
||||
@Nullable
|
||||
BitmapColumnIndex containsValue(@Nullable Object value, TypeSignature<ValueType> valueType);
|
||||
}
|
|
@ -24,14 +24,17 @@ import org.apache.druid.segment.column.TypeSignature;
|
|||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.index.BitmapColumnIndex;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
public interface ValueIndexes
|
||||
{
|
||||
|
||||
/**
|
||||
* Get the {@link ImmutableBitmap} corresponding to the supplied value. Generates an empty bitmap when passed a
|
||||
* value that doesn't exist. May return null if a value index cannot be computed for the supplied value type.
|
||||
* Get the {@link ImmutableBitmap} corresponding to rows matching the supplied value. Generates an empty bitmap when
|
||||
* passed a value that doesn't exist. May return null if a value index cannot be computed for the supplied value type.
|
||||
*
|
||||
* Does not match null, use {@link NullValueIndex} for matching nulls.
|
||||
*
|
||||
* @param value value to match
|
||||
* @param valueType type of the value to match, used to assist conversion from the match value type to the column
|
||||
|
@ -40,5 +43,5 @@ public interface ValueIndexes
|
|||
* connot be computed for the supplied value type
|
||||
*/
|
||||
@Nullable
|
||||
BitmapColumnIndex forValue(Object value, TypeSignature<ValueType> valueType);
|
||||
BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType> valueType);
|
||||
}
|
||||
|
|
|
@ -65,6 +65,7 @@ import org.apache.druid.segment.index.semantic.StringValueSetIndexes;
|
|||
import org.apache.druid.segment.index.semantic.ValueIndexes;
|
||||
import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
@ -228,11 +229,12 @@ public class ScalarDoubleColumnAndIndexSupplier implements Supplier<NestedCommon
|
|||
{
|
||||
@Nullable
|
||||
@Override
|
||||
public BitmapColumnIndex forValue(Object value, TypeSignature<ValueType> valueType)
|
||||
public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType> valueType)
|
||||
{
|
||||
final ExprEval<?> eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value)
|
||||
.castTo(ExpressionType.DOUBLE);
|
||||
if (eval.isNumericNull()) {
|
||||
// value wasn't null, but not a number?
|
||||
return null;
|
||||
}
|
||||
final double doubleValue = eval.asDouble();
|
||||
|
|
|
@ -19,55 +19,28 @@
|
|||
|
||||
package org.apache.druid.segment.nested;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.java.util.common.io.Closer;
|
||||
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
|
||||
import org.apache.druid.java.util.common.logger.Logger;
|
||||
import org.apache.druid.math.expr.ExprEval;
|
||||
import org.apache.druid.segment.ColumnValueSelector;
|
||||
import org.apache.druid.segment.IndexSpec;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
import org.apache.druid.segment.data.ColumnarDoublesSerializer;
|
||||
import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer;
|
||||
import org.apache.druid.segment.data.CompressionFactory;
|
||||
import org.apache.druid.segment.data.CompressionStrategy;
|
||||
import org.apache.druid.segment.data.FixedIndexedWriter;
|
||||
import org.apache.druid.segment.data.GenericIndexedWriter;
|
||||
import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer;
|
||||
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.WritableByteChannel;
|
||||
|
||||
/**
|
||||
* Serializer for a {@link ScalarDoubleColumn}
|
||||
*/
|
||||
public class ScalarDoubleColumnSerializer extends NestedCommonFormatColumnSerializer
|
||||
public class ScalarDoubleColumnSerializer extends ScalarNestedCommonFormatColumnSerializer<Double>
|
||||
{
|
||||
private static final Logger log = new Logger(ScalarDoubleColumnSerializer.class);
|
||||
|
||||
private final String name;
|
||||
private final SegmentWriteOutMedium segmentWriteOutMedium;
|
||||
private final IndexSpec indexSpec;
|
||||
@SuppressWarnings("unused")
|
||||
private final Closer closer;
|
||||
private DictionaryIdLookup dictionaryIdLookup;
|
||||
private FixedIndexedWriter<Double> doubleDictionaryWriter;
|
||||
private int rowCount = 0;
|
||||
private boolean closedForWrite = false;
|
||||
private boolean dictionarySerialized = false;
|
||||
|
||||
private SingleValueColumnarIntsSerializer encodedValueSerializer;
|
||||
private ColumnarDoublesSerializer doublesSerializer;
|
||||
private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter;
|
||||
private MutableBitmap[] bitmaps;
|
||||
private ByteBuffer columnNameBytes = null;
|
||||
|
||||
public ScalarDoubleColumnSerializer(
|
||||
String name,
|
||||
|
@ -76,54 +49,35 @@ public class ScalarDoubleColumnSerializer extends NestedCommonFormatColumnSerial
|
|||
Closer closer
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.segmentWriteOutMedium = segmentWriteOutMedium;
|
||||
this.indexSpec = indexSpec;
|
||||
this.closer = closer;
|
||||
this.dictionaryIdLookup = new DictionaryIdLookup();
|
||||
super(name, DOUBLE_DICTIONARY_FILE_NAME, indexSpec, segmentWriteOutMedium, closer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getColumnName()
|
||||
protected int processValue(@Nullable Object rawValue) throws IOException
|
||||
{
|
||||
return name;
|
||||
final ExprEval<?> eval = ExprEval.bestEffortOf(rawValue);
|
||||
final double val = eval.asDouble();
|
||||
final int dictId = eval.isNumericNull() ? 0 : dictionaryIdLookup.lookupDouble(val);
|
||||
doublesSerializer.add(dictId == 0 ? 0.0 : val);
|
||||
return dictId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictionaryIdLookup getGlobalLookup()
|
||||
public void openDictionaryWriter() throws IOException
|
||||
{
|
||||
return dictionaryIdLookup;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNulls()
|
||||
{
|
||||
return !bitmaps[0].isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open() throws IOException
|
||||
{
|
||||
if (!dictionarySerialized) {
|
||||
throw new IllegalStateException("Dictionary not serialized, cannot open value serializer");
|
||||
}
|
||||
String filenameBase = StringUtils.format("%s.forward_dim", name);
|
||||
final CompressionStrategy compression = indexSpec.getDimensionCompression();
|
||||
final CompressionStrategy compressionToUse;
|
||||
if (compression != CompressionStrategy.UNCOMPRESSED && compression != CompressionStrategy.NONE) {
|
||||
compressionToUse = compression;
|
||||
} else {
|
||||
compressionToUse = CompressionStrategy.LZ4;
|
||||
}
|
||||
encodedValueSerializer = CompressedVSizeColumnarIntsSerializer.create(
|
||||
name,
|
||||
dictionaryWriter = new FixedIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
filenameBase,
|
||||
doubleDictionaryWriter.getCardinality(),
|
||||
compressionToUse
|
||||
ColumnType.DOUBLE.getStrategy(),
|
||||
ByteOrder.nativeOrder(),
|
||||
Long.BYTES,
|
||||
true
|
||||
);
|
||||
encodedValueSerializer.open();
|
||||
dictionaryWriter.open();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void openValueColumnSerializer() throws IOException
|
||||
{
|
||||
doublesSerializer = CompressionFactory.getDoubleSerializer(
|
||||
name,
|
||||
segmentWriteOutMedium,
|
||||
|
@ -132,31 +86,6 @@ public class ScalarDoubleColumnSerializer extends NestedCommonFormatColumnSerial
|
|||
indexSpec.getDimensionCompression()
|
||||
);
|
||||
doublesSerializer.open();
|
||||
|
||||
bitmapIndexWriter = new GenericIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
name,
|
||||
indexSpec.getBitmapSerdeFactory().getObjectStrategy()
|
||||
);
|
||||
bitmapIndexWriter.open();
|
||||
bitmapIndexWriter.setObjectsNotSorted();
|
||||
bitmaps = new MutableBitmap[doubleDictionaryWriter.getCardinality()];
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
bitmaps[i] = indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void openDictionaryWriter() throws IOException
|
||||
{
|
||||
doubleDictionaryWriter = new FixedIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
ColumnType.DOUBLE.getStrategy(),
|
||||
ByteOrder.nativeOrder(),
|
||||
Long.BYTES,
|
||||
true
|
||||
);
|
||||
doubleDictionaryWriter.open();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -168,81 +97,26 @@ public class ScalarDoubleColumnSerializer extends NestedCommonFormatColumnSerial
|
|||
) throws IOException
|
||||
{
|
||||
if (dictionarySerialized) {
|
||||
throw new ISE("String dictionary already serialized for column [%s], cannot serialize again", name);
|
||||
throw new ISE("Double dictionary already serialized for column [%s], cannot serialize again", name);
|
||||
}
|
||||
|
||||
// null is always 0
|
||||
doubleDictionaryWriter.write(null);
|
||||
dictionaryWriter.write(null);
|
||||
dictionaryIdLookup.addNumericNull();
|
||||
|
||||
for (Double value : doubles) {
|
||||
if (value == null) {
|
||||
continue;
|
||||
}
|
||||
doubleDictionaryWriter.write(value);
|
||||
dictionaryWriter.write(value);
|
||||
dictionaryIdLookup.addDouble(value);
|
||||
}
|
||||
dictionarySerialized = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void serialize(ColumnValueSelector<? extends StructuredData> selector) throws IOException
|
||||
protected void writeValueColumn(FileSmoosher smoosher) throws IOException
|
||||
{
|
||||
if (!dictionarySerialized) {
|
||||
throw new ISE("Must serialize value dictionaries before serializing values for column [%s]", name);
|
||||
}
|
||||
|
||||
final Object value = StructuredData.unwrap(selector.getObject());
|
||||
final ExprEval<?> eval = ExprEval.bestEffortOf(value);
|
||||
|
||||
final double val = eval.asDouble();
|
||||
final int dictId = eval.isNumericNull() ? 0 : dictionaryIdLookup.lookupDouble(val);
|
||||
encodedValueSerializer.addValue(dictId);
|
||||
doublesSerializer.add(dictId == 0 ? 0.0 : val);
|
||||
bitmaps[dictId].add(rowCount);
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
|
||||
private void closeForWrite() throws IOException
|
||||
{
|
||||
if (!closedForWrite) {
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
final MutableBitmap bitmap = bitmaps[i];
|
||||
bitmapIndexWriter.write(
|
||||
indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeImmutableBitmap(bitmap)
|
||||
);
|
||||
bitmaps[i] = null; // Reclaim memory
|
||||
}
|
||||
columnNameBytes = computeFilenameBytes();
|
||||
closedForWrite = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSerializedSize() throws IOException
|
||||
{
|
||||
closeForWrite();
|
||||
|
||||
long size = 1 + columnNameBytes.capacity();
|
||||
// the value dictionaries, raw column, and null index are all stored in separate files
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(
|
||||
WritableByteChannel channel,
|
||||
FileSmoosher smoosher
|
||||
) throws IOException
|
||||
{
|
||||
Preconditions.checkState(closedForWrite, "Not closed yet!");
|
||||
|
||||
writeV0Header(channel, columnNameBytes);
|
||||
writeInternal(smoosher, doubleDictionaryWriter, DOUBLE_DICTIONARY_FILE_NAME);
|
||||
writeInternal(smoosher, encodedValueSerializer, ENCODED_VALUE_COLUMN_FILE_NAME);
|
||||
writeInternal(smoosher, doublesSerializer, DOUBLE_VALUE_COLUMN_FILE_NAME);
|
||||
writeInternal(smoosher, bitmapIndexWriter, BITMAP_INDEX_FILE_NAME);
|
||||
|
||||
log.info("Column [%s] serialized successfully.", name);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,6 +64,7 @@ import org.apache.druid.segment.index.semantic.StringValueSetIndexes;
|
|||
import org.apache.druid.segment.index.semantic.ValueIndexes;
|
||||
import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
@ -228,11 +229,13 @@ public class ScalarLongColumnAndIndexSupplier implements Supplier<NestedCommonFo
|
|||
{
|
||||
@Nullable
|
||||
@Override
|
||||
public BitmapColumnIndex forValue(Object value, TypeSignature<ValueType> valueType)
|
||||
public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType> valueType)
|
||||
{
|
||||
|
||||
final ExprEval<?> eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value)
|
||||
.castTo(ExpressionType.LONG);
|
||||
if (eval.isNumericNull()) {
|
||||
// value wasn't null, but not a number
|
||||
return null;
|
||||
}
|
||||
final long longValue = eval.asLong();
|
||||
|
|
|
@ -19,55 +19,28 @@
|
|||
|
||||
package org.apache.druid.segment.nested;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.java.util.common.io.Closer;
|
||||
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
|
||||
import org.apache.druid.java.util.common.logger.Logger;
|
||||
import org.apache.druid.math.expr.ExprEval;
|
||||
import org.apache.druid.segment.ColumnValueSelector;
|
||||
import org.apache.druid.segment.IndexSpec;
|
||||
import org.apache.druid.segment.column.ColumnType;
|
||||
import org.apache.druid.segment.data.ColumnarLongsSerializer;
|
||||
import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer;
|
||||
import org.apache.druid.segment.data.CompressionFactory;
|
||||
import org.apache.druid.segment.data.CompressionStrategy;
|
||||
import org.apache.druid.segment.data.FixedIndexedWriter;
|
||||
import org.apache.druid.segment.data.GenericIndexedWriter;
|
||||
import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer;
|
||||
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.WritableByteChannel;
|
||||
|
||||
/**
|
||||
* Serializer for a {@link ScalarLongColumn}
|
||||
*/
|
||||
public class ScalarLongColumnSerializer extends NestedCommonFormatColumnSerializer
|
||||
public class ScalarLongColumnSerializer extends ScalarNestedCommonFormatColumnSerializer<Long>
|
||||
{
|
||||
private static final Logger log = new Logger(ScalarLongColumnSerializer.class);
|
||||
|
||||
private final String name;
|
||||
private final SegmentWriteOutMedium segmentWriteOutMedium;
|
||||
private final IndexSpec indexSpec;
|
||||
@SuppressWarnings("unused")
|
||||
private final Closer closer;
|
||||
private DictionaryIdLookup dictionaryIdLookup;
|
||||
private FixedIndexedWriter<Long> longDictionaryWriter;
|
||||
private int rowCount = 0;
|
||||
private boolean closedForWrite = false;
|
||||
private boolean dictionarySerialized = false;
|
||||
|
||||
private SingleValueColumnarIntsSerializer encodedValueSerializer;
|
||||
private ColumnarLongsSerializer longsSerializer;
|
||||
private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter;
|
||||
private MutableBitmap[] bitmaps;
|
||||
private ByteBuffer columnNameBytes = null;
|
||||
|
||||
public ScalarLongColumnSerializer(
|
||||
String name,
|
||||
|
@ -76,54 +49,36 @@ public class ScalarLongColumnSerializer extends NestedCommonFormatColumnSerializ
|
|||
Closer closer
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.segmentWriteOutMedium = segmentWriteOutMedium;
|
||||
this.indexSpec = indexSpec;
|
||||
this.closer = closer;
|
||||
this.dictionaryIdLookup = new DictionaryIdLookup();
|
||||
super(name, LONG_DICTIONARY_FILE_NAME, indexSpec, segmentWriteOutMedium, closer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getColumnName()
|
||||
protected int processValue(@Nullable Object rawValue) throws IOException
|
||||
{
|
||||
return name;
|
||||
final ExprEval<?> eval = ExprEval.bestEffortOf(rawValue);
|
||||
|
||||
final long val = eval.asLong();
|
||||
final int dictId = eval.isNumericNull() ? 0 : dictionaryIdLookup.lookupLong(val);
|
||||
longsSerializer.add(dictId == 0 ? 0L : val);
|
||||
return dictId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictionaryIdLookup getGlobalLookup()
|
||||
public void openDictionaryWriter() throws IOException
|
||||
{
|
||||
return dictionaryIdLookup;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNulls()
|
||||
{
|
||||
return !bitmaps[0].isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open() throws IOException
|
||||
{
|
||||
if (!dictionarySerialized) {
|
||||
throw new IllegalStateException("Dictionary not serialized, cannot open value serializer");
|
||||
}
|
||||
String filenameBase = StringUtils.format("%s.forward_dim", name);
|
||||
final CompressionStrategy compression = indexSpec.getDimensionCompression();
|
||||
final CompressionStrategy compressionToUse;
|
||||
if (compression != CompressionStrategy.UNCOMPRESSED && compression != CompressionStrategy.NONE) {
|
||||
compressionToUse = compression;
|
||||
} else {
|
||||
compressionToUse = CompressionStrategy.LZ4;
|
||||
}
|
||||
encodedValueSerializer = CompressedVSizeColumnarIntsSerializer.create(
|
||||
name,
|
||||
dictionaryWriter = new FixedIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
filenameBase,
|
||||
longDictionaryWriter.getCardinality(),
|
||||
compressionToUse
|
||||
ColumnType.LONG.getStrategy(),
|
||||
ByteOrder.nativeOrder(),
|
||||
Long.BYTES,
|
||||
true
|
||||
);
|
||||
encodedValueSerializer.open();
|
||||
dictionaryWriter.open();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void openValueColumnSerializer() throws IOException
|
||||
{
|
||||
longsSerializer = CompressionFactory.getLongSerializer(
|
||||
name,
|
||||
segmentWriteOutMedium,
|
||||
|
@ -133,33 +88,7 @@ public class ScalarLongColumnSerializer extends NestedCommonFormatColumnSerializ
|
|||
indexSpec.getDimensionCompression()
|
||||
);
|
||||
longsSerializer.open();
|
||||
|
||||
bitmapIndexWriter = new GenericIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
name,
|
||||
indexSpec.getBitmapSerdeFactory().getObjectStrategy()
|
||||
);
|
||||
bitmapIndexWriter.open();
|
||||
bitmapIndexWriter.setObjectsNotSorted();
|
||||
bitmaps = new MutableBitmap[longDictionaryWriter.getCardinality()];
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
bitmaps[i] = indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void openDictionaryWriter() throws IOException
|
||||
{
|
||||
longDictionaryWriter = new FixedIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
ColumnType.LONG.getStrategy(),
|
||||
ByteOrder.nativeOrder(),
|
||||
Long.BYTES,
|
||||
true
|
||||
);
|
||||
longDictionaryWriter.open();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void serializeDictionaries(
|
||||
|
@ -170,81 +99,26 @@ public class ScalarLongColumnSerializer extends NestedCommonFormatColumnSerializ
|
|||
) throws IOException
|
||||
{
|
||||
if (dictionarySerialized) {
|
||||
throw new ISE("String dictionary already serialized for column [%s], cannot serialize again", name);
|
||||
throw new ISE("Long dictionary already serialized for column [%s], cannot serialize again", name);
|
||||
}
|
||||
|
||||
// null is always 0
|
||||
longDictionaryWriter.write(null);
|
||||
dictionaryWriter.write(null);
|
||||
dictionaryIdLookup.addNumericNull();
|
||||
|
||||
for (Long value : longs) {
|
||||
if (value == null) {
|
||||
continue;
|
||||
}
|
||||
longDictionaryWriter.write(value);
|
||||
dictionaryWriter.write(value);
|
||||
dictionaryIdLookup.addLong(value);
|
||||
}
|
||||
dictionarySerialized = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void serialize(ColumnValueSelector<? extends StructuredData> selector) throws IOException
|
||||
protected void writeValueColumn(FileSmoosher smoosher) throws IOException
|
||||
{
|
||||
if (!dictionarySerialized) {
|
||||
throw new ISE("Must serialize value dictionaries before serializing values for column [%s]", name);
|
||||
}
|
||||
|
||||
final Object value = StructuredData.unwrap(selector.getObject());
|
||||
final ExprEval<?> eval = ExprEval.bestEffortOf(value);
|
||||
|
||||
final long val = eval.asLong();
|
||||
final int dictId = eval.isNumericNull() ? 0 : dictionaryIdLookup.lookupLong(val);
|
||||
encodedValueSerializer.addValue(dictId);
|
||||
longsSerializer.add(dictId == 0 ? 0L : val);
|
||||
bitmaps[dictId].add(rowCount);
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
|
||||
private void closeForWrite() throws IOException
|
||||
{
|
||||
if (!closedForWrite) {
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
final MutableBitmap bitmap = bitmaps[i];
|
||||
bitmapIndexWriter.write(
|
||||
indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeImmutableBitmap(bitmap)
|
||||
);
|
||||
bitmaps[i] = null; // Reclaim memory
|
||||
}
|
||||
columnNameBytes = computeFilenameBytes();
|
||||
closedForWrite = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSerializedSize() throws IOException
|
||||
{
|
||||
closeForWrite();
|
||||
|
||||
long size = 1 + columnNameBytes.capacity();
|
||||
// the value dictionaries, raw column, and null index are all stored in separate files
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(
|
||||
WritableByteChannel channel,
|
||||
FileSmoosher smoosher
|
||||
) throws IOException
|
||||
{
|
||||
Preconditions.checkState(closedForWrite, "Not closed yet!");
|
||||
|
||||
writeV0Header(channel, columnNameBytes);
|
||||
writeInternal(smoosher, longDictionaryWriter, LONG_DICTIONARY_FILE_NAME);
|
||||
writeInternal(smoosher, encodedValueSerializer, ENCODED_VALUE_COLUMN_FILE_NAME);
|
||||
writeInternal(smoosher, longsSerializer, LONG_VALUE_COLUMN_FILE_NAME);
|
||||
writeInternal(smoosher, bitmapIndexWriter, BITMAP_INDEX_FILE_NAME);
|
||||
|
||||
log.info("Column [%s] serialized successfully.", name);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,230 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.segment.nested;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.java.util.common.io.Closer;
|
||||
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
|
||||
import org.apache.druid.java.util.common.logger.Logger;
|
||||
import org.apache.druid.segment.ColumnValueSelector;
|
||||
import org.apache.druid.segment.IndexSpec;
|
||||
import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer;
|
||||
import org.apache.druid.segment.data.CompressionStrategy;
|
||||
import org.apache.druid.segment.data.DictionaryWriter;
|
||||
import org.apache.druid.segment.data.FixedIndexedIntWriter;
|
||||
import org.apache.druid.segment.data.GenericIndexedWriter;
|
||||
import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer;
|
||||
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.WritableByteChannel;
|
||||
|
||||
public abstract class ScalarNestedCommonFormatColumnSerializer<T> extends NestedCommonFormatColumnSerializer
|
||||
{
|
||||
protected static final Logger log = new Logger(ScalarNestedCommonFormatColumnSerializer.class);
|
||||
|
||||
protected final String name;
|
||||
protected final SegmentWriteOutMedium segmentWriteOutMedium;
|
||||
protected final IndexSpec indexSpec;
|
||||
@SuppressWarnings("unused")
|
||||
protected final Closer closer;
|
||||
protected final String dictionaryFileName;
|
||||
|
||||
protected DictionaryIdLookup dictionaryIdLookup;
|
||||
protected DictionaryWriter<T> dictionaryWriter;
|
||||
protected boolean closedForWrite = false;
|
||||
protected boolean dictionarySerialized = false;
|
||||
protected FixedIndexedIntWriter intermediateValueWriter;
|
||||
protected ByteBuffer columnNameBytes = null;
|
||||
|
||||
protected boolean hasNulls;
|
||||
|
||||
|
||||
public ScalarNestedCommonFormatColumnSerializer(
|
||||
String name,
|
||||
String dictionaryFileName,
|
||||
IndexSpec indexSpec,
|
||||
SegmentWriteOutMedium segmentWriteOutMedium,
|
||||
Closer closer
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.dictionaryFileName = dictionaryFileName;
|
||||
this.segmentWriteOutMedium = segmentWriteOutMedium;
|
||||
this.indexSpec = indexSpec;
|
||||
this.closer = closer;
|
||||
this.dictionaryIdLookup = new DictionaryIdLookup();
|
||||
}
|
||||
|
||||
/**
|
||||
* Called during {@link #serialize(ColumnValueSelector)} to convert value to dictionary id.
|
||||
* <p>
|
||||
* Implementations may optionally also serialize the value to a type specific value column if they opened one with
|
||||
* {@link #openValueColumnSerializer()}, or do whatever else is useful to do while handling a single row value.
|
||||
*/
|
||||
protected abstract int processValue(@Nullable Object rawValue) throws IOException;
|
||||
|
||||
/**
|
||||
* Called during {@link #open()} to allow opening any separate type specific value column serializers
|
||||
*/
|
||||
protected abstract void openValueColumnSerializer() throws IOException;
|
||||
|
||||
/**
|
||||
* Called during {@link #writeTo(WritableByteChannel, FileSmoosher)} to allow any type specific value column
|
||||
* serializers to use the {@link FileSmoosher} to write stuff to places.
|
||||
*/
|
||||
protected abstract void writeValueColumn(FileSmoosher smoosher) throws IOException;
|
||||
|
||||
@Override
|
||||
public String getColumnName()
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictionaryIdLookup getGlobalLookup()
|
||||
{
|
||||
return dictionaryIdLookup;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNulls()
|
||||
{
|
||||
return hasNulls;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open() throws IOException
|
||||
{
|
||||
if (!dictionarySerialized) {
|
||||
throw new IllegalStateException("Dictionary not serialized, cannot open value serializer");
|
||||
}
|
||||
intermediateValueWriter = new FixedIndexedIntWriter(segmentWriteOutMedium, false);
|
||||
intermediateValueWriter.open();
|
||||
openValueColumnSerializer();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void serialize(ColumnValueSelector<? extends StructuredData> selector) throws IOException
|
||||
{
|
||||
if (!dictionarySerialized) {
|
||||
throw new ISE("Must serialize value dictionaries before serializing values for column [%s]", name);
|
||||
}
|
||||
|
||||
final Object value = StructuredData.unwrap(selector.getObject());
|
||||
final int dictId = processValue(value);
|
||||
intermediateValueWriter.write(dictId);
|
||||
hasNulls = hasNulls || dictId == 0;
|
||||
}
|
||||
|
||||
private void closeForWrite()
|
||||
{
|
||||
if (!closedForWrite) {
|
||||
columnNameBytes = computeFilenameBytes();
|
||||
closedForWrite = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSerializedSize()
|
||||
{
|
||||
closeForWrite();
|
||||
|
||||
// standard string version
|
||||
long size = 1 + columnNameBytes.capacity();
|
||||
// the value dictionaries, raw column, and null index are all stored in separate files
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(
|
||||
WritableByteChannel channel,
|
||||
FileSmoosher smoosher
|
||||
) throws IOException
|
||||
{
|
||||
Preconditions.checkState(closedForWrite, "Not closed yet!");
|
||||
Preconditions.checkArgument(dictionaryWriter.isSorted(), "Dictionary not sorted?!?");
|
||||
|
||||
// write out compressed dictionaryId int column and bitmap indexes by iterating intermediate value column
|
||||
// the intermediate value column should be replaced someday by a cooler compressed int column writer that allows
|
||||
// easy iteration of the values it writes out, so that we could just build the bitmap indexes here instead of
|
||||
// doing both things
|
||||
String filenameBase = StringUtils.format("%s.forward_dim", name);
|
||||
final CompressionStrategy compression = indexSpec.getDimensionCompression();
|
||||
final CompressionStrategy compressionToUse;
|
||||
if (compression != CompressionStrategy.UNCOMPRESSED && compression != CompressionStrategy.NONE) {
|
||||
compressionToUse = compression;
|
||||
} else {
|
||||
compressionToUse = CompressionStrategy.LZ4;
|
||||
}
|
||||
final SingleValueColumnarIntsSerializer encodedValueSerializer = CompressedVSizeColumnarIntsSerializer.create(
|
||||
name,
|
||||
segmentWriteOutMedium,
|
||||
filenameBase,
|
||||
dictionaryWriter.getCardinality(),
|
||||
compressionToUse
|
||||
);
|
||||
encodedValueSerializer.open();
|
||||
|
||||
final GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter = new GenericIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
name,
|
||||
indexSpec.getBitmapSerdeFactory().getObjectStrategy()
|
||||
);
|
||||
bitmapIndexWriter.open();
|
||||
bitmapIndexWriter.setObjectsNotSorted();
|
||||
final MutableBitmap[] bitmaps;
|
||||
bitmaps = new MutableBitmap[dictionaryWriter.getCardinality()];
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
bitmaps[i] = indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap();
|
||||
}
|
||||
|
||||
final IntIterator rows = intermediateValueWriter.getIterator();
|
||||
int rowCount = 0;
|
||||
while (rows.hasNext()) {
|
||||
final int dictId = rows.nextInt();
|
||||
encodedValueSerializer.addValue(dictId);
|
||||
bitmaps[dictId].add(rowCount++);
|
||||
}
|
||||
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
final MutableBitmap bitmap = bitmaps[i];
|
||||
bitmapIndexWriter.write(
|
||||
indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeImmutableBitmap(bitmap)
|
||||
);
|
||||
bitmaps[i] = null; // Reclaim memory
|
||||
}
|
||||
|
||||
writeV0Header(channel, columnNameBytes);
|
||||
writeInternal(smoosher, dictionaryWriter, dictionaryFileName);
|
||||
writeInternal(smoosher, encodedValueSerializer, ENCODED_VALUE_COLUMN_FILE_NAME);
|
||||
writeValueColumn(smoosher);
|
||||
writeInternal(smoosher, bitmapIndexWriter, BITMAP_INDEX_FILE_NAME);
|
||||
|
||||
log.info("Column [%s] serialized successfully.", name);
|
||||
}
|
||||
}
|
|
@ -19,56 +19,26 @@
|
|||
|
||||
package org.apache.druid.segment.nested;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.java.util.common.io.Closer;
|
||||
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
|
||||
import org.apache.druid.java.util.common.logger.Logger;
|
||||
import org.apache.druid.math.expr.ExprEval;
|
||||
import org.apache.druid.math.expr.ExpressionType;
|
||||
import org.apache.druid.segment.ColumnValueSelector;
|
||||
import org.apache.druid.segment.IndexSpec;
|
||||
import org.apache.druid.segment.column.StringEncodingStrategies;
|
||||
import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn;
|
||||
import org.apache.druid.segment.data.CompressedVSizeColumnarIntsSerializer;
|
||||
import org.apache.druid.segment.data.CompressionStrategy;
|
||||
import org.apache.druid.segment.data.DictionaryWriter;
|
||||
import org.apache.druid.segment.data.GenericIndexedWriter;
|
||||
import org.apache.druid.segment.data.SingleValueColumnarIntsSerializer;
|
||||
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.WritableByteChannel;
|
||||
|
||||
/**
|
||||
* Serializer for a string {@link NestedCommonFormatColumn} that can be read with
|
||||
* {@link StringUtf8DictionaryEncodedColumn}.
|
||||
*/
|
||||
public class ScalarStringColumnSerializer extends NestedCommonFormatColumnSerializer
|
||||
public class ScalarStringColumnSerializer extends ScalarNestedCommonFormatColumnSerializer<String>
|
||||
{
|
||||
private static final Logger log = new Logger(ScalarStringColumnSerializer.class);
|
||||
|
||||
private final String name;
|
||||
private final SegmentWriteOutMedium segmentWriteOutMedium;
|
||||
private final IndexSpec indexSpec;
|
||||
@SuppressWarnings("unused")
|
||||
private final Closer closer;
|
||||
private DictionaryIdLookup dictionaryIdLookup;
|
||||
private DictionaryWriter<String> dictionaryWriter;
|
||||
private int rowCount = 0;
|
||||
private boolean closedForWrite = false;
|
||||
private boolean dictionarySerialized = false;
|
||||
|
||||
private SingleValueColumnarIntsSerializer encodedValueSerializer;
|
||||
private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter;
|
||||
private MutableBitmap[] bitmaps;
|
||||
private ByteBuffer columnNameBytes = null;
|
||||
|
||||
public ScalarStringColumnSerializer(
|
||||
String name,
|
||||
IndexSpec indexSpec,
|
||||
|
@ -76,29 +46,16 @@ public class ScalarStringColumnSerializer extends NestedCommonFormatColumnSerial
|
|||
Closer closer
|
||||
)
|
||||
{
|
||||
this.name = name;
|
||||
this.segmentWriteOutMedium = segmentWriteOutMedium;
|
||||
this.indexSpec = indexSpec;
|
||||
this.closer = closer;
|
||||
this.dictionaryIdLookup = new DictionaryIdLookup();
|
||||
super(name, STRING_DICTIONARY_FILE_NAME, indexSpec, segmentWriteOutMedium, closer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getColumnName()
|
||||
protected int processValue(@Nullable Object rawValue)
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictionaryIdLookup getGlobalLookup()
|
||||
{
|
||||
return dictionaryIdLookup;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNulls()
|
||||
{
|
||||
return !bitmaps[0].isEmpty();
|
||||
final ExprEval<?> eval = ExprEval.bestEffortOf(rawValue);
|
||||
final String s = eval.castTo(ExpressionType.STRING).asString();
|
||||
final int dictId = dictionaryIdLookup.lookupString(s);
|
||||
return dictId;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -113,40 +70,9 @@ public class ScalarStringColumnSerializer extends NestedCommonFormatColumnSerial
|
|||
}
|
||||
|
||||
@Override
|
||||
public void open() throws IOException
|
||||
protected void openValueColumnSerializer()
|
||||
{
|
||||
if (!dictionarySerialized) {
|
||||
throw new IllegalStateException("Dictionary not serialized, cannot open value serializer");
|
||||
}
|
||||
String filenameBase = StringUtils.format("%s.forward_dim", name);
|
||||
final CompressionStrategy compression = indexSpec.getDimensionCompression();
|
||||
final CompressionStrategy compressionToUse;
|
||||
if (compression != CompressionStrategy.UNCOMPRESSED && compression != CompressionStrategy.NONE) {
|
||||
compressionToUse = compression;
|
||||
} else {
|
||||
// always compress
|
||||
compressionToUse = CompressionStrategy.LZ4;
|
||||
}
|
||||
encodedValueSerializer = CompressedVSizeColumnarIntsSerializer.create(
|
||||
name,
|
||||
segmentWriteOutMedium,
|
||||
filenameBase,
|
||||
dictionaryWriter.getCardinality(),
|
||||
compressionToUse
|
||||
);
|
||||
encodedValueSerializer.open();
|
||||
|
||||
bitmapIndexWriter = new GenericIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
name,
|
||||
indexSpec.getBitmapSerdeFactory().getObjectStrategy()
|
||||
);
|
||||
bitmapIndexWriter.open();
|
||||
bitmapIndexWriter.setObjectsNotSorted();
|
||||
bitmaps = new MutableBitmap[dictionaryWriter.getCardinality()];
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
bitmaps[i] = indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap();
|
||||
}
|
||||
// no extra value column for strings
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -177,61 +103,8 @@ public class ScalarStringColumnSerializer extends NestedCommonFormatColumnSerial
|
|||
}
|
||||
|
||||
@Override
|
||||
public void serialize(ColumnValueSelector<? extends StructuredData> selector) throws IOException
|
||||
protected void writeValueColumn(FileSmoosher smoosher)
|
||||
{
|
||||
if (!dictionarySerialized) {
|
||||
throw new ISE("Must serialize value dictionaries before serializing values for column [%s]", name);
|
||||
}
|
||||
|
||||
final Object value = StructuredData.unwrap(selector.getObject());
|
||||
final ExprEval<?> eval = ExprEval.bestEffortOf(value);
|
||||
final String s = eval.castTo(ExpressionType.STRING).asString();
|
||||
final int dictId = dictionaryIdLookup.lookupString(s);
|
||||
encodedValueSerializer.addValue(dictId);
|
||||
bitmaps[dictId].add(rowCount);
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
private void closeForWrite() throws IOException
|
||||
{
|
||||
if (!closedForWrite) {
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
final MutableBitmap bitmap = bitmaps[i];
|
||||
bitmapIndexWriter.write(
|
||||
indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeImmutableBitmap(bitmap)
|
||||
);
|
||||
bitmaps[i] = null; // Reclaim memory
|
||||
}
|
||||
columnNameBytes = computeFilenameBytes();
|
||||
closedForWrite = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSerializedSize() throws IOException
|
||||
{
|
||||
closeForWrite();
|
||||
|
||||
// standard string version
|
||||
long size = 1 + columnNameBytes.capacity();
|
||||
// the value dictionaries, raw column, and null index are all stored in separate files
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(
|
||||
WritableByteChannel channel,
|
||||
FileSmoosher smoosher
|
||||
) throws IOException
|
||||
{
|
||||
Preconditions.checkState(closedForWrite, "Not closed yet!");
|
||||
Preconditions.checkArgument(dictionaryWriter.isSorted(), "Dictionary not sorted?!?");
|
||||
|
||||
writeV0Header(channel, columnNameBytes);
|
||||
writeInternal(smoosher, dictionaryWriter, STRING_DICTIONARY_FILE_NAME);
|
||||
writeInternal(smoosher, encodedValueSerializer, ENCODED_VALUE_COLUMN_FILE_NAME);
|
||||
writeInternal(smoosher, bitmapIndexWriter, BITMAP_INDEX_FILE_NAME);
|
||||
|
||||
log.info("Column [%s] serialized successfully.", name);
|
||||
// no extra value column for strings
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.druid.java.util.common.RE;
|
|||
import org.apache.druid.java.util.common.StringUtils;
|
||||
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
|
||||
import org.apache.druid.math.expr.ExprEval;
|
||||
import org.apache.druid.math.expr.ExprType;
|
||||
import org.apache.druid.math.expr.ExpressionType;
|
||||
import org.apache.druid.query.BitmapResultFactory;
|
||||
import org.apache.druid.segment.column.ColumnBuilder;
|
||||
|
@ -47,13 +48,16 @@ import org.apache.druid.segment.data.FrontCodedIntArrayIndexed;
|
|||
import org.apache.druid.segment.data.GenericIndexed;
|
||||
import org.apache.druid.segment.data.Indexed;
|
||||
import org.apache.druid.segment.data.VByte;
|
||||
import org.apache.druid.segment.index.AllFalseBitmapColumnIndex;
|
||||
import org.apache.druid.segment.index.BitmapColumnIndex;
|
||||
import org.apache.druid.segment.index.SimpleBitmapColumnIndex;
|
||||
import org.apache.druid.segment.index.SimpleImmutableBitmapIndex;
|
||||
import org.apache.druid.segment.index.semantic.ArrayElementIndexes;
|
||||
import org.apache.druid.segment.index.semantic.NullValueIndex;
|
||||
import org.apache.druid.segment.index.semantic.ValueIndexes;
|
||||
import org.apache.druid.segment.serde.NestedCommonFormatColumnPartSerde;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
@ -93,6 +97,7 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
final Supplier<FixedIndexed<Long>> longDictionarySupplier;
|
||||
final Supplier<FixedIndexed<Double>> doubleDictionarySupplier;
|
||||
final Supplier<FrontCodedIntArrayIndexed> arrayDictionarySupplier;
|
||||
final Supplier<FixedIndexed<Integer>> arrayElementDictionarySupplier;
|
||||
|
||||
final ByteBuffer stringDictionaryBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile(
|
||||
mapper,
|
||||
|
@ -147,6 +152,11 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
columnName,
|
||||
NestedCommonFormatColumnSerializer.DOUBLE_DICTIONARY_FILE_NAME
|
||||
);
|
||||
final ByteBuffer arrayElementDictionaryBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile(
|
||||
mapper,
|
||||
columnName,
|
||||
NestedCommonFormatColumnSerializer.ARRAY_ELEMENT_DICTIONARY_FILE_NAME
|
||||
);
|
||||
final ByteBuffer valueIndexBuffer = NestedCommonFormatColumnPartSerde.loadInternalFile(
|
||||
mapper,
|
||||
columnName,
|
||||
|
@ -180,6 +190,7 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
byteOrder,
|
||||
Double.BYTES
|
||||
);
|
||||
|
||||
final ByteBuffer arrayDictionarybuffer = NestedCommonFormatColumnPartSerde.loadInternalFile(
|
||||
mapper,
|
||||
columnName,
|
||||
|
@ -193,6 +204,12 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
try (ColumnarInts throwAway = ints.get()) {
|
||||
size = throwAway.size();
|
||||
}
|
||||
arrayElementDictionarySupplier = FixedIndexed.read(
|
||||
arrayElementDictionaryBuffer,
|
||||
CompressedNestedDataComplexColumn.INT_TYPE_STRATEGY,
|
||||
byteOrder,
|
||||
Integer.BYTES
|
||||
);
|
||||
return new VariantColumnAndIndexSupplier(
|
||||
logicalType,
|
||||
variantTypeByte,
|
||||
|
@ -201,6 +218,7 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
longDictionarySupplier,
|
||||
doubleDictionarySupplier,
|
||||
arrayDictionarySupplier,
|
||||
arrayElementDictionarySupplier,
|
||||
ints,
|
||||
valueIndexes,
|
||||
arrayElementIndexes,
|
||||
|
@ -222,11 +240,11 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
@Nullable
|
||||
private final Byte variantTypeSetByte;
|
||||
private final BitmapFactory bitmapFactory;
|
||||
private final GenericIndexed<ByteBuffer> stringDictionary;
|
||||
private final Supplier<FrontCodedIndexed> frontCodedStringDictionarySupplier;
|
||||
private final Supplier<? extends Indexed<ByteBuffer>> stringDictionarySupplier;
|
||||
private final Supplier<FixedIndexed<Long>> longDictionarySupplier;
|
||||
private final Supplier<FixedIndexed<Double>> doubleDictionarySupplier;
|
||||
private final Supplier<FrontCodedIntArrayIndexed> arrayDictionarySupplier;
|
||||
private final Supplier<FixedIndexed<Integer>> arrayElementDictionarySupplier;
|
||||
private final Supplier<ColumnarInts> encodedValueColumnSupplier;
|
||||
@SuppressWarnings("unused")
|
||||
private final GenericIndexed<ImmutableBitmap> valueIndexes;
|
||||
|
@ -242,6 +260,7 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
Supplier<FixedIndexed<Long>> longDictionarySupplier,
|
||||
Supplier<FixedIndexed<Double>> doubleDictionarySupplier,
|
||||
Supplier<FrontCodedIntArrayIndexed> arrayDictionarySupplier,
|
||||
Supplier<FixedIndexed<Integer>> arrayElementDictionarySupplier,
|
||||
Supplier<ColumnarInts> encodedValueColumnSupplier,
|
||||
GenericIndexed<ImmutableBitmap> valueIndexes,
|
||||
GenericIndexed<ImmutableBitmap> elementIndexes,
|
||||
|
@ -252,11 +271,13 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
{
|
||||
this.logicalType = logicalType;
|
||||
this.variantTypeSetByte = variantTypeSetByte;
|
||||
this.stringDictionary = stringDictionary;
|
||||
this.frontCodedStringDictionarySupplier = frontCodedStringDictionarySupplier;
|
||||
stringDictionarySupplier = frontCodedStringDictionarySupplier != null
|
||||
? frontCodedStringDictionarySupplier
|
||||
: stringDictionary::singleThreaded;
|
||||
this.longDictionarySupplier = longDictionarySupplier;
|
||||
this.doubleDictionarySupplier = doubleDictionarySupplier;
|
||||
this.arrayDictionarySupplier = arrayDictionarySupplier;
|
||||
this.arrayElementDictionarySupplier = arrayElementDictionarySupplier;
|
||||
this.encodedValueColumnSupplier = encodedValueColumnSupplier;
|
||||
this.valueIndexes = valueIndexes;
|
||||
this.arrayElementIndexes = elementIndexes;
|
||||
|
@ -273,20 +294,8 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
@Override
|
||||
public NestedCommonFormatColumn get()
|
||||
{
|
||||
if (frontCodedStringDictionarySupplier != null) {
|
||||
return new VariantColumn<>(
|
||||
frontCodedStringDictionarySupplier.get(),
|
||||
longDictionarySupplier.get(),
|
||||
doubleDictionarySupplier.get(),
|
||||
arrayDictionarySupplier.get(),
|
||||
encodedValueColumnSupplier.get(),
|
||||
nullValueBitmap,
|
||||
logicalType,
|
||||
variantTypeSetByte
|
||||
);
|
||||
}
|
||||
return new VariantColumn<>(
|
||||
stringDictionary.singleThreaded(),
|
||||
stringDictionarySupplier.get(),
|
||||
longDictionarySupplier.get(),
|
||||
doubleDictionarySupplier.get(),
|
||||
arrayDictionarySupplier.get(),
|
||||
|
@ -306,6 +315,8 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
return (T) (NullValueIndex) () -> nullIndex;
|
||||
} else if (clazz.equals(ValueIndexes.class) && variantTypeSetByte == null && logicalType.isArray()) {
|
||||
return (T) new ArrayValueIndexes();
|
||||
} else if (clazz.equals(ArrayElementIndexes.class) && variantTypeSetByte == null && logicalType.isArray()) {
|
||||
return (T) new VariantArrayElementIndexes();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
@ -320,30 +331,42 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
return bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap;
|
||||
}
|
||||
|
||||
private ImmutableBitmap getElementBitmap(int idx)
|
||||
{
|
||||
if (idx < 0) {
|
||||
return bitmapFactory.makeEmptyImmutableBitmap();
|
||||
}
|
||||
final int elementDictionaryIndex = arrayElementDictionarySupplier.get().indexOf(idx);
|
||||
if (elementDictionaryIndex < 0) {
|
||||
return bitmapFactory.makeEmptyImmutableBitmap();
|
||||
}
|
||||
final ImmutableBitmap bitmap = arrayElementIndexes.get(elementDictionaryIndex);
|
||||
return bitmap == null ? bitmapFactory.makeEmptyImmutableBitmap() : bitmap;
|
||||
}
|
||||
|
||||
private class ArrayValueIndexes implements ValueIndexes
|
||||
{
|
||||
@Nullable
|
||||
@Override
|
||||
public BitmapColumnIndex forValue(Object value, TypeSignature<ValueType> valueType)
|
||||
public BitmapColumnIndex forValue(@Nonnull Object value, TypeSignature<ValueType> valueType)
|
||||
{
|
||||
final ExprEval<?> eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(valueType), value)
|
||||
.castTo(ExpressionType.fromColumnTypeStrict(logicalType));
|
||||
if (eval.value() == null) {
|
||||
return null;
|
||||
}
|
||||
final Object[] arrayToMatch = eval.asArray();
|
||||
Indexed elements;
|
||||
final int elementOffset;
|
||||
switch (logicalType.getElementType().getType()) {
|
||||
case STRING:
|
||||
elements = frontCodedStringDictionarySupplier != null
|
||||
? frontCodedStringDictionarySupplier.get()
|
||||
: stringDictionary.singleThreaded();
|
||||
elements = stringDictionarySupplier.get();
|
||||
elementOffset = 0;
|
||||
break;
|
||||
case LONG:
|
||||
elements = longDictionarySupplier.get();
|
||||
elementOffset = stringDictionarySupplier.get().size();
|
||||
break;
|
||||
case DOUBLE:
|
||||
elements = doubleDictionarySupplier.get();
|
||||
elementOffset = stringDictionarySupplier.get().size() + longDictionarySupplier.get().size();
|
||||
break;
|
||||
default:
|
||||
throw DruidException.defensive(
|
||||
|
@ -353,30 +376,29 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
}
|
||||
|
||||
final int[] ids = new int[arrayToMatch.length];
|
||||
boolean hasMissingElement = false;
|
||||
final int arrayOffset = stringDictionarySupplier.get().size() + longDictionarySupplier.get().size() + doubleDictionarySupplier.get().size();
|
||||
for (int i = 0; i < arrayToMatch.length; i++) {
|
||||
if (logicalType.getElementType().is(ValueType.STRING)) {
|
||||
if (arrayToMatch[i] == null) {
|
||||
ids[i] = 0;
|
||||
} else if (logicalType.getElementType().is(ValueType.STRING)) {
|
||||
ids[i] = elements.indexOf(StringUtils.toUtf8ByteBuffer((String) arrayToMatch[i]));
|
||||
} else {
|
||||
ids[i] = elements.indexOf(arrayToMatch[i]);
|
||||
ids[i] = elements.indexOf(arrayToMatch[i]) + elementOffset;
|
||||
}
|
||||
if (ids[i] < 0) {
|
||||
hasMissingElement = true;
|
||||
break;
|
||||
if (value == null) {
|
||||
return new AllFalseBitmapColumnIndex(bitmapFactory);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final boolean noMatch = hasMissingElement;
|
||||
final FrontCodedIntArrayIndexed dictionary = arrayDictionarySupplier.get();
|
||||
return new SimpleBitmapColumnIndex()
|
||||
{
|
||||
@Override
|
||||
public double estimateSelectivity(int totalRows)
|
||||
{
|
||||
if (noMatch) {
|
||||
return 0.0;
|
||||
}
|
||||
final int id = dictionary.indexOf(ids);
|
||||
final int id = dictionary.indexOf(ids) + arrayOffset;
|
||||
if (id < 0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
@ -386,10 +408,7 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
@Override
|
||||
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
|
||||
{
|
||||
if (noMatch) {
|
||||
return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap());
|
||||
}
|
||||
final int id = dictionary.indexOf(ids);
|
||||
final int id = dictionary.indexOf(ids) + arrayOffset;
|
||||
if (id < 0) {
|
||||
return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap());
|
||||
}
|
||||
|
@ -398,4 +417,73 @@ public class VariantColumnAndIndexSupplier implements Supplier<NestedCommonForma
|
|||
};
|
||||
}
|
||||
}
|
||||
|
||||
private class VariantArrayElementIndexes implements ArrayElementIndexes
|
||||
{
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public BitmapColumnIndex containsValue(@Nullable Object value, TypeSignature<ValueType> elementValueType)
|
||||
{
|
||||
final ExprEval<?> eval = ExprEval.ofType(ExpressionType.fromColumnTypeStrict(elementValueType), value)
|
||||
.castTo(ExpressionType.fromColumnTypeStrict(logicalType.getElementType()));
|
||||
|
||||
Indexed elements;
|
||||
final int elementOffset;
|
||||
switch (logicalType.getElementType().getType()) {
|
||||
case STRING:
|
||||
elements = stringDictionarySupplier.get();
|
||||
elementOffset = 0;
|
||||
break;
|
||||
case LONG:
|
||||
elements = longDictionarySupplier.get();
|
||||
elementOffset = stringDictionarySupplier.get().size();
|
||||
break;
|
||||
case DOUBLE:
|
||||
elements = doubleDictionarySupplier.get();
|
||||
elementOffset = stringDictionarySupplier.get().size() + longDictionarySupplier.get().size();
|
||||
break;
|
||||
default:
|
||||
throw DruidException.defensive(
|
||||
"Unhandled array type [%s] how did this happen?",
|
||||
logicalType.getElementType()
|
||||
);
|
||||
}
|
||||
|
||||
return new SimpleBitmapColumnIndex()
|
||||
{
|
||||
@Override
|
||||
public double estimateSelectivity(int totalRows)
|
||||
{
|
||||
final int elementId = getElementId();
|
||||
if (elementId < 0) {
|
||||
return 0.0;
|
||||
}
|
||||
return (double) getElementBitmap(elementId).size() / totalRows;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T computeBitmapResult(BitmapResultFactory<T> bitmapResultFactory)
|
||||
{
|
||||
final int elementId = getElementId();
|
||||
|
||||
if (elementId < 0) {
|
||||
return bitmapResultFactory.wrapDimensionValue(bitmapFactory.makeEmptyImmutableBitmap());
|
||||
}
|
||||
return bitmapResultFactory.wrapDimensionValue(getElementBitmap(elementId));
|
||||
}
|
||||
|
||||
private int getElementId()
|
||||
{
|
||||
if (eval.value() == null) {
|
||||
return 0;
|
||||
} else if (eval.type().is(ExprType.STRING)) {
|
||||
return elements.indexOf(StringUtils.toUtf8ByteBuffer(eval.asString()));
|
||||
} else {
|
||||
return elements.indexOf(eval.value()) + elementOffset;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ package org.apache.druid.segment.nested;
|
|||
import com.google.common.base.Preconditions;
|
||||
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
|
||||
import it.unimi.dsi.fastutil.ints.Int2ObjectRBTreeMap;
|
||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
import org.apache.druid.collections.bitmap.ImmutableBitmap;
|
||||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
|
@ -71,15 +72,12 @@ public class VariantColumnSerializer extends NestedCommonFormatColumnSerializer
|
|||
private FixedIndexedWriter<Double> doubleDictionaryWriter;
|
||||
private FrontCodedIntArrayIndexedWriter arrayDictionaryWriter;
|
||||
private FixedIndexedIntWriter arrayElementDictionaryWriter;
|
||||
private int rowCount = 0;
|
||||
private boolean closedForWrite = false;
|
||||
private boolean dictionarySerialized = false;
|
||||
private SingleValueColumnarIntsSerializer encodedValueSerializer;
|
||||
private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter;
|
||||
private GenericIndexedWriter<ImmutableBitmap> arrayElementIndexWriter;
|
||||
private MutableBitmap[] bitmaps;
|
||||
private FixedIndexedIntWriter intermediateValueWriter;
|
||||
|
||||
private ByteBuffer columnNameBytes = null;
|
||||
private final Int2ObjectRBTreeMap<MutableBitmap> arrayElements = new Int2ObjectRBTreeMap<>();
|
||||
private boolean hasNulls;
|
||||
@Nullable
|
||||
private final Byte variantTypeSetByte;
|
||||
|
||||
|
@ -114,7 +112,7 @@ public class VariantColumnSerializer extends NestedCommonFormatColumnSerializer
|
|||
@Override
|
||||
public boolean hasNulls()
|
||||
{
|
||||
return !bitmaps[0].isEmpty();
|
||||
return hasNulls;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -161,45 +159,8 @@ public class VariantColumnSerializer extends NestedCommonFormatColumnSerializer
|
|||
if (!dictionarySerialized) {
|
||||
throw new IllegalStateException("Dictionary not serialized, cannot open value serializer");
|
||||
}
|
||||
String filenameBase = StringUtils.format("%s.forward_dim", name);
|
||||
final int cardinality = dictionaryWriter.getCardinality()
|
||||
+ longDictionaryWriter.getCardinality()
|
||||
+ doubleDictionaryWriter.getCardinality()
|
||||
+ arrayDictionaryWriter.getCardinality();
|
||||
final CompressionStrategy compression = indexSpec.getDimensionCompression();
|
||||
final CompressionStrategy compressionToUse;
|
||||
if (compression != CompressionStrategy.UNCOMPRESSED && compression != CompressionStrategy.NONE) {
|
||||
compressionToUse = compression;
|
||||
} else {
|
||||
compressionToUse = CompressionStrategy.LZ4;
|
||||
}
|
||||
encodedValueSerializer = CompressedVSizeColumnarIntsSerializer.create(
|
||||
name,
|
||||
segmentWriteOutMedium,
|
||||
filenameBase,
|
||||
cardinality,
|
||||
compressionToUse
|
||||
);
|
||||
encodedValueSerializer.open();
|
||||
|
||||
bitmapIndexWriter = new GenericIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
name,
|
||||
indexSpec.getBitmapSerdeFactory().getObjectStrategy()
|
||||
);
|
||||
bitmapIndexWriter.open();
|
||||
bitmapIndexWriter.setObjectsNotSorted();
|
||||
bitmaps = new MutableBitmap[cardinality];
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
bitmaps[i] = indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap();
|
||||
}
|
||||
arrayElementIndexWriter = new GenericIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
name + "_arrays",
|
||||
indexSpec.getBitmapSerdeFactory().getObjectStrategy()
|
||||
);
|
||||
arrayElementIndexWriter.open();
|
||||
arrayElementIndexWriter.setObjectsNotSorted();
|
||||
intermediateValueWriter = new FixedIndexedIntWriter(segmentWriteOutMedium, false);
|
||||
intermediateValueWriter.open();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -211,7 +172,7 @@ public class VariantColumnSerializer extends NestedCommonFormatColumnSerializer
|
|||
) throws IOException
|
||||
{
|
||||
if (dictionarySerialized) {
|
||||
throw new ISE("String dictionary already serialized for column [%s], cannot serialize again", name);
|
||||
throw new ISE("Value dictionaries already serialized for column [%s], cannot serialize again", name);
|
||||
}
|
||||
|
||||
// null is always 0
|
||||
|
@ -277,14 +238,10 @@ public class VariantColumnSerializer extends NestedCommonFormatColumnSerializer
|
|||
globalIds[i] = -1;
|
||||
}
|
||||
Preconditions.checkArgument(globalIds[i] >= 0, "unknown global id [%s] for value [%s]", globalIds[i], array[i]);
|
||||
arrayElements.computeIfAbsent(
|
||||
globalIds[i],
|
||||
(id) -> indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap()
|
||||
).add(rowCount);
|
||||
}
|
||||
final int dictId = dictionaryIdLookup.lookupArray(globalIds);
|
||||
encodedValueSerializer.addValue(dictId);
|
||||
bitmaps[dictId].add(rowCount);
|
||||
intermediateValueWriter.write(dictId);
|
||||
hasNulls = hasNulls || dictId == 0;
|
||||
} else {
|
||||
final Object o = eval.value();
|
||||
final int dictId;
|
||||
|
@ -300,43 +257,21 @@ public class VariantColumnSerializer extends NestedCommonFormatColumnSerializer
|
|||
dictId = -1;
|
||||
}
|
||||
Preconditions.checkArgument(dictId >= 0, "unknown global id [%s] for value [%s]", dictId, o);
|
||||
if (dictId != 0) {
|
||||
// treat as single element array
|
||||
arrayElements.computeIfAbsent(
|
||||
dictId,
|
||||
(id) -> indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap()
|
||||
).add(rowCount);
|
||||
intermediateValueWriter.write(dictId);
|
||||
hasNulls = hasNulls || dictId == 0;
|
||||
}
|
||||
encodedValueSerializer.addValue(dictId);
|
||||
bitmaps[dictId].add(rowCount);
|
||||
}
|
||||
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
private void closeForWrite() throws IOException
|
||||
private void closeForWrite()
|
||||
{
|
||||
if (!closedForWrite) {
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
final MutableBitmap bitmap = bitmaps[i];
|
||||
bitmapIndexWriter.write(
|
||||
indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeImmutableBitmap(bitmap)
|
||||
);
|
||||
bitmaps[i] = null; // Reclaim memory
|
||||
}
|
||||
for (Int2ObjectMap.Entry<MutableBitmap> arrayElement : arrayElements.int2ObjectEntrySet()) {
|
||||
arrayElementDictionaryWriter.write(arrayElement.getIntKey());
|
||||
arrayElementIndexWriter.write(
|
||||
indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeImmutableBitmap(arrayElement.getValue())
|
||||
);
|
||||
}
|
||||
columnNameBytes = computeFilenameBytes();
|
||||
closedForWrite = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSerializedSize() throws IOException
|
||||
public long getSerializedSize()
|
||||
{
|
||||
closeForWrite();
|
||||
|
||||
|
@ -357,6 +292,87 @@ public class VariantColumnSerializer extends NestedCommonFormatColumnSerializer
|
|||
Preconditions.checkState(closedForWrite, "Not closed yet!");
|
||||
Preconditions.checkArgument(dictionaryWriter.isSorted(), "Dictionary not sorted?!?");
|
||||
|
||||
// write out compressed dictionaryId int column, bitmap indexes, and array element bitmap indexes
|
||||
// by iterating intermediate value column the intermediate value column should be replaced someday by a cooler
|
||||
// compressed int column writer that allows easy iteration of the values it writes out, so that we could just
|
||||
// build the bitmap indexes here instead of doing both things
|
||||
String filenameBase = StringUtils.format("%s.forward_dim", name);
|
||||
final int cardinality = dictionaryWriter.getCardinality()
|
||||
+ longDictionaryWriter.getCardinality()
|
||||
+ doubleDictionaryWriter.getCardinality()
|
||||
+ arrayDictionaryWriter.getCardinality();
|
||||
final CompressionStrategy compression = indexSpec.getDimensionCompression();
|
||||
final CompressionStrategy compressionToUse;
|
||||
if (compression != CompressionStrategy.UNCOMPRESSED && compression != CompressionStrategy.NONE) {
|
||||
compressionToUse = compression;
|
||||
} else {
|
||||
compressionToUse = CompressionStrategy.LZ4;
|
||||
}
|
||||
|
||||
final SingleValueColumnarIntsSerializer encodedValueSerializer = CompressedVSizeColumnarIntsSerializer.create(
|
||||
name,
|
||||
segmentWriteOutMedium,
|
||||
filenameBase,
|
||||
cardinality,
|
||||
compressionToUse
|
||||
);
|
||||
encodedValueSerializer.open();
|
||||
|
||||
final GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter = new GenericIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
name,
|
||||
indexSpec.getBitmapSerdeFactory().getObjectStrategy()
|
||||
);
|
||||
bitmapIndexWriter.open();
|
||||
bitmapIndexWriter.setObjectsNotSorted();
|
||||
final MutableBitmap[] bitmaps = new MutableBitmap[cardinality];
|
||||
final Int2ObjectRBTreeMap<MutableBitmap> arrayElements = new Int2ObjectRBTreeMap<>();
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
bitmaps[i] = indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap();
|
||||
}
|
||||
final GenericIndexedWriter<ImmutableBitmap> arrayElementIndexWriter = new GenericIndexedWriter<>(
|
||||
segmentWriteOutMedium,
|
||||
name + "_arrays",
|
||||
indexSpec.getBitmapSerdeFactory().getObjectStrategy()
|
||||
);
|
||||
arrayElementIndexWriter.open();
|
||||
arrayElementIndexWriter.setObjectsNotSorted();
|
||||
|
||||
final IntIterator rows = intermediateValueWriter.getIterator();
|
||||
int rowCount = 0;
|
||||
final int arrayBaseId = dictionaryWriter.getCardinality()
|
||||
+ longDictionaryWriter.getCardinality()
|
||||
+ doubleDictionaryWriter.getCardinality();
|
||||
while (rows.hasNext()) {
|
||||
final int dictId = rows.nextInt();
|
||||
encodedValueSerializer.addValue(dictId);
|
||||
bitmaps[dictId].add(rowCount);
|
||||
if (dictId >= arrayBaseId) {
|
||||
int[] array = arrayDictionaryWriter.get(dictId - arrayBaseId);
|
||||
for (int elementId : array) {
|
||||
arrayElements.computeIfAbsent(
|
||||
elementId,
|
||||
(id) -> indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap()
|
||||
).add(rowCount);
|
||||
}
|
||||
}
|
||||
rowCount++;
|
||||
}
|
||||
|
||||
for (int i = 0; i < bitmaps.length; i++) {
|
||||
final MutableBitmap bitmap = bitmaps[i];
|
||||
bitmapIndexWriter.write(
|
||||
indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeImmutableBitmap(bitmap)
|
||||
);
|
||||
bitmaps[i] = null; // Reclaim memory
|
||||
}
|
||||
for (Int2ObjectMap.Entry<MutableBitmap> arrayElement : arrayElements.int2ObjectEntrySet()) {
|
||||
arrayElementDictionaryWriter.write(arrayElement.getIntKey());
|
||||
arrayElementIndexWriter.write(
|
||||
indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeImmutableBitmap(arrayElement.getValue())
|
||||
);
|
||||
}
|
||||
|
||||
writeV0Header(channel, columnNameBytes);
|
||||
if (variantTypeSetByte != null) {
|
||||
channel.write(ByteBuffer.wrap(new byte[]{variantTypeSetByte}));
|
||||
|
|
|
@ -47,6 +47,7 @@ import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
|
|||
import org.apache.druid.segment.index.semantic.DruidPredicateIndexes;
|
||||
import org.apache.druid.segment.index.semantic.NullValueIndex;
|
||||
import org.apache.druid.segment.index.semantic.StringValueSetIndexes;
|
||||
import org.apache.druid.segment.index.semantic.ValueIndexes;
|
||||
import org.apache.druid.segment.vector.NoFilterVectorOffset;
|
||||
import org.apache.druid.segment.vector.VectorValueSelector;
|
||||
import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory;
|
||||
|
@ -248,6 +249,7 @@ public class ScalarDoubleColumnSupplierTest extends InitializedNullHandlingTest
|
|||
ColumnValueSelector<?> valueSelector = column.makeColumnValueSelector(offset);
|
||||
VectorValueSelector vectorValueSelector = column.makeVectorValueSelector(vectorOffset);
|
||||
|
||||
ValueIndexes valueIndexes = supplier.as(ValueIndexes.class);
|
||||
StringValueSetIndexes valueSetIndex = supplier.as(StringValueSetIndexes.class);
|
||||
DruidPredicateIndexes predicateIndex = supplier.as(DruidPredicateIndexes.class);
|
||||
NullValueIndex nullValueIndex = supplier.as(NullValueIndex.class);
|
||||
|
@ -279,6 +281,7 @@ public class ScalarDoubleColumnSupplierTest extends InitializedNullHandlingTest
|
|||
}
|
||||
|
||||
Assert.assertTrue(valueSetIndex.forValue(String.valueOf(row)).computeBitmapResult(resultFactory).get(i));
|
||||
Assert.assertTrue(valueIndexes.forValue(row, ColumnType.DOUBLE).computeBitmapResult(resultFactory).get(i));
|
||||
Assert.assertTrue(valueSetIndex.forSortedValues(new TreeSet<>(ImmutableSet.of(String.valueOf(row))))
|
||||
.computeBitmapResult(resultFactory)
|
||||
.get(i));
|
||||
|
|
|
@ -47,6 +47,7 @@ import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
|
|||
import org.apache.druid.segment.index.semantic.DruidPredicateIndexes;
|
||||
import org.apache.druid.segment.index.semantic.NullValueIndex;
|
||||
import org.apache.druid.segment.index.semantic.StringValueSetIndexes;
|
||||
import org.apache.druid.segment.index.semantic.ValueIndexes;
|
||||
import org.apache.druid.segment.vector.NoFilterVectorOffset;
|
||||
import org.apache.druid.segment.vector.VectorValueSelector;
|
||||
import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory;
|
||||
|
@ -248,6 +249,7 @@ public class ScalarLongColumnSupplierTest extends InitializedNullHandlingTest
|
|||
ColumnValueSelector<?> valueSelector = column.makeColumnValueSelector(offset);
|
||||
VectorValueSelector vectorValueSelector = column.makeVectorValueSelector(vectorOffset);
|
||||
|
||||
ValueIndexes valueIndexes = supplier.as(ValueIndexes.class);
|
||||
StringValueSetIndexes valueSetIndex = supplier.as(StringValueSetIndexes.class);
|
||||
DruidPredicateIndexes predicateIndex = supplier.as(DruidPredicateIndexes.class);
|
||||
NullValueIndex nullValueIndex = supplier.as(NullValueIndex.class);
|
||||
|
@ -279,6 +281,7 @@ public class ScalarLongColumnSupplierTest extends InitializedNullHandlingTest
|
|||
}
|
||||
|
||||
Assert.assertTrue(valueSetIndex.forValue(String.valueOf(row)).computeBitmapResult(resultFactory).get(i));
|
||||
Assert.assertTrue(valueIndexes.forValue(row, ColumnType.LONG).computeBitmapResult(resultFactory).get(i));
|
||||
Assert.assertTrue(valueSetIndex.forSortedValues(new TreeSet<>(ImmutableSet.of(String.valueOf(row))))
|
||||
.computeBitmapResult(resultFactory)
|
||||
.get(i));
|
||||
|
|
|
@ -49,6 +49,7 @@ import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
|
|||
import org.apache.druid.segment.index.semantic.DruidPredicateIndexes;
|
||||
import org.apache.druid.segment.index.semantic.NullValueIndex;
|
||||
import org.apache.druid.segment.index.semantic.StringValueSetIndexes;
|
||||
import org.apache.druid.segment.index.semantic.ValueIndexes;
|
||||
import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory;
|
||||
import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory;
|
||||
import org.apache.druid.testing.InitializedNullHandlingTest;
|
||||
|
@ -248,6 +249,7 @@ public class ScalarStringColumnSupplierTest extends InitializedNullHandlingTest
|
|||
ColumnValueSelector<?> valueSelector = column.makeColumnValueSelector(offset);
|
||||
DimensionSelector dimSelector = column.makeDimensionSelector(offset, null);
|
||||
|
||||
ValueIndexes valueIndexes = supplier.as(ValueIndexes.class);
|
||||
StringValueSetIndexes valueSetIndex = supplier.as(StringValueSetIndexes.class);
|
||||
DruidPredicateIndexes predicateIndex = supplier.as(DruidPredicateIndexes.class);
|
||||
NullValueIndex nullValueIndex = supplier.as(NullValueIndex.class);
|
||||
|
@ -270,6 +272,7 @@ public class ScalarStringColumnSupplierTest extends InitializedNullHandlingTest
|
|||
Assert.assertEquals(dimSelector.idLookup().lookupId(dimSelectorLookupVal), dimSelector.getRow().get(0));
|
||||
|
||||
Assert.assertTrue(valueSetIndex.forValue(row).computeBitmapResult(resultFactory).get(i));
|
||||
Assert.assertTrue(valueIndexes.forValue(row, ColumnType.STRING).computeBitmapResult(resultFactory).get(i));
|
||||
Assert.assertTrue(valueSetIndex.forSortedValues(new TreeSet<>(ImmutableSet.of(row)))
|
||||
.computeBitmapResult(resultFactory)
|
||||
.get(i));
|
||||
|
|
|
@ -47,9 +47,11 @@ import org.apache.druid.segment.data.BitmapSerdeFactory;
|
|||
import org.apache.druid.segment.data.CompressionFactory;
|
||||
import org.apache.druid.segment.data.FrontCodedIndexed;
|
||||
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
|
||||
import org.apache.druid.segment.index.semantic.ArrayElementIndexes;
|
||||
import org.apache.druid.segment.index.semantic.DruidPredicateIndexes;
|
||||
import org.apache.druid.segment.index.semantic.NullValueIndex;
|
||||
import org.apache.druid.segment.index.semantic.StringValueSetIndexes;
|
||||
import org.apache.druid.segment.index.semantic.ValueIndexes;
|
||||
import org.apache.druid.segment.vector.NoFilterVectorOffset;
|
||||
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
|
||||
import org.apache.druid.segment.vector.VectorObjectSelector;
|
||||
|
@ -377,6 +379,15 @@ public class VariantColumnSupplierTest extends InitializedNullHandlingTest
|
|||
Assert.assertNull(predicateIndex);
|
||||
NullValueIndex nullValueIndex = supplier.as(NullValueIndex.class);
|
||||
Assert.assertNotNull(nullValueIndex);
|
||||
ValueIndexes valueIndexes = supplier.as(ValueIndexes.class);
|
||||
ArrayElementIndexes arrayElementIndexes = supplier.as(ArrayElementIndexes.class);
|
||||
if (expectedType.getSingleType() != null && expectedType.getSingleType().isArray()) {
|
||||
Assert.assertNotNull(valueIndexes);
|
||||
Assert.assertNotNull(arrayElementIndexes);
|
||||
} else {
|
||||
Assert.assertNull(valueIndexes);
|
||||
Assert.assertNull(arrayElementIndexes);
|
||||
}
|
||||
|
||||
SortedMap<String, FieldTypeInfo.MutableTypeSet> fields = column.getFieldTypeInfo();
|
||||
Assert.assertEquals(1, fields.size());
|
||||
|
@ -397,6 +408,10 @@ public class VariantColumnSupplierTest extends InitializedNullHandlingTest
|
|||
Assert.assertArrayEquals(((List) row).toArray(), (Object[]) valueSelector.getObject());
|
||||
if (expectedType.getSingleType() != null) {
|
||||
Assert.assertArrayEquals(((List) row).toArray(), (Object[]) vectorObjectSelector.getObjectVector()[0]);
|
||||
Assert.assertTrue(valueIndexes.forValue(row, expectedType.getSingleType()).computeBitmapResult(resultFactory).get(i));
|
||||
for (Object o : ((List) row)) {
|
||||
Assert.assertTrue("Failed on row: " + row, arrayElementIndexes.containsValue(o, expectedType.getSingleType().getElementType()).computeBitmapResult(resultFactory).get(i));
|
||||
}
|
||||
} else {
|
||||
// mixed type vector object selector coerces to the most common type
|
||||
Assert.assertArrayEquals(ExprEval.ofType(expressionType, row).asArray(), (Object[]) vectorObjectSelector.getObjectVector()[0]);
|
||||
|
@ -440,6 +455,9 @@ public class VariantColumnSupplierTest extends InitializedNullHandlingTest
|
|||
}
|
||||
}
|
||||
Assert.assertTrue(nullValueIndex.get().computeBitmapResult(resultFactory).get(i));
|
||||
if (expectedType.getSingleType() != null) {
|
||||
Assert.assertFalse(arrayElementIndexes.containsValue(null, expectedType.getSingleType()).computeBitmapResult(resultFactory).get(i));
|
||||
}
|
||||
}
|
||||
|
||||
offset.increment();
|
||||
|
|
Loading…
Reference in New Issue