Support for disabling bitmap indexes. (#5402)

* Support for disabling bitmap indexes.

Can save space for columns where bitmap indexes are pointless (like
free-form text).

* Remove import.

* Fix CompactionTaskTest.

* Update for review comments.

* Review comments, tests.

* Fix test.
This commit is contained in:
Gian Merlino 2018-02-28 19:19:56 -08:00 committed by GitHub
parent 6a3a5350b8
commit e4eaee3806
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 376 additions and 140 deletions

View File

@ -120,11 +120,13 @@ public abstract class DimensionSchema
private final String name;
private final MultiValueHandling multiValueHandling;
private final boolean createBitmapIndex;
protected DimensionSchema(String name, MultiValueHandling multiValueHandling)
protected DimensionSchema(String name, MultiValueHandling multiValueHandling, boolean createBitmapIndex)
{
this.name = Preconditions.checkNotNull(name, "Dimension name cannot be null.");
this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
this.createBitmapIndex = createBitmapIndex;
}
@JsonProperty
@ -139,6 +141,12 @@ public abstract class DimensionSchema
return multiValueHandling;
}
/**
 * Reports whether a bitmap index should be created for this dimension.
 *
 * Exposed to JSON under the property name {@code createBitmapIndex}.
 *
 * @return the {@code createBitmapIndex} value supplied to the constructor
 */
@JsonProperty("createBitmapIndex")
public boolean hasBitmapIndex()
{
  return this.createBitmapIndex;
}
@JsonIgnore
public abstract String getTypeName();
@ -146,7 +154,7 @@ public abstract class DimensionSchema
public abstract ValueType getValueType();
@Override
public boolean equals(Object o)
public boolean equals(final Object o)
{
if (this == o) {
return true;
@ -154,33 +162,29 @@ public abstract class DimensionSchema
if (o == null || getClass() != o.getClass()) {
return false;
}
DimensionSchema that = (DimensionSchema) o;
if (!name.equals(that.name)) {
return false;
}
if (!getValueType().equals(that.getValueType())) {
return false;
}
return Objects.equals(multiValueHandling, that.multiValueHandling);
final DimensionSchema that = (DimensionSchema) o;
return createBitmapIndex == that.createBitmapIndex &&
Objects.equals(name, that.name) &&
Objects.equals(getTypeName(), that.getTypeName()) &&
Objects.equals(getValueType(), that.getValueType()) &&
multiValueHandling == that.multiValueHandling;
}
@Override
public int hashCode()
{
return Objects.hash(name, getValueType(), multiValueHandling);
return Objects.hash(name, multiValueHandling, createBitmapIndex, getTypeName(), getValueType());
}
@Override
public String toString()
{
return "DimensionSchema{" +
"name='" + name + "'" +
", valueType='" + getValueType() + "'" +
", multiValueHandling='" + getMultiValueHandling() + "'" +
"}";
"name='" + name + '\'' +
", valueType=" + getValueType() +
", typeName=" + getTypeName() +
", multiValueHandling=" + multiValueHandling +
", createBitmapIndex=" + createBitmapIndex +
'}';
}
}

View File

@ -36,6 +36,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@PublicApi
public class DimensionsSpec
@ -56,17 +57,9 @@ public class DimensionsSpec
final DimensionSchema.MultiValueHandling multiValueHandling
)
{
return Lists.transform(
dimNames,
new Function<String, DimensionSchema>()
{
@Override
public DimensionSchema apply(String input)
{
return new StringDimensionSchema(input, multiValueHandling);
}
}
);
return dimNames.stream()
.map(input -> new StringDimensionSchema(input, multiValueHandling, true))
.collect(Collectors.toList());
}
public static DimensionSchema convertSpatialSchema(SpatialDimensionSchema spatialSchema)

View File

@ -19,7 +19,6 @@
package io.druid.data.input.impl;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
@ -28,7 +27,7 @@ public class DoubleDimensionSchema extends DimensionSchema
@JsonCreator
public DoubleDimensionSchema(@JsonProperty("name") String name)
{
super(name, null);
super(name, null, false);
}
@Override

View File

@ -30,7 +30,7 @@ public class FloatDimensionSchema extends DimensionSchema
@JsonProperty("name") String name
)
{
super(name, null);
super(name, null, false);
}
@Override

View File

@ -30,7 +30,7 @@ public class LongDimensionSchema extends DimensionSchema
@JsonProperty("name") String name
)
{
super(name, null);
super(name, null, false);
}
@Override

View File

@ -43,7 +43,7 @@ public class NewSpatialDimensionSchema extends DimensionSchema
@JsonProperty("dims") List<String> dims
)
{
super(name, null);
super(name, null, true);
this.dims = dims;
}

View File

@ -25,6 +25,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class StringDimensionSchema extends DimensionSchema
{
private static final boolean DEFAULT_CREATE_BITMAP_INDEX = true;
@JsonCreator
public static StringDimensionSchema create(String name)
{
@ -34,15 +36,16 @@ public class StringDimensionSchema extends DimensionSchema
@JsonCreator
public StringDimensionSchema(
@JsonProperty("name") String name,
@JsonProperty("multiValueHandling") MultiValueHandling multiValueHandling
@JsonProperty("multiValueHandling") MultiValueHandling multiValueHandling,
@JsonProperty("createBitmapIndex") Boolean createBitmapIndex
)
{
super(name, multiValueHandling);
super(name, multiValueHandling, createBitmapIndex == null ? DEFAULT_CREATE_BITMAP_INDEX : createBitmapIndex);
}
public StringDimensionSchema(String name)
{
this(name, null);
this(name, null, DEFAULT_CREATE_BITMAP_INDEX);
}
@Override

View File

@ -0,0 +1,49 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.data.input.impl;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.Assert;
import org.junit.Test;
/**
 * JSON round-trip tests for {@link DimensionSchema} implementations.
 */
public class DimensionSchemaTest
{
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  /**
   * Serializes a {@link StringDimensionSchema} to JSON, reads it back through the base
   * {@link DimensionSchema} type, and checks that the result equals the original.
   */
  @Test
  public void testStringDimensionSchemaSerde() throws Exception
  {
    // Name-only constructor: multi-value handling and bitmap indexing are left at their defaults.
    final StringDimensionSchema defaultSchema = new StringDimensionSchema("foo");
    final String defaultJson = OBJECT_MAPPER.writeValueAsString(defaultSchema);
    final DimensionSchema defaultRoundTrip = OBJECT_MAPPER.readValue(defaultJson, DimensionSchema.class);
    Assert.assertEquals(defaultSchema, defaultRoundTrip);

    // Fully specified schema: explicit multi-value handling, bitmap index disabled.
    final StringDimensionSchema customSchema = new StringDimensionSchema(
        "foo",
        DimensionSchema.MultiValueHandling.ARRAY,
        false
    );
    final String customJson = OBJECT_MAPPER.writeValueAsString(customSchema);
    final DimensionSchema customRoundTrip = OBJECT_MAPPER.readValue(customJson, DimensionSchema.class);
    Assert.assertEquals(customSchema, customRoundTrip);
  }
}

View File

@ -19,6 +19,7 @@
package io.druid.data.input.impl;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import junit.framework.Assert;
import org.junit.Test;
@ -30,7 +31,11 @@ import java.util.List;
*/
public class DimensionsSpecSerdeTest
{
private final ObjectMapper mapper = new ObjectMapper();
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
static {
OBJECT_MAPPER.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
}
@Test
public void testDimensionsSpecSerde() throws Exception
@ -59,9 +64,9 @@ public class DimensionsSpecSerdeTest
+ "\"spatialDimensions\": [{\"dimName\":\"IMPR\", \"dims\":[\"S\",\"P\",\"Q\",\"R\"]}]"
+ "}";
DimensionsSpec actual = mapper.readValue(
mapper.writeValueAsString(
mapper.readValue(jsonStr, DimensionsSpec.class)
DimensionsSpec actual = OBJECT_MAPPER.readValue(
OBJECT_MAPPER.writeValueAsString(
OBJECT_MAPPER.readValue(jsonStr, DimensionsSpec.class)
),
DimensionsSpec.class
);

View File

@ -198,7 +198,13 @@ handle all formatting decisions on their own, without using the ParseSpec.
#### Dimension Schema
A dimension schema specifies the type and name of a dimension to be ingested.
For example, the following `dimensionsSpec` section from a `dataSchema` ingests one column as Long (`countryNum`), two columns as Float (`userLatitude`, `userLongitude`), and the other columns as Strings:
For string columns, the dimension schema can also be used to enable or disable bitmap indexing by setting the
`createBitmapIndex` boolean. By default, bitmap indexes are enabled for all string columns. Only string columns can have
bitmap indexes; they are not supported for numeric columns.
For example, the following `dimensionsSpec` section from a `dataSchema` ingests one column as Long (`countryNum`), two
columns as Float (`userLatitude`, `userLongitude`), and the other columns as Strings, with bitmap indexes disabled
for the `comment` column:
```json
"dimensionsSpec" : {
@ -215,6 +221,11 @@ For example, the following `dimensionsSpec` section from a `dataSchema` ingests
"country",
"region",
"city",
{
"type": "string",
"name": "comment",
"createBitmapIndex": false
},
{
"type": "long",
"name": "countryNum"
@ -233,7 +244,6 @@ For example, the following `dimensionsSpec` section from a `dataSchema` ingests
}
```
## GranularitySpec
The default granularity spec is `uniform`, and can be changed by setting the `type` field.

View File

@ -354,7 +354,8 @@ public class CompactionTask extends AbstractTask
createDimensionSchema(
column.getCapabilities().getType(),
dimension,
dimensionHandler.getMultivalueHandling()
dimensionHandler.getMultivalueHandling(),
column.getCapabilities().hasBitmapIndexes()
)
);
}
@ -402,7 +403,8 @@ public class CompactionTask extends AbstractTask
private static DimensionSchema createDimensionSchema(
ValueType type,
String name,
MultiValueHandling multiValueHandling
MultiValueHandling multiValueHandling,
boolean hasBitmapIndexes
)
{
switch (type) {
@ -428,7 +430,7 @@ public class CompactionTask extends AbstractTask
);
return new DoubleDimensionSchema(name);
case STRING:
return new StringDimensionSchema(name, multiValueHandling);
return new StringDimensionSchema(name, multiValueHandling, hasBitmapIndexes);
default:
throw new ISE("Unsupported value type[%s] for dimension[%s]", type, name);
}

View File

@ -69,8 +69,8 @@ import io.druid.segment.SimpleQueryableIndex;
import io.druid.segment.column.Column;
import io.druid.segment.column.ColumnBuilder;
import io.druid.segment.column.ValueType;
import io.druid.segment.data.CompressionStrategy;
import io.druid.segment.data.CompressionFactory.LongEncodingStrategy;
import io.druid.segment.data.CompressionStrategy;
import io.druid.segment.data.ListIndexed;
import io.druid.segment.data.RoaringBitmapSerdeFactory;
import io.druid.segment.incremental.IncrementalIndex;
@ -109,13 +109,13 @@ public class CompactionTaskTest
private static final Interval COMPACTION_INTERVAL = Intervals.of("2017-01-01/2017-06-01");
private static final Map<Interval, DimensionSchema> MIXED_TYPE_COLUMN_MAP = ImmutableMap.of(
Intervals.of("2017-01-01/2017-02-01"),
new StringDimensionSchema(MIXED_TYPE_COLUMN, null),
new StringDimensionSchema(MIXED_TYPE_COLUMN),
Intervals.of("2017-02-01/2017-03-01"),
new StringDimensionSchema(MIXED_TYPE_COLUMN, null),
new StringDimensionSchema(MIXED_TYPE_COLUMN),
Intervals.of("2017-03-01/2017-04-01"),
new StringDimensionSchema(MIXED_TYPE_COLUMN, null),
new StringDimensionSchema(MIXED_TYPE_COLUMN),
Intervals.of("2017-04-01/2017-05-01"),
new StringDimensionSchema(MIXED_TYPE_COLUMN, null),
new StringDimensionSchema(MIXED_TYPE_COLUMN),
Intervals.of("2017-05-01/2017-06-01"),
new DoubleDimensionSchema(MIXED_TYPE_COLUMN)
);
@ -138,6 +138,7 @@ public class CompactionTaskTest
for (int i = 0; i < 5; i++) {
final StringDimensionSchema schema = new StringDimensionSchema(
"string_dim_" + i,
null,
null
);
DIMENSIONS.put(schema.getName(), schema);

View File

@ -25,6 +25,11 @@
"dimensions":[
"page"
],
"filter":{
"type":"selector",
"dimension":"language",
"value":"zh"
},
"aggregations":[
{
"type":"count",

View File

@ -37,8 +37,18 @@
},
"dimensionsSpec": {
"dimensions": [
"page", "language", "user", "unpatrolled", "newPage", "robot", "anonymous",
"namespace", "continent", "country", "region", "city"
"page",
{"type": "string", "name": "language", "createBitmapIndex": false},
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
}
}

View File

@ -62,16 +62,16 @@ public final class DimensionHandlerUtils
)
{
if (capabilities == null) {
return new StringDimensionHandler(dimensionName, multiValueHandling);
return new StringDimensionHandler(dimensionName, multiValueHandling, true);
}
multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
if (capabilities.getType() == ValueType.STRING) {
if (!capabilities.isDictionaryEncoded() || !capabilities.hasBitmapIndexes()) {
throw new IAE("String column must have dictionary encoding and bitmap index.");
if (!capabilities.isDictionaryEncoded()) {
throw new IAE("String column must have dictionary encoding.");
}
return new StringDimensionHandler(dimensionName, multiValueHandling);
return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes());
}
if (capabilities.getType() == ValueType.LONG) {
@ -87,7 +87,7 @@ public final class DimensionHandlerUtils
}
// Return a StringDimensionHandler by default (null columns will be treated as String typed)
return new StringDimensionHandler(dimensionName, multiValueHandling);
return new StringDimensionHandler(dimensionName, multiValueHandling, true);
}
public static List<ValueType> getValueTypesFromDimensionSpecs(List<DimensionSpec> dimSpecs)

View File

@ -21,12 +21,13 @@ package io.druid.segment;
import com.google.common.primitives.Ints;
import io.druid.data.input.impl.DimensionSchema.MultiValueHandling;
import io.druid.segment.writeout.SegmentWriteOutMedium;
import io.druid.java.util.common.ISE;
import io.druid.segment.column.Column;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.DictionaryEncodedColumn;
import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts;
import io.druid.segment.writeout.SegmentWriteOutMedium;
import javax.annotation.Nullable;
import java.io.Closeable;
@ -37,11 +38,13 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
{
private final String dimensionName;
private final MultiValueHandling multiValueHandling;
private final boolean hasBitmapIndexes;
public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling)
public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
{
this.dimensionName = dimensionName;
this.multiValueHandling = multiValueHandling;
this.hasBitmapIndexes = hasBitmapIndexes;
}
@Override
@ -207,18 +210,26 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
@Override
public DimensionIndexer<Integer, int[], String> makeIndexer()
{
return new StringDimensionIndexer(multiValueHandling);
return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes);
}
@Override
public DimensionMergerV9 makeMerger(
public DimensionMergerV9<int[]> makeMerger(
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
ColumnCapabilities capabilities,
ProgressIndicator progress
)
{
// Sanity-check capabilities.
if (hasBitmapIndexes != capabilities.hasBitmapIndexes()) {
throw new ISE(
"capabilities.hasBitmapIndexes[%s] != this.hasBitmapIndexes[%s]",
capabilities.hasBitmapIndexes(),
hasBitmapIndexes
);
}
return new StringDimensionMergerV9(dimensionName, indexSpec, segmentWriteOutMedium, capabilities, progress);
}
}

View File

@ -200,12 +200,14 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
private final DimensionDictionary dimLookup;
private final MultiValueHandling multiValueHandling;
private final boolean hasBitmapIndexes;
private SortedDimensionDictionary sortedLookup;
public StringDimensionIndexer(MultiValueHandling multiValueHandling)
public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
{
this.dimLookup = new DimensionDictionary();
this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
this.hasBitmapIndexes = hasBitmapIndexes;
}
@Override
@ -640,6 +642,10 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
int[] key, int rowNum, MutableBitmap[] bitmapIndexes, BitmapFactory factory
)
{
if (!hasBitmapIndexes) {
throw new UnsupportedOperationException("This column does not include bitmap indexes");
}
for (int dimValIdx : key) {
if (bitmapIndexes[dimValIdx] == null) {
bitmapIndexes[dimValIdx] = factory.makeEmptyMutableBitmap();

View File

@ -38,14 +38,14 @@ import io.druid.segment.data.ArrayIndexed;
import io.druid.segment.data.BitmapSerdeFactory;
import io.druid.segment.data.BitmapValues;
import io.druid.segment.data.ByteBufferWriter;
import io.druid.segment.data.V3CompressedVSizeColumnarMultiIntsSerializer;
import io.druid.segment.data.ColumnarIntsSerializer;
import io.druid.segment.data.CompressedVSizeColumnarIntsSerializer;
import io.druid.segment.data.CompressionStrategy;
import io.druid.segment.data.GenericIndexed;
import io.druid.segment.data.GenericIndexedWriter;
import io.druid.segment.data.ImmutableRTreeObjectStrategy;
import io.druid.segment.data.Indexed;
import io.druid.segment.data.ColumnarIntsSerializer;
import io.druid.segment.data.V3CompressedVSizeColumnarMultiIntsSerializer;
import io.druid.segment.data.VSizeColumnarIntsSerializer;
import io.druid.segment.data.VSizeColumnarMultiIntsSerializer;
import io.druid.segment.serde.DictionaryEncodedColumnPartSerde;
@ -275,6 +275,10 @@ public class StringDimensionMergerV9 implements DimensionMergerV9<int[]>
@Override
public void writeIndexes(List<IntBuffer> segmentRowNumConversions) throws IOException
{
if (!capabilities.hasBitmapIndexes()) {
return;
}
long dimStartTime = System.currentTimeMillis();
final BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory();

View File

@ -297,6 +297,8 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
ValueType type = TYPE_MAP.get(dimSchema.getValueType());
String dimName = dimSchema.getName();
ColumnCapabilitiesImpl capabilities = makeCapabilitesFromValueType(type);
capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex());
if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) {
capabilities.setHasSpatialIndexes(true);
} else {

View File

@ -58,11 +58,13 @@ import java.nio.channels.WritableByteChannel;
public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
{
private static final int NO_FLAGS = 0;
private static final int STARTING_FLAGS = Feature.NO_BITMAP_INDEX.getMask();
enum Feature
{
MULTI_VALUE,
MULTI_VALUE_V3;
MULTI_VALUE_V3,
NO_BITMAP_INDEX;
public boolean isSet(int flags)
{
@ -79,8 +81,8 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
{
UNCOMPRESSED_SINGLE_VALUE, // 0x0
UNCOMPRESSED_MULTI_VALUE, // 0x1
COMPRESSED; // 0x2
COMPRESSED, // 0x2
UNCOMPRESSED_WITH_FLAGS; // 0x3
public static VERSION fromByte(byte b)
{
@ -143,7 +145,7 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
public static class SerializerBuilder
{
private VERSION version = null;
private int flags = NO_FLAGS;
private int flags = STARTING_FLAGS;
private GenericIndexedWriter<String> dictionaryWriter = null;
private ColumnarIntsSerializer valueWriter = null;
private BitmapSerdeFactory bitmapSerdeFactory = null;
@ -163,8 +165,14 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
return this;
}
public SerializerBuilder withBitmapIndex(GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter)
public SerializerBuilder withBitmapIndex(@Nullable GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter)
{
if (bitmapIndexWriter == null) {
flags |= Feature.NO_BITMAP_INDEX.getMask();
} else {
flags &= ~Feature.NO_BITMAP_INDEX.getMask();
}
this.bitmapIndexWriter = bitmapIndexWriter;
return this;
}
@ -204,6 +212,11 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
public DictionaryEncodedColumnPartSerde build()
{
if (mustWriteFlags(flags) && version.compareTo(VERSION.COMPRESSED) < 0) {
// Must upgrade version so we can write out flags.
this.version = VERSION.UNCOMPRESSED_WITH_FLAGS;
}
return new DictionaryEncodedColumnPartSerde(
byteOrder,
bitmapSerdeFactory,
@ -309,20 +322,23 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
);
builder.setHasMultipleValues(hasMultipleValues).setDictionaryEncodedColumn(dictionaryEncodedColumnSupplier);
GenericIndexed<ImmutableBitmap> rBitmaps = GenericIndexed.read(
buffer, bitmapSerdeFactory.getObjectStrategy(), builder.getFileMapper()
);
builder.setBitmapIndex(
new BitmapIndexColumnPartSupplier(
bitmapSerdeFactory.getBitmapFactory(),
rBitmaps,
rDictionary
)
);
if (!Feature.NO_BITMAP_INDEX.isSet(rFlags)) {
GenericIndexed<ImmutableBitmap> rBitmaps = GenericIndexed.read(
buffer,
bitmapSerdeFactory.getObjectStrategy(),
builder.getFileMapper()
);
builder.setBitmapIndex(
new BitmapIndexColumnPartSupplier(
bitmapSerdeFactory.getBitmapFactory(),
rBitmaps,
rDictionary
)
);
}
ImmutableRTree rSpatialIndex = null;
if (buffer.hasRemaining()) {
rSpatialIndex =
ImmutableRTree rSpatialIndex =
new ImmutableRTreeObjectStrategy(bitmapSerdeFactory.getBitmapFactory()).fromByteBufferWithSize(buffer);
builder.setSpatialIndex(new SpatialIndexColumnPartSupplier(rSpatialIndex));
}
@ -333,6 +349,7 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
{
switch (version) {
case UNCOMPRESSED_SINGLE_VALUE:
case UNCOMPRESSED_WITH_FLAGS:
return VSizeColumnarInts.readFromByteBuffer(buffer);
case COMPRESSED:
return CompressedVSizeColumnarIntsSupplier.fromByteBuffer(buffer, byteOrder);
@ -349,13 +366,20 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
case UNCOMPRESSED_MULTI_VALUE: {
return VSizeColumnarMultiInts.readFromByteBuffer(buffer);
}
case UNCOMPRESSED_WITH_FLAGS: {
if (Feature.MULTI_VALUE.isSet(flags)) {
return VSizeColumnarMultiInts.readFromByteBuffer(buffer);
} else {
throw new IAE("Unrecognized multi-value flag[%d] for version[%s]", flags, version);
}
}
case COMPRESSED: {
if (Feature.MULTI_VALUE.isSet(flags)) {
return CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder);
} else if (Feature.MULTI_VALUE_V3.isSet(flags)) {
return V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder);
} else {
throw new IAE("Unrecognized multi-value flag[%d]", flags);
throw new IAE("Unrecognized multi-value flag[%d] for version[%s]", flags, version);
}
}
default:
@ -364,4 +388,10 @@ public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
}
};
}
/**
 * Decides whether the given feature flags have to be written out explicitly.
 *
 * @param flags bitwise OR of {@code Feature} masks
 * @return {@code false} when {@code flags} is {@code NO_FLAGS} or exactly the {@code MULTI_VALUE} mask,
 *         {@code true} for every other value
 */
private static boolean mustWriteFlags(final int flags)
{
  // Version codes < COMPRESSED already imply "no flags" and "MULTI_VALUE only", so those two need no
  // explicit flags. Every other combination, MULTI_VALUE_V3 included, must be written.
  if (flags == NO_FLAGS) {
    return false;
  }
  return flags != Feature.MULTI_VALUE.getMask();
}
}

View File

@ -51,6 +51,7 @@ import io.druid.segment.data.BitmapSerdeFactory;
import io.druid.segment.data.BitmapValues;
import io.druid.segment.data.CompressionFactory;
import io.druid.segment.data.CompressionStrategy;
import io.druid.segment.data.ConciseBitmapSerdeFactory;
import io.druid.segment.data.IncrementalIndexTest;
import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.incremental.IncrementalIndexAdapter;
@ -78,6 +79,7 @@ import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class IndexMergerTestBase
{
@ -131,12 +133,13 @@ public class IndexMergerTestBase
private final IndexSpec indexSpec;
private final IndexIO indexIO;
private final boolean useBitmapIndexes;
@Rule
public final CloserRule closer = new CloserRule(false);
protected IndexMergerTestBase(
BitmapSerdeFactory bitmapSerdeFactory,
@Nullable BitmapSerdeFactory bitmapSerdeFactory,
CompressionStrategy compressionStrategy,
CompressionStrategy dimCompressionStrategy,
CompressionFactory.LongEncodingStrategy longEncodingStrategy,
@ -144,12 +147,13 @@ public class IndexMergerTestBase
)
{
this.indexSpec = makeIndexSpec(
bitmapSerdeFactory,
bitmapSerdeFactory != null ? bitmapSerdeFactory : new ConciseBitmapSerdeFactory(),
compressionStrategy,
dimCompressionStrategy,
longEncodingStrategy
);
this.indexIO = TestHelper.getTestIndexIO(segmentWriteOutMediumFactory);
this.useBitmapIndexes = bitmapSerdeFactory != null;
}
@Test
@ -923,7 +927,7 @@ public class IndexMergerTestBase
{
IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
.withDimensionsSpec(new DimensionsSpec(
DimensionsSpec.getDefaultSchemas(Arrays.asList("dimA", "dimB", "dimC")),
makeDimensionSchemas(Arrays.asList("dimA", "dimB", "dimC")),
null,
null
))
@ -1014,15 +1018,20 @@ public class IndexMergerTestBase
Assert.assertArrayEquals(new int[][]{{2}, {0}}, boatList.get(3).getDims());
Assert.assertArrayEquals(new Object[]{2L}, boatList.get(3).getMetrics());
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dimA", ""));
checkBitmapIndex(Lists.newArrayList(2), adapter.getBitmapIndex("dimA", "1"));
checkBitmapIndex(Lists.newArrayList(3), adapter.getBitmapIndex("dimA", "2"));
Assert.assertEquals(useBitmapIndexes, adapter.getCapabilities("dimA").hasBitmapIndexes());
Assert.assertEquals(useBitmapIndexes, adapter.getCapabilities("dimC").hasBitmapIndexes());
checkBitmapIndex(new ArrayList<Integer>(), adapter.getBitmapIndex("dimB", ""));
if (useBitmapIndexes) {
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dimA", ""));
checkBitmapIndex(Lists.newArrayList(2), adapter.getBitmapIndex("dimA", "1"));
checkBitmapIndex(Lists.newArrayList(3), adapter.getBitmapIndex("dimA", "2"));
checkBitmapIndex(Lists.newArrayList(2, 3), adapter.getBitmapIndex("dimC", ""));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dimC", "1"));
checkBitmapIndex(Lists.newArrayList(1), adapter.getBitmapIndex("dimC", "2"));
checkBitmapIndex(Lists.newArrayList(2, 3), adapter.getBitmapIndex("dimC", ""));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dimC", "1"));
checkBitmapIndex(Lists.newArrayList(1), adapter.getBitmapIndex("dimC", "2"));
}
checkBitmapIndex(new ArrayList<>(), adapter.getBitmapIndex("dimB", ""));
}
@ -1139,14 +1148,20 @@ public class IndexMergerTestBase
Assert.assertArrayEquals(new int[][]{{2}, {0}}, boatList2.get(4).getDims());
Assert.assertArrayEquals(new Object[]{1L}, boatList2.get(4).getMetrics());
// dimA always has bitmap indexes, since it has them in indexA (it comes in through discovery).
Assert.assertTrue(adapter2.getCapabilities("dimA").hasBitmapIndexes());
checkBitmapIndex(Lists.newArrayList(0, 1, 2), adapter2.getBitmapIndex("dimA", ""));
checkBitmapIndex(Lists.newArrayList(3), adapter2.getBitmapIndex("dimA", "1"));
checkBitmapIndex(Lists.newArrayList(4), adapter2.getBitmapIndex("dimA", "2"));
checkBitmapIndex(Lists.newArrayList(3, 4), adapter2.getBitmapIndex("dimB", ""));
checkBitmapIndex(Lists.newArrayList(0), adapter2.getBitmapIndex("dimB", "1"));
checkBitmapIndex(Lists.newArrayList(1), adapter2.getBitmapIndex("dimB", "2"));
checkBitmapIndex(Lists.newArrayList(2), adapter2.getBitmapIndex("dimB", "3"));
// dimB may or may not have bitmap indexes, since it comes in through explicit definition in indexB2.
Assert.assertEquals(useBitmapIndexes, adapter2.getCapabilities("dimB").hasBitmapIndexes());
if (useBitmapIndexes) {
checkBitmapIndex(Lists.newArrayList(3, 4), adapter2.getBitmapIndex("dimB", ""));
checkBitmapIndex(Lists.newArrayList(0), adapter2.getBitmapIndex("dimB", "1"));
checkBitmapIndex(Lists.newArrayList(1), adapter2.getBitmapIndex("dimB", "2"));
checkBitmapIndex(Lists.newArrayList(2), adapter2.getBitmapIndex("dimB", "3"));
}
}
@Test
@ -1565,7 +1580,7 @@ public class IndexMergerTestBase
private void checkBitmapIndex(ArrayList<Integer> expected, BitmapValues real)
{
Assert.assertEquals(expected.size(), real.size());
Assert.assertEquals("bitmap size", expected.size(), real.size());
int i = 0;
for (IntIterator iterator = real.iterator(); iterator.hasNext(); ) {
int index = iterator.nextInt();
@ -2122,7 +2137,7 @@ public class IndexMergerTestBase
Arrays.asList(
new LongDimensionSchema("dimA"),
new FloatDimensionSchema("dimB"),
new StringDimensionSchema("dimC")
new StringDimensionSchema("dimC", MultiValueHandling.SORTED_ARRAY, useBitmapIndexes)
)
);
@ -2281,7 +2296,7 @@ public class IndexMergerTestBase
private IncrementalIndex getIndexWithDims(List<String> dims)
{
IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
.withDimensionsSpec(new DimensionsSpec(DimensionsSpec.getDefaultSchemas(dims), null, null))
.withDimensionsSpec(new DimensionsSpec(makeDimensionSchemas(dims), null, null))
.withMetrics(new CountAggregatorFactory("count"))
.build();
@ -2384,7 +2399,7 @@ public class IndexMergerTestBase
List<Rowboat> boatList;
// xaab-axbx + abx-xab --> aabx-abxx + abx-abx --> abx-abx + aabx-abxx
schema = DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2"), MultiValueHandling.SORTED_ARRAY);
schema = makeDimensionSchemas(Arrays.asList("dim1", "dim2"), MultiValueHandling.SORTED_ARRAY);
index = persistAndLoad(schema, rows);
adapter = new QueryableIndexIndexableAdapter(index);
boatList = ImmutableList.copyOf(adapter.getRows());
@ -2397,17 +2412,22 @@ public class IndexMergerTestBase
Assert.assertArrayEquals(new int[][]{{0, 1, 2}, {0, 1, 2}}, boatList.get(0).getDims());
Assert.assertArrayEquals(new int[][]{{0, 0, 1, 2}, {0, 1, 2, 2}}, boatList.get(1).getDims());
checkBitmapIndex(new ArrayList<Integer>(), adapter.getBitmapIndex("dim1", ""));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "a"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "b"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "x"));
Assert.assertEquals(useBitmapIndexes, adapter.getCapabilities("dim1").hasBitmapIndexes());
Assert.assertEquals(useBitmapIndexes, adapter.getCapabilities("dim2").hasBitmapIndexes());
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "a"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "b"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "x"));
if (useBitmapIndexes) {
checkBitmapIndex(new ArrayList<>(), adapter.getBitmapIndex("dim1", ""));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "a"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "b"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "x"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "a"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "b"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "x"));
}
// xaab-axbx + abx-xab --> abx-abx + abx-abx --> abx-abx
schema = DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2"), MultiValueHandling.SORTED_SET);
schema = makeDimensionSchemas(Arrays.asList("dim1", "dim2"), MultiValueHandling.SORTED_SET);
index = persistAndLoad(schema, rows);
Assert.assertEquals(1, index.getColumn(Column.TIME_COLUMN_NAME).getLength());
@ -2420,17 +2440,22 @@ public class IndexMergerTestBase
Assert.assertEquals(1, boatList.size());
Assert.assertArrayEquals(new int[][]{{0, 1, 2}, {0, 1, 2}}, boatList.get(0).getDims());
checkBitmapIndex(new ArrayList<Integer>(), adapter.getBitmapIndex("dim1", ""));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim1", "a"));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim1", "b"));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim1", "x"));
Assert.assertEquals(useBitmapIndexes, adapter.getCapabilities("dim1").hasBitmapIndexes());
Assert.assertEquals(useBitmapIndexes, adapter.getCapabilities("dim2").hasBitmapIndexes());
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim2", "a"));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim2", "b"));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim2", "x"));
if (useBitmapIndexes) {
checkBitmapIndex(new ArrayList<>(), adapter.getBitmapIndex("dim1", ""));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim1", "a"));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim1", "b"));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim1", "x"));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim2", "a"));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim2", "b"));
checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim2", "x"));
}
// xaab-axbx + abx-xab --> abx-xab + xaab-axbx
schema = DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2"), MultiValueHandling.ARRAY);
schema = makeDimensionSchemas(Arrays.asList("dim1", "dim2"), MultiValueHandling.ARRAY);
index = persistAndLoad(schema, rows);
Assert.assertEquals(2, index.getColumn(Column.TIME_COLUMN_NAME).getLength());
@ -2444,14 +2469,19 @@ public class IndexMergerTestBase
Assert.assertArrayEquals(new int[][]{{0, 1, 2}, {2, 0, 1}}, boatList.get(0).getDims());
Assert.assertArrayEquals(new int[][]{{2, 0, 0, 1}, {0, 2, 1, 2}}, boatList.get(1).getDims());
checkBitmapIndex(new ArrayList<Integer>(), adapter.getBitmapIndex("dim1", ""));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "a"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "b"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "x"));
Assert.assertEquals(useBitmapIndexes, adapter.getCapabilities("dim1").hasBitmapIndexes());
Assert.assertEquals(useBitmapIndexes, adapter.getCapabilities("dim2").hasBitmapIndexes());
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "a"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "b"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "x"));
if (useBitmapIndexes) {
checkBitmapIndex(new ArrayList<>(), adapter.getBitmapIndex("dim1", ""));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "a"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "b"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "x"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "a"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "b"));
checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "x"));
}
}
private QueryableIndex persistAndLoad(List<DimensionSchema> schema, InputRow... rows) throws IOException
@ -2464,4 +2494,25 @@ public class IndexMergerTestBase
final File tempDir = temporaryFolder.newFolder();
return closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, indexSpec, null)));
}
/**
 * Builds the dimension schemas for the given dimension names using
 * {@link MultiValueHandling#SORTED_ARRAY} as the multi-value handling.
 *
 * @param dimensions names of the dimensions to create schemas for
 *
 * @return the result of the two-argument overload called with {@code SORTED_ARRAY}
 */
private List<DimensionSchema> makeDimensionSchemas(final List<String> dimensions)
{
  final MultiValueHandling handling = MultiValueHandling.SORTED_ARRAY;
  return makeDimensionSchemas(dimensions, handling);
}
/**
 * Builds one {@link StringDimensionSchema} per dimension name, preserving the order of the input list.
 *
 * Every schema is constructed with the supplied multi-value handling and with this test's
 * {@code useBitmapIndexes} value as the third constructor argument (presumably the flag that controls
 * bitmap index creation -- confirm against the StringDimensionSchema constructor).
 *
 * @param dimensions         names of the dimensions to create schemas for
 * @param multiValueHandling multi-value handling passed to every schema
 *
 * @return a new list holding one schema per entry of {@code dimensions}
 */
private List<DimensionSchema> makeDimensionSchemas(
    final List<String> dimensions,
    final MultiValueHandling multiValueHandling
)
{
  final List<DimensionSchema> schemas = new ArrayList<>();
  for (final String dimensionName : dimensions) {
    schemas.add(new StringDimensionSchema(dimensionName, multiValueHandling, useBitmapIndexes));
  }
  return schemas;
}
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.segment;
import io.druid.segment.data.CompressionFactory.LongEncodingStrategy;
import io.druid.segment.data.CompressionStrategy;
import io.druid.segment.writeout.SegmentWriteOutMediumFactory;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
/**
 * Parameterized run of the {@link IndexMergerTestBase} suite using the V9 index merger returned by
 * {@code TestHelper.getTestIndexMergerV9}.
 *
 * NOTE(review): the first superclass constructor argument is passed as {@code null}. Judging by the class
 * name, this is presumably how the base class is told to run without bitmap indexes -- confirm against the
 * IndexMergerTestBase constructor.
 */
@RunWith(Parameterized.class)
public class NoBitmapIndexMergerV9Test extends IndexMergerTestBase
{
/**
 * Forwards the parameterized values to the base test and installs the V9 index merger.
 *
 * @param compressionStrategy          forwarded unchanged to the superclass constructor
 * @param dimCompressionStrategy       forwarded unchanged to the superclass constructor
 * @param longEncodingStrategy         forwarded unchanged to the superclass constructor
 * @param segmentWriteOutMediumFactory forwarded to the superclass constructor and also used to create the
 *                                     index merger
 */
public NoBitmapIndexMergerV9Test(
CompressionStrategy compressionStrategy,
CompressionStrategy dimCompressionStrategy,
LongEncodingStrategy longEncodingStrategy,
SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
)
{
super(
// Deliberately null; see the NOTE(review) in the class comment.
null,
compressionStrategy,
dimCompressionStrategy,
longEncodingStrategy,
segmentWriteOutMediumFactory
);
// indexMerger is not declared in this class; it is assigned here after the superclass constructor has run.
indexMerger = TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory);
}
}

View File

@ -104,15 +104,8 @@ public abstract class BaseFilterTest
// For filter tests, the test setup creates a segment.
// Creating a new segment for every test method call is pretty slow, so cache the StorageAdapters.
// Each thread gets its own map.
protected static ThreadLocal<Map<String, Map<String, Pair<StorageAdapter, Closeable>>>> adapterCache =
new ThreadLocal<Map<String, Map<String, Pair<StorageAdapter, Closeable>>>>()
{
@Override
protected Map<String, Map<String, Pair<StorageAdapter, Closeable>>> initialValue()
{
return new HashMap<>();
}
};
private static ThreadLocal<Map<String, Map<String, Pair<StorageAdapter, Closeable>>>> adapterCache =
ThreadLocal.withInitial(HashMap::new);
public BaseFilterTest(
String testName,

View File

@ -34,7 +34,7 @@ public class RowboatTest
{
DimensionHandler[] handlers = new DimensionHandler[size];
for (int i = 0; i < size; i++) {
handlers[i] = new StringDimensionHandler(String.valueOf(i), null);
handlers[i] = new StringDimensionHandler(String.valueOf(i), null, true);
}
return handlers;
}

View File

@ -39,6 +39,7 @@ import io.druid.query.lookup.LookupExtractionFn;
import io.druid.query.lookup.LookupExtractor;
import io.druid.segment.IndexBuilder;
import io.druid.segment.StorageAdapter;
import io.druid.segment.incremental.IncrementalIndexSchema;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Test;
@ -83,7 +84,17 @@ public class SelectorFilterTest extends BaseFilterTest
boolean optimize
)
{
super(testName, ROWS, indexBuilder, finisher, cnf, optimize);
super(
testName,
ROWS,
indexBuilder.schema(
new IncrementalIndexSchema.Builder()
.withDimensionsSpec(PARSER.getParseSpec().getDimensionsSpec()).build()
),
finisher,
cnf,
optimize
);
}
@AfterClass

View File

@ -45,9 +45,9 @@ public class IncrementalIndexMultiValueSpecTest
{
DimensionsSpec dimensionsSpec = new DimensionsSpec(
Arrays.<DimensionSchema>asList(
new StringDimensionSchema("string1", DimensionSchema.MultiValueHandling.ARRAY),
new StringDimensionSchema("string2", DimensionSchema.MultiValueHandling.SORTED_ARRAY),
new StringDimensionSchema("string3", DimensionSchema.MultiValueHandling.SORTED_SET)
new StringDimensionSchema("string1", DimensionSchema.MultiValueHandling.ARRAY, true),
new StringDimensionSchema("string2", DimensionSchema.MultiValueHandling.SORTED_ARRAY, true),
new StringDimensionSchema("string3", DimensionSchema.MultiValueHandling.SORTED_SET, true)
),
null, null
);