mirror of https://github.com/apache/druid.git
Configurable compressRunOnSerialization for Roaring bitmaps. (#3228)
Defaults to true, which is a change in behavior (this used to be false and unconfigurable).
This commit is contained in:
parent
5d9fd0a713
commit
ea03906fcf
|
@ -153,7 +153,7 @@ public class BoundFilterBenchmark
|
|||
{
|
||||
step = (END_INT - START_INT) / cardinality;
|
||||
final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
|
||||
final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory();
|
||||
final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory(null);
|
||||
final List<Integer> ints = generateInts();
|
||||
final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
|
|
|
@ -89,7 +89,7 @@ public class DimensionPredicateFilterBenchmark
|
|||
public void setup() throws IOException
|
||||
{
|
||||
final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
|
||||
final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory();
|
||||
final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory(null);
|
||||
final List<Integer> ints = generateInts();
|
||||
final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(
|
||||
FluentIterable.from(ints)
|
||||
|
|
|
@ -128,10 +128,25 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
|
|||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|`bitmap`|String|The type of bitmap index to create. Choose from `roaring` or `concise`.|no (default == `concise`)|
|
||||
|`bitmap`|Object|Compression format for bitmap indexes. Should be a JSON object; see below for options.|no (defaults to Concise)|
|
||||
|`dimensionCompression`|String|Compression format for dimension columns. Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)|
|
||||
|`metricCompression`|String|Compression format for metric columns. Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)|
|
||||
|
||||
##### Bitmap types
|
||||
|
||||
For Concise bitmaps:
|
||||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|`type`|String|Must be `concise`.|yes|
|
||||
|
||||
For Roaring bitmaps:
|
||||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|`type`|String|Must be `roaring`.|yes|
|
||||
|`compressRunOnSerialization`|Boolean|Use run-length encoding where it is estimated to be more space efficient.|no (default == `true`)|
|
||||
|
||||
### KafkaSupervisorIOConfig
|
||||
|
||||
|Field|Type|Description|Required|
|
||||
|
|
|
@ -191,9 +191,24 @@ The following properties can be used to tune how the MapReduce job is configured
|
|||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|bitmap|String|The type of bitmap index to create. Choose from `roaring` or `concise`, or null to use the default (`concise`).|No|
|
||||
|dimensionCompression|String|Compression format for dimension columns. Choose from `LZ4`, `LZF`, or `uncompressed`. The default is `LZ4`.|No|
|
||||
|metricCompression|String|Compression format for metric columns. Choose from `LZ4`, `LZF`, or `uncompressed`. The default is `LZ4`.|No|
|
||||
|bitmap|Object|Compression format for bitmap indexes. Should be a JSON object; see below for options.|no (defaults to Concise)|
|
||||
|dimensionCompression|String|Compression format for dimension columns. Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)|
|
||||
|metricCompression|String|Compression format for metric columns. Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)|
|
||||
|
||||
##### Bitmap types
|
||||
|
||||
For Concise bitmaps:
|
||||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|type|String|Must be `concise`.|yes|
|
||||
|
||||
For Roaring bitmaps:
|
||||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|type|String|Must be `roaring`.|yes|
|
||||
|compressRunOnSerialization|Boolean|Use run-length encoding where it is estimated to be more space efficient.|no (default == `true`)|
|
||||
|
||||
### Partitioning specification
|
||||
|
||||
|
|
|
@ -167,13 +167,28 @@ The following policies are available:
|
|||
* `messageTime` – Can be used for non-"current time" as long as that data is relatively in sequence. Events are rejected if they are less than `windowPeriod` from the event with the latest timestamp. Hand off only occurs if an event is seen after the segmentGranularity and `windowPeriod` (hand off will not periodically occur unless you have a constant stream of data).
|
||||
* `none` – All events are accepted. Never hands off data unless shutdown() is called on the configured firehose.
|
||||
|
||||
### Index Spec
|
||||
#### IndexSpec
|
||||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|bitmap|String|The type of bitmap index to create. Choose from `roaring` or `concise`, or null to use the default (`concise`).|No|
|
||||
|dimensionCompression|String|Compression format for dimension columns. Choose from `LZ4`, `LZF`, or `uncompressed`. The default is `LZ4`.|No|
|
||||
|metricCompression|String|Compression format for metric columns. Choose from `LZ4`, `LZF`, or `uncompressed`. The default is `LZ4`.|No|
|
||||
|bitmap|Object|Compression format for bitmap indexes. Should be a JSON object; see below for options.|no (defaults to Concise)|
|
||||
|dimensionCompression|String|Compression format for dimension columns. Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)|
|
||||
|metricCompression|String|Compression format for metric columns. Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)|
|
||||
|
||||
##### Bitmap types
|
||||
|
||||
For Concise bitmaps:
|
||||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|type|String|Must be `concise`.|yes|
|
||||
|
||||
For Roaring bitmaps:
|
||||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|type|String|Must be `roaring`.|yes|
|
||||
|compressRunOnSerialization|Boolean|Use run-length encoding where it is estimated to be more space efficient.|no (default == `true`)|
|
||||
|
||||
#### Sharding
|
||||
|
||||
|
|
|
@ -116,16 +116,29 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
|
|||
|
||||
#### IndexSpec
|
||||
|
||||
The indexSpec defines segment storage format options to be used at indexing
|
||||
time, such as bitmap type, and column compression formats.
|
||||
The indexSpec defines segment storage format options to be used at indexing time, such as bitmap type and column
|
||||
compression formats. The indexSpec is optional and default parameters will be used if not specified.
|
||||
|
||||
The indexSpec is optional and default parameters will be used if not specified.
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|bitmap|Object|Compression format for bitmap indexes. Should be a JSON object; see below for options.|no (defaults to Concise)|
|
||||
|dimensionCompression|String|Compression format for dimension columns. Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)|
|
||||
|metricCompression|String|Compression format for metric columns. Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)|
|
||||
|
||||
|property|description|possible values|default|required?|
|
||||
|--------|-----------|---------------|-------|---------|
|
||||
|bitmap|type of bitmap compression to use for inverted indices.|`"concise"`, `"roaring"`|`"concise"`|no|
|
||||
|dimensionCompression|compression format for dimension columns|`"uncompressed"`, `"lz4"`, `"lzf"`|`"lz4"`|no|
|
||||
|metricCompression|compression format for metric columns, defaults to LZ4|`"lz4"`, `"lzf"`|`"lz4"`|no|
|
||||
##### Bitmap types
|
||||
|
||||
For Concise bitmaps:
|
||||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|type|String|Must be `concise`.|yes|
|
||||
|
||||
For Roaring bitmaps:
|
||||
|
||||
|Field|Type|Description|Required|
|
||||
|-----|----|-----------|--------|
|
||||
|type|String|Must be `roaring`.|yes|
|
||||
|compressRunOnSerialization|Boolean|Use run-length encoding where it is estimated to be more space efficient.|no (default == `true`)|
|
||||
|
||||
Segment Merging Tasks
|
||||
---------------------
|
||||
|
|
|
@ -283,7 +283,7 @@ public class HadoopConverterJobTest
|
|||
new HadoopDruidConverterConfig(
|
||||
DATASOURCE,
|
||||
interval,
|
||||
new IndexSpec(new RoaringBitmapSerdeFactory(), "uncompressed", "uncompressed"),
|
||||
new IndexSpec(new RoaringBitmapSerdeFactory(null), "uncompressed", "uncompressed"),
|
||||
oldSemgments,
|
||||
true,
|
||||
tmpDir.toURI(),
|
||||
|
@ -386,7 +386,7 @@ public class HadoopConverterJobTest
|
|||
new HadoopDruidConverterConfig(
|
||||
DATASOURCE,
|
||||
interval,
|
||||
new IndexSpec(new RoaringBitmapSerdeFactory(), "uncompressed", "uncompressed"),
|
||||
new IndexSpec(new RoaringBitmapSerdeFactory(null), "uncompressed", "uncompressed"),
|
||||
oldSemgments,
|
||||
true,
|
||||
tmpDir.toURI(),
|
||||
|
|
|
@ -517,7 +517,7 @@ public class TaskSerdeTest
|
|||
);
|
||||
final ConvertSegmentTask convertSegmentTaskOriginal = ConvertSegmentTask.create(
|
||||
segment,
|
||||
new IndexSpec(new RoaringBitmapSerdeFactory(), "lzf", "uncompressed"),
|
||||
new IndexSpec(new RoaringBitmapSerdeFactory(null), "lzf", "uncompressed"),
|
||||
false,
|
||||
true,
|
||||
null
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.roaringbitmap.IntIterator;
|
|||
public class BitmapOffset implements Offset
|
||||
{
|
||||
private static final int INVALID_VALUE = -1;
|
||||
private static final BitmapFactory ROARING_BITMAP_FACTORY = new RoaringBitmapSerdeFactory(false).getBitmapFactory();
|
||||
|
||||
private final IntIterator itr;
|
||||
private final BitmapFactory bitmapFactory;
|
||||
|
@ -44,13 +45,12 @@ public class BitmapOffset implements Offset
|
|||
{
|
||||
ImmutableBitmap roaringBitmap = bitmapIndex;
|
||||
if (!(bitmapIndex instanceof WrappedImmutableRoaringBitmap)) {
|
||||
final BitmapFactory factory = RoaringBitmapSerdeFactory.bitmapFactory;
|
||||
final MutableBitmap bitmap = factory.makeEmptyMutableBitmap();
|
||||
final MutableBitmap bitmap = ROARING_BITMAP_FACTORY.makeEmptyMutableBitmap();
|
||||
final IntIterator iterator = bitmapIndex.iterator();
|
||||
while (iterator.hasNext()) {
|
||||
bitmap.add(iterator.next());
|
||||
}
|
||||
roaringBitmap = factory.makeImmutableBitmap(bitmap);
|
||||
roaringBitmap = ROARING_BITMAP_FACTORY.makeImmutableBitmap(bitmap);
|
||||
}
|
||||
return ((WrappedImmutableRoaringBitmap) roaringBitmap).getBitmap().getReverseIntIterator();
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
|
||||
package io.druid.segment.data;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.google.common.collect.Ordering;
|
||||
import com.metamx.collections.bitmap.BitmapFactory;
|
||||
import com.metamx.collections.bitmap.ImmutableBitmap;
|
||||
|
@ -32,8 +34,28 @@ import java.nio.ByteBuffer;
|
|||
*/
|
||||
public class RoaringBitmapSerdeFactory implements BitmapSerdeFactory
|
||||
{
|
||||
public static final ObjectStrategy<ImmutableBitmap> objectStrategy = new ImmutableRoaringBitmapObjectStrategy();
|
||||
public static final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
|
||||
private static final boolean DEFAULT_COMPRESS_RUN_ON_SERIALIZATION = true;
|
||||
private static final ObjectStrategy<ImmutableBitmap> objectStrategy = new ImmutableRoaringBitmapObjectStrategy();
|
||||
|
||||
private final boolean compressRunOnSerialization;
|
||||
private final BitmapFactory bitmapFactory;
|
||||
|
||||
@JsonCreator
|
||||
public RoaringBitmapSerdeFactory(
|
||||
@JsonProperty("compressRunOnSerialization") Boolean compressRunOnSerialization
|
||||
)
|
||||
{
|
||||
this.compressRunOnSerialization = compressRunOnSerialization == null
|
||||
? DEFAULT_COMPRESS_RUN_ON_SERIALIZATION
|
||||
: compressRunOnSerialization;
|
||||
this.bitmapFactory = new RoaringBitmapFactory(this.compressRunOnSerialization);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public boolean getCompressRunOnSerialization()
|
||||
{
|
||||
return compressRunOnSerialization;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ObjectStrategy<ImmutableBitmap> getObjectStrategy()
|
||||
|
|
|
@ -91,7 +91,7 @@ public class IndexMergerTest
|
|||
false
|
||||
),
|
||||
ImmutableSet.of(
|
||||
new RoaringBitmapSerdeFactory(),
|
||||
new RoaringBitmapSerdeFactory(null),
|
||||
new ConciseBitmapSerdeFactory()
|
||||
),
|
||||
ImmutableSet.of(
|
||||
|
|
|
@ -35,7 +35,7 @@ public class IndexSpecTest
|
|||
final String json = "{ \"bitmap\" : { \"type\" : \"roaring\" }, \"dimensionCompression\" : \"lz4\", \"metricCompression\" : \"lzf\" }";
|
||||
|
||||
final IndexSpec spec = objectMapper.readValue(json, IndexSpec.class);
|
||||
Assert.assertEquals(new RoaringBitmapSerdeFactory(), spec.getBitmapSerdeFactory());
|
||||
Assert.assertEquals(new RoaringBitmapSerdeFactory(null), spec.getBitmapSerdeFactory());
|
||||
Assert.assertEquals(CompressedObjectStrategy.CompressionStrategy.LZ4, spec.getDimensionCompressionStrategy());
|
||||
Assert.assertEquals(CompressedObjectStrategy.CompressionStrategy.LZF, spec.getMetricCompressionStrategy());
|
||||
|
||||
|
|
|
@ -30,7 +30,9 @@ public class BitmapSerdeFactoryTest
|
|||
public void testSerialization() throws Exception
|
||||
{
|
||||
ObjectMapper mapper = new DefaultObjectMapper();
|
||||
Assert.assertEquals("{\"type\":\"roaring\"}", mapper.writeValueAsString(new RoaringBitmapSerdeFactory()));
|
||||
Assert.assertEquals("{\"type\":\"roaring\",\"compressRunOnSerialization\":true}", mapper.writeValueAsString(new RoaringBitmapSerdeFactory(null)));
|
||||
Assert.assertEquals("{\"type\":\"roaring\",\"compressRunOnSerialization\":false}", mapper.writeValueAsString(new RoaringBitmapSerdeFactory(false)));
|
||||
Assert.assertEquals("{\"type\":\"roaring\",\"compressRunOnSerialization\":true}", mapper.writeValueAsString(new RoaringBitmapSerdeFactory(true)));
|
||||
Assert.assertEquals("{\"type\":\"concise\"}", mapper.writeValueAsString(new ConciseBitmapSerdeFactory()));
|
||||
Assert.assertEquals("{\"type\":\"concise\"}", mapper.writeValueAsString(BitmapSerde.createLegacyFactory()));
|
||||
Assert.assertEquals("{\"type\":\"concise\"}", mapper.writeValueAsString(new BitmapSerde.DefaultBitmapSerdeFactory()));
|
||||
|
@ -41,7 +43,17 @@ public class BitmapSerdeFactoryTest
|
|||
public void testDeserialization() throws Exception
|
||||
{
|
||||
ObjectMapper mapper = new DefaultObjectMapper();
|
||||
Assert.assertTrue(mapper.readValue("{\"type\":\"roaring\"}", BitmapSerdeFactory.class) instanceof RoaringBitmapSerdeFactory);
|
||||
final BitmapSerdeFactory roaringFactory = mapper.readValue("{\"type\":\"roaring\"}", BitmapSerdeFactory.class);
|
||||
Assert.assertTrue(roaringFactory instanceof RoaringBitmapSerdeFactory);
|
||||
Assert.assertTrue(((RoaringBitmapSerdeFactory)roaringFactory).getCompressRunOnSerialization());
|
||||
|
||||
final BitmapSerdeFactory compressingRoaringFactory = mapper.readValue(
|
||||
"{\"type\":\"roaring\", \"compressRunOnSerialization\":false}",
|
||||
BitmapSerdeFactory.class
|
||||
);
|
||||
Assert.assertTrue(compressingRoaringFactory instanceof RoaringBitmapSerdeFactory);
|
||||
Assert.assertFalse(((RoaringBitmapSerdeFactory)compressingRoaringFactory).getCompressRunOnSerialization());
|
||||
|
||||
Assert.assertTrue(mapper.readValue("{\"type\":\"concise\"}", BitmapSerdeFactory.class) instanceof ConciseBitmapSerdeFactory);
|
||||
Assert.assertTrue(mapper.readValue("{\"type\":\"BitmapSerde$SomeRandomClass\"}", BitmapSerdeFactory.class) instanceof ConciseBitmapSerdeFactory);
|
||||
}
|
||||
|
|
|
@ -156,7 +156,7 @@ public abstract class BaseFilterTest
|
|||
|
||||
final Map<String, BitmapSerdeFactory> bitmapSerdeFactories = ImmutableMap.<String, BitmapSerdeFactory>of(
|
||||
"concise", new ConciseBitmapSerdeFactory(),
|
||||
"roaring", new RoaringBitmapSerdeFactory()
|
||||
"roaring", new RoaringBitmapSerdeFactory(true)
|
||||
);
|
||||
|
||||
final Map<String, IndexMerger> indexMergers = ImmutableMap.<String, IndexMerger>of(
|
||||
|
|
|
@ -71,7 +71,7 @@ public class ExtractionDimFilterTest
|
|||
{
|
||||
return ImmutableList.of(
|
||||
new Object[]{new ConciseBitmapFactory(), new ConciseBitmapSerdeFactory()},
|
||||
new Object[]{new RoaringBitmapFactory(), new RoaringBitmapSerdeFactory()}
|
||||
new Object[]{new RoaringBitmapFactory(), new RoaringBitmapSerdeFactory(null)}
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -320,7 +320,7 @@ public class DumpSegment extends GuiceRunnable
|
|||
if (bitmapFactory instanceof ConciseBitmapFactory) {
|
||||
bitmapSerdeFactory = new ConciseBitmapSerdeFactory();
|
||||
} else if (bitmapFactory instanceof RoaringBitmapFactory) {
|
||||
bitmapSerdeFactory = new RoaringBitmapSerdeFactory();
|
||||
bitmapSerdeFactory = new RoaringBitmapSerdeFactory(null);
|
||||
} else {
|
||||
throw new ISE(
|
||||
"Don't know which BitmapSerdeFactory to use for BitmapFactory[%s]!",
|
||||
|
|
Loading…
Reference in New Issue