mirror of
https://github.com/apache/druid.git
synced 2025-02-17 07:25:02 +00:00
Adding zstandard compression library (#12408)
* Adding zstandard compression library * 1. Took @clintropolis's advice to have ZStandard decompressor use the byte array when the buffers are not direct. 2. Cleaned up checkstyle issues. * Fixing zstandard version to latest stable version in pom's and updating license files * Removing zstd from benchmarks and adding to processing (poms) * fix the intellij inspection issue * Removing the prefix v for the version in the license check for ztsd * Fixing license checks Co-authored-by: Rahul Gidwani <r_gidwani@apple.com>
This commit is contained in:
parent
79f86a0511
commit
7291c92f4f
@ -61,7 +61,9 @@ public class BaseColumnarLongsBenchmark
|
|||||||
* encoding of values within the block.
|
* encoding of values within the block.
|
||||||
*/
|
*/
|
||||||
@Param({
|
@Param({
|
||||||
|
"zstd-longs",
|
||||||
"lz4-longs",
|
"lz4-longs",
|
||||||
|
"zstd-auto",
|
||||||
"lz4-auto"
|
"lz4-auto"
|
||||||
})
|
})
|
||||||
String encoding;
|
String encoding;
|
||||||
@ -271,6 +273,26 @@ public class BaseColumnarLongsBenchmark
|
|||||||
CompressionStrategy.NONE
|
CompressionStrategy.NONE
|
||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
|
case "zstd-longs":
|
||||||
|
serializer = CompressionFactory.getLongSerializer(
|
||||||
|
encoding,
|
||||||
|
writeOutMedium,
|
||||||
|
"zstd-longs",
|
||||||
|
ByteOrder.LITTLE_ENDIAN,
|
||||||
|
CompressionFactory.LongEncodingStrategy.LONGS,
|
||||||
|
CompressionStrategy.ZSTD
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
case "zstd-auto":
|
||||||
|
serializer = CompressionFactory.getLongSerializer(
|
||||||
|
encoding,
|
||||||
|
writeOutMedium,
|
||||||
|
"zstd-auto",
|
||||||
|
ByteOrder.LITTLE_ENDIAN,
|
||||||
|
CompressionFactory.LongEncodingStrategy.AUTO,
|
||||||
|
CompressionStrategy.ZSTD
|
||||||
|
);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new RuntimeException("unknown encoding");
|
throw new RuntimeException("unknown encoding");
|
||||||
}
|
}
|
||||||
@ -290,6 +312,8 @@ public class BaseColumnarLongsBenchmark
|
|||||||
case "lz4-auto":
|
case "lz4-auto":
|
||||||
case "none-auto":
|
case "none-auto":
|
||||||
case "none-longs":
|
case "none-longs":
|
||||||
|
case "zstd-auto":
|
||||||
|
case "zstd-longs":
|
||||||
return CompressedColumnarLongsSupplier.fromByteBuffer(buffer, ByteOrder.LITTLE_ENDIAN).get();
|
return CompressedColumnarLongsSupplier.fromByteBuffer(buffer, ByteOrder.LITTLE_ENDIAN).get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,7 +233,8 @@ def build_compatible_license_names():
|
|||||||
|
|
||||||
compatible_licenses['BSD-2-Clause License'] = 'BSD-2-Clause License'
|
compatible_licenses['BSD-2-Clause License'] = 'BSD-2-Clause License'
|
||||||
compatible_licenses['BSD-2-Clause'] = 'BSD-2-Clause License'
|
compatible_licenses['BSD-2-Clause'] = 'BSD-2-Clause License'
|
||||||
compatible_licenses['BSD 2-Clause license'] = 'BSD 2-Clause License'
|
compatible_licenses['BSD 2-Clause license'] = 'BSD-2-Clause License'
|
||||||
|
compatible_licenses['BSD 2-Clause License'] = 'BSD-2-Clause License'
|
||||||
|
|
||||||
compatible_licenses['BSD-3-Clause License'] = 'BSD-3-Clause License'
|
compatible_licenses['BSD-3-Clause License'] = 'BSD-3-Clause License'
|
||||||
compatible_licenses['New BSD license'] = 'BSD-3-Clause License'
|
compatible_licenses['New BSD license'] = 'BSD-3-Clause License'
|
||||||
|
@ -208,8 +208,8 @@ The `tuningConfig` is optional and default parameters will be used if no `tuning
|
|||||||
|Field|Type|Description|Required|
|
|Field|Type|Description|Required|
|
||||||
|-----|----|-----------|--------|
|
|-----|----|-----------|--------|
|
||||||
|bitmap|Object|Compression format for bitmap indexes. Should be a JSON object. See [Bitmap types](#bitmap-types) below for options.|no (defaults to Roaring)|
|
|bitmap|Object|Compression format for bitmap indexes. Should be a JSON object. See [Bitmap types](#bitmap-types) below for options.|no (defaults to Roaring)|
|
||||||
|dimensionCompression|String|Compression format for dimension columns. Choose from `LZ4`, `LZF`, or `uncompressed`.|no (default == `LZ4`)|
|
|dimensionCompression|String|Compression format for dimension columns. Choose from `LZ4`, `LZF`, `ZSTD` or `uncompressed`.|no (default == `LZ4`)|
|
||||||
|metricCompression|String|Compression format for primitive type metric columns. Choose from `LZ4`, `LZF`, `uncompressed`, or `none`.|no (default == `LZ4`)|
|
|metricCompression|String|Compression format for primitive type metric columns. Choose from `LZ4`, `LZF`, `ZSTD`, `uncompressed` or `none`.|no (default == `LZ4`)|
|
||||||
|longEncoding|String|Encoding format for metric and dimension columns with type long. Choose from `auto` or `longs`. `auto` encodes the values using offset or lookup table depending on column cardinality, and store them with variable size. `longs` stores the value as is with 8 bytes each.|no (default == `longs`)|
|
|longEncoding|String|Encoding format for metric and dimension columns with type long. Choose from `auto` or `longs`. `auto` encodes the values using offset or lookup table depending on column cardinality, and store them with variable size. `longs` stores the value as is with 8 bytes each.|no (default == `longs`)|
|
||||||
|
|
||||||
##### Bitmap types
|
##### Bitmap types
|
||||||
|
@ -3964,7 +3964,7 @@ name: JNI binding for Zstd
|
|||||||
license_category: binary
|
license_category: binary
|
||||||
module: java-core
|
module: java-core
|
||||||
license_name: BSD-2-Clause License
|
license_name: BSD-2-Clause License
|
||||||
version: 1.3.3-1
|
version: 1.5.2-3
|
||||||
copyright: Luben Karavelov
|
copyright: Luben Karavelov
|
||||||
license_file_path: licenses/bin/zstd-jni.BSD2
|
license_file_path: licenses/bin/zstd-jni.BSD2
|
||||||
libraries:
|
libraries:
|
||||||
@ -5028,7 +5028,7 @@ libraries:
|
|||||||
name: DNS Java
|
name: DNS Java
|
||||||
license_category: binary
|
license_category: binary
|
||||||
module: java-core
|
module: java-core
|
||||||
license_name: BSD 2-Clause license
|
license_name: BSD-2-Clause License
|
||||||
version: 2.1.7
|
version: 2.1.7
|
||||||
libraries:
|
libraries:
|
||||||
- dnsjava: dnsjava
|
- dnsjava: dnsjava
|
||||||
|
2
pom.xml
2
pom.xml
@ -449,7 +449,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.luben</groupId>
|
<groupId>com.github.luben</groupId>
|
||||||
<artifactId>zstd-jni</artifactId>
|
<artifactId>zstd-jni</artifactId>
|
||||||
<version>1.3.3-1</version>
|
<version>1.5.2-3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson</groupId>
|
<groupId>com.fasterxml.jackson</groupId>
|
||||||
|
@ -169,6 +169,10 @@
|
|||||||
<groupId>io.netty</groupId>
|
<groupId>io.netty</groupId>
|
||||||
<artifactId>netty-common</artifactId>
|
<artifactId>netty-common</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.github.luben</groupId>
|
||||||
|
<artifactId>zstd-jni</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<!-- Tests -->
|
<!-- Tests -->
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -21,6 +21,7 @@ package org.apache.druid.segment.data;
|
|||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
import com.fasterxml.jackson.annotation.JsonValue;
|
import com.fasterxml.jackson.annotation.JsonValue;
|
||||||
|
import com.github.luben.zstd.Zstd;
|
||||||
import com.ning.compress.BufferRecycler;
|
import com.ning.compress.BufferRecycler;
|
||||||
import com.ning.compress.lzf.LZFDecoder;
|
import com.ning.compress.lzf.LZFDecoder;
|
||||||
import com.ning.compress.lzf.LZFEncoder;
|
import com.ning.compress.lzf.LZFEncoder;
|
||||||
@ -75,6 +76,21 @@ public enum CompressionStrategy
|
|||||||
return LZ4Compressor.DEFAULT_COMPRESSOR;
|
return LZ4Compressor.DEFAULT_COMPRESSOR;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
ZSTD((byte) 0x2) {
|
||||||
|
@Override
|
||||||
|
public Decompressor getDecompressor()
|
||||||
|
{
|
||||||
|
return ZstdDecompressor.DEFAULT_COMPRESSOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Compressor getCompressor()
|
||||||
|
{
|
||||||
|
return ZstdCompressor.DEFAULT_COMPRESSOR;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
UNCOMPRESSED((byte) 0xFF) {
|
UNCOMPRESSED((byte) 0xFF) {
|
||||||
@Override
|
@Override
|
||||||
public Decompressor getDecompressor()
|
public Decompressor getDecompressor()
|
||||||
@ -171,7 +187,7 @@ public enum CompressionStrategy
|
|||||||
/**
|
/**
|
||||||
* Allocates a buffer that should be passed to {@link #compress} method as input buffer. Different Compressors
|
* Allocates a buffer that should be passed to {@link #compress} method as input buffer. Different Compressors
|
||||||
* require (or work more efficiently with) different kinds of buffers.
|
* require (or work more efficiently with) different kinds of buffers.
|
||||||
*
|
* <p>
|
||||||
* If the allocated buffer is a direct buffer, it should be registered to be freed with the given Closer.
|
* If the allocated buffer is a direct buffer, it should be registered to be freed with the given Closer.
|
||||||
*/
|
*/
|
||||||
ByteBuffer allocateInBuffer(int inputSize, Closer closer)
|
ByteBuffer allocateInBuffer(int inputSize, Closer closer)
|
||||||
@ -182,9 +198,9 @@ public enum CompressionStrategy
|
|||||||
/**
|
/**
|
||||||
* Allocates a buffer that should be passed to {@link #compress} method as output buffer. Different Compressors
|
* Allocates a buffer that should be passed to {@link #compress} method as output buffer. Different Compressors
|
||||||
* require (or work more efficiently with) different kinds of buffers.
|
* require (or work more efficiently with) different kinds of buffers.
|
||||||
*
|
* <p>
|
||||||
* Allocates a buffer that is always enough to compress a byte sequence of the given size.
|
* Allocates a buffer that is always enough to compress a byte sequence of the given size.
|
||||||
*
|
* <p>
|
||||||
* If the allocated buffer is a direct buffer, it should be registered to be freed with the given Closer.
|
* If the allocated buffer is a direct buffer, it should be registered to be freed with the given Closer.
|
||||||
*/
|
*/
|
||||||
abstract ByteBuffer allocateOutBuffer(int inputSize, Closer closer);
|
abstract ByteBuffer allocateOutBuffer(int inputSize, Closer closer);
|
||||||
@ -344,6 +360,81 @@ public enum CompressionStrategy
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class ZstdCompressor extends Compressor
|
||||||
|
{
|
||||||
|
private static final ZstdCompressor DEFAULT_COMPRESSOR = new ZstdCompressor();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
ByteBuffer allocateInBuffer(int inputSize, Closer closer)
|
||||||
|
{
|
||||||
|
ByteBuffer inBuffer = ByteBuffer.allocateDirect(inputSize);
|
||||||
|
closer.register(() -> ByteBufferUtils.free(inBuffer));
|
||||||
|
return inBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
ByteBuffer allocateOutBuffer(int inputSize, Closer closer)
|
||||||
|
{
|
||||||
|
ByteBuffer outBuffer = ByteBuffer.allocateDirect((int) Zstd.compressBound(inputSize));
|
||||||
|
closer.register(() -> ByteBufferUtils.free(outBuffer));
|
||||||
|
return outBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ByteBuffer compress(ByteBuffer in, ByteBuffer out)
|
||||||
|
{
|
||||||
|
int position = in.position();
|
||||||
|
out.clear();
|
||||||
|
long sizeNeeded = Zstd.compressBound(in.remaining());
|
||||||
|
if (out.remaining() < sizeNeeded) {
|
||||||
|
throw new RuntimeException("Output buffer too small, please allocate more space. " + sizeNeeded + " required.");
|
||||||
|
}
|
||||||
|
Zstd.compress(out, in, Zstd.maxCompressionLevel());
|
||||||
|
in.position(position);
|
||||||
|
out.flip();
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class ZstdDecompressor implements Decompressor
|
||||||
|
{
|
||||||
|
private static final ZstdDecompressor DEFAULT_COMPRESSOR = new ZstdDecompressor();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void decompress(ByteBuffer in, int numBytes, ByteBuffer out)
|
||||||
|
{
|
||||||
|
out.clear();
|
||||||
|
if (!in.isDirect() || !out.isDirect()) {
|
||||||
|
// fall back to heap byte arrays if both buffers are not direct
|
||||||
|
final byte[] inputBytes = new byte[numBytes];
|
||||||
|
in.get(inputBytes);
|
||||||
|
try (final ResourceHolder<byte[]> outputBytesHolder = CompressedPools.getOutputBytes()) {
|
||||||
|
final byte[] outputBytes = outputBytesHolder.get();
|
||||||
|
int decompressedBytes = (int) Zstd.decompressByteArray(
|
||||||
|
outputBytes,
|
||||||
|
0,
|
||||||
|
outputBytes.length,
|
||||||
|
inputBytes,
|
||||||
|
0,
|
||||||
|
numBytes
|
||||||
|
);
|
||||||
|
out.put(outputBytes, 0, decompressedBytes);
|
||||||
|
out.flip();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int decompressedBytes = (int) Zstd.decompressDirectByteBuffer(
|
||||||
|
out,
|
||||||
|
out.position(),
|
||||||
|
out.remaining(),
|
||||||
|
in,
|
||||||
|
in.position(),
|
||||||
|
numBytes
|
||||||
|
);
|
||||||
|
out.limit(out.position() + decompressedBytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Logs info relating to whether LZ4 is using native or pure Java implementations
|
* Logs info relating to whether LZ4 is using native or pure Java implementations
|
||||||
*/
|
*/
|
||||||
|
@ -19,7 +19,6 @@
|
|||||||
|
|
||||||
package org.apache.druid.segment.data;
|
package org.apache.druid.segment.data;
|
||||||
|
|
||||||
import com.google.common.base.Function;
|
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
import org.apache.druid.collections.ResourceHolder;
|
import org.apache.druid.collections.ResourceHolder;
|
||||||
import org.apache.druid.java.util.common.ByteBufferUtils;
|
import org.apache.druid.java.util.common.ByteBufferUtils;
|
||||||
@ -40,7 +39,6 @@ import java.util.Collection;
|
|||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.concurrent.ArrayBlockingQueue;
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
import java.util.concurrent.BlockingQueue;
|
import java.util.concurrent.BlockingQueue;
|
||||||
import java.util.concurrent.Callable;
|
|
||||||
import java.util.concurrent.Future;
|
import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.ThreadPoolExecutor;
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
@ -53,14 +51,7 @@ public class CompressionStrategyTest
|
|||||||
{
|
{
|
||||||
return Iterables.transform(
|
return Iterables.transform(
|
||||||
Arrays.asList(CompressionStrategy.noNoneValues()),
|
Arrays.asList(CompressionStrategy.noNoneValues()),
|
||||||
new Function<CompressionStrategy, Object[]>()
|
compressionStrategy -> new Object[]{compressionStrategy}
|
||||||
{
|
|
||||||
@Override
|
|
||||||
public Object[] apply(CompressionStrategy compressionStrategy)
|
|
||||||
{
|
|
||||||
return new Object[]{compressionStrategy};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -101,22 +92,12 @@ public class CompressionStrategyTest
|
|||||||
public void testBasicOperations()
|
public void testBasicOperations()
|
||||||
{
|
{
|
||||||
ByteBuffer compressionOut = compressionStrategy.getCompressor().allocateOutBuffer(originalData.length, closer);
|
ByteBuffer compressionOut = compressionStrategy.getCompressor().allocateOutBuffer(originalData.length, closer);
|
||||||
ByteBuffer compressed = compressionStrategy.getCompressor().compress(ByteBuffer.wrap(originalData), compressionOut);
|
ByteBuffer compressionIn = compressionStrategy.getCompressor().allocateInBuffer(originalData.length, closer);
|
||||||
ByteBuffer output = ByteBuffer.allocate(originalData.length);
|
|
||||||
compressionStrategy.getDecompressor().decompress(compressed, compressed.remaining(), output);
|
|
||||||
byte[] checkArray = new byte[DATA_SIZER];
|
|
||||||
output.get(checkArray);
|
|
||||||
Assert.assertArrayEquals("Uncompressed data does not match", originalData, checkArray);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testDirectMemoryOperations()
|
|
||||||
{
|
|
||||||
ByteBuffer compressionOut = compressionStrategy.getCompressor().allocateOutBuffer(originalData.length, closer);
|
|
||||||
ByteBuffer compressed = compressionStrategy.getCompressor().compress(ByteBuffer.wrap(originalData), compressionOut);
|
|
||||||
|
|
||||||
try (final ResourceHolder<ByteBuffer> holder = ByteBufferUtils.allocateDirect(originalData.length)) {
|
try (final ResourceHolder<ByteBuffer> holder = ByteBufferUtils.allocateDirect(originalData.length)) {
|
||||||
final ByteBuffer output = holder.get();
|
final ByteBuffer output = holder.get();
|
||||||
|
compressionIn.put(originalData);
|
||||||
|
compressionIn.rewind();
|
||||||
|
ByteBuffer compressed = compressionStrategy.getCompressor().compress(compressionIn, compressionOut);
|
||||||
compressionStrategy.getDecompressor().decompress(compressed, compressed.remaining(), output);
|
compressionStrategy.getDecompressor().decompress(compressed, compressed.remaining(), output);
|
||||||
byte[] checkArray = new byte[DATA_SIZER];
|
byte[] checkArray = new byte[DATA_SIZER];
|
||||||
output.get(checkArray);
|
output.get(checkArray);
|
||||||
@ -140,27 +121,33 @@ public class CompressionStrategyTest
|
|||||||
for (int i = 0; i < numThreads; ++i) {
|
for (int i = 0; i < numThreads; ++i) {
|
||||||
results.add(
|
results.add(
|
||||||
threadPoolExecutor.submit(
|
threadPoolExecutor.submit(
|
||||||
new Callable<Boolean>()
|
() -> {
|
||||||
{
|
ByteBuffer compressionOut = compressionStrategy.getCompressor()
|
||||||
@Override
|
.allocateOutBuffer(originalData.length, closer);
|
||||||
public Boolean call()
|
ByteBuffer compressionIn = compressionStrategy.getCompressor()
|
||||||
{
|
.allocateInBuffer(originalData.length, closer);
|
||||||
ByteBuffer compressionOut = compressionStrategy.getCompressor().allocateOutBuffer(originalData.length, closer);
|
try {
|
||||||
ByteBuffer compressed = compressionStrategy.getCompressor().compress(ByteBuffer.wrap(originalData), compressionOut);
|
compressionIn.put(originalData);
|
||||||
ByteBuffer output = ByteBuffer.allocate(originalData.length);
|
compressionIn.position(0);
|
||||||
|
ByteBuffer compressed = compressionStrategy.getCompressor().compress(compressionIn, compressionOut);
|
||||||
|
ByteBuffer output = compressionStrategy.getCompressor().allocateOutBuffer(originalData.length, closer);
|
||||||
compressionStrategy.getDecompressor().decompress(compressed, compressed.remaining(), output);
|
compressionStrategy.getDecompressor().decompress(compressed, compressed.remaining(), output);
|
||||||
byte[] checkArray = new byte[DATA_SIZER];
|
byte[] checkArray = new byte[DATA_SIZER];
|
||||||
output.get(checkArray);
|
output.get(checkArray);
|
||||||
Assert.assertArrayEquals("Uncompressed data does not match", originalData, checkArray);
|
Assert.assertArrayEquals("Uncompressed data does not match", originalData, checkArray);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
finally {
|
||||||
|
ByteBufferUtils.free(compressionIn);
|
||||||
|
ByteBufferUtils.free(compressionOut);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
threadPoolExecutor.shutdown();
|
threadPoolExecutor.shutdown();
|
||||||
for (Future result : results) {
|
for (Future<Boolean> result : results) {
|
||||||
Assert.assertTrue((Boolean) result.get());
|
Assert.assertTrue(result.get());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,156 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.segment.data;
|
||||||
|
|
||||||
|
import com.google.common.base.Function;
|
||||||
|
import com.google.common.collect.Iterables;
|
||||||
|
import org.apache.druid.java.util.common.ByteBufferUtils;
|
||||||
|
import org.apache.druid.java.util.common.io.Closer;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.runner.RunWith;
|
||||||
|
import org.junit.runners.Parameterized;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.nio.channels.WritableByteChannel;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.BitSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.ThreadLocalRandom;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@RunWith(Parameterized.class)
|
||||||
|
public class TestColumnCompression
|
||||||
|
{
|
||||||
|
|
||||||
|
private final CompressionStrategy compressionType;
|
||||||
|
private ColumnarMultiInts compressed;
|
||||||
|
int bytes = 1;
|
||||||
|
int valuesPerRowBound = 5;
|
||||||
|
int filteredRowCount = 1000;
|
||||||
|
private BitSet filter;
|
||||||
|
private ByteBuffer buffer;
|
||||||
|
|
||||||
|
public TestColumnCompression(CompressionStrategy strategy)
|
||||||
|
{
|
||||||
|
compressionType = strategy;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Parameterized.Parameters
|
||||||
|
public static Iterable<Object[]> compressionStrategies()
|
||||||
|
{
|
||||||
|
return Arrays.stream(CompressionStrategy.values())
|
||||||
|
.filter(x -> !CompressionStrategy.NONE.equals(x))
|
||||||
|
.map(strategy -> new Object[]{strategy}).collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception
|
||||||
|
{
|
||||||
|
Random rand = ThreadLocalRandom.current();
|
||||||
|
List<int[]> rows = new ArrayList<>();
|
||||||
|
final int bound = 1 << bytes;
|
||||||
|
for (int i = 0; i < 0x100000; i++) {
|
||||||
|
int count = rand.nextInt(valuesPerRowBound) + 1;
|
||||||
|
int[] row = new int[rand.nextInt(count)];
|
||||||
|
for (int j = 0; j < row.length; j++) {
|
||||||
|
row[j] = rand.nextInt(bound);
|
||||||
|
}
|
||||||
|
rows.add(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer = serialize(
|
||||||
|
CompressedVSizeColumnarMultiIntsSupplier.fromIterable(
|
||||||
|
Iterables.transform(rows, (Function<int[], ColumnarInts>) input -> VSizeColumnarInts.fromArray(input, 20)),
|
||||||
|
bound - 1,
|
||||||
|
ByteOrder.nativeOrder(),
|
||||||
|
compressionType,
|
||||||
|
Closer.create()
|
||||||
|
)
|
||||||
|
);
|
||||||
|
this.compressed = CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(
|
||||||
|
buffer,
|
||||||
|
ByteOrder.nativeOrder()
|
||||||
|
).get();
|
||||||
|
|
||||||
|
filter = new BitSet();
|
||||||
|
for (int i = 0; i < filteredRowCount; i++) {
|
||||||
|
int rowToAccess = rand.nextInt(rows.size());
|
||||||
|
// Skip already selected rows if any
|
||||||
|
while (filter.get(rowToAccess)) {
|
||||||
|
rowToAccess = (rowToAccess + 1) % rows.size();
|
||||||
|
}
|
||||||
|
filter.set(rowToAccess);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown()
|
||||||
|
{
|
||||||
|
ByteBufferUtils.free(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ByteBuffer serialize(WritableSupplier<ColumnarMultiInts> writableSupplier) throws IOException
|
||||||
|
{
|
||||||
|
final ByteBuffer buffer = ByteBuffer.allocateDirect((int) writableSupplier.getSerializedSize());
|
||||||
|
WritableByteChannel channel = new WritableByteChannel()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public int write(ByteBuffer src)
|
||||||
|
{
|
||||||
|
int size = src.remaining();
|
||||||
|
buffer.put(src);
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isOpen()
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close()
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
writableSupplier.writeTo(channel, null);
|
||||||
|
buffer.rewind();
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCompressed()
|
||||||
|
{
|
||||||
|
for (int i = filter.nextSetBit(0); i >= 0; i = filter.nextSetBit(i + 1)) {
|
||||||
|
IndexedInts row = compressed.get(i);
|
||||||
|
for (int j = 0, rowSize = row != null ? row.size() : 0; j < rowSize; j++) {
|
||||||
|
row.get(j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user