mirror of https://github.com/apache/druid.git
actually backwards compatible frontCoded string encoding strategy (#13996)
Parent: 51f3db2ce6
Commit: e3211e3be0
@@ -39,6 +39,7 @@ import org.apache.druid.segment.QueryableIndex;
 import org.apache.druid.segment.QueryableIndexSegment;
 import org.apache.druid.segment.QueryableIndexStorageAdapter;
 import org.apache.druid.segment.column.StringEncodingStrategy;
+import org.apache.druid.segment.data.FrontCodedIndexed;
 import org.apache.druid.segment.generator.GeneratorBasicSchemas;
 import org.apache.druid.segment.generator.GeneratorSchemaInfo;
 import org.apache.druid.segment.generator.SegmentGenerator;
@@ -449,7 +450,7 @@ public class SqlBenchmark
       if (stringEncoding.startsWith("front-coded")) {
         String[] split = stringEncoding.split("-");
         int bucketSize = Integer.parseInt(split[2]);
-        encodingStrategy = new StringEncodingStrategy.FrontCoded(bucketSize);
+        encodingStrategy = new StringEncodingStrategy.FrontCoded(bucketSize, FrontCodedIndexed.V1);
       } else {
         encodingStrategy = new StringEncodingStrategy.Utf8();
       }
@@ -38,6 +38,7 @@ import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.NestedDataDimensionSchema;
 import org.apache.druid.segment.QueryableIndex;
 import org.apache.druid.segment.column.StringEncodingStrategy;
+import org.apache.druid.segment.data.FrontCodedIndexed;
 import org.apache.druid.segment.generator.GeneratorBasicSchemas;
 import org.apache.druid.segment.generator.GeneratorSchemaInfo;
 import org.apache.druid.segment.generator.SegmentGenerator;
@@ -268,7 +269,7 @@ public class SqlNestedDataBenchmark
       if (stringEncoding.startsWith("front-coded")) {
         String[] split = stringEncoding.split("-");
         int bucketSize = Integer.parseInt(split[2]);
-        encodingStrategy = new StringEncodingStrategy.FrontCoded(bucketSize);
+        encodingStrategy = new StringEncodingStrategy.FrontCoded(bucketSize, FrontCodedIndexed.V1);
       } else {
         encodingStrategy = new StringEncodingStrategy.Utf8();
       }
@@ -477,7 +477,7 @@ The `indexSpec` object can include the following properties:
 |-----|-----------|-------|
 |bitmap|Compression format for bitmap indexes. Should be a JSON object with `type` set to `roaring` or `concise`.|`{"type": "roaring"}`|
 |dimensionCompression|Compression format for dimension columns. Options are `lz4`, `lzf`, `zstd`, or `uncompressed`.|`lz4`|
-|stringDictionaryEncoding|Encoding format for STRING value dictionaries used by STRING and COMPLEX<json> columns. <br /><br />Example to enable front coding: `{"type":"frontCoded", "bucketSize": 4}`<br />`bucketSize` is the number of values to place in a bucket to perform delta encoding. Must be a power of 2, maximum is 128. Defaults to 4.<br /> `formatVersion` can specify older versions for backwards compatibility during rolling upgrades, valid options are `0` and `1`. Defaults to `1`<br /><br />See [Front coding](#front-coding) for more information.|`{"type":"utf8"}`|
+|stringDictionaryEncoding|Encoding format for STRING value dictionaries used by STRING and COMPLEX<json> columns. <br /><br />Example to enable front coding: `{"type":"frontCoded", "bucketSize": 4}`<br />`bucketSize` is the number of values to place in a bucket to perform delta encoding. Must be a power of 2, maximum is 128. Defaults to 4.<br /> `formatVersion` can specify older versions for backwards compatibility during rolling upgrades, valid options are `0` and `1`. Defaults to `0` for backwards compatibility.<br /><br />See [Front coding](#front-coding) for more information.|`{"type":"utf8"}`|
 |metricCompression|Compression format for primitive type metric columns. Options are `lz4`, `lzf`, `zstd`, `uncompressed`, or `none` (which is more efficient than `uncompressed`, but not supported by older versions of Druid).|`lz4`|
 |longEncoding|Encoding format for long-typed columns. Applies regardless of whether they are dimensions or metrics. Options are `auto` or `longs`. `auto` encodes the values using offset or lookup table depending on column cardinality, and store them with variable size. `longs` stores the value as-is with 8 bytes each.|`longs`|
 |jsonCompression|Compression format to use for nested column raw data. Options are `lz4`, `lzf`, `zstd`, or `uncompressed`.|`lz4`|
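For reference, a minimal `indexSpec` snippet that sets the `stringDictionaryEncoding` options described in the table row above might look like the following. The keys (`stringDictionaryEncoding`, `type`, `bucketSize`, `formatVersion`) are the ones named in the docs; leaving every other `indexSpec` property at its default is an illustrative assumption.

```json
{
  "indexSpec": {
    "stringDictionaryEncoding": {
      "type": "frontCoded",
      "bucketSize": 4,
      "formatVersion": 0
    }
  }
}
```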
@@ -488,7 +488,7 @@ Front coding is an experimental feature starting in version 25.0. Front coding i
 
 You can enable front coding with all types of ingestion. For information on defining an `indexSpec` in a query context, see [SQL-based ingestion reference](../multi-stage-query/reference.md#context-parameters).
 
-> Front coding was originally introduced in Druid 25.0, and an improved 'version 1' was introduced in Druid 26.0, with typically faster read speed and smaller storage size. The current recommendation is to enable it in a staging environment and fully test your use case before using in production. By default, segments created with front coding enabled in Druid 26.0 are not backwards compatible with Druid 25.0, and those created with Druid 25.0 are not compatible with Druid versions older than 25.0. If using front coding in Druid 25.0 and upgrading to Druid 26.0, the `formatVersion` can be specified as `0` keep writing out the older format to enable seamless downgrades to Druid 25.0, and then later changed to `1` (or removed) once determined that rollback is not necessary.
+> Front coding was originally introduced in Druid 25.0, and an improved 'version 1' was introduced in Druid 26.0, with typically faster read speed and smaller storage size. The current recommendation is to enable it in a staging environment and fully test your use case before using in production. By default, segments created with front coding enabled in Druid 26.0 are backwards compatible with Druid 25.0, but those created with Druid 26.0 or 25.0 are not compatible with Druid versions older than 25.0. If using front coding in Druid 25.0 and upgrading to Druid 26.0, the `formatVersion` defaults to `0` to keep writing out the older format to enable seamless downgrades to Druid 25.0, and then later is recommended to be changed to `1` once determined that rollback is not necessary.
 
 Beyond these properties, each ingestion method has its own specific tuning properties. See the documentation for each
 [ingestion method](./index.md#ingestion-methods) for details.
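As a sketch of the rolling-upgrade flow the note above describes (same `stringDictionaryEncoding` keys as in the docs; the bucket size of 4 is just the documented default), keep writing the older format while a downgrade to Druid 25.0 must remain possible:

```json
{
  "stringDictionaryEncoding": {"type": "frontCoded", "bucketSize": 4, "formatVersion": 0}
}
```

Once rollback is ruled out, change `"formatVersion"` to `1` to pick up the smaller, faster version 1 layout.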
@@ -24,6 +24,7 @@ import nl.jqno.equalsverifier.EqualsVerifier;
 import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.TestHelper;
 import org.apache.druid.segment.column.StringEncodingStrategy;
+import org.apache.druid.segment.data.FrontCodedIndexed;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -54,7 +55,7 @@ public class MSQTuningConfigTest
             new IndexSpec(
                 null,
                 null,
-                new StringEncodingStrategy.FrontCoded(null),
+                new StringEncodingStrategy.FrontCoded(null, FrontCodedIndexed.V1),
                 null,
                 null,
                 null,
@@ -202,7 +202,7 @@ public class MultiStageQueryContextTest
     Assert.assertEquals(new IndexSpec(), decodeIndexSpec(Collections.emptyMap()));
 
     Assert.assertEquals(
-        new IndexSpec(null, null, new StringEncodingStrategy.FrontCoded(null), null, null, null, null),
+        new IndexSpec(null, null, new StringEncodingStrategy.FrontCoded(null, null), null, null, null, null),
         decodeIndexSpec("{\"stringDictionaryEncoding\":{\"type\":\"frontCoded\"}}")
     );
 
@@ -79,7 +79,7 @@ public final class FrontCodedIndexed implements Indexed<ByteBuffer>
 {
   public static final byte V0 = 0;
   public static final byte V1 = 1;
-  public static final byte DEFAULT_VERSION = V1;
+  public static final byte DEFAULT_VERSION = V0;
   public static final int DEFAULT_BUCKET_SIZE = 4;
 
   public static byte validateVersion(byte version)
@@ -49,6 +49,7 @@ import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde;
 import org.apache.druid.query.expression.TestExprMacroTable;
 import org.apache.druid.segment.column.ColumnType;
 import org.apache.druid.segment.column.StringEncodingStrategy;
+import org.apache.druid.segment.data.FrontCodedIndexed;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexSchema;
 import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
@@ -217,7 +218,7 @@ public class TestIndex
       new IndexSpec(
           null,
           null,
-          new StringEncodingStrategy.FrontCoded(4),
+          new StringEncodingStrategy.FrontCoded(4, FrontCodedIndexed.V1),
          null,
          null,
          null,
@@ -54,7 +54,8 @@ public class StringEncodingStrategyTest
     // this next assert seems silly, but its a sanity check to make us think hard before changing the default version,
     // to make us think of the backwards compatibility implications, as new versions of segment format stuff cannot be
    // downgraded to older versions of Druid and still read
-    Assert.assertEquals(FrontCodedIndexed.V1, FrontCodedIndexed.DEFAULT_VERSION);
+    // the default version should be changed to V1 after Druid 26.0 is released
+    Assert.assertEquals(FrontCodedIndexed.V0, FrontCodedIndexed.DEFAULT_VERSION);
   }
 
   @Test
@@ -84,6 +84,7 @@ import org.apache.druid.segment.column.RowSignature;
 import org.apache.druid.segment.column.StringEncodingStrategy;
 import org.apache.druid.segment.data.BitmapSerdeFactory;
 import org.apache.druid.segment.data.ConciseBitmapSerdeFactory;
+import org.apache.druid.segment.data.FrontCodedIndexed;
 import org.apache.druid.segment.data.IndexedInts;
 import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;
 import org.apache.druid.segment.filter.cnf.CNFFilterExplosionException;
@@ -350,7 +351,8 @@ public abstract class BaseFilterTest extends InitializedNullHandlingTest
 
     StringEncodingStrategy[] stringEncoding = new StringEncodingStrategy[]{
         new StringEncodingStrategy.Utf8(),
-        new StringEncodingStrategy.FrontCoded(4)
+        new StringEncodingStrategy.FrontCoded(4, FrontCodedIndexed.V0),
+        new StringEncodingStrategy.FrontCoded(4, FrontCodedIndexed.V1)
     };
     for (Map.Entry<String, BitmapSerdeFactory> bitmapSerdeFactoryEntry : bitmapSerdeFactories.entrySet()) {
       for (Map.Entry<String, SegmentWriteOutMediumFactory> segmentWriteOutMediumFactoryEntry :
@@ -55,6 +55,7 @@ import org.apache.druid.segment.QueryableIndexSegment;
 import org.apache.druid.segment.Segment;
 import org.apache.druid.segment.TestHelper;
 import org.apache.druid.segment.column.StringEncodingStrategy;
+import org.apache.druid.segment.data.FrontCodedIndexed;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexSchema;
 import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
@@ -99,7 +100,7 @@ public class SpatialFilterTest extends InitializedNullHandlingTest
     final IndexSpec frontCodedIndexSpec = new IndexSpec(
         null,
         null,
-        new StringEncodingStrategy.FrontCoded(4),
+        new StringEncodingStrategy.FrontCoded(4, FrontCodedIndexed.V1),
        null,
        null,
        null,