mirror of https://github.com/apache/druid.git
Increase default DatasourceCompactionConfig.inputSegmentSizeBytes to Long.MAX_VALUE (#12381)
The current default value of inputSegmentSizeBytes is 400MB, which is too low for most compaction use cases, so most users are forced to override it. The default value is now increased to Long.MAX_VALUE.
This commit is contained in:
parent c5531be553
commit 984904779b
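Before this change, any dataSource whose per-time-chunk segment totals exceeded 400MB effectively required an override. A minimal illustration of such an override (the dataSource name and size are made up; the properties are the ones described in the docs table below):

```json
{
  "dataSource": "wikipedia",
  "inputSegmentSizeBytes": 5368709120,
  "skipOffsetFromLatest": "P1D"
}
```

With the new default of Long.MAX_VALUE (9223372036854775807), the size check no longer blocks compaction unless a limit is set explicitly.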
```diff
@@ -962,7 +962,7 @@ A description of the compaction config is:
 |--------|-----------|--------|
 |`dataSource`|dataSource name to be compacted.|yes|
 |`taskPriority`|[Priority](../ingestion/tasks.md#priority) of compaction task.|no (default = 25)|
-|`inputSegmentSizeBytes`|Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.|no (default = 419430400)|
+|`inputSegmentSizeBytes`|Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.|no (default = Long.MAX_VALUE)|
 |`maxRowsPerSegment`|Max number of rows per segment after compaction.|no|
 |`skipOffsetFromLatest`|The offset for searching segments to be compacted in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) duration format. Strongly recommended to set for realtime dataSources. See [Data handling with compaction](../ingestion/compaction.md#data-handling-with-compaction)|no (default = "P1D")|
 |`tuningConfig`|Tuning config for compaction tasks. See below [Compaction Task TuningConfig](#automatic-compaction-tuningconfig).|no|
```
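The table above documents the JSON that is submitted as a per-dataSource compaction config. As a sketch, assuming the Coordinator's standard compaction config endpoint and an illustrative host and dataSource:

```bash
# Submit a per-dataSource auto-compaction config to the Coordinator.
# Host, dataSource, and values are illustrative, not prescriptive.
curl -X POST \
  -H 'Content-Type: application/json' \
  http://coordinator:8081/druid/coordinator/v1/config/compaction \
  -d '{
        "dataSource": "wikipedia",
        "taskPriority": 25,
        "skipOffsetFromLatest": "P1D"
      }'
```

Any property left out, including inputSegmentSizeBytes, falls back to the defaults in the table.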
```diff
@@ -13,4 +13,4 @@
 -- See the License for the specific language governing permissions and
 -- limitations under the License.
 
-INSERT INTO druid_config (name, payload) VALUES ('coordinator.compaction.config', '{"compactionConfigs":[{"dataSource":"upgradeTest","taskPriority":25,"inputSegmentSizeBytes":419430400,"maxRowsPerSegment":null,"skipOffsetFromLatest":"P1D","tuningConfig":{"maxRowsInMemory":null,"maxBytesInMemory":null,"maxTotalRows":null,"splitHintSpec":null,"partitionsSpec":{"type":"hashed","numShards":null,"partitionDimensions":[],"partitionFunction":"murmur3_32_abs","maxRowsPerSegment":5000000},"indexSpec":null,"indexSpecForIntermediatePersists":null,"maxPendingPersists":null,"pushTimeout":null,"segmentWriteOutMediumFactory":null,"maxNumConcurrentSubTasks":null,"maxRetry":null,"taskStatusCheckPeriodMs":null,"chatHandlerTimeout":null,"chatHandlerNumRetries":null,"maxNumSegmentsToMerge":null,"totalNumMergeTasks":null,"forceGuaranteedRollup":true,"type":"index_parallel"},"taskContext":null}],"compactionTaskSlotRatio":0.1,"maxCompactionTaskSlots":2147483647}');
+INSERT INTO druid_config (name, payload) VALUES ('coordinator.compaction.config', '{"compactionConfigs":[{"dataSource":"upgradeTest","taskPriority":25,"inputSegmentSizeBytes":9223372036854775807,"maxRowsPerSegment":null,"skipOffsetFromLatest":"P1D","tuningConfig":{"maxRowsInMemory":null,"maxBytesInMemory":null,"maxTotalRows":null,"splitHintSpec":null,"partitionsSpec":{"type":"hashed","numShards":null,"partitionDimensions":[],"partitionFunction":"murmur3_32_abs","maxRowsPerSegment":5000000},"indexSpec":null,"indexSpecForIntermediatePersists":null,"maxPendingPersists":null,"pushTimeout":null,"segmentWriteOutMediumFactory":null,"maxNumConcurrentSubTasks":null,"maxRetry":null,"taskStatusCheckPeriodMs":null,"chatHandlerTimeout":null,"chatHandlerNumRetries":null,"maxNumSegmentsToMerge":null,"totalNumMergeTasks":null,"forceGuaranteedRollup":true,"type":"index_parallel"},"taskContext":null}],"compactionTaskSlotRatio":0.1,"maxCompactionTaskSlots":2147483647}');
```
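To see what a cluster actually has stored, for example after running a seed script like the one above, the payload can be read back from the same metadata table:

```sql
-- Inspect the stored compaction config; the table and key names
-- match the INSERT statement above.
SELECT payload
FROM druid_config
WHERE name = 'coordinator.compaction.config';
```

Note that a config seeded with an explicit "inputSegmentSizeBytes":419430400 keeps that value; the new default applies only when the property is omitted.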
```diff
@@ -34,7 +34,7 @@ public class DataSourceCompactionConfig
 {
   /** Must be synced with Tasks.DEFAULT_MERGE_TASK_PRIORITY */
   public static final int DEFAULT_COMPACTION_TASK_PRIORITY = 25;
-  private static final long DEFAULT_INPUT_SEGMENT_SIZE_BYTES = 400 * 1024 * 1024;
+  private static final long DEFAULT_INPUT_SEGMENT_SIZE_BYTES = Long.MAX_VALUE;
   private static final Period DEFAULT_SKIP_OFFSET_FROM_LATEST = new Period("P1D");
 
   private final String dataSource;
```
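The constant only takes effect when the JSON property is absent. A simplified sketch of the usual null-fallback pattern (the constructor shape is illustrative, not the exact Druid source):

```java
// Illustrative sketch: a missing inputSegmentSizeBytes in the JSON
// arrives as null and falls back to the default, now Long.MAX_VALUE.
public DataSourceCompactionConfig(String dataSource, Long inputSegmentSizeBytes)
{
  this.dataSource = dataSource;
  this.inputSegmentSizeBytes = inputSegmentSizeBytes == null
                               ? DEFAULT_INPUT_SEGMENT_SIZE_BYTES
                               : inputSegmentSizeBytes;
}
```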
```diff
@@ -75,7 +75,6 @@ describe('AutoForm', () => {
       {
         dataSource: 'ds',
         taskPriority: 25,
-        inputSegmentSizeBytes: 419430400,
         maxRowsPerSegment: null,
         skipOffsetFromLatest: 'P4D',
         tuningConfig: {
```
```diff
@@ -121,7 +120,6 @@ describe('AutoForm', () => {
       {
         dataSource: 'ds',
         taskPriority: 25,
-        inputSegmentSizeBytes: 419430400,
         skipOffsetFromLatest: 'P4D',
         tuningConfig: {
           partitionsSpec: {
```
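Both expectations drop inputSegmentSizeBytes because the field no longer exists in the form model, so the form no longer injects a default for it. A self-contained sketch of that default-application behavior (the `Field` shape and `applyDefaults` helper are hypothetical, standing in for whatever AutoForm actually does):

```ts
// Hypothetical sketch only: `Field` and `applyDefaults` stand in for
// the web console's real AutoForm machinery, which may differ.
interface Field {
  name: string;
  defaultValue?: unknown;
}

function applyDefaults(fields: Field[], model: Record<string, unknown>): Record<string, unknown> {
  const result = { ...model };
  for (const f of fields) {
    // Only fill in a default when the model has no value for the field.
    if (result[f.name] === undefined && f.defaultValue !== undefined) {
      result[f.name] = f.defaultValue;
    }
  }
  return result;
}

// With no 'inputSegmentSizeBytes' field defined, no default appears.
const fields: Field[] = [
  { name: 'taskPriority', defaultValue: 25 },
  { name: 'skipOffsetFromLatest', defaultValue: 'P1D' },
];
console.log(applyDefaults(fields, { dataSource: 'ds' }));
// => { dataSource: 'ds', taskPriority: 25, skipOffsetFromLatest: 'P1D' }
```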
```diff
@@ -271,14 +271,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partitionsSpec) 1`] = `
     "name": "tuningConfig.partitionsSpec.assumeGrouped",
     "type": "boolean",
   },
-  Object {
-    "defaultValue": 419430400,
-    "info": <p>
-      Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
-    </p>,
-    "name": "inputSegmentSizeBytes",
-    "type": "number",
-  },
   Object {
     "defaultValue": 1,
     "info": <React.Fragment>
```
```diff
@@ -641,14 +633,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partitionsSpec) 1`] = `
     "name": "tuningConfig.partitionsSpec.assumeGrouped",
     "type": "boolean",
   },
-  Object {
-    "defaultValue": 419430400,
-    "info": <p>
-      Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
-    </p>,
-    "name": "inputSegmentSizeBytes",
-    "type": "number",
-  },
   Object {
     "defaultValue": 1,
     "info": <React.Fragment>
```
```diff
@@ -1011,14 +995,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (range partitionsSpec) 1`] = `
     "name": "tuningConfig.partitionsSpec.assumeGrouped",
     "type": "boolean",
   },
-  Object {
-    "defaultValue": 419430400,
-    "info": <p>
-      Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
-    </p>,
-    "name": "inputSegmentSizeBytes",
-    "type": "number",
-  },
   Object {
     "defaultValue": 1,
     "info": <React.Fragment>
```
```diff
@@ -1381,14 +1357,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = `
     "name": "tuningConfig.partitionsSpec.assumeGrouped",
     "type": "boolean",
   },
-  Object {
-    "defaultValue": 419430400,
-    "info": <p>
-      Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
-    </p>,
-    "name": "inputSegmentSizeBytes",
-    "type": "number",
-  },
   Object {
     "defaultValue": 1,
     "info": <React.Fragment>
```
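The four snapshot hunks above are mechanical: with the field definition gone, each recorded field list shrinks by one entry. Snapshots like these are normally regenerated rather than hand-edited, for example with Jest's update flag (the exact script wiring in web-console is an assumption):

```bash
# Re-record Jest snapshots after an intentional UI change;
# run from the web console package directory (path assumed).
jest --updateSnapshot compaction-dialog
```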
```diff
@@ -230,20 +230,6 @@ export const COMPACTION_CONFIG_FIELDS: Field<CompactionConfig>[] = [
       </p>
     ),
   },
-  {
-    name: 'inputSegmentSizeBytes',
-    type: 'number',
-    defaultValue: 419430400,
-    info: (
-      <p>
-        Maximum number of total segment bytes processed per compaction task. Since a time chunk must
-        be processed in its entirety, if the segments for a particular time chunk have a total size
-        in bytes greater than this parameter, compaction will not run for that time chunk. Because
-        each compaction task runs with a single thread, setting this value too far above 1–2GB will
-        result in compaction tasks taking an excessive amount of time.
-      </p>
-    ),
-  },
   {
     name: 'tuningConfig.maxNumConcurrentSubTasks',
     type: 'number',
```
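The removed entry also shows the shape of the items in COMPACTION_CONFIG_FIELDS. A minimal interface inferred from the entries visible in this diff (the web console's real Field<T> type has more members than shown here):

```ts
import type { ReactNode } from 'react';

// Inferred, illustrative shape only; not the actual web console type.
interface Field<T> {
  name: string;            // dotted path into T, e.g. 'tuningConfig.maxNumConcurrentSubTasks'
  type: 'number' | 'boolean' | 'string';
  defaultValue?: unknown;  // value the form applies when the model omits the field
  info?: ReactNode;        // help text rendered beside the control
}
```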