Increase default DataSourceCompactionConfig.inputSegmentSizeBytes to Long.MAX_VALUE (#12381)

The current default value of inputSegmentSizeBytes is 400MB (419430400 bytes),
which is too low for most compaction use cases, so most users are forced to
override the default.

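The default value is now increased to Long.MAX_VALUE (9223372036854775807), so the limit is effectively disabled unless it is set explicitly. The inputSegmentSizeBytes field is also removed from the web console's compaction config fields.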
This commit is contained in:
Tejaswini Bandlamudi 2022-04-04 16:28:53 +05:30 committed by GitHub
parent c5531be553
commit 984904779b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 3 additions and 51 deletions

View File

@ -962,7 +962,7 @@ A description of the compaction config is:
|--------|-----------|--------| |--------|-----------|--------|
|`dataSource`|dataSource name to be compacted.|yes| |`dataSource`|dataSource name to be compacted.|yes|
|`taskPriority`|[Priority](../ingestion/tasks.md#priority) of compaction task.|no (default = 25)| |`taskPriority`|[Priority](../ingestion/tasks.md#priority) of compaction task.|no (default = 25)|
|`inputSegmentSizeBytes`|Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 12GB will result in compaction tasks taking an excessive amount of time.|no (default = 419430400)| |`inputSegmentSizeBytes`|Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 12GB will result in compaction tasks taking an excessive amount of time.|no (default = Long.MAX_VALUE)|
|`maxRowsPerSegment`|Max number of rows per segment after compaction.|no| |`maxRowsPerSegment`|Max number of rows per segment after compaction.|no|
|`skipOffsetFromLatest`|The offset for searching segments to be compacted in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) duration format. Strongly recommended to set for realtime dataSources. See [Data handling with compaction](../ingestion/compaction.md#data-handling-with-compaction)|no (default = "P1D")| |`skipOffsetFromLatest`|The offset for searching segments to be compacted in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) duration format. Strongly recommended to set for realtime dataSources. See [Data handling with compaction](../ingestion/compaction.md#data-handling-with-compaction)|no (default = "P1D")|
|`tuningConfig`|Tuning config for compaction tasks. See below [Compaction Task TuningConfig](#automatic-compaction-tuningconfig).|no| |`tuningConfig`|Tuning config for compaction tasks. See below [Compaction Task TuningConfig](#automatic-compaction-tuningconfig).|no|

View File

@ -13,4 +13,4 @@
-- See the License for the specific language governing permissions and -- See the License for the specific language governing permissions and
-- limitations under the License. -- limitations under the License.
INSERT INTO druid_config (name, payload) VALUES ('coordinator.compaction.config', '{"compactionConfigs":[{"dataSource":"upgradeTest","taskPriority":25,"inputSegmentSizeBytes":419430400,"maxRowsPerSegment":null,"skipOffsetFromLatest":"P1D","tuningConfig":{"maxRowsInMemory":null,"maxBytesInMemory":null,"maxTotalRows":null,"splitHintSpec":null,"partitionsSpec":{"type":"hashed","numShards":null,"partitionDimensions":[],"partitionFunction":"murmur3_32_abs","maxRowsPerSegment":5000000},"indexSpec":null,"indexSpecForIntermediatePersists":null,"maxPendingPersists":null,"pushTimeout":null,"segmentWriteOutMediumFactory":null,"maxNumConcurrentSubTasks":null,"maxRetry":null,"taskStatusCheckPeriodMs":null,"chatHandlerTimeout":null,"chatHandlerNumRetries":null,"maxNumSegmentsToMerge":null,"totalNumMergeTasks":null,"forceGuaranteedRollup":true,"type":"index_parallel"},"taskContext":null}],"compactionTaskSlotRatio":0.1,"maxCompactionTaskSlots":2147483647}'); INSERT INTO druid_config (name, payload) VALUES ('coordinator.compaction.config', '{"compactionConfigs":[{"dataSource":"upgradeTest","taskPriority":25,"inputSegmentSizeBytes":9223372036854775807,"maxRowsPerSegment":null,"skipOffsetFromLatest":"P1D","tuningConfig":{"maxRowsInMemory":null,"maxBytesInMemory":null,"maxTotalRows":null,"splitHintSpec":null,"partitionsSpec":{"type":"hashed","numShards":null,"partitionDimensions":[],"partitionFunction":"murmur3_32_abs","maxRowsPerSegment":5000000},"indexSpec":null,"indexSpecForIntermediatePersists":null,"maxPendingPersists":null,"pushTimeout":null,"segmentWriteOutMediumFactory":null,"maxNumConcurrentSubTasks":null,"maxRetry":null,"taskStatusCheckPeriodMs":null,"chatHandlerTimeout":null,"chatHandlerNumRetries":null,"maxNumSegmentsToMerge":null,"totalNumMergeTasks":null,"forceGuaranteedRollup":true,"type":"index_parallel"},"taskContext":null}],"compactionTaskSlotRatio":0.1,"maxCompactionTaskSlots":2147483647}');

View File

@ -34,7 +34,7 @@ public class DataSourceCompactionConfig
{ {
/** Must be synced with Tasks.DEFAULT_MERGE_TASK_PRIORITY */ /** Must be synced with Tasks.DEFAULT_MERGE_TASK_PRIORITY */
public static final int DEFAULT_COMPACTION_TASK_PRIORITY = 25; public static final int DEFAULT_COMPACTION_TASK_PRIORITY = 25;
private static final long DEFAULT_INPUT_SEGMENT_SIZE_BYTES = 400 * 1024 * 1024; private static final long DEFAULT_INPUT_SEGMENT_SIZE_BYTES = Long.MAX_VALUE;
private static final Period DEFAULT_SKIP_OFFSET_FROM_LATEST = new Period("P1D"); private static final Period DEFAULT_SKIP_OFFSET_FROM_LATEST = new Period("P1D");
private final String dataSource; private final String dataSource;

View File

@ -75,7 +75,6 @@ describe('AutoForm', () => {
{ {
dataSource: 'ds', dataSource: 'ds',
taskPriority: 25, taskPriority: 25,
inputSegmentSizeBytes: 419430400,
maxRowsPerSegment: null, maxRowsPerSegment: null,
skipOffsetFromLatest: 'P4D', skipOffsetFromLatest: 'P4D',
tuningConfig: { tuningConfig: {
@ -121,7 +120,6 @@ describe('AutoForm', () => {
{ {
dataSource: 'ds', dataSource: 'ds',
taskPriority: 25, taskPriority: 25,
inputSegmentSizeBytes: 419430400,
skipOffsetFromLatest: 'P4D', skipOffsetFromLatest: 'P4D',
tuningConfig: { tuningConfig: {
partitionsSpec: { partitionsSpec: {

View File

@ -271,14 +271,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit
"name": "tuningConfig.partitionsSpec.assumeGrouped", "name": "tuningConfig.partitionsSpec.assumeGrouped",
"type": "boolean", "type": "boolean",
}, },
Object {
"defaultValue": 419430400,
"info": <p>
Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 12GB will result in compaction tasks taking an excessive amount of time.
</p>,
"name": "inputSegmentSizeBytes",
"type": "number",
},
Object { Object {
"defaultValue": 1, "defaultValue": 1,
"info": <React.Fragment> "info": <React.Fragment>
@ -641,14 +633,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti
"name": "tuningConfig.partitionsSpec.assumeGrouped", "name": "tuningConfig.partitionsSpec.assumeGrouped",
"type": "boolean", "type": "boolean",
}, },
Object {
"defaultValue": 419430400,
"info": <p>
Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 12GB will result in compaction tasks taking an excessive amount of time.
</p>,
"name": "inputSegmentSizeBytes",
"type": "number",
},
Object { Object {
"defaultValue": 1, "defaultValue": 1,
"info": <React.Fragment> "info": <React.Fragment>
@ -1011,14 +995,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (range partitio
"name": "tuningConfig.partitionsSpec.assumeGrouped", "name": "tuningConfig.partitionsSpec.assumeGrouped",
"type": "boolean", "type": "boolean",
}, },
Object {
"defaultValue": 419430400,
"info": <p>
Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 12GB will result in compaction tasks taking an excessive amount of time.
</p>,
"name": "inputSegmentSizeBytes",
"type": "number",
},
Object { Object {
"defaultValue": 1, "defaultValue": 1,
"info": <React.Fragment> "info": <React.Fragment>
@ -1381,14 +1357,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = `
"name": "tuningConfig.partitionsSpec.assumeGrouped", "name": "tuningConfig.partitionsSpec.assumeGrouped",
"type": "boolean", "type": "boolean",
}, },
Object {
"defaultValue": 419430400,
"info": <p>
Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 12GB will result in compaction tasks taking an excessive amount of time.
</p>,
"name": "inputSegmentSizeBytes",
"type": "number",
},
Object { Object {
"defaultValue": 1, "defaultValue": 1,
"info": <React.Fragment> "info": <React.Fragment>

View File

@ -230,20 +230,6 @@ export const COMPACTION_CONFIG_FIELDS: Field<CompactionConfig>[] = [
</p> </p>
), ),
}, },
{
name: 'inputSegmentSizeBytes',
type: 'number',
defaultValue: 419430400,
info: (
<p>
Maximum number of total segment bytes processed per compaction task. Since a time chunk must
be processed in its entirety, if the segments for a particular time chunk have a total size
in bytes greater than this parameter, compaction will not run for that time chunk. Because
each compaction task runs with a single thread, setting this value too far above 12GB will
result in compaction tasks taking an excessive amount of time.
</p>
),
},
{ {
name: 'tuningConfig.maxNumConcurrentSubTasks', name: 'tuningConfig.maxNumConcurrentSubTasks',
type: 'number', type: 'number',