From 984904779bea348905d96c95e1c3f05f44883af8 Mon Sep 17 00:00:00 2001 From: Tejaswini Bandlamudi <96047043+tejaswini-imply@users.noreply.github.com> Date: Mon, 4 Apr 2022 16:28:53 +0530 Subject: [PATCH] Increase default DatasourceCompactionConfig.inputSegmentSizeBytes to Long.MAX_VALUE (#12381) The current default value of inputSegmentSizeBytes is 400MB, which is pretty low for most compaction use cases. Thus most users are forced to override the default. The default value is now increased to Long.MAX_VALUE. --- docs/configuration/index.md | 2 +- .../docker/test-data/upgrade-sample-data.sql | 2 +- .../DataSourceCompactionConfig.java | 2 +- .../components/auto-form/auto-form.spec.tsx | 2 -- .../compaction-dialog.spec.tsx.snap | 32 ------------------- .../src/druid-models/compaction-config.tsx | 14 -------- 6 files changed, 3 insertions(+), 51 deletions(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 6383778ea58..68451611767 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -962,7 +962,7 @@ A description of the compaction config is: |--------|-----------|--------| |`dataSource`|dataSource name to be compacted.|yes| |`taskPriority`|[Priority](../ingestion/tasks.md#priority) of compaction task.|no (default = 25)| -|`inputSegmentSizeBytes`|Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.|no (default = 419430400)| +|`inputSegmentSizeBytes`|Maximum number of total segment bytes processed per compaction task. 
Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.|no (default = Long.MAX_VALUE, i.e. 9223372036854775807)| |`maxRowsPerSegment`|Max number of rows per segment after compaction.|no| |`skipOffsetFromLatest`|The offset for searching segments to be compacted in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) duration format. Strongly recommended to set for realtime dataSources. See [Data handling with compaction](../ingestion/compaction.md#data-handling-with-compaction)|no (default = "P1D")| |`tuningConfig`|Tuning config for compaction tasks. See below [Compaction Task TuningConfig](#automatic-compaction-tuningconfig).|no| diff --git a/integration-tests/docker/test-data/upgrade-sample-data.sql b/integration-tests/docker/test-data/upgrade-sample-data.sql index a58fdab38ae..3ecad88da0b 100644 --- a/integration-tests/docker/test-data/upgrade-sample-data.sql +++ b/integration-tests/docker/test-data/upgrade-sample-data.sql @@ -13,4 +13,4 @@ -- See the License for the specific language governing permissions and -- limitations under the License. 
-INSERT INTO druid_config (name, payload) VALUES ('coordinator.compaction.config', '{"compactionConfigs":[{"dataSource":"upgradeTest","taskPriority":25,"inputSegmentSizeBytes":419430400,"maxRowsPerSegment":null,"skipOffsetFromLatest":"P1D","tuningConfig":{"maxRowsInMemory":null,"maxBytesInMemory":null,"maxTotalRows":null,"splitHintSpec":null,"partitionsSpec":{"type":"hashed","numShards":null,"partitionDimensions":[],"partitionFunction":"murmur3_32_abs","maxRowsPerSegment":5000000},"indexSpec":null,"indexSpecForIntermediatePersists":null,"maxPendingPersists":null,"pushTimeout":null,"segmentWriteOutMediumFactory":null,"maxNumConcurrentSubTasks":null,"maxRetry":null,"taskStatusCheckPeriodMs":null,"chatHandlerTimeout":null,"chatHandlerNumRetries":null,"maxNumSegmentsToMerge":null,"totalNumMergeTasks":null,"forceGuaranteedRollup":true,"type":"index_parallel"},"taskContext":null}],"compactionTaskSlotRatio":0.1,"maxCompactionTaskSlots":2147483647}'); +INSERT INTO druid_config (name, payload) VALUES ('coordinator.compaction.config', '{"compactionConfigs":[{"dataSource":"upgradeTest","taskPriority":25,"inputSegmentSizeBytes":9223372036854775807,"maxRowsPerSegment":null,"skipOffsetFromLatest":"P1D","tuningConfig":{"maxRowsInMemory":null,"maxBytesInMemory":null,"maxTotalRows":null,"splitHintSpec":null,"partitionsSpec":{"type":"hashed","numShards":null,"partitionDimensions":[],"partitionFunction":"murmur3_32_abs","maxRowsPerSegment":5000000},"indexSpec":null,"indexSpecForIntermediatePersists":null,"maxPendingPersists":null,"pushTimeout":null,"segmentWriteOutMediumFactory":null,"maxNumConcurrentSubTasks":null,"maxRetry":null,"taskStatusCheckPeriodMs":null,"chatHandlerTimeout":null,"chatHandlerNumRetries":null,"maxNumSegmentsToMerge":null,"totalNumMergeTasks":null,"forceGuaranteedRollup":true,"type":"index_parallel"},"taskContext":null}],"compactionTaskSlotRatio":0.1,"maxCompactionTaskSlots":2147483647}'); diff --git 
a/server/src/main/java/org/apache/druid/server/coordinator/DataSourceCompactionConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/DataSourceCompactionConfig.java index b4b2245ee9a..b786554feb3 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DataSourceCompactionConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DataSourceCompactionConfig.java @@ -34,7 +34,7 @@ public class DataSourceCompactionConfig { /** Must be synced with Tasks.DEFAULT_MERGE_TASK_PRIORITY */ public static final int DEFAULT_COMPACTION_TASK_PRIORITY = 25; - private static final long DEFAULT_INPUT_SEGMENT_SIZE_BYTES = 400 * 1024 * 1024; + private static final long DEFAULT_INPUT_SEGMENT_SIZE_BYTES = Long.MAX_VALUE; private static final Period DEFAULT_SKIP_OFFSET_FROM_LATEST = new Period("P1D"); private final String dataSource; diff --git a/web-console/src/components/auto-form/auto-form.spec.tsx b/web-console/src/components/auto-form/auto-form.spec.tsx index cc928ea3ea6..f345d514bbd 100644 --- a/web-console/src/components/auto-form/auto-form.spec.tsx +++ b/web-console/src/components/auto-form/auto-form.spec.tsx @@ -75,7 +75,6 @@ describe('AutoForm', () => { { dataSource: 'ds', taskPriority: 25, - inputSegmentSizeBytes: 419430400, maxRowsPerSegment: null, skipOffsetFromLatest: 'P4D', tuningConfig: { @@ -121,7 +120,6 @@ describe('AutoForm', () => { { dataSource: 'ds', taskPriority: 25, - inputSegmentSizeBytes: 419430400, skipOffsetFromLatest: 'P4D', tuningConfig: { partitionsSpec: { diff --git a/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap b/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap index edcef7992d5..898acd1b3d6 100644 --- a/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap +++ b/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap @@ -271,14 +271,6 @@ exports[`CompactionDialog 
matches snapshot with compactionConfig (dynamic partit "name": "tuningConfig.partitionsSpec.assumeGrouped", "type": "boolean", }, - Object { - "defaultValue": 419430400, - "info":
- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -
, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 1, "info":- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -
, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 1, "info":- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -
, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 1, "info":- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -
, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 1, "info":- Maximum number of total segment bytes processed per compaction task. Since a time chunk must - be processed in its entirety, if the segments for a particular time chunk have a total size - in bytes greater than this parameter, compaction will not run for that time chunk. Because - each compaction task runs with a single thread, setting this value too far above 1–2GB will - result in compaction tasks taking an excessive amount of time. -
- ), - }, { name: 'tuningConfig.maxNumConcurrentSubTasks', type: 'number',