From 5da99499926395c7c5fe2cc23b56c8e358c04fd9 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Sat, 19 Oct 2024 13:33:33 +0530 Subject: [PATCH] Fail MSQ compaction if multi-valued partition dimensions are found (#17344) MSQ currently supports only single-valued string dimensions as partition keys. This patch adds a check to ensure that partition keys are single-valued when this information is available because segments were downloaded for schema inference. During compaction, if MSQ finds multi-valued dimensions (MVDs) declared as part of a `range` partitionsSpec, it switches the partitioning type to dynamic, resulting in repeated compactions of the same interval. To avoid this scenario, the segment download logic is also updated to always download segments whenever information on multi-valued dimensions is required. --- .../msq/indexing/MSQCompactionRunner.java | 43 ++++++- .../msq/indexing/MSQCompactionRunnerTest.java | 58 +++++++-- .../indexing/common/task/CompactionTask.java | 66 ++++++++-- .../common/task/CompactionTaskTest.java | 117 ++++++++++++++++++ .../indexing/ClientCompactionRunnerInfo.java | 9 +- .../ClientCompactionRunnerInfoTest.java | 29 ++++- 6 files changed, 296 insertions(+), 26 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java index e20188d5829..d05ab12ea3f 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQCompactionRunner.java @@ -29,6 +29,7 @@ import com.google.common.collect.Iterables; import com.google.inject.Injector; import org.apache.druid.client.indexing.ClientCompactionRunnerInfo; import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; import 
org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; @@ -84,6 +85,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -130,7 +132,7 @@ public class MSQCompactionRunner implements CompactionRunner * The following configs aren't supported: *