Compute range partitionsSpec using effective maxRowsPerSegment (#16987)

In the compaction config, a range type partitionsSpec supports setting one of maxRowsPerSegment and targetRowsPerSegment. When compaction is run with the native engine, while maxRowsPerSegment = x results in segments of size x, targetRowsPerSegment = y results in segments of size 1.5 * y.

MSQ only supports rowsPerSegment = x as part of its tuning config, the resulting segment size being approx. x -- which is in line with maxRowsPerSegment behaviour in native compaction.

This PR makes the following changes:

use effective maxRowsPerSegment to pass as rowsPerSegment parameter for MSQ
persist rowsPerSegment as maxRowsPerSegment in lastCompactionState for MSQ
Use effective maxRowsPerSegment-based range spec in CompactionStatus check for both Native and MSQ.
This commit is contained in:
Vishesh Garg 2024-09-09 10:53:58 +05:30 committed by GitHub
parent b7a21a9f67
commit 37d4174245
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 60 additions and 24 deletions

View File

@ -1603,9 +1603,10 @@ public class ControllerImpl implements Controller
if (shardSpec != null) { if (shardSpec != null) {
if (Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE)) { if (Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE)) {
List<String> partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); List<String> partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions();
// Effective maxRowsPerSegment is propagated as rowsPerSegment in MSQ
partitionSpec = new DimensionRangePartitionsSpec( partitionSpec = new DimensionRangePartitionsSpec(
tuningConfig.getRowsPerSegment(),
null, null,
tuningConfig.getRowsPerSegment(),
partitionDimensions, partitionDimensions,
false false
); );
@ -1623,9 +1624,10 @@ public class ControllerImpl implements Controller
))); )));
} }
} else if (clusterBy != null && !clusterBy.getColumns().isEmpty()) { } else if (clusterBy != null && !clusterBy.getColumns().isEmpty()) {
// Effective maxRowsPerSegment is propagated as rowsPerSegment in MSQ
partitionSpec = new DimensionRangePartitionsSpec( partitionSpec = new DimensionRangePartitionsSpec(
tuningConfig.getRowsPerSegment(),
null, null,
tuningConfig.getRowsPerSegment(),
clusterBy.getColumns() clusterBy.getColumns()
.stream() .stream()
.map(KeyColumn::columnName).collect(Collectors.toList()), .map(KeyColumn::columnName).collect(Collectors.toList()),

View File

@ -30,7 +30,6 @@ import org.apache.druid.client.indexing.ClientCompactionRunnerInfo;
import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexer.TaskStatus;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec;
import org.apache.druid.indexer.partitions.SecondaryPartitionType; import org.apache.druid.indexer.partitions.SecondaryPartitionType;
import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.TaskToolbox;
@ -286,19 +285,10 @@ public class MSQCompactionRunner implements CompactionRunner
private static Integer getRowsPerSegment(CompactionTask compactionTask) private static Integer getRowsPerSegment(CompactionTask compactionTask)
{ {
Integer rowsPerSegment = PartitionsSpec.DEFAULT_MAX_ROWS_PER_SEGMENT; if (compactionTask.getTuningConfig() != null && compactionTask.getTuningConfig().getPartitionsSpec() != null) {
if (compactionTask.getTuningConfig() != null) { return compactionTask.getTuningConfig().getPartitionsSpec().getMaxRowsPerSegment();
PartitionsSpec partitionsSpec = compactionTask.getTuningConfig().getPartitionsSpec();
if (partitionsSpec instanceof DynamicPartitionsSpec) {
rowsPerSegment = partitionsSpec.getMaxRowsPerSegment();
} else if (partitionsSpec instanceof DimensionRangePartitionsSpec) {
DimensionRangePartitionsSpec dimensionRangePartitionsSpec = (DimensionRangePartitionsSpec) partitionsSpec;
rowsPerSegment = dimensionRangePartitionsSpec.getTargetRowsPerSegment() != null
? dimensionRangePartitionsSpec.getTargetRowsPerSegment()
: dimensionRangePartitionsSpec.getMaxRowsPerSegment();
}
} }
return rowsPerSegment; return PartitionsSpec.DEFAULT_MAX_ROWS_PER_SEGMENT;
} }
private static RowSignature getRowSignature(DataSchema dataSchema) private static RowSignature getRowSignature(DataSchema dataSchema)

View File

@ -2643,8 +2643,8 @@ public class MSQReplaceTest extends MSQTestBase
); );
} else { } else {
partitionsSpec = new DimensionRangePartitionsSpec( partitionsSpec = new DimensionRangePartitionsSpec(
MultiStageQueryContext.getRowsPerSegment(QueryContext.of(context)),
null, null,
MultiStageQueryContext.getRowsPerSegment(QueryContext.of(context)),
partitionDimensions, partitionDimensions,
false false
); );

View File

@ -93,6 +93,7 @@ public class MSQCompactionRunnerTest
private static final String TIMESTAMP_COLUMN = ColumnHolder.TIME_COLUMN_NAME; private static final String TIMESTAMP_COLUMN = ColumnHolder.TIME_COLUMN_NAME;
private static final int TARGET_ROWS_PER_SEGMENT = 100000; private static final int TARGET_ROWS_PER_SEGMENT = 100000;
private static final int MAX_ROWS_PER_SEGMENT = 150000;
private static final GranularityType SEGMENT_GRANULARITY = GranularityType.HOUR; private static final GranularityType SEGMENT_GRANULARITY = GranularityType.HOUR;
private static final GranularityType QUERY_GRANULARITY = GranularityType.HOUR; private static final GranularityType QUERY_GRANULARITY = GranularityType.HOUR;
private static List<String> PARTITION_DIMENSIONS; private static List<String> PARTITION_DIMENSIONS;
@ -286,7 +287,7 @@ public class MSQCompactionRunnerTest
new MSQTuningConfig( new MSQTuningConfig(
1, 1,
MultiStageQueryContext.DEFAULT_ROWS_IN_MEMORY, MultiStageQueryContext.DEFAULT_ROWS_IN_MEMORY,
TARGET_ROWS_PER_SEGMENT, MAX_ROWS_PER_SEGMENT,
null, null,
createIndexSpec() createIndexSpec()
), ),
@ -326,7 +327,7 @@ public class MSQCompactionRunnerTest
DimFilter dimFilter = new SelectorDimFilter("dim1", "foo", null); DimFilter dimFilter = new SelectorDimFilter("dim1", "foo", null);
CompactionTask taskCreatedWithTransformSpec = createCompactionTask( CompactionTask taskCreatedWithTransformSpec = createCompactionTask(
new DimensionRangePartitionsSpec(TARGET_ROWS_PER_SEGMENT, null, PARTITION_DIMENSIONS, false), new DimensionRangePartitionsSpec(null, MAX_ROWS_PER_SEGMENT, PARTITION_DIMENSIONS, false),
dimFilter, dimFilter,
Collections.emptyMap(), Collections.emptyMap(),
null, null,
@ -364,7 +365,7 @@ public class MSQCompactionRunnerTest
new MSQTuningConfig( new MSQTuningConfig(
1, 1,
MultiStageQueryContext.DEFAULT_ROWS_IN_MEMORY, MultiStageQueryContext.DEFAULT_ROWS_IN_MEMORY,
TARGET_ROWS_PER_SEGMENT, MAX_ROWS_PER_SEGMENT,
null, null,
createIndexSpec() createIndexSpec()
), ),

View File

@ -692,16 +692,26 @@ public class ITAutoCompactionTest extends AbstractIndexerTest
LOG.info("Auto compaction test with range partitioning"); LOG.info("Auto compaction test with range partitioning");
final DimensionRangePartitionsSpec rangePartitionsSpec = new DimensionRangePartitionsSpec( final DimensionRangePartitionsSpec inputRangePartitionsSpec = new DimensionRangePartitionsSpec(
5, 5,
null, null,
ImmutableList.of("city"), ImmutableList.of("city"),
false false
); );
submitCompactionConfig(rangePartitionsSpec, NO_SKIP_OFFSET, 1, null, null, null, null, false, engine); DimensionRangePartitionsSpec expectedRangePartitionsSpec = inputRangePartitionsSpec;
if (engine == CompactionEngine.MSQ) {
// Range spec is transformed to its effective maxRowsPerSegment equivalent in MSQ
expectedRangePartitionsSpec = new DimensionRangePartitionsSpec(
null,
7,
ImmutableList.of("city"),
false
);
}
submitCompactionConfig(inputRangePartitionsSpec, NO_SKIP_OFFSET, 1, null, null, null, null, false, engine);
forceTriggerAutoCompaction(2); forceTriggerAutoCompaction(2);
verifyQuery(INDEX_QUERIES_RESOURCE); verifyQuery(INDEX_QUERIES_RESOURCE);
verifySegmentsCompacted(rangePartitionsSpec, 2); verifySegmentsCompacted(expectedRangePartitionsSpec, 2);
checkCompactionIntervals(intervalsBeforeCompaction); checkCompactionIntervals(intervalsBeforeCompaction);
} }
} }

View File

@ -223,11 +223,27 @@ public class CompactionStatus
partitionsSpecFromTuningConfig.getMaxRowsPerSegment(), partitionsSpecFromTuningConfig.getMaxRowsPerSegment(),
((DynamicPartitionsSpec) partitionsSpecFromTuningConfig).getMaxTotalRowsOr(Long.MAX_VALUE) ((DynamicPartitionsSpec) partitionsSpecFromTuningConfig).getMaxTotalRowsOr(Long.MAX_VALUE)
); );
} else if (partitionsSpecFromTuningConfig instanceof DimensionRangePartitionsSpec) {
return getEffectiveRangePartitionsSpec((DimensionRangePartitionsSpec) partitionsSpecFromTuningConfig);
} else { } else {
return partitionsSpecFromTuningConfig; return partitionsSpecFromTuningConfig;
} }
} }
/**
* Converts to have only the effective maxRowsPerSegment to avoid false positives when targetRowsPerSegment is set but
* effectively translates to the same maxRowsPerSegment.
*/
static DimensionRangePartitionsSpec getEffectiveRangePartitionsSpec(DimensionRangePartitionsSpec partitionsSpec)
{
return new DimensionRangePartitionsSpec(
null,
partitionsSpec.getMaxRowsPerSegment(),
partitionsSpec.getPartitionDimensions(),
partitionsSpec.isAssumeGrouped()
);
}
/** /**
* Evaluates {@link #CHECKS} to determine the compaction status. * Evaluates {@link #CHECKS} to determine the compaction status.
*/ */
@ -286,10 +302,14 @@ public class CompactionStatus
private CompactionStatus partitionsSpecIsUpToDate() private CompactionStatus partitionsSpecIsUpToDate()
{ {
PartitionsSpec existingPartionsSpec = lastCompactionState.getPartitionsSpec();
if (existingPartionsSpec instanceof DimensionRangePartitionsSpec) {
existingPartionsSpec = getEffectiveRangePartitionsSpec((DimensionRangePartitionsSpec) existingPartionsSpec);
}
return CompactionStatus.completeIfEqual( return CompactionStatus.completeIfEqual(
"partitionsSpec", "partitionsSpec",
findPartitionsSpecFromConfig(tuningConfig), findPartitionsSpecFromConfig(tuningConfig),
lastCompactionState.getPartitionsSpec(), existingPartionsSpec,
CompactionStatus::asString CompactionStatus::asString
); );
} }

View File

@ -162,7 +162,7 @@ public class CompactionStatusTest
} }
@Test @Test
public void testFindPartitionsSpecWhenGivenIsRange() public void testFindPartitionsSpecWhenGivenIsRangeWithMaxRows()
{ {
final PartitionsSpec partitionsSpec = final PartitionsSpec partitionsSpec =
new DimensionRangePartitionsSpec(null, 10000, Collections.singletonList("dim"), false); new DimensionRangePartitionsSpec(null, 10000, Collections.singletonList("dim"), false);
@ -174,6 +174,19 @@ public class CompactionStatusTest
); );
} }
@Test
public void testFindPartitionsSpecWhenGivenIsRangeWithTargetRows()
{
final PartitionsSpec partitionsSpec =
new DimensionRangePartitionsSpec(10000, null, Collections.singletonList("dim"), false);
final ClientCompactionTaskQueryTuningConfig tuningConfig
= ClientCompactionTaskQueryTuningConfig.from(createCompactionConfig(partitionsSpec));
Assert.assertEquals(
new DimensionRangePartitionsSpec(null, 15000, Collections.singletonList("dim"), false),
CompactionStatus.findPartitionsSpecFromConfig(tuningConfig)
);
}
@Test @Test
public void testStatusWhenLastCompactionStateIsNull() public void testStatusWhenLastCompactionStateIsNull()
{ {