Fix behaviour of downsampling buckets to a single key (#13663)

This commit is contained in:
Adarsh Sanjeev 2023-01-12 21:24:24 +05:30 committed by GitHub
parent 0a486c3bcf
commit cb16a7f6a9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 4 deletions

View File

@ -375,7 +375,7 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
* number the same, if downsampling is not possible. (For example: downsampling is not possible if all buckets
* have been downsampled all the way to one key each.)
*/
private void downSample()
void downSample()
{
long newTotalRetainedBytes = totalRetainedBytes;
final long targetTotalRetainedBytes = totalRetainedBytes / 2;
@ -405,7 +405,7 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
bucketHolder.keyCollector.downSample();
newTotalRetainedBytes += bucketHolder.updateRetainedBytes();
if (i == sortedHolders.size() - 1 || sortedHolders.get(i + 1).retainedBytes > bucketHolder.retainedBytes) {
if (i == sortedHolders.size() - 1 || sortedHolders.get(i + 1).retainedBytes > bucketHolder.retainedBytes || bucketHolder.keyCollector.estimatedRetainedKeys() <= 1) {
i++;
}
}

View File

@ -66,8 +66,11 @@ public class ClusterByStatisticsCollectorImplTest extends InitializedNullHandlin
{
private static final double PARTITION_SIZE_LEEWAY = 0.3;
private static final RowSignature SIGNATURE =
RowSignature.builder().add("x", ColumnType.LONG).add("y", ColumnType.LONG).build();
private static final RowSignature SIGNATURE = RowSignature.builder()
.add("x", ColumnType.LONG)
.add("y", ColumnType.LONG)
.add("z", ColumnType.STRING)
.build();
private static final ClusterBy CLUSTER_BY_X = new ClusterBy(
ImmutableList.of(new SortColumn("x", false)),
@ -78,6 +81,10 @@ public class ClusterByStatisticsCollectorImplTest extends InitializedNullHandlin
ImmutableList.of(new SortColumn("x", false), new SortColumn("y", false)),
1
);
// Cluster key made of the columns x, y and z (z is the STRING column of SIGNATURE).
// NOTE(review): the trailing 1 is presumably the bucket-by count, i.e. bucket on the first
// column (x) only, as the constant's name suggests -- confirm against ClusterBy.
private static final ClusterBy CLUSTER_BY_XYZ_BUCKET_BY_X = new ClusterBy(
ImmutableList.of(new SortColumn("x", false), new SortColumn("y", false), new SortColumn("z", false)),
1
);
// These numbers are roughly 10x lower than authentic production numbers. (See StageDefinition.)
private static final int MAX_BYTES = 1_000_000;
@ -438,6 +445,17 @@ public class ClusterByStatisticsCollectorImplTest extends InitializedNullHandlin
);
}
/**
 * Regression test for downsampling when a bucket is reduced to a single key.
 *
 * Adds two keys with different x values (1 and 2) to a collector built with
 * {@code CLUSTER_BY_XYZ_BUCKET_BY_X}; the first key carries a deliberately long string in z, the
 * second a one-character string. It then calls {@code downSample()} directly.
 *
 * The test contains no assertions: it passes as long as {@code downSample()} returns without
 * throwing.
 * NOTE(review): presumably each key lands in its own bucket (bucketing by x) and the pre-fix code
 * failed to terminate or misbehaved here -- confirm against the downSample() loop.
 */
@Test
public void testBucketDownsampledToSingleKeyFinishesCorrectly()
{
ClusterByStatisticsCollectorImpl clusterByStatisticsCollector = makeCollector(CLUSTER_BY_XYZ_BUCKET_BY_X, false);
// Key in x=1 with a long string component, so its retained size is much larger than the next key's.
clusterByStatisticsCollector.add(createKey(CLUSTER_BY_XYZ_BUCKET_BY_X, 1, 1, "Extremely long key string for unit test; Extremely long key string for unit test;"), 2);
// Key in x=2 with a minimal string component.
clusterByStatisticsCollector.add(createKey(CLUSTER_BY_XYZ_BUCKET_BY_X, 2, 1, "b"), 2);
// Invoked directly (the method is package-private in this commit); must return normally.
clusterByStatisticsCollector.downSample();
}
@Test(expected = IllegalArgumentException.class)
public void testMoreBucketsThanKeysThrowsException()
{