mirror of https://github.com/apache/druid.git
Resolve a bug where datasketches would not downsample sketches sufficiently (#16119)
* Fix sketch memory issue
* Rename function
* Add unit test
* Revert downsampling change
This commit is contained in:
parent
b8dd7478d0
commit
18a4722d11
|
@ -212,6 +212,12 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
long getTotalRetainedBytes()
|
||||||
|
{
|
||||||
|
return totalRetainedBytes;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean hasMultipleValues(final int keyPosition)
|
public boolean hasMultipleValues(final int keyPosition)
|
||||||
{
|
{
|
||||||
|
@ -414,7 +420,7 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
|
||||||
void downSample()
|
void downSample()
|
||||||
{
|
{
|
||||||
long newTotalRetainedBytes = totalRetainedBytes;
|
long newTotalRetainedBytes = totalRetainedBytes;
|
||||||
final long targetTotalRetainedBytes = totalRetainedBytes / 2;
|
final long targetTotalRetainedBytes = Math.min(totalRetainedBytes / 2, maxRetainedBytes);
|
||||||
|
|
||||||
final List<Pair<Long, BucketHolder>> sortedHolders = new ArrayList<>(buckets.size());
|
final List<Pair<Long, BucketHolder>> sortedHolders = new ArrayList<>(buckets.size());
|
||||||
final RowKeyReader trimmedRowReader = keyReader.trimmedKeyReader(clusterBy.getBucketByCount());
|
final RowKeyReader trimmedRowReader = keyReader.trimmedKeyReader(clusterBy.getBucketByCount());
|
||||||
|
|
|
@ -451,6 +451,26 @@ public class ClusterByStatisticsCollectorImplTest extends InitializedNullHandlin
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testShouldDownsampleSingleBucket()
|
||||||
|
{
|
||||||
|
ClusterByStatisticsCollectorImpl clusterByStatisticsCollector =
|
||||||
|
(ClusterByStatisticsCollectorImpl) ClusterByStatisticsCollectorImpl.create(
|
||||||
|
CLUSTER_BY_XYZ_BUCKET_BY_X,
|
||||||
|
SIGNATURE,
|
||||||
|
35000,
|
||||||
|
500,
|
||||||
|
false,
|
||||||
|
false
|
||||||
|
);
|
||||||
|
|
||||||
|
clusterByStatisticsCollector.add(createKey(CLUSTER_BY_XYZ_BUCKET_BY_X, 2, 1, "value1"), 1);
|
||||||
|
clusterByStatisticsCollector.add(createKey(CLUSTER_BY_XYZ_BUCKET_BY_X, 2, 3, "value2"), 1);
|
||||||
|
clusterByStatisticsCollector.add(createKey(CLUSTER_BY_XYZ_BUCKET_BY_X, 1, 1, "Extremely long key string for unit test; Extremely long key string for unit test;"), 500);
|
||||||
|
|
||||||
|
Assert.assertTrue(clusterByStatisticsCollector.getTotalRetainedBytes() <= 35000);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBucketDownsampledToSingleKeyFinishesCorrectly()
|
public void testBucketDownsampledToSingleKeyFinishesCorrectly()
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue