Fix bug while adding DistinctSketches (#17184)

* Fix bug with checking the incorrect key

* Add tests
This commit is contained in:
Adarsh Sanjeev 2024-09-30 11:35:32 +05:30 committed by GitHub
parent ab361747a8
commit 95fb813b6d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 19 additions and 2 deletions

View File

@ -120,8 +120,9 @@ public class DistinctKeyCollector implements KeyCollector<DistinctKeyCollector>
if (isNewMin || isKeySelected(key)) {
if (isNewMin && !retainedKeys.isEmpty() && !isKeySelected(retainedKeys.firstKey())) {
// Old min should be kicked out.
totalWeightUnadjusted -= retainedKeys.removeLong(retainedKeys.firstKey());
retainedBytes -= retainedKeys.firstKey().estimatedObjectSizeBytes();
RowKey rowKey = retainedKeys.firstKey();
totalWeightUnadjusted -= retainedKeys.removeLong(rowKey);
retainedBytes -= rowKey.estimatedObjectSizeBytes();
}
if (retainedKeys.putIfAbsent(key, weight) == MISSING_KEY_WEIGHT) {

View File

@ -20,6 +20,7 @@
package org.apache.druid.msq.statistics;
import com.google.common.collect.ImmutableList;
import it.unimi.dsi.fastutil.objects.Object2LongRBTreeMap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.frame.key.ClusterBy;
import org.apache.druid.frame.key.ClusterByPartition;
@ -90,6 +91,21 @@ public class DistinctKeyCollectorTest
);
}
/**
 * Smoke test: adds one key, down-samples, then adds a second key, and passes as long as
 * no call throws. There are deliberately no assertions.
 *
 * NOTE(review): this looks like the regression test for evicting the old minimum key
 * when it is the only retained key -- confirm against DistinctKeyCollector#add.
 */
@Test
public void test_single_key_addition()
{
  final DistinctKeyCollector collector = new DistinctKeyCollector(
      clusterBy.keyComparator(signature),
      new Object2LongRBTreeMap<>(comparator),
      2
  );

  final List<Pair<RowKey, Integer>> keysWithWeights = KeyCollectorTestUtils.sequentialKeys(2);
  final Pair<RowKey, Integer> entryAtIndexZero = keysWithWeights.get(0);
  final Pair<RowKey, Integer> entryAtIndexOne = keysWithWeights.get(1);

  // Order matters: the index-1 key goes in first, then the collector is down-sampled,
  // and only afterwards is the index-0 key added.
  // presumably the index-0 key sorts before the index-1 key -- verify against sequentialKeys.
  collector.add(entryAtIndexOne.lhs, entryAtIndexOne.rhs);
  collector.downSample();
  collector.add(entryAtIndexZero.lhs, entryAtIndexZero.rhs);
}
@Test
public void test_uniformRandomKeys_unweighted()
{