From 4f0145fb8506257f6531cce04b5dee8354a1a1a1 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 28 Oct 2022 08:31:52 -0700 Subject: [PATCH] MSQ: Use long instead of double for estimatedRetainedBytes. (#13272) Fixes a problem where, due to the inexactness of floating-point math, we would potentially drift while tracking retained byte counts and run into assertion failures in assertRetainedByteCountsAreTrackedCorrectly. --- .../msq/statistics/ClusterByStatisticsCollectorImpl.java | 9 +++++---- .../druid/msq/statistics/DelegateOrMinKeyCollector.java | 2 +- .../druid/msq/statistics/DistinctKeyCollector.java | 2 +- .../org/apache/druid/msq/statistics/KeyCollector.java | 2 +- .../msq/statistics/QuantilesSketchKeyCollector.java | 4 ++-- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/ClusterByStatisticsCollectorImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/ClusterByStatisticsCollectorImpl.java index 9e033c87498..13ab5dc01ce 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/ClusterByStatisticsCollectorImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/ClusterByStatisticsCollectorImpl.java @@ -58,7 +58,7 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl private final int maxRetainedBytes; private final int maxBuckets; - private double totalRetainedBytes; + private long totalRetainedBytes; private ClusterByStatisticsCollectorImpl( final ClusterBy clusterBy, @@ -369,8 +369,8 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl */ private void downSample() { - double newTotalRetainedBytes = totalRetainedBytes; - final double targetTotalRetainedBytes = totalRetainedBytes / 2; + long newTotalRetainedBytes = totalRetainedBytes; + final long targetTotalRetainedBytes = totalRetainedBytes / 2; final List sortedHolders = new ArrayList<>(buckets.size()); @@ -384,7 +384,8 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl // Downsample least-dense buckets first. (They're less likely to need high resolution.) sortedHolders.sort( Comparator.comparing((BucketHolder holder) -> - (double) holder.keyCollector.estimatedTotalWeight() / holder.keyCollector.estimatedRetainedKeys()) + (double) holder.keyCollector.estimatedTotalWeight() + / holder.keyCollector.estimatedRetainedKeys()) ); int i = 0; diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java index 179c2bc3aef..e2e2282a6bb 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DelegateOrMinKeyCollector.java @@ -128,7 +128,7 @@ public class DelegateOrMinKeyCollector } @Override - public double estimatedRetainedBytes() + public long estimatedRetainedBytes() { if (delegate != null) { return delegate.estimatedRetainedBytes(); diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java index 6868597667a..9a1716a9fbb 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/DistinctKeyCollector.java @@ -172,7 +172,7 @@ public class DistinctKeyCollector implements KeyCollector } @Override - public double estimatedRetainedBytes() + public long estimatedRetainedBytes() { return retainedBytes; } diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/KeyCollector.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/KeyCollector.java index 48287e74be5..bf059f19962 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/KeyCollector.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/KeyCollector.java @@ -57,7 +57,7 @@ public interface KeyCollector> * Returns an estimate of the number of bytes currently retained by this collector. This may change over time as * more keys are added. */ - double estimatedRetainedBytes(); + long estimatedRetainedBytes(); /** * Downsample this collector, dropping about half of the keys that are currently retained. Returns true if diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java index 950f9419af8..16203247354 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollector.java @@ -102,9 +102,9 @@ public class QuantilesSketchKeyCollector implements KeyCollector