diff --git a/extensions-core/histogram/src/main/java/io/druid/query/aggregation/histogram/ApproximateHistogram.java b/extensions-core/histogram/src/main/java/io/druid/query/aggregation/histogram/ApproximateHistogram.java index f1907fabeeb..e608b6fa4a7 100644 --- a/extensions-core/histogram/src/main/java/io/druid/query/aggregation/histogram/ApproximateHistogram.java +++ b/extensions-core/histogram/src/main/java/io/druid/query/aggregation/histogram/ApproximateHistogram.java @@ -1546,6 +1546,20 @@ public class ApproximateHistogram final long[] bins = this.bins(); for (int j = 0; j < probabilities.length; ++j) { + // Adapted from Ben-Haiv/Tom-Tov (Algorithm 4: Uniform Procedure) + // Our histogram has a set of bins {(p1, m1), (p2, m2), ... (pB, mB)} + // and properties of min and max saved during construction, such + // that min <= p1 and pB <= max. + // + // When min < p1 or pB < max, these are special cases of using the + // dummy bins (p0, 0) or (pB+1, 0) respectively, where p0 == min + // and pB+1 == max. + // + // This histogram gives an ordered set of numbers {pi, pi+1, ..., pn}, + // such that min <= pi < pn <= max, and n is the sum({m1, ..., mB}). + // We use s to determine which pairs of (pi, mi), (pi+1, mi+1) to + // calculate our quantile from by computing sum([pi,mi]) < s < sum + // ([pi+1, mi+1]) final double s = probabilities[j] * this.count(); int i = 0; @@ -1564,6 +1578,7 @@ public class ApproximateHistogram } if (i == 0) { + // At the first bin, there are no points to the left of p (min) quantiles[j] = this.min(); } else { final double d = s - sum; @@ -1577,7 +1592,11 @@ public class ApproximateHistogram z = (-b + Math.sqrt(b * b - 4 * a * c)) / (2 * a); } final double uj = this.positions[i - 1] + (this.positions[i] - this.positions[i - 1]) * z; - quantiles[j] = (float) uj; + // A given bin (p, m) has m/2 points to the left and right of p, and + // uj could be one of those points. However, uj is still subject to: + // [min] (p0, 0) < uj < (p, m) or + // (p, m) < uj < (pB+1, 0) [max] + quantiles[j] = ((float) uj < this.max()) ? (float) uj : this.max(); } } diff --git a/extensions-core/histogram/src/test/java/io/druid/query/aggregation/histogram/ApproximateHistogramTest.java b/extensions-core/histogram/src/test/java/io/druid/query/aggregation/histogram/ApproximateHistogramTest.java index f75485a9a95..cdecfcf782a 100644 --- a/extensions-core/histogram/src/test/java/io/druid/query/aggregation/histogram/ApproximateHistogramTest.java +++ b/extensions-core/histogram/src/test/java/io/druid/query/aggregation/histogram/ApproximateHistogramTest.java @@ -48,6 +48,12 @@ public class ApproximateHistogramTest static final float[] VALUES5 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; static final float[] VALUES6 = {1f, 1.5f, 2f, 2.5f, 3f, 3.5f, 4f, 4.5f, 5f, 5.5f, 6f, 6.5f, 7f, 7.5f, 8f, 8.5f, 9f, 9.5f, 10f}; + // Based on the example from https://metamarkets.com/2013/histograms/ + // This dataset can make getQuantiles() return values exceeding max + // for example: q=0.95 returns 25.16 when max=25 + static final float[] VALUES7 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 12, 12, + 15, 20, 25, 25, 25}; + protected ApproximateHistogram buildHistogram(int size, float[] values) { ApproximateHistogram h = new ApproximateHistogram(size); @@ -393,6 +399,35 @@ public class ApproximateHistogramTest ); } + @Test + public void testQuantileBetweenMinMax() + { + ApproximateHistogram h = buildHistogram(20, VALUES7); + + Assert.assertTrue( + "min value incorrect", + VALUES7[0] == h.min() + ); + Assert.assertTrue( + "max value incorrect", + VALUES7[VALUES7.length - 1] == h.max() + ); + + Assert.assertArrayEquals( + "expected quantiles match actual quantiles", + new float[]{1.8f, 3.6f, 5.4f, 7.2f, 9f, 11.05f, 12.37f, 17f, 23.5f}, + h.getQuantiles(new float[]{.1f, .2f, .3f, .4f, .5f, .6f, .7f, .8f, + .9f}), 0.1f + ); + + // Test for outliers (0.05f and 0.95f, which should be min <= value <= max) + Assert.assertArrayEquals( + "expected quantiles match actual quantiles", + new float[]{h.min(), h.max()}, + h.getQuantiles(new float[]{.05f, .95f}), 0.1f + ); + } + @Test public void testQuantileBigger() {