From 2fb2221d487d925fd5d716173a80c798986aadf0 Mon Sep 17 00:00:00 2001 From: Phil Steitz Date: Mon, 15 Dec 2014 06:44:53 -0700 Subject: [PATCH] Fixed integer overflow in KolmogorovSmirnovTest causing 2-sample test to use exact method when the product of the sample sizes exceeds Integer.MAX_VALUE, resulting in effectively hung execution. JIRA: MATH-1181 Reported by Gilad --- src/changes/changes.xml | 5 +++++ .../stat/inference/KolmogorovSmirnovTest.java | 5 +++-- .../inference/KolmogorovSmirnovTestTest.java | 16 ++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index e41d4ddbc..034e13c7e 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -79,6 +79,11 @@ Users are encouraged to upgrade to this version as this release not 2. A few methods in the FastMath class are in fact slower that their counterpart in either Math or StrictMath (cf. MATH-740 and MATH-901). "> + + Fixed integer overflow in KolmogorovSmirnovTest causing 2-sample test + to use exact method when the product of the sample sizes exceeds + Integer.MAX_VALUE, resulting in effectively hung execution. + Method to create a sequence of integers (in "o.a.c.m.util.MathArrays"). diff --git a/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java b/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java index 6154eeb7b..131d0c656 100644 --- a/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java +++ b/src/main/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTest.java @@ -240,10 +240,11 @@ public class KolmogorovSmirnovTest { * @throws NullArgumentException if either {@code x} or {@code y} is null */ public double kolmogorovSmirnovTest(double[] x, double[] y, boolean strict) { - if (x.length * y.length < SMALL_SAMPLE_PRODUCT) { + final long lengthProduct = (long) x.length * y.length; + if (lengthProduct < SMALL_SAMPLE_PRODUCT) { return exactP(kolmogorovSmirnovStatistic(x, y), x.length, y.length, strict); } - if (x.length * y.length < LARGE_SAMPLE_PRODUCT) { + if (lengthProduct < LARGE_SAMPLE_PRODUCT) { return monteCarloP(kolmogorovSmirnovStatistic(x, y), x.length, y.length, strict, MONTE_CARLO_ITERATIONS); } return approximateP(kolmogorovSmirnovStatistic(x, y), x.length, y.length); diff --git a/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java b/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java index 98baf6d8f..9ac5c7cf5 100644 --- a/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java +++ b/src/test/java/org/apache/commons/math3/stat/inference/KolmogorovSmirnovTestTest.java @@ -254,6 +254,22 @@ public class KolmogorovSmirnovTestTest { Assert.assertEquals(0.0319983962391632, test.kolmogorovSmirnovTest(gaussian, gaussian2), TOLERANCE); Assert.assertEquals(0.202352941176471, test.kolmogorovSmirnovStatistic(gaussian, gaussian2), TOLERANCE); } + + /** + * MATH-1181 + * Verify that large sample method is selected for sample product > Integer.MAX_VALUE + * (integer overflow in sample product) + */ + @Test(timeout=5000) + public void testTwoSampleProductSizeOverflow() { + final int n = 50000; + Assert.assertTrue(n * n < 0); + double[] x = new double[n]; + double[] y = new double[n]; + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + Assert.assertFalse(Double.isNaN(test.kolmogorovSmirnovTest(x, y))); + } + /** * Verifies that Monte Carlo simulation gives results close to exact p values. This test is a