Fixed integer overflow in KolmogorovSmirnovTest causing 2-sample test

to use exact method when the product of the sample sizes exceeds
Integer.MAX_VALUE, resulting in effectively hung execution.

JIRA: MATH-1181
Reported by Gilad
This commit is contained in:
Phil Steitz 2014-12-15 06:44:53 -07:00
parent 7fb571b16c
commit 2fb2221d48
3 changed files with 24 additions and 2 deletions

View File

@ -79,6 +79,11 @@ Users are encouraged to upgrade to this version as this release not
2. A few methods in the FastMath class are in fact slower than their
counterparts in either Math or StrictMath (cf. MATH-740 and MATH-901).
">
<action dev="psteitz" type="fix" issue="MATH-1181">
Fixed integer overflow in KolmogorovSmirnovTest causing 2-sample test
to use exact method when the product of the sample sizes exceeds
Integer.MAX_VALUE, resulting in effectively hung execution.
</action>
<action dev="erans" type="add" issue="MATH-1180">
Method to create a sequence of integers (in "o.a.c.m.util.MathArrays").
</action>

View File

@ -240,10 +240,11 @@ public class KolmogorovSmirnovTest {
* @throws NullArgumentException if either {@code x} or {@code y} is null
*/
public double kolmogorovSmirnovTest(double[] x, double[] y, boolean strict) {
if (x.length * y.length < SMALL_SAMPLE_PRODUCT) {
final long lengthProduct = (long) x.length * y.length;
if (lengthProduct < SMALL_SAMPLE_PRODUCT) {
return exactP(kolmogorovSmirnovStatistic(x, y), x.length, y.length, strict);
}
if (x.length * y.length < LARGE_SAMPLE_PRODUCT) {
if (lengthProduct < LARGE_SAMPLE_PRODUCT) {
return monteCarloP(kolmogorovSmirnovStatistic(x, y), x.length, y.length, strict, MONTE_CARLO_ITERATIONS);
}
return approximateP(kolmogorovSmirnovStatistic(x, y), x.length, y.length);

View File

@ -254,6 +254,22 @@ public class KolmogorovSmirnovTestTest {
Assert.assertEquals(0.0319983962391632, test.kolmogorovSmirnovTest(gaussian, gaussian2), TOLERANCE);
Assert.assertEquals(0.202352941176471, test.kolmogorovSmirnovStatistic(gaussian, gaussian2), TOLERANCE);
}
/**
 * MATH-1181
 * Regression check for integer overflow in the product of the two sample sizes:
 * with samples whose int size product exceeds Integer.MAX_VALUE, the two-sample
 * test must return a non-NaN p-value within the 5 second timeout.
 */
@Test(timeout=5000)
public void testTwoSampleProductSizeOverflow() {
    final int sampleSize = 50000;
    // Sanity check: the int product of the sizes wraps around to a negative value.
    Assert.assertTrue(sampleSize * sampleSize < 0);
    final double[] first = new double[sampleSize];
    final double[] second = new double[sampleSize];
    final KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
    final double pValue = ksTest.kolmogorovSmirnovTest(first, second);
    Assert.assertFalse(Double.isNaN(pValue));
}
/**
* Verifies that Monte Carlo simulation gives results close to exact p values. This test is a