mirror of
https://github.com/apache/commons-math.git
synced 2025-02-14 22:16:48 +00:00
Fixed error in computing discrete distribution of D statistics for small-sample
2-sample Kolmogorov-Smirnov tests. The error caused incorrect p-values to be returned by the exactP and monteCarloP methods (used by default for small and mid-size samples). JIRA: MATH-1245
This commit is contained in:
parent
759fed8a76
commit
7a6aa92c8a
@ -51,6 +51,11 @@ If the output is not quite correct, check for invisible trailing spaces!
|
||||
</properties>
|
||||
<body>
|
||||
<release version="3.6" date="XXXX-XX-XX" description="">
|
||||
<action dev="psteitz" type="fix" issue="MATH-1245">
|
||||
Fixed error in computing discrete distribution of D statistics for small-sample
|
||||
2-sample Kolmogorov-Smirnov tests. The error caused incorrect p-values to be returned
|
||||
by the exactP and monteCarloP methods (used by default for small and mid-size samples).
|
||||
</action>
|
||||
<action dev="tn" type="fix" issue="MATH-1240">
|
||||
"KolmogorovSmirnovTest#ksSum(...)" returned wrong result in case the provided
|
||||
t-parameter was zero. This affected the calculation of "approximateP(...)" for
|
||||
|
@ -21,6 +21,7 @@ import java.math.BigDecimal;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.commons.math3.util.Precision;
|
||||
import org.apache.commons.math3.distribution.RealDistribution;
|
||||
import org.apache.commons.math3.exception.InsufficientDataException;
|
||||
import org.apache.commons.math3.exception.MathArithmeticException;
|
||||
@ -885,6 +886,7 @@ public class KolmogorovSmirnovTest {
|
||||
long tail = 0;
|
||||
final double[] nSet = new double[n];
|
||||
final double[] mSet = new double[m];
|
||||
final double tol = 1e-12; // d-values within tol of one another are considered equal
|
||||
while (combinationsIterator.hasNext()) {
|
||||
// Generate an n-set
|
||||
final int[] nSetI = combinationsIterator.next();
|
||||
@ -899,9 +901,8 @@ public class KolmogorovSmirnovTest {
|
||||
}
|
||||
}
|
||||
final double curD = kolmogorovSmirnovStatistic(nSet, mSet);
|
||||
if (curD > d) {
|
||||
tail++;
|
||||
} else if (curD == d && !strict) {
|
||||
final int order = Precision.compareTo(curD, d, tol);
|
||||
if (order > 0 || (order == 0 && !strict)) {
|
||||
tail++;
|
||||
}
|
||||
}
|
||||
@ -957,6 +958,7 @@ public class KolmogorovSmirnovTest {
|
||||
final int nn = FastMath.max(n, m);
|
||||
final int mm = FastMath.min(n, m);
|
||||
final int sum = nn + mm;
|
||||
final double tol = 1e-12; // d-values within tol of one another are considered equal
|
||||
|
||||
int tail = 0;
|
||||
final boolean b[] = new boolean[sum];
|
||||
@ -978,7 +980,8 @@ public class KolmogorovSmirnovTest {
|
||||
final double cdf_n = rankN / (double) nn;
|
||||
final double cdf_m = rankM / (double) mm;
|
||||
final double curD = FastMath.abs(cdf_n - cdf_m);
|
||||
if (curD > d || (curD == d && !strict)) {
|
||||
final int order = Precision.compareTo(curD, d, tol);
|
||||
if (order > 0 || (order == 0 && !strict)) {
|
||||
tail++;
|
||||
break;
|
||||
}
|
||||
|
@ -323,7 +323,7 @@ public class KolmogorovSmirnovTestTest {
|
||||
*/
|
||||
// @Test
|
||||
public void testTwoSampleMonteCarloPerformance() {
|
||||
int numIterations = 100_000;
|
||||
int numIterations = 100000;
|
||||
int N = (int)Math.sqrt(KolmogorovSmirnovTest.LARGE_SAMPLE_PRODUCT);
|
||||
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(new Well19937c(1000));
|
||||
for (int n = 2; n <= N; ++n) {
|
||||
@ -400,7 +400,7 @@ public class KolmogorovSmirnovTestTest {
|
||||
|
||||
@Test
|
||||
public void testTwoSamplesAllEqual() {
|
||||
int iterations = 10_000;
|
||||
int iterations = 10000;
|
||||
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
|
||||
for (int i = 2; i < 30; ++i) {
|
||||
// testing values with ties
|
||||
@ -427,6 +427,57 @@ public class KolmogorovSmirnovTestTest {
|
||||
Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* JIRA: MATH-1245
|
||||
*
|
||||
* Verify that D-values are not viewed as distinct when they are mathematically equal
|
||||
* when computing p-values for small-sample tests. Reference values are from R 3.2.0.
|
||||
*/
|
||||
@Test
|
||||
public void testDRounding() {
|
||||
final double tol = 1e-12;
|
||||
final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12};
|
||||
final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18};
|
||||
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
|
||||
Assert.assertEquals(0.0027495724090154106, test.kolmogorovSmirnovTest(x, y,false), tol);
|
||||
|
||||
final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13};
|
||||
final double[] y1 = {0, 1, 3, 5, 7};
|
||||
Assert.assertEquals(0.085914085914085896, test.kolmogorovSmirnovTest(x1, y1, false), tol);
|
||||
|
||||
final double[] x2 = {4, 6, 7, 8, 9, 10, 11};
|
||||
final double[] y2 = {0, 1, 2, 3, 5};
|
||||
Assert.assertEquals(0.015151515151515027, test.kolmogorovSmirnovTest(x2, y2, false), tol);
|
||||
}
|
||||
|
||||
/**
|
||||
* JIRA: MATH-1245
|
||||
*
|
||||
* Verify that D-values are not viewed as distinct when they are mathematically equal
|
||||
* when computing p-values for small-sample tests. Reference values are from R 3.2.0.
|
||||
*/
|
||||
@Test
|
||||
public void testDRoundingMonteCarlo() {
|
||||
final double tol = 1e-2;
|
||||
final int iterations = 1000000;
|
||||
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(new Well19937c(1000));
|
||||
|
||||
final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12};
|
||||
final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18};
|
||||
double d = test.kolmogorovSmirnovStatistic(x, y);
|
||||
Assert.assertEquals(0.0027495724090154106, test.monteCarloP(d, x.length, y.length, false, iterations), tol);
|
||||
|
||||
final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13};
|
||||
final double[] y1 = {0, 1, 3, 5, 7};
|
||||
d = test.kolmogorovSmirnovStatistic(x1, y1);
|
||||
Assert.assertEquals(0.085914085914085896, test.monteCarloP(d, x1.length, y1.length, false, iterations), tol);
|
||||
|
||||
final double[] x2 = {4, 6, 7, 8, 9, 10, 11};
|
||||
final double[] y2 = {0, 1, 2, 3, 5};
|
||||
d = test.kolmogorovSmirnovStatistic(x2, y2);
|
||||
Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations), tol);
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies the inequality exactP(criticalValue, n, m, true) < alpha < exactP(criticalValue, n,
|
||||
|
Loading…
x
Reference in New Issue
Block a user