Removed deprecated instance field and associated constructors.

The RNG instance is passed as argument to the methods that require it.
This commit is contained in:
Gilles 2017-05-10 14:41:17 +02:00
parent 5d87a88952
commit 10e3811403
4 changed files with 89 additions and 101 deletions

View File

@ -18,6 +18,7 @@ package org.apache.commons.math4.stat.inference;
import java.util.Collection;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.math4.distribution.RealDistribution;
import org.apache.commons.math4.exception.ConvergenceException;
import org.apache.commons.math4.exception.DimensionMismatchException;
@ -728,13 +729,14 @@ public class InferenceTestUtils {
* @param m second sample size
* @param iterations number of random partitions to generate
* @param strict whether or not the probability to compute is expressed as a strict inequality
* @param rng RNG used for generating the partitions.
* @return proportion of randomly generated m-n partitions of m + n that result in \(D_{n,m}\)
* greater than (resp. greater than or equal to) {@code d}
* @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#monteCarloP(double, int, int, boolean, int)
* greater than (resp. greater than or equal to) {@code d}
* @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#monteCarloP(double,int,int,boolean,int,UniformRandomProvider)
* @since 3.3
*/
public static double monteCarloP(double d, int n, int m, boolean strict, int iterations) {
return KS_TEST.monteCarloP(d, n, m, strict, iterations);
public static double monteCarloP(double d, int n, int m, boolean strict, int iterations, UniformRandomProvider rng) {
return KS_TEST.monteCarloP(d, n, m, strict, iterations, rng);
}

View File

@ -20,6 +20,8 @@ package org.apache.commons.math4.stat.inference;
import java.math.BigDecimal;
import java.util.Arrays;
import org.apache.commons.rng.simple.RandomSource;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.math4.distribution.EnumeratedRealDistribution;
import org.apache.commons.math4.distribution.RealDistribution;
import org.apache.commons.math4.distribution.AbstractRealDistribution;
@ -39,8 +41,6 @@ import org.apache.commons.math4.linear.Array2DRowFieldMatrix;
import org.apache.commons.math4.linear.FieldMatrix;
import org.apache.commons.math4.linear.MatrixUtils;
import org.apache.commons.math4.linear.RealMatrix;
import org.apache.commons.rng.simple.RandomSource;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.math4.util.CombinatoricsUtils;
import org.apache.commons.math4.util.FastMath;
import org.apache.commons.math4.util.MathArrays;
@ -76,7 +76,7 @@ import org.apache.commons.math4.util.MathUtils;
* </ul><p>
* If the product of the sample sizes is less than {@value #LARGE_SAMPLE_PRODUCT} and the sample
* data contains ties, random jitter is added to the sample data to break ties before applying
* the algorithm above. Alternatively, the {@link #bootstrap(double[], double[], int, boolean)}
* the algorithm above. Alternatively, the {@link #bootstrap(double[],double[],int,boolean,UniformRandomProvider)}
* method, modeled after <a href="http://sekhon.berkeley.edu/matching/ks.boot.html">ks.boot</a>
* in the R Matching package [3], can be used if ties are known to be present in the data.
* </p>
@ -137,36 +137,11 @@ public class KolmogorovSmirnovTest {
*/
protected static final int LARGE_SAMPLE_PRODUCT = 10000;
/** Default number of iterations used by {@link #monteCarloP(double, int, int, boolean, int)}.
/** Default number of iterations used by {@link #monteCarloP(double,int,int,boolean,int,UniformRandomProvider)}.
* Deprecated as of version 3.6, as this method is no longer needed. */
@Deprecated
protected static final int MONTE_CARLO_ITERATIONS = 1000000;
/** No longer used. */
@Deprecated
private final UniformRandomProvider rng;
/**
* Construct a KolmogorovSmirnovTest instance with a default random data generator.
*/
public KolmogorovSmirnovTest() {
rng = RandomSource.create(RandomSource.WELL_19937_C);
}
/**
* Construct a KolmogorovSmirnovTest with the provided random data generator.
* The #monteCarloP(double, int, int, boolean, int) that uses the generator supplied to this
* constructor is deprecated as of version 3.6.
*
* @param source random data generator used by {@link #monteCarloP(double, int, int, boolean, int)}
* @param seed Seed.
*/
@Deprecated
public KolmogorovSmirnovTest(RandomSource source,
long seed) {
rng = RandomSource.create(source, seed);
}
/**
* Computes the <i>p-value</i>, or <i>observed significance level</i>, of a one-sample <a
* href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
@ -239,7 +214,7 @@ public class KolmogorovSmirnovTest {
* on (-minDelta / 2, minDelta / 2) where minDelta is the smallest pairwise difference between
* values in the combined sample.</p>
* <p>
* If ties are known to be present in the data, {@link #bootstrap(double[], double[], int, boolean)}
* If ties are known to be present in the data, {@link #bootstrap(double[],double[],int,boolean,UniformRandomProvider)}
* may be used as an alternative method for estimating the p-value.</p>
*
* @param x first sample dataset.
@ -252,7 +227,7 @@ public class KolmogorovSmirnovTest {
* not have length at least 2.
* @throws NullArgumentException if either {@code x} or {@code y} is null.
* @throws NotANumberException if the input arrays contain NaN values.
* @see #bootstrap(double[], double[], int, boolean)
* @see #bootstrap(double[],double[],int,boolean,UniformRandomProvider)
*/
public double kolmogorovSmirnovTest(double[] x, double[] y, boolean strict) {
final long lengthProduct = (long) x.length * y.length;
@ -398,23 +373,31 @@ public class KolmogorovSmirnovTest {
/**
* Estimates the <i>p-value</i> of a two-sample
* <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
* evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same
* probability distribution. This method estimates the p-value by repeatedly sampling sets of size
* {@code x.length} and {@code y.length} from the empirical distribution of the combined sample.
* When {@code strict} is true, this is equivalent to the algorithm implemented in the R function
* {@code ks.boot}, described in <pre>
* <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">Kolmogorov-Smirnov test</a>
* evaluating the null hypothesis that {@code x} and {@code y} are samples
* drawn from the same probability distribution.
* This method estimates the p-value by repeatedly sampling sets of size
* {@code x.length} and {@code y.length} from the empirical distribution
* of the combined sample.
* When {@code strict} is true, this is equivalent to the algorithm implemented
* in the R function {@code ks.boot}, described in <pre>
* Jasjeet S. Sekhon. 2011. 'Multivariate and Propensity Score Matching
* Software with Automated Balance Optimization: The Matching package for R.'
* Journal of Statistical Software, 42(7): 1-52.
* </pre>
* @param x first sample
* @param y second sample
* @param iterations number of bootstrap resampling iterations
* @param strict whether or not the null hypothesis is expressed as a strict inequality
* @return estimated p-value
*
* @param x First sample.
* @param y Second sample.
* @param iterations Number of bootstrap resampling iterations.
* @param strict Whether or not the null hypothesis is expressed as a strict inequality.
* @param rng RNG for creating the sampling sets.
* @return the estimated p-value.
*/
public double bootstrap(double[] x, double[] y, int iterations, boolean strict) {
public double bootstrap(double[] x,
double[] y,
int iterations,
boolean strict,
UniformRandomProvider rng) {
final int xLength = x.length;
final int yLength = y.length;
final double[] combined = new double[xLength + yLength];
@ -441,20 +424,6 @@ public class KolmogorovSmirnovTest {
(greaterCount + equalCount) / (double) iterations;
}
/**
* Computes {@code bootstrap(x, y, iterations, true)}.
* This is equivalent to ks.boot(x,y, nboots=iterations) using the R Matching
* package function. See #bootstrap(double[], double[], int, boolean).
*
* @param x first sample
* @param y second sample
* @param iterations number of bootstrap resampling iterations
* @return estimated p-value
*/
public double bootstrap(double[] x, double[] y, int iterations) {
return bootstrap(x, y, iterations, true);
}
/**
* Calculates \(P(D_n &lt; d)\) using the method described in [1] with quick decisions for extreme
* values given in [2] (see above). The result is not exact as with
@ -1061,36 +1030,45 @@ public class KolmogorovSmirnovTest {
* {@code d} if {@code strict} is {@code false}.
* </p>
*
* @param d D-statistic value
* @param n first sample size
* @param m second sample size
* @param iterations number of random partitions to generate
* @param d D-statistic value.
* @param n First sample size.
* @param m Second sample size.
* @param iterations Number of random partitions to generate.
* @param strict whether or not the probability to compute is expressed as a strict inequality
* @param rng RNG used for generating the partitions.
* @return proportion of randomly generated m-n partitions of m + n that result in \(D_{n,m}\)
* greater than (resp. greater than or equal to) {@code d}
* greater than (resp. greater than or equal to) {@code d}.
*/
public double monteCarloP(final double d, final int n, final int m, final boolean strict,
final int iterations) {
return integralMonteCarloP(calculateIntegralD(d, n, m, strict), n, m, iterations);
public double monteCarloP(final double d,
final int n,
final int m,
final boolean strict,
final int iterations,
UniformRandomProvider rng) {
return integralMonteCarloP(calculateIntegralD(d, n, m, strict), n, m, iterations, rng);
}
/**
* Uses Monte Carlo simulation to approximate \(P(D_{n,m} >= d/(n*m))\) where \(D_{n,m}\) is the
* 2-sample Kolmogorov-Smirnov statistic.
* Uses Monte Carlo simulation to approximate \(P(D_{n,m} >= d / (n * m))\)
* where \(D_{n,m}\) is the 2-sample Kolmogorov-Smirnov statistic.
* <p>
* Here d is the D-statistic represented as long value.
* The real D-statistic is obtained by dividing d by n*m.
* See also {@link #monteCarloP(double, int, int, boolean, int)}.
* Here {@code d} is the D-statistic represented as long value.
* The real D-statistic is obtained by dividing {@code d} by {@code n * m}.
* See also {@link #monteCarloP(double,int,int,boolean,int,UniformRandomProvider)}.
*
* @param d integral D-statistic
* @param n first sample size
* @param m second sample size
* @param iterations number of random partitions to generate
* @param d Integral D-statistic.
* @param n First sample size.
* @param m Second sample size.
* @param iterations Number of random partitions to generate.
* @param rng RNG used for generating the partitions.
* @return proportion of randomly generated m-n partitions of m + n that result in \(D_{n,m}\)
* greater than or equal to {@code d/(n*m))}
* greater than or equal to {@code d / (n * m))}.
*/
private double integralMonteCarloP(final long d, final int n, final int m, final int iterations) {
private double integralMonteCarloP(final long d,
final int n,
final int m,
final int iterations,
UniformRandomProvider rng) {
// ensure that nn is always the max of (n, m) to require fewer random numbers
final int nn = FastMath.max(n, m);
final int mm = FastMath.min(n, m);

View File

@ -354,7 +354,7 @@ public class BetaDistributionTest {
Assert.assertFalse("G goodness-of-fit test rejected null at alpha = " + level,
gTest(betaDistribution, observed) < level);
Assert.assertFalse("KS goodness-of-fit test rejected null at alpha = " + level,
new KolmogorovSmirnovTest(RandomSource.JDK, 3448845623L).kolmogorovSmirnovTest(betaDistribution, observed) < level);
new KolmogorovSmirnovTest().kolmogorovSmirnovTest(betaDistribution, observed) < level);
}
}
}

View File

@ -319,7 +319,8 @@ public class KolmogorovSmirnovTestTest {
*/
@Test
public void testTwoSampleMonteCarlo() {
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
final int sampleSize = 14;
final double tol = .001;
final double[] shortUniform = new double[sampleSize];
@ -336,9 +337,9 @@ public class KolmogorovSmirnovTestTest {
double exactPStrict = test.exactP(dv, sampleSize, sampleSize, true);
double exactPNonStrict = test.exactP(dv, sampleSize, sampleSize, false);
double montePStrict = test.monteCarloP(dv, sampleSize, sampleSize, true,
KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS);
KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS, rng);
double montePNonStrict = test.monteCarloP(dv, sampleSize, sampleSize, false,
KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS);
KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS, rng);
Assert.assertEquals(exactPStrict, montePStrict, tol);
Assert.assertEquals(exactPNonStrict, montePNonStrict, tol);
}
@ -346,7 +347,8 @@ public class KolmogorovSmirnovTestTest {
@Test
public void testTwoSampleMonteCarloDifferentSampleSizes() {
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
final int sampleSize1 = 14;
final int sampleSize2 = 7;
final double d = 0.3;
@ -354,7 +356,7 @@ public class KolmogorovSmirnovTestTest {
final double tol = 1e-2;
Assert.assertEquals(test.exactP(d, sampleSize1, sampleSize2, strict),
test.monteCarloP(d, sampleSize1, sampleSize2, strict,
KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS),
KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS, rng),
tol);
}
@ -365,11 +367,12 @@ public class KolmogorovSmirnovTestTest {
public void testTwoSampleMonteCarloPerformance() {
int numIterations = 100_000;
int N = (int)Math.sqrt(KolmogorovSmirnovTest.LARGE_SAMPLE_PRODUCT);
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
for (int n = 2; n <= N; ++n) {
long startMillis = System.currentTimeMillis();
int m = KolmogorovSmirnovTest.LARGE_SAMPLE_PRODUCT/n;
Assert.assertEquals(0d, test.monteCarloP(Double.POSITIVE_INFINITY, n, m, true, numIterations), 0d);
Assert.assertEquals(0d, test.monteCarloP(Double.POSITIVE_INFINITY, n, m, true, numIterations, rng), 0d);
long endMillis = System.currentTimeMillis();
System.out.println("n=" + n + ", m=" + m + ", time=" + (endMillis-startMillis)/1000d + "s");
}
@ -531,6 +534,7 @@ public class KolmogorovSmirnovTestTest {
public void testTwoSamplesAllEqual() {
int iterations = 10_000;
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
for (int i = 2; i < 30; ++i) {
// testing values with ties
double[] values = new double[i];
@ -549,8 +553,8 @@ public class KolmogorovSmirnovTestTest {
Assert.assertEquals(1.0, test.exactP(0, values.length, values.length, false), 0.);
}
Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, true, iterations), 0.);
Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, false, iterations), 0.);
Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, true, iterations, rng), 0.);
Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, false, iterations, rng), 0.);
Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.);
Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.);
@ -590,22 +594,23 @@ public class KolmogorovSmirnovTestTest {
public void testDRoundingMonteCarlo() {
final double tol = 1e-2;
final int iterations = 1000000;
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12};
final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18};
double d = test.kolmogorovSmirnovStatistic(x, y);
Assert.assertEquals(0.0027495724090154106, test.monteCarloP(d, x.length, y.length, false, iterations), tol);
Assert.assertEquals(0.0027495724090154106, test.monteCarloP(d, x.length, y.length, false, iterations, rng), tol);
final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13};
final double[] y1 = {0, 1, 3, 5, 7};
d = test.kolmogorovSmirnovStatistic(x1, y1);
Assert.assertEquals(0.085914085914085896, test.monteCarloP(d, x1.length, y1.length, false, iterations), tol);
Assert.assertEquals(0.085914085914085896, test.monteCarloP(d, x1.length, y1.length, false, iterations, rng), tol);
final double[] x2 = {4, 6, 7, 8, 9, 10, 11};
final double[] y2 = {0, 1, 2, 3, 5};
d = test.kolmogorovSmirnovStatistic(x2, y2);
Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations), tol);
Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations, rng), tol);
}
@Test
@ -669,8 +674,9 @@ public class KolmogorovSmirnovTestTest {
public void testBootstrapSmallSamplesWithTies() {
final double[] x = {0, 2, 4, 6, 8, 8, 10, 15, 22, 30, 33, 36, 38};
final double[] y = {9, 17, 20, 33, 40, 51, 60, 60, 72, 90, 101};
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 2000);
Assert.assertEquals(0.0059, test.bootstrap(x, y, 10000, false), 1E-3);
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 2000);
Assert.assertEquals(0.0059, test.bootstrap(x, y, 10000, false, rng), 1E-3);
}
/**
@ -679,8 +685,9 @@ public class KolmogorovSmirnovTestTest {
*/
@Test
public void testBootstrapLargeSamples() {
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
Assert.assertEquals(0.0237, test.bootstrap(gaussian, gaussian2, 10000), 1E-2);
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
Assert.assertEquals(0.0237, test.bootstrap(gaussian, gaussian2, 10000, true, rng), 1E-2);
}
/**
@ -692,8 +699,9 @@ public class KolmogorovSmirnovTestTest {
public void testBootstrapRounding() {
final double[] x = {2,4,6,8,9,10,11,12,13};
final double[] y = {0,1,3,5,7};
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
Assert.assertEquals(0.06303, test.bootstrap(x, y, 10000, false), 1E-2);
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
Assert.assertEquals(0.06303, test.bootstrap(x, y, 10000, false, rng), 1E-2);
}
@Test