From 10e38114032cb4c585b29e946e1d70e05d52a878 Mon Sep 17 00:00:00 2001 From: Gilles Date: Wed, 10 May 2017 14:41:17 +0200 Subject: [PATCH] Removed deprecated instance field and associated constructors. The RNG instance is passed as argument to the methods that require it. --- .../stat/inference/InferenceTestUtils.java | 10 +- .../stat/inference/KolmogorovSmirnovTest.java | 132 ++++++++---------- .../distribution/BetaDistributionTest.java | 2 +- .../inference/KolmogorovSmirnovTestTest.java | 46 +++--- 4 files changed, 89 insertions(+), 101 deletions(-) diff --git a/src/main/java/org/apache/commons/math4/stat/inference/InferenceTestUtils.java b/src/main/java/org/apache/commons/math4/stat/inference/InferenceTestUtils.java index a06e3d165..c9e70d415 100644 --- a/src/main/java/org/apache/commons/math4/stat/inference/InferenceTestUtils.java +++ b/src/main/java/org/apache/commons/math4/stat/inference/InferenceTestUtils.java @@ -18,6 +18,7 @@ package org.apache.commons.math4.stat.inference; import java.util.Collection; +import org.apache.commons.rng.UniformRandomProvider; import org.apache.commons.math4.distribution.RealDistribution; import org.apache.commons.math4.exception.ConvergenceException; import org.apache.commons.math4.exception.DimensionMismatchException; @@ -728,13 +729,14 @@ public class InferenceTestUtils { * @param m second sample size * @param iterations number of random partitions to generate * @param strict whether or not the probability to compute is expressed as a strict inequality + * @param rng RNG used for generating the partitions. * @return proportion of randomly generated m-n partitions of m + n that result in \(D_{n,m}\) - * greater than (resp. greater than or equal to) {@code d} - * @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#monteCarloP(double, int, int, boolean, int) + * greater than (resp. 
greater than or equal to) {@code d} + * @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#monteCarloP(double,int,int,boolean,int,UniformRandomProvider) * @since 3.3 */ - public static double monteCarloP(double d, int n, int m, boolean strict, int iterations) { - return KS_TEST.monteCarloP(d, n, m, strict, iterations); + public static double monteCarloP(double d, int n, int m, boolean strict, int iterations, UniformRandomProvider rng) { + return KS_TEST.monteCarloP(d, n, m, strict, iterations, rng); } diff --git a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java index 4950420e3..71bd73b2e 100644 --- a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java +++ b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java @@ -20,6 +20,8 @@ package org.apache.commons.math4.stat.inference; import java.math.BigDecimal; import java.util.Arrays; +import org.apache.commons.rng.simple.RandomSource; +import org.apache.commons.rng.UniformRandomProvider; import org.apache.commons.math4.distribution.EnumeratedRealDistribution; import org.apache.commons.math4.distribution.RealDistribution; import org.apache.commons.math4.distribution.AbstractRealDistribution; @@ -39,8 +41,6 @@ import org.apache.commons.math4.linear.Array2DRowFieldMatrix; import org.apache.commons.math4.linear.FieldMatrix; import org.apache.commons.math4.linear.MatrixUtils; import org.apache.commons.math4.linear.RealMatrix; -import org.apache.commons.rng.simple.RandomSource; -import org.apache.commons.rng.UniformRandomProvider; import org.apache.commons.math4.util.CombinatoricsUtils; import org.apache.commons.math4.util.FastMath; import org.apache.commons.math4.util.MathArrays; @@ -76,7 +76,7 @@ import org.apache.commons.math4.util.MathUtils; *

* If the product of the sample sizes is less than {@value #LARGE_SAMPLE_PRODUCT} and the sample * data contains ties, random jitter is added to the sample data to break ties before applying - * the algorithm above. Alternatively, the {@link #bootstrap(double[], double[], int, boolean)} + * the algorithm above. Alternatively, the {@link #bootstrap(double[],double[],int,boolean,UniformRandomProvider)} * method, modeled after ks.boot * in the R Matching package [3], can be used if ties are known to be present in the data. *

@@ -137,36 +137,11 @@ public class KolmogorovSmirnovTest { */ protected static final int LARGE_SAMPLE_PRODUCT = 10000; - /** Default number of iterations used by {@link #monteCarloP(double, int, int, boolean, int)}. + /** Default number of iterations used by {@link #monteCarloP(double,int,int,boolean,int,UniformRandomProvider)}. * Deprecated as of version 3.6, as this method is no longer needed. */ @Deprecated protected static final int MONTE_CARLO_ITERATIONS = 1000000; - /** No longer used. */ - @Deprecated - private final UniformRandomProvider rng; - - /** - * Construct a KolmogorovSmirnovTest instance with a default random data generator. - */ - public KolmogorovSmirnovTest() { - rng = RandomSource.create(RandomSource.WELL_19937_C); - } - - /** - * Construct a KolmogorovSmirnovTest with the provided random data generator. - * The #monteCarloP(double, int, int, boolean, int) that uses the generator supplied to this - * constructor is deprecated as of version 3.6. - * - * @param source random data generator used by {@link #monteCarloP(double, int, int, boolean, int)} - * @param seed Seed. - */ - @Deprecated - public KolmogorovSmirnovTest(RandomSource source, - long seed) { - rng = RandomSource.create(source, seed); - } - /** * Computes the p-value, or observed significance level, of a one-sample Kolmogorov-Smirnov test @@ -239,7 +214,7 @@ public class KolmogorovSmirnovTest { * on (-minDelta / 2, minDelta / 2) where minDelta is the smallest pairwise difference between * values in the combined sample.

*

- * If ties are known to be present in the data, {@link #bootstrap(double[], double[], int, boolean)} + * If ties are known to be present in the data, {@link #bootstrap(double[],double[],int,boolean,UniformRandomProvider)} * may be used as an alternative method for estimating the p-value.

* * @param x first sample dataset. @@ -252,7 +227,7 @@ public class KolmogorovSmirnovTest { * not have length at least 2. * @throws NullArgumentException if either {@code x} or {@code y} is null. * @throws NotANumberException if the input arrays contain NaN values. - * @see #bootstrap(double[], double[], int, boolean) + * @see #bootstrap(double[],double[],int,boolean,UniformRandomProvider) */ public double kolmogorovSmirnovTest(double[] x, double[] y, boolean strict) { final long lengthProduct = (long) x.length * y.length; @@ -398,23 +373,31 @@ public class KolmogorovSmirnovTest { /** * Estimates the p-value of a two-sample - * Kolmogorov-Smirnov test - * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same - * probability distribution. This method estimates the p-value by repeatedly sampling sets of size - * {@code x.length} and {@code y.length} from the empirical distribution of the combined sample. - * When {@code strict} is true, this is equivalent to the algorithm implemented in the R function - * {@code ks.boot}, described in
+     * Kolmogorov-Smirnov test
+     * evaluating the null hypothesis that {@code x} and {@code y} are samples
+     * drawn from the same probability distribution.
+     * This method estimates the p-value by repeatedly sampling sets of size
+     * {@code x.length} and {@code y.length} from the empirical distribution
+     * of the combined sample.
+     * When {@code strict} is true, this is equivalent to the algorithm implemented
+     * in the R function {@code ks.boot}, described in 
      * Jasjeet S. Sekhon. 2011. 'Multivariate and Propensity Score Matching
      * Software with Automated Balance Optimization: The Matching package for R.'
      * Journal of Statistical Software, 42(7): 1-52.
      * 
- * @param x first sample - * @param y second sample - * @param iterations number of bootstrap resampling iterations - * @param strict whether or not the null hypothesis is expressed as a strict inequality - * @return estimated p-value + * + * @param x First sample. + * @param y Second sample. + * @param iterations Number of bootstrap resampling iterations. + * @param strict Whether or not the null hypothesis is expressed as a strict inequality. + * @param rng RNG for creating the sampling sets. + * @return the estimated p-value. */ - public double bootstrap(double[] x, double[] y, int iterations, boolean strict) { + public double bootstrap(double[] x, + double[] y, + int iterations, + boolean strict, + UniformRandomProvider rng) { final int xLength = x.length; final int yLength = y.length; final double[] combined = new double[xLength + yLength]; @@ -441,20 +424,6 @@ public class KolmogorovSmirnovTest { (greaterCount + equalCount) / (double) iterations; } - /** - * Computes {@code bootstrap(x, y, iterations, true)}. - * This is equivalent to ks.boot(x,y, nboots=iterations) using the R Matching - * package function. See #bootstrap(double[], double[], int, boolean). - * - * @param x first sample - * @param y second sample - * @param iterations number of bootstrap resampling iterations - * @return estimated p-value - */ - public double bootstrap(double[] x, double[] y, int iterations) { - return bootstrap(x, y, iterations, true); - } - /** * Calculates \(P(D_n < d)\) using the method described in [1] with quick decisions for extreme * values given in [2] (see above). The result is not exact as with @@ -1061,36 +1030,45 @@ public class KolmogorovSmirnovTest { * {@code d} if {@code strict} is {@code false}. *

* - * @param d D-statistic value - * @param n first sample size - * @param m second sample size - * @param iterations number of random partitions to generate + * @param d D-statistic value. + * @param n First sample size. + * @param m Second sample size. + * @param iterations Number of random partitions to generate. * @param strict whether or not the probability to compute is expressed as a strict inequality + * @param rng RNG used for generating the partitions. * @return proportion of randomly generated m-n partitions of m + n that result in \(D_{n,m}\) - * greater than (resp. greater than or equal to) {@code d} + * greater than (resp. greater than or equal to) {@code d}. */ - public double monteCarloP(final double d, final int n, final int m, final boolean strict, - final int iterations) { - return integralMonteCarloP(calculateIntegralD(d, n, m, strict), n, m, iterations); + public double monteCarloP(final double d, + final int n, + final int m, + final boolean strict, + final int iterations, + UniformRandomProvider rng) { + return integralMonteCarloP(calculateIntegralD(d, n, m, strict), n, m, iterations, rng); } /** - * Uses Monte Carlo simulation to approximate \(P(D_{n,m} >= d/(n*m))\) where \(D_{n,m}\) is the - * 2-sample Kolmogorov-Smirnov statistic. + * Uses Monte Carlo simulation to approximate \(P(D_{n,m} >= d / (n * m))\) + * where \(D_{n,m}\) is the 2-sample Kolmogorov-Smirnov statistic. *

- * Here d is the D-statistic represented as long value. - * The real D-statistic is obtained by dividing d by n*m. - * See also {@link #monteCarloP(double, int, int, boolean, int)}. + * Here {@code d} is the D-statistic represented as long value. + * The real D-statistic is obtained by dividing {@code d} by {@code n * m}. + * See also {@link #monteCarloP(double,int,int,boolean,int,UniformRandomProvider)}. * - * @param d integral D-statistic - * @param n first sample size - * @param m second sample size - * @param iterations number of random partitions to generate + * @param d Integral D-statistic. + * @param n First sample size. + * @param m Second sample size. + * @param iterations Number of random partitions to generate. + * @param rng RNG used for generating the partitions. * @return proportion of randomly generated m-n partitions of m + n that result in \(D_{n,m}\) - * greater than or equal to {@code d/(n*m))} + * greater than or equal to {@code d / (n * m))}. */ - private double integralMonteCarloP(final long d, final int n, final int m, final int iterations) { - + private double integralMonteCarloP(final long d, + final int n, + final int m, + final int iterations, + UniformRandomProvider rng) { // ensure that nn is always the max of (n, m) to require fewer random numbers final int nn = FastMath.max(n, m); final int mm = FastMath.min(n, m); diff --git a/src/test/java/org/apache/commons/math4/distribution/BetaDistributionTest.java b/src/test/java/org/apache/commons/math4/distribution/BetaDistributionTest.java index d414db493..cf5b58f7e 100644 --- a/src/test/java/org/apache/commons/math4/distribution/BetaDistributionTest.java +++ b/src/test/java/org/apache/commons/math4/distribution/BetaDistributionTest.java @@ -354,7 +354,7 @@ public class BetaDistributionTest { Assert.assertFalse("G goodness-of-fit test rejected null at alpha = " + level, gTest(betaDistribution, observed) < level); Assert.assertFalse("KS goodness-of-fit test rejected null at alpha = " + 
level, - new KolmogorovSmirnovTest(RandomSource.JDK, 3448845623L).kolmogorovSmirnovTest(betaDistribution, observed) < level); + new KolmogorovSmirnovTest().kolmogorovSmirnovTest(betaDistribution, observed) < level); } } } diff --git a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java index 4ad08721f..46b876425 100644 --- a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java +++ b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java @@ -319,7 +319,8 @@ public class KolmogorovSmirnovTestTest { */ @Test public void testTwoSampleMonteCarlo() { - final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000); + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000); final int sampleSize = 14; final double tol = .001; final double[] shortUniform = new double[sampleSize]; @@ -336,9 +337,9 @@ public class KolmogorovSmirnovTestTest { double exactPStrict = test.exactP(dv, sampleSize, sampleSize, true); double exactPNonStrict = test.exactP(dv, sampleSize, sampleSize, false); double montePStrict = test.monteCarloP(dv, sampleSize, sampleSize, true, - KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS); + KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS, rng); double montePNonStrict = test.monteCarloP(dv, sampleSize, sampleSize, false, - KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS); + KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS, rng); Assert.assertEquals(exactPStrict, montePStrict, tol); Assert.assertEquals(exactPNonStrict, montePNonStrict, tol); } @@ -346,7 +347,8 @@ public class KolmogorovSmirnovTestTest { @Test public void testTwoSampleMonteCarloDifferentSampleSizes() { - final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000); + final 
KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000); final int sampleSize1 = 14; final int sampleSize2 = 7; final double d = 0.3; @@ -354,7 +356,7 @@ public class KolmogorovSmirnovTestTest { final double tol = 1e-2; Assert.assertEquals(test.exactP(d, sampleSize1, sampleSize2, strict), test.monteCarloP(d, sampleSize1, sampleSize2, strict, - KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS), + KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS, rng), tol); } @@ -365,11 +367,12 @@ public class KolmogorovSmirnovTestTest { public void testTwoSampleMonteCarloPerformance() { int numIterations = 100_000; int N = (int)Math.sqrt(KolmogorovSmirnovTest.LARGE_SAMPLE_PRODUCT); - final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000); + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000); for (int n = 2; n <= N; ++n) { long startMillis = System.currentTimeMillis(); int m = KolmogorovSmirnovTest.LARGE_SAMPLE_PRODUCT/n; - Assert.assertEquals(0d, test.monteCarloP(Double.POSITIVE_INFINITY, n, m, true, numIterations), 0d); + Assert.assertEquals(0d, test.monteCarloP(Double.POSITIVE_INFINITY, n, m, true, numIterations, rng), 0d); long endMillis = System.currentTimeMillis(); System.out.println("n=" + n + ", m=" + m + ", time=" + (endMillis-startMillis)/1000d + "s"); } @@ -531,6 +534,7 @@ public class KolmogorovSmirnovTestTest { public void testTwoSamplesAllEqual() { int iterations = 10_000; final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000); for (int i = 2; i < 30; ++i) { // testing values with ties double[] values = new double[i]; @@ -549,8 +553,8 @@ public class KolmogorovSmirnovTestTest { Assert.assertEquals(1.0, test.exactP(0, values.length, values.length, 
false), 0.); } - Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, true, iterations), 0.); - Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, false, iterations), 0.); + Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, true, iterations, rng), 0.); + Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, false, iterations, rng), 0.); Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.); Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.); @@ -590,22 +594,23 @@ public class KolmogorovSmirnovTestTest { public void testDRoundingMonteCarlo() { final double tol = 1e-2; final int iterations = 1000000; - final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000); + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000); final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12}; final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18}; double d = test.kolmogorovSmirnovStatistic(x, y); - Assert.assertEquals(0.0027495724090154106, test.monteCarloP(d, x.length, y.length, false, iterations), tol); + Assert.assertEquals(0.0027495724090154106, test.monteCarloP(d, x.length, y.length, false, iterations, rng), tol); final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13}; final double[] y1 = {0, 1, 3, 5, 7}; d = test.kolmogorovSmirnovStatistic(x1, y1); - Assert.assertEquals(0.085914085914085896, test.monteCarloP(d, x1.length, y1.length, false, iterations), tol); + Assert.assertEquals(0.085914085914085896, test.monteCarloP(d, x1.length, y1.length, false, iterations, rng), tol); final double[] x2 = {4, 6, 7, 8, 9, 10, 11}; final double[] y2 = {0, 1, 2, 3, 5}; d = test.kolmogorovSmirnovStatistic(x2, y2); - Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations), tol); + 
Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations, rng), tol); } @Test @@ -669,8 +674,9 @@ public class KolmogorovSmirnovTestTest { public void testBootstrapSmallSamplesWithTies() { final double[] x = {0, 2, 4, 6, 8, 8, 10, 15, 22, 30, 33, 36, 38}; final double[] y = {9, 17, 20, 33, 40, 51, 60, 60, 72, 90, 101}; - final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 2000); - Assert.assertEquals(0.0059, test.bootstrap(x, y, 10000, false), 1E-3); + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 2000); + Assert.assertEquals(0.0059, test.bootstrap(x, y, 10000, false, rng), 1E-3); } /** @@ -679,8 +685,9 @@ public class KolmogorovSmirnovTestTest { */ @Test public void testBootstrapLargeSamples() { - final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000); - Assert.assertEquals(0.0237, test.bootstrap(gaussian, gaussian2, 10000), 1E-2); + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000); + Assert.assertEquals(0.0237, test.bootstrap(gaussian, gaussian2, 10000, true, rng), 1E-2); } /** @@ -692,8 +699,9 @@ public class KolmogorovSmirnovTestTest { public void testBootstrapRounding() { final double[] x = {2,4,6,8,9,10,11,12,13}; final double[] y = {0,1,3,5,7}; - final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000); - Assert.assertEquals(0.06303, test.bootstrap(x, y, 10000, false), 1E-2); + final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(); + final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000); + Assert.assertEquals(0.06303, test.bootstrap(x, y, 10000, false, rng), 1E-2); } @Test