Removed deprecated instance field and associated constructors.

The RNG instance is passed as an argument to the methods that require it.
2017-05-10 14:41:17 +02:00 · 2017-05-10 14:41:17 +02:00 · 10e3811403
parent 5d87a88952
commit 10e3811403
4 changed files with 89 additions and 101 deletions
--- a/src/main/java/org/apache/commons/math4/stat/inference/InferenceTestUtils.java
+++ b/src/main/java/org/apache/commons/math4/stat/inference/InferenceTestUtils.java
@ -18,6 +18,7 @@ package org.apache.commons.math4.stat.inference;

 import java.util.Collection;

+import org.apache.commons.rng.UniformRandomProvider;
 import org.apache.commons.math4.distribution.RealDistribution;
 import org.apache.commons.math4.exception.ConvergenceException;
 import org.apache.commons.math4.exception.DimensionMismatchException;
@ -728,13 +729,14 @@ public class InferenceTestUtils {
     * @param m second sample size
     * @param iterations number of random partitions to generate
     * @param strict whether or not the probability to compute is expressed as a strict inequality
+     * @param rng RNG used for generating the partitions.
     * @return proportion of randomly generated m-n partitions of m + n that result in \(D_{n,m}\)
-     *         greater than (resp. greater than or equal to) {@code d}
-     * @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#monteCarloP(double, int, int, boolean, int)
+     * greater than (resp. greater than or equal to) {@code d}
+     * @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#monteCarloP(double,int,int,boolean,int,UniformRandomProvider)
     * @since 3.3
     */
-    public static double monteCarloP(double d, int n, int m, boolean strict, int iterations) {
-        return KS_TEST.monteCarloP(d, n, m, strict, iterations);
+    public static double monteCarloP(double d, int n, int m, boolean strict, int iterations, UniformRandomProvider rng) {
+        return KS_TEST.monteCarloP(d, n, m, strict, iterations, rng);
    }


--- a/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
+++ b/src/main/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTest.java
@ -20,6 +20,8 @@ package org.apache.commons.math4.stat.inference;
 import java.math.BigDecimal;
 import java.util.Arrays;

+import org.apache.commons.rng.simple.RandomSource;
+import org.apache.commons.rng.UniformRandomProvider;
 import org.apache.commons.math4.distribution.EnumeratedRealDistribution;
 import org.apache.commons.math4.distribution.RealDistribution;
 import org.apache.commons.math4.distribution.AbstractRealDistribution;
@ -39,8 +41,6 @@ import org.apache.commons.math4.linear.Array2DRowFieldMatrix;
 import org.apache.commons.math4.linear.FieldMatrix;
 import org.apache.commons.math4.linear.MatrixUtils;
 import org.apache.commons.math4.linear.RealMatrix;
-import org.apache.commons.rng.simple.RandomSource;
-import org.apache.commons.rng.UniformRandomProvider;
 import org.apache.commons.math4.util.CombinatoricsUtils;
 import org.apache.commons.math4.util.FastMath;
 import org.apache.commons.math4.util.MathArrays;
@ -76,7 +76,7 @@ import org.apache.commons.math4.util.MathUtils;
 * </ul><p>
 * If the product of the sample sizes is less than {@value #LARGE_SAMPLE_PRODUCT} and the sample
 * data contains ties, random jitter is added to the sample data to break ties before applying
- * the algorithm above. Alternatively, the {@link #bootstrap(double[], double[], int, boolean)}
+ * the algorithm above. Alternatively, the {@link #bootstrap(double[],double[],int,boolean,UniformRandomProvider)}
 * method, modeled after <a href="http://sekhon.berkeley.edu/matching/ks.boot.html">ks.boot</a>
 * in the R Matching package [3], can be used if ties are known to be present in the data.
 * </p>
@ -137,36 +137,11 @@ public class KolmogorovSmirnovTest {
     */
    protected static final int LARGE_SAMPLE_PRODUCT = 10000;

-    /** Default number of iterations used by {@link #monteCarloP(double, int, int, boolean, int)}.
+    /** Default number of iterations used by {@link #monteCarloP(double,int,int,boolean,int,UniformRandomProvider)}.
     *  Deprecated as of version 3.6, as this method is no longer needed. */
    @Deprecated
    protected static final int MONTE_CARLO_ITERATIONS = 1000000;

-    /** No longer used. */
-    @Deprecated
-    private final UniformRandomProvider rng;
-
-    /**
-     * Construct a KolmogorovSmirnovTest instance with a default random data generator.
-     */
-    public KolmogorovSmirnovTest() {
-        rng = RandomSource.create(RandomSource.WELL_19937_C);
-    }
-
-    /**
-     * Construct a KolmogorovSmirnovTest with the provided random data generator.
-     * The #monteCarloP(double, int, int, boolean, int) that uses the generator supplied to this
-     * constructor is deprecated as of version 3.6.
-     *
-     * @param source random data generator used by {@link #monteCarloP(double, int, int, boolean, int)}
-     * @param seed Seed.
-     */
-    @Deprecated
-    public KolmogorovSmirnovTest(RandomSource source,
-                                 long seed) {
-        rng = RandomSource.create(source, seed);
-    }
-
    /**
     * Computes the <i>p-value</i>, or <i>observed significance level</i>, of a one-sample <a
     * href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
@ -239,7 +214,7 @@ public class KolmogorovSmirnovTest {
     * on (-minDelta / 2, minDelta / 2) where minDelta is the smallest pairwise difference between
     * values in the combined sample.</p>
     * <p>
-     * If ties are known to be present in the data, {@link #bootstrap(double[], double[], int, boolean)}
+     * If ties are known to be present in the data, {@link #bootstrap(double[],double[],int,boolean,UniformRandomProvider)}
     * may be used as an alternative method for estimating the p-value.</p>
     *
     * @param x first sample dataset.
@ -252,7 +227,7 @@ public class KolmogorovSmirnovTest {
     * not have length at least 2.
     * @throws NullArgumentException if either {@code x} or {@code y} is null.
     * @throws NotANumberException if the input arrays contain NaN values.
-     * @see #bootstrap(double[], double[], int, boolean)
+     * @see #bootstrap(double[],double[],int,boolean,UniformRandomProvider)
     */
    public double kolmogorovSmirnovTest(double[] x, double[] y, boolean strict) {
        final long lengthProduct = (long) x.length * y.length;
@ -398,23 +373,31 @@ public class KolmogorovSmirnovTest {

    /**
     * Estimates the <i>p-value</i> of a two-sample
-     * <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
-     * evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same
-     * probability distribution. This method estimates the p-value by repeatedly sampling sets of size
-     * {@code x.length} and {@code y.length} from the empirical distribution of the combined sample.
-     * When {@code strict} is true, this is equivalent to the algorithm implemented in the R function
-     * {@code ks.boot}, described in <pre>
+     * <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">Kolmogorov-Smirnov test</a>
+     * evaluating the null hypothesis that {@code x} and {@code y} are samples
+     * drawn from the same probability distribution.
+     * This method estimates the p-value by repeatedly sampling sets of size
+     * {@code x.length} and {@code y.length} from the empirical distribution
+     * of the combined sample.
+     * When {@code strict} is true, this is equivalent to the algorithm implemented
+     * in the R function {@code ks.boot}, described in <pre>
     * Jasjeet S. Sekhon. 2011. 'Multivariate and Propensity Score Matching
     * Software with Automated Balance Optimization: The Matching package for R.'
     * Journal of Statistical Software, 42(7): 1-52.
     * </pre>
-     * @param x first sample
-     * @param y second sample
-     * @param iterations number of bootstrap resampling iterations
-     * @param strict whether or not the null hypothesis is expressed as a strict inequality
-     * @return estimated p-value
+     *
+     * @param x First sample.
+     * @param y Second sample.
+     * @param iterations Number of bootstrap resampling iterations.
+     * @param strict Whether or not the null hypothesis is expressed as a strict inequality.
+     * @param rng RNG for creating the sampling sets.
+     * @return the estimated p-value.
     */
-    public double bootstrap(double[] x, double[] y, int iterations, boolean strict) {
+    public double bootstrap(double[] x,
+                            double[] y,
+                            int iterations,
+                            boolean strict,
+                            UniformRandomProvider rng) {
        final int xLength = x.length;
        final int yLength = y.length;
        final double[] combined = new double[xLength + yLength];
@ -441,20 +424,6 @@ public class KolmogorovSmirnovTest {
            (greaterCount + equalCount) / (double) iterations;
    }

-    /**
-     * Computes {@code bootstrap(x, y, iterations, true)}.
-     * This is equivalent to ks.boot(x,y, nboots=iterations) using the R Matching
-     * package function. See #bootstrap(double[], double[], int, boolean).
-     *
-     * @param x first sample
-     * @param y second sample
-     * @param iterations number of bootstrap resampling iterations
-     * @return estimated p-value
-     */
-    public double bootstrap(double[] x, double[] y, int iterations) {
-        return bootstrap(x, y, iterations, true);
-    }
-
    /**
     * Calculates \(P(D_n &lt; d)\) using the method described in [1] with quick decisions for extreme
     * values given in [2] (see above). The result is not exact as with
@ -1061,36 +1030,45 @@ public class KolmogorovSmirnovTest {
     * {@code d} if {@code strict} is {@code false}.
     * </p>
     *
-     * @param d D-statistic value
-     * @param n first sample size
-     * @param m second sample size
-     * @param iterations number of random partitions to generate
+     * @param d D-statistic value.
+     * @param n First sample size.
+     * @param m Second sample size.
+     * @param iterations Number of random partitions to generate.
     * @param strict whether or not the probability to compute is expressed as a strict inequality
+     * @param rng RNG used for generating the partitions.
     * @return proportion of randomly generated m-n partitions of m + n that result in \(D_{n,m}\)
-     *         greater than (resp. greater than or equal to) {@code d}
+     * greater than (resp. greater than or equal to) {@code d}.
     */
-    public double monteCarloP(final double d, final int n, final int m, final boolean strict,
-                              final int iterations) {
-        return integralMonteCarloP(calculateIntegralD(d, n, m, strict), n, m, iterations);
+    public double monteCarloP(final double d,
+                              final int n,
+                              final int m,
+                              final boolean strict,
+                              final int iterations,
+                              UniformRandomProvider rng) {
+        return integralMonteCarloP(calculateIntegralD(d, n, m, strict), n, m, iterations, rng);
    }

    /**
-     * Uses Monte Carlo simulation to approximate \(P(D_{n,m} >= d/(n*m))\) where \(D_{n,m}\) is the
-     * 2-sample Kolmogorov-Smirnov statistic.
+     * Uses Monte Carlo simulation to approximate \(P(D_{n,m} >= d / (n * m))\)
+     * where \(D_{n,m}\) is the 2-sample Kolmogorov-Smirnov statistic.
     * <p>
-     * Here d is the D-statistic represented as long value.
-     * The real D-statistic is obtained by dividing d by n*m.
-     * See also {@link #monteCarloP(double, int, int, boolean, int)}.
+     * Here {@code d} is the D-statistic represented as long value.
+     * The real D-statistic is obtained by dividing {@code d} by {@code n * m}.
+     * See also {@link #monteCarloP(double,int,int,boolean,int,UniformRandomProvider)}.
     *
-     * @param d integral D-statistic
-     * @param n first sample size
-     * @param m second sample size
-     * @param iterations number of random partitions to generate
+     * @param d Integral D-statistic.
+     * @param n First sample size.
+     * @param m Second sample size.
+     * @param iterations Number of random partitions to generate.
+     * @param rng RNG used for generating the partitions.
     * @return proportion of randomly generated m-n partitions of m + n that result in \(D_{n,m}\)
-     *         greater than or equal to {@code d/(n*m))}
+     * greater than or equal to {@code d / (n * m))}.
     */
-    private double integralMonteCarloP(final long d, final int n, final int m, final int iterations) {
-
+    private double integralMonteCarloP(final long d,
+                                       final int n,
+                                       final int m,
+                                       final int iterations,
+                                       UniformRandomProvider rng) {
        // ensure that nn is always the max of (n, m) to require fewer random numbers
        final int nn = FastMath.max(n, m);
        final int mm = FastMath.min(n, m);
--- a/src/test/java/org/apache/commons/math4/distribution/BetaDistributionTest.java
+++ b/src/test/java/org/apache/commons/math4/distribution/BetaDistributionTest.java
@ -354,7 +354,7 @@ public class BetaDistributionTest {
                Assert.assertFalse("G goodness-of-fit test rejected null at alpha = " + level,
                                   gTest(betaDistribution, observed) < level);
                Assert.assertFalse("KS goodness-of-fit test rejected null at alpha = " + level,
-                                   new KolmogorovSmirnovTest(RandomSource.JDK, 3448845623L).kolmogorovSmirnovTest(betaDistribution, observed) < level);
+                                   new KolmogorovSmirnovTest().kolmogorovSmirnovTest(betaDistribution, observed) < level);
            }
        }
    }
--- a/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
+++ b/src/test/java/org/apache/commons/math4/stat/inference/KolmogorovSmirnovTestTest.java
@ -319,7 +319,8 @@ public class KolmogorovSmirnovTestTest {
     */
    @Test
    public void testTwoSampleMonteCarlo() {
-        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
+        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
        final int sampleSize = 14;
        final double tol = .001;
        final double[] shortUniform = new double[sampleSize];
@ -336,9 +337,9 @@ public class KolmogorovSmirnovTestTest {
            double exactPStrict = test.exactP(dv, sampleSize, sampleSize, true);
            double exactPNonStrict = test.exactP(dv, sampleSize, sampleSize, false);
            double montePStrict = test.monteCarloP(dv, sampleSize, sampleSize, true,
-                                                   KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS);
+                                                   KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS, rng);
            double montePNonStrict = test.monteCarloP(dv, sampleSize, sampleSize, false,
-                                                      KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS);
+                                                      KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS, rng);
            Assert.assertEquals(exactPStrict, montePStrict, tol);
            Assert.assertEquals(exactPNonStrict, montePNonStrict, tol);
        }
@ -346,7 +347,8 @@ public class KolmogorovSmirnovTestTest {

    @Test
    public void testTwoSampleMonteCarloDifferentSampleSizes() {
-        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
+        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
        final int sampleSize1 = 14;
        final int sampleSize2 = 7;
        final double d = 0.3;
@ -354,7 +356,7 @@ public class KolmogorovSmirnovTestTest {
        final double tol = 1e-2;
        Assert.assertEquals(test.exactP(d, sampleSize1, sampleSize2, strict),
                            test.monteCarloP(d, sampleSize1, sampleSize2, strict,
-                                             KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS),
+                                             KolmogorovSmirnovTest.MONTE_CARLO_ITERATIONS, rng),
                            tol);
    }

@ -365,11 +367,12 @@ public class KolmogorovSmirnovTestTest {
    public void testTwoSampleMonteCarloPerformance() {
        int numIterations = 100_000;
        int N = (int)Math.sqrt(KolmogorovSmirnovTest.LARGE_SAMPLE_PRODUCT);
-        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
+        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
        for (int n = 2; n <= N; ++n) {
            long startMillis = System.currentTimeMillis();
            int m = KolmogorovSmirnovTest.LARGE_SAMPLE_PRODUCT/n;
-            Assert.assertEquals(0d, test.monteCarloP(Double.POSITIVE_INFINITY, n, m, true, numIterations), 0d);
+            Assert.assertEquals(0d, test.monteCarloP(Double.POSITIVE_INFINITY, n, m, true, numIterations, rng), 0d);
            long endMillis = System.currentTimeMillis();
            System.out.println("n=" + n + ", m=" + m + ", time=" + (endMillis-startMillis)/1000d + "s");
        }
@ -531,6 +534,7 @@ public class KolmogorovSmirnovTestTest {
    public void testTwoSamplesAllEqual() {
        int iterations = 10_000;
        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
        for (int i = 2; i < 30; ++i) {
            // testing values with ties
            double[] values = new double[i];
@ -549,8 +553,8 @@ public class KolmogorovSmirnovTestTest {
                Assert.assertEquals(1.0, test.exactP(0, values.length, values.length, false), 0.);
            }

-            Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, true, iterations), 0.);
-            Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, false, iterations), 0.);
+            Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, true, iterations, rng), 0.);
+            Assert.assertEquals(1.0, test.monteCarloP(0, values.length, values.length, false, iterations, rng), 0.);

            Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.);
            Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.);
@ -590,22 +594,23 @@ public class KolmogorovSmirnovTestTest {
    public void testDRoundingMonteCarlo() {
        final double tol = 1e-2;
        final int iterations = 1000000;
-        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
+        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);

        final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12};
        final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18};
        double d = test.kolmogorovSmirnovStatistic(x, y);
-        Assert.assertEquals(0.0027495724090154106, test.monteCarloP(d, x.length, y.length, false, iterations), tol);
+        Assert.assertEquals(0.0027495724090154106, test.monteCarloP(d, x.length, y.length, false, iterations, rng), tol);

        final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13};
        final double[] y1 = {0, 1, 3, 5, 7};
        d = test.kolmogorovSmirnovStatistic(x1, y1);
-        Assert.assertEquals(0.085914085914085896, test.monteCarloP(d, x1.length, y1.length, false, iterations), tol);
+        Assert.assertEquals(0.085914085914085896, test.monteCarloP(d, x1.length, y1.length, false, iterations, rng), tol);

        final double[] x2 = {4, 6, 7, 8, 9, 10, 11};
        final double[] y2 = {0, 1, 2, 3, 5};
        d = test.kolmogorovSmirnovStatistic(x2, y2);
-        Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations), tol);
+        Assert.assertEquals(0.015151515151515027, test.monteCarloP(d, x2.length, y2.length, false, iterations, rng), tol);
    }

    @Test
@ -669,8 +674,9 @@ public class KolmogorovSmirnovTestTest {
    public void testBootstrapSmallSamplesWithTies() {
        final double[] x = {0, 2, 4, 6, 8, 8, 10, 15, 22, 30, 33, 36, 38};
        final double[] y = {9, 17, 20, 33, 40, 51, 60, 60, 72, 90, 101};
-        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 2000);
-        Assert.assertEquals(0.0059, test.bootstrap(x, y, 10000, false), 1E-3);
+        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 2000);
+        Assert.assertEquals(0.0059, test.bootstrap(x, y, 10000, false, rng), 1E-3);
    }

    /**
@ -679,8 +685,9 @@ public class KolmogorovSmirnovTestTest {
     */
    @Test
    public void testBootstrapLargeSamples() {
-        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
-        Assert.assertEquals(0.0237, test.bootstrap(gaussian, gaussian2, 10000), 1E-2);
+        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
+        Assert.assertEquals(0.0237, test.bootstrap(gaussian, gaussian2, 10000, true, rng), 1E-2);
    }

    /**
@ -692,8 +699,9 @@ public class KolmogorovSmirnovTestTest {
    public void testBootstrapRounding() {
        final double[] x = {2,4,6,8,9,10,11,12,13};
        final double[] y = {0,1,3,5,7};
-        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(RandomSource.WELL_19937_C, 1000);
-        Assert.assertEquals(0.06303, test.bootstrap(x, y, 10000, false), 1E-2);
+        final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
+        final UniformRandomProvider rng = RandomSource.create(RandomSource.WELL_19937_C, 1000);
+        Assert.assertEquals(0.06303, test.bootstrap(x, y, 10000, false, rng), 1E-2);
    }

    @Test