MATH-1443: Depend on "Commons Statistics".

This commit is contained in:
Gilles 2018-01-25 17:08:40 +01:00
parent bd15dc78c7
commit c4218b8385
16 changed files with 61 additions and 51 deletions

View File

@ -18,6 +18,7 @@ package org.apache.commons.math4.distribution;
import java.io.Serializable;
import org.apache.commons.statistics.distribution.DiscreteDistribution;
import org.apache.commons.math4.exception.MathInternalError;
import org.apache.commons.math4.exception.NumberIsTooLargeException;
import org.apache.commons.math4.exception.OutOfRangeException;
@ -186,7 +187,7 @@ public abstract class AbstractIntegerDistribution implements IntegerDistribution
* @return an array of size {@code n}.
*/
public static int[] sample(int n,
IntegerDistribution.Sampler sampler) {
DiscreteDistribution.Sampler sampler) {
final int[] samples = new int[n];
for (int i = 0; i < n; i++) {
samples[i] = sampler.sample();
@ -196,7 +197,7 @@ public abstract class AbstractIntegerDistribution implements IntegerDistribution
/**{@inheritDoc} */
@Override
public IntegerDistribution.Sampler createSampler(final UniformRandomProvider rng) {
public Sampler createSampler(final UniformRandomProvider rng) {
return new IntegerDistribution.Sampler() {
/**
* Inversion method distribution sampler.

View File

@ -18,6 +18,7 @@ package org.apache.commons.math4.distribution;
import java.io.Serializable;
import org.apache.commons.statistics.distribution.ContinuousDistribution;
import org.apache.commons.math4.analysis.UnivariateFunction;
import org.apache.commons.math4.analysis.solvers.UnivariateSolverUtils;
import org.apache.commons.math4.exception.NumberIsTooLargeException;
@ -234,7 +235,7 @@ public abstract class AbstractRealDistribution
* @return an array of size {@code n}.
*/
public static double[] sample(int n,
RealDistribution.Sampler sampler) {
ContinuousDistribution.Sampler sampler) {
final double[] samples = new double[n];
for (int i = 0; i < n; i++) {
samples[i] = sampler.sample();
@ -244,7 +245,7 @@ public abstract class AbstractRealDistribution
/**{@inheritDoc} */
@Override
public RealDistribution.Sampler createSampler(final UniformRandomProvider rng) {
public Sampler createSampler(final UniformRandomProvider rng) {
return new RealDistribution.Sampler() {
/**
* Inversion method distribution sampler.

View File

@ -21,7 +21,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.io.Serializable;
import org.apache.commons.statistics.distribution.ContinuousDistribution;
import org.apache.commons.math4.exception.DimensionMismatchException;
import org.apache.commons.math4.exception.MathArithmeticException;
import org.apache.commons.math4.exception.NotANumberException;
@ -41,8 +43,9 @@ import org.apache.commons.math4.util.Pair;
*
* @since 3.2
*/
public class EnumeratedRealDistribution extends AbstractRealDistribution {
public class EnumeratedRealDistribution
implements ContinuousDistribution,
Serializable {
/** Serializable UID. */
private static final long serialVersionUID = 20160311L;
@ -195,7 +198,7 @@ public class EnumeratedRealDistribution extends AbstractRealDistribution {
* @return {@code sum(singletons[i] * probabilities[i])}
*/
@Override
public double getNumericalMean() {
public double getMean() {
double mean = 0;
for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
@ -211,7 +214,7 @@ public class EnumeratedRealDistribution extends AbstractRealDistribution {
* @return {@code sum((singletons[i] - mean) ^ 2 * probabilities[i])}
*/
@Override
public double getNumericalVariance() {
public double getVariance() {
double mean = 0;
double meanOfSquares = 0;
@ -275,8 +278,8 @@ public class EnumeratedRealDistribution extends AbstractRealDistribution {
/** {@inheritDoc} */
@Override
public RealDistribution.Sampler createSampler(final UniformRandomProvider rng) {
return new RealDistribution.Sampler() {
public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) {
return new ContinuousDistribution.Sampler() {
/** Delegate. */
private final EnumeratedDistribution<Double>.Sampler inner =
innerDistribution.createSampler(rng);

View File

@ -16,6 +16,7 @@
*/
package org.apache.commons.math4.distribution;
import org.apache.commons.statistics.distribution.DiscreteDistribution;
import org.apache.commons.math4.exception.NumberIsTooLargeException;
import org.apache.commons.math4.exception.OutOfRangeException;
import org.apache.commons.rng.UniformRandomProvider;
@ -157,7 +158,7 @@ public interface IntegerDistribution {
/**
* Sampling functionality.
*/
interface Sampler {
interface Sampler extends DiscreteDistribution.Sampler {
/**
* Generates a random value sampled from this distribution.
*

View File

@ -16,6 +16,7 @@
*/
package org.apache.commons.math4.distribution;
import org.apache.commons.statistics.distribution.ContinuousDistribution;
import org.apache.commons.math4.exception.NumberIsTooLargeException;
import org.apache.commons.math4.exception.OutOfRangeException;
import org.apache.commons.rng.UniformRandomProvider;
@ -172,7 +173,7 @@ public interface RealDistribution {
/**
* Sampling functionality.
*/
interface Sampler {
interface Sampler extends ContinuousDistribution.Sampler {
/**
* Generates a random value sampled from this distribution.
*

View File

@ -238,7 +238,7 @@ public class CMAESOptimizer
this.isActiveCMA = isActiveCMA;
this.diagonalOnly = diagonalOnly;
this.checkFeasableCount = checkFeasableCount;
this.random = new NormalDistribution().createSampler(rng);
this.random = new NormalDistribution(0, 1).createSampler(rng);
this.generateStatistics = generateStatistics;
}

View File

@ -19,7 +19,7 @@ package org.apache.commons.math4.stat.inference;
import java.util.Collection;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.math4.distribution.RealDistribution;
import org.apache.commons.statistics.distribution.ContinuousDistribution;
import org.apache.commons.math4.exception.ConvergenceException;
import org.apache.commons.math4.exception.DimensionMismatchException;
import org.apache.commons.math4.exception.InsufficientDataException;
@ -606,10 +606,10 @@ public class InferenceTestUtils {
* @param dist reference distribution
* @param data sample being evaluated
* @return Kolmogorov-Smirnov statistic \(D_n\)
* @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(RealDistribution, double[])
* @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovStatistic(ContinuousDistribution, double[])
* @since 3.3
*/
public static double kolmogorovSmirnovStatistic(RealDistribution dist, double[] data)
public static double kolmogorovSmirnovStatistic(ContinuousDistribution dist, double[] data)
throws InsufficientDataException, NullArgumentException {
return KS_TEST.kolmogorovSmirnovStatistic(dist, data);
}
@ -619,10 +619,10 @@ public class InferenceTestUtils {
* @param data sample being evaluated
* @return the p-value associated with the null hypothesis that {@code data} is a sample from
* {@code distribution}
* @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(RealDistribution, double[])
* @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(ContinuousDistribution, double[])
* @since 3.3
*/
public static double kolmogorovSmirnovTest(RealDistribution dist, double[] data)
public static double kolmogorovSmirnovTest(ContinuousDistribution dist, double[] data)
throws InsufficientDataException, NullArgumentException {
return KS_TEST.kolmogorovSmirnovTest(dist, data);
}
@ -633,10 +633,10 @@ public class InferenceTestUtils {
* @param strict whether or not to force exact computation of the p-value
* @return the p-value associated with the null hypothesis that {@code data} is a sample from
* {@code distribution}
* @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(RealDistribution, double[], boolean)
* @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(ContinuousDistribution, double[], boolean)
* @since 3.3
*/
public static double kolmogorovSmirnovTest(RealDistribution dist, double[] data, boolean strict)
public static double kolmogorovSmirnovTest(ContinuousDistribution dist, double[] data, boolean strict)
throws InsufficientDataException, NullArgumentException {
return KS_TEST.kolmogorovSmirnovTest(dist, data, strict);
}
@ -647,10 +647,10 @@ public class InferenceTestUtils {
* @param alpha significance level of the test
* @return true iff the null hypothesis that {@code data} is a sample from {@code distribution}
* can be rejected with confidence 1 - {@code alpha}
* @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(RealDistribution, double[], double)
* @see org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest#kolmogorovSmirnovTest(ContinuousDistribution, double[], double)
* @since 3.3
*/
public static boolean kolmogorovSmirnovTest(RealDistribution dist, double[] data, double alpha)
public static boolean kolmogorovSmirnovTest(ContinuousDistribution dist, double[] data, double alpha)
throws InsufficientDataException, NullArgumentException {
return KS_TEST.kolmogorovSmirnovTest(dist, data, alpha);
}

View File

@ -22,9 +22,9 @@ import java.util.Arrays;
import org.apache.commons.rng.simple.RandomSource;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.statistics.distribution.ContinuousDistribution;
import org.apache.commons.numbers.combinatorics.BinomialCoefficientDouble;
import org.apache.commons.math4.distribution.EnumeratedRealDistribution;
import org.apache.commons.math4.distribution.RealDistribution;
import org.apache.commons.math4.distribution.AbstractRealDistribution;
import org.apache.commons.math4.exception.InsufficientDataException;
import org.apache.commons.math4.exception.MathArithmeticException;
@ -144,7 +144,7 @@ public class KolmogorovSmirnovTest {
* @throws InsufficientDataException if {@code data} does not have length at least 2
* @throws NullArgumentException if {@code data} is null
*/
public double kolmogorovSmirnovTest(RealDistribution distribution, double[] data, boolean exact) {
public double kolmogorovSmirnovTest(ContinuousDistribution distribution, double[] data, boolean exact) {
return 1d - cdf(kolmogorovSmirnovStatistic(distribution, data), data.length, exact);
}
@ -160,7 +160,7 @@ public class KolmogorovSmirnovTest {
* @throws InsufficientDataException if {@code data} does not have length at least 2
* @throws NullArgumentException if {@code data} is null
*/
public double kolmogorovSmirnovStatistic(RealDistribution distribution, double[] data) {
public double kolmogorovSmirnovStatistic(ContinuousDistribution distribution, double[] data) {
checkArray(data);
final int n = data.length;
final double nd = n;
@ -224,7 +224,7 @@ public class KolmogorovSmirnovTest {
* href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"> Kolmogorov-Smirnov test</a>
* evaluating the null hypothesis that {@code x} and {@code y} are samples drawn from the same
* probability distribution. Assumes the strict form of the inequality used to compute the
* p-value. See {@link #kolmogorovSmirnovTest(RealDistribution, double[], boolean)}.
* p-value. See {@link #kolmogorovSmirnovTest(ContinuousDistribution, double[], boolean)}.
*
* @param x first sample dataset
* @param y second sample dataset
@ -320,7 +320,7 @@ public class KolmogorovSmirnovTest {
* @throws InsufficientDataException if {@code data} does not have length at least 2
* @throws NullArgumentException if {@code data} is null
*/
public double kolmogorovSmirnovTest(RealDistribution distribution, double[] data) {
public double kolmogorovSmirnovTest(ContinuousDistribution distribution, double[] data) {
return kolmogorovSmirnovTest(distribution, data, false);
}
@ -336,7 +336,7 @@ public class KolmogorovSmirnovTest {
* @throws InsufficientDataException if {@code data} does not have length at least 2
* @throws NullArgumentException if {@code data} is null
*/
public boolean kolmogorovSmirnovTest(RealDistribution distribution, double[] data, double alpha) {
public boolean kolmogorovSmirnovTest(ContinuousDistribution distribution, double[] data, double alpha) {
if ((alpha <= 0) || (alpha > 0.5)) {
throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL, alpha, 0, 0.5);
}
@ -375,7 +375,7 @@ public class KolmogorovSmirnovTest {
final double[] combined = new double[xLength + yLength];
System.arraycopy(x, 0, combined, 0, xLength);
System.arraycopy(y, 0, combined, xLength, yLength);
final RealDistribution.Sampler sampler = new EnumeratedRealDistribution(combined).createSampler(rng);
final ContinuousDistribution.Sampler sampler = new EnumeratedRealDistribution(combined).createSampler(rng);
final long d = integralKolmogorovSmirnovStatistic(x, y);
int greaterCount = 0;
int equalCount = 0;

View File

@ -34,7 +34,7 @@ public class AgrestiCoullInterval implements BinomialConfidenceInterval {
public ConfidenceInterval createInterval(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
IntervalUtils.checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
final double alpha = (1.0 - confidenceLevel) / 2;
final NormalDistribution normalDistribution = new NormalDistribution();
final NormalDistribution normalDistribution = new NormalDistribution(0, 1);
final double z = normalDistribution.inverseCumulativeProbability(1 - alpha);
final double zSquared = FastMath.pow(z, 2);
final double modifiedNumberOfTrials = numberOfTrials + zSquared;

View File

@ -36,7 +36,7 @@ public class NormalApproximationInterval implements BinomialConfidenceInterval {
IntervalUtils.checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
final double mean = (double) numberOfSuccesses / (double) numberOfTrials;
final double alpha = (1.0 - confidenceLevel) / 2;
final NormalDistribution normalDistribution = new NormalDistribution();
final NormalDistribution normalDistribution = new NormalDistribution(0, 1);
final double difference = normalDistribution.inverseCumulativeProbability(1 - alpha) *
FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean));
return new ConfidenceInterval(mean - difference, mean + difference, confidenceLevel);

View File

@ -32,7 +32,7 @@ public class WilsonScoreInterval implements BinomialConfidenceInterval {
public ConfidenceInterval createInterval(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
IntervalUtils.checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
final double alpha = (1 - confidenceLevel) / 2;
final NormalDistribution normalDistribution = new NormalDistribution();
final NormalDistribution normalDistribution = new NormalDistribution(0, 1);
final double z = normalDistribution.inverseCumulativeProbability(1 - alpha);
final double zSquared = z * z;
final double oneOverNumTrials = 1d / numberOfTrials;

View File

@ -18,6 +18,7 @@ package org.apache.commons.math4.distribution;
import java.util.Arrays;
import org.apache.commons.statistics.distribution.ContinuousDistribution;
import org.apache.commons.rng.simple.RandomSource;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.math4.stat.StatUtils;
@ -346,9 +347,10 @@ public class BetaDistributionTest {
final double level = 0.01;
for (final double alpha : alphaBetas) {
for (final double beta : alphaBetas) {
final BetaDistribution betaDistribution = new BetaDistribution(alpha, beta);
final org.apache.commons.statistics.distribution.BetaDistribution betaDistribution =
new org.apache.commons.statistics.distribution.BetaDistribution(alpha, beta);
final RealDistribution.Sampler sampler = betaDistribution.createSampler(rng);
final ContinuousDistribution.Sampler sampler = betaDistribution.createSampler(rng);
final double[] observed = AbstractRealDistribution.sample(numSamples, sampler);
Assert.assertFalse("G goodness-of-fit test rejected null at alpha = " + level,
@ -359,7 +361,7 @@ public class BetaDistributionTest {
}
}
private double gTest(final RealDistribution expectedDistribution, final double[] values) {
private double gTest(final ContinuousDistribution expectedDistribution, final double[] values) {
final int numBins = values.length / 30;
final double[] breaks = new double[numBins];
for (int b = 0; b < breaks.length; b++) {

View File

@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.statistics.distribution.ContinuousDistribution;
import org.apache.commons.math4.distribution.EnumeratedDistribution;
import org.apache.commons.math4.distribution.EnumeratedRealDistribution;
import org.apache.commons.math4.exception.DimensionMismatchException;
@ -136,7 +137,7 @@ public class EnumeratedRealDistributionTest {
*/
@Test
public void testGetNumericalMean() {
Assert.assertEquals(3.4, testDistribution.getNumericalMean(), 1e-10);
Assert.assertEquals(3.4, testDistribution.getMean(), 1e-10);
}
/**
@ -144,7 +145,7 @@ public class EnumeratedRealDistributionTest {
*/
@Test
public void testGetNumericalVariance() {
Assert.assertEquals(7.84, testDistribution.getNumericalVariance(), 1e-10);
Assert.assertEquals(7.84, testDistribution.getVariance(), 1e-10);
}
/**
@ -177,7 +178,7 @@ public class EnumeratedRealDistributionTest {
@Test
public void testSample() {
final int n = 1000000;
final RealDistribution.Sampler sampler =
final ContinuousDistribution.Sampler sampler =
testDistribution.createSampler(RandomSource.create(RandomSource.WELL_1024_A, -123456789));
final double[] samples = AbstractRealDistribution.sample(n, sampler);
Assert.assertEquals(n, samples.length);
@ -187,9 +188,9 @@ public class EnumeratedRealDistributionTest {
sum += samples[i];
sumOfSquares += samples[i] * samples[i];
}
Assert.assertEquals(testDistribution.getNumericalMean(),
Assert.assertEquals(testDistribution.getMean(),
sum / n, 1e-2);
Assert.assertEquals(testDistribution.getNumericalVariance(),
Assert.assertEquals(testDistribution.getVariance(),
sumOfSquares / n - FastMath.pow(sum / n, 2), 1e-2);
}

View File

@ -24,9 +24,9 @@ import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import org.apache.commons.math4.distribution.LogNormalDistribution;
import org.apache.commons.math4.distribution.NormalDistribution;
import org.apache.commons.math4.distribution.RealDistribution;
import org.apache.commons.statistics.distribution.LogNormalDistribution;
import org.apache.commons.statistics.distribution.NormalDistribution;
import org.apache.commons.statistics.distribution.ContinuousDistribution;
import org.apache.commons.math4.distribution.AbstractRealDistribution;
import org.apache.commons.math4.exception.MathIllegalArgumentException;
import org.apache.commons.math4.exception.NullArgumentException;
@ -709,8 +709,8 @@ public class PSquarePercentileTest extends
STANDARD = 1000, BIG = 10000, VERY_BIG = 50000, LARGE = 1000000,
VERY_LARGE = 10000000;
private void doDistributionTest(RealDistribution distribution) {
final RealDistribution.Sampler sampler =
private void doDistributionTest(ContinuousDistribution distribution) {
final ContinuousDistribution.Sampler sampler =
distribution.createSampler(RandomSource.create(RandomSource.WELL_19937_C, 1000));
double data[];

View File

@ -19,7 +19,7 @@ package org.apache.commons.math4.stat.inference;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math4.distribution.NormalDistribution;
import org.apache.commons.statistics.distribution.NormalDistribution;
import org.apache.commons.math4.exception.DimensionMismatchException;
import org.apache.commons.math4.exception.NotPositiveException;
import org.apache.commons.math4.exception.NotStrictlyPositiveException;

View File

@ -21,8 +21,8 @@ import java.lang.reflect.Method;
import java.util.Arrays;
import org.apache.commons.math4.TestUtils;
import org.apache.commons.math4.distribution.NormalDistribution;
import org.apache.commons.math4.distribution.UniformRealDistribution;
import org.apache.commons.statistics.distribution.NormalDistribution;
import org.apache.commons.statistics.distribution.UniformContinuousDistribution;
import org.apache.commons.rng.simple.RandomSource;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.numbers.combinatorics.BinomialCoefficient;
@ -142,7 +142,7 @@ public class KolmogorovSmirnovTestTest {
// @Test - takes about 6 seconds, uncomment to run
public void testOneSampleUniformUniform() {
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRealDistribution unif = new UniformRealDistribution(-0.5, 0.5);
final UniformContinuousDistribution unif = new UniformContinuousDistribution(-0.5, 0.5);
Assert.assertEquals(8.881784197001252E-16, test.kolmogorovSmirnovTest(unif, uniform, false), TOLERANCE);
Assert.assertTrue(test.kolmogorovSmirnovTest(unif, uniform, 0.05));
Assert.assertEquals(0.5400666982352942, test.kolmogorovSmirnovStatistic(unif, uniform), TOLERANCE);
@ -152,7 +152,7 @@ public class KolmogorovSmirnovTestTest {
@Test
public void testOneSampleUniformUniformSmallSample() {
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRealDistribution unif = new UniformRealDistribution(-0.5, 0.5);
final UniformContinuousDistribution unif = new UniformContinuousDistribution(-0.5, 0.5);
final double[] shortUniform = new double[20];
System.arraycopy(uniform, 0, shortUniform, 0, 20);
Assert.assertEquals(4.117594598618268E-9, test.kolmogorovSmirnovTest(unif, shortUniform, false), TOLERANCE);
@ -164,7 +164,7 @@ public class KolmogorovSmirnovTestTest {
@Test
public void testOneSampleUniformGaussian() {
final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
final UniformRealDistribution unif = new UniformRealDistribution(-0.5, 0.5);
final UniformContinuousDistribution unif = new UniformContinuousDistribution(-0.5, 0.5);
// Value was obtained via exact test, validated against R. Running exact test takes a long
// time.
Assert.assertEquals(4.9405812774239166E-11, test.kolmogorovSmirnovTest(unif, gaussian, false), TOLERANCE);