[MATH-437] Remove deprecated KS distribution.

2015-02-24 23:07:58 +01:00 · 2015-02-24 23:07:58 +01:00 · 4be09dfff2
parent 4140e01266
commit 4be09dfff2
2 changed files with 0 additions and 476 deletions
--- a/src/main/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistribution.java
+++ b/src/main/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistribution.java
@ -1,357 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.commons.math4.distribution;
-
-import java.io.Serializable;
-import java.math.BigDecimal;
-
-import org.apache.commons.math4.exception.MathArithmeticException;
-import org.apache.commons.math4.exception.NotStrictlyPositiveException;
-import org.apache.commons.math4.exception.NumberIsTooLargeException;
-import org.apache.commons.math4.exception.util.LocalizedFormats;
-import org.apache.commons.math4.fraction.BigFraction;
-import org.apache.commons.math4.fraction.BigFractionField;
-import org.apache.commons.math4.fraction.FractionConversionException;
-import org.apache.commons.math4.linear.Array2DRowFieldMatrix;
-import org.apache.commons.math4.linear.Array2DRowRealMatrix;
-import org.apache.commons.math4.linear.FieldMatrix;
-import org.apache.commons.math4.linear.RealMatrix;
-import org.apache.commons.math4.util.FastMath;
-
-/**
- * Implementation of the Kolmogorov-Smirnov distribution.
- *
- * <p>
- * Treats the distribution of the two-sided {@code P(D_n < d)} where
- * {@code D_n = sup_x |G(x) - G_n (x)|} for the theoretical cdf {@code G} and
- * the empirical cdf {@code G_n}.
- * </p>
- * <p>
- * This implementation is based on [1] with certain quick decisions for extreme
- * values given in [2].
- * </p>
- * <p>
- * In short, when wanting to evaluate {@code P(D_n < d)}, the method in [1] is
- * to write {@code d = (k - h) / n} for positive integer {@code k} and
- * {@code 0 <= h < 1}. Then {@code P(D_n < d) = (n! / n^n) * t_kk}, where
- * {@code t_kk} is the {@code (k, k)}'th entry in the special matrix
- * {@code H^n}, i.e. {@code H} to the {@code n}'th power.
- * </p>
- * <p>
- * References:
- * <ul>
- * <li>[1] <a href="http://www.jstatsoft.org/v08/i18/">
- * Evaluating Kolmogorov's Distribution</a> by George Marsaglia, Wai
- * Wan Tsang, and Jingbo Wang</li>
- * <li>[2] <a href="http://www.jstatsoft.org/v39/i11/">
- * Computing the Two-Sided Kolmogorov-Smirnov Distribution</a> by Richard Simard
- * and Pierre L'Ecuyer</li>
- * </ul>
- * Note that [1] contains an error in computing h, refer to
- * <a href="https://issues.apache.org/jira/browse/MATH-437">MATH-437</a> for details.
- * </p>
- *
- * @see <a href="http://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">
- * Kolmogorov-Smirnov test (Wikipedia)</a>
- * @deprecated to be removed in version 4.0 -
- *  use {@link org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest}
- */
-public class KolmogorovSmirnovDistribution implements Serializable {
-
-    /** Serializable version identifier. */
-    private static final long serialVersionUID = -4670676796862967187L;
-
-    /** Number of observations. */
-    private int n;
-
-    /**
-     * @param n Number of observations
-     * @throws NotStrictlyPositiveException if {@code n <= 0}
-     */
-    public KolmogorovSmirnovDistribution(int n)
-        throws NotStrictlyPositiveException {
-        if (n <= 0) {
-            throw new NotStrictlyPositiveException(LocalizedFormats.NOT_POSITIVE_NUMBER_OF_SAMPLES, n);
-        }
-
-        this.n = n;
-    }
-
-    /**
-     * Calculates {@code P(D_n < d)} using method described in [1] with quick
-     * decisions for extreme values given in [2] (see above). The result is not
-     * exact as with
-     * {@link KolmogorovSmirnovDistribution#cdfExact(double)} because
-     * calculations are based on {@code double} rather than
-     * {@link org.apache.commons.math4.fraction.BigFraction}.
-     *
-     * @param d statistic
-     * @return the two-sided probability of {@code P(D_n < d)}
-     * @throws MathArithmeticException if algorithm fails to convert {@code h}
-     * to a {@link org.apache.commons.math4.fraction.BigFraction} in expressing
-     * {@code d} as {@code (k - h) / m} for integer {@code k, m} and
-     * {@code 0 <= h < 1}.
-     */
-    public double cdf(double d) throws MathArithmeticException {
-        return this.cdf(d, false);
-    }
-
-    /**
-     * Calculates {@code P(D_n < d)} using method described in [1] with quick
-     * decisions for extreme values given in [2] (see above). The result is
-     * exact in the sense that BigFraction/BigReal is used everywhere at the
-     * expense of very slow execution time. Almost never choose this in real
-     * applications unless you are very sure; this is almost solely for
-     * verification purposes. Normally, you would choose
-     * {@link KolmogorovSmirnovDistribution#cdf(double)}
-     *
-     * @param d statistic
-     * @return the two-sided probability of {@code P(D_n < d)}
-     * @throws MathArithmeticException if algorithm fails to convert {@code h}
-     * to a {@link org.apache.commons.math4.fraction.BigFraction} in expressing
-     * {@code d} as {@code (k - h) / m} for integer {@code k, m} and
-     * {@code 0 <= h < 1}.
-     */
-    public double cdfExact(double d) throws MathArithmeticException {
-        return this.cdf(d, true);
-    }
-
-    /**
-     * Calculates {@code P(D_n < d)} using method described in [1] with quick
-     * decisions for extreme values given in [2] (see above).
-     *
-     * @param d statistic
-     * @param exact whether the probability should be calculated exact using
-     * {@link org.apache.commons.math4.fraction.BigFraction} everywhere at the
-     * expense of very slow execution time, or if {@code double} should be used
-     * convenient places to gain speed. Almost never choose {@code true} in real
-     * applications unless you are very sure; {@code true} is almost solely for
-     * verification purposes.
-     * @return the two-sided probability of {@code P(D_n < d)}
-     * @throws MathArithmeticException if algorithm fails to convert {@code h}
-     * to a {@link org.apache.commons.math4.fraction.BigFraction} in expressing
-     * {@code d} as {@code (k - h) / m} for integer {@code k, m} and
-     * {@code 0 <= h < 1}.
-     */
-    public double cdf(double d, boolean exact) throws MathArithmeticException {
-
-        final double ninv = 1 / ((double) n);
-        final double ninvhalf = 0.5 * ninv;
-
-        if (d <= ninvhalf) {
-
-            return 0;
-
-        } else if (ninvhalf < d && d <= ninv) {
-
-            double res = 1;
-            double f = 2 * d - ninv;
-
-            // n! f^n = n*f * (n-1)*f * ... * 1*x
-            for (int i = 1; i <= n; ++i) {
-                res *= i * f;
-            }
-
-            return res;
-
-        } else if (1 - ninv <= d && d < 1) {
-
-            return 1 - 2 * FastMath.pow(1 - d, n);
-
-        } else if (1 <= d) {
-
-            return 1;
-        }
-
-        return exact ? exactK(d) : roundedK(d);
-    }
-
-    /**
-     * Calculates the exact value of {@code P(D_n < d)} using method described
-     * in [1] and {@link org.apache.commons.math4.fraction.BigFraction} (see
-     * above).
-     *
-     * @param d statistic
-     * @return the two-sided probability of {@code P(D_n < d)}
-     * @throws MathArithmeticException if algorithm fails to convert {@code h}
-     * to a {@link org.apache.commons.math4.fraction.BigFraction} in expressing
-     * {@code d} as {@code (k - h) / m} for integer {@code k, m} and
-     * {@code 0 <= h < 1}.
-     */
-    private double exactK(double d) throws MathArithmeticException {
-
-        final int k = (int) FastMath.ceil(n * d);
-
-        final FieldMatrix<BigFraction> H = this.createH(d);
-        final FieldMatrix<BigFraction> Hpower = H.power(n);
-
-        BigFraction pFrac = Hpower.getEntry(k - 1, k - 1);
-
-        for (int i = 1; i <= n; ++i) {
-            pFrac = pFrac.multiply(i).divide(n);
-        }
-
-        /*
-         * BigFraction.doubleValue converts numerator to double and the
-         * denominator to double and divides afterwards. That gives NaN quite
-         * easy. This does not (scale is the number of digits):
-         */
-        return pFrac.bigDecimalValue(20, BigDecimal.ROUND_HALF_UP).doubleValue();
-    }
-
-    /**
-     * Calculates {@code P(D_n < d)} using method described in [1] and doubles
-     * (see above).
-     *
-     * @param d statistic
-     * @return the two-sided probability of {@code P(D_n < d)}
-     * @throws MathArithmeticException if algorithm fails to convert {@code h}
-     * to a {@link org.apache.commons.math4.fraction.BigFraction} in expressing
-     * {@code d} as {@code (k - h) / m} for integer {@code k, m} and
-     * {@code 0 <= h < 1}.
-     */
-    private double roundedK(double d) throws MathArithmeticException {
-
-        final int k = (int) FastMath.ceil(n * d);
-        final FieldMatrix<BigFraction> HBigFraction = this.createH(d);
-        final int m = HBigFraction.getRowDimension();
-
-        /*
-         * Here the rounding part comes into play: use
-         * RealMatrix instead of FieldMatrix<BigFraction>
-         */
-        final RealMatrix H = new Array2DRowRealMatrix(m, m);
-
-        for (int i = 0; i < m; ++i) {
-            for (int j = 0; j < m; ++j) {
-                H.setEntry(i, j, HBigFraction.getEntry(i, j).doubleValue());
-            }
-        }
-
-        final RealMatrix Hpower = H.power(n);
-
-        double pFrac = Hpower.getEntry(k - 1, k - 1);
-
-        for (int i = 1; i <= n; ++i) {
-            pFrac *= (double) i / (double) n;
-        }
-
-        return pFrac;
-    }
-
-    /***
-     * Creates {@code H} of size {@code m x m} as described in [1] (see above).
-     *
-     * @param d statistic
-     * @return H matrix
-     * @throws NumberIsTooLargeException if fractional part is greater than 1
-     * @throws FractionConversionException if algorithm fails to convert
-     * {@code h} to a {@link org.apache.commons.math4.fraction.BigFraction} in
-     * expressing {@code d} as {@code (k - h) / m} for integer {@code k, m} and
-     * {@code 0 <= h < 1}.
-     */
-    private FieldMatrix<BigFraction> createH(double d)
-            throws NumberIsTooLargeException, FractionConversionException {
-
-        int k = (int) FastMath.ceil(n * d);
-
-        int m = 2 * k - 1;
-        double hDouble = k - n * d;
-
-        if (hDouble >= 1) {
-            throw new NumberIsTooLargeException(hDouble, 1.0, false);
-        }
-
-        BigFraction h = null;
-
-        try {
-            h = new BigFraction(hDouble, 1.0e-20, 10000);
-        } catch (FractionConversionException e1) {
-            try {
-                h = new BigFraction(hDouble, 1.0e-10, 10000);
-            } catch (FractionConversionException e2) {
-                h = new BigFraction(hDouble, 1.0e-5, 10000);
-            }
-        }
-
-        final BigFraction[][] Hdata = new BigFraction[m][m];
-
-        /*
-         * Start by filling everything with either 0 or 1.
-         */
-        for (int i = 0; i < m; ++i) {
-            for (int j = 0; j < m; ++j) {
-                if (i - j + 1 < 0) {
-                    Hdata[i][j] = BigFraction.ZERO;
-                } else {
-                    Hdata[i][j] = BigFraction.ONE;
-                }
-            }
-        }
-
-        /*
-         * Setting up power-array to avoid calculating the same value twice:
-         * hPowers[0] = h^1 ... hPowers[m-1] = h^m
-         */
-        final BigFraction[] hPowers = new BigFraction[m];
-        hPowers[0] = h;
-        for (int i = 1; i < m; ++i) {
-            hPowers[i] = h.multiply(hPowers[i - 1]);
-        }
-
-        /*
-         * First column and last row has special values (each other reversed).
-         */
-        for (int i = 0; i < m; ++i) {
-            Hdata[i][0] = Hdata[i][0].subtract(hPowers[i]);
-            Hdata[m - 1][i] = Hdata[m - 1][i].subtract(hPowers[m - i - 1]);
-        }
-
-        /*
-         * [1] states: "For 1/2 < h < 1 the bottom left element of the matrix
-         * should be (1 - 2*h^m + (2h - 1)^m )/m!" Since 0 <= h < 1, then if h >
-         * 1/2 is sufficient to check:
-         */
-        if (h.compareTo(BigFraction.ONE_HALF) == 1) {
-            Hdata[m - 1][0] = Hdata[m - 1][0].add(h.multiply(2).subtract(1).pow(m));
-        }
-
-        /*
-         * Aside from the first column and last row, the (i, j)-th element is
-         * 1/(i - j + 1)! if i - j + 1 >= 0, else 0. 1's and 0's are already
-         * put, so only division with (i - j + 1)! is needed in the elements
-         * that have 1's. There is no need to calculate (i - j + 1)! and then
-         * divide - small steps avoid overflows.
-         *
-         * Note that i - j + 1 > 0 <=> i + 1 > j instead of j'ing all the way to
-         * m. Also note that it is started at g = 2 because dividing by 1 isn't
-         * really necessary.
-         */
-        for (int i = 0; i < m; ++i) {
-            for (int j = 0; j < i + 1; ++j) {
-                if (i - j + 1 > 0) {
-                    for (int g = 2; g <= i - j + 1; ++g) {
-                        Hdata[i][j] = Hdata[i][j].divide(g);
-                    }
-                }
-            }
-        }
-
-        return new Array2DRowFieldMatrix<BigFraction>(BigFractionField.getInstance(), Hdata);
-    }
-}
--- a/src/test/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistributionTest.java
+++ b/src/test/java/org/apache/commons/math4/distribution/KolmogorovSmirnovDistributionTest.java
@ -1,119 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.commons.math4.distribution;
-
-import org.apache.commons.math4.distribution.KolmogorovSmirnovDistribution;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * Test cases for {@link KolmogorovSmirnovDistribution}.
- *
- */
-@Deprecated
-public class KolmogorovSmirnovDistributionTest {
-    
-    private static final double TOLERANCE = 10e-10;
-
-    @Test
-    public void testCumulativeDensityFunction() {
-        
-        KolmogorovSmirnovDistribution dist;
-        
-        /* The code below is generated using the R-script located in
-         * /src/test/R/KolmogorovSmirnovDistributionTestCases.R
-         */
-        
-        /* R version 2.11.1 (2010-05-31) */
-
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.005), n = as.integer(200), PACKAGE = "stats")$p, 40) gives
-         * 4.907829957616471622388047046469198862537e-86
-         */
-        dist = new KolmogorovSmirnovDistribution(200);
-        Assert.assertEquals(4.907829957616471622388047046469198862537e-86, dist.cdf(0.005, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.02), n = as.integer(200), PACKAGE = "stats")$p, 40) gives
-         * 5.151982014280041957199687829849210629618e-06
-         */
-        dist = new KolmogorovSmirnovDistribution(200);
-        Assert.assertEquals(5.151982014280041957199687829849210629618e-06, dist.cdf(0.02, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.031111), n = as.integer(200), PACKAGE = "stats")$p, 40) gives
-         * 0.01291614648162886340443389343590752105229
-         */
-        dist = new KolmogorovSmirnovDistribution(200);
-        Assert.assertEquals(0.01291614648162886340443389343590752105229, dist.cdf(0.031111, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.04), n = as.integer(200), PACKAGE = "stats")$p, 40) gives
-         * 0.1067137011362679355208626930107129737735
-         */
-        dist = new KolmogorovSmirnovDistribution(200);
-        Assert.assertEquals(0.1067137011362679355208626930107129737735, dist.cdf(0.04, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.005), n = as.integer(341), PACKAGE = "stats")$p, 40) gives
-         * 1.914734701559404553985102395145063418825e-53
-         */
-        dist = new KolmogorovSmirnovDistribution(341);
-        Assert.assertEquals(1.914734701559404553985102395145063418825e-53, dist.cdf(0.005, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.02), n = as.integer(341), PACKAGE = "stats")$p, 40) gives
-         * 0.001171328985781981343872182321774744195864
-         */
-        dist = new KolmogorovSmirnovDistribution(341);
-        Assert.assertEquals(0.001171328985781981343872182321774744195864, dist.cdf(0.02, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.031111), n = as.integer(341), PACKAGE = "stats")$p, 40) gives
-         * 0.1142955196267499418105728636874118819833
-         */
-        dist = new KolmogorovSmirnovDistribution(341);
-        Assert.assertEquals(0.1142955196267499418105728636874118819833, dist.cdf(0.031111, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.04), n = as.integer(341), PACKAGE = "stats")$p, 40) gives
-         * 0.3685529520496805266915885113121476024389
-         */
-        dist = new KolmogorovSmirnovDistribution(341);
-        Assert.assertEquals(0.3685529520496805266915885113121476024389, dist.cdf(0.04, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.005), n = as.integer(389), PACKAGE = "stats")$p, 40) gives
-         * 1.810657144595055888918455512707637574637e-47
-         */
-        dist = new KolmogorovSmirnovDistribution(389);
-        Assert.assertEquals(1.810657144595055888918455512707637574637e-47, dist.cdf(0.005, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.02), n = as.integer(389), PACKAGE = "stats")$p, 40) gives
-         * 0.003068542559702356568168690742481885536108
-         */
-        dist = new KolmogorovSmirnovDistribution(389);
-        Assert.assertEquals(0.003068542559702356568168690742481885536108, dist.cdf(0.02, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.031111), n = as.integer(389), PACKAGE = "stats")$p, 40) gives
-         * 0.1658291700122746237244797384846606291831
-         */
-        dist = new KolmogorovSmirnovDistribution(389);
-        Assert.assertEquals(0.1658291700122746237244797384846606291831, dist.cdf(0.031111, false), TOLERANCE);
-
-        /* formatC(.C("pkolmogorov2x", p = as.double(0.04), n = as.integer(389), PACKAGE = "stats")$p, 40) gives
-         * 0.4513143712128902529379104180407011881471
-         */
-        dist = new KolmogorovSmirnovDistribution(389);
-        Assert.assertEquals(0.4513143712128902529379104180407011881471, dist.cdf(0.04, false), TOLERANCE);
-
-    }
-
-}