Resolved multiple problems leading to inaccuracy and/or failure to compute Normal, ChiSquare and

Poisson probabilities, Erf and Gamma functions.

JIRA: MATH-282
JIRA: MATH-301

Summary of changes:

* BrentSolver has been changed to expose its configured absolute accuracy. This solver is used by
  the default inverse cum implementation in AbstractContinuousDistribution and the hard-coded setting
  (1E-6) was limiting accuracy in inverse cumulative probability estimates. AbstractContinuousDistribution
  was changed to allow distributions to set this value and NormalDistributionImpl was changed to set it to
  1E-9 by default and allow users to configure it via a constructor argument.

* AbstractContinuousDistribution and AbstractIntegerDistribution inverseCumulativeProbability methods
  have been modified to check for NaN values returned by cumulativeProbability and throw MathExceptions
  when this happens.

* The criteria for choosing between the Lanczos series and continued fraction expansion when computing
  regularized gamma functions has been changed to (x >= a + 1). When using the series approximation
  (regularizedGammaP), divergence to infinity is checked and when this happens, 1 is returned.

* When scaling continued fractions to (try to) avoid divergence to infinity, the larger of a and b is
  used as a scale factor and the attempt to scale is repeated up to 5 times, using successive powers
  of the scale factor.

* The maximum number of iterations used in estimating cumulative probabilities for PoissonDistributionImpl
  has been decreased from Integer.MAX_VALUE to 10000000 and made configurable.


git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@920558 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Phil Steitz 2010-03-08 22:57:32 +00:00
parent 79a0ac99bc
commit 09a4643e10
12 changed files with 270 additions and 47 deletions

View File

@ -215,6 +215,8 @@ public class MessagesResources_fr
"Divergence de fraction continue \u00e0 l''infini pour la valeur {0}" },
{ "Continued fraction convergents failed to converge for value {0}",
"\u00c9chec de convergence de fraction continue pour la valeur {0}" },
{ "Continued fraction diverged to NaN for value {0}",
"Divergence de fraction continue \u00e0 NaN pour la valeur {0}"},
// org.apache.commons.math.util.DefaultTransformer
{ "Conversion Exception in Transformation, Object is null",
@ -735,6 +737,15 @@ public class MessagesResources_fr
"la borne inf\u00e9rieure ({0}) devrait \u00eatre inf\u00e9rieure " +
"ou \u00e9gale \u00e0 la borne sup\u00e9rieure ({1})" },
// org.apache.commons.math.distribution.AbstractContinuousDistribution
{ "Cumulative probability function returned NaN for argument {0} p = {1}",
"Fonction de probabilit\u00e9 cumulative retourn\u00e9 NaN \u00e0 l''argument de {0} p = {1}" },
// org.apache.commons.math.distribution.AbstractIntegerDistribution
{ "Discrete cumulative probability function returned NaN for argument {0}",
"Discr\u00e8tes fonction de probabilit\u00e9 cumulative retourn\u00e9 NaN \u00e0 l''argument de {0}" },
// org.apache.commons.math.distribution.BinomialDistributionImpl
{ "number of trials must be non-negative ({0})",
"le nombre d''essais ne doit pas \u00eatre n\u00e9gatif ({0})" },

View File

@ -32,6 +32,12 @@ import org.apache.commons.math.analysis.UnivariateRealFunction;
*/
public class BrentSolver extends UnivariateRealSolverImpl {
/** Default absolute accuracy */
public static final double DEFAULT_ABSOLUTE_ACCURACY = 1E-6;
/** Default maximum number of iterations */
public static final int DEFAULT_MAXIMUM_ITERATIONS = 100;
/** Error message for non-bracketing interval. */
private static final String NON_BRACKETING_MESSAGE =
"function values at endpoints do not have different signs. " +
@ -51,14 +57,33 @@ public class BrentSolver extends UnivariateRealSolverImpl {
*/
@Deprecated
public BrentSolver(UnivariateRealFunction f) {
super(f, 100, 1E-6);
super(f, DEFAULT_MAXIMUM_ITERATIONS, DEFAULT_ABSOLUTE_ACCURACY);
}
/**
* Construct a solver.
* Construct a solver with default properties.
*/
public BrentSolver() {
super(100, 1E-6);
super(DEFAULT_MAXIMUM_ITERATIONS, DEFAULT_ABSOLUTE_ACCURACY);
}
/**
* Construct a solver with the given absolute accuracy.
*
* @param absoluteAccuracy lower bound for absolute accuracy of solutions returned by the solver
*/
public BrentSolver(double absoluteAccuracy) {
super(DEFAULT_MAXIMUM_ITERATIONS, absoluteAccuracy);
}
/**
* Contstruct a solver with the given maximum iterations and absolute accuracy.
*
* @param maximumIterations maximum number of iterations
* @param absoluteAccuracy lower bound for absolute accuracy of solutions returned by the solver
*/
public BrentSolver(int maximumIterations, double absoluteAccuracy) {
super(maximumIterations, absoluteAccuracy);
}
/** {@inheritDoc} */

View File

@ -23,6 +23,7 @@ import org.apache.commons.math.FunctionEvaluationException;
import org.apache.commons.math.MathException;
import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.analysis.UnivariateRealFunction;
import org.apache.commons.math.analysis.solvers.BrentSolver;
import org.apache.commons.math.analysis.solvers.UnivariateRealSolverUtils;
/**
@ -39,6 +40,9 @@ public abstract class AbstractContinuousDistribution
/** Serializable version identifier */
private static final long serialVersionUID = -38038050983108802L;
/** Solver absolute accuracy for inverse cum computation */
private double solverAbsoluteAccuracy = BrentSolver.DEFAULT_ABSOLUTE_ACCURACY;
/**
* Default constructor.
*/
@ -69,11 +73,17 @@ public abstract class AbstractContinuousDistribution
UnivariateRealFunction rootFindingFunction =
new UnivariateRealFunction() {
public double value(double x) throws FunctionEvaluationException {
double ret = Double.NaN;
try {
return cumulativeProbability(x) - p;
ret = cumulativeProbability(x) - p;
} catch (MathException ex) {
throw new FunctionEvaluationException(ex, x, ex.getPattern(), ex.getArguments());
}
if (Double.isNaN(ret)) {
throw new FunctionEvaluationException(x,
"Cumulative probability function returned NaN for argument {0} p = {1}", x, p);
}
return ret;
}
};
@ -90,9 +100,6 @@ public abstract class AbstractContinuousDistribution
* Check domain endpoints to see if one gives value that is within
* the default solver's defaultAbsoluteAccuracy of 0 (will be the
* case if density has bounded support and p is 0 or 1).
*
* TODO: expose the default solver, defaultAbsoluteAccuracy as
* a constant.
*/
if (Math.abs(rootFindingFunction.value(lowerBound)) < 1E-6) {
return lowerBound;
@ -106,7 +113,9 @@ public abstract class AbstractContinuousDistribution
// find root
double root = UnivariateRealSolverUtils.solve(rootFindingFunction,
bracket[0],bracket[1]);
// override getSolverAbsoluteAccuracy() to use a Brent solver with
// absolute accuracy different from BrentSolver default
bracket[0],bracket[1], getSolverAbsoluteAccuracy());
return root;
}
@ -141,4 +150,13 @@ public abstract class AbstractContinuousDistribution
* P(X &lt; <i>upper bound</i>) &gt; <code>p</code>
*/
protected abstract double getDomainUpperBound(double p);
/**
* Returns the solver absolute accuracy for inverse cum computation.
*
* @return the maximum absolute error in inverse cumulative probability estimates
*/
protected double getSolverAbsoluteAccuracy() {
return solverAbsoluteAccuracy;
}
}

View File

@ -18,6 +18,7 @@ package org.apache.commons.math.distribution;
import java.io.Serializable;
import org.apache.commons.math.FunctionEvaluationException;
import org.apache.commons.math.MathException;
import org.apache.commons.math.MathRuntimeException;
@ -173,7 +174,7 @@ public abstract class AbstractIntegerDistribution extends AbstractDistribution
double pm;
while (x0 < x1) {
int xm = x0 + (x1 - x0) / 2;
pm = cumulativeProbability(xm);
pm = checkedCumulativeProbability(xm);
if (pm > p) {
// update x1
if (xm == x1) {
@ -198,15 +199,39 @@ public abstract class AbstractIntegerDistribution extends AbstractDistribution
}
// insure x0 is the correct critical point
pm = cumulativeProbability(x0);
pm = checkedCumulativeProbability(x0);
while (pm > p) {
--x0;
pm = cumulativeProbability(x0);
pm = checkedCumulativeProbability(x0);
}
return x0;
}
/**
* Computes the cumulative probablity function and checks for NaN values returned.
* Throws MathException if the value is NaN. Wraps and rethrows any MathException encountered
* evaluating the cumulative probability function in a FunctionEvaluationException. Throws
* FunctionEvaluationException of the cumulative probability function returns NaN.
*
* @param argument input value
* @return cumulative probability
* @throws FunctionEvaluationException if a MathException occurs computing the cumulative probability
*/
private double checkedCumulativeProbability(int argument) throws FunctionEvaluationException {
double result = Double.NaN;
try {
result = cumulativeProbability(argument);
} catch (MathException ex) {
throw new FunctionEvaluationException(ex, argument, ex.getPattern(), ex.getArguments());
}
if (Double.isNaN(result)) {
throw new FunctionEvaluationException(argument,
"Discrete cumulative probability function returned NaN for argument {0}", argument);
}
return result;
}
/**
* Access the domain value lower bound, based on <code>p</code>, used to
* bracket a PDF root. This method is used by

View File

@ -33,6 +33,9 @@ import org.apache.commons.math.special.Erf;
public class NormalDistributionImpl extends AbstractContinuousDistribution
implements NormalDistribution, Serializable {
/** Default inverse cumulative probability accuracy */
public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9;
/** Serializable version identifier */
private static final long serialVersionUID = 8589540077390120676L;
@ -45,15 +48,31 @@ public class NormalDistributionImpl extends AbstractContinuousDistribution
/** The standard deviation of this distribution. */
private double standardDeviation = 1;
/** Inverse cumulative probability accuracy */
private final double solverAbsoluteAccuracy;
/**
* Create a normal distribution using the given mean and standard deviation.
* @param mean mean for this distribution
* @param sd standard deviation for this distribution
*/
public NormalDistributionImpl(double mean, double sd){
this(mean, sd, DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
}
/**
* Create a normal distribution using the given mean, standard deviation and
* inverse cumulative distribution accuracy.
*
* @param mean mean for this distribution
* @param sd standard deviation for this distribution
* @param inverseCumAccuracy inverse cumulative probability accuracy
*/
public NormalDistributionImpl(double mean, double sd, double inverseCumAccuracy) {
super();
setMean(mean);
setStandardDeviation(sd);
this.mean = mean;
this.standardDeviation = sd;
solverAbsoluteAccuracy = inverseCumAccuracy;
}
/**
@ -136,6 +155,17 @@ public class NormalDistributionImpl extends AbstractContinuousDistribution
}
}
/**
* Return the absolute accuracy setting of the solver used to estimate
* inverse cumulative probabilities.
*
* @return the solver absolute accuracy
*/
@Override
protected double getSolverAbsoluteAccuracy() {
return solverAbsoluteAccuracy;
}
/**
* For this distribution, X, this method returns the critical point x, such
* that P(X &lt; x) = <code>p</code>.

View File

@ -31,6 +31,16 @@ import org.apache.commons.math.util.MathUtils;
public class PoissonDistributionImpl extends AbstractIntegerDistribution
implements PoissonDistribution, Serializable {
/**
* Default maximum number of iterations for cumulative probability calculations.
*/
public static final int DEFAULT_MAX_ITERATIONS = 10000000;
/**
* Default convergence criterion
*/
public static final double DEFAULT_EPSILON = 1E-12;
/** Serializable version identifier */
private static final long serialVersionUID = -3349935121172596109L;
@ -42,6 +52,19 @@ public class PoissonDistributionImpl extends AbstractIntegerDistribution
*/
private double mean;
/**
* Maximum number of iterations for cumulative probability.
*
* Cumulative probabilities are estimated using either Lanczos series approximation of
* Gamma#regularizedGammaP or continued fraction approximation of Gamma#regularizedGammaQ.
*/
private int maxIterations = DEFAULT_MAX_ITERATIONS;
/**
* Convergence criterion for cumulative probability.
*/
private double epsilon = DEFAULT_EPSILON;
/**
* Create a new Poisson distribution with the given the mean. The mean value
* must be positive; otherwise an <code>IllegalArgument</code> is thrown.
@ -53,6 +76,43 @@ public class PoissonDistributionImpl extends AbstractIntegerDistribution
this(p, new NormalDistributionImpl());
}
/**
* Create a new Poisson distribution with the given mean, convergence criterion
* and maximum number of iterations.
*
* @param p the Poisson mean
* @param epsilon the convergence criteria for cumulative probabilites
* @param maxIterations the maximum number of iterations for cumulative probabilites
*/
public PoissonDistributionImpl(double p, double epsilon, int maxIterations) {
setMean(p);
this.epsilon = epsilon;
this.maxIterations = maxIterations;
}
/**
* Create a new Poisson distribution with the given mean and convergence criterion.
*
* @param p the Poisson mean
* @param epsilon the convergence criteria for cumulative probabilites
*/
public PoissonDistributionImpl(double p, double epsilon) {
setMean(p);
this.epsilon = epsilon;
}
/**
* Create a new Poisson distribution with the given mean and maximum number of iterations.
*
* @param p the Poisson mean
* @param maxIterations the maximum number of iterations for cumulative probabilites
*/
public PoissonDistributionImpl(double p, int maxIterations) {
setMean(p);
this.maxIterations = maxIterations;
}
/**
* Create a new Poisson distribution with the given the mean. The mean value
* must be positive; otherwise an <code>IllegalArgument</code> is thrown.
@ -132,8 +192,7 @@ public class PoissonDistributionImpl extends AbstractIntegerDistribution
if (x == Integer.MAX_VALUE) {
return 1;
}
return Gamma.regularizedGammaQ((double) x + 1, mean, 1E-12,
Integer.MAX_VALUE);
return Gamma.regularizedGammaQ((double) x + 1, mean, epsilon, maxIterations);
}
/**

View File

@ -166,7 +166,7 @@ public class Gamma {
ret = Double.NaN;
} else if (x == 0.0) {
ret = 0.0;
} else if (a >= 1.0 && x > a) {
} else if (x >= a + 1) {
// use regularizedGammaQ because it should converge faster in this
// case.
ret = 1.0 - regularizedGammaQ(a, x, epsilon, maxIterations);
@ -175,7 +175,7 @@ public class Gamma {
double n = 0.0; // current element index
double an = 1.0 / a; // n-th element in the series
double sum = an; // partial sum
while (Math.abs(an) > epsilon && n < maxIterations) {
while (Math.abs(an/sum) > epsilon && n < maxIterations && sum < Double.POSITIVE_INFINITY) {
// compute next element in the series
n = n + 1.0;
an = an * (x / (a + n));
@ -185,6 +185,8 @@ public class Gamma {
}
if (n >= maxIterations) {
throw new MaxIterationsExceededException(maxIterations);
} else if (Double.isInfinite(sum)) {
ret = 1.0;
} else {
ret = Math.exp(-x + (a * Math.log(x)) - logGamma(a)) * sum;
}
@ -216,7 +218,7 @@ public class Gamma {
* <a href="http://mathworld.wolfram.com/RegularizedGammaFunction.html">
* Regularized Gamma Function</a>, equation (1).</li>
* <li>
* <a href=" http://functions.wolfram.com/GammaBetaErf/GammaRegularized/10/0003/">
* <a href="http://functions.wolfram.com/GammaBetaErf/GammaRegularized/10/0003/">
* Regularized incomplete gamma function: Continued fraction representations (formula 06.08.10.0003)</a></li>
* </ul>
*
@ -241,7 +243,7 @@ public class Gamma {
ret = Double.NaN;
} else if (x == 0.0) {
ret = 1.0;
} else if (x < a || a < 1.0) {
} else if (x < a + 1.0) {
// use regularizedGammaP because it should converge faster in this
// case.
ret = 1.0 - regularizedGammaP(a, x, epsilon, maxIterations);

View File

@ -138,22 +138,54 @@ public abstract class ContinuedFraction {
double b = getB(n, x);
double p2 = a * p1 + b * p0;
double q2 = a * q1 + b * q0;
boolean infinite = false;
if (Double.isInfinite(p2) || Double.isInfinite(q2)) {
// need to scale
if (a != 0.0) {
p2 = p1 + (b / a * p0);
q2 = q1 + (b / a * q0);
} else if (b != 0) {
p2 = (a / b * p1) + p0;
q2 = (a / b * q1) + q0;
} else {
// can not scale an convergent is unbounded.
/*
* Need to scale. Try successive powers of the larger of a or b
* up to 5th power. Throw ConvergenceException if one or both
* of p2, q2 still overflow.
*/
double scaleFactor = 1d;
double lastScaleFactor = 1d;
final int maxPower = 5;
final double scale = Math.max(a,b);
if (scale <= 0) { // Can't scale
throw new ConvergenceException(
"Continued fraction convergents diverged to +/- infinity for value {0}",
x);
"Continued fraction convergents diverged to +/- infinity for value {0}",
x);
}
infinite = true;
for (int i = 0; i < maxPower; i++) {
lastScaleFactor = scaleFactor;
scaleFactor *= scale;
if (a != 0.0 && a > b) {
p2 = p1 / lastScaleFactor + (b / scaleFactor * p0);
q2 = q1 / lastScaleFactor + (b / scaleFactor * q0);
} else if (b != 0) {
p2 = (a / scaleFactor * p1) + p0 / lastScaleFactor;
q2 = (a / scaleFactor * q1) + q0 / lastScaleFactor;
}
infinite = Double.isInfinite(p2) || Double.isInfinite(q2);
if (!infinite) {
break;
}
}
}
if (infinite) {
// Scaling failed
throw new ConvergenceException(
"Continued fraction convergents diverged to +/- infinity for value {0}",
x);
}
double r = p2 / q2;
if (Double.isNaN(r)) {
throw new ConvergenceException(
"Continued fraction diverged to NaN for value {0}",
x);
}
relativeError = Math.abs(r / c - 1.0);
// prepare for next iteration

View File

@ -39,6 +39,10 @@ The <action> type attribute can be add,update,fix,remove.
</properties>
<body>
<release version="2.1" date="TBD" description="TBD">
<action dev="psteitz" type="fix" issue="MATH-282">
Resolved multiple problems leading to inaccuracy and/or failure to compute Normal,
ChiSquare and Poisson probabilities, Erf and Gamma functions.
</action>
<action dev="luc" type="fix" issue="MATH-347" >
Fixed too stringent interval check in Brent solver: initial guess is now
allowed to be at either interval end

View File

@ -48,8 +48,8 @@ public class NormalDistributionTest extends ContinuousDistributionAbstractTest
@Override
public double[] makeCumulativeTestPoints() {
// quantiles computed using R
return new double[] {-2.226325d, -1.156887d, -0.6439496d, -0.2027951d, 0.3058278d,
6.426325d, 5.356887d, 4.84395d, 4.402795d, 3.894172d};
return new double[] {-2.226325228634938d, -1.156887023657177d, -0.643949578356075d, -0.2027950777320613d, 0.305827808237559d,
6.42632522863494d, 5.35688702365718d, 4.843949578356074d, 4.40279507773206d, 3.89417219176244d};
}
/** Creates the default cumulative probability density test expected values */
@ -60,10 +60,11 @@ public class NormalDistributionTest extends ContinuousDistributionAbstractTest
}
// --------------------- Override tolerance --------------
protected double defaultTolerance = NormalDistributionImpl.DEFAULT_INVERSE_ABSOLUTE_ACCURACY;
@Override
protected void setUp() throws Exception {
super.setUp();
setTolerance(1E-6);
setTolerance(defaultTolerance);
}
//---------------------------- Additional test cases -------------------------
@ -73,11 +74,11 @@ public class NormalDistributionTest extends ContinuousDistributionAbstractTest
double mu = distribution.getMean();
double sigma = distribution.getStandardDeviation();
setCumulativeTestPoints( new double[] {mu - 2 *sigma, mu - sigma,
mu, mu + sigma, mu +2 * sigma, mu +3 * sigma, mu + 4 * sigma,
mu, mu + sigma, mu + 2 * sigma, mu + 3 * sigma, mu + 4 * sigma,
mu + 5 * sigma});
// Quantiles computed using R (same as Mathematica)
setCumulativeTestValues(new double[] {0.02275013, 0.1586553, 0.5, 0.8413447,
0.9772499, 0.9986501, 0.9999683, 0.9999997});
setCumulativeTestValues(new double[] {0.02275013194817921, 0.158655253931457, 0.5, 0.841344746068543,
0.977249868051821, 0.99865010196837, 0.999968328758167, 0.999999713348428});
verifyCumulativeProbabilities();
}
@ -166,8 +167,14 @@ public class NormalDistributionTest extends ContinuousDistributionAbstractTest
public void testMath280() throws MathException {
NormalDistribution normal = new NormalDistributionImpl(0,1);
double result = normal.inverseCumulativeProbability(0.9772498680518209);
assertEquals(2.0, result, 1.0e-12);
double result = normal.inverseCumulativeProbability(0.9986501019683698);
assertEquals(3.0, result, defaultTolerance);
result = normal.inverseCumulativeProbability(0.841344746068543);
assertEquals(1.0, result, defaultTolerance);
result = normal.inverseCumulativeProbability(0.9999683287581673);
assertEquals(4.0, result, defaultTolerance);
result = normal.inverseCumulativeProbability(0.9772498680518209);
assertEquals(2.0, result, defaultTolerance);
}
}

View File

@ -174,10 +174,8 @@ public class PoissonDistributionTest extends IntegerDistributionAbstractTest {
/**
* JIRA: MATH-282
* TODO: activate this test when MATH-282 is resolved
*/
public void testCumulativeProbabilitySpecial() throws Exception {
/*
PoissonDistribution dist = new PoissonDistributionImpl(1.0);
dist.setMean(9120);
checkProbability(dist, 9075);
@ -186,7 +184,6 @@ public class PoissonDistributionTest extends IntegerDistributionAbstractTest {
checkProbability(dist, 5044);
dist.setMean(6986);
checkProbability(dist, 6950);
*/
}
private void checkProbability(PoissonDistribution dist, double x) throws Exception {
@ -197,23 +194,25 @@ public class PoissonDistributionTest extends IntegerDistributionAbstractTest {
dist.getMean() + " x = " + x, p > 0);
}
public void testLargeMeanInverseCumulativeProbability() {
public void testLargeMeanInverseCumulativeProbability() throws Exception {
PoissonDistribution dist = new PoissonDistributionImpl(1.0);
double mean = 1.0;
while (mean <= 10000000.0) {
while (mean <= 100000.0) { // Extended test value: 1E7. Reduced to limit run time.
dist.setMean(mean);
double p = 0.1;
double dp = p;
while (p < 1.0) {
while (p < .99) {
double ret = Double.NaN;
try {
dist.inverseCumulativeProbability(p);
ret = dist.inverseCumulativeProbability(p);
// Verify that returned value satisties definition
assertTrue(p >= dist.cumulativeProbability(ret));
assertTrue(p < dist.cumulativeProbability(ret + 1));
} catch (MathException ex) {
fail("mean of " + mean + " and p of " + p + " caused " + ex.getMessage());
}
p += dp;
}
mean *= 10.0;
}
}

View File

@ -75,4 +75,15 @@ public class ErfTest extends TestCase {
expected = -expected;
assertEquals(expected, actual, 1.0e-5);
}
/**
* MATH-301
*/
public void testLargeValues() throws Exception {
for (int i = 1; i < 200; i++) {
double result = Erf.erf(i);
assertFalse(Double.isNaN(result));
assertTrue(result > 0 && result <= 1);
}
}
}