Implementation of log-normal distributions (MATH-733). Patch contributed by Dennis Hendriks.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1232324 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sebastien Brisard 2012-01-17 07:12:02 +00:00
parent 29cd56b6f4
commit 64230d2b42
4 changed files with 652 additions and 6 deletions

View File

@ -0,0 +1,295 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.distribution;
import org.apache.commons.math.exception.NotStrictlyPositiveException;
import org.apache.commons.math.exception.NumberIsTooLargeException;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.special.Erf;
import org.apache.commons.math.util.FastMath;
/**
* Implementation of the log-normal (gaussian) distribution.
*
* <p>
* <a id="parameters"><strong>Parameters:</strong></a>
* {@code X} is log-normally distributed if its natural logarithm {@code log(X)}
* is normally distributed. The probability distribution function of {@code X}
* is given by (for {@code x >= 0})
* </p>
* <p>
* {@code exp(-0.5 * ((ln(x) - m) / s)^2) / (s * sqrt(2 * pi) * x)}
* </p>
* <ul>
* <li>{@code m} is the <em>scale</em> parameter: this is the mean of the
* normally distributed natural logarithm of this distribution,</li>
* <li>{@code s} is the <em>shape</em> parameter: this is the standard
* deviation of the normally distributed natural logarithm of this
* distribution.
* </ul>
*
* @see <a href="http://en.wikipedia.org/wiki/Log-normal_distribution">
* Log-normal distribution (Wikipedia)</a>
* @see <a href="http://mathworld.wolfram.com/LogNormalDistribution.html">
* Log Normal distribution (MathWorld)</a>
*
* @version $Id$
* @since 3.0
*/
public class LogNormalDistribution extends AbstractRealDistribution {
/** Default inverse cumulative probability accuracy. */
public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9;
/** Serializable version identifier. */
private static final long serialVersionUID = 20120112;
/** &radic;(2 &pi;) */
private static final double SQRT2PI = FastMath.sqrt(2 * FastMath.PI);
/** &radic;(2) */
private static final double SQRT2 = FastMath.sqrt(2.0);
/** The <a href="#parameters">scale</a> parameter of this distribution. */
private final double scale;
/** The <a href="#parameters">shape</a> parameter of this distribution. */
private final double shape;
/** Inverse cumulative probability accuracy. */
private final double solverAbsoluteAccuracy;
/**
* Create a log-normal distribution using the specified
* <a href="#parameters">scale</a> and
* <a href="#parameters">shape</a>.
*
* @param scale the scale parameter of this distribution
* @param shape the shape parameter of this distribution
* @throws NotStrictlyPositiveException if {@code shape <= 0}.
*/
public LogNormalDistribution(double scale, double shape)
throws NotStrictlyPositiveException {
this(scale, shape, DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
}
/**
* Create a log-normal distribution using the specified
* <a href="#parameters">scale</a>, <a href="#parameters">shape</a> and
* inverse cumulative distribution accuracy.
*
* @param scale the scale parameter of this distribution
* @param shape the shape parameter of this distribution
* @param inverseCumAccuracy Inverse cumulative probability accuracy.
* @throws NotStrictlyPositiveException if {@code shape <= 0}.
*/
public LogNormalDistribution(double scale, double shape,
double inverseCumAccuracy) throws NotStrictlyPositiveException {
if (shape <= 0) {
throw new NotStrictlyPositiveException(LocalizedFormats.STANDARD_DEVIATION, shape);
}
this.scale = scale;
this.shape = shape;
this.solverAbsoluteAccuracy = inverseCumAccuracy;
}
/**
* Create a log-normal distribution, where the mean and standard deviation
* of the {@link NormalDistribution normally distributed} natural
* logarithm of the log-normal distribution are equal to zero and one
* respectively. In other words, the scale of the returned distribution is
* {@code 0}, while its shape is {@code 1}.
*/
public LogNormalDistribution() {
this(0, 1);
}
/**
* Returns the <a href="#parameters">scale</a> parameter of this distribution.
*
* @return the scale parameter
*/
public double getScale() {
return scale;
}
/**
* Returns the <a href="#parameters">shape</a> parameter of this
* distribution.
*
* @return the shape parameter
*/
public double getShape() {
return shape;
}
/**
* {@inheritDoc}
*
* For this distribution {@code P(X = x)} always evaluates to 0.
*
* @return 0
*/
public double probability(double x) {
return 0.0;
}
/**
* {@inheritDoc}
*
* For scale {@code m}, and shape {@code s} of this distribution, the PDF
* is given by
* <ul>
* <li>{@code 0} if {@code x <= 0},</li>
* <li>{@code exp(-0.5 * ((ln(x) - m) / s)^2) / (s * sqrt(2 * pi) * x)}
* otherwise.</li>
* </ul>
*/
public double density(double x) {
if (x <= 0) {
return 0;
}
final double x0 = FastMath.log(x) - scale;
final double x1 = x0 / shape;
return FastMath.exp(-0.5 * x1 * x1) / (shape * SQRT2PI * x);
}
/**
* {@inheritDoc}
*
* For scale {@code m}, and shape {@code s} of this distribution, the CDF
* is given by
* <ul>
* <li>{@code 0} if {@code x <= 0},</li>
* <li>{@code 0} if {@code ln(x) - m < 0} and {@code m - ln(x) > 40 * s}, as
* in these cases the actual value is within {@code Double.MIN_VALUE} of 0,
* <li>{@code 1} if {@code ln(x) - m >= 0} and {@code ln(x) - m > 40 * s},
* as in these cases the actual value is within {@code Double.MIN_VALUE} of
* 1,</li>
* <li>{@code 0.5 + 0.5 * erf((ln(x) - m) / (s * sqrt(2))} otherwise.</li>
* </ul>
*/
public double cumulativeProbability(double x) {
if (x <= 0) {
return 0;
}
final double dev = FastMath.log(x) - scale;
if (FastMath.abs(dev) > 40 * shape) {
return dev < 0 ? 0.0d : 1.0d;
}
return 0.5 + 0.5 * Erf.erf(dev / (shape * SQRT2));
}
/** {@inheritDoc} */
@Override
public double cumulativeProbability(double x0, double x1)
throws NumberIsTooLargeException {
if (x0 > x1) {
throw new NumberIsTooLargeException(LocalizedFormats.LOWER_ENDPOINT_ABOVE_UPPER_ENDPOINT,
x0, x1, true);
}
if (x0 <= 0 || x1 <= 0) {
return super.cumulativeProbability(x0, x1);
}
final double denom = shape * SQRT2;
final double v0 = (FastMath.log(x0) - scale) / denom;
final double v1 = (FastMath.log(x1) - scale) / denom;
return 0.5 * Erf.erf(v0, v1);
}
/** {@inheritDoc} */
@Override
protected double getSolverAbsoluteAccuracy() {
return solverAbsoluteAccuracy;
}
/**
* {@inheritDoc}
*
* For scale {@code m} and shape {@code s}, the mean is
* {@code exp(m + s^2 / 2)}.
*/
public double getNumericalMean() {
double s = shape;
return FastMath.exp(scale + (s * s / 2));
}
/**
* {@inheritDoc}
*
* For scale {@code m} and shape {@code s}, the variance is
* {@code (exp(s^2) - 1) * exp(2 * m + s^2)}.
*/
public double getNumericalVariance() {
final double s = shape;
final double ss = s * s;
return (FastMath.exp(ss) - 1) * FastMath.exp(2 * scale + ss);
}
/**
* {@inheritDoc}
*
* The lower bound of the support is always 0 no matter the parameters.
*
* @return lower bound of the support (always 0)
*/
public double getSupportLowerBound() {
return 0;
}
/**
* {@inheritDoc}
*
* The upper bound of the support is always positive infinity
* no matter the parameters.
*
* @return upper bound of the support (always
* {@code Double.POSITIVE_INFINITY})
*/
public double getSupportUpperBound() {
return Double.POSITIVE_INFINITY;
}
/** {@inheritDoc} */
public boolean isSupportLowerBoundInclusive() {
return true;
}
/** {@inheritDoc} */
public boolean isSupportUpperBoundInclusive() {
return false;
}
/**
* {@inheritDoc}
*
* The support of this distribution is connected.
*
* @return {@code true}
*/
public boolean isSupportConnected() {
return true;
}
/** {@inheritDoc} */
@Override
public double sample() {
double n = randomData.nextGaussian(0, 1);
return FastMath.exp(scale + shape * n);
}
}

View File

@ -0,0 +1,107 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#------------------------------------------------------------------------------
# R source file to validate LogNormal distribution tests in
# org.apache.commons.math.distribution.LogNormalDistributionTest
#
# To run the test, install R, put this file and testFunctions
# into the same directory, launch R from this directory and then enter
# source("<name-of-this-file>")
#
# R functions used
# plnorm(q, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE) <-- distribution
#-----------------------------------------------------------------------------
tol <- 1E-9
# Function definitions
source("testFunctions") # utility test functions
# function to verify distribution computations
verifyDistribution <- function(points, expected, mu, sigma, tol) {
rDistValues <- rep(0, length(points))
i <- 0
for (point in points) {
i <- i + 1
rDistValues[i] <- plnorm(point, mu, sigma, log = FALSE)
}
output <- c("Distribution test mu = ",mu,", sigma = ", sigma)
if (assertEquals(expected, rDistValues, tol, "Distribution Values")) {
displayPadded(output, SUCCEEDED, WIDTH)
} else {
displayPadded(output, FAILED, WIDTH)
}
}
# function to verify density computations
verifyDensity <- function(points, expected, mu, sigma, tol) {
rDensityValues <- rep(0, length(points))
i <- 0
for (point in points) {
i <- i + 1
rDensityValues[i] <- dlnorm(point, mu, sigma, log = FALSE)
}
output <- c("Density test mu = ",mu,", sigma = ", sigma)
if (assertEquals(expected, rDensityValues, tol, "Density Values")) {
displayPadded(output, SUCCEEDED, WIDTH)
} else {
displayPadded(output, FAILED, WIDTH)
}
}
#--------------------------------------------------------------------------
cat("LogNormal test cases\n")
mu <- 2.1
sigma <- 1.4
distributionValues <- c(0, 0, 0, 0, 0.00948199951485, 0.432056525076, 0.381648158697, 0.354555726206, 0.329513316888, 0.298422824228)
densityValues <- c(0, 0, 0, 0, 0.0594218160072, 0.0436977691036, 0.0508364857798, 0.054873528325, 0.0587182664085, 0.0636229042785)
distributionPoints <- c(-2.226325228634938, -1.156887023657177, -0.643949578356075, -0.2027950777320613, 0.305827808237559,
6.42632522863494, 5.35688702365718, 4.843949578356074, 4.40279507773206, 3.89417219176244)
verifyDistribution(distributionPoints, distributionValues, mu, sigma, tol)
verifyDensity(distributionPoints, densityValues, mu, sigma, tol)
distributionValues <- c(0, 0.0396495152787, 0.16601209243, 0.272533253269, 0.357618409638, 0.426488363093, 0.483255136841, 0.530823013877)
densityValues <- c(0, 0.0873055825147, 0.0847676303432, 0.0677935186237, 0.0544105523058, 0.0444614628804, 0.0369750288945, 0.0312206409653)
distributionPoints <- c(mu - 2 *sigma, mu - sigma, mu, mu + sigma,
mu + 2 * sigma, mu + 3 * sigma, mu + 4 * sigma,
mu + 5 * sigma)
verifyDistribution(distributionPoints, distributionValues, mu, sigma, tol)
verifyDensity(distributionPoints, densityValues, mu, sigma, tol)
mu <- 0
sigma <- 1
distributionPoints <- c(mu - 2 *sigma, mu - sigma, mu, mu + sigma,
mu + 2 * sigma, mu + 3 * sigma, mu + 4 * sigma,
mu + 5 * sigma)
distributionValues <- c(0, 0, 0, 0.5, 0.755891404214, 0.864031392359, 0.917171480998, 0.946239689548)
densityValues <- c(0, 0, 0, 0.398942280401, 0.156874019279, 0.07272825614, 0.0381534565119, 0.0218507148303)
verifyDistribution(distributionPoints, distributionValues, mu, sigma, tol)
verifyDensity(distributionPoints, densityValues, mu, sigma, tol)
mu <- 0
sigma <- 0.1
distributionPoints <- c(mu - 2 *sigma, mu - sigma, mu, mu + sigma,
mu + 2 * sigma, mu + 3 * sigma, mu + 4 * sigma,
mu + 5 * sigma)
distributionValues <- c(0, 0, 0, 1.28417563064e-117, 1.39679883412e-58, 1.09839325447e-33, 2.52587961726e-20, 2.0824223487e-12)
densityValues <- c(0, 0, 0, 2.96247992535e-114, 1.1283370232e-55, 4.43812313223e-31, 5.85346445002e-18, 2.9446618076e-10)
verifyDistribution(distributionPoints, distributionValues, mu, sigma, tol)
verifyDensity(distributionPoints, densityValues, mu, sigma, tol)
displayDashes(WIDTH)

View File

@ -0,0 +1,245 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.distribution;
import org.apache.commons.math.exception.NotStrictlyPositiveException;
import org.junit.Assert;
import org.junit.Test;
/**
* Test cases for {@link LogNormalDistribution}. Extends
* {@link RealDistributionAbstractTest}. See class javadoc of that class
* for details.
*
* @version $Id$
* @since 3.0
*/
public class LogNormalDistributionTest extends RealDistributionAbstractTest {
//-------------- Implementations for abstract methods -----------------------
/** Creates the default real distribution instance to use in tests. */
@Override
public LogNormalDistribution makeDistribution() {
return new LogNormalDistribution(2.1, 1.4);
}
/** Creates the default cumulative probability distribution test input values */
@Override
public double[] makeCumulativeTestPoints() {
// quantiles computed using R
return new double[] { -2.226325228634938, -1.156887023657177,
-0.643949578356075, -0.2027950777320613,
0.305827808237559, 6.42632522863494,
5.35688702365718, 4.843949578356074,
4.40279507773206, 3.89417219176244 };
}
/** Creates the default cumulative probability density test expected values */
@Override
public double[] makeCumulativeTestValues() {
return new double[] { 0, 0, 0, 0, 0.00948199951485, 0.432056525076,
0.381648158697, 0.354555726206, 0.329513316888,
0.298422824228 };
}
/** Creates the default probability density test expected values */
@Override
public double[] makeDensityTestValues() {
return new double[] { 0, 0, 0, 0, 0.0594218160072, 0.0436977691036,
0.0508364857798, 0.054873528325, 0.0587182664085,
0.0636229042785 };
}
/**
* Creates the default inverse cumulative probability distribution test
* input values.
*/
@Override
public double[] makeInverseCumulativeTestPoints() {
// Exclude the test points less than zero, as they have cumulative
// probability of zero, meaning the inverse returns zero, and not the
// points less than zero.
double[] points = makeCumulativeTestValues();
double[] points2 = new double[points.length - 4];
System.arraycopy(points, 4, points2, 0, points2.length - 4);
return points2;
//return Arrays.copyOfRange(points, 4, points.length - 4);
}
/**
* Creates the default inverse cumulative probability test expected
* values.
*/
@Override
public double[] makeInverseCumulativeTestValues() {
// Exclude the test points less than zero, as they have cumulative
// probability of zero, meaning the inverse returns zero, and not the
// points less than zero.
double[] points = makeCumulativeTestPoints();
double[] points2 = new double[points.length - 4];
System.arraycopy(points, 4, points2, 0, points2.length - 4);
return points2;
//return Arrays.copyOfRange(points, 1, points.length - 4);
}
// --------------------- Override tolerance --------------
@Override
public void setUp() throws Exception {
super.setUp();
setTolerance(LogNormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
}
//---------------------------- Additional test cases -------------------------
private void verifyQuantiles() throws Exception {
LogNormalDistribution distribution = (LogNormalDistribution)getDistribution();
double mu = distribution.getScale();
double sigma = distribution.getShape();
setCumulativeTestPoints( new double[] { mu - 2 *sigma, mu - sigma,
mu, mu + sigma, mu + 2 * sigma,
mu + 3 * sigma,mu + 4 * sigma,
mu + 5 * sigma });
verifyCumulativeProbabilities();
}
@Test
public void testQuantiles() throws Exception {
setCumulativeTestValues(new double[] {0, 0.0396495152787,
0.16601209243, 0.272533253269,
0.357618409638, 0.426488363093,
0.483255136841, 0.530823013877});
setDensityTestValues(new double[] {0, 0.0873055825147, 0.0847676303432,
0.0677935186237, 0.0544105523058,
0.0444614628804, 0.0369750288945,
0.0312206409653});
verifyQuantiles();
verifyDensities();
setDistribution(new LogNormalDistribution(0, 1));
setCumulativeTestValues(new double[] {0, 0, 0, 0.5, 0.755891404214,
0.864031392359, 0.917171480998,
0.946239689548});
setDensityTestValues(new double[] {0, 0, 0, 0.398942280401,
0.156874019279, 0.07272825614,
0.0381534565119, 0.0218507148303});
verifyQuantiles();
verifyDensities();
setDistribution(new LogNormalDistribution(0, 0.1));
setCumulativeTestValues(new double[] {0, 0, 0, 1.28417563064e-117,
1.39679883412e-58,
1.09839325447e-33,
2.52587961726e-20,
2.0824223487e-12});
setDensityTestValues(new double[] {0, 0, 0, 2.96247992535e-114,
1.1283370232e-55, 4.43812313223e-31,
5.85346445002e-18,
2.9446618076e-10});
verifyQuantiles();
verifyDensities();
}
@Test
public void testInverseCumulativeProbabilityExtremes() throws Exception {
setInverseCumulativeTestPoints(new double[] {0, 1});
setInverseCumulativeTestValues(
new double[] {0, Double.POSITIVE_INFINITY});
verifyInverseCumulativeProbabilities();
}
@Test
public void testGetMean() {
LogNormalDistribution distribution = (LogNormalDistribution)getDistribution();
Assert.assertEquals(2.1, distribution.getScale(), 0);
}
@Test
public void testGetStandardDeviation() {
LogNormalDistribution distribution = (LogNormalDistribution)getDistribution();
Assert.assertEquals(1.4, distribution.getShape(), 0);
}
@Test(expected=NotStrictlyPositiveException.class)
public void testPreconditions() {
new LogNormalDistribution(1, 0);
}
@Test
public void testDensity() {
double [] x = new double[]{-2, -1, 0, 1, 2};
// R 2.13: print(dlnorm(c(-2,-1,0,1,2)), digits=10)
checkDensity(0, 1, x, new double[] { 0.0000000000, 0.0000000000,
0.0000000000, 0.3989422804,
0.1568740193 });
// R 2.13: print(dlnorm(c(-2,-1,0,1,2), mean=1.1), digits=10)
checkDensity(1.1, 1, x, new double[] { 0.0000000000, 0.0000000000,
0.0000000000, 0.2178521770,
0.1836267118});
}
private void checkDensity(double mean, double sd, double[] x, double[] expected) {
LogNormalDistribution d = new LogNormalDistribution(mean, sd);
for (int i = 0; i < x.length; i++) {
Assert.assertEquals(expected[i], d.density(x[i]), 1e-9);
}
}
/**
* Check to make sure top-coding of extreme values works correctly.
* Verifies fixes for JIRA MATH-167, MATH-414
*/
@Test
public void testExtremeValues() throws Exception {
LogNormalDistribution d = new LogNormalDistribution(0, 1);
for (int i = 0; i < 1e5; i++) { // make sure no convergence exception
double upperTail = d.cumulativeProbability(i);
if (i <= 72) { // make sure not top-coded
Assert.assertTrue(upperTail < 1.0d);
}
else { // make sure top coding not reversed
Assert.assertTrue(upperTail > 0.99999);
}
}
Assert.assertEquals(d.cumulativeProbability(Double.MAX_VALUE), 1, 0);
Assert.assertEquals(d.cumulativeProbability(-Double.MAX_VALUE), 0, 0);
Assert.assertEquals(d.cumulativeProbability(Double.POSITIVE_INFINITY), 1, 0);
Assert.assertEquals(d.cumulativeProbability(Double.NEGATIVE_INFINITY), 0, 0);
}
@Test
public void testMeanVariance() {
final double tol = 1e-9;
LogNormalDistribution dist;
dist = new LogNormalDistribution(0, 1);
Assert.assertEquals(dist.getNumericalMean(), 1.6487212707001282, tol);
Assert.assertEquals(dist.getNumericalVariance(),
4.670774270471604, tol);
dist = new LogNormalDistribution(2.2, 1.4);
Assert.assertEquals(dist.getNumericalMean(), 24.046753552064498, tol);
Assert.assertEquals(dist.getNumericalVariance(),
3526.913651880464, tol);
dist = new LogNormalDistribution(-2000.9, 10.4);
Assert.assertEquals(dist.getNumericalMean(), 0.0, tol);
Assert.assertEquals(dist.getNumericalVariance(), 0.0, tol);
}
}

View File

@ -22,9 +22,9 @@ import org.junit.Assert;
import org.junit.Test;
/**
* Test cases for NormalDistribution.
* Extends ContinuousDistributionAbstractTest. See class javadoc for
* ContinuousDistributionAbstractTest for details.
* Test cases for {@link NormalDistribution}. Extends
* {@link RealDistributionAbstractTest}. See class javadoc of that class
* for details.
*
* @version $Id$
*/
@ -32,7 +32,7 @@ public class NormalDistributionTest extends RealDistributionAbstractTest {
//-------------- Implementations for abstract methods -----------------------
/** Creates the default continuous distribution instance to use in tests. */
/** Creates the default real distribution instance to use in tests. */
@Override
public NormalDistribution makeDistribution() {
return new NormalDistribution(2.1, 1.4);
@ -170,7 +170,6 @@ public class NormalDistributionTest extends RealDistributionAbstractTest {
Assert.assertEquals(distribution.cumulativeProbability(-Double.MAX_VALUE), 0, 0);
Assert.assertEquals(distribution.cumulativeProbability(Double.POSITIVE_INFINITY), 1, 0);
Assert.assertEquals(distribution.cumulativeProbability(Double.NEGATIVE_INFINITY), 0, 0);
}
@Test