Added fix for MATH-597: Implemented faster generation of random exponential distributed values with algorithm from Ahrens and Dieter (1972): Computer methods for sampling from the exponential and normal distributions. Test case was improved, too.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1137795 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mikkel Meyer Andersen 2011-06-20 21:42:48 +00:00
parent ff92629a3b
commit 6d291937d0
3 changed files with 136 additions and 27 deletions

View File

@ -44,6 +44,7 @@ import org.apache.commons.math.exception.NumberIsTooLargeException;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.util.FastMath;
import org.apache.commons.math.util.MathUtils;
import org.apache.commons.math.util.ResizableDoubleArray;
/**
* Implements the {@link RandomData} interface using a {@link RandomGenerator}
@ -107,12 +108,56 @@ public class RandomDataImpl implements RandomData, Serializable {
/** Serializable version identifier */
private static final long serialVersionUID = -626730818244969716L;
/** Used when generating Exponential samples
* [1] writes:
* One table containing the constants
* q_i = sum_{j=1}^i (ln 2)^j/j! = ln 2 + (ln 2)^2/2 + ... + (ln 2)^i/i!
* until the largest representable fraction below 1 is exceeded.
*
* Note that
* 1 = 2 - 1 = exp(ln 2) - 1 = sum_{n=1}^infty (ln 2)^n / n!
* thus q_i -> 1 as i -> infty,
* so the higher 1, the closer to one we get (the series is not alternating).
*
* By trying, n = 16 in Java is enough to reach 1.0.
*/
private static double[] EXPONENTIAL_SA_QI = null;
/** underlying random number generator */
private RandomGenerator rand = null;
/** underlying secure random number generator */
private SecureRandom secRand = null;
/**
* Initialize tables
*/
static {
/**
* Filling EXPONENTIAL_SA_QI table.
* Note that we don't want qi = 0 in the table.
*/
final double LN2 = FastMath.log(2);
double qi = 0;
int i = 1;
/**
* MathUtils provides factorials up to 20, so let's use that limit together
* with MathUtils.EPSILON to generate the following code (a priori, we know that
* there will be 16 elements, but instead of hardcoding that, this is
* prettier):
*/
final ResizableDoubleArray ra = new ResizableDoubleArray(20);
while (qi < 1) {
qi += FastMath.pow(LN2, i) / MathUtils.factorial(i);
ra.addElement(qi);
++i;
}
EXPONENTIAL_SA_QI = ra.getElements();
}
/**
* Construct a RandomDataImpl.
*/
@ -469,10 +514,11 @@ public class RandomDataImpl implements RandomData, Serializable {
* Returns a random value from an Exponential distribution with the given
* mean.
* <p>
* <strong>Algorithm Description</strong>: Uses the <a
* href="http://www.jesus.ox.ac.uk/~clifford/a5/chap1/node5.html"> Inversion
* Method</a> to generate exponentially distributed random values from
* uniform deviates.
* <strong>Algorithm Description</strong>: Uses the Algorithm SA (Ahrens)
* from p. 876 in:
* [1]: Ahrens, J. H. and Dieter, U. (1972). Computer methods for
* sampling from the exponential and normal distributions.
* Communications of the ACM, 15, 873-882.
* </p>
*
* @param mean the mean of the distribution
@ -483,12 +529,43 @@ public class RandomDataImpl implements RandomData, Serializable {
if (mean <= 0.0) {
throw new NotStrictlyPositiveException(LocalizedFormats.MEAN, mean);
}
final RandomGenerator generator = getRan();
double unif = generator.nextDouble();
while (unif == 0.0d) {
unif = generator.nextDouble();
// Step 1:
double a = 0;
double u = this.nextUniform(0, 1);
// Step 2 and 3:
while (u < 0.5) {
a += EXPONENTIAL_SA_QI[0];
u *= 2;
}
return -mean * FastMath.log(unif);
// Step 4 (now u >= 0.5):
u += u - 1;
// Step 5:
if (u <= EXPONENTIAL_SA_QI[0]) {
return mean * (a + u);
}
// Step 6:
int i = 0; // Should be 1, be we iterate before it in while using 0
double u2 = this.nextUniform(0, 1);
double umin = u2;
// Step 7 and 8:
do {
++i;
u2 = this.nextUniform(0, 1);
if (u2 < umin) {
umin = u2;
}
// Step 8:
} while (u > EXPONENTIAL_SA_QI[i]); // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1
return mean * (a + umin * EXPONENTIAL_SA_QI[0]);
}
/**

View File

@ -52,6 +52,11 @@ The <action> type attribute can be add,update,fix,remove.
If the output is not quite correct, check for invisible trailing spaces!
-->
<release version="3.0" date="TBD" description="TBD">
<action dev="mikl" type="fix" issue="MATH-597">
Implemented faster generation of random exponential distributed values with
algorithm from Ahrens and Dieter (1972): Computer methods for sampling
from the exponential and normal distributions.
</action>
<action dev="luc" type="add" issue="MATH-548">
K-means++ clustering can now run multiple trials
</action>

View File

@ -30,6 +30,7 @@ import org.apache.commons.math.distribution.BinomialDistributionImpl;
import org.apache.commons.math.distribution.BinomialDistributionTest;
import org.apache.commons.math.distribution.CauchyDistributionImpl;
import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
import org.apache.commons.math.distribution.ExponentialDistributionImpl;
import org.apache.commons.math.distribution.FDistributionImpl;
import org.apache.commons.math.distribution.GammaDistributionImpl;
import org.apache.commons.math.distribution.HypergeometricDistributionImpl;
@ -245,10 +246,10 @@ public class RandomDataTest {
@Test
public void testNextPoissonConsistency() throws Exception {
// Reseed randomGenerator to get fixed sequence
randomData.reSeed(1000);
randomData.reSeed(1000);
// Small integral means
for (int i = 1; i < 100; i++) {
checkNextPoissonConsistency(i);
@ -581,7 +582,7 @@ public class RandomDataTest {
/** test failure modes and distribution of nextExponential() */
@Test
public void testNextExponential() {
public void testNextExponential() throws Exception {
try {
randomData.nextExponential(-1);
Assert.fail("negative mean -- expecting MathIllegalArgumentException");
@ -609,6 +610,32 @@ public class RandomDataTest {
*/
Assert.assertEquals("exponential cumulative distribution", (double) cumFreq
/ (double) largeSampleSize, 0.8646647167633873, .2);
/**
* Proposal on improving the test of generating exponentials
*/
double[] quartiles;
long[] counts;
// Mean 1
quartiles = TestUtils.getDistributionQuartiles(new ExponentialDistributionImpl(1));
counts = new long[4];
randomData.reSeed(1000);
for (int i = 0; i < 1000; i++) {
double value = randomData.nextExponential(1);
TestUtils.updateCounts(value, counts, quartiles);
}
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
// Mean 5
quartiles = TestUtils.getDistributionQuartiles(new ExponentialDistributionImpl(5));
counts = new long[4];
randomData.reSeed(1000);
for (int i = 0; i < 1000; i++) {
double value = randomData.nextExponential(5);
TestUtils.updateCounts(value, counts, quartiles);
}
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
}
/** test reseeding, algorithm/provider games */
@ -810,7 +837,7 @@ public class RandomDataTest {
Assert.fail("permutation not found");
return -1;
}
@Test
public void testNextInversionDeviate() throws Exception {
// Set the seed for the default random generator
@ -830,9 +857,9 @@ public class RandomDataTest {
for (int i = 0; i < 10; i++) {
double value = randomData.nextInversionDeviate(betaDistribution);
Assert.assertEquals(betaDistribution.cumulativeProbability(value), quantiles[i], 10E-9);
}
}
}
@Test
public void testNextBeta() throws Exception {
double[] quartiles = TestUtils.getDistributionQuartiles(new BetaDistributionImpl(2,5));
@ -844,7 +871,7 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
}
@Test
public void testNextCauchy() throws Exception {
double[] quartiles = TestUtils.getDistributionQuartiles(new CauchyDistributionImpl(1.2, 2.1));
@ -856,7 +883,7 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
}
@Test
public void testNextChiSquare() throws Exception {
double[] quartiles = TestUtils.getDistributionQuartiles(new ChiSquaredDistributionImpl(12));
@ -868,7 +895,7 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
}
@Test
public void testNextF() throws Exception {
double[] quartiles = TestUtils.getDistributionQuartiles(new FDistributionImpl(12, 5));
@ -880,7 +907,7 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
}
@Test
public void testNextGamma() throws Exception {
double[] quartiles = TestUtils.getDistributionQuartiles(new GammaDistributionImpl(4, 2));
@ -892,7 +919,7 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
}
@Test
public void testNextT() throws Exception {
double[] quartiles = TestUtils.getDistributionQuartiles(new TDistributionImpl(10));
@ -904,7 +931,7 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
}
@Test
public void testNextWeibull() throws Exception {
double[] quartiles = TestUtils.getDistributionQuartiles(new WeibullDistributionImpl(1.2, 2.1));
@ -916,7 +943,7 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
}
@Test
public void testNextBinomial() throws Exception {
BinomialDistributionTest testInstance = new BinomialDistributionTest();
@ -942,7 +969,7 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(densityPoints, expectedCounts, observedCounts, .001);
}
@Test
public void testNextHypergeometric() throws Exception {
HypergeometricDistributionTest testInstance = new HypergeometricDistributionTest();
@ -968,7 +995,7 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(densityPoints, expectedCounts, observedCounts, .001);
}
@Test
public void testNextPascal() throws Exception {
PascalDistributionTest testInstance = new PascalDistributionTest();
@ -993,7 +1020,7 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(densityPoints, expectedCounts, observedCounts, .001);
}
@Test
public void testNextZipf() throws Exception {
ZipfDistributionTest testInstance = new ZipfDistributionTest();
@ -1018,5 +1045,5 @@ public class RandomDataTest {
}
TestUtils.assertChiSquareAccept(densityPoints, expectedCounts, observedCounts, .001);
}
}