Added fix for MATH-597: Implemented faster generation of random exponential distributed values with algorithm from Ahrens and Dieter (1972): Computer methods for sampling from the exponential and normal distributions. Test case was improved, too.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1137795 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ff92629a3b
commit
6d291937d0
|
@ -44,6 +44,7 @@ import org.apache.commons.math.exception.NumberIsTooLargeException;
|
|||
import org.apache.commons.math.exception.util.LocalizedFormats;
|
||||
import org.apache.commons.math.util.FastMath;
|
||||
import org.apache.commons.math.util.MathUtils;
|
||||
import org.apache.commons.math.util.ResizableDoubleArray;
|
||||
|
||||
/**
|
||||
* Implements the {@link RandomData} interface using a {@link RandomGenerator}
|
||||
|
@ -107,12 +108,56 @@ public class RandomDataImpl implements RandomData, Serializable {
|
|||
/** Serializable version identifier */
|
||||
private static final long serialVersionUID = -626730818244969716L;
|
||||
|
||||
/** Used when generating Exponential samples
|
||||
* [1] writes:
|
||||
* One table containing the constants
|
||||
* q_i = sum_{j=1}^i (ln 2)^j/j! = ln 2 + (ln 2)^2/2 + ... + (ln 2)^i/i!
|
||||
* until the largest representable fraction below 1 is exceeded.
|
||||
*
|
||||
* Note that
|
||||
* 1 = 2 - 1 = exp(ln 2) - 1 = sum_{n=1}^infty (ln 2)^n / n!
|
||||
* thus q_i -> 1 as i -> infty,
|
||||
* so the higher 1, the closer to one we get (the series is not alternating).
|
||||
*
|
||||
* By trying, n = 16 in Java is enough to reach 1.0.
|
||||
*/
|
||||
private static double[] EXPONENTIAL_SA_QI = null;
|
||||
|
||||
/** underlying random number generator */
|
||||
private RandomGenerator rand = null;
|
||||
|
||||
/** underlying secure random number generator */
|
||||
private SecureRandom secRand = null;
|
||||
|
||||
/**
|
||||
* Initialize tables
|
||||
*/
|
||||
static {
|
||||
/**
|
||||
* Filling EXPONENTIAL_SA_QI table.
|
||||
* Note that we don't want qi = 0 in the table.
|
||||
*/
|
||||
final double LN2 = FastMath.log(2);
|
||||
double qi = 0;
|
||||
int i = 1;
|
||||
|
||||
/**
|
||||
* MathUtils provides factorials up to 20, so let's use that limit together
|
||||
* with MathUtils.EPSILON to generate the following code (a priori, we know that
|
||||
* there will be 16 elements, but instead of hardcoding that, this is
|
||||
* prettier):
|
||||
*/
|
||||
final ResizableDoubleArray ra = new ResizableDoubleArray(20);
|
||||
|
||||
while (qi < 1) {
|
||||
qi += FastMath.pow(LN2, i) / MathUtils.factorial(i);
|
||||
ra.addElement(qi);
|
||||
++i;
|
||||
}
|
||||
|
||||
EXPONENTIAL_SA_QI = ra.getElements();
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a RandomDataImpl.
|
||||
*/
|
||||
|
@ -469,10 +514,11 @@ public class RandomDataImpl implements RandomData, Serializable {
|
|||
* Returns a random value from an Exponential distribution with the given
|
||||
* mean.
|
||||
* <p>
|
||||
* <strong>Algorithm Description</strong>: Uses the <a
|
||||
* href="http://www.jesus.ox.ac.uk/~clifford/a5/chap1/node5.html"> Inversion
|
||||
* Method</a> to generate exponentially distributed random values from
|
||||
* uniform deviates.
|
||||
* <strong>Algorithm Description</strong>: Uses the Algorithm SA (Ahrens)
|
||||
* from p. 876 in:
|
||||
* [1]: Ahrens, J. H. and Dieter, U. (1972). Computer methods for
|
||||
* sampling from the exponential and normal distributions.
|
||||
* Communications of the ACM, 15, 873-882.
|
||||
* </p>
|
||||
*
|
||||
* @param mean the mean of the distribution
|
||||
|
@ -483,12 +529,43 @@ public class RandomDataImpl implements RandomData, Serializable {
|
|||
if (mean <= 0.0) {
|
||||
throw new NotStrictlyPositiveException(LocalizedFormats.MEAN, mean);
|
||||
}
|
||||
final RandomGenerator generator = getRan();
|
||||
double unif = generator.nextDouble();
|
||||
while (unif == 0.0d) {
|
||||
unif = generator.nextDouble();
|
||||
|
||||
// Step 1:
|
||||
double a = 0;
|
||||
double u = this.nextUniform(0, 1);
|
||||
|
||||
// Step 2 and 3:
|
||||
while (u < 0.5) {
|
||||
a += EXPONENTIAL_SA_QI[0];
|
||||
u *= 2;
|
||||
}
|
||||
return -mean * FastMath.log(unif);
|
||||
|
||||
// Step 4 (now u >= 0.5):
|
||||
u += u - 1;
|
||||
|
||||
// Step 5:
|
||||
if (u <= EXPONENTIAL_SA_QI[0]) {
|
||||
return mean * (a + u);
|
||||
}
|
||||
|
||||
// Step 6:
|
||||
int i = 0; // Should be 1, be we iterate before it in while using 0
|
||||
double u2 = this.nextUniform(0, 1);
|
||||
double umin = u2;
|
||||
|
||||
// Step 7 and 8:
|
||||
do {
|
||||
++i;
|
||||
u2 = this.nextUniform(0, 1);
|
||||
|
||||
if (u2 < umin) {
|
||||
umin = u2;
|
||||
}
|
||||
|
||||
// Step 8:
|
||||
} while (u > EXPONENTIAL_SA_QI[i]); // Ensured to exit since EXPONENTIAL_SA_QI[MAX] = 1
|
||||
|
||||
return mean * (a + umin * EXPONENTIAL_SA_QI[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -52,6 +52,11 @@ The <action> type attribute can be add,update,fix,remove.
|
|||
If the output is not quite correct, check for invisible trailing spaces!
|
||||
-->
|
||||
<release version="3.0" date="TBD" description="TBD">
|
||||
<action dev="mikl" type="fix" issue="MATH-597">
|
||||
Implemented faster generation of random exponential distributed values with
|
||||
algorithm from Ahrens and Dieter (1972): Computer methods for sampling
|
||||
from the exponential and normal distributions.
|
||||
</action>
|
||||
<action dev="luc" type="add" issue="MATH-548">
|
||||
K-means++ clustering can now run multiple trials
|
||||
</action>
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.commons.math.distribution.BinomialDistributionImpl;
|
|||
import org.apache.commons.math.distribution.BinomialDistributionTest;
|
||||
import org.apache.commons.math.distribution.CauchyDistributionImpl;
|
||||
import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
|
||||
import org.apache.commons.math.distribution.ExponentialDistributionImpl;
|
||||
import org.apache.commons.math.distribution.FDistributionImpl;
|
||||
import org.apache.commons.math.distribution.GammaDistributionImpl;
|
||||
import org.apache.commons.math.distribution.HypergeometricDistributionImpl;
|
||||
|
@ -245,10 +246,10 @@ public class RandomDataTest {
|
|||
|
||||
@Test
|
||||
public void testNextPoissonConsistency() throws Exception {
|
||||
|
||||
|
||||
// Reseed randomGenerator to get fixed sequence
|
||||
randomData.reSeed(1000);
|
||||
|
||||
randomData.reSeed(1000);
|
||||
|
||||
// Small integral means
|
||||
for (int i = 1; i < 100; i++) {
|
||||
checkNextPoissonConsistency(i);
|
||||
|
@ -581,7 +582,7 @@ public class RandomDataTest {
|
|||
|
||||
/** test failure modes and distribution of nextExponential() */
|
||||
@Test
|
||||
public void testNextExponential() {
|
||||
public void testNextExponential() throws Exception {
|
||||
try {
|
||||
randomData.nextExponential(-1);
|
||||
Assert.fail("negative mean -- expecting MathIllegalArgumentException");
|
||||
|
@ -609,6 +610,32 @@ public class RandomDataTest {
|
|||
*/
|
||||
Assert.assertEquals("exponential cumulative distribution", (double) cumFreq
|
||||
/ (double) largeSampleSize, 0.8646647167633873, .2);
|
||||
|
||||
/**
|
||||
* Proposal on improving the test of generating exponentials
|
||||
*/
|
||||
double[] quartiles;
|
||||
long[] counts;
|
||||
|
||||
// Mean 1
|
||||
quartiles = TestUtils.getDistributionQuartiles(new ExponentialDistributionImpl(1));
|
||||
counts = new long[4];
|
||||
randomData.reSeed(1000);
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
double value = randomData.nextExponential(1);
|
||||
TestUtils.updateCounts(value, counts, quartiles);
|
||||
}
|
||||
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
|
||||
|
||||
// Mean 5
|
||||
quartiles = TestUtils.getDistributionQuartiles(new ExponentialDistributionImpl(5));
|
||||
counts = new long[4];
|
||||
randomData.reSeed(1000);
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
double value = randomData.nextExponential(5);
|
||||
TestUtils.updateCounts(value, counts, quartiles);
|
||||
}
|
||||
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
|
||||
}
|
||||
|
||||
/** test reseeding, algorithm/provider games */
|
||||
|
@ -810,7 +837,7 @@ public class RandomDataTest {
|
|||
Assert.fail("permutation not found");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextInversionDeviate() throws Exception {
|
||||
// Set the seed for the default random generator
|
||||
|
@ -830,9 +857,9 @@ public class RandomDataTest {
|
|||
for (int i = 0; i < 10; i++) {
|
||||
double value = randomData.nextInversionDeviate(betaDistribution);
|
||||
Assert.assertEquals(betaDistribution.cumulativeProbability(value), quantiles[i], 10E-9);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextBeta() throws Exception {
|
||||
double[] quartiles = TestUtils.getDistributionQuartiles(new BetaDistributionImpl(2,5));
|
||||
|
@ -844,7 +871,7 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextCauchy() throws Exception {
|
||||
double[] quartiles = TestUtils.getDistributionQuartiles(new CauchyDistributionImpl(1.2, 2.1));
|
||||
|
@ -856,7 +883,7 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextChiSquare() throws Exception {
|
||||
double[] quartiles = TestUtils.getDistributionQuartiles(new ChiSquaredDistributionImpl(12));
|
||||
|
@ -868,7 +895,7 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextF() throws Exception {
|
||||
double[] quartiles = TestUtils.getDistributionQuartiles(new FDistributionImpl(12, 5));
|
||||
|
@ -880,7 +907,7 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextGamma() throws Exception {
|
||||
double[] quartiles = TestUtils.getDistributionQuartiles(new GammaDistributionImpl(4, 2));
|
||||
|
@ -892,7 +919,7 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextT() throws Exception {
|
||||
double[] quartiles = TestUtils.getDistributionQuartiles(new TDistributionImpl(10));
|
||||
|
@ -904,7 +931,7 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextWeibull() throws Exception {
|
||||
double[] quartiles = TestUtils.getDistributionQuartiles(new WeibullDistributionImpl(1.2, 2.1));
|
||||
|
@ -916,7 +943,7 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(expected, counts, 0.001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextBinomial() throws Exception {
|
||||
BinomialDistributionTest testInstance = new BinomialDistributionTest();
|
||||
|
@ -942,7 +969,7 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(densityPoints, expectedCounts, observedCounts, .001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextHypergeometric() throws Exception {
|
||||
HypergeometricDistributionTest testInstance = new HypergeometricDistributionTest();
|
||||
|
@ -968,7 +995,7 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(densityPoints, expectedCounts, observedCounts, .001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextPascal() throws Exception {
|
||||
PascalDistributionTest testInstance = new PascalDistributionTest();
|
||||
|
@ -993,7 +1020,7 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(densityPoints, expectedCounts, observedCounts, .001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNextZipf() throws Exception {
|
||||
ZipfDistributionTest testInstance = new ZipfDistributionTest();
|
||||
|
@ -1018,5 +1045,5 @@ public class RandomDataTest {
|
|||
}
|
||||
TestUtils.assertChiSquareAccept(densityPoints, expectedCounts, observedCounts, .001);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue