[MATH-1153] Improve performance of BetaDistribution#sample. Thanks to Sergei Lebedev.

This commit is contained in:
Thomas Neidhart 2015-05-01 11:57:54 +02:00
parent 9b2772e38e
commit 5597ed7ea3
5 changed files with 244 additions and 64 deletions

View File

@ -251,6 +251,9 @@
<contributor>
<name>Piotr Kochanski</name>
</contributor>
<contributor>
<name>Sergei Lebedev</name>
</contributor>
<contributor>
<name>Bob MacCallum</name>
</contributor>

View File

@ -54,12 +54,15 @@ If the output is not quite correct, check for invisible trailing spaces!
</release>
<release version="4.0" date="XXXX-XX-XX" description="">
<action dev="tn" type="fix" issue="MATH-1153" due-to="Sergei Lebedev"> <!-- backported to 3.6 -->
Improve performance of "BetaDistribution#sample()" by using Cheng's algorithm.
</action>
<action dev="tn" type="update" issue="MATH-853">
"MathRuntimeException" is now the base class for all commons-math
exceptions (except for "NullArgumentException" which extends
"NullPointerException").
</action>
<action dev="tn" type="fix" issue="MATH-1197">
<action dev="tn" type="fix" issue="MATH-1197"> <!-- backported to 3.6 -->
Computation of 2-sample Kolmogorov-Smirnov statistic in case of ties
was not correct.
</action>

View File

@ -23,6 +23,7 @@ import org.apache.commons.math4.random.Well19937c;
import org.apache.commons.math4.special.Beta;
import org.apache.commons.math4.special.Gamma;
import org.apache.commons.math4.util.FastMath;
import org.apache.commons.math4.util.Precision;
/**
* Implements the Beta distribution.
@ -162,12 +163,14 @@ public class BetaDistribution extends AbstractRealDistribution {
return Double.NEGATIVE_INFINITY;
} else if (x == 0) {
if (alpha < 1) {
throw new NumberIsTooSmallException(LocalizedFormats.CANNOT_COMPUTE_BETA_DENSITY_AT_0_FOR_SOME_ALPHA, alpha, 1, false);
throw new NumberIsTooSmallException(LocalizedFormats.CANNOT_COMPUTE_BETA_DENSITY_AT_0_FOR_SOME_ALPHA,
alpha, 1, false);
}
return Double.NEGATIVE_INFINITY;
} else if (x == 1) {
if (beta < 1) {
throw new NumberIsTooSmallException(LocalizedFormats.CANNOT_COMPUTE_BETA_DENSITY_AT_1_FOR_SOME_BETA, beta, 1, false);
throw new NumberIsTooSmallException(LocalizedFormats.CANNOT_COMPUTE_BETA_DENSITY_AT_1_FOR_SOME_BETA,
beta, 1, false);
}
return Double.NEGATIVE_INFINITY;
} else {
@ -263,4 +266,127 @@ public class BetaDistribution extends AbstractRealDistribution {
public boolean isSupportConnected() {
// Unconditionally reports the support as connected; no state is consulted.
return true;
}
/** {@inheritDoc}
* <p>
* Sampling is performed using Cheng's algorithms:
* </p>
* <p>
* R. C. H. Cheng, "Generating beta variates with nonintegral shape parameters.".
* Communications of the ACM, 21, 317-322, 1978.
* </p>
* <p>
* Each call delegates to {@code ChengBetaSampler.sample} with this instance's
* random generator and its two shape parameters.
* </p>
*/
@Override
public double sample() {
return ChengBetaSampler.sample(random, alpha, beta);
}
/**
 * Utility class implementing Cheng's algorithms for beta distribution sampling.
 * <p>
 * R. C. H. Cheng, "Generating beta variates with nonintegral shape parameters.".
 * Communications of the ACM, 21, 317-322, 1978.
 * </p>
 * <p>
 * Both algorithms draw pairs of uniform deviates from the supplied generator and
 * repeat until a candidate is accepted; the accepted candidate {@code w} is then
 * mapped to a value of the form {@code w / (b + w)} or {@code b / (b + w)}.
 * </p>
 * @since 3.6
 */
private static final class ChengBetaSampler {

    /** Constant subtracted in the acceptance tests of both algorithms (approximately ln 4). */
    private static final double LOG_4 = 1.3862944;

    /** Offset used by the first early-acceptance test of algorithm BB (approximately 1 + ln 5). */
    private static final double ONE_PLUS_LOG_5 = 2.609438;

    /** Not instantiable: this class only hosts static helpers. */
    private ChengBetaSampler() {
        // Nothing to initialise.
    }

    /**
     * Returns one sample using Cheng's sampling algorithm.
     * <p>
     * Algorithm BB is used when the smaller of the two shape parameters is strictly
     * greater than 1; algorithm BC is used otherwise.
     * </p>
     * @param random random generator to use
     * @param alpha distribution first shape parameter
     * @param beta distribution second shape parameter
     * @return sampled value
     */
    static double sample(RandomGenerator random, final double alpha, final double beta) {
        final double smaller = FastMath.min(alpha, beta);
        final double larger = FastMath.max(alpha, beta);
        if (smaller > 1) {
            return algorithmBB(random, alpha, smaller, larger);
        }
        // Note the swapped order: BC expects the larger parameter first.
        return algorithmBC(random, alpha, larger, smaller);
    }

    /**
     * Returns one sample using Cheng's BB algorithm, when both &alpha; and &beta; are greater than 1.
     *
     * @param random random generator to use
     * @param a0 distribution first shape parameter (&alpha;), used to pick the final mapping
     * @param a smaller of the two shape parameters
     * @param b larger of the two shape parameters
     * @return sampled value
     */
    private static double algorithmBB(RandomGenerator random,
                                      final double a0,
                                      final double a,
                                      final double b) {
        final double alpha = a + b;
        final double beta = FastMath.sqrt((alpha - 2.) / (2. * a * b - alpha));
        final double gamma = a + 1. / beta;

        double r;
        double w;
        double t;
        do {
            // Two uniforms are consumed per attempt, always in this order.
            final double u1 = random.nextDouble();
            final double u2 = random.nextDouble();
            final double v = scaledLogit(u1, beta);
            w = a * FastMath.exp(v);
            final double z = u1 * u1 * u2;
            r = gamma * v - LOG_4;
            final double s = a + r - w;
            // First early acceptance: avoids evaluating log(z).
            if (s + ONE_PLUS_LOG_5 >= 5 * z) {
                break;
            }
            t = FastMath.log(z);
            // Second early acceptance: avoids the two logarithms of the full test.
            if (s >= t) {
                break;
            }
            // Full acceptance test; the candidate is rejected while the left side is below t.
        } while (r + alpha * (FastMath.log(alpha) - FastMath.log(b + w)) < t);

        return toBetaVariate(w, a0, a, b);
    }

    /**
     * Returns one sample using Cheng's BC algorithm, when at least one of &alpha; and &beta;
     * is not greater than 1.
     *
     * @param random random generator to use
     * @param a0 distribution first shape parameter (&alpha;), used to pick the final mapping
     * @param a larger of the two shape parameters
     * @param b smaller of the two shape parameters
     * @return sampled value
     */
    private static double algorithmBC(RandomGenerator random,
                                      final double a0,
                                      final double a,
                                      final double b) {
        final double alpha = a + b;
        final double beta = 1. / b;
        final double delta = 1. + a - b;
        // The decimal literals are kept exactly as they were (they look like rounded 1/72, 1/24 and 7/9).
        final double k1 = delta * (0.0138889 + 0.0416667 * b) / (a * beta - 0.777778);
        final double k2 = 0.25 + (0.5 + 0.25 / delta) * b;

        for (;;) {
            // Two uniforms are consumed per attempt, always in this order.
            final double u1 = random.nextDouble();
            final double u2 = random.nextDouble();
            final double y = u1 * u2;
            final double z = u1 * y;

            // Cheap pre-tests that either reject the pair, accept it outright,
            // or fall through to the full logarithmic test.
            boolean acceptWithoutFullTest = false;
            if (u1 < 0.5) {
                if (0.25 * u2 + z - y >= k1) {
                    continue;
                }
            } else if (z <= 0.25) {
                acceptWithoutFullTest = true;
            } else if (z >= k2) {
                continue;
            }

            final double v = scaledLogit(u1, beta);
            final double w = a * FastMath.exp(v);
            if (acceptWithoutFullTest ||
                alpha * (FastMath.log(alpha) - FastMath.log(b + w) + v) - LOG_4 >= FastMath.log(z)) {
                return toBetaVariate(w, a0, a, b);
            }
        }
    }

    /**
     * Computes {@code scale * (log(u) - log1p(-u))}, the transformation applied
     * to the first uniform deviate in both algorithms.
     *
     * @param u uniform deviate
     * @param scale multiplicative factor
     * @return the scaled transformation of {@code u}
     */
    private static double scaledLogit(final double u, final double scale) {
        return scale * (FastMath.log(u) - FastMath.log1p(-u));
    }

    /**
     * Maps an accepted candidate to the returned sample.
     * <p>
     * The candidate is first capped at {@code Double.MAX_VALUE} so that an infinite
     * {@code w} cannot produce {@code Infinity / Infinity}.
     * </p>
     *
     * @param w accepted candidate
     * @param a0 distribution first shape parameter (&alpha;)
     * @param a first working parameter of the algorithm that produced {@code w}
     * @param b second working parameter of the algorithm that produced {@code w}
     * @return {@code w / (b + w)} when {@code a} and {@code a0} compare equal under
     * {@code Precision.equals}, {@code b / (b + w)} otherwise
     */
    private static double toBetaVariate(final double w,
                                        final double a0,
                                        final double a,
                                        final double b) {
        final double capped = FastMath.min(w, Double.MAX_VALUE);
        return Precision.equals(a, a0) ? capped / (b + capped) : b / (b + capped);
    }
}
}

View File

@ -16,11 +16,23 @@
*/
package org.apache.commons.math4.distribution;
import java.util.Arrays;
import org.apache.commons.math4.distribution.BetaDistribution;
import org.apache.commons.math4.random.RandomGenerator;
import org.apache.commons.math4.random.Well1024a;
import org.apache.commons.math4.random.Well19937a;
import org.apache.commons.math4.stat.StatUtils;
import org.apache.commons.math4.stat.inference.KolmogorovSmirnovTest;
import org.apache.commons.math4.stat.inference.TestUtils;
import org.junit.Assert;
import org.junit.Test;
public class BetaDistributionTest {
// Shape parameter values; the sampling tests in this class loop over every (alpha, beta) pair drawn from this array.
static final double[] alphaBetas = {0.1, 1, 10, 100, 1000};
// Absolute tolerance passed to the mean/variance assertions of the sampling test;
// presumably the smallest entry of alphaBetas (0.1) — assumes StatUtils.min returns the array minimum.
static final double epsilon = StatUtils.min(alphaBetas);
@Test
public void testCumulative() {
double[] x = new double[]{-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1};
@ -304,4 +316,66 @@ public class BetaDistributionTest {
Assert.assertEquals(dist.getNumericalMean(), 2.0 / 7.0, tol);
Assert.assertEquals(dist.getNumericalVariance(), 10.0 / (49.0 * 8.0), tol);
}
/**
 * Checks that the sample mean and sample variance of values drawn from the
 * distribution agree with its numerical mean and variance, within {@code epsilon},
 * for every (alpha, beta) pair taken from {@code alphaBetas}.
 * <p>
 * Samples are drawn from the distribution built on the explicitly seeded generator.
 * The previous version built that distribution but then sampled from a second
 * {@code new BetaDistribution(alpha, beta)}, so the seeded generator was never used
 * for sampling.
 * </p>
 */
@Test
public void testMomentsSampling() {
    final RandomGenerator random = new Well1024a(0x7829862c82fec2daL);
    final int numSamples = 1000;
    for (final double alpha : alphaBetas) {
        for (final double beta : alphaBetas) {
            final BetaDistribution betaDistribution = new BetaDistribution(random, alpha, beta);
            // Mean and variance do not depend on sample order, so no sorting is needed.
            final double[] observed = betaDistribution.sample(numSamples);

            final String distribution = String.format("Beta(%.2f, %.2f)", alpha, beta);
            Assert.assertEquals(String.format("E[%s]", distribution),
                                betaDistribution.getNumericalMean(),
                                StatUtils.mean(observed), epsilon);
            Assert.assertEquals(String.format("Var[%s]", distribution),
                                betaDistribution.getNumericalVariance(),
                                StatUtils.variance(observed), epsilon);
        }
    }
}
/**
 * Draws samples for every (alpha, beta) pair taken from {@code alphaBetas} and
 * requires that neither the G test nor the Kolmogorov-Smirnov test yields a
 * p-value below the chosen significance level.
 */
@Test
public void testGoodnessOfFit() {
    final RandomGenerator random = new Well19937a(0x237db1db907b089fL);
    final int numSamples = 1000;
    final double level = 0.01;
    final String gMessage = "G goodness-of-fit test rejected null at alpha = " + level;
    final String ksMessage = "KS goodness-of-fit test rejected null at alpha = " + level;

    for (int i = 0; i < alphaBetas.length; i++) {
        final double alpha = alphaBetas[i];
        for (int j = 0; j < alphaBetas.length; j++) {
            final double beta = alphaBetas[j];
            final BetaDistribution betaDistribution = new BetaDistribution(random, alpha, beta);
            final double[] observed = betaDistribution.sample(numSamples);

            // G test first; the KS test object is only created once the G assertion has passed.
            final double gPValue = gTest(betaDistribution, observed);
            Assert.assertFalse(gMessage, gPValue < level);

            final KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest(random);
            final double ksPValue = ksTest.kolmogorovSmirnovTest(betaDistribution, observed);
            Assert.assertFalse(ksMessage, ksPValue < level);
        }
    }
}
/**
 * Bins the given values using quantiles of the expected distribution as bin
 * edges and returns the result of {@code TestUtils.gTest} for the observed
 * counts against equal expected counts.
 *
 * @param expectedDistribution distribution whose inverse CDF supplies the bin edges
 * @param values sampled values to bin
 * @return value returned by {@code TestUtils.gTest(expected, observed)}
 */
private double gTest(final RealDistribution expectedDistribution, final double[] values) {
    // One bin per 30 values (integer division).
    final int numBins = values.length / 30;

    // lowerEdges[k] is the (k / numBins)-quantile, i.e. the lower edge of bin k.
    final double[] lowerEdges = new double[numBins];
    for (int k = 0; k < numBins; k++) {
        final double p = (double) k / numBins;
        lowerEdges[k] = expectedDistribution.inverseCumulativeProbability(p);
    }

    final long[] counts = new long[numBins];
    for (int i = 0; i < values.length; i++) {
        final double value = values[i];
        // Advance past every edge (from index 1 on) that the value reaches;
        // the value then belongs to the bin just before the stopping index.
        int next = 1;
        while (next < numBins && value >= lowerEdges[next]) {
            next++;
        }
        counts[next - 1]++;
    }

    final double expectedPerBin = (double) values.length / numBins;
    final double[] expected = new double[numBins];
    Arrays.fill(expected, expectedPerBin);

    return TestUtils.gTest(expected, counts);
}
}

View File

@ -44,8 +44,6 @@ import org.apache.commons.math4.distribution.ZipfDistribution;
import org.apache.commons.math4.distribution.ZipfDistributionTest;
import org.apache.commons.math4.exception.MathIllegalArgumentException;
import org.apache.commons.math4.random.RandomDataGenerator;
import org.apache.commons.math4.random.RandomGenerator;
import org.apache.commons.math4.random.Well19937c;
import org.apache.commons.math4.stat.Frequency;
import org.apache.commons.math4.stat.inference.ChiSquareTest;
import org.apache.commons.math4.util.FastMath;
@ -86,7 +84,7 @@ public class RandomDataGeneratorTest {
long y = randomData.nextLong(Long.MIN_VALUE, Long.MAX_VALUE);
Assert.assertFalse(x == y);
}
@Test
public void testNextUniformExtremeValues() {
double x = randomData.nextUniform(-Double.MAX_VALUE, Double.MAX_VALUE);
@ -97,7 +95,7 @@ public class RandomDataGeneratorTest {
Assert.assertFalse(Double.isInfinite(x));
Assert.assertFalse(Double.isInfinite(y));
}
@Test
public void testNextIntIAE() {
try {
@ -107,7 +105,7 @@ public class RandomDataGeneratorTest {
// ignored
}
}
@Test
public void testNextIntNegativeToPositiveRange() {
for (int i = 0; i < 5; i++) {
@ -116,7 +114,7 @@ public class RandomDataGeneratorTest {
}
}
@Test
@Test
public void testNextIntNegativeRange() {
for (int i = 0; i < 5; i++) {
checkNextIntUniform(-7, -4);
@ -125,7 +123,7 @@ public class RandomDataGeneratorTest {
}
}
@Test
@Test
public void testNextIntPositiveRange() {
for (int i = 0; i < 5; i++) {
checkNextIntUniform(0, 3);
@ -151,7 +149,7 @@ public class RandomDataGeneratorTest {
for (int i = 0; i < len; i++) {
expected[i] = 1d / len;
}
TestUtils.assertChiSquareAccept(expected, observed, 0.001);
}
@ -172,7 +170,7 @@ public class RandomDataGeneratorTest {
(((double) upper) - ((double) lower));
Assert.assertTrue(ratio > 0.99999);
}
@Test
public void testNextLongIAE() {
try {
@ -191,7 +189,7 @@ public class RandomDataGeneratorTest {
}
}
@Test
@Test
public void testNextLongNegativeRange() {
for (int i = 0; i < 5; i++) {
checkNextLongUniform(-7, -4);
@ -200,7 +198,7 @@ public class RandomDataGeneratorTest {
}
}
@Test
@Test
public void testNextLongPositiveRange() {
for (int i = 0; i < 5; i++) {
checkNextLongUniform(0, 3);
@ -226,7 +224,7 @@ public class RandomDataGeneratorTest {
for (int i = 0; i < len; i++) {
expected[i] = 1d / len;
}
TestUtils.assertChiSquareAccept(expected, observed, 0.01);
}
@ -247,7 +245,7 @@ public class RandomDataGeneratorTest {
(((double) upper) - ((double) lower));
Assert.assertTrue(ratio > 0.99999);
}
@Test
public void testNextSecureLongIAE() {
try {
@ -257,7 +255,7 @@ public class RandomDataGeneratorTest {
// ignored
}
}
@Test
@Retry(3)
public void testNextSecureLongNegativeToPositiveRange() {
@ -266,7 +264,7 @@ public class RandomDataGeneratorTest {
checkNextSecureLongUniform(-3, 6);
}
}
@Test
@Retry(3)
public void testNextSecureLongNegativeRange() {
@ -275,7 +273,7 @@ public class RandomDataGeneratorTest {
checkNextSecureLongUniform(-15, -2);
}
}
@Test
@Retry(3)
public void testNextSecureLongPositiveRange() {
@ -284,7 +282,7 @@ public class RandomDataGeneratorTest {
checkNextSecureLongUniform(2, 12);
}
}
private void checkNextSecureLongUniform(int min, int max) {
final Frequency freq = new Frequency();
for (int i = 0; i < smallSampleSize; i++) {
@ -301,7 +299,7 @@ public class RandomDataGeneratorTest {
for (int i = 0; i < len; i++) {
expected[i] = 1d / len;
}
TestUtils.assertChiSquareAccept(expected, observed, 0.0001);
}
@ -314,7 +312,7 @@ public class RandomDataGeneratorTest {
// ignored
}
}
@Test
@Retry(3)
public void testNextSecureIntNegativeToPositiveRange() {
@ -323,7 +321,7 @@ public class RandomDataGeneratorTest {
checkNextSecureIntUniform(-3, 6);
}
}
@Test
@Retry(3)
public void testNextSecureIntNegativeRange() {
@ -332,8 +330,8 @@ public class RandomDataGeneratorTest {
checkNextSecureIntUniform(-15, -2);
}
}
@Test
@Test
@Retry(3)
public void testNextSecureIntPositiveRange() {
for (int i = 0; i < 5; i++) {
@ -341,7 +339,7 @@ public class RandomDataGeneratorTest {
checkNextSecureIntUniform(2, 12);
}
}
private void checkNextSecureIntUniform(int min, int max) {
final Frequency freq = new Frequency();
for (int i = 0; i < smallSampleSize; i++) {
@ -358,11 +356,11 @@ public class RandomDataGeneratorTest {
for (int i = 0; i < len; i++) {
expected[i] = 1d / len;
}
TestUtils.assertChiSquareAccept(expected, observed, 0.0001);
}
/**
* Make sure that empirical distribution of random Poisson(4)'s has P(X <=
@ -389,7 +387,7 @@ public class RandomDataGeneratorTest {
} catch (MathIllegalArgumentException ex) {
// ignored
}
final double mean = 4.0d;
final int len = 5;
PoissonDistribution poissonDistribution = new PoissonDistribution(mean);
@ -406,7 +404,7 @@ public class RandomDataGeneratorTest {
for (int i = 0; i < len; i++) {
expected[i] = poissonDistribution.probability(i + 1) * largeSampleSize;
}
TestUtils.assertChiSquareAccept(expected, observed, 0.0001);
}
@ -686,35 +684,35 @@ public class RandomDataGeneratorTest {
// ignored
}
}
/** Runs the uniformity check five times with bounds 0 and 10. */
@Test
public void testNextUniformUniformPositiveBounds() {
    int remaining = 5;
    while (remaining > 0) {
        checkNextUniformUniform(0, 10);
        remaining--;
    }
}
/** Runs the uniformity check five times with bounds -3 and 5. */
@Test
public void testNextUniformUniformNegativeToPositiveBounds() {
    int remaining = 5;
    while (remaining > 0) {
        checkNextUniformUniform(-3, 5);
        remaining--;
    }
}
/** Runs the uniformity check five times with bounds -7 and -3. */
@Test
public void testNextUniformUniformNegaiveBounds() {
    int remaining = 5;
    while (remaining > 0) {
        checkNextUniformUniform(-7, -3);
        remaining--;
    }
}
/** Runs the uniformity check five times over the interval from -Double.MAX_VALUE to Double.MAX_VALUE. */
@Test
public void testNextUniformUniformMaximalInterval() {
    int remaining = 5;
    while (remaining > 0) {
        checkNextUniformUniform(-Double.MAX_VALUE, Double.MAX_VALUE);
        remaining--;
    }
}
private void checkNextUniformUniform(double min, double max) {
// Set up bin bounds - min, binBound[0], ..., binBound[binCount-2], max
final int binCount = 5;
@ -724,7 +722,7 @@ public class RandomDataGeneratorTest {
for (int i = 1; i < binCount - 1; i++) {
binBounds[i] = binBounds[i - 1] + binSize; // + instead of * to avoid overflow in extreme case
}
final Frequency freq = new Frequency();
for (int i = 0; i < smallSampleSize; i++) {
final double value = randomData.nextUniform(min, max);
@ -736,7 +734,7 @@ public class RandomDataGeneratorTest {
}
freq.addValue(j);
}
final long[] observed = new long[binCount];
for (int i = 0; i < binCount; i++) {
observed[i] = freq.getCount(i);
@ -745,7 +743,7 @@ public class RandomDataGeneratorTest {
for (int i = 0; i < binCount; i++) {
expected[i] = 1d / binCount;
}
TestUtils.assertChiSquareAccept(expected, observed, 0.01);
}
@ -954,7 +952,7 @@ public class RandomDataGeneratorTest {
int[] perm = randomData.nextPermutation(3, 3);
observed[findPerm(p, perm)]++;
}
String[] labels = {"{0, 1, 2}", "{ 0, 2, 1 }", "{ 1, 0, 2 }",
"{ 1, 2, 0 }", "{ 2, 0, 1 }", "{ 2, 1, 0 }"};
TestUtils.assertChiSquareAccept(labels, expected, observed, 0.001);
@ -1012,30 +1010,6 @@ public class RandomDataGeneratorTest {
return -1;
}
/**
 * Records ten uniform deviates from a seeded generator, reseeds it with the same
 * seed, and then checks that the CDF evaluated at each of ten beta samples equals
 * the corresponding recorded uniform deviate to within 10E-9.
 * <p>
 * NOTE(review): this only holds if {@code BetaDistribution.sample()} consumes exactly
 * one uniform deviate per sample and inverts the CDF. The {@code sample()} override
 * shown for {@code BetaDistribution} delegates to {@code ChengBetaSampler} instead,
 * so confirm whether this test is still applicable.
 * </p>
 */
@Test
public void testNextInversionDeviate() {
// Set the seed for the default random generator
RandomGenerator rg = new Well19937c(100);
RandomDataGenerator rdg = new RandomDataGenerator(rg);
double[] quantiles = new double[10];
for (int i = 0; i < 10; i++) {
quantiles[i] = rdg.nextUniform(0, 1);
}
// Reseed again so the inversion generator gets the same sequence
rg.setSeed(100);
BetaDistribution betaDistribution = new BetaDistribution(rg, 2, 4,
BetaDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
/*
* Generate a sequence of deviates using inversion - the distribution function
* evaluated at the random value from the distribution should match the uniform
* random value used to generate it, which is stored in the quantiles[] array.
*/
for (int i = 0; i < 10; i++) {
double value = betaDistribution.sample();
// NOTE(review): JUnit's assertEquals takes (expected, actual, delta); here the computed CDF value is passed first.
Assert.assertEquals(betaDistribution.cumulativeProbability(value), quantiles[i], 10E-9);
}
}
@Test
public void testNextBeta() {
double[] quartiles = TestUtils.getDistributionQuartiles(new BetaDistribution(2,5));