From a72bbf44cf9fcbf2d5015e7ccf0e29d67403a696 Mon Sep 17 00:00:00 2001 From: Phil Steitz Date: Mon, 28 Sep 2009 10:36:46 +0000 Subject: [PATCH] Added goodness of fit test for poisson deviates. git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@819492 13f79535-47bb-0310-9956-ffa450edef68 --- .../commons/math/random/RandomDataTest.java | 133 ++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/src/test/java/org/apache/commons/math/random/RandomDataTest.java b/src/test/java/org/apache/commons/math/random/RandomDataTest.java index 745979224..96c2a2632 100644 --- a/src/test/java/org/apache/commons/math/random/RandomDataTest.java +++ b/src/test/java/org/apache/commons/math/random/RandomDataTest.java @@ -18,11 +18,18 @@ package org.apache.commons.math.random; import junit.framework.Test; import junit.framework.TestSuite; +import junit.framework.AssertionFailedError; + +import java.util.ArrayList; import java.util.HashSet; +import java.util.List; import org.apache.commons.math.RetryTestCase; +import org.apache.commons.math.distribution.PoissonDistribution; +import org.apache.commons.math.distribution.PoissonDistributionImpl; import org.apache.commons.math.stat.Frequency; import org.apache.commons.math.stat.inference.ChiSquareTestImpl; +import org.apache.commons.math.stat.inference.ChiSquareTest; import org.apache.commons.math.stat.descriptive.SummaryStatistics; /** @@ -218,6 +225,132 @@ public class RandomDataTest extends RetryTestCase { } } + + public void testNextPoissionConistency() throws Exception { + // TODO: increase upper bound to 40 when MATH-294 is resolved + for (int i = 1; i < 6; i++) { + checkNextPoissonConsistency(i); + } + } + + /** + * Verifies that nextPoisson(mean) generates an empirical distribution of values + * consistent with PoissonDistributionImpl by generating 1000 values, computing a + * grouped frequency distribution of the observed values and comparing this distribution + * to the corresponding expected distribution computed using PoissonDistributionImpl. + * Uses ChiSquare test of goodness of fit to evaluate the null hypothesis that the + * distributions are the same. If the null hypothesis can be rejected with confidence + * 1 - alpha, the check fails. This check will fail randomly with probability alpha. + */ + public void checkNextPoissonConsistency(double mean) throws Exception { + // Generate sample values + int sampleSize = 1000; // Number of deviates to generate + int minExpectedCount = 7; // Minimum size of expected bin count + long maxObservedValue = 0; + double alpha = 0.001; // Probability of false failure + Frequency frequency = new Frequency(); + for (int i = 0; i < sampleSize; i++) { + long value = randomData.nextPoisson(mean); + if (value > maxObservedValue) { + maxObservedValue = value; + } + frequency.addValue(value); + } + + /* + * Set up bins for chi-square test. + * Ensure expected counts are all at least minExpectedCount. + * Start with upper and lower tail bins. + * Lower bin = [0, lower); Upper bin = [upper, +inf). + */ + PoissonDistribution poissonDistribution = new PoissonDistributionImpl(mean); + int lower = 1; + while (poissonDistribution.cumulativeProbability(lower - 1) * sampleSize < minExpectedCount) { + lower++; + } + int upper = (int) (5 * mean); // Even for mean = 1, not much mass beyond 5 + while ((1 - poissonDistribution.cumulativeProbability(upper - 1)) * sampleSize < minExpectedCount) { + upper--; + } + + // Set bin width for interior bins. For poisson, only need to look at end bins. + int binWidth = 1; + boolean widthSufficient = false; + double lowerBinMass = 0; + double upperBinMass = 0; + while (!widthSufficient) { + lowerBinMass = poissonDistribution.cumulativeProbability(lower, lower + binWidth - 1); + upperBinMass = poissonDistribution.cumulativeProbability(upper - binWidth + 1, upper); + widthSufficient = Math.min(lowerBinMass, upperBinMass) * sampleSize >= minExpectedCount; + binWidth++; + } + + /* + * Determine interior bin bounds. Bins are + * [0, lower = binBounds[0]), [lower, binBounds[1]), [binBounds[0], binBounds[1]), ... , + * [binBounds[binCount - 2], upper = binBounds[binCount - 1]), [upper, +inf) + * + */ + List binBounds = new ArrayList(); + binBounds.add(lower); + int bound = lower + binWidth; + while (bound < upper - binWidth) { + binBounds.add(bound); + bound += binWidth; + } + binBounds.add(bound); + binBounds.add(upper); + + // Compute observed and expected bin counts + final int binCount = binBounds.size() + 1; + long[] observed = new long[binCount]; + double[] expected = new double[binCount]; + + // Bottom bin + observed[0] = 0; + for (int i = 0; i < lower; i++) { + observed[0] += frequency.getCount(i); + } + expected[0] = poissonDistribution.cumulativeProbability(lower - 1) * sampleSize; + + // Top bin + observed[binCount - 1] = 0; + for (int i = upper; i <= maxObservedValue; i++) { + observed[binCount - 1] += frequency.getCount(i); + } + expected[binCount - 1] = (1 - poissonDistribution.cumulativeProbability(upper - 1)) * sampleSize; + + // Interior bins + for (int i = 1; i < binCount - 1; i++) { + observed[i] = 0; + for (int j = binBounds.get(i - 1); j < binBounds.get(i); j++) { + observed[i] += frequency.getCount(j); + } // Expected count is (mass in [binBounds[i], binBounds[i+1])) * sampleSize + expected[i] = (poissonDistribution.cumulativeProbability(binBounds.get(i) - 1) - + poissonDistribution.cumulativeProbability(binBounds.get(i - 1) -1)) * sampleSize; + } + + // Use chisquare test to verify that generated values are poisson(mean)-distributed + ChiSquareTest chiSquareTest = new ChiSquareTestImpl(); + try { + // Fail if we can reject null hypothesis that distributions are the same + assertFalse(chiSquareTest.chiSquareTest(expected, observed, alpha)); + } catch (AssertionFailedError ex) { + StringBuffer msgBuffer = new StringBuffer(); + msgBuffer.append("Chisquare test failed for mean = "); + msgBuffer.append(mean); + msgBuffer.append(" p-value = "); + msgBuffer.append(chiSquareTest.chiSquareTest(expected, observed)); + msgBuffer.append(" chisquare statistic = "); + msgBuffer.append(chiSquareTest.chiSquare(expected, observed)); + msgBuffer.append(". \n"); + msgBuffer.append("This test can fail randomly due to sampling error with probability "); + msgBuffer.append(alpha); + msgBuffer.append("."); + fail(msgBuffer.toString()); + } + + } public void testNextPoissonLargeMean() { for (int i = 0; i < 1000; i++) {