Added ConfidenceInterval class and BinomialConfidenceInterval providing implementations of several estimates of confidence intervals for binomial probabilities.

JIRA: MATH-1038
Patch provided by Thorsten Schaefer 


git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1533824 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Phil Steitz 2013-10-19 20:37:13 +00:00
parent 1b9489d057
commit 52e85a5f6f
6 changed files with 412 additions and 0 deletions

View File

@ -51,6 +51,10 @@ If the output is not quite correct, check for invisible trailing spaces!
</properties>
<body>
<release version="x.y" date="TBD" description="TBD">
<actiod dev="psteitz" type="add" issue="MATH-1038" due-to="Thorsten Schaefer">
Added ConfidenceInterval class and BinomialConfidenceInterval providing several
estimators for confidence intervals for binomial probabilities.
</action>
<action dev="tn" type="fix" issue="MATH-1035" due-to="derphead">
Simplified and improved performance of "ArithmeticUtils#addAndCheck(long, long)".
</action>

View File

@ -275,6 +275,7 @@ public enum LocalizedFormats implements Localizable {
OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY("observed counts are both zero for entry {0}"),
BOBYQA_BOUND_DIFFERENCE_CONDITION("the difference between the upper and lower bound must be larger than twice the initial trust region radius ({0})"),
OUT_OF_BOUNDS_QUANTILE_VALUE("out of bounds quantile value: {0}, must be in (0, 100]"),
OUT_OF_BOUNDS_CONFIDENCE_LEVEL("out of bounds confidence level {0}, must be between {1} and {2}"),
OUT_OF_BOUND_SIGNIFICANCE_LEVEL("out of bounds significance level {0}, must be between {1} and {2}"),
SIGNIFICANCE_LEVEL("significance level ({0})"), /* keep */
OUT_OF_ORDER_ABSCISSA_ARRAY("the abscissae array must be sorted in a strictly increasing order, but the {0}-th element is {1} whereas {2}-th is {3}"),

View File

@ -0,0 +1,202 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math3.stat.inference;
import org.apache.commons.math3.distribution.FDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.exception.NotPositiveException;
import org.apache.commons.math3.exception.NotStrictlyPositiveException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.util.FastMath;
/**
* Factory methods to generate confidence intervals for a binomial proportion.
* The supported methods are:
* <ul>
* <li>Clopper-Pearson method (exact method)</li>
* <li>Normal approximation (based on central limit theorem)</li>
* <li>Agresti-Coull interval</li>
* <li>Wilson score interval</li>
* </ul>
*
* @see <a
* href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval">Binomial
* proportion confidence interval (Wikipedia)</a>
* @version $Id$
* @since 3.3
*/
public class BinomialConfidenceInterval {
/**
* Create a Clopper-Pearson binomial confidence interval for the true
* probability of success of an unknown binomial distribution with the given
* observed number of trials, successes and confidence level.
* <p>
* Preconditions:
* <ul>
* <li>{@code numberOfTrials} must be positive</li>
* <li>{@code numberOfSuccesses} may not exceed {@code numberOfTrials}</li>
* <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
* </ul>
* </p>
*
* @param numberOfTrials number of trials
* @param numberOfSuccesses number of successes
* @param confidenceLevel desired probability that the true probability of
* success falls within the returned interval
* @return Confidence interval containing the probability of success with
* probability {@code confidenceLevel}
* @throws MathIllegalArgumentException if the preconditions are not met
*/
public ConfidenceInterval getClopperPearsonInterval(int numberOfTrials, int numberOfSuccesses,
double confidenceLevel) {
checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
double lowerBound = 0;
double upperBound = 0;
final double alpha = (1.0 - confidenceLevel) / 2.0;
final FDistribution distributionLowerBound = new FDistribution(2 * (numberOfTrials - numberOfSuccesses + 1),
2 * numberOfSuccesses);
final double fValueLowerBound = distributionLowerBound.inverseCumulativeProbability(1 - alpha);
if (numberOfSuccesses > 0) {
lowerBound = numberOfSuccesses /
(numberOfSuccesses + (numberOfTrials - numberOfSuccesses + 1) * fValueLowerBound);
}
final FDistribution distributionUpperBound = new FDistribution(2 * (numberOfSuccesses + 1),
2 * (numberOfTrials - numberOfSuccesses));
final double fValueUpperBound = distributionUpperBound.inverseCumulativeProbability(1 - alpha);
if (numberOfSuccesses > 0) {
upperBound = (numberOfSuccesses + 1) * fValueUpperBound /
(numberOfTrials - numberOfSuccesses + (numberOfSuccesses + 1) * fValueUpperBound);
}
return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
}
/**
* Create a binomial confidence interval for the true probability of success
* of an unknown binomial distribution with the given observed number of
* trials, successes and confidence level using the Normal approximation to
* the binomial distribution.
*
* @param numberOfTrials number of trials
* @param numberOfSuccesses number of successes
* @param confidenceLevel desired probability that the true probability of
* success falls within the interval
* @return Confidence interval containing the probability of success with
* probability {@code confidenceLevel}
* @throws MathIllegalArgumentException if the preconditions are not met
*/
public ConfidenceInterval getNormalApproximationInterval(int numberOfTrials, int numberOfSuccesses,
double confidenceLevel) {
checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
final double mean = (double) numberOfSuccesses / (double) numberOfTrials;
final double alpha = (1.0 - confidenceLevel) / 2;
final NormalDistribution normalDistribution = new NormalDistribution();
final double difference = normalDistribution.inverseCumulativeProbability(1 - alpha) *
FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean));
return new ConfidenceInterval(mean - difference, mean + difference, confidenceLevel);
}
/**
* Create an Agresti-Coull binomial confidence interval for the true
* probability of success of an unknown binomial distribution with the given
* observed number of trials, successes and confidence level.
*
* @param numberOfTrials number of trials
* @param numberOfSuccesses number of successes
* @param confidenceLevel desired probability that the true probability of
* success falls within the returned interval
* @return Confidence interval containing the probability of success with
* probability {@code confidenceLevel}
* @throws MathIllegalArgumentException if the preconditions are not met
*/
public ConfidenceInterval getAgrestiCoullInterval(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
final double alpha = (1.0 - confidenceLevel) / 2;
final NormalDistribution normalDistribution = new NormalDistribution();
final double z = normalDistribution.inverseCumulativeProbability(1 - alpha);
final double zSquared = FastMath.pow(z, 2);
final double modifiedNumberOfTrials = numberOfTrials + zSquared;
final double modifiedSuccessesRatio = (1.0 / modifiedNumberOfTrials) * (numberOfSuccesses + 0.5 * zSquared);
final double difference = z *
FastMath.sqrt(1.0 / modifiedNumberOfTrials * modifiedSuccessesRatio *
(1 - modifiedSuccessesRatio));
return new ConfidenceInterval(modifiedSuccessesRatio - difference, modifiedSuccessesRatio + difference,
confidenceLevel);
}
/**
* Create a Wilson score binomial confidence interval for the true
* probability of success of an unknown binomial distribution with the given
* observed number of trials, successes and confidence level.
*
* @param numberOfTrials number of trials
* @param numberOfSuccesses number of successes
* @param confidenceLevel desired probability that the true probability of
* success falls within the returned interval
* @return Confidence interval containing the probability of success with
* probability {@code confidenceLevel}
* @throws MathIllegalArgumentException if the preconditions are not met
*/
public ConfidenceInterval getWilsonScoreInterval(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
final double alpha = (1.0 - confidenceLevel) / 2;
final NormalDistribution normalDistribution = new NormalDistribution();
final double z = normalDistribution.inverseCumulativeProbability(1 - alpha);
final double zSquared = FastMath.pow(z, 2);
final double mean = (double) numberOfSuccesses / (double) numberOfTrials;
final double factor = 1.0 / (1 + (1.0 / numberOfTrials) * zSquared);
final double modifiedSuccessRatio = mean + (1.0 / (2 * numberOfTrials)) * zSquared;
final double difference = z *
FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean) +
(1.0 / (4 * FastMath.pow(numberOfTrials, 2)) * zSquared));
final double lowerBound = factor * (modifiedSuccessRatio - difference);
final double upperBound = factor * (modifiedSuccessRatio + difference);
return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
}
/**
* Verifies that parameters satisfy preconditions.
*
* @param numberOfTrials number of trials (must be positive)
* @param numberOfSuccesses number of successes (must not exceed
* numberOfTrials)
* @param confidenceLevel confidence level (must be strictly between 0 and
* 1)
*/
private void checkParameters(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
if (numberOfTrials <= 0) {
throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_TRIALS, numberOfTrials);
}
if (numberOfSuccesses < 0) {
throw new NotPositiveException(LocalizedFormats.NEGATIVE_NUMBER_OF_SUCCESSES, numberOfSuccesses);
}
if (numberOfSuccesses > numberOfTrials) {
throw new MathIllegalArgumentException(LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
numberOfSuccesses, numberOfTrials);
}
if (confidenceLevel <= 0 || confidenceLevel >= 1) {
throw new MathIllegalArgumentException(LocalizedFormats.OUT_OF_BOUNDS_CONFIDENCE_LEVEL, confidenceLevel, 0,
1);
}
}
}

View File

@ -0,0 +1,110 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math3.stat.inference;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
/**
* Represents an interval estimate of a population parameter.
*
* @version $Id$
* @since 3.3
*/
public class ConfidenceInterval {
/** Lower endpoint of the interval */
private double lowerBound;
/** Upper endpoint of the interval */
private double upperBound;
/**
* The asserted probability that the interval contains the population
* parameter
*/
private double confidenceLevel;
/**
* Create a confidence interval with the given bounds and confidence level.
* <p>
* Preconditions:
* <ul>
* <li>{@code lower} must be strictly less than {@code upper}</li>
* <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
* </ul>
* </p>
*
* @param lowerBound lower endpoint of the interval
* @param upperBound upper endpoint of the interval
* @param confidenceLevel coverage probability
* @throws MathIllegalArgumentException if the preconditions are not met
*/
public ConfidenceInterval(double lowerBound, double upperBound, double confidenceLevel) {
checkParameters(lowerBound, upperBound, confidenceLevel);
this.lowerBound = lowerBound;
this.upperBound = upperBound;
this.confidenceLevel = confidenceLevel;
}
/**
* @return the lower endpoint of the interval
*/
public double getLowerBound() {
return lowerBound;
}
/**
* @return the upper endpoint of the interval
*/
public double getUpperBound() {
return upperBound;
}
/**
* @return the asserted probability that the interval contains the
* population parameter
*/
public double getConfidenceLevel() {
return confidenceLevel;
}
/**
* @return String representation of the confidence interval
*/
@Override
public String toString() {
return "[" + lowerBound + ";" + upperBound + "] (confidence level:" + confidenceLevel + ")";
}
/**
* Verifies that (lower, upper) is a valid non-empty interval and confidence
* is strictly between 0 and 1.
*
* @param lower lower endpoint
* @param upper upper endpoint
* @param confidence confidence level
*/
private void checkParameters(double lower, double upper, double confidence) {
if (lower >= upper) {
throw new MathIllegalArgumentException(LocalizedFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND, lower, upper);
}
if (confidence <= 0 || confidence >= 1) {
throw new MathIllegalArgumentException(LocalizedFormats.OUT_OF_BOUNDS_CONFIDENCE_LEVEL, confidence, 0, 1);
}
}
}

View File

@ -245,6 +245,7 @@ N_POINTS_GAUSS_LEGENDRE_INTEGRATOR_NOT_SUPPORTED = l''int\u00e9grateur de Legend
OBSERVED_COUNTS_ALL_ZERO = aucune occurrence dans le tableau des observations {0}
OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY = les occurrences observ\u00e9es sont toutes deux nulles pour l''entr\u00e9e {0}
BOBYQA_BOUND_DIFFERENCE_CONDITION = la diff\u00e9rence entre la contrainte sup\u00e9rieure et inf\u00e9rieure doit \u00eatre plus grande que deux fois le rayon de la r\u00e9gion de confiance initiale ({0})
OUT_OF_BOUNDS_CONFIDENCE_LEVEL = niveau de confiance {0} hors domaine, doit \u00eatre entre {1} et {2}
OUT_OF_BOUNDS_QUANTILE_VALUE = valeur de quantile {0} hors bornes, doit \u00eatre dans l''intervalle ]0, 100]
OUT_OF_BOUND_SIGNIFICANCE_LEVEL = niveau de signification {0} hors domaine, doit \u00eatre entre {1} et {2}
SIGNIFICANCE_LEVEL = niveau de signification ({0})

View File

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math3.stat.inference;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.junit.Assert;
import org.junit.Test;
/**
* Test cases for the BinomialConfidenceInterval class.
*/
public class BinomialConfidenceIntervalTest {
protected BinomialConfidenceInterval testStatistic = new BinomialConfidenceInterval();
private final int successes = 50;
private final int trials = 500;
private final double confidenceLevel = 0.9;
@Test
public void testClopperPearsonInterval() {
ConfidenceInterval confidenceInterval = testStatistic.getClopperPearsonInterval(trials, successes,
confidenceLevel);
Assert.assertEquals(0.07873857, confidenceInterval.getLowerBound(), 1E-5);
Assert.assertEquals(0.1248658, confidenceInterval.getUpperBound(), 1E-5);
}
@Test
public void testNormalApproximationInterval() {
ConfidenceInterval confidenceInterval = testStatistic.getNormalApproximationInterval(trials, successes,
confidenceLevel);
Assert.assertEquals(0.07793197, confidenceInterval.getLowerBound(), 1E-5);
Assert.assertEquals(0.1220680, confidenceInterval.getUpperBound(), 1E-5);
}
@Test
public void testAgrestiCoullInterval() {
ConfidenceInterval confidenceInterval = testStatistic.getAgrestiCoullInterval(trials, successes,
confidenceLevel);
Assert.assertEquals(0.07993521, confidenceInterval.getLowerBound(), 1E-5);
Assert.assertEquals(0.1243704, confidenceInterval.getUpperBound(), 1E-5);
}
@Test
public void testWilsonScoreInterval() {
ConfidenceInterval confidenceInterval = testStatistic
.getWilsonScoreInterval(trials, successes, confidenceLevel);
Assert.assertEquals(0.08003919, confidenceInterval.getLowerBound(), 1E-5);
Assert.assertEquals(0.1242664, confidenceInterval.getUpperBound(), 1E-5);
}
@Test(expected = MathIllegalArgumentException.class)
public void testZeroConfidencelevel() {
testStatistic.getWilsonScoreInterval(trials, successes, 0d);
}
@Test(expected = MathIllegalArgumentException.class)
public void testOneConfidencelevel() {
testStatistic.getWilsonScoreInterval(trials, successes, 1d);
}
@Test(expected = MathIllegalArgumentException.class)
public void testZeroTrials() {
testStatistic.getWilsonScoreInterval(0, 0, confidenceLevel);
}
@Test(expected = MathIllegalArgumentException.class)
public void testNegativeSuccesses() {
testStatistic.getWilsonScoreInterval(trials, -1, confidenceLevel);
}
@Test(expected = MathIllegalArgumentException.class)
public void testSuccessesExceedingTrials() {
testStatistic.getWilsonScoreInterval(trials, trials + 1, confidenceLevel);
}
}