Added ConfidenceInterval class and BinomialConfidenceInterval providing implementations of several estimates of confidence intervals for binomial probabilities.
JIRA: MATH-1038 Patch provided by Thorsten Schaefer git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1533824 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1b9489d057
commit
52e85a5f6f
|
@ -51,6 +51,10 @@ If the output is not quite correct, check for invisible trailing spaces!
|
|||
</properties>
|
||||
<body>
|
||||
<release version="x.y" date="TBD" description="TBD">
|
||||
<actiod dev="psteitz" type="add" issue="MATH-1038" due-to="Thorsten Schaefer">
|
||||
Added ConfidenceInterval class and BinomialConfidenceInterval providing several
|
||||
estimators for confidence intervals for binomial probabilities.
|
||||
</action>
|
||||
<action dev="tn" type="fix" issue="MATH-1035" due-to="derphead">
|
||||
Simplified and improved performance of "ArithmeticUtils#addAndCheck(long, long)".
|
||||
</action>
|
||||
|
|
|
@ -275,6 +275,7 @@ public enum LocalizedFormats implements Localizable {
|
|||
OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY("observed counts are both zero for entry {0}"),
|
||||
BOBYQA_BOUND_DIFFERENCE_CONDITION("the difference between the upper and lower bound must be larger than twice the initial trust region radius ({0})"),
|
||||
OUT_OF_BOUNDS_QUANTILE_VALUE("out of bounds quantile value: {0}, must be in (0, 100]"),
|
||||
OUT_OF_BOUNDS_CONFIDENCE_LEVEL("out of bounds confidence level {0}, must be between {1} and {2}"),
|
||||
OUT_OF_BOUND_SIGNIFICANCE_LEVEL("out of bounds significance level {0}, must be between {1} and {2}"),
|
||||
SIGNIFICANCE_LEVEL("significance level ({0})"), /* keep */
|
||||
OUT_OF_ORDER_ABSCISSA_ARRAY("the abscissae array must be sorted in a strictly increasing order, but the {0}-th element is {1} whereas {2}-th is {3}"),
|
||||
|
|
|
@ -0,0 +1,202 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math3.stat.inference;
|
||||
|
||||
import org.apache.commons.math3.distribution.FDistribution;
|
||||
import org.apache.commons.math3.distribution.NormalDistribution;
|
||||
import org.apache.commons.math3.exception.MathIllegalArgumentException;
|
||||
import org.apache.commons.math3.exception.NotPositiveException;
|
||||
import org.apache.commons.math3.exception.NotStrictlyPositiveException;
|
||||
import org.apache.commons.math3.exception.util.LocalizedFormats;
|
||||
import org.apache.commons.math3.util.FastMath;
|
||||
|
||||
/**
|
||||
* Factory methods to generate confidence intervals for a binomial proportion.
|
||||
* The supported methods are:
|
||||
* <ul>
|
||||
* <li>Clopper-Pearson method (exact method)</li>
|
||||
* <li>Normal approximation (based on central limit theorem)</li>
|
||||
* <li>Agresti-Coull interval</li>
|
||||
* <li>Wilson score interval</li>
|
||||
* </ul>
|
||||
*
|
||||
* @see <a
|
||||
* href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval">Binomial
|
||||
* proportion confidence interval (Wikipedia)</a>
|
||||
* @version $Id$
|
||||
* @since 3.3
|
||||
*/
|
||||
public class BinomialConfidenceInterval {
|
||||
|
||||
/**
|
||||
* Create a Clopper-Pearson binomial confidence interval for the true
|
||||
* probability of success of an unknown binomial distribution with the given
|
||||
* observed number of trials, successes and confidence level.
|
||||
* <p>
|
||||
* Preconditions:
|
||||
* <ul>
|
||||
* <li>{@code numberOfTrials} must be positive</li>
|
||||
* <li>{@code numberOfSuccesses} may not exceed {@code numberOfTrials}</li>
|
||||
* <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
|
||||
* </ul>
|
||||
* </p>
|
||||
*
|
||||
* @param numberOfTrials number of trials
|
||||
* @param numberOfSuccesses number of successes
|
||||
* @param confidenceLevel desired probability that the true probability of
|
||||
* success falls within the returned interval
|
||||
* @return Confidence interval containing the probability of success with
|
||||
* probability {@code confidenceLevel}
|
||||
* @throws MathIllegalArgumentException if the preconditions are not met
|
||||
*/
|
||||
public ConfidenceInterval getClopperPearsonInterval(int numberOfTrials, int numberOfSuccesses,
|
||||
double confidenceLevel) {
|
||||
checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
|
||||
double lowerBound = 0;
|
||||
double upperBound = 0;
|
||||
final double alpha = (1.0 - confidenceLevel) / 2.0;
|
||||
|
||||
final FDistribution distributionLowerBound = new FDistribution(2 * (numberOfTrials - numberOfSuccesses + 1),
|
||||
2 * numberOfSuccesses);
|
||||
final double fValueLowerBound = distributionLowerBound.inverseCumulativeProbability(1 - alpha);
|
||||
if (numberOfSuccesses > 0) {
|
||||
lowerBound = numberOfSuccesses /
|
||||
(numberOfSuccesses + (numberOfTrials - numberOfSuccesses + 1) * fValueLowerBound);
|
||||
}
|
||||
|
||||
final FDistribution distributionUpperBound = new FDistribution(2 * (numberOfSuccesses + 1),
|
||||
2 * (numberOfTrials - numberOfSuccesses));
|
||||
final double fValueUpperBound = distributionUpperBound.inverseCumulativeProbability(1 - alpha);
|
||||
if (numberOfSuccesses > 0) {
|
||||
upperBound = (numberOfSuccesses + 1) * fValueUpperBound /
|
||||
(numberOfTrials - numberOfSuccesses + (numberOfSuccesses + 1) * fValueUpperBound);
|
||||
}
|
||||
|
||||
return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a binomial confidence interval for the true probability of success
|
||||
* of an unknown binomial distribution with the given observed number of
|
||||
* trials, successes and confidence level using the Normal approximation to
|
||||
* the binomial distribution.
|
||||
*
|
||||
* @param numberOfTrials number of trials
|
||||
* @param numberOfSuccesses number of successes
|
||||
* @param confidenceLevel desired probability that the true probability of
|
||||
* success falls within the interval
|
||||
* @return Confidence interval containing the probability of success with
|
||||
* probability {@code confidenceLevel}
|
||||
* @throws MathIllegalArgumentException if the preconditions are not met
|
||||
*/
|
||||
public ConfidenceInterval getNormalApproximationInterval(int numberOfTrials, int numberOfSuccesses,
|
||||
double confidenceLevel) {
|
||||
checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
|
||||
final double mean = (double) numberOfSuccesses / (double) numberOfTrials;
|
||||
final double alpha = (1.0 - confidenceLevel) / 2;
|
||||
final NormalDistribution normalDistribution = new NormalDistribution();
|
||||
final double difference = normalDistribution.inverseCumulativeProbability(1 - alpha) *
|
||||
FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean));
|
||||
return new ConfidenceInterval(mean - difference, mean + difference, confidenceLevel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an Agresti-Coull binomial confidence interval for the true
|
||||
* probability of success of an unknown binomial distribution with the given
|
||||
* observed number of trials, successes and confidence level.
|
||||
*
|
||||
* @param numberOfTrials number of trials
|
||||
* @param numberOfSuccesses number of successes
|
||||
* @param confidenceLevel desired probability that the true probability of
|
||||
* success falls within the returned interval
|
||||
* @return Confidence interval containing the probability of success with
|
||||
* probability {@code confidenceLevel}
|
||||
* @throws MathIllegalArgumentException if the preconditions are not met
|
||||
*/
|
||||
public ConfidenceInterval getAgrestiCoullInterval(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
|
||||
checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
|
||||
final double alpha = (1.0 - confidenceLevel) / 2;
|
||||
final NormalDistribution normalDistribution = new NormalDistribution();
|
||||
final double z = normalDistribution.inverseCumulativeProbability(1 - alpha);
|
||||
final double zSquared = FastMath.pow(z, 2);
|
||||
final double modifiedNumberOfTrials = numberOfTrials + zSquared;
|
||||
final double modifiedSuccessesRatio = (1.0 / modifiedNumberOfTrials) * (numberOfSuccesses + 0.5 * zSquared);
|
||||
final double difference = z *
|
||||
FastMath.sqrt(1.0 / modifiedNumberOfTrials * modifiedSuccessesRatio *
|
||||
(1 - modifiedSuccessesRatio));
|
||||
return new ConfidenceInterval(modifiedSuccessesRatio - difference, modifiedSuccessesRatio + difference,
|
||||
confidenceLevel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Wilson score binomial confidence interval for the true
|
||||
* probability of success of an unknown binomial distribution with the given
|
||||
* observed number of trials, successes and confidence level.
|
||||
*
|
||||
* @param numberOfTrials number of trials
|
||||
* @param numberOfSuccesses number of successes
|
||||
* @param confidenceLevel desired probability that the true probability of
|
||||
* success falls within the returned interval
|
||||
* @return Confidence interval containing the probability of success with
|
||||
* probability {@code confidenceLevel}
|
||||
* @throws MathIllegalArgumentException if the preconditions are not met
|
||||
*/
|
||||
public ConfidenceInterval getWilsonScoreInterval(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
|
||||
checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
|
||||
final double alpha = (1.0 - confidenceLevel) / 2;
|
||||
final NormalDistribution normalDistribution = new NormalDistribution();
|
||||
final double z = normalDistribution.inverseCumulativeProbability(1 - alpha);
|
||||
final double zSquared = FastMath.pow(z, 2);
|
||||
final double mean = (double) numberOfSuccesses / (double) numberOfTrials;
|
||||
|
||||
final double factor = 1.0 / (1 + (1.0 / numberOfTrials) * zSquared);
|
||||
final double modifiedSuccessRatio = mean + (1.0 / (2 * numberOfTrials)) * zSquared;
|
||||
final double difference = z *
|
||||
FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean) +
|
||||
(1.0 / (4 * FastMath.pow(numberOfTrials, 2)) * zSquared));
|
||||
|
||||
final double lowerBound = factor * (modifiedSuccessRatio - difference);
|
||||
final double upperBound = factor * (modifiedSuccessRatio + difference);
|
||||
return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that parameters satisfy preconditions.
|
||||
*
|
||||
* @param numberOfTrials number of trials (must be positive)
|
||||
* @param numberOfSuccesses number of successes (must not exceed
|
||||
* numberOfTrials)
|
||||
* @param confidenceLevel confidence level (must be strictly between 0 and
|
||||
* 1)
|
||||
*/
|
||||
private void checkParameters(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
|
||||
if (numberOfTrials <= 0) {
|
||||
throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_TRIALS, numberOfTrials);
|
||||
}
|
||||
if (numberOfSuccesses < 0) {
|
||||
throw new NotPositiveException(LocalizedFormats.NEGATIVE_NUMBER_OF_SUCCESSES, numberOfSuccesses);
|
||||
}
|
||||
if (numberOfSuccesses > numberOfTrials) {
|
||||
throw new MathIllegalArgumentException(LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
|
||||
numberOfSuccesses, numberOfTrials);
|
||||
}
|
||||
if (confidenceLevel <= 0 || confidenceLevel >= 1) {
|
||||
throw new MathIllegalArgumentException(LocalizedFormats.OUT_OF_BOUNDS_CONFIDENCE_LEVEL, confidenceLevel, 0,
|
||||
1);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math3.stat.inference;
|
||||
|
||||
import org.apache.commons.math3.exception.MathIllegalArgumentException;
|
||||
import org.apache.commons.math3.exception.util.LocalizedFormats;
|
||||
|
||||
/**
|
||||
* Represents an interval estimate of a population parameter.
|
||||
*
|
||||
* @version $Id$
|
||||
* @since 3.3
|
||||
*/
|
||||
public class ConfidenceInterval {
|
||||
|
||||
/** Lower endpoint of the interval */
|
||||
private double lowerBound;
|
||||
|
||||
/** Upper endpoint of the interval */
|
||||
private double upperBound;
|
||||
|
||||
/**
|
||||
* The asserted probability that the interval contains the population
|
||||
* parameter
|
||||
*/
|
||||
private double confidenceLevel;
|
||||
|
||||
/**
|
||||
* Create a confidence interval with the given bounds and confidence level.
|
||||
* <p>
|
||||
* Preconditions:
|
||||
* <ul>
|
||||
* <li>{@code lower} must be strictly less than {@code upper}</li>
|
||||
* <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
|
||||
* </ul>
|
||||
* </p>
|
||||
*
|
||||
* @param lowerBound lower endpoint of the interval
|
||||
* @param upperBound upper endpoint of the interval
|
||||
* @param confidenceLevel coverage probability
|
||||
* @throws MathIllegalArgumentException if the preconditions are not met
|
||||
*/
|
||||
public ConfidenceInterval(double lowerBound, double upperBound, double confidenceLevel) {
|
||||
checkParameters(lowerBound, upperBound, confidenceLevel);
|
||||
this.lowerBound = lowerBound;
|
||||
this.upperBound = upperBound;
|
||||
this.confidenceLevel = confidenceLevel;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the lower endpoint of the interval
|
||||
*/
|
||||
public double getLowerBound() {
|
||||
return lowerBound;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the upper endpoint of the interval
|
||||
*/
|
||||
public double getUpperBound() {
|
||||
return upperBound;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the asserted probability that the interval contains the
|
||||
* population parameter
|
||||
*/
|
||||
public double getConfidenceLevel() {
|
||||
return confidenceLevel;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return String representation of the confidence interval
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return "[" + lowerBound + ";" + upperBound + "] (confidence level:" + confidenceLevel + ")";
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that (lower, upper) is a valid non-empty interval and confidence
|
||||
* is strictly between 0 and 1.
|
||||
*
|
||||
* @param lower lower endpoint
|
||||
* @param upper upper endpoint
|
||||
* @param confidence confidence level
|
||||
*/
|
||||
private void checkParameters(double lower, double upper, double confidence) {
|
||||
if (lower >= upper) {
|
||||
throw new MathIllegalArgumentException(LocalizedFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND, lower, upper);
|
||||
}
|
||||
if (confidence <= 0 || confidence >= 1) {
|
||||
throw new MathIllegalArgumentException(LocalizedFormats.OUT_OF_BOUNDS_CONFIDENCE_LEVEL, confidence, 0, 1);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -245,6 +245,7 @@ N_POINTS_GAUSS_LEGENDRE_INTEGRATOR_NOT_SUPPORTED = l''int\u00e9grateur de Legend
|
|||
OBSERVED_COUNTS_ALL_ZERO = aucune occurrence dans le tableau des observations {0}
|
||||
OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY = les occurrences observ\u00e9es sont toutes deux nulles pour l''entr\u00e9e {0}
|
||||
BOBYQA_BOUND_DIFFERENCE_CONDITION = la diff\u00e9rence entre la contrainte sup\u00e9rieure et inf\u00e9rieure doit \u00eatre plus grande que deux fois le rayon de la r\u00e9gion de confiance initiale ({0})
|
||||
OUT_OF_BOUNDS_CONFIDENCE_LEVEL = niveau de confiance {0} hors domaine, doit \u00eatre entre {1} et {2}
|
||||
OUT_OF_BOUNDS_QUANTILE_VALUE = valeur de quantile {0} hors bornes, doit \u00eatre dans l''intervalle ]0, 100]
|
||||
OUT_OF_BOUND_SIGNIFICANCE_LEVEL = niveau de signification {0} hors domaine, doit \u00eatre entre {1} et {2}
|
||||
SIGNIFICANCE_LEVEL = niveau de signification ({0})
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.math3.stat.inference;
|
||||
|
||||
import org.apache.commons.math3.exception.MathIllegalArgumentException;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Test cases for the BinomialConfidenceInterval class.
|
||||
*/
|
||||
|
||||
public class BinomialConfidenceIntervalTest {
|
||||
|
||||
protected BinomialConfidenceInterval testStatistic = new BinomialConfidenceInterval();
|
||||
|
||||
private final int successes = 50;
|
||||
|
||||
private final int trials = 500;
|
||||
|
||||
private final double confidenceLevel = 0.9;
|
||||
|
||||
@Test
|
||||
public void testClopperPearsonInterval() {
|
||||
ConfidenceInterval confidenceInterval = testStatistic.getClopperPearsonInterval(trials, successes,
|
||||
confidenceLevel);
|
||||
Assert.assertEquals(0.07873857, confidenceInterval.getLowerBound(), 1E-5);
|
||||
Assert.assertEquals(0.1248658, confidenceInterval.getUpperBound(), 1E-5);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNormalApproximationInterval() {
|
||||
ConfidenceInterval confidenceInterval = testStatistic.getNormalApproximationInterval(trials, successes,
|
||||
confidenceLevel);
|
||||
Assert.assertEquals(0.07793197, confidenceInterval.getLowerBound(), 1E-5);
|
||||
Assert.assertEquals(0.1220680, confidenceInterval.getUpperBound(), 1E-5);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAgrestiCoullInterval() {
|
||||
ConfidenceInterval confidenceInterval = testStatistic.getAgrestiCoullInterval(trials, successes,
|
||||
confidenceLevel);
|
||||
Assert.assertEquals(0.07993521, confidenceInterval.getLowerBound(), 1E-5);
|
||||
Assert.assertEquals(0.1243704, confidenceInterval.getUpperBound(), 1E-5);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWilsonScoreInterval() {
|
||||
ConfidenceInterval confidenceInterval = testStatistic
|
||||
.getWilsonScoreInterval(trials, successes, confidenceLevel);
|
||||
Assert.assertEquals(0.08003919, confidenceInterval.getLowerBound(), 1E-5);
|
||||
Assert.assertEquals(0.1242664, confidenceInterval.getUpperBound(), 1E-5);
|
||||
}
|
||||
|
||||
@Test(expected = MathIllegalArgumentException.class)
|
||||
public void testZeroConfidencelevel() {
|
||||
testStatistic.getWilsonScoreInterval(trials, successes, 0d);
|
||||
}
|
||||
|
||||
@Test(expected = MathIllegalArgumentException.class)
|
||||
public void testOneConfidencelevel() {
|
||||
testStatistic.getWilsonScoreInterval(trials, successes, 1d);
|
||||
}
|
||||
|
||||
@Test(expected = MathIllegalArgumentException.class)
|
||||
public void testZeroTrials() {
|
||||
testStatistic.getWilsonScoreInterval(0, 0, confidenceLevel);
|
||||
}
|
||||
|
||||
@Test(expected = MathIllegalArgumentException.class)
|
||||
public void testNegativeSuccesses() {
|
||||
testStatistic.getWilsonScoreInterval(trials, -1, confidenceLevel);
|
||||
}
|
||||
|
||||
@Test(expected = MathIllegalArgumentException.class)
|
||||
public void testSuccessesExceedingTrials() {
|
||||
testStatistic.getWilsonScoreInterval(trials, trials + 1, confidenceLevel);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue