Added ConfidenceInterval class and BinomialConfidenceInterval providing implementations of several estimates of confidence intervals for binomial probabilities.

JIRA: MATH-1038 Patch provided by Thorsten Schaefer git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1533824 13f79535-47bb-0310-9956-ffa450edef68
2013-10-19 20:37:13 +00:00 · 2013-10-19 20:37:13 +00:00 · 52e85a5f6f
parent 1b9489d057
commit 52e85a5f6f
6 changed files with 412 additions and 0 deletions
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@ -51,6 +51,10 @@ If the output is not quite correct, check for invisible trailing spaces!
  </properties>
  <body>
    <release version="x.y" date="TBD" description="TBD">
+      <actiod dev="psteitz" type="add" issue="MATH-1038" due-to="Thorsten Schaefer">
+        Added ConfidenceInterval class and BinomialConfidenceInterval providing several
+        estimators for confidence intervals for binomial probabilities.
+      </action>
      <action dev="tn" type="fix" issue="MATH-1035" due-to="derphead">
        Simplified and improved performance of "ArithmeticUtils#addAndCheck(long, long)".
      </action>
--- a/src/main/java/org/apache/commons/math3/exception/util/LocalizedFormats.java
+++ b/src/main/java/org/apache/commons/math3/exception/util/LocalizedFormats.java
@ -275,6 +275,7 @@ public enum LocalizedFormats implements Localizable {
    OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY("observed counts are both zero for entry {0}"),
    BOBYQA_BOUND_DIFFERENCE_CONDITION("the difference between the upper and lower bound must be larger than twice the initial trust region radius ({0})"),
    OUT_OF_BOUNDS_QUANTILE_VALUE("out of bounds quantile value: {0}, must be in (0, 100]"),
+    OUT_OF_BOUNDS_CONFIDENCE_LEVEL("out of bounds confidence level {0}, must be between {1} and {2}"),
    OUT_OF_BOUND_SIGNIFICANCE_LEVEL("out of bounds significance level {0}, must be between {1} and {2}"),
    SIGNIFICANCE_LEVEL("significance level ({0})"), /* keep */
    OUT_OF_ORDER_ABSCISSA_ARRAY("the abscissae array must be sorted in a strictly increasing order, but the {0}-th element is {1} whereas {2}-th is {3}"),
--- a/src/main/java/org/apache/commons/math3/stat/inference/BinomialConfidenceInterval.java
+++ b/src/main/java/org/apache/commons/math3/stat/inference/BinomialConfidenceInterval.java
@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.inference;
+
+import org.apache.commons.math3.distribution.FDistribution;
+import org.apache.commons.math3.distribution.NormalDistribution;
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.exception.NotPositiveException;
+import org.apache.commons.math3.exception.NotStrictlyPositiveException;
+import org.apache.commons.math3.exception.util.LocalizedFormats;
+import org.apache.commons.math3.util.FastMath;
+
+/**
+ * Factory methods to generate confidence intervals for a binomial proportion.
+ * The supported methods are:
+ * <ul>
+ * <li>Clopper-Pearson method (exact method)</li>
+ * <li>Normal approximation (based on central limit theorem)</li>
+ * <li>Agresti-Coull interval</li>
+ * <li>Wilson score interval</li>
+ * </ul>
+ *
+ * @see <a
+ *      href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval">Binomial
+ *      proportion confidence interval (Wikipedia)</a>
+ * @version $Id$
+ * @since 3.3
+ */
+public class BinomialConfidenceInterval {
+
+    /**
+     * Create a Clopper-Pearson binomial confidence interval for the true
+     * probability of success of an unknown binomial distribution with the given
+     * observed number of trials, successes and confidence level.
+     * <p>
+     * Preconditions:
+     * <ul>
+     * <li>{@code numberOfTrials} must be positive</li>
+     * <li>{@code numberOfSuccesses} may not exceed {@code numberOfTrials}</li>
+     * <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
+     * </ul>
+     * </p>
+     *
+     * @param numberOfTrials number of trials
+     * @param numberOfSuccesses number of successes
+     * @param confidenceLevel desired probability that the true probability of
+     *        success falls within the returned interval
+     * @return Confidence interval containing the probability of success with
+     *         probability {@code confidenceLevel}
+     * @throws MathIllegalArgumentException if the preconditions are not met
+     */
+    public ConfidenceInterval getClopperPearsonInterval(int numberOfTrials, int numberOfSuccesses,
+                                                        double confidenceLevel) {
+        checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
+        double lowerBound = 0;
+        double upperBound = 0;
+        final double alpha = (1.0 - confidenceLevel) / 2.0;
+
+        final FDistribution distributionLowerBound = new FDistribution(2 * (numberOfTrials - numberOfSuccesses + 1),
+                                                                       2 * numberOfSuccesses);
+        final double fValueLowerBound = distributionLowerBound.inverseCumulativeProbability(1 - alpha);
+        if (numberOfSuccesses > 0) {
+            lowerBound = numberOfSuccesses /
+                         (numberOfSuccesses + (numberOfTrials - numberOfSuccesses + 1) * fValueLowerBound);
+        }
+
+        final FDistribution distributionUpperBound = new FDistribution(2 * (numberOfSuccesses + 1),
+                                                                       2 * (numberOfTrials - numberOfSuccesses));
+        final double fValueUpperBound = distributionUpperBound.inverseCumulativeProbability(1 - alpha);
+        if (numberOfSuccesses > 0) {
+            upperBound = (numberOfSuccesses + 1) * fValueUpperBound /
+                         (numberOfTrials - numberOfSuccesses + (numberOfSuccesses + 1) * fValueUpperBound);
+        }
+
+        return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
+    }
+
+    /**
+     * Create a binomial confidence interval for the true probability of success
+     * of an unknown binomial distribution with the given observed number of
+     * trials, successes and confidence level using the Normal approximation to
+     * the binomial distribution.
+     *
+     * @param numberOfTrials number of trials
+     * @param numberOfSuccesses number of successes
+     * @param confidenceLevel desired probability that the true probability of
+     *        success falls within the interval
+     * @return Confidence interval containing the probability of success with
+     *         probability {@code confidenceLevel}
+     * @throws MathIllegalArgumentException if the preconditions are not met
+     */
+    public ConfidenceInterval getNormalApproximationInterval(int numberOfTrials, int numberOfSuccesses,
+                                                             double confidenceLevel) {
+        checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
+        final double mean = (double) numberOfSuccesses / (double) numberOfTrials;
+        final double alpha = (1.0 - confidenceLevel) / 2;
+        final NormalDistribution normalDistribution = new NormalDistribution();
+        final double difference = normalDistribution.inverseCumulativeProbability(1 - alpha) *
+                                  FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean));
+        return new ConfidenceInterval(mean - difference, mean + difference, confidenceLevel);
+    }
+
+    /**
+     * Create an Agresti-Coull binomial confidence interval for the true
+     * probability of success of an unknown binomial distribution with the given
+     * observed number of trials, successes and confidence level.
+     *
+     * @param numberOfTrials number of trials
+     * @param numberOfSuccesses number of successes
+     * @param confidenceLevel desired probability that the true probability of
+     *        success falls within the returned interval
+     * @return Confidence interval containing the probability of success with
+     *         probability {@code confidenceLevel}
+     * @throws MathIllegalArgumentException if the preconditions are not met
+     */
+    public ConfidenceInterval getAgrestiCoullInterval(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
+        checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
+        final double alpha = (1.0 - confidenceLevel) / 2;
+        final NormalDistribution normalDistribution = new NormalDistribution();
+        final double z = normalDistribution.inverseCumulativeProbability(1 - alpha);
+        final double zSquared = FastMath.pow(z, 2);
+        final double modifiedNumberOfTrials = numberOfTrials + zSquared;
+        final double modifiedSuccessesRatio = (1.0 / modifiedNumberOfTrials) * (numberOfSuccesses + 0.5 * zSquared);
+        final double difference = z *
+                                  FastMath.sqrt(1.0 / modifiedNumberOfTrials * modifiedSuccessesRatio *
+                                                (1 - modifiedSuccessesRatio));
+        return new ConfidenceInterval(modifiedSuccessesRatio - difference, modifiedSuccessesRatio + difference,
+                                      confidenceLevel);
+    }
+
+    /**
+     * Create a Wilson score binomial confidence interval for the true
+     * probability of success of an unknown binomial distribution with the given
+     * observed number of trials, successes and confidence level.
+     *
+     * @param numberOfTrials number of trials
+     * @param numberOfSuccesses number of successes
+     * @param confidenceLevel desired probability that the true probability of
+     *        success falls within the returned interval
+     * @return Confidence interval containing the probability of success with
+     *         probability {@code confidenceLevel}
+     * @throws MathIllegalArgumentException if the preconditions are not met
+     */
+    public ConfidenceInterval getWilsonScoreInterval(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
+        checkParameters(numberOfTrials, numberOfSuccesses, confidenceLevel);
+        final double alpha = (1.0 - confidenceLevel) / 2;
+        final NormalDistribution normalDistribution = new NormalDistribution();
+        final double z = normalDistribution.inverseCumulativeProbability(1 - alpha);
+        final double zSquared = FastMath.pow(z, 2);
+        final double mean = (double) numberOfSuccesses / (double) numberOfTrials;
+
+        final double factor = 1.0 / (1 + (1.0 / numberOfTrials) * zSquared);
+        final double modifiedSuccessRatio = mean + (1.0 / (2 * numberOfTrials)) * zSquared;
+        final double difference = z *
+                                  FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean) +
+                                                (1.0 / (4 * FastMath.pow(numberOfTrials, 2)) * zSquared));
+
+        final double lowerBound = factor * (modifiedSuccessRatio - difference);
+        final double upperBound = factor * (modifiedSuccessRatio + difference);
+        return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
+    }
+
+    /**
+     * Verifies that parameters satisfy preconditions.
+     *
+     * @param numberOfTrials number of trials (must be positive)
+     * @param numberOfSuccesses number of successes (must not exceed
+     *        numberOfTrials)
+     * @param confidenceLevel confidence level (must be strictly between 0 and
+     *        1)
+     */
+    private void checkParameters(int numberOfTrials, int numberOfSuccesses, double confidenceLevel) {
+        if (numberOfTrials <= 0) {
+            throw new NotStrictlyPositiveException(LocalizedFormats.NUMBER_OF_TRIALS, numberOfTrials);
+        }
+        if (numberOfSuccesses < 0) {
+            throw new NotPositiveException(LocalizedFormats.NEGATIVE_NUMBER_OF_SUCCESSES, numberOfSuccesses);
+        }
+        if (numberOfSuccesses > numberOfTrials) {
+            throw new MathIllegalArgumentException(LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
+                                                   numberOfSuccesses, numberOfTrials);
+        }
+        if (confidenceLevel <= 0 || confidenceLevel >= 1) {
+            throw new MathIllegalArgumentException(LocalizedFormats.OUT_OF_BOUNDS_CONFIDENCE_LEVEL, confidenceLevel, 0,
+                                                   1);
+        }
+    }
+}
--- a/src/main/java/org/apache/commons/math3/stat/inference/ConfidenceInterval.java
+++ b/src/main/java/org/apache/commons/math3/stat/inference/ConfidenceInterval.java
@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.inference;
+
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.apache.commons.math3.exception.util.LocalizedFormats;
+
+/**
+ * Represents an interval estimate of a population parameter.
+ *
+ * @version $Id$
+ * @since 3.3
+ */
+public class ConfidenceInterval {
+
+    /** Lower endpoint of the interval */
+    private double lowerBound;
+
+    /** Upper endpoint of the interval */
+    private double upperBound;
+
+    /**
+     * The asserted probability that the interval contains the population
+     * parameter
+     */
+    private double confidenceLevel;
+
+    /**
+     * Create a confidence interval with the given bounds and confidence level.
+     * <p>
+     * Preconditions:
+     * <ul>
+     * <li>{@code lower} must be strictly less than {@code upper}</li>
+     * <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
+     * </ul>
+     * </p>
+     *
+     * @param lowerBound lower endpoint of the interval
+     * @param upperBound upper endpoint of the interval
+     * @param confidenceLevel coverage probability
+     * @throws MathIllegalArgumentException if the preconditions are not met
+     */
+    public ConfidenceInterval(double lowerBound, double upperBound, double confidenceLevel) {
+        checkParameters(lowerBound, upperBound, confidenceLevel);
+        this.lowerBound = lowerBound;
+        this.upperBound = upperBound;
+        this.confidenceLevel = confidenceLevel;
+    }
+
+    /**
+     * @return the lower endpoint of the interval
+     */
+    public double getLowerBound() {
+        return lowerBound;
+    }
+
+    /**
+     * @return the upper endpoint of the interval
+     */
+    public double getUpperBound() {
+        return upperBound;
+    }
+
+    /**
+     * @return the asserted probability that the interval contains the
+     *         population parameter
+     */
+    public double getConfidenceLevel() {
+        return confidenceLevel;
+    }
+
+    /**
+     * @return String representation of the confidence interval
+     */
+    @Override
+    public String toString() {
+        return "[" + lowerBound + ";" + upperBound + "] (confidence level:" + confidenceLevel + ")";
+    }
+
+    /**
+     * Verifies that (lower, upper) is a valid non-empty interval and confidence
+     * is strictly between 0 and 1.
+     *
+     * @param lower lower endpoint
+     * @param upper upper endpoint
+     * @param confidence confidence level
+     */
+    private void checkParameters(double lower, double upper, double confidence) {
+        if (lower >= upper) {
+            throw new MathIllegalArgumentException(LocalizedFormats.LOWER_BOUND_NOT_BELOW_UPPER_BOUND, lower, upper);
+        }
+        if (confidence <= 0 || confidence >= 1) {
+            throw new MathIllegalArgumentException(LocalizedFormats.OUT_OF_BOUNDS_CONFIDENCE_LEVEL, confidence, 0, 1);
+        }
+    }
+}
--- a/src/main/resources/assets/org/apache/commons/math3/exception/util/LocalizedFormats_fr.properties
+++ b/src/main/resources/assets/org/apache/commons/math3/exception/util/LocalizedFormats_fr.properties
@ -245,6 +245,7 @@ N_POINTS_GAUSS_LEGENDRE_INTEGRATOR_NOT_SUPPORTED = l''int\u00e9grateur de Legend
 OBSERVED_COUNTS_ALL_ZERO = aucune occurrence dans le tableau des observations {0}
 OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY = les occurrences observ\u00e9es sont toutes deux nulles pour l''entr\u00e9e {0}
 BOBYQA_BOUND_DIFFERENCE_CONDITION = la diff\u00e9rence entre la contrainte sup\u00e9rieure et inf\u00e9rieure doit \u00eatre plus grande que deux fois le rayon de la r\u00e9gion de confiance initiale ({0})
+OUT_OF_BOUNDS_CONFIDENCE_LEVEL = niveau de confiance {0} hors domaine, doit \u00eatre entre {1} et {2}
 OUT_OF_BOUNDS_QUANTILE_VALUE = valeur de quantile {0} hors bornes, doit \u00eatre dans l''intervalle ]0, 100]
 OUT_OF_BOUND_SIGNIFICANCE_LEVEL = niveau de signification {0} hors domaine, doit \u00eatre entre {1} et {2}
 SIGNIFICANCE_LEVEL = niveau de signification ({0})
--- a/src/test/java/org/apache/commons/math3/stat/inference/BinomialConfidenceIntervalTest.java
+++ b/src/test/java/org/apache/commons/math3/stat/inference/BinomialConfidenceIntervalTest.java
@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math3.stat.inference;
+
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test cases for the BinomialConfidenceInterval class.
+ */
+
+public class BinomialConfidenceIntervalTest {
+
+    protected BinomialConfidenceInterval testStatistic = new BinomialConfidenceInterval();
+
+    private final int successes = 50;
+
+    private final int trials = 500;
+
+    private final double confidenceLevel = 0.9;
+
+    @Test
+    public void testClopperPearsonInterval() {
+        ConfidenceInterval confidenceInterval = testStatistic.getClopperPearsonInterval(trials, successes,
+                                                                                        confidenceLevel);
+        Assert.assertEquals(0.07873857, confidenceInterval.getLowerBound(), 1E-5);
+        Assert.assertEquals(0.1248658, confidenceInterval.getUpperBound(), 1E-5);
+
+    }
+
+    @Test
+    public void testNormalApproximationInterval() {
+        ConfidenceInterval confidenceInterval = testStatistic.getNormalApproximationInterval(trials, successes,
+                                                                                             confidenceLevel);
+        Assert.assertEquals(0.07793197, confidenceInterval.getLowerBound(), 1E-5);
+        Assert.assertEquals(0.1220680, confidenceInterval.getUpperBound(), 1E-5);
+    }
+
+    @Test
+    public void testAgrestiCoullInterval() {
+        ConfidenceInterval confidenceInterval = testStatistic.getAgrestiCoullInterval(trials, successes,
+                                                                                      confidenceLevel);
+        Assert.assertEquals(0.07993521, confidenceInterval.getLowerBound(), 1E-5);
+        Assert.assertEquals(0.1243704, confidenceInterval.getUpperBound(), 1E-5);
+    }
+
+    @Test
+    public void testWilsonScoreInterval() {
+        ConfidenceInterval confidenceInterval = testStatistic
+            .getWilsonScoreInterval(trials, successes, confidenceLevel);
+        Assert.assertEquals(0.08003919, confidenceInterval.getLowerBound(), 1E-5);
+        Assert.assertEquals(0.1242664, confidenceInterval.getUpperBound(), 1E-5);
+    }
+
+    @Test(expected = MathIllegalArgumentException.class)
+    public void testZeroConfidencelevel() {
+        testStatistic.getWilsonScoreInterval(trials, successes, 0d);
+    }
+
+    @Test(expected = MathIllegalArgumentException.class)
+    public void testOneConfidencelevel() {
+        testStatistic.getWilsonScoreInterval(trials, successes, 1d);
+    }
+
+    @Test(expected = MathIllegalArgumentException.class)
+    public void testZeroTrials() {
+        testStatistic.getWilsonScoreInterval(0, 0, confidenceLevel);
+    }
+
+    @Test(expected = MathIllegalArgumentException.class)
+    public void testNegativeSuccesses() {
+        testStatistic.getWilsonScoreInterval(trials, -1, confidenceLevel);
+    }
+
+    @Test(expected = MathIllegalArgumentException.class)
+    public void testSuccessesExceedingTrials() {
+        testStatistic.getWilsonScoreInterval(trials, trials + 1, confidenceLevel);
+    }
+}