Changed Mean.evaluate() to use a two-pass algorithm, improving accuracy
by exploiting the fact that this method has access to the full array of data values. git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@602306 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8583cdfe79
commit
458abe99c2
|
@ -22,24 +22,32 @@ import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStati
|
|||
import org.apache.commons.math.stat.descriptive.summary.Sum;
|
||||
|
||||
/**
|
||||
* Returns the arithmetic mean of the available values. Uses the definitional
|
||||
* formula:
|
||||
* <p>Computes the arithmetic mean of a set of values. Uses the definitional
|
||||
* formula:</p>
|
||||
* <p>
|
||||
* mean = sum(x_i) / n
|
||||
* <p>
|
||||
* where <code>n</code> is the number of observations.
|
||||
* <p>
|
||||
* The value of the statistic is computed using the following recursive
|
||||
* updating algorithm:
|
||||
* <p>
|
||||
* </p>
|
||||
* <p>where <code>n</code> is the number of observations.
|
||||
* </p>
|
||||
* <p>When {@link #increment(double)} is used to add data incrementally from a
|
||||
* stream of (unstored) values, the value of the statistic that
|
||||
* {@link #getResult()} returns is computed using the following recursive
|
||||
* updating algorithm: </p>
|
||||
* <ol>
|
||||
* <li>Initialize <code>m = </code> the first value</li>
|
||||
* <li>For each additional value, update using <br>
|
||||
* <code>m = m + (new value - m) / (number of observations)</code></li>
|
||||
* </ol>
|
||||
* <p> If {@link #evaluate(double[])} is used to compute the mean of an array
|
||||
* of stored values, a two-pass, corrected algorithm is used, starting with
|
||||
* the definitional formula computed using the array of stored values and then
|
||||
* correcting this by adding the mean deviation of the data values from the
|
||||
* arithmetic mean. See, e.g. "Comparison of Several Algorithms for Computing
|
||||
* Sample Means and Variances," Robert F. Ling, Journal of the American
|
||||
* Statistical Association, Vol. 69, No. 348 (Dec., 1974), pp. 859-866. </p>
|
||||
* <p>
|
||||
* Returns <code>Double.NaN</code> if the dataset is empty.
|
||||
* <p>
|
||||
* </p>
|
||||
* <strong>Note that this implementation is not synchronized.</strong> If
|
||||
* multiple threads access an instance of this class concurrently, and at least
|
||||
* one of the threads invokes the <code>increment()</code> or
|
||||
|
@ -131,7 +139,17 @@ public class Mean extends AbstractStorelessUnivariateStatistic
|
|||
public double evaluate(final double[] values,final int begin, final int length) {
|
||||
if (test(values, begin, length)) {
|
||||
Sum sum = new Sum();
|
||||
return sum.evaluate(values, begin, length) / ((double) length);
|
||||
double sampleSize = (double) length;
|
||||
|
||||
// Compute initial estimate using definitional formula
|
||||
double xbar = sum.evaluate(values, begin, length) / sampleSize;
|
||||
|
||||
// Compute correction factor in second pass
|
||||
double correction = 0;
|
||||
for (int i = begin; i < begin + length; i++) {
|
||||
correction += (values[i] - xbar);
|
||||
}
|
||||
return xbar + (correction/sampleSize);
|
||||
}
|
||||
return Double.NaN;
|
||||
}
|
||||
|
|
|
@ -61,57 +61,59 @@ public class CertifiedDataTest extends TestCase {
|
|||
}
|
||||
|
||||
/**
|
||||
* Test StorelessDescriptiveStatistics
|
||||
* Test SummaryStatistics - implementations that do not store the data
|
||||
* and use single pass algorithms to compute statistics
|
||||
*/
|
||||
public void testUnivariateImpl() throws Exception {
|
||||
public void testSummaryStatistics() throws Exception {
|
||||
SummaryStatistics u = SummaryStatistics.newInstance(SummaryStatisticsImpl.class);
|
||||
loadStats("data/PiDigits.txt", u);
|
||||
assertEquals("PiDigits: std", std, u.getStandardDeviation(), .0000000000001);
|
||||
assertEquals("PiDigits: mean", mean, u.getMean(), .0000000000001);
|
||||
assertEquals("PiDigits: std", std, u.getStandardDeviation(), 1E-13);
|
||||
assertEquals("PiDigits: mean", mean, u.getMean(), 1E-13);
|
||||
|
||||
loadStats("data/Mavro.txt", u);
|
||||
assertEquals("Mavro: std", std, u.getStandardDeviation(), .00000000000001);
|
||||
assertEquals("Mavro: mean", mean, u.getMean(), .00000000000001);
|
||||
assertEquals("Mavro: std", std, u.getStandardDeviation(), 1E-14);
|
||||
assertEquals("Mavro: mean", mean, u.getMean(), 1E-14);
|
||||
|
||||
//loadStats("data/Michelso.txt");
|
||||
//assertEquals("Michelso: std", std, u.getStandardDeviation(), .00000000000001);
|
||||
//assertEquals("Michelso: mean", mean, u.getMean(), .00000000000001);
|
||||
loadStats("data/Michelso.txt", u);
|
||||
assertEquals("Michelso: std", std, u.getStandardDeviation(), 1E-13);
|
||||
assertEquals("Michelso: mean", mean, u.getMean(), 1E-13);
|
||||
|
||||
loadStats("data/NumAcc1.txt", u);
|
||||
assertEquals("NumAcc1: std", std, u.getStandardDeviation(), .00000000000001);
|
||||
assertEquals("NumAcc1: mean", mean, u.getMean(), .00000000000001);
|
||||
assertEquals("NumAcc1: std", std, u.getStandardDeviation(), 1E-14);
|
||||
assertEquals("NumAcc1: mean", mean, u.getMean(), 1E-14);
|
||||
|
||||
//loadStats("data/NumAcc2.txt");
|
||||
//assertEquals("NumAcc2: std", std, u.getStandardDeviation(), .000000001);
|
||||
//assertEquals("NumAcc2: mean", mean, u.getMean(), .00000000000001);
|
||||
loadStats("data/NumAcc2.txt", u);
|
||||
assertEquals("NumAcc2: std", std, u.getStandardDeviation(), 1E-14);
|
||||
assertEquals("NumAcc2: mean", mean, u.getMean(), 1E-14);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test StorelessDescriptiveStatistics
|
||||
* Test DescriptiveStatistics - implementations that store full array of
|
||||
* values and execute multi-pass algorithms
|
||||
*/
|
||||
public void testStoredUnivariateImpl() throws Exception {
|
||||
public void testDescriptiveStatistics() throws Exception {
|
||||
|
||||
DescriptiveStatistics u = DescriptiveStatistics.newInstance();
|
||||
|
||||
loadStats("data/PiDigits.txt", u);
|
||||
assertEquals("PiDigits: std", std, u.getStandardDeviation(), .0000000000001);
|
||||
assertEquals("PiDigits: mean", mean, u.getMean(), .0000000000001);
|
||||
assertEquals("PiDigits: std", std, u.getStandardDeviation(), 1E-14);
|
||||
assertEquals("PiDigits: mean", mean, u.getMean(), 1E-14);
|
||||
|
||||
loadStats("data/Mavro.txt", u);
|
||||
assertEquals("Mavro: std", std, u.getStandardDeviation(), .00000000000001);
|
||||
assertEquals("Mavro: mean", mean, u.getMean(), .00000000000001);
|
||||
assertEquals("Mavro: std", std, u.getStandardDeviation(), 1E-14);
|
||||
assertEquals("Mavro: mean", mean, u.getMean(), 1E-14);
|
||||
|
||||
//loadStats("data/Michelso.txt");
|
||||
//assertEquals("Michelso: std", std, u.getStandardDeviation(), .00000000000001);
|
||||
//assertEquals("Michelso: mean", mean, u.getMean(), .00000000000001);
|
||||
loadStats("data/Michelso.txt", u);
|
||||
assertEquals("Michelso: std", std, u.getStandardDeviation(), 1E-14);
|
||||
assertEquals("Michelso: mean", mean, u.getMean(), 1E-14);
|
||||
|
||||
loadStats("data/NumAcc1.txt", u);
|
||||
assertEquals("NumAcc1: std", std, u.getStandardDeviation(), .00000000000001);
|
||||
assertEquals("NumAcc1: mean", mean, u.getMean(), .00000000000001);
|
||||
assertEquals("NumAcc1: std", std, u.getStandardDeviation(), 1E-14);
|
||||
assertEquals("NumAcc1: mean", mean, u.getMean(), 1E-14);
|
||||
|
||||
//loadStats("data/NumAcc2.txt");
|
||||
//assertEquals("NumAcc2: std", std, u.getStandardDeviation(), .000000001);
|
||||
//assertEquals("NumAcc2: mean", mean, u.getMean(), .00000000000001);
|
||||
loadStats("data/NumAcc2.txt", u);
|
||||
assertEquals("NumAcc2: std", std, u.getStandardDeviation(), 1E-14);
|
||||
assertEquals("NumAcc2: mean", mean, u.getMean(), 1E-14);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -111,7 +111,12 @@ Commons Math Release Notes</title>
|
|||
and SummaryStatistics concrete classes. Pushed implementations up
|
||||
from DescriptiveStatisticsImpl, SummaryStatisticsImpl. Made
|
||||
implementations of statistics configurable via setters.
|
||||
</action>
|
||||
</action>
|
||||
<action dev="psteitz" type="fix" issue="MATH-174">
|
||||
Changed Mean.evaluate() to use a two-pass algorithm, improving accuracy
|
||||
by exploiting the fact that this method has access to the full
|
||||
array of data values.
|
||||
</action>
|
||||
</release>
|
||||
<release version="1.1" date="2005-12-17"
|
||||
description="This is a maintenance release containing bug fixes and enhancements.
|
||||
|
|
Loading…
Reference in New Issue