Changed Mean.evaluate() to use a two-pass algorithm, improving accuracy
by exploiting the fact that this method has access to the full array of data values. git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@602306 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8583cdfe79
commit
458abe99c2
|
@ -22,24 +22,32 @@ import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStati
|
||||||
import org.apache.commons.math.stat.descriptive.summary.Sum;
|
import org.apache.commons.math.stat.descriptive.summary.Sum;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the arithmetic mean of the available values. Uses the definitional
|
* <p>Computes the arithmetic mean of a set of values. Uses the definitional
|
||||||
* formula:
|
* formula:</p>
|
||||||
* <p>
|
* <p>
|
||||||
* mean = sum(x_i) / n
|
* mean = sum(x_i) / n
|
||||||
* <p>
|
* </p>
|
||||||
* where <code>n</code> is the number of observations.
|
* <p>where <code>n</code> is the number of observations.
|
||||||
* <p>
|
* </p>
|
||||||
* The value of the statistic is computed using the following recursive
|
* <p>When {@link #increment(double)} is used to add data incrementally from a
|
||||||
* updating algorithm:
|
* stream of (unstored) values, the value of the statistic that
|
||||||
* <p>
|
* {@link #getResult()} returns is computed using the following recursive
|
||||||
|
* updating algorithm: </p>
|
||||||
* <ol>
|
* <ol>
|
||||||
* <li>Initialize <code>m = </code> the first value</li>
|
* <li>Initialize <code>m = </code> the first value</li>
|
||||||
* <li>For each additional value, update using <br>
|
* <li>For each additional value, update using <br>
|
||||||
* <code>m = m + (new value - m) / (number of observations)</code></li>
|
* <code>m = m + (new value - m) / (number of observations)</code></li>
|
||||||
* </ol>
|
* </ol>
|
||||||
|
* <p> If {@link #evaluate(double[])} is used to compute the mean of an array
|
||||||
|
* of stored values, a two-pass, corrected algorithm is used, starting with
|
||||||
|
* the definitional formula computed using the array of stored values and then
|
||||||
|
* correcting this by adding the mean deviation of the data values from the
|
||||||
|
* arithmetic mean. See, e.g. "Comparison of Several Algorithms for Computing
|
||||||
|
* Sample Means and Variances," Robert F. Ling, Journal of the American
|
||||||
|
* Statistical Association, Vol. 69, No. 348 (Dec., 1974), pp. 859-866. </p>
|
||||||
* <p>
|
* <p>
|
||||||
* Returns <code>Double.NaN</code> if the dataset is empty.
|
* Returns <code>Double.NaN</code> if the dataset is empty.
|
||||||
* <p>
|
* </p>
|
||||||
* <strong>Note that this implementation is not synchronized.</strong> If
|
* <strong>Note that this implementation is not synchronized.</strong> If
|
||||||
* multiple threads access an instance of this class concurrently, and at least
|
* multiple threads access an instance of this class concurrently, and at least
|
||||||
* one of the threads invokes the <code>increment()</code> or
|
* one of the threads invokes the <code>increment()</code> or
|
||||||
|
@ -131,7 +139,17 @@ public class Mean extends AbstractStorelessUnivariateStatistic
|
||||||
public double evaluate(final double[] values,final int begin, final int length) {
|
public double evaluate(final double[] values,final int begin, final int length) {
|
||||||
if (test(values, begin, length)) {
|
if (test(values, begin, length)) {
|
||||||
Sum sum = new Sum();
|
Sum sum = new Sum();
|
||||||
return sum.evaluate(values, begin, length) / ((double) length);
|
double sampleSize = (double) length;
|
||||||
|
|
||||||
|
// Compute initial estimate using definitional formula
|
||||||
|
double xbar = sum.evaluate(values, begin, length) / sampleSize;
|
||||||
|
|
||||||
|
// Compute correction factor in second pass
|
||||||
|
double correction = 0;
|
||||||
|
for (int i = begin; i < begin + length; i++) {
|
||||||
|
correction += (values[i] - xbar);
|
||||||
|
}
|
||||||
|
return xbar + (correction/sampleSize);
|
||||||
}
|
}
|
||||||
return Double.NaN;
|
return Double.NaN;
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,57 +61,59 @@ public class CertifiedDataTest extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test StorelessDescriptiveStatistics
|
* Test SummaryStatistics - implementations that do not store the data
|
||||||
|
* and use single pass algorithms to compute statistics
|
||||||
*/
|
*/
|
||||||
public void testUnivariateImpl() throws Exception {
|
public void testSummaryStatistics() throws Exception {
|
||||||
SummaryStatistics u = SummaryStatistics.newInstance(SummaryStatisticsImpl.class);
|
SummaryStatistics u = SummaryStatistics.newInstance(SummaryStatisticsImpl.class);
|
||||||
loadStats("data/PiDigits.txt", u);
|
loadStats("data/PiDigits.txt", u);
|
||||||
assertEquals("PiDigits: std", std, u.getStandardDeviation(), .0000000000001);
|
assertEquals("PiDigits: std", std, u.getStandardDeviation(), 1E-13);
|
||||||
assertEquals("PiDigits: mean", mean, u.getMean(), .0000000000001);
|
assertEquals("PiDigits: mean", mean, u.getMean(), 1E-13);
|
||||||
|
|
||||||
loadStats("data/Mavro.txt", u);
|
loadStats("data/Mavro.txt", u);
|
||||||
assertEquals("Mavro: std", std, u.getStandardDeviation(), .00000000000001);
|
assertEquals("Mavro: std", std, u.getStandardDeviation(), 1E-14);
|
||||||
assertEquals("Mavro: mean", mean, u.getMean(), .00000000000001);
|
assertEquals("Mavro: mean", mean, u.getMean(), 1E-14);
|
||||||
|
|
||||||
//loadStats("data/Michelso.txt");
|
loadStats("data/Michelso.txt", u);
|
||||||
//assertEquals("Michelso: std", std, u.getStandardDeviation(), .00000000000001);
|
assertEquals("Michelso: std", std, u.getStandardDeviation(), 1E-13);
|
||||||
//assertEquals("Michelso: mean", mean, u.getMean(), .00000000000001);
|
assertEquals("Michelso: mean", mean, u.getMean(), 1E-13);
|
||||||
|
|
||||||
loadStats("data/NumAcc1.txt", u);
|
loadStats("data/NumAcc1.txt", u);
|
||||||
assertEquals("NumAcc1: std", std, u.getStandardDeviation(), .00000000000001);
|
assertEquals("NumAcc1: std", std, u.getStandardDeviation(), 1E-14);
|
||||||
assertEquals("NumAcc1: mean", mean, u.getMean(), .00000000000001);
|
assertEquals("NumAcc1: mean", mean, u.getMean(), 1E-14);
|
||||||
|
|
||||||
//loadStats("data/NumAcc2.txt");
|
loadStats("data/NumAcc2.txt", u);
|
||||||
//assertEquals("NumAcc2: std", std, u.getStandardDeviation(), .000000001);
|
assertEquals("NumAcc2: std", std, u.getStandardDeviation(), 1E-14);
|
||||||
//assertEquals("NumAcc2: mean", mean, u.getMean(), .00000000000001);
|
assertEquals("NumAcc2: mean", mean, u.getMean(), 1E-14);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test StorelessDescriptiveStatistics
|
* Test DescriptiveStatistics - implementations that store full array of
|
||||||
|
* values and execute multi-pass algorithms
|
||||||
*/
|
*/
|
||||||
public void testStoredUnivariateImpl() throws Exception {
|
public void testDescriptiveStatistics() throws Exception {
|
||||||
|
|
||||||
DescriptiveStatistics u = DescriptiveStatistics.newInstance();
|
DescriptiveStatistics u = DescriptiveStatistics.newInstance();
|
||||||
|
|
||||||
loadStats("data/PiDigits.txt", u);
|
loadStats("data/PiDigits.txt", u);
|
||||||
assertEquals("PiDigits: std", std, u.getStandardDeviation(), .0000000000001);
|
assertEquals("PiDigits: std", std, u.getStandardDeviation(), 1E-14);
|
||||||
assertEquals("PiDigits: mean", mean, u.getMean(), .0000000000001);
|
assertEquals("PiDigits: mean", mean, u.getMean(), 1E-14);
|
||||||
|
|
||||||
loadStats("data/Mavro.txt", u);
|
loadStats("data/Mavro.txt", u);
|
||||||
assertEquals("Mavro: std", std, u.getStandardDeviation(), .00000000000001);
|
assertEquals("Mavro: std", std, u.getStandardDeviation(), 1E-14);
|
||||||
assertEquals("Mavro: mean", mean, u.getMean(), .00000000000001);
|
assertEquals("Mavro: mean", mean, u.getMean(), 1E-14);
|
||||||
|
|
||||||
//loadStats("data/Michelso.txt");
|
loadStats("data/Michelso.txt", u);
|
||||||
//assertEquals("Michelso: std", std, u.getStandardDeviation(), .00000000000001);
|
assertEquals("Michelso: std", std, u.getStandardDeviation(), 1E-14);
|
||||||
//assertEquals("Michelso: mean", mean, u.getMean(), .00000000000001);
|
assertEquals("Michelso: mean", mean, u.getMean(), 1E-14);
|
||||||
|
|
||||||
loadStats("data/NumAcc1.txt", u);
|
loadStats("data/NumAcc1.txt", u);
|
||||||
assertEquals("NumAcc1: std", std, u.getStandardDeviation(), .00000000000001);
|
assertEquals("NumAcc1: std", std, u.getStandardDeviation(), 1E-14);
|
||||||
assertEquals("NumAcc1: mean", mean, u.getMean(), .00000000000001);
|
assertEquals("NumAcc1: mean", mean, u.getMean(), 1E-14);
|
||||||
|
|
||||||
//loadStats("data/NumAcc2.txt");
|
loadStats("data/NumAcc2.txt", u);
|
||||||
//assertEquals("NumAcc2: std", std, u.getStandardDeviation(), .000000001);
|
assertEquals("NumAcc2: std", std, u.getStandardDeviation(), 1E-14);
|
||||||
//assertEquals("NumAcc2: mean", mean, u.getMean(), .00000000000001);
|
assertEquals("NumAcc2: mean", mean, u.getMean(), 1E-14);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -111,7 +111,12 @@ Commons Math Release Notes</title>
|
||||||
and SummaryStatistics concrete classes. Pushed implementations up
|
and SummaryStatistics concrete classes. Pushed implementations up
|
||||||
from DescriptiveStatisticsImpl, SummaryStatisticsImpl. Made
|
from DescriptiveStatisticsImpl, SummaryStatisticsImpl. Made
|
||||||
implementations of statistics configurable via setters.
|
implementations of statistics configurable via setters.
|
||||||
</action>
|
</action>
|
||||||
|
<action dev="psteitz" type="fix" issue="MATH-174">
|
||||||
|
Changed Mean.evaluate() to use a two-pass algorithm, improving accuracy
|
||||||
|
by exploiting the fact that this method has access to the full
|
||||||
|
array of data values.
|
||||||
|
</action>
|
||||||
</release>
|
</release>
|
||||||
<release version="1.1" date="2005-12-17"
|
<release version="1.1" date="2005-12-17"
|
||||||
description="This is a maintenance release containing bug fixes and enhancements.
|
description="This is a maintenance release containing bug fixes and enhancements.
|
||||||
|
|
Loading…
Reference in New Issue