Added append method to SimpleRegression, making this class map/reducible.

JIRA: MATH-987
Contributed by Ajo Fod

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1488849 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Phil Steitz 2013-06-03 05:37:13 +00:00
parent 2a96c0d647
commit 0ee817712b
3 changed files with 145 additions and 3 deletions

View File

@ -51,6 +51,9 @@ If the output is not quite correct, check for invisible trailing spaces!
</properties>
<body>
<release version="x.y" date="TBD" description="TBD">
<action dev="psteitz" type="update" issue="MATH-987" due-to="Ajo Fod">
Added append method to SimpleRegression, making this class map/reducible.
</action>
<action dev="psteitz" type="update" issue="MATH-978" due-to="Ajo Fod">
Added append method to StorelessCovariance, making this class map/reducible.
</action>

View File

@ -18,10 +18,10 @@
package org.apache.commons.math3.stat.regression;
import java.io.Serializable;
import org.apache.commons.math3.exception.OutOfRangeException;
import org.apache.commons.math3.distribution.TDistribution;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.exception.NoDataException;
import org.apache.commons.math3.exception.OutOfRangeException;
import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.Precision;
@ -156,6 +156,47 @@ public class SimpleRegression implements Serializable, UpdatingMultipleLinearReg
n++;
}
/**
 * Appends data from another regression calculation to this one.
 *
 * <p>The mean update formulae are based on a paper written by Philippe
 * P&eacute;bay:
 * <a
 * href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
 * Formulas for Robust, One-Pass Parallel Computation of Covariances and
 * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report
 * SAND2008-6212, Sandia National Laboratories.</p>
 *
 * <p>The fields of {@code reg} are only read. Whether the centered update
 * or the plain summation is applied is decided solely by this instance's
 * {@code hasIntercept} flag; {@code reg} is not checked for a matching
 * setting, so callers should only combine models configured the same way.</p>
 *
 * @param reg model to append data from
 */
public void append(SimpleRegression reg) {
    if (n == 0) {
        // Nothing accumulated here yet: adopt the other model's statistics as-is.
        xbar = reg.xbar;
        ybar = reg.ybar;
        sumXX = reg.sumXX;
        sumYY = reg.sumYY;
        sumXY = reg.sumXY;
    } else if (hasIntercept) {
        final double total = (double) (reg.n + n);
        // Weight of the incoming data in the combined means.
        final double fact1 = reg.n / total;
        // Promote to double BEFORE multiplying: the product of the two
        // counts evaluated in integer arithmetic can overflow for very
        // large data sets and silently corrupt the correction term.
        final double fact2 = ((double) n) * reg.n / total;
        final double dx = reg.xbar - xbar;
        final double dy = reg.ybar - ybar;
        // Combine the centered sums, correcting for the shift between the two means.
        sumXX += reg.sumXX + dx * dx * fact2;
        sumYY += reg.sumYY + dy * dy * fact2;
        sumXY += reg.sumXY + dx * dy * fact2;
        // Means are updated last because dx and dy were taken against the old values.
        xbar += dx * fact1;
        ybar += dy * fact1;
    } else {
        // No mean correction is applied in this mode; the sums simply add.
        sumXX += reg.sumXX;
        sumYY += reg.sumYY;
        sumXY += reg.sumXY;
    }
    sumX += reg.sumX;
    sumY += reg.sumY;
    n += reg.n;
}
/**
* Removes the observation (x,y) from the regression data set.

View File

@ -20,6 +20,7 @@ import java.util.Random;
import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.exception.OutOfRangeException;
import org.apache.commons.math3.random.ISAACRandom;
import org.junit.Assert;
import org.junit.Test;
@ -109,6 +110,103 @@ public final class SimpleRegressionTest {
{4,6}
};
/**
 * Verifies that merging SimpleRegression objects built from subsets of the
 * data yields the same statistics as a single SimpleRegression fed with all
 * of the data. The check is run first with {@code false} and then with
 * {@code true} as the intercept setting.
 */
@Test
public void testAppend() {
    for (final boolean includeIntercept : new boolean[] {false, true}) {
        check(includeIntercept);
    }
}
/**
 * Feeds randomly generated samples both to one model directly and, in
 * batches, to small models that are merged with {@code append}; after each
 * merge the aggregated model must report the same statistics as the model
 * that received every sample directly.
 *
 * @param includeIntercept value passed to the constructor of every
 * SimpleRegression created by this check
 */
private void check(boolean includeIntercept) {
    final int subsetCount = 2;
    final int samplesPerSubset = 5;
    // Fixed seed keeps the run reproducible; any other seed may be used.
    final ISAACRandom generator = new ISAACRandom(10L);
    // Receives every sample directly.
    final SimpleRegression direct = new SimpleRegression(includeIntercept);
    // Receives the samples only through append().
    final SimpleRegression merged = new SimpleRegression(includeIntercept);
    for (int subset = 0; subset < subsetCount; subset++) {
        final double slope = generator.nextDouble();
        final SimpleRegression partial = new SimpleRegression(includeIntercept);
        for (int k = 0; k < samplesPerSubset; k++) {
            final double x = generator.nextDouble();
            final double noise = generator.nextDouble();
            final double y = x * slope + noise;
            partial.addData(x, y);
            direct.addData(x, y);
        }
        merged.append(partial);
        final boolean sameStatistics = equals(merged, direct, 1E-6);
        Assert.assertTrue(sameStatistics);
    }
}
/**
 * Compares the statistics reported by two models. The observation counts
 * must match exactly; every other reported statistic must differ by no more
 * than {@code tol}. Comparison stops at the first mismatch.
 *
 * <p>A statistic is rejected only when the absolute difference is strictly
 * greater than {@code tol}, so a difference that evaluates to NaN does not
 * cause a rejection.</p>
 *
 * @param model1 first model
 * @param model2 second model
 * @param tol tolerance
 * @return true if the two models report the same regression stats
 */
private boolean equals(SimpleRegression model1, SimpleRegression model2, double tol) {
    return model1.getN() == model2.getN()
        && !differs(model1.getIntercept(), model2.getIntercept(), tol)
        && !differs(model1.getInterceptStdErr(), model2.getInterceptStdErr(), tol)
        && !differs(model1.getMeanSquareError(), model2.getMeanSquareError(), tol)
        && !differs(model1.getR(), model2.getR(), tol)
        && !differs(model1.getRegressionSumSquares(), model2.getRegressionSumSquares(), tol)
        && !differs(model1.getRSquare(), model2.getRSquare(), tol)
        && !differs(model1.getSignificance(), model2.getSignificance(), tol)
        && !differs(model1.getSlope(), model2.getSlope(), tol)
        && !differs(model1.getSlopeConfidenceInterval(), model2.getSlopeConfidenceInterval(), tol)
        && !differs(model1.getSlopeStdErr(), model2.getSlopeStdErr(), tol)
        && !differs(model1.getSumOfCrossProducts(), model2.getSumOfCrossProducts(), tol)
        && !differs(model1.getSumSquaredErrors(), model2.getSumSquaredErrors(), tol)
        && !differs(model1.getTotalSumSquares(), model2.getTotalSumSquares(), tol)
        && !differs(model1.getXSumSquares(), model2.getXSumSquares(), tol);
}

/**
 * Tells whether two values are further apart than the tolerance.
 *
 * @param a first value
 * @param b second value
 * @param tol tolerance
 * @return true iff |a - b| is strictly greater than tol (false when the
 * difference is NaN)
 */
private static boolean differs(double a, double b, double tol) {
    return Math.abs(a - b) > tol;
}
@Test
public void testRegressIfaceMethod(){
final SimpleRegression regression = new SimpleRegression(true);