From 0ee817712b9e8330fdfd6985752aa8f1b25ba9d9 Mon Sep 17 00:00:00 2001 From: Phil Steitz Date: Mon, 3 Jun 2013 05:37:13 +0000 Subject: [PATCH] Added append method to SimpleRegression, making this class map/reducible. JIRA: MATH-987 Contributed by Ajo Fod git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/trunk@1488849 13f79535-47bb-0310-9956-ffa450edef68 --- src/changes/changes.xml | 3 + .../stat/regression/SimpleRegression.java | 43 +++++++- .../stat/regression/SimpleRegressionTest.java | 102 +++++++++++++++++- 3 files changed, 145 insertions(+), 3 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index cd77eb9bf..a50812f4b 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -51,6 +51,9 @@ If the output is not quite correct, check for invisible trailing spaces! + + Added append method to SimpleRegression, making this class map/reducible. + Added append method to StorelessCovariance, making this class map/reducible. diff --git a/src/main/java/org/apache/commons/math3/stat/regression/SimpleRegression.java b/src/main/java/org/apache/commons/math3/stat/regression/SimpleRegression.java index 4ab6254e0..c1a681a11 100644 --- a/src/main/java/org/apache/commons/math3/stat/regression/SimpleRegression.java +++ b/src/main/java/org/apache/commons/math3/stat/regression/SimpleRegression.java @@ -18,10 +18,10 @@ package org.apache.commons.math3.stat.regression; import java.io.Serializable; -import org.apache.commons.math3.exception.OutOfRangeException; import org.apache.commons.math3.distribution.TDistribution; import org.apache.commons.math3.exception.MathIllegalArgumentException; import org.apache.commons.math3.exception.NoDataException; +import org.apache.commons.math3.exception.OutOfRangeException; import org.apache.commons.math3.exception.util.LocalizedFormats; import org.apache.commons.math3.util.FastMath; import org.apache.commons.math3.util.Precision; @@ -156,6 +156,47 @@ public class SimpleRegression implements 
Serializable, UpdatingMultipleLinearReg n++; } + /** + * Appends data from another regression calculation to this one. + * + *

<p>The mean update formulae are based on a paper written by Philippe + * Pébay: + * + * Formulas for Robust, One-Pass Parallel Computation of Covariances and + * Arbitrary-Order Statistical Moments, 2008, Technical Report + * SAND2008-6212, Sandia National Laboratories.</p>

+ * + * @param reg model to append data from + */ + public void append(SimpleRegression reg) { + if (n == 0) { + xbar = reg.xbar; + ybar = reg.ybar; + sumXX = reg.sumXX; + sumYY = reg.sumYY; + sumXY = reg.sumXY; + } else { + if (hasIntercept) { + final double fact1 = reg.n / (double) (reg.n + n); + final double fact2 = n * reg.n / (double) (reg.n + n); + final double dx = reg.xbar - xbar; + final double dy = reg.ybar - ybar; + sumXX += reg.sumXX + dx * dx * fact2; + sumYY += reg.sumYY + dy * dy * fact2; + sumXY += reg.sumXY + dx * dy * fact2; + xbar += dx * fact1; + ybar += dy * fact1; + }else{ + sumXX += reg.sumXX; + sumYY += reg.sumYY; + sumXY += reg.sumXY; + } + } + sumX += reg.sumX; + sumY += reg.sumY; + n += reg.n; + } /** * Removes the observation (x,y) from the regression data set. diff --git a/src/test/java/org/apache/commons/math3/stat/regression/SimpleRegressionTest.java b/src/test/java/org/apache/commons/math3/stat/regression/SimpleRegressionTest.java index 73870d3ce..495f60cae 100644 --- a/src/test/java/org/apache/commons/math3/stat/regression/SimpleRegressionTest.java +++ b/src/test/java/org/apache/commons/math3/stat/regression/SimpleRegressionTest.java @@ -20,6 +20,7 @@ import java.util.Random; import org.apache.commons.math3.exception.MathIllegalArgumentException; import org.apache.commons.math3.exception.OutOfRangeException; +import org.apache.commons.math3.random.ISAACRandom; import org.junit.Assert; import org.junit.Test; @@ -109,6 +110,103 @@ public final class SimpleRegressionTest { {4,6} }; + + /** + * Test that the SimpleRegression objects generated from combining two + * SimpleRegression objects created from subsets of data are identical to + * SimpleRegression objects created from the combined data. 
+ */ + @Test + public void testAppend() { + check(false); + check(true); + } + + /** + * Checks that adding data to a single model gives the same result + * as adding "parts" of the dataset to smaller models and using append + * to aggregate the smaller models. + * + * @param includeIntercept flag passed to the constructor of every SimpleRegression built by this check + */ + private void check(boolean includeIntercept) { + final int sets = 2; + final ISAACRandom rand = new ISAACRandom(10L);// Seed can be changed + final SimpleRegression whole = new SimpleRegression(includeIntercept);// regression of the whole set + final SimpleRegression parts = new SimpleRegression(includeIntercept);// regression with parts. + + for (int s = 0; s < sets; s++) {// loop through each subset of data. + final double coef = rand.nextDouble(); + final SimpleRegression sub = new SimpleRegression(includeIntercept);// sub regression + for (int i = 0; i < 5; i++) { // loop through individual samples. + final double x = rand.nextDouble(); + final double y = x * coef + rand.nextDouble();// some noise + sub.addData(x, y); + whole.addData(x, y); + } + parts.append(sub); + Assert.assertTrue(equals(parts, whole, 1E-6)); + } + } + + /** + * Returns true iff the statistics reported by model1 are all within tol of + * those reported by model2. 
+ * + * @param model1 first model + * @param model2 second model + * @param tol tolerance + * @return true if the two models report the same regression stats + */ + private boolean equals(SimpleRegression model1, SimpleRegression model2, double tol) { + if (model1.getN() != model2.getN()) { + return false; + } + if (Math.abs(model1.getIntercept() - model2.getIntercept()) > tol) { + return false; + } + if (Math.abs(model1.getInterceptStdErr() - model2.getInterceptStdErr()) > tol) { + return false; + } + if (Math.abs(model1.getMeanSquareError() - model2.getMeanSquareError()) > tol) { + return false; + } + if (Math.abs(model1.getR() - model2.getR()) > tol) { + return false; + } + if (Math.abs(model1.getRegressionSumSquares() - model2.getRegressionSumSquares()) > tol) { + return false; + } + if (Math.abs(model1.getRSquare() - model2.getRSquare()) > tol) { + return false; + } + if (Math.abs(model1.getSignificance() - model2.getSignificance()) > tol) { + return false; + } + if (Math.abs(model1.getSlope() - model2.getSlope()) > tol) { + return false; + } + if (Math.abs(model1.getSlopeConfidenceInterval() - model2.getSlopeConfidenceInterval()) > tol) { + return false; + } + if (Math.abs(model1.getSlopeStdErr() - model2.getSlopeStdErr()) > tol) { + return false; + } + if (Math.abs(model1.getSumOfCrossProducts() - model2.getSumOfCrossProducts()) > tol) { + return false; + } + if (Math.abs(model1.getSumSquaredErrors() - model2.getSumSquaredErrors()) > tol) { + return false; + } + if (Math.abs(model1.getTotalSumSquares() - model2.getTotalSumSquares()) > tol) { + return false; + } + if (Math.abs(model1.getXSumSquares() - model2.getXSumSquares()) > tol) { + return false; + } + return true; + } + @Test public void testRegressIfaceMethod(){ final SimpleRegression regression = new SimpleRegression(true); @@ -156,7 +254,7 @@ public final class SimpleRegressionTest { Assert.assertEquals("MSE", regressionIntOnly.getMeanSquareError(), onlyInt.getMeanSquareError() ,1.0E-8); } - + /** * 
Verify that regress generates exceptions as advertised for bad model specifications. */ @@ -191,7 +289,7 @@ public final class SimpleRegressionTest { } catch (OutOfRangeException ex) { // Expected } - + // With intercept final SimpleRegression regression = new SimpleRegression(true); regression.addData(noint2[0][1], noint2[0][0]);