added removeData methods to the SimpleRegression class.

This makes it possible to support regression calculations across a
sliding window of (time-based) observations without having
to recalculate for the entire window every time.
JIRA: MATH-219

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/math/branches/MATH_2_0@687021 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Luc Maisonobe 2008-08-19 11:18:10 +00:00
parent be008314f3
commit 07e312f0c3
3 changed files with 153 additions and 0 deletions

View File

@ -139,6 +139,39 @@ public class SimpleRegression implements Serializable {
}
}
/**
 * Removes the observation (x,y) from the regression data set.
 * <p>
 * Mirrors the addData method. This method permits the use of
 * SimpleRegression instances in streaming mode where the regression
 * is applied to a sliding "window" of observations, however the caller is
 * responsible for maintaining the set of observations in the window.</p>
 * <p>
 * The method has no effect if there are no points of data (i.e. n=0).
 * Removing the only remaining observation (n=1) resets the accumulated
 * sums and means to zero, leaving the model empty.</p>
 *
 * @param x independent variable value
 * @param y dependent variable value
 */
public void removeData(double x, double y) {
    if (n <= 0) {
        // Nothing to remove
        return;
    }
    if (n == 1) {
        // The downdating formulas below divide by (n - 1). With a single
        // observation that divisor is zero, which would turn the sums and
        // means into NaN/Infinity. Reset the state explicitly instead.
        sumXX = 0d;
        sumYY = 0d;
        sumXY = 0d;
        sumX = 0d;
        sumY = 0d;
        xbar = 0d;
        ybar = 0d;
        n = 0;
        return;
    }
    final double count = n;
    final double remaining = n - 1.0;
    final double dx = x - xbar;
    final double dy = y - ybar;
    // Same evaluation order as before: (d * d * n) / (n - 1)
    sumXX -= dx * dx * count / remaining;
    sumYY -= dy * dy * count / remaining;
    sumXY -= dx * dy * count / remaining;
    xbar -= dx / remaining;
    ybar -= dy / remaining;
    sumX -= x;
    sumY -= y;
    n--;
    if (n > 2) {
        distribution.setDegreesOfFreedom(n - 2);
    }
}
/**
* Adds the observations represented by the elements in
* <code>data</code>.
@ -161,6 +194,26 @@ public class SimpleRegression implements Serializable {
}
}
/**
 * Removes each observation listed in <code>data</code> from the model.
 * <p>
 * Every row is read as an (x, y) pair: element 0 is x, element 1 is y.
 * Rows are processed in order and processing stops as soon as the model
 * holds no more observations, so when the array has more rows than the
 * current n only the first n rows are used. As with the single-point
 * variant, this supports a sliding "window" of observations, and the
 * caller remains responsible for tracking which observations are in
 * the window.</p>
 * <p>
 * To remove all data, use <code>clear()</code>.</p>
 *
 * @param data array of observations to be removed
 */
public void removeData(double[][] data) {
    int row = 0;
    // Continue while there are rows left and the model is not yet empty
    while (row < data.length && n > 0) {
        final double[] observation = data[row];
        removeData(observation[0], observation[1]);
        row++;
    }
}
/**
* Clears all data from the model.
*/

View File

@ -39,6 +39,11 @@ The <action> type attribute can be add,update,fix,remove.
</properties>
<body>
<release version="2.0" date="TBD" description="TBD">
<action dev="luc" type="add" issue="MATH-219" due-to="Andrew Berry">
Added removeData methods for the SimpleRegression class. This makes it
possible to support regression calculations across a sliding window of (time-based)
observations without having to recalculate for the entire window every time.
</action>
<action dev="luc" type="add" due-to="Andreas Rieger">
Support for one dimensional vectors has been added to the linear algebra
package with a RealVector interface, a RealVectorImpl default implementation

View File

@ -62,6 +62,15 @@ public final class SimpleRegressionTest extends TestCase {
{35.5, 8.8 }, {18.6, 4.9 }, {15.3, 4.5 }, {7.9, 2.5 }, {0.0, 1.1 }
};
/*
* Points to remove in the remove tests, all taken from infData:
* removeSingle is a one-row array holding infData[1],
* removeMultiple is a two-row array holding infData[1] and infData[2],
* removeX / removeY are the x and y values of infData[0].
*/
private double[][] removeSingle = {infData[1]};
private double[][] removeMultiple = { infData[1], infData[2] };
private double removeX = infData[0][0];
private double removeY = infData[0][1];
/*
* Data with bad linear fit
*/
@ -275,4 +284,90 @@ public final class SimpleRegressionTest extends TestCase {
}
assertTrue(reg.getSumSquaredErrors() >= 0.0);
}
// Removing one (x, y) observation and adding it back must not change the inference results
public void testRemoveXY() throws Exception {
    // Load the inference data, then drop and restore a single point
    SimpleRegression reg = new SimpleRegression();
    reg.addData(infData);
    reg.removeData(removeX, removeY);
    reg.addData(removeX, removeY);

    // After the round trip the reference inference values must still hold
    double slopeStdErr = reg.getSlopeStdErr();
    assertEquals("slope std err", 0.011448491, slopeStdErr, 1E-10);

    double interceptStdErr = reg.getInterceptStdErr();
    assertEquals("std err intercept", 0.286036932, interceptStdErr, 1E-8);

    double significance = reg.getSignificance();
    assertEquals("significance", 4.596e-07, significance, 1E-8);

    double halfWidth = reg.getSlopeConfidenceInterval();
    assertEquals("slope conf interval half-width", 0.0270713794287, halfWidth, 1E-8);
}
// Removing a one-row array of observations and adding it back must not change the inference results
public void testRemoveSingle() throws Exception {
    // Load the inference data, then drop and restore the one-row array
    SimpleRegression reg = new SimpleRegression();
    reg.addData(infData);
    reg.removeData(removeSingle);
    reg.addData(removeSingle);

    // After the round trip the reference inference values must still hold
    double slopeStdErr = reg.getSlopeStdErr();
    assertEquals("slope std err", 0.011448491, slopeStdErr, 1E-10);

    double interceptStdErr = reg.getInterceptStdErr();
    assertEquals("std err intercept", 0.286036932, interceptStdErr, 1E-8);

    double significance = reg.getSignificance();
    assertEquals("significance", 4.596e-07, significance, 1E-8);

    double halfWidth = reg.getSlopeConfidenceInterval();
    assertEquals("slope conf interval half-width", 0.0270713794287, halfWidth, 1E-8);
}
// Removing several observations at once and adding them back must not change the inference results
public void testRemoveMultiple() throws Exception {
    // Load the inference data, then drop and restore the two-row array
    SimpleRegression reg = new SimpleRegression();
    reg.addData(infData);
    reg.removeData(removeMultiple);
    reg.addData(removeMultiple);

    // After the round trip the reference inference values must still hold
    double slopeStdErr = reg.getSlopeStdErr();
    assertEquals("slope std err", 0.011448491, slopeStdErr, 1E-10);

    double interceptStdErr = reg.getInterceptStdErr();
    assertEquals("std err intercept", 0.286036932, interceptStdErr, 1E-8);

    double significance = reg.getSignificance();
    assertEquals("significance", 4.596e-07, significance, 1E-8);

    double halfWidth = reg.getSlopeConfidenceInterval();
    assertEquals("slope conf interval half-width", 0.0270713794287, halfWidth, 1E-8);
}
// Removing an observation from an empty model must leave it empty
public void testRemoveObsFromEmpty() {
    SimpleRegression regression = new SimpleRegression();
    regression.removeData(removeX, removeY);
    // Expected value goes first so a failure reports "expected 0 but was <n>"
    assertEquals("n after removing from an empty model", 0, regression.getN());
}
// Removing the only observation must leave the model empty
public void testRemoveObsFromSingle() {
    SimpleRegression regression = new SimpleRegression();
    regression.addData(removeX, removeY);
    regression.removeData(removeX, removeY);
    // Expected value goes first so a failure reports "expected 0 but was <n>"
    assertEquals("n after removing the only observation", 0, regression.getN());
}
// Removing exactly the observations that were added must leave the model empty
public void testRemoveMultipleToEmpty() {
    SimpleRegression regression = new SimpleRegression();
    regression.addData(removeMultiple);
    regression.removeData(removeMultiple);
    // Expected value goes first so a failure reports "expected 0 but was <n>"
    assertEquals("n after removing every observation", 0, regression.getN());
}
// Removing more observations than the model holds (array size > n) must stop at empty
public void testRemoveMultiplePastEmpty() {
    SimpleRegression regression = new SimpleRegression();
    regression.addData(removeX, removeY);
    regression.removeData(removeMultiple);
    // Expected value goes first so a failure reports "expected 0 but was <n>"
    assertEquals("n after removing more observations than present", 0, regression.getN());
}
}